From 116aa62bf54a39697e25f21d6cf6799f7faa1349 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Wed, 15 Aug 2007 14:28:22 +0000 Subject: Move the 3k reST doc tree in place. --- Doc/ACKS.txt | 196 ++ Doc/Makefile | 62 + Doc/README.txt | 121 + Doc/TODO.txt | 6 + Doc/about.rst | 33 + Doc/bugs.rst | 59 + Doc/c-api/abstract.rst | 997 ++++++++ Doc/c-api/concrete.rst | 3676 +++++++++++++++++++++++++++ Doc/c-api/exceptions.rst | 515 ++++ Doc/c-api/index.rst | 33 + Doc/c-api/init.rst | 936 +++++++ Doc/c-api/intro.rst | 630 +++++ Doc/c-api/memory.rst | 207 ++ Doc/c-api/newtypes.rst | 1740 +++++++++++++ Doc/c-api/refcounting.rst | 74 + Doc/c-api/utilities.rst | 1030 ++++++++ Doc/c-api/veryhigh.rst | 278 ++ Doc/conf.py | 56 + Doc/contents.rst | 21 + Doc/copyright.rst | 19 + Doc/data/refcounts.dat | 1747 +++++++++++++ Doc/distutils/apiref.rst | 1976 ++++++++++++++ Doc/distutils/builtdist.rst | 405 +++ Doc/distutils/commandref.rst | 104 + Doc/distutils/configfile.rst | 130 + Doc/distutils/examples.rst | 241 ++ Doc/distutils/extending.rst | 96 + Doc/distutils/index.rst | 30 + Doc/distutils/introduction.rst | 208 ++ Doc/distutils/packageindex.rst | 65 + Doc/distutils/setupscript.rst | 669 +++++ Doc/distutils/sourcedist.rst | 207 ++ Doc/distutils/uploading.rst | 37 + Doc/documenting/fromlatex.rst | 192 ++ Doc/documenting/index.rst | 33 + Doc/documenting/intro.rst | 29 + Doc/documenting/markup.rst | 775 ++++++ Doc/documenting/rest.rst | 251 ++ Doc/documenting/sphinx.rst | 60 + Doc/documenting/style.rst | 70 + Doc/extending/building.rst | 131 + Doc/extending/embedding.rst | 297 +++ Doc/extending/extending.rst | 1273 ++++++++++ Doc/extending/index.rst | 34 + Doc/extending/newtypes.rst | 1580 ++++++++++++ Doc/extending/windows.rst | 280 ++ Doc/howto/advocacy.rst | 356 +++ Doc/howto/curses.rst | 434 ++++ Doc/howto/doanddont.rst | 308 +++ Doc/howto/functional.rst | 1400 ++++++++++ Doc/howto/index.rst | 25 + Doc/howto/pythonmac.rst | 202 ++ Doc/howto/regex.rst | 1377 ++++++++++ 
Doc/howto/sockets.rst | 421 +++ Doc/howto/unicode.rst | 732 ++++++ Doc/howto/urllib2.rst | 578 +++++ Doc/includes/email-dir.py | 115 + Doc/includes/email-mime.py | 32 + Doc/includes/email-simple.py | 25 + Doc/includes/email-unpack.py | 68 + Doc/includes/minidom-example.py | 64 + Doc/includes/noddy.c | 54 + Doc/includes/noddy2.c | 190 ++ Doc/includes/noddy3.c | 243 ++ Doc/includes/noddy4.c | 224 ++ Doc/includes/run-func.c | 68 + Doc/includes/setup.py | 8 + Doc/includes/shoddy.c | 91 + Doc/includes/sqlite3/adapter_datetime.py | 14 + Doc/includes/sqlite3/adapter_point_1.py | 16 + Doc/includes/sqlite3/adapter_point_2.py | 17 + Doc/includes/sqlite3/collation_reverse.py | 15 + Doc/includes/sqlite3/complete_statement.py | 30 + Doc/includes/sqlite3/connect_db_1.py | 3 + Doc/includes/sqlite3/connect_db_2.py | 3 + Doc/includes/sqlite3/converter_point.py | 47 + Doc/includes/sqlite3/countcursors.py | 15 + Doc/includes/sqlite3/createdb.py | 28 + Doc/includes/sqlite3/execsql_fetchonerow.py | 17 + Doc/includes/sqlite3/execsql_printall_1.py | 13 + Doc/includes/sqlite3/execute_1.py | 11 + Doc/includes/sqlite3/execute_2.py | 12 + Doc/includes/sqlite3/execute_3.py | 12 + Doc/includes/sqlite3/executemany_1.py | 24 + Doc/includes/sqlite3/executemany_2.py | 15 + Doc/includes/sqlite3/executescript.py | 24 + Doc/includes/sqlite3/insert_more_people.py | 16 + Doc/includes/sqlite3/md5func.py | 11 + Doc/includes/sqlite3/mysumaggr.py | 20 + Doc/includes/sqlite3/parse_colnames.py | 8 + Doc/includes/sqlite3/pysqlite_datetime.py | 20 + Doc/includes/sqlite3/row_factory.py | 13 + Doc/includes/sqlite3/rowclass.py | 12 + Doc/includes/sqlite3/shared_cache.py | 6 + Doc/includes/sqlite3/shortcut_methods.py | 21 + Doc/includes/sqlite3/simple_tableprinter.py | 26 + Doc/includes/sqlite3/text_factory.py | 42 + Doc/includes/test.py | 213 ++ Doc/includes/typestruct.h | 76 + Doc/includes/tzinfo-examples.py | 139 + Doc/install/index.rst | 1011 ++++++++ Doc/library/__builtin__.rst | 41 + 
Doc/library/__future__.rst | 61 + Doc/library/__main__.rst | 17 + Doc/library/_ast.rst | 59 + Doc/library/_winreg.rst | 420 +++ Doc/library/aepack.rst | 92 + Doc/library/aetools.rst | 86 + Doc/library/aetypes.rst | 150 ++ Doc/library/aifc.rst | 225 ++ Doc/library/allos.rst | 27 + Doc/library/anydbm.rst | 96 + Doc/library/archiving.rst | 18 + Doc/library/array.rst | 272 ++ Doc/library/asynchat.rst | 284 +++ Doc/library/asyncore.rst | 269 ++ Doc/library/atexit.rst | 105 + Doc/library/audioop.rst | 261 ++ Doc/library/autogil.rst | 30 + Doc/library/base64.rst | 172 ++ Doc/library/basehttpserver.rst | 254 ++ Doc/library/binascii.rst | 161 ++ Doc/library/binhex.rst | 59 + Doc/library/bisect.rst | 92 + Doc/library/bsddb.rst | 211 ++ Doc/library/bz2.rst | 181 ++ Doc/library/calendar.rst | 326 +++ Doc/library/carbon.rst | 288 +++ Doc/library/cgi.rst | 558 ++++ Doc/library/cgihttpserver.rst | 73 + Doc/library/cgitb.rst | 64 + Doc/library/chunk.rst | 130 + Doc/library/cmath.rst | 156 ++ Doc/library/cmd.rst | 202 ++ Doc/library/code.rst | 167 ++ Doc/library/codecs.rst | 1230 +++++++++ Doc/library/codeop.rst | 95 + Doc/library/collections.rst | 414 +++ Doc/library/colorpicker.rst | 23 + Doc/library/colorsys.rst | 60 + Doc/library/commands.rst | 53 + Doc/library/compileall.rst | 57 + Doc/library/configparser.rst | 361 +++ Doc/library/constants.rst | 42 + Doc/library/contextlib.rst | 120 + Doc/library/cookie.rst | 282 ++ Doc/library/cookielib.rst | 768 ++++++ Doc/library/copy.rst | 85 + Doc/library/copy_reg.rst | 42 + Doc/library/crypt.rst | 66 + Doc/library/crypto.rst | 30 + Doc/library/csv.rst | 530 ++++ Doc/library/ctypes.rst | 2364 +++++++++++++++++ Doc/library/curses.ascii.rst | 228 ++ Doc/library/curses.panel.rst | 119 + Doc/library/curses.rst | 1679 ++++++++++++ Doc/library/custominterp.rst | 20 + Doc/library/datatypes.rst | 37 + Doc/library/datetime.rst | 1348 ++++++++++ Doc/library/dbhash.rst | 114 + Doc/library/dbm.rst | 74 + Doc/library/decimal.rst | 1289 ++++++++++ 
Doc/library/development.rst | 22 + Doc/library/difflib.rst | 644 +++++ Doc/library/dircache.rst | 56 + Doc/library/dis.rst | 775 ++++++ Doc/library/distutils.rst | 30 + Doc/library/dl.rst | 111 + Doc/library/doctest.rst | 1869 ++++++++++++++ Doc/library/docxmlrpcserver.rst | 97 + Doc/library/dumbdbm.rst | 81 + Doc/library/dummy_thread.rst | 23 + Doc/library/dummy_threading.rst | 23 + Doc/library/easydialogs.rst | 207 ++ Doc/library/email-examples.rst | 33 + Doc/library/email.charset.rst | 249 ++ Doc/library/email.encoders.rst | 57 + Doc/library/email.errors.rst | 91 + Doc/library/email.generator.rst | 123 + Doc/library/email.header.rst | 171 ++ Doc/library/email.iterators.rst | 65 + Doc/library/email.message.rst | 548 ++++ Doc/library/email.mime.rst | 175 ++ Doc/library/email.parser.rst | 220 ++ Doc/library/email.rst | 324 +++ Doc/library/email.util.rst | 166 ++ Doc/library/errno.rst | 636 +++++ Doc/library/exceptions.rst | 475 ++++ Doc/library/fcntl.rst | 155 ++ Doc/library/filecmp.rst | 152 ++ Doc/library/fileformats.rst | 18 + Doc/library/fileinput.rst | 183 ++ Doc/library/filesys.rst | 38 + Doc/library/fnmatch.rst | 91 + Doc/library/formatter.rst | 350 +++ Doc/library/fpectl.rst | 120 + Doc/library/fpformat.rst | 56 + Doc/library/framework.rst | 335 +++ Doc/library/frameworks.rst | 18 + Doc/library/ftplib.rst | 320 +++ Doc/library/functions.rst | 1138 +++++++++ Doc/library/functools.rst | 145 ++ Doc/library/gc.rst | 211 ++ Doc/library/gdbm.rst | 122 + Doc/library/gensuitemodule.rst | 63 + Doc/library/getopt.rst | 147 ++ Doc/library/getpass.rst | 38 + Doc/library/gettext.rst | 765 ++++++ Doc/library/glob.rst | 54 + Doc/library/grp.rst | 63 + Doc/library/gzip.rst | 68 + Doc/library/hashlib.rst | 121 + Doc/library/heapq.rst | 224 ++ Doc/library/hmac.rst | 61 + Doc/library/hotshot.rst | 152 ++ Doc/library/htmllib.rst | 186 ++ Doc/library/htmlparser.rst | 183 ++ Doc/library/httplib.rst | 552 ++++ Doc/library/i18n.rst | 19 + Doc/library/ic.rst | 119 + 
Doc/library/idle.rst | 288 +++ Doc/library/imaplib.rst | 540 ++++ Doc/library/imghdr.rst | 71 + Doc/library/imp.rst | 298 +++ Doc/library/index.rst | 81 + Doc/library/inspect.rst | 507 ++++ Doc/library/internet.rst | 47 + Doc/library/intro.rst | 51 + Doc/library/ipc.rst | 24 + Doc/library/itertools.rst | 547 ++++ Doc/library/keyword.rst | 22 + Doc/library/language.rst | 29 + Doc/library/linecache.rst | 52 + Doc/library/locale.rst | 578 +++++ Doc/library/logging.rst | 1857 ++++++++++++++ Doc/library/mac.rst | 23 + Doc/library/macos.rst | 95 + Doc/library/macosa.rst | 92 + Doc/library/macostools.rst | 115 + Doc/library/macpath.rst | 17 + Doc/library/mailbox.rst | 1679 ++++++++++++ Doc/library/mailcap.rst | 74 + Doc/library/markup.rst | 44 + Doc/library/marshal.rst | 127 + Doc/library/math.rst | 227 ++ Doc/library/mhlib.rst | 205 ++ Doc/library/mimetools.rst | 130 + Doc/library/mimetypes.rst | 232 ++ Doc/library/miniaeframe.rst | 68 + Doc/library/misc.rst | 14 + Doc/library/mm.rst | 23 + Doc/library/mmap.rst | 173 ++ Doc/library/modulefinder.rst | 52 + Doc/library/modules.rst | 20 + Doc/library/msilib.rst | 537 ++++ Doc/library/msvcrt.rst | 126 + Doc/library/multifile.rst | 190 ++ Doc/library/mutex.rst | 62 + Doc/library/netdata.rst | 26 + Doc/library/netrc.rst | 78 + Doc/library/new.rst | 53 + Doc/library/nis.rst | 68 + Doc/library/nntplib.rst | 350 +++ Doc/library/numeric.rst | 25 + Doc/library/objects.rst | 32 + Doc/library/operator.rst | 612 +++++ Doc/library/optparse.rst | 1827 +++++++++++++ Doc/library/os.path.rst | 317 +++ Doc/library/os.rst | 2036 +++++++++++++++ Doc/library/ossaudiodev.rst | 429 ++++ Doc/library/othergui.rst | 84 + Doc/library/parser.rst | 683 +++++ Doc/library/pdb.rst | 409 +++ Doc/library/persistence.rst | 32 + Doc/library/pickle.rst | 868 +++++++ Doc/library/pickletools.rst | 37 + Doc/library/pipes.rst | 92 + Doc/library/pkgutil.rst | 43 + Doc/library/platform.rst | 256 ++ Doc/library/poplib.rst | 202 ++ Doc/library/posix.rst | 103 + 
Doc/library/pprint.rst | 213 ++ Doc/library/profile.rst | 682 +++++ Doc/library/pty.rst | 48 + Doc/library/pwd.rst | 76 + Doc/library/py_compile.rst | 55 + Doc/library/pyclbr.rst | 112 + Doc/library/pydoc.rst | 65 + Doc/library/pyexpat.rst | 873 +++++++ Doc/library/python.rst | 27 + Doc/library/queue.rst | 152 ++ Doc/library/quopri.rst | 61 + Doc/library/random.rst | 315 +++ Doc/library/re.rst | 921 +++++++ Doc/library/readline.rst | 222 ++ Doc/library/repr.rst | 136 + Doc/library/resource.rst | 238 ++ Doc/library/rfc822.rst | 351 +++ Doc/library/rlcompleter.rst | 65 + Doc/library/robotparser.rst | 71 + Doc/library/runpy.rst | 71 + Doc/library/sched.rst | 104 + Doc/library/scrolledtext.rst | 32 + Doc/library/select.rst | 141 + Doc/library/sgmllib.rst | 270 ++ Doc/library/shelve.rst | 185 ++ Doc/library/shlex.rst | 307 +++ Doc/library/shutil.rst | 171 ++ Doc/library/signal.rst | 157 ++ Doc/library/simplehttpserver.rst | 86 + Doc/library/simplexmlrpcserver.rst | 232 ++ Doc/library/site.rst | 87 + Doc/library/smtpd.rst | 72 + Doc/library/smtplib.rst | 347 +++ Doc/library/sndhdr.rst | 42 + Doc/library/socket.rst | 941 +++++++ Doc/library/socketserver.rst | 295 +++ Doc/library/someos.rst | 23 + Doc/library/spwd.rst | 74 + Doc/library/sqlite3.rst | 689 +++++ Doc/library/stat.rst | 167 ++ Doc/library/statvfs.rst | 67 + Doc/library/stdtypes.rst | 2409 ++++++++++++++++++ Doc/library/string.rst | 468 ++++ Doc/library/stringio.rst | 122 + Doc/library/stringprep.rst | 142 ++ Doc/library/strings.rst | 31 + Doc/library/struct.rst | 292 +++ Doc/library/subprocess.rst | 340 +++ Doc/library/sunau.rst | 261 ++ Doc/library/symbol.rst | 32 + Doc/library/sys.rst | 606 +++++ Doc/library/syslog.rst | 66 + Doc/library/tabnanny.rst | 70 + Doc/library/tarfile.rst | 738 ++++++ Doc/library/telnetlib.rst | 246 ++ Doc/library/tempfile.rst | 216 ++ Doc/library/termios.rst | 111 + Doc/library/test.rst | 317 +++ Doc/library/textwrap.rst | 192 ++ Doc/library/thread.rst | 171 ++ 
Doc/library/threading.rst | 732 ++++++ Doc/library/time.rst | 540 ++++ Doc/library/timeit.rst | 243 ++ Doc/library/tix.rst | 602 +++++ Doc/library/tk.rst | 43 + Doc/library/tkinter.rst | 840 ++++++ Doc/library/token.rst | 47 + Doc/library/tokenize.rst | 122 + Doc/library/trace.rst | 128 + Doc/library/traceback.rst | 160 ++ Doc/library/tty.rst | 38 + Doc/library/turtle.rst | 312 +++ Doc/library/types.rst | 257 ++ Doc/library/undoc.rst | 186 ++ Doc/library/unicodedata.rst | 165 ++ Doc/library/unittest.rst | 936 +++++++ Doc/library/unix.rst | 29 + Doc/library/urllib.rst | 471 ++++ Doc/library/urllib2.rst | 927 +++++++ Doc/library/urlparse.rst | 268 ++ Doc/library/user.rst | 69 + Doc/library/userdict.rst | 188 ++ Doc/library/uu.rst | 60 + Doc/library/uuid.rst | 258 ++ Doc/library/warnings.rst | 242 ++ Doc/library/wave.rst | 201 ++ Doc/library/weakref.rst | 330 +++ Doc/library/webbrowser.rst | 199 ++ Doc/library/whichdb.rst | 20 + Doc/library/windows.rst | 14 + Doc/library/winsound.rst | 162 ++ Doc/library/wsgiref.rst | 641 +++++ Doc/library/xdrlib.rst | 276 ++ Doc/library/xml.dom.minidom.rst | 267 ++ Doc/library/xml.dom.pulldom.rst | 69 + Doc/library/xml.dom.rst | 1045 ++++++++ Doc/library/xml.etree.elementtree.rst | 444 ++++ Doc/library/xml.etree.rst | 25 + Doc/library/xml.sax.handler.rst | 402 +++ Doc/library/xml.sax.reader.rst | 386 +++ Doc/library/xml.sax.rst | 143 ++ Doc/library/xml.sax.utils.rst | 83 + Doc/library/xmlrpclib.rst | 422 +++ Doc/library/zipfile.rst | 408 +++ Doc/library/zipimport.rst | 137 + Doc/library/zlib.rst | 209 ++ Doc/license.rst | 647 +++++ Doc/reference/compound_stmts.rst | 554 ++++ Doc/reference/datamodel.rst | 2118 +++++++++++++++ Doc/reference/executionmodel.rst | 232 ++ Doc/reference/expressions.rst | 1283 ++++++++++ Doc/reference/index.rst | 30 + Doc/reference/introduction.rst | 138 + Doc/reference/lexical_analysis.rst | 758 ++++++ Doc/reference/simple_stmts.rst | 825 ++++++ Doc/reference/toplevel_components.rst | 123 + 
Doc/tools/sphinx-build.py | 14 + Doc/tools/sphinx-web.py | 14 + Doc/tutorial/appetite.rst | 89 + Doc/tutorial/classes.rst | 792 ++++++ Doc/tutorial/controlflow.rst | 574 +++++ Doc/tutorial/datastructures.rst | 586 +++++ Doc/tutorial/errors.rst | 418 +++ Doc/tutorial/floatingpoint.rst | 220 ++ Doc/tutorial/glossary.rst | 329 +++ Doc/tutorial/index.rst | 60 + Doc/tutorial/inputoutput.rst | 354 +++ Doc/tutorial/interactive.rst | 167 ++ Doc/tutorial/interpreter.rst | 248 ++ Doc/tutorial/introduction.rst | 645 +++++ Doc/tutorial/modules.rst | 551 ++++ Doc/tutorial/stdlib.rst | 313 +++ Doc/tutorial/stdlib2.rst | 394 +++ Doc/tutorial/whatnow.rst | 68 + Doc/whatsnew/2.0.rst | 1207 +++++++++ Doc/whatsnew/2.1.rst | 794 ++++++ Doc/whatsnew/2.2.rst | 1269 +++++++++ Doc/whatsnew/2.3.rst | 2084 +++++++++++++++ Doc/whatsnew/2.4.rst | 1571 ++++++++++++ Doc/whatsnew/2.5.rst | 2286 +++++++++++++++++ Doc/whatsnew/2.6.rst | 236 ++ Doc/whatsnew/3.0.rst | 161 ++ 423 files changed, 131199 insertions(+) create mode 100644 Doc/ACKS.txt create mode 100644 Doc/Makefile create mode 100644 Doc/README.txt create mode 100644 Doc/TODO.txt create mode 100644 Doc/about.rst create mode 100644 Doc/bugs.rst create mode 100644 Doc/c-api/abstract.rst create mode 100644 Doc/c-api/concrete.rst create mode 100644 Doc/c-api/exceptions.rst create mode 100644 Doc/c-api/index.rst create mode 100644 Doc/c-api/init.rst create mode 100644 Doc/c-api/intro.rst create mode 100644 Doc/c-api/memory.rst create mode 100644 Doc/c-api/newtypes.rst create mode 100644 Doc/c-api/refcounting.rst create mode 100644 Doc/c-api/utilities.rst create mode 100644 Doc/c-api/veryhigh.rst create mode 100644 Doc/conf.py create mode 100644 Doc/contents.rst create mode 100644 Doc/copyright.rst create mode 100644 Doc/data/refcounts.dat create mode 100644 Doc/distutils/apiref.rst create mode 100644 Doc/distutils/builtdist.rst create mode 100644 Doc/distutils/commandref.rst create mode 100644 Doc/distutils/configfile.rst create mode 100644 
Doc/distutils/examples.rst create mode 100644 Doc/distutils/extending.rst create mode 100644 Doc/distutils/index.rst create mode 100644 Doc/distutils/introduction.rst create mode 100644 Doc/distutils/packageindex.rst create mode 100644 Doc/distutils/setupscript.rst create mode 100644 Doc/distutils/sourcedist.rst create mode 100644 Doc/distutils/uploading.rst create mode 100644 Doc/documenting/fromlatex.rst create mode 100644 Doc/documenting/index.rst create mode 100644 Doc/documenting/intro.rst create mode 100644 Doc/documenting/markup.rst create mode 100644 Doc/documenting/rest.rst create mode 100644 Doc/documenting/sphinx.rst create mode 100644 Doc/documenting/style.rst create mode 100644 Doc/extending/building.rst create mode 100644 Doc/extending/embedding.rst create mode 100644 Doc/extending/extending.rst create mode 100644 Doc/extending/index.rst create mode 100644 Doc/extending/newtypes.rst create mode 100644 Doc/extending/windows.rst create mode 100644 Doc/howto/advocacy.rst create mode 100644 Doc/howto/curses.rst create mode 100644 Doc/howto/doanddont.rst create mode 100644 Doc/howto/functional.rst create mode 100644 Doc/howto/index.rst create mode 100644 Doc/howto/pythonmac.rst create mode 100644 Doc/howto/regex.rst create mode 100644 Doc/howto/sockets.rst create mode 100644 Doc/howto/unicode.rst create mode 100644 Doc/howto/urllib2.rst create mode 100644 Doc/includes/email-dir.py create mode 100644 Doc/includes/email-mime.py create mode 100644 Doc/includes/email-simple.py create mode 100644 Doc/includes/email-unpack.py create mode 100644 Doc/includes/minidom-example.py create mode 100644 Doc/includes/noddy.c create mode 100644 Doc/includes/noddy2.c create mode 100644 Doc/includes/noddy3.c create mode 100644 Doc/includes/noddy4.c create mode 100644 Doc/includes/run-func.c create mode 100644 Doc/includes/setup.py create mode 100644 Doc/includes/shoddy.c create mode 100644 Doc/includes/sqlite3/adapter_datetime.py create mode 100644 
Doc/includes/sqlite3/adapter_point_1.py create mode 100644 Doc/includes/sqlite3/adapter_point_2.py create mode 100644 Doc/includes/sqlite3/collation_reverse.py create mode 100644 Doc/includes/sqlite3/complete_statement.py create mode 100644 Doc/includes/sqlite3/connect_db_1.py create mode 100644 Doc/includes/sqlite3/connect_db_2.py create mode 100644 Doc/includes/sqlite3/converter_point.py create mode 100644 Doc/includes/sqlite3/countcursors.py create mode 100644 Doc/includes/sqlite3/createdb.py create mode 100644 Doc/includes/sqlite3/execsql_fetchonerow.py create mode 100644 Doc/includes/sqlite3/execsql_printall_1.py create mode 100644 Doc/includes/sqlite3/execute_1.py create mode 100644 Doc/includes/sqlite3/execute_2.py create mode 100644 Doc/includes/sqlite3/execute_3.py create mode 100644 Doc/includes/sqlite3/executemany_1.py create mode 100644 Doc/includes/sqlite3/executemany_2.py create mode 100644 Doc/includes/sqlite3/executescript.py create mode 100644 Doc/includes/sqlite3/insert_more_people.py create mode 100644 Doc/includes/sqlite3/md5func.py create mode 100644 Doc/includes/sqlite3/mysumaggr.py create mode 100644 Doc/includes/sqlite3/parse_colnames.py create mode 100644 Doc/includes/sqlite3/pysqlite_datetime.py create mode 100644 Doc/includes/sqlite3/row_factory.py create mode 100644 Doc/includes/sqlite3/rowclass.py create mode 100644 Doc/includes/sqlite3/shared_cache.py create mode 100644 Doc/includes/sqlite3/shortcut_methods.py create mode 100644 Doc/includes/sqlite3/simple_tableprinter.py create mode 100644 Doc/includes/sqlite3/text_factory.py create mode 100644 Doc/includes/test.py create mode 100644 Doc/includes/typestruct.h create mode 100644 Doc/includes/tzinfo-examples.py create mode 100644 Doc/install/index.rst create mode 100644 Doc/library/__builtin__.rst create mode 100644 Doc/library/__future__.rst create mode 100644 Doc/library/__main__.rst create mode 100644 Doc/library/_ast.rst create mode 100644 Doc/library/_winreg.rst create mode 100644 
Doc/library/aepack.rst create mode 100644 Doc/library/aetools.rst create mode 100644 Doc/library/aetypes.rst create mode 100644 Doc/library/aifc.rst create mode 100644 Doc/library/allos.rst create mode 100644 Doc/library/anydbm.rst create mode 100644 Doc/library/archiving.rst create mode 100644 Doc/library/array.rst create mode 100644 Doc/library/asynchat.rst create mode 100644 Doc/library/asyncore.rst create mode 100644 Doc/library/atexit.rst create mode 100644 Doc/library/audioop.rst create mode 100644 Doc/library/autogil.rst create mode 100644 Doc/library/base64.rst create mode 100644 Doc/library/basehttpserver.rst create mode 100644 Doc/library/binascii.rst create mode 100644 Doc/library/binhex.rst create mode 100644 Doc/library/bisect.rst create mode 100644 Doc/library/bsddb.rst create mode 100644 Doc/library/bz2.rst create mode 100644 Doc/library/calendar.rst create mode 100644 Doc/library/carbon.rst create mode 100644 Doc/library/cgi.rst create mode 100644 Doc/library/cgihttpserver.rst create mode 100644 Doc/library/cgitb.rst create mode 100644 Doc/library/chunk.rst create mode 100644 Doc/library/cmath.rst create mode 100644 Doc/library/cmd.rst create mode 100644 Doc/library/code.rst create mode 100644 Doc/library/codecs.rst create mode 100644 Doc/library/codeop.rst create mode 100644 Doc/library/collections.rst create mode 100644 Doc/library/colorpicker.rst create mode 100644 Doc/library/colorsys.rst create mode 100644 Doc/library/commands.rst create mode 100644 Doc/library/compileall.rst create mode 100644 Doc/library/configparser.rst create mode 100644 Doc/library/constants.rst create mode 100644 Doc/library/contextlib.rst create mode 100644 Doc/library/cookie.rst create mode 100644 Doc/library/cookielib.rst create mode 100644 Doc/library/copy.rst create mode 100644 Doc/library/copy_reg.rst create mode 100644 Doc/library/crypt.rst create mode 100644 Doc/library/crypto.rst create mode 100644 Doc/library/csv.rst create mode 100644 Doc/library/ctypes.rst 
create mode 100644 Doc/library/curses.ascii.rst create mode 100644 Doc/library/curses.panel.rst create mode 100644 Doc/library/curses.rst create mode 100644 Doc/library/custominterp.rst create mode 100644 Doc/library/datatypes.rst create mode 100644 Doc/library/datetime.rst create mode 100644 Doc/library/dbhash.rst create mode 100644 Doc/library/dbm.rst create mode 100644 Doc/library/decimal.rst create mode 100644 Doc/library/development.rst create mode 100644 Doc/library/difflib.rst create mode 100644 Doc/library/dircache.rst create mode 100644 Doc/library/dis.rst create mode 100644 Doc/library/distutils.rst create mode 100644 Doc/library/dl.rst create mode 100644 Doc/library/doctest.rst create mode 100644 Doc/library/docxmlrpcserver.rst create mode 100644 Doc/library/dumbdbm.rst create mode 100644 Doc/library/dummy_thread.rst create mode 100644 Doc/library/dummy_threading.rst create mode 100644 Doc/library/easydialogs.rst create mode 100644 Doc/library/email-examples.rst create mode 100644 Doc/library/email.charset.rst create mode 100644 Doc/library/email.encoders.rst create mode 100644 Doc/library/email.errors.rst create mode 100644 Doc/library/email.generator.rst create mode 100644 Doc/library/email.header.rst create mode 100644 Doc/library/email.iterators.rst create mode 100644 Doc/library/email.message.rst create mode 100644 Doc/library/email.mime.rst create mode 100644 Doc/library/email.parser.rst create mode 100644 Doc/library/email.rst create mode 100644 Doc/library/email.util.rst create mode 100644 Doc/library/errno.rst create mode 100644 Doc/library/exceptions.rst create mode 100644 Doc/library/fcntl.rst create mode 100644 Doc/library/filecmp.rst create mode 100644 Doc/library/fileformats.rst create mode 100644 Doc/library/fileinput.rst create mode 100644 Doc/library/filesys.rst create mode 100644 Doc/library/fnmatch.rst create mode 100644 Doc/library/formatter.rst create mode 100644 Doc/library/fpectl.rst create mode 100644 Doc/library/fpformat.rst 
create mode 100644 Doc/library/framework.rst create mode 100644 Doc/library/frameworks.rst create mode 100644 Doc/library/ftplib.rst create mode 100644 Doc/library/functions.rst create mode 100644 Doc/library/functools.rst create mode 100644 Doc/library/gc.rst create mode 100644 Doc/library/gdbm.rst create mode 100644 Doc/library/gensuitemodule.rst create mode 100644 Doc/library/getopt.rst create mode 100644 Doc/library/getpass.rst create mode 100644 Doc/library/gettext.rst create mode 100644 Doc/library/glob.rst create mode 100644 Doc/library/grp.rst create mode 100644 Doc/library/gzip.rst create mode 100644 Doc/library/hashlib.rst create mode 100644 Doc/library/heapq.rst create mode 100644 Doc/library/hmac.rst create mode 100644 Doc/library/hotshot.rst create mode 100644 Doc/library/htmllib.rst create mode 100644 Doc/library/htmlparser.rst create mode 100644 Doc/library/httplib.rst create mode 100644 Doc/library/i18n.rst create mode 100644 Doc/library/ic.rst create mode 100644 Doc/library/idle.rst create mode 100644 Doc/library/imaplib.rst create mode 100644 Doc/library/imghdr.rst create mode 100644 Doc/library/imp.rst create mode 100644 Doc/library/index.rst create mode 100644 Doc/library/inspect.rst create mode 100644 Doc/library/internet.rst create mode 100644 Doc/library/intro.rst create mode 100644 Doc/library/ipc.rst create mode 100644 Doc/library/itertools.rst create mode 100644 Doc/library/keyword.rst create mode 100644 Doc/library/language.rst create mode 100644 Doc/library/linecache.rst create mode 100644 Doc/library/locale.rst create mode 100644 Doc/library/logging.rst create mode 100644 Doc/library/mac.rst create mode 100644 Doc/library/macos.rst create mode 100644 Doc/library/macosa.rst create mode 100644 Doc/library/macostools.rst create mode 100644 Doc/library/macpath.rst create mode 100644 Doc/library/mailbox.rst create mode 100644 Doc/library/mailcap.rst create mode 100644 Doc/library/markup.rst create mode 100644 Doc/library/marshal.rst create 
mode 100644 Doc/library/math.rst create mode 100644 Doc/library/mhlib.rst create mode 100644 Doc/library/mimetools.rst create mode 100644 Doc/library/mimetypes.rst create mode 100644 Doc/library/miniaeframe.rst create mode 100644 Doc/library/misc.rst create mode 100644 Doc/library/mm.rst create mode 100644 Doc/library/mmap.rst create mode 100644 Doc/library/modulefinder.rst create mode 100644 Doc/library/modules.rst create mode 100644 Doc/library/msilib.rst create mode 100644 Doc/library/msvcrt.rst create mode 100644 Doc/library/multifile.rst create mode 100644 Doc/library/mutex.rst create mode 100644 Doc/library/netdata.rst create mode 100644 Doc/library/netrc.rst create mode 100644 Doc/library/new.rst create mode 100644 Doc/library/nis.rst create mode 100644 Doc/library/nntplib.rst create mode 100644 Doc/library/numeric.rst create mode 100644 Doc/library/objects.rst create mode 100644 Doc/library/operator.rst create mode 100644 Doc/library/optparse.rst create mode 100644 Doc/library/os.path.rst create mode 100644 Doc/library/os.rst create mode 100644 Doc/library/ossaudiodev.rst create mode 100644 Doc/library/othergui.rst create mode 100644 Doc/library/parser.rst create mode 100644 Doc/library/pdb.rst create mode 100644 Doc/library/persistence.rst create mode 100644 Doc/library/pickle.rst create mode 100644 Doc/library/pickletools.rst create mode 100644 Doc/library/pipes.rst create mode 100644 Doc/library/pkgutil.rst create mode 100644 Doc/library/platform.rst create mode 100644 Doc/library/poplib.rst create mode 100644 Doc/library/posix.rst create mode 100644 Doc/library/pprint.rst create mode 100644 Doc/library/profile.rst create mode 100644 Doc/library/pty.rst create mode 100644 Doc/library/pwd.rst create mode 100644 Doc/library/py_compile.rst create mode 100644 Doc/library/pyclbr.rst create mode 100644 Doc/library/pydoc.rst create mode 100644 Doc/library/pyexpat.rst create mode 100644 Doc/library/python.rst create mode 100644 Doc/library/queue.rst create mode 
100644 Doc/library/quopri.rst create mode 100644 Doc/library/random.rst create mode 100644 Doc/library/re.rst create mode 100644 Doc/library/readline.rst create mode 100644 Doc/library/repr.rst create mode 100644 Doc/library/resource.rst create mode 100644 Doc/library/rfc822.rst create mode 100644 Doc/library/rlcompleter.rst create mode 100644 Doc/library/robotparser.rst create mode 100644 Doc/library/runpy.rst create mode 100644 Doc/library/sched.rst create mode 100644 Doc/library/scrolledtext.rst create mode 100644 Doc/library/select.rst create mode 100644 Doc/library/sgmllib.rst create mode 100644 Doc/library/shelve.rst create mode 100644 Doc/library/shlex.rst create mode 100644 Doc/library/shutil.rst create mode 100644 Doc/library/signal.rst create mode 100644 Doc/library/simplehttpserver.rst create mode 100644 Doc/library/simplexmlrpcserver.rst create mode 100644 Doc/library/site.rst create mode 100644 Doc/library/smtpd.rst create mode 100644 Doc/library/smtplib.rst create mode 100644 Doc/library/sndhdr.rst create mode 100644 Doc/library/socket.rst create mode 100644 Doc/library/socketserver.rst create mode 100644 Doc/library/someos.rst create mode 100644 Doc/library/spwd.rst create mode 100644 Doc/library/sqlite3.rst create mode 100644 Doc/library/stat.rst create mode 100644 Doc/library/statvfs.rst create mode 100644 Doc/library/stdtypes.rst create mode 100644 Doc/library/string.rst create mode 100644 Doc/library/stringio.rst create mode 100644 Doc/library/stringprep.rst create mode 100644 Doc/library/strings.rst create mode 100644 Doc/library/struct.rst create mode 100644 Doc/library/subprocess.rst create mode 100644 Doc/library/sunau.rst create mode 100644 Doc/library/symbol.rst create mode 100644 Doc/library/sys.rst create mode 100644 Doc/library/syslog.rst create mode 100644 Doc/library/tabnanny.rst create mode 100644 Doc/library/tarfile.rst create mode 100644 Doc/library/telnetlib.rst create mode 100644 Doc/library/tempfile.rst create mode 100644 
Doc/library/termios.rst create mode 100644 Doc/library/test.rst create mode 100644 Doc/library/textwrap.rst create mode 100644 Doc/library/thread.rst create mode 100644 Doc/library/threading.rst create mode 100644 Doc/library/time.rst create mode 100644 Doc/library/timeit.rst create mode 100644 Doc/library/tix.rst create mode 100644 Doc/library/tk.rst create mode 100644 Doc/library/tkinter.rst create mode 100644 Doc/library/token.rst create mode 100644 Doc/library/tokenize.rst create mode 100644 Doc/library/trace.rst create mode 100644 Doc/library/traceback.rst create mode 100644 Doc/library/tty.rst create mode 100644 Doc/library/turtle.rst create mode 100644 Doc/library/types.rst create mode 100644 Doc/library/undoc.rst create mode 100644 Doc/library/unicodedata.rst create mode 100644 Doc/library/unittest.rst create mode 100644 Doc/library/unix.rst create mode 100644 Doc/library/urllib.rst create mode 100644 Doc/library/urllib2.rst create mode 100644 Doc/library/urlparse.rst create mode 100644 Doc/library/user.rst create mode 100644 Doc/library/userdict.rst create mode 100644 Doc/library/uu.rst create mode 100644 Doc/library/uuid.rst create mode 100644 Doc/library/warnings.rst create mode 100644 Doc/library/wave.rst create mode 100644 Doc/library/weakref.rst create mode 100644 Doc/library/webbrowser.rst create mode 100644 Doc/library/whichdb.rst create mode 100644 Doc/library/windows.rst create mode 100644 Doc/library/winsound.rst create mode 100644 Doc/library/wsgiref.rst create mode 100644 Doc/library/xdrlib.rst create mode 100644 Doc/library/xml.dom.minidom.rst create mode 100644 Doc/library/xml.dom.pulldom.rst create mode 100644 Doc/library/xml.dom.rst create mode 100644 Doc/library/xml.etree.elementtree.rst create mode 100644 Doc/library/xml.etree.rst create mode 100644 Doc/library/xml.sax.handler.rst create mode 100644 Doc/library/xml.sax.reader.rst create mode 100644 Doc/library/xml.sax.rst create mode 100644 Doc/library/xml.sax.utils.rst create mode 100644 
Doc/library/xmlrpclib.rst create mode 100644 Doc/library/zipfile.rst create mode 100644 Doc/library/zipimport.rst create mode 100644 Doc/library/zlib.rst create mode 100644 Doc/license.rst create mode 100644 Doc/reference/compound_stmts.rst create mode 100644 Doc/reference/datamodel.rst create mode 100644 Doc/reference/executionmodel.rst create mode 100644 Doc/reference/expressions.rst create mode 100644 Doc/reference/index.rst create mode 100644 Doc/reference/introduction.rst create mode 100644 Doc/reference/lexical_analysis.rst create mode 100644 Doc/reference/simple_stmts.rst create mode 100644 Doc/reference/toplevel_components.rst create mode 100644 Doc/tools/sphinx-build.py create mode 100644 Doc/tools/sphinx-web.py create mode 100644 Doc/tutorial/appetite.rst create mode 100644 Doc/tutorial/classes.rst create mode 100644 Doc/tutorial/controlflow.rst create mode 100644 Doc/tutorial/datastructures.rst create mode 100644 Doc/tutorial/errors.rst create mode 100644 Doc/tutorial/floatingpoint.rst create mode 100644 Doc/tutorial/glossary.rst create mode 100644 Doc/tutorial/index.rst create mode 100644 Doc/tutorial/inputoutput.rst create mode 100644 Doc/tutorial/interactive.rst create mode 100644 Doc/tutorial/interpreter.rst create mode 100644 Doc/tutorial/introduction.rst create mode 100644 Doc/tutorial/modules.rst create mode 100644 Doc/tutorial/stdlib.rst create mode 100644 Doc/tutorial/stdlib2.rst create mode 100644 Doc/tutorial/whatnow.rst create mode 100644 Doc/whatsnew/2.0.rst create mode 100644 Doc/whatsnew/2.1.rst create mode 100644 Doc/whatsnew/2.2.rst create mode 100644 Doc/whatsnew/2.3.rst create mode 100644 Doc/whatsnew/2.4.rst create mode 100644 Doc/whatsnew/2.5.rst create mode 100644 Doc/whatsnew/2.6.rst create mode 100644 Doc/whatsnew/3.0.rst diff --git a/Doc/ACKS.txt b/Doc/ACKS.txt new file mode 100644 index 0000000..f695403 --- /dev/null +++ b/Doc/ACKS.txt @@ -0,0 +1,196 @@ +Contributors to the Python Documentation 
+---------------------------------------- + +This file lists people who have contributed in some way to the Python +documentation. It is probably not complete -- if you feel that you or +anyone else should be on this list, please let us know (send email to +docs@python.org), and we'll be glad to correct the problem. + +* Aahz +* Michael Abbott +* Steve Alexander +* Jim Ahlstrom +* Fred Allen +* A. Amoroso +* Pehr Anderson +* Oliver Andrich +* Jesús Cea Avión +* Daniel Barclay +* Chris Barker +* Don Bashford +* Anthony Baxter +* Bennett Benson +* Jonathan Black +* Robin Boerdijk +* Michal Bozon +* Aaron Brancotti +* Georg Brandl +* Keith Briggs +* Lee Busby +* Lorenzo M. Catucci +* Carl Cerecke +* Mauro Cicognini +* Gilles Civario +* Mike Clarkson +* Steve Clift +* Dave Cole +* Matthew Cowles +* Jeremy Craven +* Andrew Dalke +* Ben Darnell +* L. Peter Deutsch +* Robert Donohue +* Fred L. Drake, Jr. +* Jeff Epler +* Michael Ernst +* Blame Andy Eskilsson +* Carey Evans +* Martijn Faassen +* Carl Feynman +* Hernán Martínez Foffani +* Stefan Franke +* Jim Fulton +* Peter Funk +* Lele Gaifax +* Matthew Gallagher +* Ben Gertzfield +* Nadim Ghaznavi +* Jonathan Giddy +* Shelley Gooch +* Nathaniel Gray +* Grant Griffin +* Thomas Guettler +* Anders Hammarquist +* Mark Hammond +* Harald Hanche-Olsen +* Manus Hand +* Gerhard Häring +* Travis B. Hartwell +* Tim Hatch +* Janko Hauser +* Bernhard Herzog +* Magnus L. Hetland +* Konrad Hinsen +* Stefan Hoffmeister +* Albert Hofkamp +* Gregor Hoffleit +* Steve Holden +* Thomas Holenstein +* Gerrit Holl +* Rob Hooft +* Brian Hooper +* Randall Hopper +* Michael Hudson +* Eric Huss +* Jeremy Hylton +* Roger Irwin +* Jack Jansen +* Philip H. Jensen +* Pedro Diaz Jimenez +* Kent Johnson +* Lucas de Jonge +* Andreas Jung +* Robert Kern +* Jim Kerr +* Jan Kim +* Greg Kochanski +* Guido Kollerie +* Peter A. Koren +* Daniel Kozan +* Andrew M. 
Kuchling +* Dave Kuhlman +* Erno Kuusela +* Detlef Lannert +* Piers Lauder +* Glyph Lefkowitz +* Marc-André Lemburg +* Ulf A. Lindgren +* Everett Lipman +* Mirko Liss +* Martin von Löwis +* Fredrik Lundh +* Jeff MacDonald +* John Machin +* Andrew MacIntyre +* Vladimir Marangozov +* Vincent Marchetti +* Laura Matson +* Daniel May +* Doug Mennella +* Paolo Milani +* Skip Montanaro +* Paul Moore +* Ross Moore +* Sjoerd Mullender +* Dale Nagata +* Ng Pheng Siong +* Koray Oner +* Tomas Oppelstrup +* Denis S. Otkidach +* Zooko O'Whielacronx +* William Park +* Joonas Paalasmaa +* Harri Pasanen +* Bo Peng +* Tim Peters +* Christopher Petrilli +* Justin D. Pettit +* Chris Phoenix +* François Pinard +* Paul Prescod +* Eric S. Raymond +* Edward K. Ream +* Sean Reifschneider +* Bernhard Reiter +* Armin Rigo +* Wes Rishel +* Jim Roskind +* Guido van Rossum +* Donald Wallace Rouse II +* Nick Russo +* Chris Ryland +* Constantina S. +* Hugh Sasse +* Bob Savage +* Scott Schram +* Neil Schemenauer +* Barry Scott +* Joakim Sernbrant +* Justin Sheehy +* Michael Simcich +* Ionel Simionescu +* Gregory P. Smith +* Roy Smith +* Clay Spence +* Nicholas Spies +* Tage Stabell-Kulo +* Frank Stajano +* Anthony Starks +* Greg Stein +* Peter Stoehr +* Mark Summerfield +* Reuben Sumner +* Kalle Svensson +* Jim Tittsler +* Ville Vainio +* Martijn Vries +* Charles G. Waldman +* Greg Ward +* Barry Warsaw +* Corran Webster +* Glyn Webster +* Bob Weiner +* Eddy Welbourne +* Mats Wichmann +* Gerry Wiener +* Timothy Wild +* Collin Winter +* Blake Winton +* Dan Wolfe +* Steven Work +* Thomas Wouters +* Ka-Ping Yee +* Rory Yorke +* Moshe Zadka +* Milan Zamazal +* Cheng Zhang diff --git a/Doc/Makefile b/Doc/Makefile new file mode 100644 index 0000000..955fb68 --- /dev/null +++ b/Doc/Makefile @@ -0,0 +1,62 @@ +# +# Makefile for Python documentation +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# + +# You can set these variables from the command line. 
+PYTHON ?= python +SVNROOT ?= http://svn.python.org/projects +SPHINXOPTS ?= + +ALLSPHINXOPTS = -b$(BUILDER) -dbuild/doctrees $(SPHINXOPTS) . build/$(BUILDER) + +.PHONY: help checkout update build html web htmlhelp clean + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " html to make standalone HTML files" + @echo " web to make file usable by Sphinx.web" + @echo " htmlhelp to make HTML files and a HTML help project" + +checkout: + @if [ ! -d tools/sphinx ]; then \ + echo "Checking out Sphinx..."; \ + svn checkout $(SVNROOT)/doctools/trunk/sphinx tools/sphinx; \ + fi + @if [ ! -d tools/docutils ]; then \ + echo "Checking out Docutils..."; \ + svn checkout $(SVNROOT)/external/docutils-0.4/docutils tools/docutils; \ + fi + @if [ ! -d tools/pygments ]; then \ + echo "Checking out Pygments..."; \ + svn checkout $(SVNROOT)/external/Pygments-0.8.1/pygments tools/pygments; \ + fi + +update: + svn update tools/sphinx + svn update tools/docutils + svn update tools/pygments + +build: checkout + mkdir -p build/$(BUILDER) build/doctrees + $(PYTHON) tools/sphinx-build.py $(ALLSPHINXOPTS) + @echo + +html: BUILDER = html +html: build + @echo "Build finished. The HTML pages are in build/html." + +web: BUILDER = web +web: build + @echo "Build finished; now you can run" + @echo " PYTHONPATH=tools $(PYTHON) -m sphinx.web build/web" + @echo "to start the server." + +htmlhelp: BUILDER = htmlhelp +htmlhelp: build + @echo "Build finished; now you can run HTML Help Workshop with the" \ + "build/hhp/pydoc.hhp project file." + +clean: + -rm -rf build/* + -rm -rf tools/sphinx diff --git a/Doc/README.txt b/Doc/README.txt new file mode 100644 index 0000000..c566a4a --- /dev/null +++ b/Doc/README.txt @@ -0,0 +1,121 @@ +Python Documentation README +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This directory contains the reStructuredText (reST) sources to the Python +documentation. You don't need to build them yourself; prebuilt versions are +available at http://docs.python.org/download/. 
+ +Documentation on authoring Python documentation, including information about +both style and markup, is available in the "Documenting Python" chapter of the +documentation. There's also a chapter intended to point out differences to +those familiar with the previous docs written in LaTeX. + + +Building the docs +================= + +You need to install Python 2.5 or higher; the toolset used to build the docs is +written in Python. The toolset used to build the documentation is called +*Sphinx*; it is not included in this tree, but maintained separately in the +Python Subversion repository. Also needed are Jinja, a templating engine +(included in Sphinx as a Subversion external), and optionally Pygments, a code +highlighter. + + +Using make +---------- + +Luckily, a Makefile has been prepared so that on Unix, provided you have +installed Python and Subversion, you can just run :: + + make html + +to check out the necessary toolset in the `tools/` subdirectory and build the +HTML output files. To view the generated HTML, point your favorite browser at +the top-level index `build/html/index.html` after running "make". + +Available make targets are: + + * "html", which builds standalone HTML files for offline viewing. + + * "web", which builds files usable with the Sphinx.web application (used to + serve the docs online at http://docs.python.org/). + + * "htmlhelp", which builds HTML files and a HTML Help project file usable to + convert them into a single Compiled HTML (.chm) file -- these are popular + under Microsoft Windows, but very handy on every platform. + + To create the CHM file, you need to run the Microsoft HTML Help Workshop + over the generated project (.hhp) file. + +A "make update" updates the Subversion checkouts in `tools/`. 
+ + +Without make +------------ + +You'll need to check out the Sphinx package to the `tools/` directory:: + + svn co http://svn.python.org/projects/doctools/trunk/sphinx tools/sphinx + +Then, you need to install Docutils 0.4 (the SVN snapshot won't work), either +by checking it out via :: + + svn co http://svn.python.org/projects/external/docutils-0.4/docutils tools/docutils + +or by installing it from http://docutils.sf.net/. + +You can optionally also install Pygments, either as a checkout via :: + + svn co http://svn.python.org/projects/external/Pygments-0.8.1/pygments tools/pygments + +or from PyPI at http://pypi.python.org/pypi/Pygments. + + +Then, make an output directory, e.g. under `build/`, and run :: + + python tools/sphinx-build.py -b<builder> . build/<outputdirectory> + +where `<builder>` is one of html, web or htmlhelp (for explanations see the make +targets above). + + +Contributing +============ + +For bugs in the content, the online version at http://docs.python.org/ has a +"suggest change" facility that can be used to correct errors in the source text +and submit them as a patch to the maintainers. + +Bugs in the toolset should be reported in the Python bug tracker at +http://bugs.python.org/. + +You can also send a mail to the Python Documentation Team at docs@python.org, +and we will process your request as soon as possible. + +If you want to help the Documentation Team, you are always welcome. Just send +a mail to docs@python.org. + + +Copyright notice +================ + +The Python source is copyrighted, but you can freely use and copy it +as long as you don't change or remove the copyright notice: + +---------------------------------------------------------------------- +Copyright (c) 2000-2007 Python Software Foundation. +All rights reserved. + +Copyright (c) 2000 BeOpen.com. +All rights reserved. + +Copyright (c) 1995-2000 Corporation for National Research Initiatives. +All rights reserved. + +Copyright (c) 1991-1995 Stichting Mathematisch Centrum. +All rights reserved. 
+ +See the file "license.rst" for information on usage and redistribution +of this file, and for a DISCLAIMER OF ALL WARRANTIES. +---------------------------------------------------------------------- diff --git a/Doc/TODO.txt b/Doc/TODO.txt new file mode 100644 index 0000000..c8d3501 --- /dev/null +++ b/Doc/TODO.txt @@ -0,0 +1,6 @@ +To do +===== + +* split very large files and add toctrees +* finish "Documenting Python" +* care about XXX comments diff --git a/Doc/about.rst b/Doc/about.rst new file mode 100644 index 0000000..d3ce2dd --- /dev/null +++ b/Doc/about.rst @@ -0,0 +1,33 @@ +===================== +About these documents +===================== + + +These documents are generated from `reStructuredText +<http://docutils.sf.net/rst.html>`_ sources by *Sphinx*, a document processor +specifically written for the Python documentation. + +In the online version of these documents, you can submit comments and suggest +changes directly on the documentation pages. + +Development of the documentation and its toolchain takes place on the +docs@python.org mailing list. We're always looking for volunteers wanting +to help with the docs, so feel free to send a mail there! + +Many thanks go to: + +* Fred L. Drake, Jr., the creator of the original Python documentation toolset + and writer of much of the content; +* the `docutils <http://docutils.sf.net/>`_ project for creating + reStructuredText and the docutils suite; +* Fredrik Lundh for his `Alternative Python Reference + <http://effbot.org/zone/pyref.htm>`_ project from which Sphinx got many good + ideas. + +See :ref:`reporting-bugs` for information on how to report bugs in Python itself. + +.. including the ACKS file here so that it can be maintained separately +.. include:: ACKS.txt + +It is only with the input and contributions of the Python community +that Python has such wonderful documentation -- Thank You! diff --git a/Doc/bugs.rst b/Doc/bugs.rst new file mode 100644 index 0000000..f8f75c2 --- /dev/null +++ b/Doc/bugs.rst @@ -0,0 +1,59 @@ +.. 
_reporting-bugs: + +************************ +Reporting Bugs in Python +************************ + +Python is a mature programming language which has established a reputation for +stability. In order to maintain this reputation, the developers would like to +know of any deficiencies you find in Python. + +If you find errors in the documentation, please use either the "Add a comment" +or the "Suggest a change" features of the relevant page in the most recent +online documentation at http://docs.python.org/. + +All other bug reports should be submitted via the Python Bug Tracker +(http://bugs.python.org/). The bug tracker offers a Web form which allows +pertinent information to be entered and submitted to the developers. + +The first step in filing a report is to determine whether the problem has +already been reported. The advantage in doing so, aside from saving the +developers time, is that you learn what has been done to fix it; it may be that +the problem has already been fixed for the next release, or additional +information is needed (in which case you are welcome to provide it if you can!). +To do this, search the bug database using the search box on the top of the page. + +If the problem you're reporting is not already in the bug tracker, go back to +the Python Bug Tracker. If you don't already have a tracker account, select the +"Register" link in the sidebar and undergo the registration procedure. +Otherwise, if you're not logged in, enter your credentials and select "Login". +It is not possible to submit a bug report anonymously. + +Being now logged in, you can submit a bug. Select the "Create New" link in the +sidebar to open the bug reporting form. + +The submission form has a number of fields. For the "Title" field, enter a +*very* short description of the problem; less than ten words is good. In the +"Type" field, select the type of your problem; also select the "Component" and +"Versions" to which the bug relates. 
+ +In the "Change Note" field, describe the problem in detail, including what you +expected to happen and what did happen. Be sure to include whether any +extension modules were involved, and what hardware and software platform you +were using (including version information as appropriate). + +Each bug report will be assigned to a developer who will determine what needs to +be done to correct the problem. You will receive an update each time action is +taken on the bug. + + +.. seealso:: + + `How to Report Bugs Effectively <http://www-mice.cs.ucl.ac.uk/multimedia/software/documentation/ReportingBugs.html>`_ + Article which goes into some detail about how to create a useful bug report. + This describes what kind of information is useful and why it is useful. + + `Bug Writing Guidelines <http://www.mozilla.org/quality/bug-writing-guidelines.html>`_ + Information about writing a good bug report. Some of this is specific to the + Mozilla project, but describes general good practices. + diff --git a/Doc/c-api/abstract.rst b/Doc/c-api/abstract.rst new file mode 100644 index 0000000..1a1ff13 --- /dev/null +++ b/Doc/c-api/abstract.rst @@ -0,0 +1,997 @@ +.. highlightlang:: c + + +.. _abstract: + +********************** +Abstract Objects Layer +********************** + +The functions in this chapter interact with Python objects regardless of their +type, or with wide classes of object types (e.g. all numerical types, or all +sequence types). When used on object types for which they do not apply, they +will raise a Python exception. + +It is not possible to use these functions on objects that are not properly +initialized, such as a list object that has been created by :cfunc:`PyList_New`, +but whose items have not been set to some non-\ ``NULL`` value yet. + + +.. _object: + +Object Protocol +=============== + + +.. cfunction:: int PyObject_Print(PyObject *o, FILE *fp, int flags) + + Print an object *o*, on file *fp*. Returns ``-1`` on error. The flags argument + is used to enable certain printing options. 
The only option currently supported + is :const:`Py_PRINT_RAW`; if given, the :func:`str` of the object is written + instead of the :func:`repr`. + + +.. cfunction:: int PyObject_HasAttrString(PyObject *o, const char *attr_name) + + Returns ``1`` if *o* has the attribute *attr_name*, and ``0`` otherwise. This + is equivalent to the Python expression ``hasattr(o, attr_name)``. This function + always succeeds. + + +.. cfunction:: PyObject* PyObject_GetAttrString(PyObject *o, const char *attr_name) + + Retrieve an attribute named *attr_name* from object *o*. Returns the attribute + value on success, or *NULL* on failure. This is the equivalent of the Python + expression ``o.attr_name``. + + +.. cfunction:: int PyObject_HasAttr(PyObject *o, PyObject *attr_name) + + Returns ``1`` if *o* has the attribute *attr_name*, and ``0`` otherwise. This + is equivalent to the Python expression ``hasattr(o, attr_name)``. This function + always succeeds. + + +.. cfunction:: PyObject* PyObject_GetAttr(PyObject *o, PyObject *attr_name) + + Retrieve an attribute named *attr_name* from object *o*. Returns the attribute + value on success, or *NULL* on failure. This is the equivalent of the Python + expression ``o.attr_name``. + + +.. cfunction:: int PyObject_SetAttrString(PyObject *o, const char *attr_name, PyObject *v) + + Set the value of the attribute named *attr_name*, for object *o*, to the value + *v*. Returns ``-1`` on failure. This is the equivalent of the Python statement + ``o.attr_name = v``. + + +.. cfunction:: int PyObject_SetAttr(PyObject *o, PyObject *attr_name, PyObject *v) + + Set the value of the attribute named *attr_name*, for object *o*, to the value + *v*. Returns ``-1`` on failure. This is the equivalent of the Python statement + ``o.attr_name = v``. + + +.. cfunction:: int PyObject_DelAttrString(PyObject *o, const char *attr_name) + + Delete attribute named *attr_name*, for object *o*. Returns ``-1`` on failure. 
+ This is the equivalent of the Python statement: ``del o.attr_name``. + + +.. cfunction:: int PyObject_DelAttr(PyObject *o, PyObject *attr_name) + + Delete attribute named *attr_name*, for object *o*. Returns ``-1`` on failure. + This is the equivalent of the Python statement ``del o.attr_name``. + + +.. cfunction:: PyObject* PyObject_RichCompare(PyObject *o1, PyObject *o2, int opid) + + Compare the values of *o1* and *o2* using the operation specified by *opid*, + which must be one of :const:`Py_LT`, :const:`Py_LE`, :const:`Py_EQ`, + :const:`Py_NE`, :const:`Py_GT`, or :const:`Py_GE`, corresponding to ``<``, + ``<=``, ``==``, ``!=``, ``>``, or ``>=`` respectively. This is the equivalent of + the Python expression ``o1 op o2``, where ``op`` is the operator corresponding + to *opid*. Returns the value of the comparison on success, or *NULL* on failure. + + +.. cfunction:: int PyObject_RichCompareBool(PyObject *o1, PyObject *o2, int opid) + + Compare the values of *o1* and *o2* using the operation specified by *opid*, + which must be one of :const:`Py_LT`, :const:`Py_LE`, :const:`Py_EQ`, + :const:`Py_NE`, :const:`Py_GT`, or :const:`Py_GE`, corresponding to ``<``, + ``<=``, ``==``, ``!=``, ``>``, or ``>=`` respectively. Returns ``-1`` on error, + ``0`` if the result is false, ``1`` otherwise. This is the equivalent of the + Python expression ``o1 op o2``, where ``op`` is the operator corresponding to + *opid*. + + +.. cfunction:: int PyObject_Cmp(PyObject *o1, PyObject *o2, int *result) + + .. index:: builtin: cmp + + Compare the values of *o1* and *o2* using a routine provided by *o1*, if one + exists, otherwise with a routine provided by *o2*. The result of the comparison + is returned in *result*. Returns ``-1`` on failure. This is the equivalent of + the Python statement ``result = cmp(o1, o2)``. + + +.. cfunction:: int PyObject_Compare(PyObject *o1, PyObject *o2) + + .. 
index:: builtin: cmp + + Compare the values of *o1* and *o2* using a routine provided by *o1*, if one + exists, otherwise with a routine provided by *o2*. Returns the result of the + comparison on success. On error, the value returned is undefined; use + :cfunc:`PyErr_Occurred` to detect an error. This is equivalent to the Python + expression ``cmp(o1, o2)``. + + +.. cfunction:: PyObject* PyObject_Repr(PyObject *o) + + .. index:: builtin: repr + + Compute a string representation of object *o*. Returns the string + representation on success, *NULL* on failure. This is the equivalent of the + Python expression ``repr(o)``. Called by the :func:`repr` built-in function and + by reverse quotes. + + +.. cfunction:: PyObject* PyObject_Str(PyObject *o) + + .. index:: builtin: str + + Compute a string representation of object *o*. Returns the string + representation on success, *NULL* on failure. This is the equivalent of the + Python expression ``str(o)``. Called by the :func:`str` built-in function and + by the :keyword:`print` statement. + + +.. cfunction:: PyObject* PyObject_Unicode(PyObject *o) + + .. index:: builtin: unicode + + Compute a Unicode string representation of object *o*. Returns the Unicode + string representation on success, *NULL* on failure. This is the equivalent of + the Python expression ``unicode(o)``. Called by the :func:`unicode` built-in + function. + + +.. cfunction:: int PyObject_IsInstance(PyObject *inst, PyObject *cls) + + Returns ``1`` if *inst* is an instance of the class *cls* or a subclass of + *cls*, or ``0`` if not. On error, returns ``-1`` and sets an exception. If + *cls* is a type object rather than a class object, :cfunc:`PyObject_IsInstance` + returns ``1`` if *inst* is of type *cls*. If *cls* is a tuple, the check will + be done against every entry in *cls*. The result will be ``1`` when at least one + of the checks returns ``1``, otherwise it will be ``0``. 
If *inst* is not a + class instance and *cls* is neither a type object, nor a class object, nor a + tuple, *inst* must have a :attr:`__class__` attribute --- the class relationship + of the value of that attribute with *cls* will be used to determine the result + of this function. + + .. versionadded:: 2.1 + + .. versionchanged:: 2.2 + Support for a tuple as the second argument added. + +Subclass determination is done in a fairly straightforward way, but includes a +wrinkle that implementors of extensions to the class system may want to be aware +of. If :class:`A` and :class:`B` are class objects, :class:`B` is a subclass of +:class:`A` if it inherits from :class:`A` either directly or indirectly. If +either is not a class object, a more general mechanism is used to determine the +class relationship of the two objects. When testing if *B* is a subclass of +*A*, if *A* is *B*, :cfunc:`PyObject_IsSubclass` returns true. If *A* and *B* +are different objects, *B*'s :attr:`__bases__` attribute is searched in a +depth-first fashion for *A* --- the presence of the :attr:`__bases__` attribute +is considered sufficient for this determination. + + +.. cfunction:: int PyObject_IsSubclass(PyObject *derived, PyObject *cls) + + Returns ``1`` if the class *derived* is identical to or derived from the class + *cls*, otherwise returns ``0``. In case of an error, returns ``-1``. If *cls* + is a tuple, the check will be done against every entry in *cls*. The result will + be ``1`` when at least one of the checks returns ``1``, otherwise it will be + ``0``. If either *derived* or *cls* is not an actual class object (or tuple), + this function uses the generic algorithm described above. + + .. versionadded:: 2.1 + + .. versionchanged:: 2.3 + Older versions of Python did not support a tuple as the second argument. + + +.. cfunction:: int PyCallable_Check(PyObject *o) + + Determine if the object *o* is callable. Return ``1`` if the object is callable + and ``0`` otherwise. 
This function always succeeds. + + +.. cfunction:: PyObject* PyObject_Call(PyObject *callable_object, PyObject *args, PyObject *kw) + + Call a callable Python object *callable_object*, with arguments given by the + tuple *args*, and named arguments given by the dictionary *kw*. If no named + arguments are needed, *kw* may be *NULL*. *args* must not be *NULL*, use an + empty tuple if no arguments are needed. Returns the result of the call on + success, or *NULL* on failure. This is the equivalent of the Python expression + ``callable_object(*args, **kw)``. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyObject_CallObject(PyObject *callable_object, PyObject *args) + + Call a callable Python object *callable_object*, with arguments given by the + tuple *args*. If no arguments are needed, then *args* may be *NULL*. Returns + the result of the call on success, or *NULL* on failure. This is the equivalent + of the Python expression ``callable_object(*args)``. + + +.. cfunction:: PyObject* PyObject_CallFunction(PyObject *callable, char *format, ...) + + Call a callable Python object *callable*, with a variable number of C arguments. + The C arguments are described using a :cfunc:`Py_BuildValue` style format + string. The format may be *NULL*, indicating that no arguments are provided. + Returns the result of the call on success, or *NULL* on failure. This is the + equivalent of the Python expression ``callable(*args)``. Note that if you only + pass :ctype:`PyObject \*` args, :cfunc:`PyObject_CallFunctionObjArgs` is a + faster alternative. + + +.. cfunction:: PyObject* PyObject_CallMethod(PyObject *o, char *method, char *format, ...) + + Call the method named *method* of object *o* with a variable number of C + arguments. The C arguments are described by a :cfunc:`Py_BuildValue` format + string that should produce a tuple. The format may be *NULL*, indicating that + no arguments are provided. Returns the result of the call on success, or *NULL* + on failure. 
This is the equivalent of the Python expression ``o.method(args)``. + Note that if you only pass :ctype:`PyObject \*` args, + :cfunc:`PyObject_CallMethodObjArgs` is a faster alternative. + + +.. cfunction:: PyObject* PyObject_CallFunctionObjArgs(PyObject *callable, ..., NULL) + + Call a callable Python object *callable*, with a variable number of + :ctype:`PyObject\*` arguments. The arguments are provided as a variable number + of parameters followed by *NULL*. Returns the result of the call on success, or + *NULL* on failure. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyObject_CallMethodObjArgs(PyObject *o, PyObject *name, ..., NULL) + + Calls a method of the object *o*, where the name of the method is given as a + Python string object in *name*. It is called with a variable number of + :ctype:`PyObject\*` arguments. The arguments are provided as a variable number + of parameters followed by *NULL*. Returns the result of the call on success, or + *NULL* on failure. + + .. versionadded:: 2.2 + + +.. cfunction:: long PyObject_Hash(PyObject *o) + + .. index:: builtin: hash + + Compute and return the hash value of an object *o*. On failure, return ``-1``. + This is the equivalent of the Python expression ``hash(o)``. + + +.. cfunction:: int PyObject_IsTrue(PyObject *o) + + Returns ``1`` if the object *o* is considered to be true, and ``0`` otherwise. + This is equivalent to the Python expression ``not not o``. On failure, return + ``-1``. + + +.. cfunction:: int PyObject_Not(PyObject *o) + + Returns ``0`` if the object *o* is considered to be true, and ``1`` otherwise. + This is equivalent to the Python expression ``not o``. On failure, return + ``-1``. + + +.. cfunction:: PyObject* PyObject_Type(PyObject *o) + + .. index:: builtin: type + + When *o* is non-*NULL*, returns a type object corresponding to the object type + of object *o*. On failure, raises :exc:`SystemError` and returns *NULL*. This + is equivalent to the Python expression ``type(o)``. 
This function increments the + reference count of the return value. There's really no reason to use this + function instead of the common expression ``o->ob_type``, which returns a + pointer of type :ctype:`PyTypeObject\*`, except when the incremented reference + count is needed. + + +.. cfunction:: int PyObject_TypeCheck(PyObject *o, PyTypeObject *type) + + Return true if the object *o* is of type *type* or a subtype of *type*. Both + parameters must be non-*NULL*. + + .. versionadded:: 2.2 + + +.. cfunction:: Py_ssize_t PyObject_Length(PyObject *o) + Py_ssize_t PyObject_Size(PyObject *o) + + .. index:: builtin: len + + Return the length of object *o*. If the object *o* provides both the sequence + and mapping protocols, the sequence length is returned. On error, ``-1`` is + returned. This is the equivalent of the Python expression ``len(o)``. + + +.. cfunction:: PyObject* PyObject_GetItem(PyObject *o, PyObject *key) + + Return element of *o* corresponding to the object *key* or *NULL* on failure. + This is the equivalent of the Python expression ``o[key]``. + + +.. cfunction:: int PyObject_SetItem(PyObject *o, PyObject *key, PyObject *v) + + Map the object *key* to the value *v*. Returns ``-1`` on failure. This is the + equivalent of the Python statement ``o[key] = v``. + + +.. cfunction:: int PyObject_DelItem(PyObject *o, PyObject *key) + + Delete the mapping for *key* from *o*. Returns ``-1`` on failure. This is the + equivalent of the Python statement ``del o[key]``. + + +.. cfunction:: int PyObject_AsFileDescriptor(PyObject *o) + + Derives a file-descriptor from a Python object. If the object is an integer or + long integer, its value is returned. If not, the object's :meth:`fileno` method + is called if it exists; the method must return an integer or long integer, which + is returned as the file descriptor value. Returns ``-1`` on failure. + + +.. 
cfunction:: PyObject* PyObject_Dir(PyObject *o) + + This is equivalent to the Python expression ``dir(o)``, returning a (possibly + empty) list of strings appropriate for the object argument, or *NULL* if there + was an error. If the argument is *NULL*, this is like the Python ``dir()``, + returning the names of the current locals; in this case, if no execution frame + is active then *NULL* is returned but :cfunc:`PyErr_Occurred` will return false. + + +.. cfunction:: PyObject* PyObject_GetIter(PyObject *o) + + This is equivalent to the Python expression ``iter(o)``. It returns a new + iterator for the object argument, or the object itself if the object is already + an iterator. Raises :exc:`TypeError` and returns *NULL* if the object cannot be + iterated. + + +.. _number: + +Number Protocol +=============== + + +.. cfunction:: int PyNumber_Check(PyObject *o) + + Returns ``1`` if the object *o* provides numeric protocols, and ``0`` otherwise. + This function always succeeds. + + +.. cfunction:: PyObject* PyNumber_Add(PyObject *o1, PyObject *o2) + + Returns the result of adding *o1* and *o2*, or *NULL* on failure. This is the + equivalent of the Python expression ``o1 + o2``. + + +.. cfunction:: PyObject* PyNumber_Subtract(PyObject *o1, PyObject *o2) + + Returns the result of subtracting *o2* from *o1*, or *NULL* on failure. This is + the equivalent of the Python expression ``o1 - o2``. + + +.. cfunction:: PyObject* PyNumber_Multiply(PyObject *o1, PyObject *o2) + + Returns the result of multiplying *o1* and *o2*, or *NULL* on failure. This is + the equivalent of the Python expression ``o1 * o2``. + + +.. cfunction:: PyObject* PyNumber_Divide(PyObject *o1, PyObject *o2) + + Returns the result of dividing *o1* by *o2*, or *NULL* on failure. This is the + equivalent of the Python expression ``o1 / o2``. + + +.. cfunction:: PyObject* PyNumber_FloorDivide(PyObject *o1, PyObject *o2) + + Return the floor of *o1* divided by *o2*, or *NULL* on failure. 
This is + equivalent to the "classic" division of integers. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyNumber_TrueDivide(PyObject *o1, PyObject *o2) + + Return a reasonable approximation for the mathematical value of *o1* divided by + *o2*, or *NULL* on failure. The return value is "approximate" because binary + floating point numbers are approximate; it is not possible to represent all real + numbers in base two. This function can return a floating point value when + passed two integers. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyNumber_Remainder(PyObject *o1, PyObject *o2) + + Returns the remainder of dividing *o1* by *o2*, or *NULL* on failure. This is + the equivalent of the Python expression ``o1 % o2``. + + +.. cfunction:: PyObject* PyNumber_Divmod(PyObject *o1, PyObject *o2) + + .. index:: builtin: divmod + + See the built-in function :func:`divmod`. Returns *NULL* on failure. This is + the equivalent of the Python expression ``divmod(o1, o2)``. + + +.. cfunction:: PyObject* PyNumber_Power(PyObject *o1, PyObject *o2, PyObject *o3) + + .. index:: builtin: pow + + See the built-in function :func:`pow`. Returns *NULL* on failure. This is the + equivalent of the Python expression ``pow(o1, o2, o3)``, where *o3* is optional. + If *o3* is to be ignored, pass :cdata:`Py_None` in its place (passing *NULL* for + *o3* would cause an illegal memory access). + + +.. cfunction:: PyObject* PyNumber_Negative(PyObject *o) + + Returns the negation of *o* on success, or *NULL* on failure. This is the + equivalent of the Python expression ``-o``. + + +.. cfunction:: PyObject* PyNumber_Positive(PyObject *o) + + Returns *o* on success, or *NULL* on failure. This is the equivalent of the + Python expression ``+o``. + + +.. cfunction:: PyObject* PyNumber_Absolute(PyObject *o) + + .. index:: builtin: abs + + Returns the absolute value of *o*, or *NULL* on failure. This is the equivalent + of the Python expression ``abs(o)``. + + +.. 
cfunction:: PyObject* PyNumber_Invert(PyObject *o) + + Returns the bitwise negation of *o* on success, or *NULL* on failure. This is + the equivalent of the Python expression ``~o``. + + +.. cfunction:: PyObject* PyNumber_Lshift(PyObject *o1, PyObject *o2) + + Returns the result of left shifting *o1* by *o2* on success, or *NULL* on + failure. This is the equivalent of the Python expression ``o1 << o2``. + + +.. cfunction:: PyObject* PyNumber_Rshift(PyObject *o1, PyObject *o2) + + Returns the result of right shifting *o1* by *o2* on success, or *NULL* on + failure. This is the equivalent of the Python expression ``o1 >> o2``. + + +.. cfunction:: PyObject* PyNumber_And(PyObject *o1, PyObject *o2) + + Returns the "bitwise and" of *o1* and *o2* on success and *NULL* on failure. + This is the equivalent of the Python expression ``o1 & o2``. + + +.. cfunction:: PyObject* PyNumber_Xor(PyObject *o1, PyObject *o2) + + Returns the "bitwise exclusive or" of *o1* by *o2* on success, or *NULL* on + failure. This is the equivalent of the Python expression ``o1 ^ o2``. + + +.. cfunction:: PyObject* PyNumber_Or(PyObject *o1, PyObject *o2) + + Returns the "bitwise or" of *o1* and *o2* on success, or *NULL* on failure. + This is the equivalent of the Python expression ``o1 | o2``. + + +.. cfunction:: PyObject* PyNumber_InPlaceAdd(PyObject *o1, PyObject *o2) + + Returns the result of adding *o1* and *o2*, or *NULL* on failure. The operation + is done *in-place* when *o1* supports it. This is the equivalent of the Python + statement ``o1 += o2``. + + +.. cfunction:: PyObject* PyNumber_InPlaceSubtract(PyObject *o1, PyObject *o2) + + Returns the result of subtracting *o2* from *o1*, or *NULL* on failure. The + operation is done *in-place* when *o1* supports it. This is the equivalent of + the Python statement ``o1 -= o2``. + + +.. cfunction:: PyObject* PyNumber_InPlaceMultiply(PyObject *o1, PyObject *o2) + + Returns the result of multiplying *o1* and *o2*, or *NULL* on failure. 
The + operation is done *in-place* when *o1* supports it. This is the equivalent of + the Python statement ``o1 *= o2``. + + +.. cfunction:: PyObject* PyNumber_InPlaceDivide(PyObject *o1, PyObject *o2) + + Returns the result of dividing *o1* by *o2*, or *NULL* on failure. The + operation is done *in-place* when *o1* supports it. This is the equivalent of + the Python statement ``o1 /= o2``. + + +.. cfunction:: PyObject* PyNumber_InPlaceFloorDivide(PyObject *o1, PyObject *o2) + + Returns the mathematical floor of dividing *o1* by *o2*, or *NULL* on failure. + The operation is done *in-place* when *o1* supports it. This is the equivalent + of the Python statement ``o1 //= o2``. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyNumber_InPlaceTrueDivide(PyObject *o1, PyObject *o2) + + Return a reasonable approximation for the mathematical value of *o1* divided by + *o2*, or *NULL* on failure. The return value is "approximate" because binary + floating point numbers are approximate; it is not possible to represent all real + numbers in base two. This function can return a floating point value when + passed two integers. The operation is done *in-place* when *o1* supports it. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyNumber_InPlaceRemainder(PyObject *o1, PyObject *o2) + + Returns the remainder of dividing *o1* by *o2*, or *NULL* on failure. The + operation is done *in-place* when *o1* supports it. This is the equivalent of + the Python statement ``o1 %= o2``. + + +.. cfunction:: PyObject* PyNumber_InPlacePower(PyObject *o1, PyObject *o2, PyObject *o3) + + .. index:: builtin: pow + + See the built-in function :func:`pow`. Returns *NULL* on failure. The operation + is done *in-place* when *o1* supports it. This is the equivalent of the Python + statement ``o1 **= o2`` when o3 is :cdata:`Py_None`, or an in-place variant of + ``pow(o1, o2, o3)`` otherwise. 
If *o3* is to be ignored, pass :cdata:`Py_None` + in its place (passing *NULL* for *o3* would cause an illegal memory access). + + +.. cfunction:: PyObject* PyNumber_InPlaceLshift(PyObject *o1, PyObject *o2) + + Returns the result of left shifting *o1* by *o2* on success, or *NULL* on + failure. The operation is done *in-place* when *o1* supports it. This is the + equivalent of the Python statement ``o1 <<= o2``. + + +.. cfunction:: PyObject* PyNumber_InPlaceRshift(PyObject *o1, PyObject *o2) + + Returns the result of right shifting *o1* by *o2* on success, or *NULL* on + failure. The operation is done *in-place* when *o1* supports it. This is the + equivalent of the Python statement ``o1 >>= o2``. + + +.. cfunction:: PyObject* PyNumber_InPlaceAnd(PyObject *o1, PyObject *o2) + + Returns the "bitwise and" of *o1* and *o2* on success and *NULL* on failure. The + operation is done *in-place* when *o1* supports it. This is the equivalent of + the Python statement ``o1 &= o2``. + + +.. cfunction:: PyObject* PyNumber_InPlaceXor(PyObject *o1, PyObject *o2) + + Returns the "bitwise exclusive or" of *o1* by *o2* on success, or *NULL* on + failure. The operation is done *in-place* when *o1* supports it. This is the + equivalent of the Python statement ``o1 ^= o2``. + + +.. cfunction:: PyObject* PyNumber_InPlaceOr(PyObject *o1, PyObject *o2) + + Returns the "bitwise or" of *o1* and *o2* on success, or *NULL* on failure. The + operation is done *in-place* when *o1* supports it. This is the equivalent of + the Python statement ``o1 |= o2``. + + +.. cfunction:: PyObject* PyNumber_Int(PyObject *o) + + .. index:: builtin: int + + Returns the *o* converted to an integer object on success, or *NULL* on failure. + If the argument is outside the integer range a long object will be returned + instead. This is the equivalent of the Python expression ``int(o)``. + + +.. cfunction:: PyObject* PyNumber_Long(PyObject *o) + + .. 
index:: builtin: long + + Returns *o* converted to a long integer object on success, or *NULL* on + failure. This is the equivalent of the Python expression ``long(o)``. + + +.. cfunction:: PyObject* PyNumber_Float(PyObject *o) + + .. index:: builtin: float + + Returns *o* converted to a float object on success, or *NULL* on failure. + This is the equivalent of the Python expression ``float(o)``. + + +.. cfunction:: PyObject* PyNumber_Index(PyObject *o) + + Returns *o* converted to a Python int or long on success or *NULL* with a + :exc:`TypeError` exception raised on failure. + + .. versionadded:: 2.5 + + +.. cfunction:: Py_ssize_t PyNumber_AsSsize_t(PyObject *o, PyObject *exc) + + Returns *o* converted to a Py_ssize_t value if *o* can be interpreted as an + integer. If *o* can be converted to a Python int or long but the attempt to + convert to a Py_ssize_t value would raise an :exc:`OverflowError`, then the + *exc* argument is the type of exception that will be raised (usually + :exc:`IndexError` or :exc:`OverflowError`). If *exc* is *NULL*, then the + exception is cleared and the value is clipped to *PY_SSIZE_T_MIN* for a negative + integer or *PY_SSIZE_T_MAX* for a positive integer. + + .. versionadded:: 2.5 + + +.. cfunction:: int PyIndex_Check(PyObject *o) + + Returns True if *o* is an index integer (has the nb_index slot of the + tp_as_number structure filled in). + + .. versionadded:: 2.5 + + +.. _sequence: + +Sequence Protocol +================= + + +.. cfunction:: int PySequence_Check(PyObject *o) + + Return ``1`` if the object provides sequence protocol, and ``0`` otherwise. + This function always succeeds. + + +.. cfunction:: Py_ssize_t PySequence_Size(PyObject *o) + + .. index:: builtin: len + + Returns the number of objects in sequence *o* on success, and ``-1`` on failure. + This is equivalent to the Python expression ``len(o)``. + + +..
cfunction:: Py_ssize_t PySequence_Length(PyObject *o) + + Alternate name for :cfunc:`PySequence_Size`. + + +.. cfunction:: PyObject* PySequence_Concat(PyObject *o1, PyObject *o2) + + Return the concatenation of *o1* and *o2* on success, and *NULL* on failure. + This is the equivalent of the Python expression ``o1 + o2``. + + +.. cfunction:: PyObject* PySequence_Repeat(PyObject *o, Py_ssize_t count) + + Return the result of repeating sequence object *o* *count* times, or *NULL* on + failure. This is the equivalent of the Python expression ``o * count``. + + +.. cfunction:: PyObject* PySequence_InPlaceConcat(PyObject *o1, PyObject *o2) + + Return the concatenation of *o1* and *o2* on success, and *NULL* on failure. + The operation is done *in-place* when *o1* supports it. This is the equivalent + of the Python statement ``o1 += o2``. + + +.. cfunction:: PyObject* PySequence_InPlaceRepeat(PyObject *o, Py_ssize_t count) + + Return the result of repeating sequence object *o* *count* times, or *NULL* on + failure. The operation is done *in-place* when *o* supports it. This is the + equivalent of the Python statement ``o *= count``. + + +.. cfunction:: PyObject* PySequence_GetItem(PyObject *o, Py_ssize_t i) + + Return the *i*\ th element of *o*, or *NULL* on failure. This is the equivalent of + the Python expression ``o[i]``. + + +.. cfunction:: PyObject* PySequence_GetSlice(PyObject *o, Py_ssize_t i1, Py_ssize_t i2) + + Return the slice of sequence object *o* between *i1* and *i2*, or *NULL* on + failure. This is the equivalent of the Python expression ``o[i1:i2]``. + + +.. cfunction:: int PySequence_SetItem(PyObject *o, Py_ssize_t i, PyObject *v) + + Assign object *v* to the *i*\ th element of *o*. Returns ``-1`` on failure. This + is the equivalent of the Python statement ``o[i] = v``. This function *does + not* steal a reference to *v*. + + +.. cfunction:: int PySequence_DelItem(PyObject *o, Py_ssize_t i) + + Delete the *i*\ th element of object *o*.
Returns ``-1`` on failure. This is the + equivalent of the Python statement ``del o[i]``. + + +.. cfunction:: int PySequence_SetSlice(PyObject *o, Py_ssize_t i1, Py_ssize_t i2, PyObject *v) + + Assign the sequence object *v* to the slice in sequence object *o* from *i1* to + *i2*. This is the equivalent of the Python statement ``o[i1:i2] = v``. + + +.. cfunction:: int PySequence_DelSlice(PyObject *o, Py_ssize_t i1, Py_ssize_t i2) + + Delete the slice in sequence object *o* from *i1* to *i2*. Returns ``-1`` on + failure. This is the equivalent of the Python statement ``del o[i1:i2]``. + + +.. cfunction:: Py_ssize_t PySequence_Count(PyObject *o, PyObject *value) + + Return the number of occurrences of *value* in *o*, that is, return the number + of keys for which ``o[key] == value``. On failure, return ``-1``. This is + equivalent to the Python expression ``o.count(value)``. + + +.. cfunction:: int PySequence_Contains(PyObject *o, PyObject *value) + + Determine if *o* contains *value*. If an item in *o* is equal to *value*, + return ``1``, otherwise return ``0``. On error, return ``-1``. This is + equivalent to the Python expression ``value in o``. + + +.. cfunction:: Py_ssize_t PySequence_Index(PyObject *o, PyObject *value) + + Return the first index *i* for which ``o[i] == value``. On error, return + ``-1``. This is equivalent to the Python expression ``o.index(value)``. + + +.. cfunction:: PyObject* PySequence_List(PyObject *o) + + Return a list object with the same contents as the arbitrary sequence *o*. The + returned list is guaranteed to be new. + + +.. cfunction:: PyObject* PySequence_Tuple(PyObject *o) + + .. index:: builtin: tuple + + Return a tuple object with the same contents as the arbitrary sequence *o* or + *NULL* on failure. If *o* is a tuple, a new reference will be returned, + otherwise a tuple will be constructed with the appropriate contents. This is + equivalent to the Python expression ``tuple(o)``. + + +.. 
cfunction:: PyObject* PySequence_Fast(PyObject *o, const char *m) + + Returns the sequence *o* as a tuple, unless it is already a tuple or list, in + which case *o* is returned. Use :cfunc:`PySequence_Fast_GET_ITEM` to access the + members of the result. Returns *NULL* on failure. If the object is not a + sequence, raises :exc:`TypeError` with *m* as the message text. + + +.. cfunction:: PyObject* PySequence_Fast_GET_ITEM(PyObject *o, Py_ssize_t i) + + Return the *i*\ th element of *o*, assuming that *o* was returned by + :cfunc:`PySequence_Fast`, *o* is not *NULL*, and that *i* is within bounds. + + +.. cfunction:: PyObject** PySequence_Fast_ITEMS(PyObject *o) + + Return the underlying array of PyObject pointers. Assumes that *o* was returned + by :cfunc:`PySequence_Fast` and *o* is not *NULL*. + + .. versionadded:: 2.4 + + +.. cfunction:: PyObject* PySequence_ITEM(PyObject *o, Py_ssize_t i) + + Return the *i*\ th element of *o* or *NULL* on failure. Macro form of + :cfunc:`PySequence_GetItem` but without checking that + :cfunc:`PySequence_Check` on *o* is true and without adjustment for negative + indices. + + .. versionadded:: 2.3 + + +.. cfunction:: Py_ssize_t PySequence_Fast_GET_SIZE(PyObject *o) + + Returns the length of *o*, assuming that *o* was returned by + :cfunc:`PySequence_Fast` and that *o* is not *NULL*. The size can also be + obtained by calling :cfunc:`PySequence_Size` on *o*, but + :cfunc:`PySequence_Fast_GET_SIZE` is faster because it can assume *o* is a list + or tuple. + + +.. _mapping: + +Mapping Protocol +================ + + +.. cfunction:: int PyMapping_Check(PyObject *o) + + Return ``1`` if the object provides mapping protocol, and ``0`` otherwise. This + function always succeeds. + + +.. cfunction:: Py_ssize_t PyMapping_Length(PyObject *o) + + .. index:: builtin: len + + Returns the number of keys in object *o* on success, and ``-1`` on failure.
This is + equivalent to the Python expression ``len(o)``. + + +.. cfunction:: int PyMapping_DelItemString(PyObject *o, char *key) + + Remove the mapping for object *key* from the object *o*. Return ``-1`` on + failure. This is equivalent to the Python statement ``del o[key]``. + + +.. cfunction:: int PyMapping_DelItem(PyObject *o, PyObject *key) + + Remove the mapping for object *key* from the object *o*. Return ``-1`` on + failure. This is equivalent to the Python statement ``del o[key]``. + + +.. cfunction:: int PyMapping_HasKeyString(PyObject *o, char *key) + + Return ``1`` if the mapping object has the key *key* and ``0`` + otherwise. This is equivalent to the Python expression ``o.has_key(key)``. + This function always succeeds. + + +.. cfunction:: int PyMapping_HasKey(PyObject *o, PyObject *key) + + Return ``1`` if the mapping object has the key *key* and ``0`` otherwise. This + is equivalent to the Python expression ``o.has_key(key)``. This function always + succeeds. + + +.. cfunction:: PyObject* PyMapping_Keys(PyObject *o) + + On success, return a list of the keys in object *o*. On failure, return *NULL*. + This is equivalent to the Python expression ``o.keys()``. + + +.. cfunction:: PyObject* PyMapping_Values(PyObject *o) + + On success, return a list of the values in object *o*. On failure, return + *NULL*. This is equivalent to the Python expression ``o.values()``. + + +.. cfunction:: PyObject* PyMapping_Items(PyObject *o) + + On success, return a list of the items in object *o*, where each item is a tuple + containing a key-value pair. On failure, return *NULL*. This is equivalent to + the Python expression ``o.items()``. + + +.. cfunction:: PyObject* PyMapping_GetItemString(PyObject *o, char *key) + + Return the element of *o* corresponding to the object *key* or *NULL* on failure. + This is the equivalent of the Python expression ``o[key]``. + + +..
cfunction:: int PyMapping_SetItemString(PyObject *o, char *key, PyObject *v) + + Map the object *key* to the value *v* in object *o*. Returns ``-1`` on failure. + This is the equivalent of the Python statement ``o[key] = v``. + + +.. _iterator: + +Iterator Protocol +================= + +.. versionadded:: 2.2 + +There are only a couple of functions specifically for working with iterators. + + +.. cfunction:: int PyIter_Check(PyObject *o) + + Return true if the object *o* supports the iterator protocol. + + +.. cfunction:: PyObject* PyIter_Next(PyObject *o) + + Return the next value from the iteration *o*. If the object is an iterator, + this retrieves the next value from the iteration, and returns *NULL* with no + exception set if there are no remaining items. If the object is not an + iterator, :exc:`TypeError` is raised, or if there is an error in retrieving the + item, returns *NULL* and passes along the exception. + +To write a loop which iterates over an iterator, the C code should look +something like this:: + + PyObject *iterator = PyObject_GetIter(obj); + PyObject *item; + + if (iterator == NULL) { + /* propagate error */ + } + + while (item = PyIter_Next(iterator)) { + /* do something with item */ + ... + /* release reference when done */ + Py_DECREF(item); + } + + Py_DECREF(iterator); + + if (PyErr_Occurred()) { + /* propagate error */ + } + else { + /* continue doing useful work */ + } + + +.. _abstract-buffer: + +Buffer Protocol +=============== + + +.. cfunction:: int PyObject_AsCharBuffer(PyObject *obj, const char **buffer, Py_ssize_t *buffer_len) + + Returns a pointer to a read-only memory location usable as character-based + input. The *obj* argument must support the single-segment character buffer + interface. On success, returns ``0``, sets *buffer* to the memory location and + *buffer_len* to the buffer length. Returns ``-1`` and sets a :exc:`TypeError` + on error. + + .. versionadded:: 1.6 + + +..
cfunction:: int PyObject_AsReadBuffer(PyObject *obj, const void **buffer, Py_ssize_t *buffer_len) + + Returns a pointer to a read-only memory location containing arbitrary data. The + *obj* argument must support the single-segment readable buffer interface. On + success, returns ``0``, sets *buffer* to the memory location and *buffer_len* to + the buffer length. Returns ``-1`` and sets a :exc:`TypeError` on error. + + .. versionadded:: 1.6 + + +.. cfunction:: int PyObject_CheckReadBuffer(PyObject *o) + + Returns ``1`` if *o* supports the single-segment readable buffer interface. + Otherwise returns ``0``. + + .. versionadded:: 2.2 + + +.. cfunction:: int PyObject_AsWriteBuffer(PyObject *obj, void **buffer, Py_ssize_t *buffer_len) + + Returns a pointer to a writeable memory location. The *obj* argument must + support the single-segment, character buffer interface. On success, returns + ``0``, sets *buffer* to the memory location and *buffer_len* to the buffer + length. Returns ``-1`` and sets a :exc:`TypeError` on error. + + .. versionadded:: 1.6 + diff --git a/Doc/c-api/concrete.rst b/Doc/c-api/concrete.rst new file mode 100644 index 0000000..052785c --- /dev/null +++ b/Doc/c-api/concrete.rst @@ -0,0 +1,3676 @@ +.. highlightlang:: c + + +.. _concrete: + +********************** +Concrete Objects Layer +********************** + +The functions in this chapter are specific to certain Python object types. +Passing them an object of the wrong type is not a good idea; if you receive an +object from a Python program and you are not sure that it has the right type, +you must perform a type check first; for example, to check that an object is a +dictionary, use :cfunc:`PyDict_Check`. The chapter is structured like the +"family tree" of Python object types. + +.. warning:: + + While the functions described in this chapter carefully check the type of the + objects which are passed in, many of them do not check for *NULL* being passed + instead of a valid object. 
Allowing *NULL* to be passed in can cause memory + access violations and immediate termination of the interpreter. + + +.. _fundamental: + +Fundamental Objects +=================== + +This section describes Python type objects and the singleton object ``None``. + + +.. _typeobjects: + +Type Objects +------------ + +.. index:: object: type + + +.. ctype:: PyTypeObject + + The C structure of the objects used to describe built-in types. + + +.. cvar:: PyObject* PyType_Type + + .. index:: single: TypeType (in module types) + + This is the type object for type objects; it is the same object as ``type`` and + ``types.TypeType`` in the Python layer. + + +.. cfunction:: int PyType_Check(PyObject *o) + + Return true if the object *o* is a type object, including instances of types + derived from the standard type object. Return false in all other cases. + + +.. cfunction:: int PyType_CheckExact(PyObject *o) + + Return true if the object *o* is a type object, but not a subtype of the + standard type object. Return false in all other cases. + + .. versionadded:: 2.2 + + +.. cfunction:: int PyType_HasFeature(PyObject *o, int feature) + + Return true if the type object *o* sets the feature *feature*. Type features + are denoted by single bit flags. + + +.. cfunction:: int PyType_IS_GC(PyObject *o) + + Return true if the type object includes support for the cycle detector; this + tests the type flag :const:`Py_TPFLAGS_HAVE_GC`. + + .. versionadded:: 2.0 + + +.. cfunction:: int PyType_IsSubtype(PyTypeObject *a, PyTypeObject *b) + + Return true if *a* is a subtype of *b*. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyType_GenericAlloc(PyTypeObject *type, Py_ssize_t nitems) + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyType_GenericNew(PyTypeObject *type, PyObject *args, PyObject *kwds) + + .. versionadded:: 2.2 + + +.. cfunction:: int PyType_Ready(PyTypeObject *type) + + Finalize a type object. 
This should be called on all type objects to finish + their initialization. This function is responsible for adding inherited slots + from a type's base class. Return ``0`` on success, or return ``-1`` and set an + exception on error. + + .. versionadded:: 2.2 + + +.. _noneobject: + +The None Object +--------------- + +.. index:: object: None + +Note that the :ctype:`PyTypeObject` for ``None`` is not directly exposed in the +Python/C API. Since ``None`` is a singleton, testing for object identity (using +``==`` in C) is sufficient. There is no :cfunc:`PyNone_Check` function for the +same reason. + + +.. cvar:: PyObject* Py_None + + The Python ``None`` object, denoting lack of value. This object has no methods. + It needs to be treated just like any other object with respect to reference + counts. + + +.. cmacro:: Py_RETURN_NONE + + Properly handle returning :cdata:`Py_None` from within a C function. + + .. versionadded:: 2.4 + + +.. _numericobjects: + +Numeric Objects +=============== + +.. index:: object: numeric + + +.. _intobjects: + +Plain Integer Objects +--------------------- + +.. index:: object: integer + + +.. ctype:: PyIntObject + + This subtype of :ctype:`PyObject` represents a Python integer object. + + +.. cvar:: PyTypeObject PyInt_Type + + .. index:: single: IntType (in module types) + + This instance of :ctype:`PyTypeObject` represents the Python plain integer type. + This is the same object as ``int`` and ``types.IntType``. + + +.. cfunction:: int PyInt_Check(PyObject *o) + + Return true if *o* is of type :cdata:`PyInt_Type` or a subtype of + :cdata:`PyInt_Type`. + + .. versionchanged:: 2.2 + Allowed subtypes to be accepted. + + +.. cfunction:: int PyInt_CheckExact(PyObject *o) + + Return true if *o* is of type :cdata:`PyInt_Type`, but not a subtype of + :cdata:`PyInt_Type`. + + .. versionadded:: 2.2 + + +..
cfunction:: PyObject* PyInt_FromString(char *str, char **pend, int base) + + Return a new :ctype:`PyIntObject` or :ctype:`PyLongObject` based on the string + value in *str*, which is interpreted according to the radix in *base*. If + *pend* is non-*NULL*, ``*pend`` will point to the first character in *str* which + follows the representation of the number. If *base* is ``0``, the radix will be + determined based on the leading characters of *str*: if *str* starts with + ``'0x'`` or ``'0X'``, radix 16 will be used; if *str* starts with ``'0'``, radix + 8 will be used; otherwise radix 10 will be used. If *base* is not ``0``, it + must be between ``2`` and ``36``, inclusive. Leading spaces are ignored. If + there are no digits, :exc:`ValueError` will be raised. If the string represents + a number too large to be contained within the machine's :ctype:`long int` type + and overflow warnings are being suppressed, a :ctype:`PyLongObject` will be + returned. If overflow warnings are not being suppressed, *NULL* will be + returned in this case. + + +.. cfunction:: PyObject* PyInt_FromLong(long ival) + + Create a new integer object with a value of *ival*. + + The current implementation keeps an array of integer objects for all integers + between ``-5`` and ``256``, when you create an int in that range you actually + just get back a reference to the existing object. So it should be possible to + change the value of ``1``. I suspect the behaviour of Python in this case is + undefined. :-) + + +.. cfunction:: PyObject* PyInt_FromSsize_t(Py_ssize_t ival) + + Create a new integer object with a value of *ival*. If the value exceeds + ``LONG_MAX``, a long integer object is returned. + + .. versionadded:: 2.5 + + +.. cfunction:: long PyInt_AsLong(PyObject *io) + + Will first attempt to cast the object to a :ctype:`PyIntObject`, if it is not + already one, and then return its value. 
If there is an error, ``-1`` is + returned, and the caller should check ``PyErr_Occurred()`` to find out whether + there was an error, or whether the value just happened to be -1. + + +.. cfunction:: long PyInt_AS_LONG(PyObject *io) + + Return the value of the object *io*. No error checking is performed. + + +.. cfunction:: unsigned long PyInt_AsUnsignedLongMask(PyObject *io) + + Will first attempt to cast the object to a :ctype:`PyIntObject` or + :ctype:`PyLongObject`, if it is not already one, and then return its value as + unsigned long. This function does not check for overflow. + + .. versionadded:: 2.3 + + +.. cfunction:: unsigned PY_LONG_LONG PyInt_AsUnsignedLongLongMask(PyObject *io) + + Will first attempt to cast the object to a :ctype:`PyIntObject` or + :ctype:`PyLongObject`, if it is not already one, and then return its value as + unsigned long long, without checking for overflow. + + .. versionadded:: 2.3 + + +.. cfunction:: Py_ssize_t PyInt_AsSsize_t(PyObject *io) + + Will first attempt to cast the object to a :ctype:`PyIntObject` or + :ctype:`PyLongObject`, if it is not already one, and then return its value as + :ctype:`Py_ssize_t`. + + .. versionadded:: 2.5 + + +.. cfunction:: long PyInt_GetMax() + + .. index:: single: LONG_MAX + + Return the system's idea of the largest integer it can handle + (:const:`LONG_MAX`, as defined in the system header files). + + +.. _boolobjects: + +Boolean Objects +--------------- + +Booleans in Python are implemented as a subclass of integers. There are only +two booleans, :const:`Py_False` and :const:`Py_True`. As such, the normal +creation and deletion functions don't apply to booleans. The following macros +are available, however. + + +.. cfunction:: int PyBool_Check(PyObject *o) + + Return true if *o* is of type :cdata:`PyBool_Type`. + + .. versionadded:: 2.3 + + +.. cvar:: PyObject* Py_False + + The Python ``False`` object. This object has no methods. 
It needs to be + treated just like any other object with respect to reference counts. + + +.. cvar:: PyObject* Py_True + + The Python ``True`` object. This object has no methods. It needs to be treated + just like any other object with respect to reference counts. + + +.. cmacro:: Py_RETURN_FALSE + + Return :const:`Py_False` from a function, properly incrementing its reference + count. + + .. versionadded:: 2.4 + + +.. cmacro:: Py_RETURN_TRUE + + Return :const:`Py_True` from a function, properly incrementing its reference + count. + + .. versionadded:: 2.4 + + +.. cfunction:: PyObject* PyBool_FromLong(long v) + + Return a new reference to :const:`Py_True` or :const:`Py_False` depending on the + truth value of *v*. + + .. versionadded:: 2.3 + + +.. _longobjects: + +Long Integer Objects +-------------------- + +.. index:: object: long integer + + +.. ctype:: PyLongObject + + This subtype of :ctype:`PyObject` represents a Python long integer object. + + +.. cvar:: PyTypeObject PyLong_Type + + .. index:: single: LongType (in module types) + + This instance of :ctype:`PyTypeObject` represents the Python long integer type. + This is the same object as ``long`` and ``types.LongType``. + + +.. cfunction:: int PyLong_Check(PyObject *p) + + Return true if its argument is a :ctype:`PyLongObject` or a subtype of + :ctype:`PyLongObject`. + + .. versionchanged:: 2.2 + Allowed subtypes to be accepted. + + +.. cfunction:: int PyLong_CheckExact(PyObject *p) + + Return true if its argument is a :ctype:`PyLongObject`, but not a subtype of + :ctype:`PyLongObject`. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyLong_FromLong(long v) + + Return a new :ctype:`PyLongObject` object from *v*, or *NULL* on failure. + + +.. cfunction:: PyObject* PyLong_FromUnsignedLong(unsigned long v) + + Return a new :ctype:`PyLongObject` object from a C :ctype:`unsigned long`, or + *NULL* on failure. + + +..
cfunction:: PyObject* PyLong_FromLongLong(PY_LONG_LONG v) + + Return a new :ctype:`PyLongObject` object from a C :ctype:`long long`, or *NULL* + on failure. + + +.. cfunction:: PyObject* PyLong_FromUnsignedLongLong(unsigned PY_LONG_LONG v) + + Return a new :ctype:`PyLongObject` object from a C :ctype:`unsigned long long`, + or *NULL* on failure. + + +.. cfunction:: PyObject* PyLong_FromDouble(double v) + + Return a new :ctype:`PyLongObject` object from the integer part of *v*, or + *NULL* on failure. + + +.. cfunction:: PyObject* PyLong_FromString(char *str, char **pend, int base) + + Return a new :ctype:`PyLongObject` based on the string value in *str*, which is + interpreted according to the radix in *base*. If *pend* is non-*NULL*, + ``*pend`` will point to the first character in *str* which follows the + representation of the number. If *base* is ``0``, the radix will be determined + based on the leading characters of *str*: if *str* starts with ``'0x'`` or + ``'0X'``, radix 16 will be used; if *str* starts with ``'0'``, radix 8 will be + used; otherwise radix 10 will be used. If *base* is not ``0``, it must be + between ``2`` and ``36``, inclusive. Leading spaces are ignored. If there are + no digits, :exc:`ValueError` will be raised. + + +.. cfunction:: PyObject* PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base) + + Convert a sequence of Unicode digits to a Python long integer value. The first + parameter, *u*, points to the first character of the Unicode string, *length* + gives the number of characters, and *base* is the radix for the conversion. The + radix must be in the range [2, 36]; if it is out of range, :exc:`ValueError` + will be raised. + + .. versionadded:: 1.6 + + +.. cfunction:: PyObject* PyLong_FromVoidPtr(void *p) + + Create a Python integer or long integer from the pointer *p*. The pointer value + can be retrieved from the resulting value using :cfunc:`PyLong_AsVoidPtr`. + + .. versionadded:: 1.5.2 + + .. 
versionchanged:: 2.5 + If the integer is larger than LONG_MAX, a positive long integer is returned. + + +.. cfunction:: long PyLong_AsLong(PyObject *pylong) + + .. index:: + single: LONG_MAX + single: OverflowError (built-in exception) + + Return a C :ctype:`long` representation of the contents of *pylong*. If + *pylong* is greater than :const:`LONG_MAX`, an :exc:`OverflowError` is raised. + + +.. cfunction:: unsigned long PyLong_AsUnsignedLong(PyObject *pylong) + + .. index:: + single: ULONG_MAX + single: OverflowError (built-in exception) + + Return a C :ctype:`unsigned long` representation of the contents of *pylong*. + If *pylong* is greater than :const:`ULONG_MAX`, an :exc:`OverflowError` is + raised. + + +.. cfunction:: PY_LONG_LONG PyLong_AsLongLong(PyObject *pylong) + + Return a C :ctype:`long long` from a Python long integer. If *pylong* cannot be + represented as a :ctype:`long long`, an :exc:`OverflowError` will be raised. + + .. versionadded:: 2.2 + + +.. cfunction:: unsigned PY_LONG_LONG PyLong_AsUnsignedLongLong(PyObject *pylong) + + Return a C :ctype:`unsigned long long` from a Python long integer. If *pylong* + cannot be represented as an :ctype:`unsigned long long`, an :exc:`OverflowError` + will be raised if the value is positive, or a :exc:`TypeError` will be raised if + the value is negative. + + .. versionadded:: 2.2 + + +.. cfunction:: unsigned long PyLong_AsUnsignedLongMask(PyObject *io) + + Return a C :ctype:`unsigned long` from a Python long integer, without checking + for overflow. + + .. versionadded:: 2.3 + + +.. cfunction:: unsigned PY_LONG_LONG PyLong_AsUnsignedLongLongMask(PyObject *io) + + Return a C :ctype:`unsigned long long` from a Python long integer, without + checking for overflow. + + .. versionadded:: 2.3 + + +.. cfunction:: double PyLong_AsDouble(PyObject *pylong) + + Return a C :ctype:`double` representation of the contents of *pylong*. 
If + *pylong* cannot be approximately represented as a :ctype:`double`, an + :exc:`OverflowError` exception is raised and ``-1.0`` will be returned. + + +.. cfunction:: void* PyLong_AsVoidPtr(PyObject *pylong) + + Convert a Python integer or long integer *pylong* to a C :ctype:`void` pointer. + If *pylong* cannot be converted, an :exc:`OverflowError` will be raised. This + is only assured to produce a usable :ctype:`void` pointer for values created + with :cfunc:`PyLong_FromVoidPtr`. + + .. versionadded:: 1.5.2 + + .. versionchanged:: 2.5 + For values outside 0..LONG_MAX, both signed and unsigned integers are accepted. + + +.. _floatobjects: + +Floating Point Objects +---------------------- + +.. index:: object: floating point + + +.. ctype:: PyFloatObject + + This subtype of :ctype:`PyObject` represents a Python floating point object. + + +.. cvar:: PyTypeObject PyFloat_Type + + .. index:: single: FloatType (in module types) + + This instance of :ctype:`PyTypeObject` represents the Python floating point + type. This is the same object as ``float`` and ``types.FloatType``. + + +.. cfunction:: int PyFloat_Check(PyObject *p) + + Return true if its argument is a :ctype:`PyFloatObject` or a subtype of + :ctype:`PyFloatObject`. + + .. versionchanged:: 2.2 + Allowed subtypes to be accepted. + + +.. cfunction:: int PyFloat_CheckExact(PyObject *p) + + Return true if its argument is a :ctype:`PyFloatObject`, but not a subtype of + :ctype:`PyFloatObject`. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyFloat_FromString(PyObject *str) + + Create a :ctype:`PyFloatObject` object based on the string value in *str*, or + *NULL* on failure. + + +.. cfunction:: PyObject* PyFloat_FromDouble(double v) + + Create a :ctype:`PyFloatObject` object from *v*, or *NULL* on failure. + + +.. cfunction:: double PyFloat_AsDouble(PyObject *pyfloat) + + Return a C :ctype:`double` representation of the contents of *pyfloat*.
If + *pyfloat* is not a Python floating point object but has a :meth:`__float__` + method, this method will first be called to convert *pyfloat* into a float. + + +.. cfunction:: double PyFloat_AS_DOUBLE(PyObject *pyfloat) + + Return a C :ctype:`double` representation of the contents of *pyfloat*, but + without error checking. + + +.. _complexobjects: + +Complex Number Objects +---------------------- + +.. index:: object: complex number + +Python's complex number objects are implemented as two distinct types when +viewed from the C API: one is the Python object exposed to Python programs, and +the other is a C structure which represents the actual complex number value. +The API provides functions for working with both. + + +Complex Numbers as C Structures +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Note that the functions which accept these structures as parameters and return +them as results do so *by value* rather than dereferencing them through +pointers. This is consistent throughout the API. + + +.. ctype:: Py_complex + + The C structure which corresponds to the value portion of a Python complex + number object. Most of the functions for dealing with complex number objects + use structures of this type as input or output values, as appropriate. It is + defined as:: + + typedef struct { + double real; + double imag; + } Py_complex; + + +.. cfunction:: Py_complex _Py_c_sum(Py_complex left, Py_complex right) + + Return the sum of two complex numbers, using the C :ctype:`Py_complex` + representation. + + +.. cfunction:: Py_complex _Py_c_diff(Py_complex left, Py_complex right) + + Return the difference between two complex numbers, using the C + :ctype:`Py_complex` representation. + + +.. cfunction:: Py_complex _Py_c_neg(Py_complex complex) + + Return the negation of the complex number *complex*, using the C + :ctype:`Py_complex` representation. + + +.. 
cfunction:: Py_complex _Py_c_prod(Py_complex left, Py_complex right) + + Return the product of two complex numbers, using the C :ctype:`Py_complex` + representation. + + +.. cfunction:: Py_complex _Py_c_quot(Py_complex dividend, Py_complex divisor) + + Return the quotient of two complex numbers, using the C :ctype:`Py_complex` + representation. + + +.. cfunction:: Py_complex _Py_c_pow(Py_complex num, Py_complex exp) + + Return the exponentiation of *num* by *exp*, using the C :ctype:`Py_complex` + representation. + + +Complex Numbers as Python Objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +.. ctype:: PyComplexObject + + This subtype of :ctype:`PyObject` represents a Python complex number object. + + +.. cvar:: PyTypeObject PyComplex_Type + + This instance of :ctype:`PyTypeObject` represents the Python complex number + type. It is the same object as ``complex`` and ``types.ComplexType``. + + +.. cfunction:: int PyComplex_Check(PyObject *p) + + Return true if its argument is a :ctype:`PyComplexObject` or a subtype of + :ctype:`PyComplexObject`. + + .. versionchanged:: 2.2 + Allowed subtypes to be accepted. + + +.. cfunction:: int PyComplex_CheckExact(PyObject *p) + + Return true if its argument is a :ctype:`PyComplexObject`, but not a subtype of + :ctype:`PyComplexObject`. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyComplex_FromCComplex(Py_complex v) + + Create a new Python complex number object from a C :ctype:`Py_complex` value. + + +.. cfunction:: PyObject* PyComplex_FromDoubles(double real, double imag) + + Return a new :ctype:`PyComplexObject` object from *real* and *imag*. + + +.. cfunction:: double PyComplex_RealAsDouble(PyObject *op) + + Return the real part of *op* as a C :ctype:`double`. + + +.. cfunction:: double PyComplex_ImagAsDouble(PyObject *op) + + Return the imaginary part of *op* as a C :ctype:`double`. + + +.. 
cfunction:: Py_complex PyComplex_AsCComplex(PyObject *op) + + Return the :ctype:`Py_complex` value of the complex number *op*. + + .. versionchanged:: 2.6 + If *op* is not a Python complex number object but has a :meth:`__complex__` + method, this method will first be called to convert *op* to a Python complex + number object. + + +.. _sequenceobjects: + +Sequence Objects +================ + +.. index:: object: sequence + +Generic operations on sequence objects were discussed in the previous chapter; +this section deals with the specific kinds of sequence objects that are +intrinsic to the Python language. + + +.. _stringobjects: + +String Objects +-------------- + +These functions raise :exc:`TypeError` when expecting a string parameter and are +called with a non-string parameter. + +.. index:: object: string + + +.. ctype:: PyStringObject + + This subtype of :ctype:`PyObject` represents a Python string object. + + +.. cvar:: PyTypeObject PyString_Type + + .. index:: single: StringType (in module types) + + This instance of :ctype:`PyTypeObject` represents the Python string type; it is + the same object as ``str`` and ``types.StringType`` in the Python layer. + + +.. cfunction:: int PyString_Check(PyObject *o) + + Return true if the object *o* is a string object or an instance of a subtype of + the string type. + + .. versionchanged:: 2.2 + Allowed subtypes to be accepted. + + +.. cfunction:: int PyString_CheckExact(PyObject *o) + + Return true if the object *o* is a string object, but not an instance of a + subtype of the string type. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyString_FromString(const char *v) + + Return a new string object with a copy of the string *v* as value on success, + and *NULL* on failure. The parameter *v* must not be *NULL*; it will not be + checked. + + +..
cfunction:: PyObject* PyString_FromStringAndSize(const char *v, Py_ssize_t len) + + Return a new string object with a copy of the string *v* as value and length + *len* on success, and *NULL* on failure. If *v* is *NULL*, the contents of the + string are uninitialized. + + +.. cfunction:: PyObject* PyString_FromFormat(const char *format, ...) + + Take a C :cfunc:`printf`\ -style *format* string and a variable number of + arguments, calculate the size of the resulting Python string and return a string + with the values formatted into it. The variable arguments must be C types and + must correspond exactly to the format characters in the *format* string. The + following format characters are allowed: + + .. % This should be exactly the same as the table in PyErr_Format. + .. % One should just refer to the other. + .. % The descriptions for %zd and %zu are wrong, but the truth is complicated + .. % because not all compilers support the %z width modifier -- we fake it + .. % when necessary via interpolating PY_FORMAT_SIZE_T. + .. % %u, %lu, %zu should have "new in Python 2.5" blurbs. + + +-------------------+---------------+--------------------------------+ + | Format Characters | Type | Comment | + +===================+===============+================================+ + | :attr:`%%` | *n/a* | The literal % character. | + +-------------------+---------------+--------------------------------+ + | :attr:`%c` | int | A single character, | + | | | represented as a C int. | + +-------------------+---------------+--------------------------------+ + | :attr:`%d` | int | Exactly equivalent to | + | | | ``printf("%d")``. | + +-------------------+---------------+--------------------------------+ + | :attr:`%u` | unsigned int | Exactly equivalent to | + | | | ``printf("%u")``. | + +-------------------+---------------+--------------------------------+ + | :attr:`%ld` | long | Exactly equivalent to | + | | | ``printf("%ld")``.
| + +-------------------+---------------+--------------------------------+ + | :attr:`%lu` | unsigned long | Exactly equivalent to | + | | | ``printf("%lu")``. | + +-------------------+---------------+--------------------------------+ + | :attr:`%zd` | Py_ssize_t | Exactly equivalent to | + | | | ``printf("%zd")``. | + +-------------------+---------------+--------------------------------+ + | :attr:`%zu` | size_t | Exactly equivalent to | + | | | ``printf("%zu")``. | + +-------------------+---------------+--------------------------------+ + | :attr:`%i` | int | Exactly equivalent to | + | | | ``printf("%i")``. | + +-------------------+---------------+--------------------------------+ + | :attr:`%x` | int | Exactly equivalent to | + | | | ``printf("%x")``. | + +-------------------+---------------+--------------------------------+ + | :attr:`%s` | char\* | A null-terminated C character | + | | | array. | + +-------------------+---------------+--------------------------------+ + | :attr:`%p` | void\* | The hex representation of a C | + | | | pointer. Mostly equivalent to | + | | | ``printf("%p")`` except that | + | | | it is guaranteed to start with | + | | | the literal ``0x`` regardless | + | | | of what the platform's | + | | | ``printf`` yields. | + +-------------------+---------------+--------------------------------+ + + An unrecognized format character causes all the rest of the format string to be + copied as-is to the result string, and any extra arguments discarded. + + +.. cfunction:: PyObject* PyString_FromFormatV(const char *format, va_list vargs) + + Identical to :func:`PyString_FromFormat` except that it takes exactly two + arguments. + + +.. cfunction:: Py_ssize_t PyString_Size(PyObject *string) + + Return the length of the string in string object *string*. + + +.. cfunction:: Py_ssize_t PyString_GET_SIZE(PyObject *string) + + Macro form of :cfunc:`PyString_Size` but without error checking. + + +.. 
cfunction:: char* PyString_AsString(PyObject *string) + + Return a NUL-terminated representation of the contents of *string*. The pointer + refers to the internal buffer of *string*, not a copy. The data must not be + modified in any way, unless the string was just created using + ``PyString_FromStringAndSize(NULL, size)``. It must not be deallocated. If + *string* is a Unicode object, this function computes the default encoding of + *string* and operates on that. If *string* is not a string object at all, + :cfunc:`PyString_AsString` returns *NULL* and raises :exc:`TypeError`. + + +.. cfunction:: char* PyString_AS_STRING(PyObject *string) + + Macro form of :cfunc:`PyString_AsString` but without error checking. Only + string objects are supported; no Unicode objects should be passed. + + +.. cfunction:: int PyString_AsStringAndSize(PyObject *obj, char **buffer, Py_ssize_t *length) + + Return a NUL-terminated representation of the contents of the object *obj* + through the output variables *buffer* and *length*. + + The function accepts both string and Unicode objects as input. For Unicode + objects it returns the default encoded version of the object. If *length* is + *NULL*, the resulting buffer may not contain NUL characters; if it does, the + function returns ``-1`` and a :exc:`TypeError` is raised. + + The buffer refers to an internal string buffer of *obj*, not a copy. The data + must not be modified in any way, unless the string was just created using + ``PyString_FromStringAndSize(NULL, size)``. It must not be deallocated. If + *obj* is a Unicode object, this function computes the default encoding of + *obj* and operates on that. If *obj* is not a string object at all, + :cfunc:`PyString_AsStringAndSize` returns ``-1`` and raises :exc:`TypeError`. + + +..
cfunction:: void PyString_Concat(PyObject **string, PyObject *newpart) + + Create a new string object in *\*string* containing the contents of *newpart* + appended to *string*; the caller will own the new reference. The reference to + the old value of *string* will be stolen. If the new string cannot be created, + the old reference to *string* will still be discarded and the value of + *\*string* will be set to *NULL*; the appropriate exception will be set. + + +.. cfunction:: void PyString_ConcatAndDel(PyObject **string, PyObject *newpart) + + Create a new string object in *\*string* containing the contents of *newpart* + appended to *string*. This version decrements the reference count of *newpart*. + + +.. cfunction:: int _PyString_Resize(PyObject **string, Py_ssize_t newsize) + + A way to resize a string object even though it is "immutable". Only use this to + build up a brand new string object; don't use this if the string may already be + known in other parts of the code. It is an error to call this function if the + refcount on the input string object is not one. Pass the address of an existing + string object as an lvalue (it may be written into), and the new size desired. + On success, *\*string* holds the resized string object and ``0`` is returned; + the address in *\*string* may differ from its input value. If the reallocation + fails, the original string object at *\*string* is deallocated, *\*string* is + set to *NULL*, a memory exception is set, and ``-1`` is returned. + + +.. cfunction:: PyObject* PyString_Format(PyObject *format, PyObject *args) + + Return a new string object from *format* and *args*. Analogous to ``format % + args``. The *args* argument must be a tuple. + + +.. cfunction:: void PyString_InternInPlace(PyObject **string) + + Intern the argument *\*string* in place. The argument must be the address of a + pointer variable pointing to a Python string object. 
If there is an existing + interned string that is the same as *\*string*, it sets *\*string* to it + (decrementing the reference count of the old string object and incrementing the + reference count of the interned string object), otherwise it leaves *\*string* + alone and interns it (incrementing its reference count). (Clarification: even + though there is a lot of talk about reference counts, think of this function as + reference-count-neutral; you own the object after the call if and only if you + owned it before the call.) + + +.. cfunction:: PyObject* PyString_InternFromString(const char *v) + + A combination of :cfunc:`PyString_FromString` and + :cfunc:`PyString_InternInPlace`, returning either a new string object that has + been interned, or a new ("owned") reference to an earlier interned string object + with the same value. + + +.. cfunction:: PyObject* PyString_Decode(const char *s, Py_ssize_t size, const char *encoding, const char *errors) + + Create an object by decoding *size* bytes of the encoded buffer *s* using the + codec registered for *encoding*. *encoding* and *errors* have the same meaning + as the parameters of the same name in the :func:`unicode` built-in function. + The codec to be used is looked up using the Python codec registry. Return + *NULL* if an exception was raised by the codec. + + +.. cfunction:: PyObject* PyString_AsDecodedObject(PyObject *str, const char *encoding, const char *errors) + + Decode a string object by passing it to the codec registered for *encoding* and + return the result as Python object. *encoding* and *errors* have the same + meaning as the parameters of the same name in the string :meth:`encode` method. + The codec to be used is looked up using the Python codec registry. Return *NULL* + if an exception was raised by the codec. + + +.. 
cfunction:: PyObject* PyString_Encode(const char *s, Py_ssize_t size, const char *encoding, const char *errors) + + Encode the :ctype:`char` buffer of the given size by passing it to the codec + registered for *encoding* and return a Python object. *encoding* and *errors* + have the same meaning as the parameters of the same name in the string + :meth:`encode` method. The codec to be used is looked up using the Python codec + registry. Return *NULL* if an exception was raised by the codec. + + +.. cfunction:: PyObject* PyString_AsEncodedObject(PyObject *str, const char *encoding, const char *errors) + + Encode a string object using the codec registered for *encoding* and return the + result as Python object. *encoding* and *errors* have the same meaning as the + parameters of the same name in the string :meth:`encode` method. The codec to be + used is looked up using the Python codec registry. Return *NULL* if an exception + was raised by the codec. + + +.. _unicodeobjects: + +Unicode Objects +--------------- + +.. sectionauthor:: Marc-Andre Lemburg + + +These are the basic Unicode object types used for the Unicode implementation in +Python: + +.. % --- Unicode Type ------------------------------------------------------- + + +.. ctype:: Py_UNICODE + + This type represents the storage type which is used by Python internally as + basis for holding Unicode ordinals. Python's default builds use a 16-bit type + for :ctype:`Py_UNICODE` and store Unicode values internally as UCS2. It is also + possible to build a UCS4 version of Python (most recent Linux distributions come + with UCS4 builds of Python). These builds then use a 32-bit type for + :ctype:`Py_UNICODE` and store Unicode data internally as UCS4. On platforms + where :ctype:`wchar_t` is available and compatible with the chosen Python + Unicode build variant, :ctype:`Py_UNICODE` is a typedef alias for + :ctype:`wchar_t` to enhance native platform compatibility. 
On all other + platforms, :ctype:`Py_UNICODE` is a typedef alias for either :ctype:`unsigned + short` (UCS2) or :ctype:`unsigned long` (UCS4). + +Note that UCS2 and UCS4 Python builds are not binary compatible. Please keep +this in mind when writing extensions or interfaces. + + +.. ctype:: PyUnicodeObject + + This subtype of :ctype:`PyObject` represents a Python Unicode object. + + +.. cvar:: PyTypeObject PyUnicode_Type + + This instance of :ctype:`PyTypeObject` represents the Python Unicode type. It + is exposed to Python code as ``unicode`` and ``types.UnicodeType``. + +The following APIs are really C macros and can be used to do fast checks and to +access internal read-only data of Unicode objects: + + +.. cfunction:: int PyUnicode_Check(PyObject *o) + + Return true if the object *o* is a Unicode object or an instance of a Unicode + subtype. + + .. versionchanged:: 2.2 + Allowed subtypes to be accepted. + + +.. cfunction:: int PyUnicode_CheckExact(PyObject *o) + + Return true if the object *o* is a Unicode object, but not an instance of a + subtype. + + .. versionadded:: 2.2 + + +.. cfunction:: Py_ssize_t PyUnicode_GET_SIZE(PyObject *o) + + Return the size of the object. *o* has to be a :ctype:`PyUnicodeObject` (not + checked). + + +.. cfunction:: Py_ssize_t PyUnicode_GET_DATA_SIZE(PyObject *o) + + Return the size of the object's internal buffer in bytes. *o* has to be a + :ctype:`PyUnicodeObject` (not checked). + + +.. cfunction:: Py_UNICODE* PyUnicode_AS_UNICODE(PyObject *o) + + Return a pointer to the internal :ctype:`Py_UNICODE` buffer of the object. *o* + has to be a :ctype:`PyUnicodeObject` (not checked). + + +.. cfunction:: const char* PyUnicode_AS_DATA(PyObject *o) + + Return a pointer to the internal buffer of the object. *o* has to be a + :ctype:`PyUnicodeObject` (not checked). + +Unicode provides many different character properties. 
The most often needed ones +are available through these macros which are mapped to C functions depending on +the Python configuration. + +.. % --- Unicode character properties --------------------------------------- + + +.. cfunction:: int Py_UNICODE_ISSPACE(Py_UNICODE ch) + + Return 1 or 0 depending on whether *ch* is a whitespace character. + + +.. cfunction:: int Py_UNICODE_ISLOWER(Py_UNICODE ch) + + Return 1 or 0 depending on whether *ch* is a lowercase character. + + +.. cfunction:: int Py_UNICODE_ISUPPER(Py_UNICODE ch) + + Return 1 or 0 depending on whether *ch* is an uppercase character. + + +.. cfunction:: int Py_UNICODE_ISTITLE(Py_UNICODE ch) + + Return 1 or 0 depending on whether *ch* is a titlecase character. + + +.. cfunction:: int Py_UNICODE_ISLINEBREAK(Py_UNICODE ch) + + Return 1 or 0 depending on whether *ch* is a linebreak character. + + +.. cfunction:: int Py_UNICODE_ISDECIMAL(Py_UNICODE ch) + + Return 1 or 0 depending on whether *ch* is a decimal character. + + +.. cfunction:: int Py_UNICODE_ISDIGIT(Py_UNICODE ch) + + Return 1 or 0 depending on whether *ch* is a digit character. + + +.. cfunction:: int Py_UNICODE_ISNUMERIC(Py_UNICODE ch) + + Return 1 or 0 depending on whether *ch* is a numeric character. + + +.. cfunction:: int Py_UNICODE_ISALPHA(Py_UNICODE ch) + + Return 1 or 0 depending on whether *ch* is an alphabetic character. + + +.. cfunction:: int Py_UNICODE_ISALNUM(Py_UNICODE ch) + + Return 1 or 0 depending on whether *ch* is an alphanumeric character. + +These APIs can be used for fast direct character conversions: + + +.. cfunction:: Py_UNICODE Py_UNICODE_TOLOWER(Py_UNICODE ch) + + Return the character *ch* converted to lower case. + + +.. cfunction:: Py_UNICODE Py_UNICODE_TOUPPER(Py_UNICODE ch) + + Return the character *ch* converted to upper case. + + +.. cfunction:: Py_UNICODE Py_UNICODE_TOTITLE(Py_UNICODE ch) + + Return the character *ch* converted to title case. + + +.. 
cfunction:: int Py_UNICODE_TODECIMAL(Py_UNICODE ch) + + Return the character *ch* converted to a decimal positive integer. Return + ``-1`` if this is not possible. This macro does not raise exceptions. + + +.. cfunction:: int Py_UNICODE_TODIGIT(Py_UNICODE ch) + + Return the character *ch* converted to a single digit integer. Return ``-1`` if + this is not possible. This macro does not raise exceptions. + + +.. cfunction:: double Py_UNICODE_TONUMERIC(Py_UNICODE ch) + + Return the character *ch* converted to a double. Return ``-1.0`` if this is not + possible. This macro does not raise exceptions. + +To create Unicode objects and access their basic sequence properties, use these +APIs: + +.. % --- Plain Py_UNICODE --------------------------------------------------- + + +.. cfunction:: PyObject* PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size) + + Create a Unicode Object from the Py_UNICODE buffer *u* of the given size. *u* + may be *NULL* which causes the contents to be undefined. It is the user's + responsibility to fill in the needed data. The buffer is copied into the new + object. If the buffer is not *NULL*, the return value might be a shared object. + Therefore, modification of the resulting Unicode object is only allowed when *u* + is *NULL*. + + +.. cfunction:: PyObject* PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) + + Create a Unicode Object from the char buffer *u*. The bytes will be interpreted + as being UTF-8 encoded. *u* may also be *NULL* which + causes the contents to be undefined. It is the user's responsibility to fill in + the needed data. The buffer is copied into the new object. If the buffer is not + *NULL*, the return value might be a shared object. Therefore, modification of + the resulting Unicode object is only allowed when *u* is *NULL*. + + .. versionadded:: 3.0 + + +.. cfunction:: PyObject *PyUnicode_FromString(const char *u) + + Create a Unicode object from a UTF-8 encoded null-terminated char buffer + *u*.
+ + .. versionadded:: 3.0 + + +.. cfunction:: PyObject* PyUnicode_FromFormat(const char *format, ...) + + Take a C :cfunc:`printf`\ -style *format* string and a variable number of + arguments, calculate the size of the resulting Python unicode string and return + a string with the values formatted into it. The variable arguments must be C + types and must correspond exactly to the format characters in the *format* + string. The following format characters are allowed: + + .. % The descriptions for %zd and %zu are wrong, but the truth is complicated + .. % because not all compilers support the %z width modifier -- we fake it + .. % when necessary via interpolating PY_FORMAT_SIZE_T. + + +-------------------+---------------------+--------------------------------+ + | Format Characters | Type | Comment | + +===================+=====================+================================+ + | :attr:`%%` | *n/a* | The literal % character. | + +-------------------+---------------------+--------------------------------+ + | :attr:`%c` | int | A single character, | + | | | represented as a C int. | + +-------------------+---------------------+--------------------------------+ + | :attr:`%d` | int | Exactly equivalent to | + | | | ``printf("%d")``. | + +-------------------+---------------------+--------------------------------+ + | :attr:`%u` | unsigned int | Exactly equivalent to | + | | | ``printf("%u")``. | + +-------------------+---------------------+--------------------------------+ + | :attr:`%ld` | long | Exactly equivalent to | + | | | ``printf("%ld")``. | + +-------------------+---------------------+--------------------------------+ + | :attr:`%lu` | unsigned long | Exactly equivalent to | + | | | ``printf("%lu")``. | + +-------------------+---------------------+--------------------------------+ + | :attr:`%zd` | Py_ssize_t | Exactly equivalent to | + | | | ``printf("%zd")``.
| + +-------------------+---------------------+--------------------------------+ + | :attr:`%zu` | size_t | Exactly equivalent to | + | | | ``printf("%zu")``. | + +-------------------+---------------------+--------------------------------+ + | :attr:`%i` | int | Exactly equivalent to | + | | | ``printf("%i")``. | + +-------------------+---------------------+--------------------------------+ + | :attr:`%x` | int | Exactly equivalent to | + | | | ``printf("%x")``. | + +-------------------+---------------------+--------------------------------+ + | :attr:`%s` | char\* | A null-terminated C character | + | | | array. | + +-------------------+---------------------+--------------------------------+ + | :attr:`%p` | void\* | The hex representation of a C | + | | | pointer. Mostly equivalent to | + | | | ``printf("%p")`` except that | + | | | it is guaranteed to start with | + | | | the literal ``0x`` regardless | + | | | of what the platform's | + | | | ``printf`` yields. | + +-------------------+---------------------+--------------------------------+ + | :attr:`%U` | PyObject\* | A unicode object. | + +-------------------+---------------------+--------------------------------+ + | :attr:`%V` | PyObject\*, char \* | A unicode object (which may be | + | | | *NULL*) and a null-terminated | + | | | C character array as a second | + | | | parameter (which will be used, | + | | | if the first parameter is | + | | | *NULL*). | + +-------------------+---------------------+--------------------------------+ + | :attr:`%S` | PyObject\* | The result of calling | + | | | :func:`PyObject_Unicode`. | + +-------------------+---------------------+--------------------------------+ + | :attr:`%R` | PyObject\* | The result of calling | + | | | :func:`PyObject_Repr`. 
| + +-------------------+---------------------+--------------------------------+ + + An unrecognized format character causes all the rest of the format string to be + copied as-is to the result string, and any extra arguments discarded. + + .. versionadded:: 3.0 + + +.. cfunction:: PyObject* PyUnicode_FromFormatV(const char *format, va_list vargs) + + Identical to :cfunc:`PyUnicode_FromFormat` except that it takes exactly two + arguments. + + .. versionadded:: 3.0 + + +.. cfunction:: Py_UNICODE* PyUnicode_AsUnicode(PyObject *unicode) + + Return a read-only pointer to the Unicode object's internal :ctype:`Py_UNICODE` + buffer, *NULL* if *unicode* is not a Unicode object. + + +.. cfunction:: Py_ssize_t PyUnicode_GetSize(PyObject *unicode) + + Return the length of the Unicode object. + + +.. cfunction:: PyObject* PyUnicode_FromEncodedObject(PyObject *obj, const char *encoding, const char *errors) + + Coerce an encoded object *obj* to a Unicode object and return a reference with + incremented refcount. + + String and other char buffer compatible objects are decoded according to the + given encoding and using the error handling defined by errors. Both can be + *NULL* to have the interface use the default values (see the next section for + details). + + All other objects, including Unicode objects, cause a :exc:`TypeError` to be + set. + + The API returns *NULL* if there was an error. The caller is responsible for + decref'ing the returned objects. + + +.. cfunction:: PyObject* PyUnicode_FromObject(PyObject *obj) + + Shortcut for ``PyUnicode_FromEncodedObject(obj, NULL, "strict")`` which is used + throughout the interpreter whenever coercion to Unicode is needed. + +If the platform supports :ctype:`wchar_t` and provides a header file wchar.h, +Python can interface directly to this type using the following functions. +Support is optimized if Python's own :ctype:`Py_UNICODE` type is identical to +the system's :ctype:`wchar_t`. + +..
% --- wchar_t support for platforms which support it --------------------- + + +.. cfunction:: PyObject* PyUnicode_FromWideChar(const wchar_t *w, Py_ssize_t size) + + Create a Unicode object from the :ctype:`wchar_t` buffer *w* of the given size. + Return *NULL* on failure. + + +.. cfunction:: Py_ssize_t PyUnicode_AsWideChar(PyUnicodeObject *unicode, wchar_t *w, Py_ssize_t size) + + Copy the Unicode object contents into the :ctype:`wchar_t` buffer *w*. At most + *size* :ctype:`wchar_t` characters are copied (excluding a possibly trailing + 0-termination character). Return the number of :ctype:`wchar_t` characters + copied or -1 in case of an error. Note that the resulting :ctype:`wchar_t` + string may or may not be 0-terminated. It is the responsibility of the caller + to make sure that the :ctype:`wchar_t` string is 0-terminated in case this is + required by the application. + + +.. _builtincodecs: + +Built-in Codecs +^^^^^^^^^^^^^^^ + +Python provides a set of builtin codecs which are written in C for speed. All of +these codecs are directly usable via the following functions. + +Many of the following APIs take two arguments encoding and errors. These +parameters encoding and errors have the same semantics as the ones of the +builtin unicode() Unicode object constructor. + +Setting encoding to *NULL* causes the default encoding to be used which is +ASCII. The file system calls should use :cdata:`Py_FileSystemDefaultEncoding` +as the encoding for file names. This variable should be treated as read-only: On +some systems, it will be a pointer to a static string, on others, it will change +at run-time (such as when the application invokes setlocale). + +Error handling is set by errors which may also be set to *NULL* meaning to use +the default handling defined for the codec. Default error handling for all +builtin codecs is "strict" (:exc:`ValueError` is raised). + +The codecs all use a similar interface. 
Only deviation from the following +generic ones are documented for simplicity. + +These are the generic codec APIs: + +.. % --- Generic Codecs ----------------------------------------------------- + + +.. cfunction:: PyObject* PyUnicode_Decode(const char *s, Py_ssize_t size, const char *encoding, const char *errors) + + Create a Unicode object by decoding *size* bytes of the encoded string *s*. + *encoding* and *errors* have the same meaning as the parameters of the same name + in the :func:`unicode` builtin function. The codec to be used is looked up + using the Python codec registry. Return *NULL* if an exception was raised by + the codec. + + +.. cfunction:: PyObject* PyUnicode_Encode(const Py_UNICODE *s, Py_ssize_t size, const char *encoding, const char *errors) + + Encode the :ctype:`Py_UNICODE` buffer of the given size and return a Python + string object. *encoding* and *errors* have the same meaning as the parameters + of the same name in the Unicode :meth:`encode` method. The codec to be used is + looked up using the Python codec registry. Return *NULL* if an exception was + raised by the codec. + + +.. cfunction:: PyObject* PyUnicode_AsEncodedString(PyObject *unicode, const char *encoding, const char *errors) + + Encode a Unicode object and return the result as Python string object. + *encoding* and *errors* have the same meaning as the parameters of the same name + in the Unicode :meth:`encode` method. The codec to be used is looked up using + the Python codec registry. Return *NULL* if an exception was raised by the + codec. + +These are the UTF-8 codec APIs: + +.. % --- UTF-8 Codecs ------------------------------------------------------- + + +.. cfunction:: PyObject* PyUnicode_DecodeUTF8(const char *s, Py_ssize_t size, const char *errors) + + Create a Unicode object by decoding *size* bytes of the UTF-8 encoded string + *s*. Return *NULL* if an exception was raised by the codec. + + +.. 
cfunction:: PyObject* PyUnicode_DecodeUTF8Stateful(const char *s, Py_ssize_t size, const char *errors, Py_ssize_t *consumed) + + If *consumed* is *NULL*, behave like :cfunc:`PyUnicode_DecodeUTF8`. If + *consumed* is not *NULL*, trailing incomplete UTF-8 byte sequences will not be + treated as an error. Those bytes will not be decoded and the number of bytes + that have been decoded will be stored in *consumed*. + + .. versionadded:: 2.4 + + +.. cfunction:: PyObject* PyUnicode_EncodeUTF8(const Py_UNICODE *s, Py_ssize_t size, const char *errors) + + Encode the :ctype:`Py_UNICODE` buffer of the given size using UTF-8 and return a + Python string object. Return *NULL* if an exception was raised by the codec. + + +.. cfunction:: PyObject* PyUnicode_AsUTF8String(PyObject *unicode) + + Encode a Unicode object using UTF-8 and return the result as Python string + object. Error handling is "strict". Return *NULL* if an exception was raised + by the codec. + +These are the UTF-16 codec APIs: + +.. % --- UTF-16 Codecs ------------------------------------------------------ */ + + +.. cfunction:: PyObject* PyUnicode_DecodeUTF16(const char *s, Py_ssize_t size, const char *errors, int *byteorder) + + Decode *size* bytes from a UTF-16 encoded buffer string and return the + corresponding Unicode object. *errors* (if non-*NULL*) defines the error + handling. It defaults to "strict". + + If *byteorder* is non-*NULL*, the decoder starts decoding using the given byte + order:: + + *byteorder == -1: little endian + *byteorder == 0: native order + *byteorder == 1: big endian + + and then switches if the first two bytes of the input data are a byte order mark + (BOM) and the specified byte order is native order. This BOM is not copied into + the resulting Unicode string. After completion, *\*byteorder* is set to the + current byte order at the end of input data. + + If *byteorder* is *NULL*, the codec starts in native order mode. + + Return *NULL* if an exception was raised by the codec. + + +.. 
cfunction:: PyObject* PyUnicode_DecodeUTF16Stateful(const char *s, Py_ssize_t size, const char *errors, int *byteorder, Py_ssize_t *consumed) + + If *consumed* is *NULL*, behave like :cfunc:`PyUnicode_DecodeUTF16`. If + *consumed* is not *NULL*, :cfunc:`PyUnicode_DecodeUTF16Stateful` will not treat + trailing incomplete UTF-16 byte sequences (such as an odd number of bytes or a + split surrogate pair) as an error. Those bytes will not be decoded and the + number of bytes that have been decoded will be stored in *consumed*. + + .. versionadded:: 2.4 + + +.. cfunction:: PyObject* PyUnicode_EncodeUTF16(const Py_UNICODE *s, Py_ssize_t size, const char *errors, int byteorder) + + Return a Python string object holding the UTF-16 encoded value of the Unicode + data in *s*. If *byteorder* is not ``0``, output is written according to the + following byte order:: + + byteorder == -1: little endian + byteorder == 0: native byte order (writes a BOM mark) + byteorder == 1: big endian + + If byteorder is ``0``, the output string will always start with the Unicode BOM + mark (U+FEFF). In the other two modes, no BOM mark is prepended. + + If *Py_UNICODE_WIDE* is defined, a single :ctype:`Py_UNICODE` value may get + represented as a surrogate pair. If it is not defined, each :ctype:`Py_UNICODE` + value is interpreted as a UCS-2 character. + + Return *NULL* if an exception was raised by the codec. + + +.. cfunction:: PyObject* PyUnicode_AsUTF16String(PyObject *unicode) + + Return a Python string using the UTF-16 encoding in native byte order. The + string always starts with a BOM mark. Error handling is "strict". Return + *NULL* if an exception was raised by the codec. + +These are the "Unicode Escape" codec APIs: + +.. % --- Unicode-Escape Codecs ---------------------------------------------- + + +.. 
cfunction:: PyObject* PyUnicode_DecodeUnicodeEscape(const char *s, Py_ssize_t size, const char *errors) + + Create a Unicode object by decoding *size* bytes of the Unicode-Escape encoded + string *s*. Return *NULL* if an exception was raised by the codec. + + +.. cfunction:: PyObject* PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s, Py_ssize_t size) + + Encode the :ctype:`Py_UNICODE` buffer of the given size using Unicode-Escape and + return a Python string object. Return *NULL* if an exception was raised by the + codec. + + +.. cfunction:: PyObject* PyUnicode_AsUnicodeEscapeString(PyObject *unicode) + + Encode a Unicode object using Unicode-Escape and return the result as Python + string object. Error handling is "strict". Return *NULL* if an exception was + raised by the codec. + +These are the "Raw Unicode Escape" codec APIs: + +.. % --- Raw-Unicode-Escape Codecs ------------------------------------------ + + +.. cfunction:: PyObject* PyUnicode_DecodeRawUnicodeEscape(const char *s, Py_ssize_t size, const char *errors) + + Create a Unicode object by decoding *size* bytes of the Raw-Unicode-Escape + encoded string *s*. Return *NULL* if an exception was raised by the codec. + + +.. cfunction:: PyObject* PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s, Py_ssize_t size, const char *errors) + + Encode the :ctype:`Py_UNICODE` buffer of the given size using Raw-Unicode-Escape + and return a Python string object. Return *NULL* if an exception was raised by + the codec. + + +.. cfunction:: PyObject* PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode) + + Encode a Unicode object using Raw-Unicode-Escape and return the result as + Python string object. Error handling is "strict". Return *NULL* if an exception + was raised by the codec. + +These are the Latin-1 codec APIs: Latin-1 corresponds to the first 256 Unicode +ordinals and only these are accepted by the codecs during encoding. + +.. 
% --- Latin-1 Codecs ----------------------------------------------------- + + +.. cfunction:: PyObject* PyUnicode_DecodeLatin1(const char *s, Py_ssize_t size, const char *errors) + + Create a Unicode object by decoding *size* bytes of the Latin-1 encoded string + *s*. Return *NULL* if an exception was raised by the codec. + + +.. cfunction:: PyObject* PyUnicode_EncodeLatin1(const Py_UNICODE *s, Py_ssize_t size, const char *errors) + + Encode the :ctype:`Py_UNICODE` buffer of the given size using Latin-1 and return + a Python string object. Return *NULL* if an exception was raised by the codec. + + +.. cfunction:: PyObject* PyUnicode_AsLatin1String(PyObject *unicode) + + Encode a Unicode object using Latin-1 and return the result as Python string + object. Error handling is "strict". Return *NULL* if an exception was raised + by the codec. + +These are the ASCII codec APIs. Only 7-bit ASCII data is accepted. All other +codes generate errors. + +.. % --- ASCII Codecs ------------------------------------------------------- + + +.. cfunction:: PyObject* PyUnicode_DecodeASCII(const char *s, Py_ssize_t size, const char *errors) + + Create a Unicode object by decoding *size* bytes of the ASCII encoded string + *s*. Return *NULL* if an exception was raised by the codec. + + +.. cfunction:: PyObject* PyUnicode_EncodeASCII(const Py_UNICODE *s, Py_ssize_t size, const char *errors) + + Encode the :ctype:`Py_UNICODE` buffer of the given size using ASCII and return a + Python string object. Return *NULL* if an exception was raised by the codec. + + +.. cfunction:: PyObject* PyUnicode_AsASCIIString(PyObject *unicode) + + Encode a Unicode object using ASCII and return the result as Python string + object. Error handling is "strict". Return *NULL* if an exception was raised + by the codec. + +These are the mapping codec APIs: + +.. 
% --- Character Map Codecs ----------------------------------------------- + +This codec is special in that it can be used to implement many different codecs +(and this is in fact what was done to obtain most of the standard codecs +included in the :mod:`encodings` package). The codec uses mapping to encode and +decode characters. + +Decoding mappings must map single string characters to single Unicode +characters, integers (which are then interpreted as Unicode ordinals) or None +(meaning "undefined mapping" and causing an error). + +Encoding mappings must map single Unicode characters to single string +characters, integers (which are then interpreted as Latin-1 ordinals) or None +(meaning "undefined mapping" and causing an error). + +The mapping objects provided must only support the __getitem__ mapping +interface. + +If a character lookup fails with a LookupError, the character is copied as-is +meaning that its ordinal value will be interpreted as Unicode or Latin-1 ordinal +resp. Because of this, mappings only need to contain those mappings which map +characters to different code points. + + +.. cfunction:: PyObject* PyUnicode_DecodeCharmap(const char *s, Py_ssize_t size, PyObject *mapping, const char *errors) + + Create a Unicode object by decoding *size* bytes of the encoded string *s* using + the given *mapping* object. Return *NULL* if an exception was raised by the + codec. If *mapping* is *NULL* latin-1 decoding will be done. Else it can be a + dictionary mapping byte or a unicode string, which is treated as a lookup table. + Byte values greater than the length of the string and U+FFFE "characters" are + treated as "undefined mapping". + + .. versionchanged:: 2.4 + Allowed unicode string as mapping argument. + + +.. 
cfunction:: PyObject* PyUnicode_EncodeCharmap(const Py_UNICODE *s, Py_ssize_t size, PyObject *mapping, const char *errors) + + Encode the :ctype:`Py_UNICODE` buffer of the given size using the given + *mapping* object and return a Python string object. Return *NULL* if an + exception was raised by the codec. + + +.. cfunction:: PyObject* PyUnicode_AsCharmapString(PyObject *unicode, PyObject *mapping) + + Encode a Unicode object using the given *mapping* object and return the result + as Python string object. Error handling is "strict". Return *NULL* if an + exception was raised by the codec. + +The following codec API is special in that it maps Unicode to Unicode. + + +.. cfunction:: PyObject* PyUnicode_TranslateCharmap(const Py_UNICODE *s, Py_ssize_t size, PyObject *table, const char *errors) + + Translate a :ctype:`Py_UNICODE` buffer of the given length by applying a + character mapping *table* to it and return the resulting Unicode object. Return + *NULL* when an exception was raised by the codec. + + The mapping *table* must map Unicode ordinal integers to Unicode ordinal + integers or None (causing deletion of the character). + + Mapping tables need only provide the :meth:`__getitem__` interface; dictionaries + and sequences work well. Unmapped character ordinals (ones which cause a + :exc:`LookupError`) are left untouched and are copied as-is. + +These are the MBCS codec APIs. They are currently only available on Windows and +use the Win32 MBCS converters to implement the conversions. Note that MBCS (or +DBCS) is a class of encodings, not just one. The target encoding is defined by +the user settings on the machine running the codec. + +.. % --- MBCS codecs for Windows -------------------------------------------- + + +.. cfunction:: PyObject* PyUnicode_DecodeMBCS(const char *s, Py_ssize_t size, const char *errors) + + Create a Unicode object by decoding *size* bytes of the MBCS encoded string *s*. + Return *NULL* if an exception was raised by the codec. + + +.. 
cfunction:: PyObject* PyUnicode_DecodeMBCSStateful(const char *s, int size, const char *errors, int *consumed) + + If *consumed* is *NULL*, behave like :cfunc:`PyUnicode_DecodeMBCS`. If + *consumed* is not *NULL*, :cfunc:`PyUnicode_DecodeMBCSStateful` will not decode + trailing lead byte and the number of bytes that have been decoded will be stored + in *consumed*. + + .. versionadded:: 2.5 + + +.. cfunction:: PyObject* PyUnicode_EncodeMBCS(const Py_UNICODE *s, Py_ssize_t size, const char *errors) + + Encode the :ctype:`Py_UNICODE` buffer of the given size using MBCS and return a + Python string object. Return *NULL* if an exception was raised by the codec. + + +.. cfunction:: PyObject* PyUnicode_AsMBCSString(PyObject *unicode) + + Encode a Unicode object using MBCS and return the result as Python string + object. Error handling is "strict". Return *NULL* if an exception was raised + by the codec. + +.. % --- Methods & Slots ---------------------------------------------------- + + +.. _unicodemethodsandslots: + +Methods and Slot Functions +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following APIs are capable of handling Unicode objects and strings on input +(we refer to them as strings in the descriptions) and return Unicode objects or +integers as appropriate. + +They all return *NULL* or ``-1`` if an exception occurs. + + +.. cfunction:: PyObject* PyUnicode_Concat(PyObject *left, PyObject *right) + + Concat two strings giving a new Unicode string. + + +.. cfunction:: PyObject* PyUnicode_Split(PyObject *s, PyObject *sep, Py_ssize_t maxsplit) + + Split a string giving a list of Unicode strings. If sep is *NULL*, splitting + will be done at all whitespace substrings. Otherwise, splits occur at the given + separator. At most *maxsplit* splits will be done. If negative, no limit is + set. Separators are not included in the resulting list. + + +.. 
cfunction:: PyObject* PyUnicode_Splitlines(PyObject *s, int keepend) + + Split a Unicode string at line breaks, returning a list of Unicode strings. + CRLF is considered to be one line break. If *keepend* is 0, the line break + characters are not included in the resulting strings. + + +.. cfunction:: PyObject* PyUnicode_Translate(PyObject *str, PyObject *table, const char *errors) + + Translate a string by applying a character mapping table to it and return the + resulting Unicode object. + + The mapping table must map Unicode ordinal integers to Unicode ordinal integers + or None (causing deletion of the character). + + Mapping tables need only provide the :meth:`__getitem__` interface; dictionaries + and sequences work well. Unmapped character ordinals (ones which cause a + :exc:`LookupError`) are left untouched and are copied as-is. + + *errors* has the usual meaning for codecs. It may be *NULL* which indicates to + use the default error handling. + + +.. cfunction:: PyObject* PyUnicode_Join(PyObject *separator, PyObject *seq) + + Join a sequence of strings using the given separator and return the resulting + Unicode string. + + +.. cfunction:: int PyUnicode_Tailmatch(PyObject *str, PyObject *substr, Py_ssize_t start, Py_ssize_t end, int direction) + + Return 1 if *substr* matches *str*[*start*:*end*] at the given tail end + (*direction* == -1 means to do a prefix match, *direction* == 1 a suffix match), + 0 otherwise. Return ``-1`` if an error occurred. + + +.. cfunction:: Py_ssize_t PyUnicode_Find(PyObject *str, PyObject *substr, Py_ssize_t start, Py_ssize_t end, int direction) + + Return the first position of *substr* in *str*[*start*:*end*] using the given + *direction* (*direction* == 1 means to do a forward search, *direction* == -1 a + backward search). The return value is the index of the first match; a value of + ``-1`` indicates that no match was found, and ``-2`` indicates that an error + occurred and an exception has been set. + + +.. 
cfunction:: Py_ssize_t PyUnicode_Count(PyObject *str, PyObject *substr, Py_ssize_t start, Py_ssize_t end) + + Return the number of non-overlapping occurrences of *substr* in + ``str[start:end]``. Return ``-1`` if an error occurred. + + +.. cfunction:: PyObject* PyUnicode_Replace(PyObject *str, PyObject *substr, PyObject *replstr, Py_ssize_t maxcount) + + Replace at most *maxcount* occurrences of *substr* in *str* with *replstr* and + return the resulting Unicode object. *maxcount* == -1 means replace all + occurrences. + + +.. cfunction:: int PyUnicode_Compare(PyObject *left, PyObject *right) + + Compare two strings and return -1, 0, 1 for less than, equal, and greater than, + respectively. + + +.. cfunction:: int PyUnicode_RichCompare(PyObject *left, PyObject *right, int op) + + Rich compare two unicode strings and return one of the following: + + * ``NULL`` in case an exception was raised + * :const:`Py_True` or :const:`Py_False` for successful comparisons + * :const:`Py_NotImplemented` in case the type combination is unknown + + Note that :const:`Py_EQ` and :const:`Py_NE` comparisons can cause a + :exc:`UnicodeWarning` in case the conversion of the arguments to Unicode fails + with a :exc:`UnicodeDecodeError`. + + Possible values for *op* are :const:`Py_GT`, :const:`Py_GE`, :const:`Py_EQ`, + :const:`Py_NE`, :const:`Py_LT`, and :const:`Py_LE`. + + +.. cfunction:: PyObject* PyUnicode_Format(PyObject *format, PyObject *args) + + Return a new string object from *format* and *args*; this is analogous to + ``format % args``. The *args* argument must be a tuple. + + +.. cfunction:: int PyUnicode_Contains(PyObject *container, PyObject *element) + + Check whether *element* is contained in *container* and return true or false + accordingly. + + *element* has to coerce to a one element Unicode string. ``-1`` is returned if + there was an error. + + +.. cfunction:: void PyUnicode_InternInPlace(PyObject **string) + + Intern the argument *\*string* in place. 
The argument must be the address of a + pointer variable pointing to a Python unicode string object. If there is an + existing interned string that is the same as *\*string*, it sets *\*string* to + it (decrementing the reference count of the old string object and incrementing + the reference count of the interned string object), otherwise it leaves + *\*string* alone and interns it (incrementing its reference count). + (Clarification: even though there is a lot of talk about reference counts, think + of this function as reference-count-neutral; you own the object after the call + if and only if you owned it before the call.) + + +.. cfunction:: PyObject* PyUnicode_InternFromString(const char *v) + + A combination of :cfunc:`PyUnicode_FromString` and + :cfunc:`PyUnicode_InternInPlace`, returning either a new unicode string object + that has been interned, or a new ("owned") reference to an earlier interned + string object with the same value. + + +.. _bufferobjects: + +Buffer Objects +-------------- + +.. sectionauthor:: Greg Stein + + +.. index:: + object: buffer + single: buffer interface + +Python objects implemented in C can export a group of functions called the +"buffer interface." These functions can be used by an object to expose its data +in a raw, byte-oriented format. Clients of the object can use the buffer +interface to access the object data directly, without needing to copy it first. + +Two examples of objects that support the buffer interface are strings and +arrays. The string object exposes the character contents in the buffer +interface's byte-oriented form. An array can also expose its contents, but it +should be noted that array elements may be multi-byte values. + +An example user of the buffer interface is the file object's :meth:`write` +method. Any object that can export a series of bytes through the buffer +interface can be written to a file. 
There are a number of format codes to +:cfunc:`PyArg_ParseTuple` that operate against an object's buffer interface, +returning data from the target object. + +.. index:: single: PyBufferProcs + +More information on the buffer interface is provided in the section +:ref:`buffer-structs`, under the description for :ctype:`PyBufferProcs`. + +A "buffer object" is defined in the :file:`bufferobject.h` header (included by +:file:`Python.h`). These objects look very similar to string objects at the +Python programming level: they support slicing, indexing, concatenation, and +some other standard string operations. However, their data can come from one of +two sources: from a block of memory, or from another object which exports the +buffer interface. + +Buffer objects are useful as a way to expose the data from another object's +buffer interface to the Python programmer. They can also be used as a zero-copy +slicing mechanism. Using their ability to reference a block of memory, it is +possible to expose any data to the Python programmer quite easily. The memory +could be a large, constant array in a C extension, it could be a raw block of +memory for manipulation before passing to an operating system library, or it +could be used to pass around structured data in its native, in-memory format. + + +.. ctype:: PyBufferObject + + This subtype of :ctype:`PyObject` represents a buffer object. + + +.. cvar:: PyTypeObject PyBuffer_Type + + .. index:: single: BufferType (in module types) + + The instance of :ctype:`PyTypeObject` which represents the Python buffer type; + it is the same object as ``buffer`` and ``types.BufferType`` in the Python + layer. + + +.. cvar:: int Py_END_OF_BUFFER + + This constant may be passed as the *size* parameter to + :cfunc:`PyBuffer_FromObject` or :cfunc:`PyBuffer_FromReadWriteObject`. It + indicates that the new :ctype:`PyBufferObject` should refer to *base* object + from the specified *offset* to the end of its exported buffer. 
Using this + enables the caller to avoid querying the *base* object for its length. + + +.. cfunction:: int PyBuffer_Check(PyObject *p) + + Return true if the argument has type :cdata:`PyBuffer_Type`. + + +.. cfunction:: PyObject* PyBuffer_FromObject(PyObject *base, Py_ssize_t offset, Py_ssize_t size) + + Return a new read-only buffer object. This raises :exc:`TypeError` if *base* + doesn't support the read-only buffer protocol or doesn't provide exactly one + buffer segment, or it raises :exc:`ValueError` if *offset* is less than zero. + The buffer will hold a reference to the *base* object, and the buffer's contents + will refer to the *base* object's buffer interface, starting at position + *offset* and extending for *size* bytes. If *size* is :const:`Py_END_OF_BUFFER`, + then the new buffer's contents extend to the length of the *base* object's + exported buffer data. + + +.. cfunction:: PyObject* PyBuffer_FromReadWriteObject(PyObject *base, Py_ssize_t offset, Py_ssize_t size) + + Return a new writable buffer object. Parameters and exceptions are similar to + those for :cfunc:`PyBuffer_FromObject`. If the *base* object does not export + the writeable buffer protocol, then :exc:`TypeError` is raised. + + +.. cfunction:: PyObject* PyBuffer_FromMemory(void *ptr, Py_ssize_t size) + + Return a new read-only buffer object that reads from a specified location in + memory, with a specified size. The caller is responsible for ensuring that the + memory buffer, passed in as *ptr*, is not deallocated while the returned buffer + object exists. Raises :exc:`ValueError` if *size* is less than zero. Note that + :const:`Py_END_OF_BUFFER` may *not* be passed for the *size* parameter; + :exc:`ValueError` will be raised in that case. + + +.. cfunction:: PyObject* PyBuffer_FromReadWriteMemory(void *ptr, Py_ssize_t size) + + Similar to :cfunc:`PyBuffer_FromMemory`, but the returned buffer is writable. + + +.. 
cfunction:: PyObject* PyBuffer_New(Py_ssize_t size) + + Return a new writable buffer object that maintains its own memory buffer of + *size* bytes. :exc:`ValueError` is raised if *size* is not zero or positive. + Note that the memory buffer (as returned by :cfunc:`PyObject_AsWriteBuffer`) is + not specifically aligned. + + +.. _tupleobjects: + +Tuple Objects +------------- + +.. index:: object: tuple + + +.. ctype:: PyTupleObject + + This subtype of :ctype:`PyObject` represents a Python tuple object. + + +.. cvar:: PyTypeObject PyTuple_Type + + .. index:: single: TupleType (in module types) + + This instance of :ctype:`PyTypeObject` represents the Python tuple type; it is + the same object as ``tuple`` and ``types.TupleType`` in the Python layer. + + +.. cfunction:: int PyTuple_Check(PyObject *p) + + Return true if *p* is a tuple object or an instance of a subtype of the tuple + type. + + .. versionchanged:: 2.2 + Allowed subtypes to be accepted. + + +.. cfunction:: int PyTuple_CheckExact(PyObject *p) + + Return true if *p* is a tuple object, but not an instance of a subtype of the + tuple type. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyTuple_New(Py_ssize_t len) + + Return a new tuple object of size *len*, or *NULL* on failure. + + +.. cfunction:: PyObject* PyTuple_Pack(Py_ssize_t n, ...) + + Return a new tuple object of size *n*, or *NULL* on failure. The tuple values + are initialized to the subsequent *n* C arguments pointing to Python objects. + ``PyTuple_Pack(2, a, b)`` is equivalent to ``Py_BuildValue("(OO)", a, b)``. + + .. versionadded:: 2.4 + + +.. cfunction:: int PyTuple_Size(PyObject *p) + + Take a pointer to a tuple object, and return the size of that tuple. + + +.. cfunction:: int PyTuple_GET_SIZE(PyObject *p) + + Return the size of the tuple *p*, which must be non-*NULL* and point to a tuple; + no error checking is performed. + + +.. 
cfunction:: PyObject* PyTuple_GetItem(PyObject *p, Py_ssize_t pos) + + Return the object at position *pos* in the tuple pointed to by *p*. If *pos* is + out of bounds, return *NULL* and set an :exc:`IndexError` exception. + + +.. cfunction:: PyObject* PyTuple_GET_ITEM(PyObject *p, Py_ssize_t pos) + + Like :cfunc:`PyTuple_GetItem`, but does no checking of its arguments. + + +.. cfunction:: PyObject* PyTuple_GetSlice(PyObject *p, Py_ssize_t low, Py_ssize_t high) + + Take a slice of the tuple pointed to by *p* from *low* to *high* and return it + as a new tuple. + + +.. cfunction:: int PyTuple_SetItem(PyObject *p, Py_ssize_t pos, PyObject *o) + + Insert a reference to object *o* at position *pos* of the tuple pointed to by + *p*. Return ``0`` on success. + + .. note:: + + This function "steals" a reference to *o*. + + +.. cfunction:: void PyTuple_SET_ITEM(PyObject *p, Py_ssize_t pos, PyObject *o) + + Like :cfunc:`PyTuple_SetItem`, but does no error checking, and should *only* be + used to fill in brand new tuples. + + .. note:: + + This function "steals" a reference to *o*. + + +.. cfunction:: int _PyTuple_Resize(PyObject **p, Py_ssize_t newsize) + + Can be used to resize a tuple. *newsize* will be the new length of the tuple. + Because tuples are *supposed* to be immutable, this should only be used if there + is only one reference to the object. Do *not* use this if the tuple may already + be known to some other part of the code. The tuple will always grow or shrink + at the end. Think of this as destroying the old tuple and creating a new one, + only more efficiently. Returns ``0`` on success. Client code should never + assume that the resulting value of ``*p`` will be the same as before calling + this function. If the object referenced by ``*p`` is replaced, the original + ``*p`` is destroyed. On failure, returns ``-1`` and sets ``*p`` to *NULL*, and + raises :exc:`MemoryError` or :exc:`SystemError`. + + .. 
versionchanged:: 2.2 + Removed unused third parameter, *last_is_sticky*. + + +.. _listobjects: + +List Objects +------------ + +.. index:: object: list + + +.. ctype:: PyListObject + + This subtype of :ctype:`PyObject` represents a Python list object. + + +.. cvar:: PyTypeObject PyList_Type + + .. index:: single: ListType (in module types) + + This instance of :ctype:`PyTypeObject` represents the Python list type. This is + the same object as ``list`` and ``types.ListType`` in the Python layer. + + +.. cfunction:: int PyList_Check(PyObject *p) + + Return true if *p* is a list object or an instance of a subtype of the list + type. + + .. versionchanged:: 2.2 + Allowed subtypes to be accepted. + + +.. cfunction:: int PyList_CheckExact(PyObject *p) + + Return true if *p* is a list object, but not an instance of a subtype of the + list type. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyList_New(Py_ssize_t len) + + Return a new list of length *len* on success, or *NULL* on failure. + + .. note:: + + If *len* is greater than zero, the returned list object's items are set to + ``NULL``. Thus you cannot use abstract API functions such as + :cfunc:`PySequence_SetItem` or expose the object to Python code before setting + all items to a real object with :cfunc:`PyList_SetItem`. + + +.. cfunction:: Py_ssize_t PyList_Size(PyObject *list) + + .. index:: builtin: len + + Return the length of the list object in *list*; this is equivalent to + ``len(list)`` on a list object. + + +.. cfunction:: Py_ssize_t PyList_GET_SIZE(PyObject *list) + + Macro form of :cfunc:`PyList_Size` without error checking. + + +.. cfunction:: PyObject* PyList_GetItem(PyObject *list, Py_ssize_t index) + + Return the object at position *index* in the list pointed to by *list*. The position + must be non-negative; indexing from the end of the list is not supported. If *index* + is out of bounds, return *NULL* and set an :exc:`IndexError` exception. + + +.. 
cfunction:: PyObject* PyList_GET_ITEM(PyObject *list, Py_ssize_t i) + + Macro form of :cfunc:`PyList_GetItem` without error checking. + + +.. cfunction:: int PyList_SetItem(PyObject *list, Py_ssize_t index, PyObject *item) + + Set the item at index *index* in list to *item*. Return ``0`` on success or + ``-1`` on failure. + + .. note:: + + This function "steals" a reference to *item* and discards a reference to an item + already in the list at the affected position. + + +.. cfunction:: void PyList_SET_ITEM(PyObject *list, Py_ssize_t i, PyObject *o) + + Macro form of :cfunc:`PyList_SetItem` without error checking. This is normally + only used to fill in new lists where there is no previous content. + + .. note:: + + This function "steals" a reference to *o*, and, unlike + :cfunc:`PyList_SetItem`, does *not* discard a reference to any item that is + being replaced; any reference in *list* at position *i* will be leaked. + + +.. cfunction:: int PyList_Insert(PyObject *list, Py_ssize_t index, PyObject *item) + + Insert the item *item* into list *list* in front of index *index*. Return ``0`` + if successful; return ``-1`` and set an exception if unsuccessful. Analogous to + ``list.insert(index, item)``. + + +.. cfunction:: int PyList_Append(PyObject *list, PyObject *item) + + Append the object *item* at the end of list *list*. Return ``0`` if successful; + return ``-1`` and set an exception if unsuccessful. Analogous to + ``list.append(item)``. + + +.. cfunction:: PyObject* PyList_GetSlice(PyObject *list, Py_ssize_t low, Py_ssize_t high) + + Return a list of the objects in *list* containing the objects *between* *low* + and *high*. Return *NULL* and set an exception if unsuccessful. Analogous to + ``list[low:high]``. + + +.. cfunction:: int PyList_SetSlice(PyObject *list, Py_ssize_t low, Py_ssize_t high, PyObject *itemlist) + + Set the slice of *list* between *low* and *high* to the contents of *itemlist*. + Analogous to ``list[low:high] = itemlist``. 
The *itemlist* may be *NULL*, + indicating the assignment of an empty list (slice deletion). Return ``0`` on + success, ``-1`` on failure. + + +.. cfunction:: int PyList_Sort(PyObject *list) + + Sort the items of *list* in place. Return ``0`` on success, ``-1`` on failure. + This is equivalent to ``list.sort()``. + + +.. cfunction:: int PyList_Reverse(PyObject *list) + + Reverse the items of *list* in place. Return ``0`` on success, ``-1`` on + failure. This is the equivalent of ``list.reverse()``. + + +.. cfunction:: PyObject* PyList_AsTuple(PyObject *list) + + .. index:: builtin: tuple + + Return a new tuple object containing the contents of *list*; equivalent to + ``tuple(list)``. + + +.. _mapobjects: + +Mapping Objects +=============== + +.. index:: object: mapping + + +.. _dictobjects: + +Dictionary Objects +------------------ + +.. index:: object: dictionary + + +.. ctype:: PyDictObject + + This subtype of :ctype:`PyObject` represents a Python dictionary object. + + +.. cvar:: PyTypeObject PyDict_Type + + .. index:: + single: DictType (in module types) + single: DictionaryType (in module types) + + This instance of :ctype:`PyTypeObject` represents the Python dictionary type. + This is exposed to Python programs as ``dict`` and ``types.DictType``. + + +.. cfunction:: int PyDict_Check(PyObject *p) + + Return true if *p* is a dict object or an instance of a subtype of the dict + type. + + .. versionchanged:: 2.2 + Allowed subtypes to be accepted. + + +.. cfunction:: int PyDict_CheckExact(PyObject *p) + + Return true if *p* is a dict object, but not an instance of a subtype of the + dict type. + + .. versionadded:: 2.4 + + +.. cfunction:: PyObject* PyDict_New() + + Return a new empty dictionary, or *NULL* on failure. + + +.. cfunction:: PyObject* PyDictProxy_New(PyObject *dict) + + Return a proxy object for a mapping which enforces read-only behavior. 
This is + normally used to create a proxy to prevent modification of the dictionary for + non-dynamic class types. + + .. versionadded:: 2.2 + + +.. cfunction:: void PyDict_Clear(PyObject *p) + + Empty an existing dictionary of all key-value pairs. + + +.. cfunction:: int PyDict_Contains(PyObject *p, PyObject *key) + + Determine if dictionary *p* contains *key*. If an item in *p* matches *key*, + return ``1``, otherwise return ``0``. On error, return ``-1``. This is + equivalent to the Python expression ``key in p``. + + .. versionadded:: 2.4 + + +.. cfunction:: PyObject* PyDict_Copy(PyObject *p) + + Return a new dictionary that contains the same key-value pairs as *p*. + + .. versionadded:: 1.6 + + +.. cfunction:: int PyDict_SetItem(PyObject *p, PyObject *key, PyObject *val) + + Insert *val* into the dictionary *p* with a key of *key*. *key* must be + hashable; if it isn't, :exc:`TypeError` will be raised. Return ``0`` on success + or ``-1`` on failure. + + +.. cfunction:: int PyDict_SetItemString(PyObject *p, const char *key, PyObject *val) + + .. index:: single: PyString_FromString() + + Insert *val* into the dictionary *p* using *key* as a key. *key* should be a + :ctype:`char\*`. The key object is created using ``PyString_FromString(key)``. + Return ``0`` on success or ``-1`` on failure. + + +.. cfunction:: int PyDict_DelItem(PyObject *p, PyObject *key) + + Remove the entry in dictionary *p* with key *key*. *key* must be hashable; if it + isn't, :exc:`TypeError` is raised. Return ``0`` on success or ``-1`` on + failure. + + +.. cfunction:: int PyDict_DelItemString(PyObject *p, char *key) + + Remove the entry in dictionary *p* which has a key specified by the string + *key*. Return ``0`` on success or ``-1`` on failure. + + +.. cfunction:: PyObject* PyDict_GetItem(PyObject *p, PyObject *key) + + Return the object from dictionary *p* which has a key *key*. Return *NULL* if + the key *key* is not present, but *without* setting an exception. + + +..
cfunction:: PyObject* PyDict_GetItemString(PyObject *p, const char *key) + + This is the same as :cfunc:`PyDict_GetItem`, but *key* is specified as a + :ctype:`char\*`, rather than a :ctype:`PyObject\*`. + + +.. cfunction:: PyObject* PyDict_Items(PyObject *p) + + Return a :ctype:`PyListObject` containing all the items from the dictionary, as + in the dictionary method :meth:`dict.items`. + + +.. cfunction:: PyObject* PyDict_Keys(PyObject *p) + + Return a :ctype:`PyListObject` containing all the keys from the dictionary, as + in the dictionary method :meth:`dict.keys`. + + +.. cfunction:: PyObject* PyDict_Values(PyObject *p) + + Return a :ctype:`PyListObject` containing all the values from the dictionary + *p*, as in the dictionary method :meth:`dict.values`. + + +.. cfunction:: Py_ssize_t PyDict_Size(PyObject *p) + + .. index:: builtin: len + + Return the number of items in the dictionary. This is equivalent to ``len(p)`` + on a dictionary. + + +.. cfunction:: int PyDict_Next(PyObject *p, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) + + Iterate over all key-value pairs in the dictionary *p*. The :ctype:`Py_ssize_t` + referred to by *ppos* must be initialized to ``0`` prior to the first call to + this function to start the iteration; the function returns true for each pair in + the dictionary, and false once all pairs have been reported. The parameters + *pkey* and *pvalue* should either point to :ctype:`PyObject\*` variables that + will be filled in with each key and value, respectively, or may be *NULL*. Any + references returned through them are borrowed. *ppos* should not be altered + during iteration. Its value represents offsets within the internal dictionary + structure, and since the structure is sparse, the offsets are not consecutive. + + For example:: + + PyObject *key, *value; + Py_ssize_t pos = 0; + + while (PyDict_Next(self->dict, &pos, &key, &value)) { + /* do something interesting with the values... */ + ...
+ } + + The dictionary *p* should not be mutated during iteration. It is safe (since + Python 2.1) to modify the values of the keys as you iterate over the dictionary, + but only so long as the set of keys does not change. For example:: + + PyObject *key, *value; + Py_ssize_t pos = 0; + + while (PyDict_Next(self->dict, &pos, &key, &value)) { + long i = PyInt_AS_LONG(value) + 1; + PyObject *o = PyInt_FromLong(i); + if (o == NULL) + return -1; + if (PyDict_SetItem(self->dict, key, o) < 0) { + Py_DECREF(o); + return -1; + } + Py_DECREF(o); + } + + +.. cfunction:: int PyDict_Merge(PyObject *a, PyObject *b, int override) + + Iterate over mapping object *b* adding key-value pairs to dictionary *a*. *b* + may be a dictionary, or any object supporting :cfunc:`PyMapping_Keys` and + :cfunc:`PyObject_GetItem`. If *override* is true, existing pairs in *a* will be + replaced if a matching key is found in *b*, otherwise pairs will only be added + if there is not a matching key in *a*. Return ``0`` on success or ``-1`` if an + exception was raised. + + .. versionadded:: 2.2 + + +.. cfunction:: int PyDict_Update(PyObject *a, PyObject *b) + + This is the same as ``PyDict_Merge(a, b, 1)`` in C, or ``a.update(b)`` in + Python. Return ``0`` on success or ``-1`` if an exception was raised. + + .. versionadded:: 2.2 + + +.. cfunction:: int PyDict_MergeFromSeq2(PyObject *a, PyObject *seq2, int override) + + Update or merge into dictionary *a*, from the key-value pairs in *seq2*. *seq2* + must be an iterable object producing iterable objects of length 2, viewed as + key-value pairs. In case of duplicate keys, the last wins if *override* is + true, else the first wins. Return ``0`` on success or ``-1`` if an exception was + raised. Equivalent Python (except for the return value):: + + def PyDict_MergeFromSeq2(a, seq2, override): + for key, value in seq2: + if override or key not in a: + a[key] = value + + .. versionadded:: 2.2 + + +.. _otherobjects: + +Other Objects +============= + + +..
_classobjects: + +Class Objects +------------- + +.. index:: object: class + +Note that the class objects described here represent old-style classes, which +will go away in Python 3. When creating new types for extension modules, you +will want to work with type objects (section :ref:`typeobjects`). + + +.. ctype:: PyClassObject + + The C structure of the objects used to describe built-in classes. + + +.. cvar:: PyObject* PyClass_Type + + .. index:: single: ClassType (in module types) + + This is the type object for class objects; it is the same object as + ``types.ClassType`` in the Python layer. + + +.. cfunction:: int PyClass_Check(PyObject *o) + + Return true if the object *o* is a class object, including instances of types + derived from the standard class object. Return false in all other cases. + + +.. cfunction:: int PyClass_IsSubclass(PyObject *klass, PyObject *base) + + Return true if *klass* is a subclass of *base*. Return false in all other cases. + + +.. _fileobjects: + +File Objects +------------ + +.. index:: object: file + +Python's built-in file objects are implemented entirely on the :ctype:`FILE\*` +support from the C standard library. This is an implementation detail and may +change in future releases of Python. + + +.. ctype:: PyFileObject + + This subtype of :ctype:`PyObject` represents a Python file object. + + +.. cvar:: PyTypeObject PyFile_Type + + .. index:: single: FileType (in module types) + + This instance of :ctype:`PyTypeObject` represents the Python file type. This is + exposed to Python programs as ``file`` and ``types.FileType``. + + +.. cfunction:: int PyFile_Check(PyObject *p) + + Return true if its argument is a :ctype:`PyFileObject` or a subtype of + :ctype:`PyFileObject`. + + .. versionchanged:: 2.2 + Allowed subtypes to be accepted. + + +.. cfunction:: int PyFile_CheckExact(PyObject *p) + + Return true if its argument is a :ctype:`PyFileObject`, but not a subtype of + :ctype:`PyFileObject`. + + .. versionadded:: 2.2 + + +.. 
cfunction:: PyObject* PyFile_FromString(char *filename, char *mode) + + .. index:: single: fopen() + + On success, return a new file object that is opened on the file given by + *filename*, with a file mode given by *mode*, where *mode* has the same + semantics as the standard C routine :cfunc:`fopen`. On failure, return *NULL*. + + +.. cfunction:: PyObject* PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE*)) + + Create a new :ctype:`PyFileObject` from the already-open standard C file + pointer, *fp*. The function *close* will be called when the file should be + closed. Return *NULL* on failure. + + +.. cfunction:: FILE* PyFile_AsFile(PyObject *p) + + Return the file object associated with *p* as a :ctype:`FILE\*`. + + +.. cfunction:: PyObject* PyFile_GetLine(PyObject *p, int n) + + .. index:: single: EOFError (built-in exception) + + Equivalent to ``p.readline([n])``, this function reads one line from the + object *p*. *p* may be a file object or any object with a :meth:`readline` + method. If *n* is ``0``, exactly one line is read, regardless of the length of + the line. If *n* is greater than ``0``, no more than *n* bytes will be read + from the file; a partial line can be returned. In both cases, an empty string + is returned if the end of the file is reached immediately. If *n* is less than + ``0``, however, one line is read regardless of length, but :exc:`EOFError` is + raised if the end of the file is reached immediately. + + +.. cfunction:: PyObject* PyFile_Name(PyObject *p) + + Return the name of the file specified by *p* as a string object. + + +.. cfunction:: void PyFile_SetBufSize(PyFileObject *p, int n) + + .. index:: single: setvbuf() + + Available on systems with :cfunc:`setvbuf` only. This should only be called + immediately after file object creation. + + +.. cfunction:: int PyFile_Encoding(PyFileObject *p, char *enc) + + Set the file's encoding for Unicode output to *enc*. Return 1 on success and 0 + on failure. + + .. 
versionadded:: 2.3 + + +.. cfunction:: int PyFile_SoftSpace(PyObject *p, int newflag) + + .. index:: single: softspace (file attribute) + + This function exists for internal use by the interpreter. Set the + :attr:`softspace` attribute of *p* to *newflag* and return the previous value. + *p* does not have to be a file object for this function to work properly; any + object is supported (though it's only interesting if the :attr:`softspace` + attribute can be set). This function clears any errors, and will return ``0`` + as the previous value if the attribute either does not exist or if there were + errors in retrieving it. There is no way to detect errors from this function, + but doing so should not be needed. + + +.. cfunction:: int PyFile_WriteObject(PyObject *obj, PyObject *p, int flags) + + .. index:: single: Py_PRINT_RAW + + Write object *obj* to file object *p*. The only supported flag for *flags* is + :const:`Py_PRINT_RAW`; if given, the :func:`str` of the object is written + instead of the :func:`repr`. Return ``0`` on success or ``-1`` on failure; the + appropriate exception will be set. + + +.. cfunction:: int PyFile_WriteString(const char *s, PyObject *p) + + Write string *s* to file object *p*. Return ``0`` on success or ``-1`` on + failure; the appropriate exception will be set. + + +.. _instanceobjects: + +Instance Objects +---------------- + +.. index:: object: instance + +There are very few functions specific to instance objects. + + +.. cvar:: PyTypeObject PyInstance_Type + + Type object for class instances. + + +.. cfunction:: int PyInstance_Check(PyObject *obj) + + Return true if *obj* is an instance. + + +.. cfunction:: PyObject* PyInstance_New(PyObject *class, PyObject *arg, PyObject *kw) + + Create a new instance of a specific class. The parameters *arg* and *kw* are + used as the positional and keyword parameters to the object's constructor. + + +..
cfunction:: PyObject* PyInstance_NewRaw(PyObject *class, PyObject *dict) + + Create a new instance of a specific class without calling its constructor. + *class* is the class of new object. The *dict* parameter will be used as the + object's :attr:`__dict__`; if *NULL*, a new dictionary will be created for the + instance. + + +.. _function-objects: + +Function Objects +---------------- + +.. index:: object: function + +There are a few functions specific to Python functions. + + +.. ctype:: PyFunctionObject + + The C structure used for functions. + + +.. cvar:: PyTypeObject PyFunction_Type + + .. index:: single: FunctionType (in module types) + + This is an instance of :ctype:`PyTypeObject` and represents the Python function + type. It is exposed to Python programmers as ``types.FunctionType``. + + +.. cfunction:: int PyFunction_Check(PyObject *o) + + Return true if *o* is a function object (has type :cdata:`PyFunction_Type`). + The parameter must not be *NULL*. + + +.. cfunction:: PyObject* PyFunction_New(PyObject *code, PyObject *globals) + + Return a new function object associated with the code object *code*. *globals* + must be a dictionary with the global variables accessible to the function. + + The function's docstring, name and *__module__* are retrieved from the code + object, the argument defaults and closure are set to *NULL*. + + +.. cfunction:: PyObject* PyFunction_GetCode(PyObject *op) + + Return the code object associated with the function object *op*. + + +.. cfunction:: PyObject* PyFunction_GetGlobals(PyObject *op) + + Return the globals dictionary associated with the function object *op*. + + +.. cfunction:: PyObject* PyFunction_GetModule(PyObject *op) + + Return the *__module__* attribute of the function object *op*. This is normally + a string containing the module name, but can be set to any other object by + Python code. + + +..
cfunction:: PyObject* PyFunction_GetDefaults(PyObject *op) + + Return the argument default values of the function object *op*. This can be a + tuple of arguments or *NULL*. + + +.. cfunction:: int PyFunction_SetDefaults(PyObject *op, PyObject *defaults) + + Set the argument default values for the function object *op*. *defaults* must be + *Py_None* or a tuple. + + Raises :exc:`SystemError` and returns ``-1`` on failure. + + +.. cfunction:: PyObject* PyFunction_GetClosure(PyObject *op) + + Return the closure associated with the function object *op*. This can be *NULL* + or a tuple of cell objects. + + +.. cfunction:: int PyFunction_SetClosure(PyObject *op, PyObject *closure) + + Set the closure associated with the function object *op*. *closure* must be + *Py_None* or a tuple of cell objects. + + Raises :exc:`SystemError` and returns ``-1`` on failure. + + +.. _method-objects: + +Method Objects +-------------- + +.. index:: object: method + +There are some useful functions for working with method objects. + + +.. cvar:: PyTypeObject PyMethod_Type + + .. index:: single: MethodType (in module types) + + This instance of :ctype:`PyTypeObject` represents the Python method type. This + is exposed to Python programs as ``types.MethodType``. + + +.. cfunction:: int PyMethod_Check(PyObject *o) + + Return true if *o* is a method object (has type :cdata:`PyMethod_Type`). The + parameter must not be *NULL*. + + +.. cfunction:: PyObject* PyMethod_New(PyObject *func, PyObject *self, PyObject *class) + + Return a new method object, with *func* being any callable object; this is the + function that will be called when the method is called. If this method should + be bound to an instance, *self* should be the instance and *class* should be the + class of *self*, otherwise *self* should be *NULL* and *class* should be the + class which provides the unbound method. + + +..
cfunction:: PyObject* PyMethod_Class(PyObject *meth) + + Return the class object from which the method *meth* was created; if this was + created from an instance, it will be the class of the instance. + + +.. cfunction:: PyObject* PyMethod_GET_CLASS(PyObject *meth) + + Macro version of :cfunc:`PyMethod_Class` which avoids error checking. + + +.. cfunction:: PyObject* PyMethod_Function(PyObject *meth) + + Return the function object associated with the method *meth*. + + +.. cfunction:: PyObject* PyMethod_GET_FUNCTION(PyObject *meth) + + Macro version of :cfunc:`PyMethod_Function` which avoids error checking. + + +.. cfunction:: PyObject* PyMethod_Self(PyObject *meth) + + Return the instance associated with the method *meth* if it is bound, otherwise + return *NULL*. + + +.. cfunction:: PyObject* PyMethod_GET_SELF(PyObject *meth) + + Macro version of :cfunc:`PyMethod_Self` which avoids error checking. + + +.. _moduleobjects: + +Module Objects +-------------- + +.. index:: object: module + +There are only a few functions special to module objects. + + +.. cvar:: PyTypeObject PyModule_Type + + .. index:: single: ModuleType (in module types) + + This instance of :ctype:`PyTypeObject` represents the Python module type. This + is exposed to Python programs as ``types.ModuleType``. + + +.. cfunction:: int PyModule_Check(PyObject *p) + + Return true if *p* is a module object, or a subtype of a module object. + + .. versionchanged:: 2.2 + Allowed subtypes to be accepted. + + +.. cfunction:: int PyModule_CheckExact(PyObject *p) + + Return true if *p* is a module object, but not a subtype of + :cdata:`PyModule_Type`. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyModule_New(const char *name) + + .. index:: + single: __name__ (module attribute) + single: __doc__ (module attribute) + single: __file__ (module attribute) + + Return a new module object with the :attr:`__name__` attribute set to *name*. 
+ Only the module's :attr:`__doc__` and :attr:`__name__` attributes are filled in; + the caller is responsible for providing a :attr:`__file__` attribute. + + +.. cfunction:: PyObject* PyModule_GetDict(PyObject *module) + + .. index:: single: __dict__ (module attribute) + + Return the dictionary object that implements *module*'s namespace; this object + is the same as the :attr:`__dict__` attribute of the module object. This + function never fails. It is recommended extensions use other + :cfunc:`PyModule_\*` and :cfunc:`PyObject_\*` functions rather than directly + manipulate a module's :attr:`__dict__`. + + +.. cfunction:: char* PyModule_GetName(PyObject *module) + + .. index:: + single: __name__ (module attribute) + single: SystemError (built-in exception) + + Return *module*'s :attr:`__name__` value. If the module does not provide one, + or if it is not a string, :exc:`SystemError` is raised and *NULL* is returned. + + +.. cfunction:: char* PyModule_GetFilename(PyObject *module) + + .. index:: + single: __file__ (module attribute) + single: SystemError (built-in exception) + + Return the name of the file from which *module* was loaded using *module*'s + :attr:`__file__` attribute. If this is not defined, or if it is not a string, + raise :exc:`SystemError` and return *NULL*. + + +.. cfunction:: int PyModule_AddObject(PyObject *module, const char *name, PyObject *value) + + Add an object to *module* as *name*. This is a convenience function which can + be used from the module's initialization function. This steals a reference to + *value*. Return ``-1`` on error, ``0`` on success. + + .. versionadded:: 2.0 + + +.. cfunction:: int PyModule_AddIntConstant(PyObject *module, const char *name, long value) + + Add an integer constant to *module* as *name*. This convenience function can be + used from the module's initialization function. Return ``-1`` on error, ``0`` on + success. + + .. versionadded:: 2.0 + + +.. 
cfunction:: int PyModule_AddStringConstant(PyObject *module, const char *name, const char *value) + + Add a string constant to *module* as *name*. This convenience function can be + used from the module's initialization function. The string *value* must be + null-terminated. Return ``-1`` on error, ``0`` on success. + + .. versionadded:: 2.0 + + +.. _iterator-objects: + +Iterator Objects +---------------- + +Python provides two general-purpose iterator objects. The first, a sequence +iterator, works with an arbitrary sequence supporting the :meth:`__getitem__` +method. The second works with a callable object and a sentinel value, calling +the callable for each item in the sequence, and ending the iteration when the +sentinel value is returned. + + +.. cvar:: PyTypeObject PySeqIter_Type + + Type object for iterator objects returned by :cfunc:`PySeqIter_New` and the + one-argument form of the :func:`iter` built-in function for built-in sequence + types. + + .. versionadded:: 2.2 + + +.. cfunction:: int PySeqIter_Check(op) + + Return true if the type of *op* is :cdata:`PySeqIter_Type`. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PySeqIter_New(PyObject *seq) + + Return an iterator that works with a general sequence object, *seq*. The + iteration ends when the sequence raises :exc:`IndexError` for the subscripting + operation. + + .. versionadded:: 2.2 + + +.. cvar:: PyTypeObject PyCallIter_Type + + Type object for iterator objects returned by :cfunc:`PyCallIter_New` and the + two-argument form of the :func:`iter` built-in function. + + .. versionadded:: 2.2 + + +.. cfunction:: int PyCallIter_Check(op) + + Return true if the type of *op* is :cdata:`PyCallIter_Type`. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyCallIter_New(PyObject *callable, PyObject *sentinel) + + Return a new iterator. 
The first parameter, *callable*, can be any Python + callable object that can be called with no parameters; each call to it should + return the next item in the iteration. When *callable* returns a value equal to + *sentinel*, the iteration will be terminated. + + .. versionadded:: 2.2 + + +.. _descriptor-objects: + +Descriptor Objects +------------------ + +"Descriptors" are objects that describe some attribute of an object. They are +found in the dictionary of type objects. + + +.. cvar:: PyTypeObject PyProperty_Type + + The type object for the built-in descriptor types. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyDescr_NewGetSet(PyTypeObject *type, struct PyGetSetDef *getset) + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyDescr_NewMember(PyTypeObject *type, struct PyMemberDef *meth) + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyDescr_NewMethod(PyTypeObject *type, struct PyMethodDef *meth) + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyDescr_NewWrapper(PyTypeObject *type, struct wrapperbase *wrapper, void *wrapped) + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyDescr_NewClassMethod(PyTypeObject *type, PyMethodDef *method) + + .. versionadded:: 2.3 + + +.. cfunction:: int PyDescr_IsData(PyObject *descr) + + Return true if the descriptor object *descr* describes a data attribute, or + false if it describes a method. *descr* must be a descriptor object; there is + no error checking. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyWrapper_New(PyObject *, PyObject *) + + .. versionadded:: 2.2 + + +.. _slice-objects: + +Slice Objects +------------- + + +.. cvar:: PyTypeObject PySlice_Type + + .. index:: single: SliceType (in module types) + + The type object for slice objects. This is the same as ``slice`` and + ``types.SliceType``. + + +.. cfunction:: int PySlice_Check(PyObject *ob) + + Return true if *ob* is a slice object; *ob* must not be *NULL*. + + +..
cfunction:: PyObject* PySlice_New(PyObject *start, PyObject *stop, PyObject *step) + + Return a new slice object with the given values. The *start*, *stop*, and + *step* parameters are used as the values of the slice object attributes of the + same names. Any of the values may be *NULL*, in which case ``None`` will be + used for the corresponding attribute. Return *NULL* if the new object could not + be allocated. + + +.. cfunction:: int PySlice_GetIndices(PySliceObject *slice, Py_ssize_t length, Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step) + + Retrieve the start, stop and step indices from the slice object *slice*, + assuming a sequence of length *length*. Treats indices greater than *length* as + errors. + + Returns 0 on success and -1 on error with no exception set (unless one of the + indices was not :const:`None` and failed to be converted to an integer, in which + case -1 is returned with an exception set). + + You probably do not want to use this function. If you want to use slice objects + in versions of Python prior to 2.3, you would probably do well to incorporate + the source of :cfunc:`PySlice_GetIndicesEx`, suitably renamed, in the source of + your extension. + + +.. cfunction:: int PySlice_GetIndicesEx(PySliceObject *slice, Py_ssize_t length, Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step, Py_ssize_t *slicelength) + + Usable replacement for :cfunc:`PySlice_GetIndices`. Retrieve the start, stop, + and step indices from the slice object *slice* assuming a sequence of length + *length*, and store the length of the slice in *slicelength*. Out of bounds + indices are clipped in a manner consistent with the handling of normal slices. + + Returns 0 on success and -1 on error with exception set. + + .. versionadded:: 2.3 + + +.. _weakrefobjects: + +Weak Reference Objects +---------------------- + +Python supports *weak references* as first-class objects. There are two +specific object types which directly implement weak references.
The first is a +simple reference object, and the second acts as a proxy for the original object +as much as it can. + + +.. cfunction:: int PyWeakref_Check(ob) + + Return true if *ob* is either a reference or proxy object. + + .. versionadded:: 2.2 + + +.. cfunction:: int PyWeakref_CheckRef(ob) + + Return true if *ob* is a reference object. + + .. versionadded:: 2.2 + + +.. cfunction:: int PyWeakref_CheckProxy(ob) + + Return true if *ob* is a proxy object. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyWeakref_NewRef(PyObject *ob, PyObject *callback) + + Return a weak reference object for the object *ob*. This will always return + a new reference, but is not guaranteed to create a new object; an existing + reference object may be returned. The second parameter, *callback*, can be a + callable object that receives notification when *ob* is garbage collected; it + should accept a single parameter, which will be the weak reference object + itself. *callback* may also be ``None`` or *NULL*. If *ob* is not a + weakly-referencable object, or if *callback* is not callable, ``None``, or + *NULL*, this will return *NULL* and raise :exc:`TypeError`. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyWeakref_NewProxy(PyObject *ob, PyObject *callback) + + Return a weak reference proxy object for the object *ob*. This will always + return a new reference, but is not guaranteed to create a new object; an + existing proxy object may be returned. The second parameter, *callback*, can + be a callable object that receives notification when *ob* is garbage + collected; it should accept a single parameter, which will be the weak + reference object itself. *callback* may also be ``None`` or *NULL*. If *ob* + is not a weakly-referencable object, or if *callback* is not callable, + ``None``, or *NULL*, this will return *NULL* and raise :exc:`TypeError`. + + .. versionadded:: 2.2 + + +.. 
cfunction:: PyObject* PyWeakref_GetObject(PyObject *ref) + + Return the referenced object from a weak reference, *ref*. If the referent is + no longer live, returns ``None``. + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* PyWeakref_GET_OBJECT(PyObject *ref) + + Similar to :cfunc:`PyWeakref_GetObject`, but implemented as a macro that does no + error checking. + + .. versionadded:: 2.2 + + +.. _cobjects: + +CObjects +-------- + +.. index:: object: CObject + +Refer to *Extending and Embedding the Python Interpreter*, section 1.12, +"Providing a C API for an Extension Module," for more information on using these +objects. + + +.. ctype:: PyCObject + + This subtype of :ctype:`PyObject` represents an opaque value, useful for C + extension modules who need to pass an opaque value (as a :ctype:`void\*` + pointer) through Python code to other C code. It is often used to make a C + function pointer defined in one module available to other modules, so the + regular import mechanism can be used to access C APIs defined in dynamically + loaded modules. + + +.. cfunction:: int PyCObject_Check(PyObject *p) + + Return true if its argument is a :ctype:`PyCObject`. + + +.. cfunction:: PyObject* PyCObject_FromVoidPtr(void* cobj, void (*destr)(void *)) + + Create a :ctype:`PyCObject` from the ``void *`` *cobj*. The *destr* function + will be called when the object is reclaimed, unless it is *NULL*. + + +.. cfunction:: PyObject* PyCObject_FromVoidPtrAndDesc(void* cobj, void* desc, void (*destr)(void *, void *)) + + Create a :ctype:`PyCObject` from the :ctype:`void \*` *cobj*. The *destr* + function will be called when the object is reclaimed. The *desc* argument can + be used to pass extra callback data for the destructor function. + + +.. cfunction:: void* PyCObject_AsVoidPtr(PyObject* self) + + Return the object :ctype:`void \*` that the :ctype:`PyCObject` *self* was + created with. + + +.. 
cfunction:: void* PyCObject_GetDesc(PyObject* self) + + Return the description :ctype:`void \*` that the :ctype:`PyCObject` *self* was + created with. + + +.. cfunction:: int PyCObject_SetVoidPtr(PyObject* self, void* cobj) + + Set the void pointer inside *self* to *cobj*. The :ctype:`PyCObject` must not + have an associated destructor. Return true on success, false on failure. + + +.. _cell-objects: + +Cell Objects +------------ + +"Cell" objects are used to implement variables referenced by multiple scopes. +For each such variable, a cell object is created to store the value; the local +variables of each stack frame that references the value contains a reference to +the cells from outer scopes which also use that variable. When the value is +accessed, the value contained in the cell is used instead of the cell object +itself. This de-referencing of the cell object requires support from the +generated byte-code; these are not automatically de-referenced when accessed. +Cell objects are not likely to be useful elsewhere. + + +.. ctype:: PyCellObject + + The C structure used for cell objects. + + +.. cvar:: PyTypeObject PyCell_Type + + The type object corresponding to cell objects. + + +.. cfunction:: int PyCell_Check(ob) + + Return true if *ob* is a cell object; *ob* must not be *NULL*. + + +.. cfunction:: PyObject* PyCell_New(PyObject *ob) + + Create and return a new cell object containing the value *ob*. The parameter may + be *NULL*. + + +.. cfunction:: PyObject* PyCell_Get(PyObject *cell) + + Return the contents of the cell *cell*. + + +.. cfunction:: PyObject* PyCell_GET(PyObject *cell) + + Return the contents of the cell *cell*, but without checking that *cell* is + non-*NULL* and a cell object. + + +.. cfunction:: int PyCell_Set(PyObject *cell, PyObject *value) + + Set the contents of the cell object *cell* to *value*. This releases the + reference to any current content of the cell. *value* may be *NULL*. 
*cell* + must be non-*NULL*; if it is not a cell object, ``-1`` will be returned. On + success, ``0`` will be returned. + + +.. cfunction:: void PyCell_SET(PyObject *cell, PyObject *value) + + Sets the value of the cell object *cell* to *value*. No reference counts are + adjusted, and no checks are made for safety; *cell* must be non-*NULL* and must + be a cell object. + + +.. _gen-objects: + +Generator Objects +----------------- + +Generator objects are what Python uses to implement generator iterators. They +are normally created by iterating over a function that yields values, rather +than explicitly calling :cfunc:`PyGen_New`. + + +.. ctype:: PyGenObject + + The C structure used for generator objects. + + +.. cvar:: PyTypeObject PyGen_Type + + The type object corresponding to generator objects. + + +.. cfunction:: int PyGen_Check(ob) + + Return true if *ob* is a generator object; *ob* must not be *NULL*. + + +.. cfunction:: int PyGen_CheckExact(ob) + + Return true if *ob*'s type is exactly *PyGen_Type*; *ob* must not + be *NULL*. + + +.. cfunction:: PyObject* PyGen_New(PyFrameObject *frame) + + Create and return a new generator object based on the *frame* object. A + reference to *frame* is stolen by this function. The parameter must not be + *NULL*. + + +.. _datetimeobjects: + +DateTime Objects +---------------- + +Various date and time objects are supplied by the :mod:`datetime` module. +Before using any of these functions, the header file :file:`datetime.h` must be +included in your source (note that this is not included by :file:`Python.h`), +and the macro :cfunc:`PyDateTime_IMPORT` must be invoked. The macro puts a +pointer to a C structure into a static variable, ``PyDateTimeAPI``, that is +used by the following macros. + +Type-check macros: + + +.. cfunction:: int PyDate_Check(PyObject *ob) + + Return true if *ob* is of type :cdata:`PyDateTime_DateType` or a subtype of + :cdata:`PyDateTime_DateType`. *ob* must not be *NULL*. + + .. 
versionadded:: 2.4 + + +.. cfunction:: int PyDate_CheckExact(PyObject *ob) + + Return true if *ob* is of type :cdata:`PyDateTime_DateType`. *ob* must not be + *NULL*. + + .. versionadded:: 2.4 + + +.. cfunction:: int PyDateTime_Check(PyObject *ob) + + Return true if *ob* is of type :cdata:`PyDateTime_DateTimeType` or a subtype of + :cdata:`PyDateTime_DateTimeType`. *ob* must not be *NULL*. + + .. versionadded:: 2.4 + + +.. cfunction:: int PyDateTime_CheckExact(PyObject *ob) + + Return true if *ob* is of type :cdata:`PyDateTime_DateTimeType`. *ob* must not + be *NULL*. + + .. versionadded:: 2.4 + + +.. cfunction:: int PyTime_Check(PyObject *ob) + + Return true if *ob* is of type :cdata:`PyDateTime_TimeType` or a subtype of + :cdata:`PyDateTime_TimeType`. *ob* must not be *NULL*. + + .. versionadded:: 2.4 + + +.. cfunction:: int PyTime_CheckExact(PyObject *ob) + + Return true if *ob* is of type :cdata:`PyDateTime_TimeType`. *ob* must not be + *NULL*. + + .. versionadded:: 2.4 + + +.. cfunction:: int PyDelta_Check(PyObject *ob) + + Return true if *ob* is of type :cdata:`PyDateTime_DeltaType` or a subtype of + :cdata:`PyDateTime_DeltaType`. *ob* must not be *NULL*. + + .. versionadded:: 2.4 + + +.. cfunction:: int PyDelta_CheckExact(PyObject *ob) + + Return true if *ob* is of type :cdata:`PyDateTime_DeltaType`. *ob* must not be + *NULL*. + + .. versionadded:: 2.4 + + +.. cfunction:: int PyTZInfo_Check(PyObject *ob) + + Return true if *ob* is of type :cdata:`PyDateTime_TZInfoType` or a subtype of + :cdata:`PyDateTime_TZInfoType`. *ob* must not be *NULL*. + + .. versionadded:: 2.4 + + +.. cfunction:: int PyTZInfo_CheckExact(PyObject *ob) + + Return true if *ob* is of type :cdata:`PyDateTime_TZInfoType`. *ob* must not be + *NULL*. + + .. versionadded:: 2.4 + +Macros to create objects: + + +.. cfunction:: PyObject* PyDate_FromDate(int year, int month, int day) + + Return a ``datetime.date`` object with the specified year, month and day. + + .. versionadded:: 2.4 + + +.. 
cfunction:: PyObject* PyDateTime_FromDateAndTime(int year, int month, int day, int hour, int minute, int second, int usecond) + + Return a ``datetime.datetime`` object with the specified year, month, day, hour, + minute, second and microsecond. + + .. versionadded:: 2.4 + + +.. cfunction:: PyObject* PyTime_FromTime(int hour, int minute, int second, int usecond) + + Return a ``datetime.time`` object with the specified hour, minute, second and + microsecond. + + .. versionadded:: 2.4 + + +.. cfunction:: PyObject* PyDelta_FromDSU(int days, int seconds, int useconds) + + Return a ``datetime.timedelta`` object representing the given number of days, + seconds and microseconds. Normalization is performed so that the resulting + number of microseconds and seconds lie in the ranges documented for + ``datetime.timedelta`` objects. + + .. versionadded:: 2.4 + +Macros to extract fields from date objects. The argument must be an instance of +:cdata:`PyDateTime_Date`, including subclasses (such as +:cdata:`PyDateTime_DateTime`). The argument must not be *NULL*, and the type is +not checked: + + +.. cfunction:: int PyDateTime_GET_YEAR(PyDateTime_Date *o) + + Return the year, as a positive int. + + .. versionadded:: 2.4 + + +.. cfunction:: int PyDateTime_GET_MONTH(PyDateTime_Date *o) + + Return the month, as an int from 1 through 12. + + .. versionadded:: 2.4 + + +.. cfunction:: int PyDateTime_GET_DAY(PyDateTime_Date *o) + + Return the day, as an int from 1 through 31. + + .. versionadded:: 2.4 + +Macros to extract fields from datetime objects. The argument must be an +instance of :cdata:`PyDateTime_DateTime`, including subclasses. The argument +must not be *NULL*, and the type is not checked: + + +.. cfunction:: int PyDateTime_DATE_GET_HOUR(PyDateTime_DateTime *o) + + Return the hour, as an int from 0 through 23. + + .. versionadded:: 2.4 + + +.. cfunction:: int PyDateTime_DATE_GET_MINUTE(PyDateTime_DateTime *o) + + Return the minute, as an int from 0 through 59. + + .. 
versionadded:: 2.4 + + +.. cfunction:: int PyDateTime_DATE_GET_SECOND(PyDateTime_DateTime *o) + + Return the second, as an int from 0 through 59. + + .. versionadded:: 2.4 + + +.. cfunction:: int PyDateTime_DATE_GET_MICROSECOND(PyDateTime_DateTime *o) + + Return the microsecond, as an int from 0 through 999999. + + .. versionadded:: 2.4 + +Macros to extract fields from time objects. The argument must be an instance of +:cdata:`PyDateTime_Time`, including subclasses. The argument must not be *NULL*, +and the type is not checked: + + +.. cfunction:: int PyDateTime_TIME_GET_HOUR(PyDateTime_Time *o) + + Return the hour, as an int from 0 through 23. + + .. versionadded:: 2.4 + + +.. cfunction:: int PyDateTime_TIME_GET_MINUTE(PyDateTime_Time *o) + + Return the minute, as an int from 0 through 59. + + .. versionadded:: 2.4 + + +.. cfunction:: int PyDateTime_TIME_GET_SECOND(PyDateTime_Time *o) + + Return the second, as an int from 0 through 59. + + .. versionadded:: 2.4 + + +.. cfunction:: int PyDateTime_TIME_GET_MICROSECOND(PyDateTime_Time *o) + + Return the microsecond, as an int from 0 through 999999. + + .. versionadded:: 2.4 + +Macros for the convenience of modules implementing the DB API: + + +.. cfunction:: PyObject* PyDateTime_FromTimestamp(PyObject *args) + + Create and return a new ``datetime.datetime`` object given an argument tuple + suitable for passing to ``datetime.datetime.fromtimestamp()``. + + .. versionadded:: 2.4 + + +.. cfunction:: PyObject* PyDate_FromTimestamp(PyObject *args) + + Create and return a new ``datetime.date`` object given an argument tuple + suitable for passing to ``datetime.date.fromtimestamp()``. + + .. versionadded:: 2.4 + + +.. _setobjects: + +Set Objects +----------- + +.. sectionauthor:: Raymond D. Hettinger + + +.. index:: + object: set + object: frozenset + +.. versionadded:: 2.5 + +This section details the public API for :class:`set` and :class:`frozenset` +objects. 
Any functionality not listed below is best accessed using either +the abstract object protocol (including :cfunc:`PyObject_CallMethod`, +:cfunc:`PyObject_RichCompareBool`, :cfunc:`PyObject_Hash`, +:cfunc:`PyObject_Repr`, :cfunc:`PyObject_IsTrue`, :cfunc:`PyObject_Print`, and +:cfunc:`PyObject_GetIter`) or the abstract number protocol (including +:cfunc:`PyNumber_And`, :cfunc:`PyNumber_Subtract`, :cfunc:`PyNumber_Or`, +:cfunc:`PyNumber_Xor`, :cfunc:`PyNumber_InPlaceAnd`, +:cfunc:`PyNumber_InPlaceSubtract`, :cfunc:`PyNumber_InPlaceOr`, and +:cfunc:`PyNumber_InPlaceXor`). + + +.. ctype:: PySetObject + + This subtype of :ctype:`PyObject` is used to hold the internal data for both + :class:`set` and :class:`frozenset` objects. It is like a :ctype:`PyDictObject` + in that it is a fixed size for small sets (much like tuple storage) and will + point to a separate, variable sized block of memory for medium and large sized + sets (much like list storage). None of the fields of this structure should be + considered public; all are subject to change. All access should be done through + the documented API rather than by manipulating the values in the structure. + + +.. cvar:: PyTypeObject PySet_Type + + This is an instance of :ctype:`PyTypeObject` representing the Python + :class:`set` type. + + +.. cvar:: PyTypeObject PyFrozenSet_Type + + This is an instance of :ctype:`PyTypeObject` representing the Python + :class:`frozenset` type. + +The following type check macros work on pointers to any Python object. Likewise, +the constructor functions work with any iterable Python object. + + +.. cfunction:: int PyAnySet_Check(PyObject *p) + + Return true if *p* is a :class:`set` object, a :class:`frozenset` object, or an + instance of a subtype. + + +.. cfunction:: int PyAnySet_CheckExact(PyObject *p) + + Return true if *p* is a :class:`set` object or a :class:`frozenset` object but + not an instance of a subtype. + + +.. 
cfunction:: int PyFrozenSet_CheckExact(PyObject *p) + + Return true if *p* is a :class:`frozenset` object but not an instance of a + subtype. + + +.. cfunction:: PyObject* PySet_New(PyObject *iterable) + + Return a new :class:`set` containing objects returned by the *iterable*. The + *iterable* may be *NULL* to create a new empty set. Return the new set on + success or *NULL* on failure. Raise :exc:`TypeError` if *iterable* is not + actually iterable. The constructor is also useful for copying a set + (``c=set(s)``). + + +.. cfunction:: PyObject* PyFrozenSet_New(PyObject *iterable) + + Return a new :class:`frozenset` containing objects returned by the *iterable*. + The *iterable* may be *NULL* to create a new empty frozenset. Return the new + set on success or *NULL* on failure. Raise :exc:`TypeError` if *iterable* is + not actually iterable. + +The following functions and macros are available for instances of :class:`set` +or :class:`frozenset` or instances of their subtypes. + + +.. cfunction:: int PySet_Size(PyObject *anyset) + + .. index:: builtin: len + + Return the length of a :class:`set` or :class:`frozenset` object. Equivalent to + ``len(anyset)``. Raises a :exc:`PyExc_SystemError` if *anyset* is not a + :class:`set`, :class:`frozenset`, or an instance of a subtype. + + +.. cfunction:: int PySet_GET_SIZE(PyObject *anyset) + + Macro form of :cfunc:`PySet_Size` without error checking. + + +.. cfunction:: int PySet_Contains(PyObject *anyset, PyObject *key) + + Return 1 if found, 0 if not found, and -1 if an error is encountered. Unlike + the Python :meth:`__contains__` method, this function does not automatically + convert unhashable sets into temporary frozensets. Raise a :exc:`TypeError` if + the *key* is unhashable. Raise :exc:`PyExc_SystemError` if *anyset* is not a + :class:`set`, :class:`frozenset`, or an instance of a subtype. 
+ +The following functions are available for instances of :class:`set` or its +subtypes but not for instances of :class:`frozenset` or its subtypes. + + +.. cfunction:: int PySet_Add(PyObject *set, PyObject *key) + + Add *key* to a :class:`set` instance. Does not apply to :class:`frozenset` + instances. Return 0 on success or -1 on failure. Raise a :exc:`TypeError` if + the *key* is unhashable. Raise a :exc:`MemoryError` if there is no room to grow. + Raise a :exc:`SystemError` if *set* is not an instance of :class:`set` or its + subtype. + + +.. cfunction:: int PySet_Discard(PyObject *set, PyObject *key) + + Return 1 if found and removed, 0 if not found (no action taken), and -1 if an + error is encountered. Does not raise :exc:`KeyError` for missing keys. Raise a + :exc:`TypeError` if the *key* is unhashable. Unlike the Python :meth:`discard` + method, this function does not automatically convert unhashable sets into + temporary frozensets. Raise a :exc:`SystemError` if *set* is not an + instance of :class:`set` or its subtype. + + +.. cfunction:: PyObject* PySet_Pop(PyObject *set) + + Return a new reference to an arbitrary object in the *set*, and remove the + object from the *set*. Return *NULL* on failure. Raise :exc:`KeyError` if the + set is empty. Raise a :exc:`SystemError` if *set* is not an instance of + :class:`set` or its subtype. + + +.. cfunction:: int PySet_Clear(PyObject *set) + + Empty an existing set of all elements. + diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst new file mode 100644 index 0000000..68ac090 --- /dev/null +++ b/Doc/c-api/exceptions.rst @@ -0,0 +1,515 @@ +.. highlightlang:: c + + +.. _exceptionhandling: + +****************** +Exception Handling +****************** + +The functions described in this chapter will let you handle and raise Python +exceptions. It is important to understand some of the basics of Python +exception handling. 
It works somewhat like the Unix :cdata:`errno` variable: +there is a global indicator (per thread) of the last error that occurred. Most +functions don't clear this on success, but will set it to indicate the cause of +the error on failure. Most functions also return an error indicator, usually +*NULL* if they are supposed to return a pointer, or ``-1`` if they return an +integer (exception: the :cfunc:`PyArg_\*` functions return ``1`` for success and +``0`` for failure). + +When a function must fail because some function it called failed, it generally +doesn't set the error indicator; the function it called already set it. It is +responsible for either handling the error and clearing the exception or +returning after cleaning up any resources it holds (such as object references or +memory allocations); it should *not* continue normally if it is not prepared to +handle the error. If returning due to an error, it is important to indicate to +the caller that an error has been set. If the error is not handled or carefully +propagated, additional calls into the Python/C API may not behave as intended +and may fail in mysterious ways. + +The error indicator consists of three Python objects corresponding to the result +of ``sys.exc_info()``. API functions exist to interact with the error indicator +in various ways. There is a separate error indicator for each thread. + +.. % XXX Order of these should be more thoughtful. +.. % Either alphabetical or some kind of structure. + + +.. cfunction:: void PyErr_Print() + + Print a standard traceback to ``sys.stderr`` and clear the error indicator. + Call this function only when the error indicator is set. (Otherwise it will + cause a fatal error!) + + +.. cfunction:: PyObject* PyErr_Occurred() + + Test whether the error indicator is set. If set, return the exception *type* + (the first argument to the last call to one of the :cfunc:`PyErr_Set\*` + functions or to :cfunc:`PyErr_Restore`). If not set, return *NULL*. 
You do not + own a reference to the return value, so you do not need to :cfunc:`Py_DECREF` + it. + + .. note:: + + Do not compare the return value to a specific exception; use + :cfunc:`PyErr_ExceptionMatches` instead, shown below. (The comparison could + easily fail since the exception may be an instance instead of a class, in the + case of a class exception, or it may be a subclass of the expected exception.) + + +.. cfunction:: int PyErr_ExceptionMatches(PyObject *exc) + + Equivalent to ``PyErr_GivenExceptionMatches(PyErr_Occurred(), exc)``. This + should only be called when an exception is actually set; a memory access + violation will occur if no exception has been raised. + + +.. cfunction:: int PyErr_GivenExceptionMatches(PyObject *given, PyObject *exc) + + Return true if the *given* exception matches the exception in *exc*. If *exc* + is a class object, this also returns true when *given* is an instance of a + subclass. If *exc* is a tuple, all exceptions in the tuple (and recursively in + subtuples) are searched for a match. If *given* is *NULL*, a memory access + violation will occur. + + +.. cfunction:: void PyErr_NormalizeException(PyObject**exc, PyObject**val, PyObject**tb) + + Under certain circumstances, the values returned by :cfunc:`PyErr_Fetch` below + can be "unnormalized", meaning that ``*exc`` is a class object but ``*val`` is + not an instance of the same class. This function can be used to instantiate + the class in that case. If the values are already normalized, nothing happens. + The delayed normalization is implemented to improve performance. + + +.. cfunction:: void PyErr_Clear() + + Clear the error indicator. If the error indicator is not set, there is no + effect. + + +.. cfunction:: void PyErr_Fetch(PyObject **ptype, PyObject **pvalue, PyObject **ptraceback) + + Retrieve the error indicator into three variables whose addresses are passed. + If the error indicator is not set, set all three variables to *NULL*. 
If it is + set, it will be cleared and you own a reference to each object retrieved. The + value and traceback object may be *NULL* even when the type object is not. + + .. note:: + + This function is normally only used by code that needs to handle exceptions or + by code that needs to save and restore the error indicator temporarily. + + +.. cfunction:: void PyErr_Restore(PyObject *type, PyObject *value, PyObject *traceback) + + Set the error indicator from the three objects. If the error indicator is + already set, it is cleared first. If the objects are *NULL*, the error + indicator is cleared. Do not pass a *NULL* type and non-*NULL* value or + traceback. The exception type should be a class. Do not pass an invalid + exception type or value. (Violating these rules will cause subtle problems + later.) This call takes away a reference to each object: you must own a + reference to each object before the call and after the call you no longer own + these references. (If you don't understand this, don't use this function. I + warned you.) + + .. note:: + + This function is normally only used by code that needs to save and restore the + error indicator temporarily; use :cfunc:`PyErr_Fetch` to save the current + exception state. + + +.. cfunction:: void PyErr_SetString(PyObject *type, const char *message) + + This is the most common way to set the error indicator. The first argument + specifies the exception type; it is normally one of the standard exceptions, + e.g. :cdata:`PyExc_RuntimeError`. You need not increment its reference count. + The second argument is an error message; it is converted to a string object. + + +.. cfunction:: void PyErr_SetObject(PyObject *type, PyObject *value) + + This function is similar to :cfunc:`PyErr_SetString` but lets you specify an + arbitrary Python object for the "value" of the exception. + + +.. cfunction:: PyObject* PyErr_Format(PyObject *exception, const char *format, ...) 
+ + This function sets the error indicator and returns *NULL*. *exception* should be + a Python exception (class, not an instance). *format* should be a string, + containing format codes, similar to :cfunc:`printf`. The ``width.precision`` + before a format code is parsed, but the width part is ignored. + + .. % This should be exactly the same as the table in PyString_FromFormat. + .. % One should just refer to the other. + .. % The descriptions for %zd and %zu are wrong, but the truth is complicated + .. % because not all compilers support the %z width modifier -- we fake it + .. % when necessary via interpolating PY_FORMAT_SIZE_T. + .. % %u, %lu, %zu should have "new in Python 2.5" blurbs. + + +-------------------+---------------+--------------------------------+ + | Format Characters | Type | Comment | + +===================+===============+================================+ + | :attr:`%%` | *n/a* | The literal % character. | + +-------------------+---------------+--------------------------------+ + | :attr:`%c` | int | A single character, | + | | | represented as a C int. | + +-------------------+---------------+--------------------------------+ + | :attr:`%d` | int | Exactly equivalent to | + | | | ``printf("%d")``. | + +-------------------+---------------+--------------------------------+ + | :attr:`%u` | unsigned int | Exactly equivalent to | + | | | ``printf("%u")``. | + +-------------------+---------------+--------------------------------+ + | :attr:`%ld` | long | Exactly equivalent to | + | | | ``printf("%ld")``. | + +-------------------+---------------+--------------------------------+ + | :attr:`%lu` | unsigned long | Exactly equivalent to | + | | | ``printf("%lu")``. | + +-------------------+---------------+--------------------------------+ + | :attr:`%zd` | Py_ssize_t | Exactly equivalent to | + | | | ``printf("%zd")``. 
| + +-------------------+---------------+--------------------------------+ + | :attr:`%zu` | size_t | Exactly equivalent to | + | | | ``printf("%zu")``. | + +-------------------+---------------+--------------------------------+ + | :attr:`%i` | int | Exactly equivalent to | + | | | ``printf("%i")``. | + +-------------------+---------------+--------------------------------+ + | :attr:`%x` | int | Exactly equivalent to | + | | | ``printf("%x")``. | + +-------------------+---------------+--------------------------------+ + | :attr:`%s` | char\* | A null-terminated C character | + | | | array. | + +-------------------+---------------+--------------------------------+ + | :attr:`%p` | void\* | The hex representation of a C | + | | | pointer. Mostly equivalent to | + | | | ``printf("%p")`` except that | + | | | it is guaranteed to start with | + | | | the literal ``0x`` regardless | + | | | of what the platform's | + | | | ``printf`` yields. | + +-------------------+---------------+--------------------------------+ + + An unrecognized format character causes all the rest of the format string to be + copied as-is to the result string, and any extra arguments discarded. + + +.. cfunction:: void PyErr_SetNone(PyObject *type) + + This is a shorthand for ``PyErr_SetObject(type, Py_None)``. + + +.. cfunction:: int PyErr_BadArgument() + + This is a shorthand for ``PyErr_SetString(PyExc_TypeError, message)``, where + *message* indicates that a built-in operation was invoked with an illegal + argument. It is mostly for internal use. + + +.. cfunction:: PyObject* PyErr_NoMemory() + + This is a shorthand for ``PyErr_SetNone(PyExc_MemoryError)``; it returns *NULL* + so an object allocation function can write ``return PyErr_NoMemory();`` when it + runs out of memory. + + +.. cfunction:: PyObject* PyErr_SetFromErrno(PyObject *type) + + .. 
index:: single: strerror() + + This is a convenience function to raise an exception when a C library function + has returned an error and set the C variable :cdata:`errno`. It constructs a + tuple object whose first item is the integer :cdata:`errno` value and whose + second item is the corresponding error message (gotten from :cfunc:`strerror`), + and then calls ``PyErr_SetObject(type, object)``. On Unix, when the + :cdata:`errno` value is :const:`EINTR`, indicating an interrupted system call, + this calls :cfunc:`PyErr_CheckSignals`, and if that set the error indicator, + leaves it set to that. The function always returns *NULL*, so a wrapper + function around a system call can write ``return PyErr_SetFromErrno(type);`` + when the system call returns an error. + + +.. cfunction:: PyObject* PyErr_SetFromErrnoWithFilename(PyObject *type, const char *filename) + + Similar to :cfunc:`PyErr_SetFromErrno`, with the additional behavior that if + *filename* is not *NULL*, it is passed to the constructor of *type* as a third + parameter. In the case of exceptions such as :exc:`IOError` and :exc:`OSError`, + this is used to define the :attr:`filename` attribute of the exception instance. + + +.. cfunction:: PyObject* PyErr_SetFromWindowsErr(int ierr) + + This is a convenience function to raise :exc:`WindowsError`. If called with + *ierr* of :cdata:`0`, the error code returned by a call to :cfunc:`GetLastError` + is used instead. It calls the Win32 function :cfunc:`FormatMessage` to retrieve + the Windows description of error code given by *ierr* or :cfunc:`GetLastError`, + then it constructs a tuple object whose first item is the *ierr* value and whose + second item is the corresponding error message (gotten from + :cfunc:`FormatMessage`), and then calls ``PyErr_SetObject(PyExc_WindowsError, + object)``. This function always returns *NULL*. Availability: Windows. + + +.. 
cfunction:: PyObject* PyErr_SetExcFromWindowsErr(PyObject *type, int ierr) + + Similar to :cfunc:`PyErr_SetFromWindowsErr`, with an additional parameter + specifying the exception type to be raised. Availability: Windows. + + .. versionadded:: 2.3 + + +.. cfunction:: PyObject* PyErr_SetFromWindowsErrWithFilename(int ierr, const char *filename) + + Similar to :cfunc:`PyErr_SetFromWindowsErr`, with the additional behavior that + if *filename* is not *NULL*, it is passed to the constructor of + :exc:`WindowsError` as a third parameter. Availability: Windows. + + +.. cfunction:: PyObject* PyErr_SetExcFromWindowsErrWithFilename(PyObject *type, int ierr, char *filename) + + Similar to :cfunc:`PyErr_SetFromWindowsErrWithFilename`, with an additional + parameter specifying the exception type to be raised. Availability: Windows. + + .. versionadded:: 2.3 + + +.. cfunction:: void PyErr_BadInternalCall() + + This is a shorthand for ``PyErr_SetString(PyExc_SystemError, message)``, where + *message* indicates that an internal operation (e.g. a Python/C API function) + was invoked with an illegal argument. It is mostly for internal use. + + +.. cfunction:: int PyErr_WarnEx(PyObject *category, char *message, int stacklevel) + + Issue a warning message. The *category* argument is a warning category (see + below) or *NULL*; the *message* argument is a message string. *stacklevel* is a + positive number giving a number of stack frames; the warning will be issued from + the currently executing line of code in that stack frame. A *stacklevel* of 1 + is the function calling :cfunc:`PyErr_WarnEx`, 2 is the function above that, + and so forth. + + This function normally prints a warning message to *sys.stderr*; however, it is + also possible that the user has specified that warnings are to be turned into + errors, and in that case this will raise an exception. 
It is also possible that + the function raises an exception because of a problem with the warning machinery + (the implementation imports the :mod:`warnings` module to do the heavy lifting). + The return value is ``0`` if no exception is raised, or ``-1`` if an exception + is raised. (It is not possible to determine whether a warning message is + actually printed, nor what the reason is for the exception; this is + intentional.) If an exception is raised, the caller should do its normal + exception handling (for example, :cfunc:`Py_DECREF` owned references and return + an error value). + + Warning categories must be subclasses of :cdata:`Warning`; the default warning + category is :cdata:`RuntimeWarning`. The standard Python warning categories are + available as global variables whose names are ``PyExc_`` followed by the Python + exception name. These have the type :ctype:`PyObject\*`; they are all class + objects. Their names are :cdata:`PyExc_Warning`, :cdata:`PyExc_UserWarning`, + :cdata:`PyExc_UnicodeWarning`, :cdata:`PyExc_DeprecationWarning`, + :cdata:`PyExc_SyntaxWarning`, :cdata:`PyExc_RuntimeWarning`, and + :cdata:`PyExc_FutureWarning`. :cdata:`PyExc_Warning` is a subclass of + :cdata:`PyExc_Exception`; the other warning categories are subclasses of + :cdata:`PyExc_Warning`. + + For information about warning control, see the documentation for the + :mod:`warnings` module and the :option:`-W` option in the command line + documentation. There is no C API for warning control. + + +.. cfunction:: int PyErr_WarnExplicit(PyObject *category, const char *message, const char *filename, int lineno, const char *module, PyObject *registry) + + Issue a warning message with explicit control over all warning attributes. This + is a straightforward wrapper around the Python function + :func:`warnings.warn_explicit`, see there for more information. The *module* + and *registry* arguments may be set to *NULL* to get the default effect + described there. + + +.. 
cfunction:: int PyErr_CheckSignals() + + .. index:: + module: signal + single: SIGINT + single: KeyboardInterrupt (built-in exception) + + This function interacts with Python's signal handling. It checks whether a + signal has been sent to the process and if so, invokes the corresponding + signal handler. If the :mod:`signal` module is supported, this can invoke a + signal handler written in Python. In all cases, the default effect for + :const:`SIGINT` is to raise the :exc:`KeyboardInterrupt` exception. If an + exception is raised, the error indicator is set and the function returns ``-1``; + otherwise the function returns ``0``. The error indicator may or may not be + cleared if it was previously set. + + +.. cfunction:: void PyErr_SetInterrupt() + + .. index:: + single: SIGINT + single: KeyboardInterrupt (built-in exception) + + This function simulates the effect of a :const:`SIGINT` signal arriving --- the + next time :cfunc:`PyErr_CheckSignals` is called, :exc:`KeyboardInterrupt` will + be raised. It may be called without holding the interpreter lock. + + .. % XXX This was described as obsolete, but is used in + .. % thread.interrupt_main() (used from IDLE), so it's still needed. + + +.. cfunction:: PyObject* PyErr_NewException(char *name, PyObject *base, PyObject *dict) + + This utility function creates and returns a new exception object. The *name* + argument must be the name of the new exception, a C string of the form + ``module.class``. The *base* and *dict* arguments are normally *NULL*. This + creates a class object derived from :exc:`Exception` (accessible in C as + :cdata:`PyExc_Exception`). + + The :attr:`__module__` attribute of the new class is set to the first part (up + to the last dot) of the *name* argument, and the class name is set to the last + part (after the last dot). The *base* argument can be used to specify alternate + base classes; it can either be only one class or a tuple of classes. 
The *dict* + argument can be used to specify a dictionary of class variables and methods. + + +.. cfunction:: void PyErr_WriteUnraisable(PyObject *obj) + + This utility function prints a warning message to ``sys.stderr`` when an + exception has been set but it is impossible for the interpreter to actually + raise the exception. It is used, for example, when an exception occurs in an + :meth:`__del__` method. + + The function is called with a single argument *obj* that identifies the context + in which the unraisable exception occurred. The repr of *obj* will be printed in + the warning message. + + +.. _standardexceptions: + +Standard Exceptions +=================== + +All standard Python exceptions are available as global variables whose names are +``PyExc_`` followed by the Python exception name. These have the type +:ctype:`PyObject\*`; they are all class objects. For completeness, here are all +the variables: + ++------------------------------------+----------------------------+----------+ +| C Name | Python Name | Notes | ++====================================+============================+==========+ +| :cdata:`PyExc_BaseException` | :exc:`BaseException` | (1), (4) | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_Exception` | :exc:`Exception` | \(1) | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_ArithmeticError` | :exc:`ArithmeticError` | \(1) | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_LookupError` | :exc:`LookupError` | \(1) | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_AssertionError` | :exc:`AssertionError` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_AttributeError` | :exc:`AttributeError` | | ++------------------------------------+----------------------------+----------+ +| 
:cdata:`PyExc_EOFError` | :exc:`EOFError` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_EnvironmentError` | :exc:`EnvironmentError` | \(1) | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_FloatingPointError` | :exc:`FloatingPointError` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_IOError` | :exc:`IOError` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_ImportError` | :exc:`ImportError` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_IndexError` | :exc:`IndexError` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_KeyError` | :exc:`KeyError` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_KeyboardInterrupt` | :exc:`KeyboardInterrupt` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_MemoryError` | :exc:`MemoryError` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_NameError` | :exc:`NameError` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_NotImplementedError` | :exc:`NotImplementedError` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_OSError` | :exc:`OSError` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_OverflowError` | :exc:`OverflowError` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_ReferenceError` | :exc:`ReferenceError` | \(2) | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_RuntimeError` | :exc:`RuntimeError` | | 
++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_SyntaxError` | :exc:`SyntaxError` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_SystemError` | :exc:`SystemError` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_SystemExit` | :exc:`SystemExit` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_TypeError` | :exc:`TypeError` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_ValueError` | :exc:`ValueError` | | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_WindowsError` | :exc:`WindowsError` | \(3) | ++------------------------------------+----------------------------+----------+ +| :cdata:`PyExc_ZeroDivisionError` | :exc:`ZeroDivisionError` | | ++------------------------------------+----------------------------+----------+ + +.. index:: + single: PyExc_BaseException + single: PyExc_Exception + single: PyExc_ArithmeticError + single: PyExc_LookupError + single: PyExc_AssertionError + single: PyExc_AttributeError + single: PyExc_EOFError + single: PyExc_EnvironmentError + single: PyExc_FloatingPointError + single: PyExc_IOError + single: PyExc_ImportError + single: PyExc_IndexError + single: PyExc_KeyError + single: PyExc_KeyboardInterrupt + single: PyExc_MemoryError + single: PyExc_NameError + single: PyExc_NotImplementedError + single: PyExc_OSError + single: PyExc_OverflowError + single: PyExc_ReferenceError + single: PyExc_RuntimeError + single: PyExc_SyntaxError + single: PyExc_SystemError + single: PyExc_SystemExit + single: PyExc_TypeError + single: PyExc_ValueError + single: PyExc_WindowsError + single: PyExc_ZeroDivisionError + +Notes: + +(1) + This is a base class for other standard exceptions. + +(2) + This is the same as :exc:`weakref.ReferenceError`. 
+ +(3) + Only defined on Windows; protect code that uses this by testing that the + preprocessor macro ``MS_WINDOWS`` is defined. + +(4) + .. versionadded:: 2.5 + + +Deprecation of String Exceptions +================================ + +.. index:: single: BaseException (built-in exception) + +All exceptions built into Python or provided in the standard library are derived +from :exc:`BaseException`. + +String exceptions are still supported in the interpreter to allow existing code +to run unmodified, but this will also change in a future release. + diff --git a/Doc/c-api/index.rst b/Doc/c-api/index.rst new file mode 100644 index 0000000..c643312 --- /dev/null +++ b/Doc/c-api/index.rst @@ -0,0 +1,33 @@ +.. _c-api-index: + +################################## + Python/C API Reference Manual +################################## + +:Release: |version| +:Date: |today| + +This manual documents the API used by C and C++ programmers who want to write +extension modules or embed Python. It is a companion to :ref:`extending-index`, +which describes the general principles of extension writing but does not +document the API functions in detail. + +.. warning:: + + The current version of this document is somewhat incomplete. However, most of + the important functions, types and structures are described. + + +.. toctree:: + :maxdepth: 2 + + intro.rst + veryhigh.rst + refcounting.rst + exceptions.rst + utilities.rst + abstract.rst + concrete.rst + init.rst + memory.rst + newtypes.rst diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst new file mode 100644 index 0000000..2509e0b --- /dev/null +++ b/Doc/c-api/init.rst @@ -0,0 +1,936 @@ +.. highlightlang:: c + + +.. _initialization: + +***************************************** +Initialization, Finalization, and Threads +***************************************** + + +.. cfunction:: void Py_Initialize() + + .. 
index:: + single: Py_SetProgramName() + single: PyEval_InitThreads() + single: PyEval_ReleaseLock() + single: PyEval_AcquireLock() + single: modules (in module sys) + single: path (in module sys) + module: __builtin__ + module: __main__ + module: sys + triple: module; search; path + single: PySys_SetArgv() + single: Py_Finalize() + + Initialize the Python interpreter. In an application embedding Python, this + should be called before using any other Python/C API functions, with the + exception of :cfunc:`Py_SetProgramName`, :cfunc:`PyEval_InitThreads`, + :cfunc:`PyEval_ReleaseLock`, and :cfunc:`PyEval_AcquireLock`. This initializes + the table of loaded modules (``sys.modules``), and creates the fundamental + modules :mod:`__builtin__`, :mod:`__main__` and :mod:`sys`. It also initializes + the module search path (``sys.path``). It does not set ``sys.argv``; use + :cfunc:`PySys_SetArgv` for that. This is a no-op when called for a second time + (without calling :cfunc:`Py_Finalize` first). There is no return value; it is a + fatal error if the initialization fails. + + +.. cfunction:: void Py_InitializeEx(int initsigs) + + This function works like :cfunc:`Py_Initialize` if *initsigs* is 1. If + *initsigs* is 0, it skips the registration of signal handlers, which + might be useful when Python is embedded. + + .. versionadded:: 2.4 + + +.. cfunction:: int Py_IsInitialized() + + Return true (nonzero) when the Python interpreter has been initialized, false + (zero) if not. After :cfunc:`Py_Finalize` is called, this returns false until + :cfunc:`Py_Initialize` is called again. + + +.. cfunction:: void Py_Finalize() + + Undo all initializations made by :cfunc:`Py_Initialize` and subsequent use of + Python/C API functions, and destroy all sub-interpreters (see + :cfunc:`Py_NewInterpreter` below) that were created and not yet destroyed since + the last call to :cfunc:`Py_Initialize`. Ideally, this frees all memory + allocated by the Python interpreter.
This is a no-op when called for a second + time (without calling :cfunc:`Py_Initialize` again first). There is no return + value; errors during finalization are ignored. + + This function is provided for a number of reasons. An embedding application + might want to restart Python without having to restart the application itself. + An application that has loaded the Python interpreter from a dynamically + loadable library (or DLL) might want to free all memory allocated by Python + before unloading the DLL. During a hunt for memory leaks in an application a + developer might want to free all memory allocated by Python before exiting from + the application. + + **Bugs and caveats:** The destruction of modules and objects in modules is done + in random order; this may cause destructors (:meth:`__del__` methods) to fail + when they depend on other objects (even functions) or modules. Dynamically + loaded extension modules loaded by Python are not unloaded. Small amounts of + memory allocated by the Python interpreter may not be freed (if you find a leak, + please report it). Memory tied up in circular references between objects is not + freed. Some memory allocated by extension modules may not be freed. Some + extensions may not work properly if their initialization routine is called more + than once; this can happen if an application calls :cfunc:`Py_Initialize` and + :cfunc:`Py_Finalize` more than once. + + +.. cfunction:: PyThreadState* Py_NewInterpreter() + + .. index:: + module: __builtin__ + module: __main__ + module: sys + single: stdout (in module sys) + single: stderr (in module sys) + single: stdin (in module sys) + + Create a new sub-interpreter. This is an (almost) totally separate environment + for the execution of Python code. In particular, the new interpreter has + separate, independent versions of all imported modules, including the + fundamental modules :mod:`__builtin__`, :mod:`__main__` and :mod:`sys`. 
The + table of loaded modules (``sys.modules``) and the module search path + (``sys.path``) are also separate. The new environment has no ``sys.argv`` + variable. It has new standard I/O stream file objects ``sys.stdin``, + ``sys.stdout`` and ``sys.stderr`` (however these refer to the same underlying + :ctype:`FILE` structures in the C library). + + The return value points to the first thread state created in the new + sub-interpreter. This thread state is made the current thread state. + Note that no actual thread is created; see the discussion of thread states + below. If creation of the new interpreter is unsuccessful, *NULL* is + returned; no exception is set since the exception state is stored in the + current thread state and there may not be a current thread state. (Like all + other Python/C API functions, the global interpreter lock must be held before + calling this function and is still held when it returns; however, unlike most + other Python/C API functions, there needn't be a current thread state on + entry.) + + .. index:: + single: Py_Finalize() + single: Py_Initialize() + + Extension modules are shared between (sub-)interpreters as follows: the first + time a particular extension is imported, it is initialized normally, and a + (shallow) copy of its module's dictionary is squirreled away. When the same + extension is imported by another (sub-)interpreter, a new module is initialized + and filled with the contents of this copy; the extension's ``init`` function is + not called. Note that this is different from what happens when an extension is + imported after the interpreter has been completely re-initialized by calling + :cfunc:`Py_Finalize` and :cfunc:`Py_Initialize`; in that case, the extension's + ``initmodule`` function *is* called again. + + ..
index:: single: close() (in module os) + + **Bugs and caveats:** Because sub-interpreters (and the main interpreter) are + part of the same process, the insulation between them isn't perfect --- for + example, using low-level file operations like :func:`os.close` they can + (accidentally or maliciously) affect each other's open files. Because of the + way extensions are shared between (sub-)interpreters, some extensions may not + work properly; this is especially likely when the extension makes use of + (static) global variables, or when the extension manipulates its module's + dictionary after its initialization. It is possible to insert objects created + in one sub-interpreter into a namespace of another sub-interpreter; this should + be done with great care to avoid sharing user-defined functions, methods, + instances or classes between sub-interpreters, since import operations executed + by such objects may affect the wrong (sub-)interpreter's dictionary of loaded + modules. (XXX This is a hard-to-fix bug that will be addressed in a future + release.) + + Also note that the use of this functionality is incompatible with extension + modules such as PyObjC and ctypes that use the :cfunc:`PyGILState_\*` APIs (and + this is inherent in the way the :cfunc:`PyGILState_\*` functions work). Simple + things may work, but confusing behavior will always be near. + + +.. cfunction:: void Py_EndInterpreter(PyThreadState *tstate) + + .. index:: single: Py_Finalize() + + Destroy the (sub-)interpreter represented by the given thread state. The given + thread state must be the current thread state. See the discussion of thread + states below. When the call returns, the current thread state is *NULL*. All + thread states associated with this interpreter are destroyed. (The global + interpreter lock must be held before calling this function and is still held + when it returns.) 
:cfunc:`Py_Finalize` will destroy all sub-interpreters that + haven't been explicitly destroyed at that point. + + +.. cfunction:: void Py_SetProgramName(char *name) + + .. index:: + single: Py_Initialize() + single: main() + single: Py_GetPath() + + This function should be called before :cfunc:`Py_Initialize` is called for + the first time, if it is called at all. It tells the interpreter the value + of the ``argv[0]`` argument to the :cfunc:`main` function of the program. + This is used by :cfunc:`Py_GetPath` and some other functions below to find + the Python run-time libraries relative to the interpreter executable. The + default value is ``'python'``. The argument should point to a + zero-terminated character string in static storage whose contents will not + change for the duration of the program's execution. No code in the Python + interpreter will change the contents of this storage. + + +.. cfunction:: char* Py_GetProgramName() + + .. index:: single: Py_SetProgramName() + + Return the program name set with :cfunc:`Py_SetProgramName`, or the default. + The returned string points into static storage; the caller should not modify its + value. + + +.. cfunction:: char* Py_GetPrefix() + + Return the *prefix* for installed platform-independent files. This is derived + through a number of complicated rules from the program name set with + :cfunc:`Py_SetProgramName` and some environment variables; for example, if the + program name is ``'/usr/local/bin/python'``, the prefix is ``'/usr/local'``. The + returned string points into static storage; the caller should not modify its + value. This corresponds to the :makevar:`prefix` variable in the top-level + :file:`Makefile` and the :option:`--prefix` argument to the :program:`configure` + script at build time. The value is available to Python code as ``sys.prefix``. + It is only useful on Unix. See also the next function. + + +.. 
cfunction:: char* Py_GetExecPrefix() + + Return the *exec-prefix* for installed platform-*dependent* files. This is + derived through a number of complicated rules from the program name set with + :cfunc:`Py_SetProgramName` and some environment variables; for example, if the + program name is ``'/usr/local/bin/python'``, the exec-prefix is + ``'/usr/local'``. The returned string points into static storage; the caller + should not modify its value. This corresponds to the :makevar:`exec_prefix` + variable in the top-level :file:`Makefile` and the :option:`--exec-prefix` + argument to the :program:`configure` script at build time. The value is + available to Python code as ``sys.exec_prefix``. It is only useful on Unix. + + Background: The exec-prefix differs from the prefix when platform dependent + files (such as executables and shared libraries) are installed in a different + directory tree. In a typical installation, platform dependent files may be + installed in the :file:`/usr/local/plat` subtree while platform independent files may + be installed in :file:`/usr/local`. + + Generally speaking, a platform is a combination of hardware and software + families, e.g. Sparc machines running the Solaris 2.x operating system are + considered the same platform, but Intel machines running Solaris 2.x are another + platform, and Intel machines running Linux are yet another platform. Different + major revisions of the same operating system generally also form different + platforms. Non-Unix operating systems are a different story; the installation + strategies on those systems are so different that the prefix and exec-prefix are + meaningless, and set to the empty string. Note that compiled Python bytecode + files are platform independent (but not independent from the Python version by + which they were compiled!).
+ + System administrators will know how to configure the :program:`mount` or + :program:`automount` programs to share :file:`/usr/local` between platforms + while having :file:`/usr/local/plat` be a different filesystem for each + platform. + + +.. cfunction:: char* Py_GetProgramFullPath() + + .. index:: + single: Py_SetProgramName() + single: executable (in module sys) + + Return the full program name of the Python executable; this is computed as a + side-effect of deriving the default module search path from the program name + (set by :cfunc:`Py_SetProgramName` above). The returned string points into + static storage; the caller should not modify its value. The value is available + to Python code as ``sys.executable``. + + +.. cfunction:: char* Py_GetPath() + + .. index:: + triple: module; search; path + single: path (in module sys) + + Return the default module search path; this is computed from the program name + (set by :cfunc:`Py_SetProgramName` above) and some environment variables. The + returned string consists of a series of directory names separated by a platform + dependent delimiter character. The delimiter character is ``':'`` on Unix and + Mac OS X, ``';'`` on Windows. The returned string points into static storage; + the caller should not modify its value. The value is available to Python code + as the list ``sys.path``, which may be modified to change the future search path + for loaded modules. + + .. % XXX should give the exact rules + + +.. cfunction:: const char* Py_GetVersion() + + Return the version of this Python interpreter. This is a string that looks + something like :: + + "1.5 (#67, Dec 31 1997, 22:34:28) [GCC 2.7.2.2]" + + .. index:: single: version (in module sys) + + The first word (up to the first space character) is the current Python version; + the first three characters are the major and minor version separated by a + period. The returned string points into static storage; the caller should not + modify its value. 
The value is available to Python code as ``sys.version``. + + +.. cfunction:: const char* Py_GetBuildNumber() + + Return a string representing the Subversion revision that this Python executable + was built from. This number is a string because it may contain a trailing 'M' + if Python was built from a mixed revision source tree. + + .. versionadded:: 2.5 + + +.. cfunction:: const char* Py_GetPlatform() + + .. index:: single: platform (in module sys) + + Return the platform identifier for the current platform. On Unix, this is + formed from the "official" name of the operating system, converted to lower + case, followed by the major revision number; e.g., for Solaris 2.x, which is + also known as SunOS 5.x, the value is ``'sunos5'``. On Mac OS X, it is + ``'darwin'``. On Windows, it is ``'win'``. The returned string points into + static storage; the caller should not modify its value. The value is available + to Python code as ``sys.platform``. + + +.. cfunction:: const char* Py_GetCopyright() + + Return the official copyright string for the current Python version, for example + + ``'Copyright 1991-1995 Stichting Mathematisch Centrum, Amsterdam'`` + + .. index:: single: copyright (in module sys) + + The returned string points into static storage; the caller should not modify its + value. The value is available to Python code as ``sys.copyright``. + + +.. cfunction:: const char* Py_GetCompiler() + + Return an indication of the compiler used to build the current Python version, + in square brackets, for example:: + + "[GCC 2.7.2.2]" + + .. index:: single: version (in module sys) + + The returned string points into static storage; the caller should not modify its + value. The value is available to Python code as part of the variable + ``sys.version``. + + +.. 
cfunction:: const char* Py_GetBuildInfo() + + Return information about the sequence number and build date and time of the + current Python interpreter instance, for example :: + + "#67, Aug 1 1997, 22:34:28" + + .. index:: single: version (in module sys) + + The returned string points into static storage; the caller should not modify its + value. The value is available to Python code as part of the variable + ``sys.version``. + + +.. cfunction:: void PySys_SetArgv(int argc, char **argv) + + .. index:: + single: main() + single: Py_FatalError() + single: argv (in module sys) + + Set ``sys.argv`` based on *argc* and *argv*. These parameters are similar to + those passed to the program's :cfunc:`main` function with the difference that + the first entry should refer to the script file to be executed rather than the + executable hosting the Python interpreter. If there isn't a script that will be + run, the first entry in *argv* can be an empty string. If this function fails + to initialize ``sys.argv``, a fatal condition is signalled using + :cfunc:`Py_FatalError`. + + .. % XXX impl. doesn't seem consistent in allowing 0/NULL for the params; + .. % check w/ Guido. + +.. % XXX Other PySys thingies (doesn't really belong in this chapter) + + +.. _threads: + +Thread State and the Global Interpreter Lock +============================================ + +.. index:: + single: global interpreter lock + single: interpreter lock + single: lock, interpreter + +The Python interpreter is not fully thread safe. In order to support +multi-threaded Python programs, there's a global lock that must be held by the +current thread before it can safely access Python objects. Without the lock, +even the simplest operations could cause problems in a multi-threaded program: +for example, when two threads simultaneously increment the reference count of +the same object, the reference count could end up being incremented only once +instead of twice. + +.. 
index:: single: setcheckinterval() (in module sys) + +Therefore, the rule exists that only the thread that has acquired the global +interpreter lock may operate on Python objects or call Python/C API functions. +In order to support multi-threaded Python programs, the interpreter regularly +releases and reacquires the lock --- by default, every 100 bytecode instructions +(this can be changed with :func:`sys.setcheckinterval`). The lock is also +released and reacquired around potentially blocking I/O operations like reading +or writing a file, so that other threads can run while the thread that requests +the I/O is waiting for the I/O operation to complete. + +.. index:: + single: PyThreadState + single: PyThreadState + +The Python interpreter needs to keep some bookkeeping information separate per +thread --- for this it uses a data structure called :ctype:`PyThreadState`. +There's one global variable, however: the pointer to the current +:ctype:`PyThreadState` structure. While most thread packages have a way to +store "per-thread global data," Python's internal platform independent thread +abstraction doesn't support this yet. Therefore, the current thread state must +be manipulated explicitly. + +This is easy enough in most cases. Most code manipulating the global +interpreter lock has the following simple structure:: + + Save the thread state in a local variable. + Release the interpreter lock. + ...Do some blocking I/O operation... + Reacquire the interpreter lock. + Restore the thread state from the local variable. + +This is so common that a pair of macros exists to simplify it:: + + Py_BEGIN_ALLOW_THREADS + ...Do some blocking I/O operation... + Py_END_ALLOW_THREADS + +.. index:: + single: Py_BEGIN_ALLOW_THREADS + single: Py_END_ALLOW_THREADS + +The :cmacro:`Py_BEGIN_ALLOW_THREADS` macro opens a new block and declares a +hidden local variable; the :cmacro:`Py_END_ALLOW_THREADS` macro closes the +block. 
Another advantage of using these two macros is that when Python is +compiled without thread support, they are defined empty, thus saving the thread +state and lock manipulations. + +When thread support is enabled, the block above expands to the following code:: + + PyThreadState *_save; + + _save = PyEval_SaveThread(); + ...Do some blocking I/O operation... + PyEval_RestoreThread(_save); + +Using even lower level primitives, we can get roughly the same effect as +follows:: + + PyThreadState *_save; + + _save = PyThreadState_Swap(NULL); + PyEval_ReleaseLock(); + ...Do some blocking I/O operation... + PyEval_AcquireLock(); + PyThreadState_Swap(_save); + +.. index:: + single: PyEval_RestoreThread() + single: errno + single: PyEval_SaveThread() + single: PyEval_ReleaseLock() + single: PyEval_AcquireLock() + +There are some subtle differences; in particular, :cfunc:`PyEval_RestoreThread` +saves and restores the value of the global variable :cdata:`errno`, since the +lock manipulation does not guarantee that :cdata:`errno` is left alone. Also, +when thread support is disabled, :cfunc:`PyEval_SaveThread` and +:cfunc:`PyEval_RestoreThread` don't manipulate the lock; in this case, +:cfunc:`PyEval_ReleaseLock` and :cfunc:`PyEval_AcquireLock` are not available. +This is done so that dynamically loaded extensions compiled with thread support +enabled can be loaded by an interpreter that was compiled with disabled thread +support. + +The global interpreter lock is used to protect the pointer to the current thread +state. When releasing the lock and saving the thread state, the current thread +state pointer must be retrieved before the lock is released (since another +thread could immediately acquire the lock and store its own thread state in the +global variable). Conversely, when acquiring the lock and restoring the thread +state, the lock must be acquired before storing the thread state pointer. + +Why am I going on with so much detail about this? 
Because when threads are +created from C, they don't have the global interpreter lock, nor is there a +thread state data structure for them. Such threads must bootstrap themselves +into existence, by first creating a thread state data structure, then acquiring +the lock, and finally storing their thread state pointer, before they can start +using the Python/C API. When they are done, they should reset the thread state +pointer, release the lock, and finally free their thread state data structure. + +Beginning with version 2.3, threads can now take advantage of the +:cfunc:`PyGILState_\*` functions to do all of the above automatically. The +typical idiom for calling into Python from a C thread is now:: + + PyGILState_STATE gstate; + gstate = PyGILState_Ensure(); + + /* Perform Python actions here. */ + result = CallSomeFunction(); + /* evaluate result */ + + /* Release the thread. No Python API allowed beyond this point. */ + PyGILState_Release(gstate); + +Note that the :cfunc:`PyGILState_\*` functions assume there is only one global +interpreter (created automatically by :cfunc:`Py_Initialize`). Python still +supports the creation of additional interpreters (using +:cfunc:`Py_NewInterpreter`), but mixing multiple interpreters and the +:cfunc:`PyGILState_\*` API is unsupported. + + +.. ctype:: PyInterpreterState + + This data structure represents the state shared by a number of cooperating + threads. Threads belonging to the same interpreter share their module + administration and a few other internal items. There are no public members in + this structure. + + Threads belonging to different interpreters initially share nothing, except + process state like available memory, open file descriptors and such. The global + interpreter lock is also shared by all threads, regardless of to which + interpreter they belong. + + +.. ctype:: PyThreadState + + This data structure represents the state of a single thread. 
The only public + data member is :ctype:`PyInterpreterState \*`:attr:`interp`, which points to + this thread's interpreter state. + + +.. cfunction:: void PyEval_InitThreads() + + .. index:: + single: PyEval_ReleaseLock() + single: PyEval_ReleaseThread() + single: PyEval_SaveThread() + single: PyEval_RestoreThread() + + Initialize and acquire the global interpreter lock. It should be called in the + main thread before creating a second thread or engaging in any other thread + operations such as :cfunc:`PyEval_ReleaseLock` or + ``PyEval_ReleaseThread(tstate)``. It is not needed before calling + :cfunc:`PyEval_SaveThread` or :cfunc:`PyEval_RestoreThread`. + + .. index:: single: Py_Initialize() + + This is a no-op when called for a second time. It is safe to call this function + before calling :cfunc:`Py_Initialize`. + + .. index:: module: thread + + When only the main thread exists, no lock operations are needed. This is a + common situation (most Python programs do not use threads), and the lock + operations slow the interpreter down a bit. Therefore, the lock is not created + initially. This situation is equivalent to having acquired the lock: when + there is only a single thread, all object accesses are safe. Therefore, when + this function initializes the lock, it also acquires it. Before the Python + :mod:`thread` module creates a new thread, knowing that either it has the lock + or the lock hasn't been created yet, it calls :cfunc:`PyEval_InitThreads`. When + this call returns, it is guaranteed that the lock has been created and that the + calling thread has acquired it. + + It is **not** safe to call this function when it is unknown which thread (if + any) currently has the global interpreter lock. + + This function is not available when thread support is disabled at compile time. + + +.. cfunction:: int PyEval_ThreadsInitialized() + + Returns a non-zero value if :cfunc:`PyEval_InitThreads` has been called. 
This + function can be called without holding the lock, and therefore can be used to + avoid calls to the locking API when running single-threaded. This function is + not available when thread support is disabled at compile time. + + .. versionadded:: 2.4 + + +.. cfunction:: void PyEval_AcquireLock() + + Acquire the global interpreter lock. The lock must have been created earlier. + If this thread already has the lock, a deadlock ensues. This function is not + available when thread support is disabled at compile time. + + +.. cfunction:: void PyEval_ReleaseLock() + + Release the global interpreter lock. The lock must have been created earlier. + This function is not available when thread support is disabled at compile time. + + +.. cfunction:: void PyEval_AcquireThread(PyThreadState *tstate) + + Acquire the global interpreter lock and set the current thread state to + *tstate*, which should not be *NULL*. The lock must have been created earlier. + If this thread already has the lock, deadlock ensues. This function is not + available when thread support is disabled at compile time. + + +.. cfunction:: void PyEval_ReleaseThread(PyThreadState *tstate) + + Reset the current thread state to *NULL* and release the global interpreter + lock. The lock must have been created earlier and must be held by the current + thread. The *tstate* argument, which must not be *NULL*, is only used to check + that it represents the current thread state --- if it isn't, a fatal error is + reported. This function is not available when thread support is disabled at + compile time. + + +.. cfunction:: PyThreadState* PyEval_SaveThread() + + Release the interpreter lock (if it has been created and thread support is + enabled) and reset the thread state to *NULL*, returning the previous thread + state (which is not *NULL*). If the lock has been created, the current thread + must have acquired it. (This function is available even when thread support is + disabled at compile time.) + + +.. 
cfunction:: void PyEval_RestoreThread(PyThreadState *tstate) + + Acquire the interpreter lock (if it has been created and thread support is + enabled) and set the thread state to *tstate*, which must not be *NULL*. If the + lock has been created, the current thread must not have acquired it, otherwise + deadlock ensues. (This function is available even when thread support is + disabled at compile time.) + +The following macros are normally used without a trailing semicolon; look for +example usage in the Python source distribution. + + +.. cmacro:: Py_BEGIN_ALLOW_THREADS + + This macro expands to ``{ PyThreadState *_save; _save = PyEval_SaveThread();``. + Note that it contains an opening brace; it must be matched with a following + :cmacro:`Py_END_ALLOW_THREADS` macro. See above for further discussion of this + macro. It is a no-op when thread support is disabled at compile time. + + +.. cmacro:: Py_END_ALLOW_THREADS + + This macro expands to ``PyEval_RestoreThread(_save); }``. Note that it contains + a closing brace; it must be matched with an earlier + :cmacro:`Py_BEGIN_ALLOW_THREADS` macro. See above for further discussion of + this macro. It is a no-op when thread support is disabled at compile time. + + +.. cmacro:: Py_BLOCK_THREADS + + This macro expands to ``PyEval_RestoreThread(_save);``: it is equivalent to + :cmacro:`Py_END_ALLOW_THREADS` without the closing brace. It is a no-op when + thread support is disabled at compile time. + + +.. cmacro:: Py_UNBLOCK_THREADS + + This macro expands to ``_save = PyEval_SaveThread();``: it is equivalent to + :cmacro:`Py_BEGIN_ALLOW_THREADS` without the opening brace and variable + declaration. It is a no-op when thread support is disabled at compile time. + +All of the following functions are only available when thread support is enabled +at compile time, and must be called only when the interpreter lock has been +created. + + +.. 
cfunction:: PyInterpreterState* PyInterpreterState_New() + + Create a new interpreter state object. The interpreter lock need not be held, + but may be held if it is necessary to serialize calls to this function. + + +.. cfunction:: void PyInterpreterState_Clear(PyInterpreterState *interp) + + Reset all information in an interpreter state object. The interpreter lock must + be held. + + +.. cfunction:: void PyInterpreterState_Delete(PyInterpreterState *interp) + + Destroy an interpreter state object. The interpreter lock need not be held. + The interpreter state must have been reset with a previous call to + :cfunc:`PyInterpreterState_Clear`. + + +.. cfunction:: PyThreadState* PyThreadState_New(PyInterpreterState *interp) + + Create a new thread state object belonging to the given interpreter object. The + interpreter lock need not be held, but may be held if it is necessary to + serialize calls to this function. + + +.. cfunction:: void PyThreadState_Clear(PyThreadState *tstate) + + Reset all information in a thread state object. The interpreter lock must be + held. + + +.. cfunction:: void PyThreadState_Delete(PyThreadState *tstate) + + Destroy a thread state object. The interpreter lock need not be held. The + thread state must have been reset with a previous call to + :cfunc:`PyThreadState_Clear`. + + +.. cfunction:: PyThreadState* PyThreadState_Get() + + Return the current thread state. The interpreter lock must be held. When the + current thread state is *NULL*, this issues a fatal error (so that the caller + needn't check for *NULL*). + + +.. cfunction:: PyThreadState* PyThreadState_Swap(PyThreadState *tstate) + + Swap the current thread state with the thread state given by the argument + *tstate*, which may be *NULL*. The interpreter lock must be held. + + +.. cfunction:: PyObject* PyThreadState_GetDict() + + Return a dictionary in which extensions can store thread-specific state + information. 
Each extension should use a unique key to use to store state in + the dictionary. It is okay to call this function when no current thread state + is available. If this function returns *NULL*, no exception has been raised and + the caller should assume no current thread state is available. + + .. versionchanged:: 2.3 + Previously this could only be called when a current thread is active, and *NULL* + meant that an exception was raised. + + +.. cfunction:: int PyThreadState_SetAsyncExc(long id, PyObject *exc) + + Asynchronously raise an exception in a thread. The *id* argument is the thread + id of the target thread; *exc* is the exception object to be raised. This + function does not steal any references to *exc*. To prevent naive misuse, you + must write your own C extension to call this. Must be called with the GIL held. + Returns the number of thread states modified; this is normally one, but will be + zero if the thread id isn't found. If *exc* is :const:`NULL`, the pending + exception (if any) for the thread is cleared. This raises no exceptions. + + .. versionadded:: 2.3 + + +.. cfunction:: PyGILState_STATE PyGILState_Ensure() + + Ensure that the current thread is ready to call the Python C API regardless of + the current state of Python, or of its thread lock. This may be called as many + times as desired by a thread as long as each call is matched with a call to + :cfunc:`PyGILState_Release`. In general, other thread-related APIs may be used + between :cfunc:`PyGILState_Ensure` and :cfunc:`PyGILState_Release` calls as long + as the thread state is restored to its previous state before the Release(). For + example, normal usage of the :cmacro:`Py_BEGIN_ALLOW_THREADS` and + :cmacro:`Py_END_ALLOW_THREADS` macros is acceptable. + + The return value is an opaque "handle" to the thread state when + :cfunc:`PyGILState_Acquire` was called, and must be passed to + :cfunc:`PyGILState_Release` to ensure Python is left in the same state. 
Even + though recursive calls are allowed, these handles *cannot* be shared - each + unique call to :cfunc:`PyGILState_Ensure` must save the handle for its call to + :cfunc:`PyGILState_Release`. + + When the function returns, the current thread will hold the GIL. Failure is a + fatal error. + + .. versionadded:: 2.3 + + +.. cfunction:: void PyGILState_Release(PyGILState_STATE) + + Release any resources previously acquired. After this call, Python's state will + be the same as it was prior to the corresponding :cfunc:`PyGILState_Ensure` call + (but generally this state will be unknown to the caller, hence the use of the + GILState API.) + + Every call to :cfunc:`PyGILState_Ensure` must be matched by a call to + :cfunc:`PyGILState_Release` on the same thread. + + .. versionadded:: 2.3 + + +.. _profiling: + +Profiling and Tracing +===================== + +.. sectionauthor:: Fred L. Drake, Jr. + + +The Python interpreter provides some low-level support for attaching profiling +and execution tracing facilities. These are used for profiling, debugging, and +coverage analysis tools. + +Starting with Python 2.2, the implementation of this facility was substantially +revised, and an interface from C was added. This C interface allows the +profiling or tracing code to avoid the overhead of calling through Python-level +callable objects, making a direct C function call instead. The essential +attributes of the facility have not changed; the interface allows trace +functions to be installed per-thread, and the basic events reported to the trace +function are the same as had been reported to the Python-level trace functions +in previous versions. + + +.. ctype:: int (*Py_tracefunc)(PyObject *obj, PyFrameObject *frame, int what, PyObject *arg) + + The type of the trace function registered using :cfunc:`PyEval_SetProfile` and + :cfunc:`PyEval_SetTrace`. 
The first parameter is the object passed to the + registration function as *obj*, *frame* is the frame object to which the event + pertains, *what* is one of the constants :const:`PyTrace_CALL`, + :const:`PyTrace_EXCEPTION`, :const:`PyTrace_LINE`, :const:`PyTrace_RETURN`, + :const:`PyTrace_C_CALL`, :const:`PyTrace_C_EXCEPTION`, or + :const:`PyTrace_C_RETURN`, and *arg* depends on the value of *what*: + + +------------------------------+--------------------------------------+ + | Value of *what* | Meaning of *arg* | + +==============================+======================================+ + | :const:`PyTrace_CALL` | Always *NULL*. | + +------------------------------+--------------------------------------+ + | :const:`PyTrace_EXCEPTION` | Exception information as returned by | + | | :func:`sys.exc_info`. | + +------------------------------+--------------------------------------+ + | :const:`PyTrace_LINE` | Always *NULL*. | + +------------------------------+--------------------------------------+ + | :const:`PyTrace_RETURN` | Value being returned to the caller. | + +------------------------------+--------------------------------------+ + | :const:`PyTrace_C_CALL` | Name of function being called. | + +------------------------------+--------------------------------------+ + | :const:`PyTrace_C_EXCEPTION` | Always *NULL*. | + +------------------------------+--------------------------------------+ + | :const:`PyTrace_C_RETURN` | Always *NULL*. | + +------------------------------+--------------------------------------+ + + +.. cvar:: int PyTrace_CALL + + The value of the *what* parameter to a :ctype:`Py_tracefunc` function when a new + call to a function or method is being reported, or a new entry into a generator. + Note that the creation of the iterator for a generator function is not reported + as there is no control transfer to the Python bytecode in the corresponding + frame. + + +.. 
cvar:: int PyTrace_EXCEPTION + + The value of the *what* parameter to a :ctype:`Py_tracefunc` function when an + exception has been raised. The callback function is called with this value for + *what* when after any bytecode is processed after which the exception becomes + set within the frame being executed. The effect of this is that as exception + propagation causes the Python stack to unwind, the callback is called upon + return to each frame as the exception propagates. Only trace functions receives + these events; they are not needed by the profiler. + + +.. cvar:: int PyTrace_LINE + + The value passed as the *what* parameter to a trace function (but not a + profiling function) when a line-number event is being reported. + + +.. cvar:: int PyTrace_RETURN + + The value for the *what* parameter to :ctype:`Py_tracefunc` functions when a + call is returning without propagating an exception. + + +.. cvar:: int PyTrace_C_CALL + + The value for the *what* parameter to :ctype:`Py_tracefunc` functions when a C + function is about to be called. + + +.. cvar:: int PyTrace_C_EXCEPTION + + The value for the *what* parameter to :ctype:`Py_tracefunc` functions when a C + function has thrown an exception. + + +.. cvar:: int PyTrace_C_RETURN + + The value for the *what* parameter to :ctype:`Py_tracefunc` functions when a C + function has returned. + + +.. cfunction:: void PyEval_SetProfile(Py_tracefunc func, PyObject *obj) + + Set the profiler function to *func*. The *obj* parameter is passed to the + function as its first parameter, and may be any Python object, or *NULL*. If + the profile function needs to maintain state, using a different value for *obj* + for each thread provides a convenient and thread-safe place to store it. The + profile function is called for all monitored events except the line-number + events. + + +.. cfunction:: void PyEval_SetTrace(Py_tracefunc func, PyObject *obj) + + Set the tracing function to *func*. 
This is similar to + :cfunc:`PyEval_SetProfile`, except the tracing function does receive line-number + events. + + +.. _advanced-debugging: + +Advanced Debugger Support +========================= + +.. sectionauthor:: Fred L. Drake, Jr. + + +These functions are only intended to be used by advanced debugging tools. + + +.. cfunction:: PyInterpreterState* PyInterpreterState_Head() + + Return the interpreter state object at the head of the list of all such objects. + + .. versionadded:: 2.2 + + +.. cfunction:: PyInterpreterState* PyInterpreterState_Next(PyInterpreterState *interp) + + Return the next interpreter state object after *interp* from the list of all + such objects. + + .. versionadded:: 2.2 + + +.. cfunction:: PyThreadState * PyInterpreterState_ThreadHead(PyInterpreterState *interp) + + Return the a pointer to the first :ctype:`PyThreadState` object in the list of + threads associated with the interpreter *interp*. + + .. versionadded:: 2.2 + + +.. cfunction:: PyThreadState* PyThreadState_Next(PyThreadState *tstate) + + Return the next thread state object after *tstate* from the list of all such + objects belonging to the same :ctype:`PyInterpreterState` object. + + .. versionadded:: 2.2 + diff --git a/Doc/c-api/intro.rst b/Doc/c-api/intro.rst new file mode 100644 index 0000000..60b0052 --- /dev/null +++ b/Doc/c-api/intro.rst @@ -0,0 +1,630 @@ +.. highlightlang:: c + + +.. _api-intro: + +************ +Introduction +************ + +The Application Programmer's Interface to Python gives C and C++ programmers +access to the Python interpreter at a variety of levels. The API is equally +usable from C++, but for brevity it is generally referred to as the Python/C +API. There are two fundamentally different reasons for using the Python/C API. +The first reason is to write *extension modules* for specific purposes; these +are C modules that extend the Python interpreter. This is probably the most +common use. 
The second reason is to use Python as a component in a larger +application; this technique is generally referred to as :dfn:`embedding` Python +in an application. + +Writing an extension module is a relatively well-understood process, where a +"cookbook" approach works well. There are several tools that automate the +process to some extent. While people have embedded Python in other +applications since its early existence, the process of embedding Python is less +straightforward than writing an extension. + +Many API functions are useful independent of whether you're embedding or +extending Python; moreover, most applications that embed Python will need to +provide a custom extension as well, so it's probably a good idea to become +familiar with writing an extension before attempting to embed Python in a real +application. + + +.. _api-includes: + +Include Files +============= + +All function, type and macro definitions needed to use the Python/C API are +included in your code by the following line:: + + #include "Python.h" + +This implies inclusion of the following standard headers: ````, +````, ````, ````, and ```` (if +available). + +.. warning:: + + Since Python may define some pre-processor definitions which affect the standard + headers on some systems, you *must* include :file:`Python.h` before any standard + headers are included. + +All user visible names defined by Python.h (except those defined by the included +standard headers) have one of the prefixes ``Py`` or ``_Py``. Names beginning +with ``_Py`` are for internal use by the Python implementation and should not be +used by extension writers. Structure member names do not have a reserved prefix. + +**Important:** user code should never define names that begin with ``Py`` or +``_Py``. This confuses the reader, and jeopardizes the portability of the user +code to future Python versions, which may define additional names beginning with +one of these prefixes. 
+ +The header files are typically installed with Python. On Unix, these are +located in the directories :file:`{prefix}/include/pythonversion/` and +:file:`{exec_prefix}/include/pythonversion/`, where :envvar:`prefix` and +:envvar:`exec_prefix` are defined by the corresponding parameters to Python's +:program:`configure` script and *version* is ``sys.version[:3]``. On Windows, +the headers are installed in :file:`{prefix}/include`, where :envvar:`prefix` is +the installation directory specified to the installer. + +To include the headers, place both directories (if different) on your compiler's +search path for includes. Do *not* place the parent directories on the search +path and then use ``#include ``; this will break on +multi-platform builds since the platform independent headers under +:envvar:`prefix` include the platform specific headers from +:envvar:`exec_prefix`. + +C++ users should note that though the API is defined entirely using C, the +header files do properly declare the entry points to be ``extern "C"``, so there +is no need to do anything special to use the API from C++. + + +.. _api-objects: + +Objects, Types and Reference Counts +=================================== + +.. index:: object: type + +Most Python/C API functions have one or more arguments as well as a return value +of type :ctype:`PyObject\*`. This type is a pointer to an opaque data type +representing an arbitrary Python object. Since all Python object types are +treated the same way by the Python language in most situations (e.g., +assignments, scope rules, and argument passing), it is only fitting that they +should be represented by a single C type. Almost all Python objects live on the +heap: you never declare an automatic or static variable of type +:ctype:`PyObject`, only pointer variables of type :ctype:`PyObject\*` can be +declared. The sole exception are the type objects; since these must never be +deallocated, they are typically static :ctype:`PyTypeObject` objects. 
+ +All Python objects (even Python integers) have a :dfn:`type` and a +:dfn:`reference count`. An object's type determines what kind of object it is +(e.g., an integer, a list, or a user-defined function; there are many more as +explained in :ref:`types`). For each of the well-known types there is a macro +to check whether an object is of that type; for instance, ``PyList_Check(a)`` is +true if (and only if) the object pointed to by *a* is a Python list. + + +.. _api-refcounts: + +Reference Counts +---------------- + +The reference count is important because today's computers have a finite (and +often severely limited) memory size; it counts how many different places there +are that have a reference to an object. Such a place could be another object, +or a global (or static) C variable, or a local variable in some C function. +When an object's reference count becomes zero, the object is deallocated. If +it contains references to other objects, their reference count is decremented. +Those other objects may be deallocated in turn, if this decrement makes their +reference count become zero, and so on. (There's an obvious problem with +objects that reference each other here; for now, the solution is "don't do +that.") + +.. index:: + single: Py_INCREF() + single: Py_DECREF() + +Reference counts are always manipulated explicitly. The normal way is to use +the macro :cfunc:`Py_INCREF` to increment an object's reference count by one, +and :cfunc:`Py_DECREF` to decrement it by one. The :cfunc:`Py_DECREF` macro +is considerably more complex than the incref one, since it must check whether +the reference count becomes zero and then cause the object's deallocator to be +called. The deallocator is a function pointer contained in the object's type +structure. 
The type-specific deallocator takes care of decrementing the +reference counts for other objects contained in the object if this is a compound +object type, such as a list, as well as performing any additional finalization +that's needed. There's no chance that the reference count can overflow; at +least as many bits are used to hold the reference count as there are distinct +memory locations in virtual memory (assuming ``sizeof(long) >= sizeof(char*)``). +Thus, the reference count increment is a simple operation. + +It is not necessary to increment an object's reference count for every local +variable that contains a pointer to an object. In theory, the object's +reference count goes up by one when the variable is made to point to it and it +goes down by one when the variable goes out of scope. However, these two +cancel each other out, so at the end the reference count hasn't changed. The +only real reason to use the reference count is to prevent the object from being +deallocated as long as our variable is pointing to it. If we know that there +is at least one other reference to the object that lives at least as long as +our variable, there is no need to increment the reference count temporarily. +An important situation where this arises is in objects that are passed as +arguments to C functions in an extension module that are called from Python; +the call mechanism guarantees to hold a reference to every argument for the +duration of the call. + +However, a common pitfall is to extract an object from a list and hold on to it +for a while without incrementing its reference count. Some other operation might +conceivably remove the object from the list, decrementing its reference count +and possible deallocating it. The real danger is that innocent-looking +operations may invoke arbitrary Python code which could do this; there is a code +path which allows control to flow back to the user from a :cfunc:`Py_DECREF`, so +almost any operation is potentially dangerous. 
+ +A safe approach is to always use the generic operations (functions whose name +begins with ``PyObject_``, ``PyNumber_``, ``PySequence_`` or ``PyMapping_``). +These operations always increment the reference count of the object they return. +This leaves the caller with the responsibility to call :cfunc:`Py_DECREF` when +they are done with the result; this soon becomes second nature. + + +.. _api-refcountdetails: + +Reference Count Details +^^^^^^^^^^^^^^^^^^^^^^^ + +The reference count behavior of functions in the Python/C API is best explained +in terms of *ownership of references*. Ownership pertains to references, never +to objects (objects are not owned: they are always shared). "Owning a +reference" means being responsible for calling Py_DECREF on it when the +reference is no longer needed. Ownership can also be transferred, meaning that +the code that receives ownership of the reference then becomes responsible for +eventually decref'ing it by calling :cfunc:`Py_DECREF` or :cfunc:`Py_XDECREF` +when it's no longer needed---or passing on this responsibility (usually to its +caller). When a function passes ownership of a reference on to its caller, the +caller is said to receive a *new* reference. When no ownership is transferred, +the caller is said to *borrow* the reference. Nothing needs to be done for a +borrowed reference. + +Conversely, when a calling function passes it a reference to an object, there +are two possibilities: the function *steals* a reference to the object, or it +does not. *Stealing a reference* means that when you pass a reference to a +function, that function assumes that it now owns that reference, and you are not +responsible for it any longer. + +.. index:: + single: PyList_SetItem() + single: PyTuple_SetItem() + +Few functions steal references; the two notable exceptions are +:cfunc:`PyList_SetItem` and :cfunc:`PyTuple_SetItem`, which steal a reference +to the item (but not to the tuple or list into which the item is put!). 
These +functions were designed to steal a reference because of a common idiom for +populating a tuple or list with newly created objects; for example, the code to +create the tuple ``(1, 2, "three")`` could look like this (forgetting about +error handling for the moment; a better way to code this is shown below):: + + PyObject *t; + + t = PyTuple_New(3); + PyTuple_SetItem(t, 0, PyInt_FromLong(1L)); + PyTuple_SetItem(t, 1, PyInt_FromLong(2L)); + PyTuple_SetItem(t, 2, PyString_FromString("three")); + +Here, :cfunc:`PyInt_FromLong` returns a new reference which is immediately +stolen by :cfunc:`PyTuple_SetItem`. When you want to keep using an object +although the reference to it will be stolen, use :cfunc:`Py_INCREF` to grab +another reference before calling the reference-stealing function. + +Incidentally, :cfunc:`PyTuple_SetItem` is the *only* way to set tuple items; +:cfunc:`PySequence_SetItem` and :cfunc:`PyObject_SetItem` refuse to do this +since tuples are an immutable data type. You should only use +:cfunc:`PyTuple_SetItem` for tuples that you are creating yourself. + +Equivalent code for populating a list can be written using :cfunc:`PyList_New` +and :cfunc:`PyList_SetItem`. + +However, in practice, you will rarely use these ways of creating and populating +a tuple or list. There's a generic function, :cfunc:`Py_BuildValue`, that can +create most common objects from C values, directed by a :dfn:`format string`. +For example, the above two blocks of code could be replaced by the following +(which also takes care of the error checking):: + + PyObject *tuple, *list; + + tuple = Py_BuildValue("(iis)", 1, 2, "three"); + list = Py_BuildValue("[iis]", 1, 2, "three"); + +It is much more common to use :cfunc:`PyObject_SetItem` and friends with items +whose references you are only borrowing, like arguments that were passed in to +the function you are writing. 
In that case, their behaviour regarding reference +counts is much saner, since you don't have to increment a reference count so you +can give a reference away ("have it be stolen"). For example, this function +sets all items of a list (actually, any mutable sequence) to a given item:: + + int + set_all(PyObject *target, PyObject *item) + { + int i, n; + + n = PyObject_Length(target); + if (n < 0) + return -1; + for (i = 0; i < n; i++) { + PyObject *index = PyInt_FromLong(i); + if (!index) + return -1; + if (PyObject_SetItem(target, index, item) < 0) + return -1; + Py_DECREF(index); + } + return 0; + } + +.. index:: single: set_all() + +The situation is slightly different for function return values. While passing +a reference to most functions does not change your ownership responsibilities +for that reference, many functions that return a reference to an object give +you ownership of the reference. The reason is simple: in many cases, the +returned object is created on the fly, and the reference you get is the only +reference to the object. Therefore, the generic functions that return object +references, like :cfunc:`PyObject_GetItem` and :cfunc:`PySequence_GetItem`, +always return a new reference (the caller becomes the owner of the reference). + +It is important to realize that whether you own a reference returned by a +function depends on which function you call only --- *the plumage* (the type of +the object passed as an argument to the function) *doesn't enter into it!* +Thus, if you extract an item from a list using :cfunc:`PyList_GetItem`, you +don't own the reference --- but if you obtain the same item from the same list +using :cfunc:`PySequence_GetItem` (which happens to take exactly the same +arguments), you do own a reference to the returned object. + +.. 
index:: + single: PyList_GetItem() + single: PySequence_GetItem() + +Here is an example of how you could write a function that computes the sum of +the items in a list of integers; once using :cfunc:`PyList_GetItem`, and once +using :cfunc:`PySequence_GetItem`. :: + + long + sum_list(PyObject *list) + { + int i, n; + long total = 0; + PyObject *item; + + n = PyList_Size(list); + if (n < 0) + return -1; /* Not a list */ + for (i = 0; i < n; i++) { + item = PyList_GetItem(list, i); /* Can't fail */ + if (!PyInt_Check(item)) continue; /* Skip non-integers */ + total += PyInt_AsLong(item); + } + return total; + } + +.. index:: single: sum_list() + +:: + + long + sum_sequence(PyObject *sequence) + { + int i, n; + long total = 0; + PyObject *item; + n = PySequence_Length(sequence); + if (n < 0) + return -1; /* Has no length */ + for (i = 0; i < n; i++) { + item = PySequence_GetItem(sequence, i); + if (item == NULL) + return -1; /* Not a sequence, or other failure */ + if (PyInt_Check(item)) + total += PyInt_AsLong(item); + Py_DECREF(item); /* Discard reference ownership */ + } + return total; + } + +.. index:: single: sum_sequence() + + +.. _api-types: + +Types +----- + +There are few other data types that play a significant role in the Python/C +API; most are simple C types such as :ctype:`int`, :ctype:`long`, +:ctype:`double` and :ctype:`char\*`. A few structure types are used to +describe static tables used to list the functions exported by a module or the +data attributes of a new object type, and another is used to describe the value +of a complex number. These will be discussed together with the functions that +use them. + + +.. 
_api-exceptions: + +Exceptions +========== + +The Python programmer only needs to deal with exceptions if specific error +handling is required; unhandled exceptions are automatically propagated to the +caller, then to the caller's caller, and so on, until they reach the top-level +interpreter, where they are reported to the user accompanied by a stack +traceback. + +.. index:: single: PyErr_Occurred() + +For C programmers, however, error checking always has to be explicit. All +functions in the Python/C API can raise exceptions, unless an explicit claim is +made otherwise in a function's documentation. In general, when a function +encounters an error, it sets an exception, discards any object references that +it owns, and returns an error indicator --- usually *NULL* or ``-1``. A few +functions return a Boolean true/false result, with false indicating an error. +Very few functions return no explicit error indicator or have an ambiguous +return value, and require explicit testing for errors with +:cfunc:`PyErr_Occurred`. + +.. index:: + single: PyErr_SetString() + single: PyErr_Clear() + +Exception state is maintained in per-thread storage (this is equivalent to +using global storage in an unthreaded application). A thread can be in one of +two states: an exception has occurred, or not. The function +:cfunc:`PyErr_Occurred` can be used to check for this: it returns a borrowed +reference to the exception type object when an exception has occurred, and +*NULL* otherwise. There are a number of functions to set the exception state: +:cfunc:`PyErr_SetString` is the most common (though not the most general) +function to set the exception state, and :cfunc:`PyErr_Clear` clears the +exception state. + +The full exception state consists of three objects (all of which can be +*NULL*): the exception type, the corresponding exception value, and the +traceback. 
These have the same meanings as the Python result of +``sys.exc_info()``; however, they are not the same: the Python objects represent +the last exception being handled by a Python :keyword:`try` ... +:keyword:`except` statement, while the C level exception state only exists while +an exception is being passed on between C functions until it reaches the Python +bytecode interpreter's main loop, which takes care of transferring it to +``sys.exc_info()`` and friends. + +.. index:: single: exc_info() (in module sys) + +Note that starting with Python 1.5, the preferred, thread-safe way to access the +exception state from Python code is to call the function :func:`sys.exc_info`, +which returns the per-thread exception state for Python code. Also, the +semantics of both ways to access the exception state have changed so that a +function which catches an exception will save and restore its thread's exception +state so as to preserve the exception state of its caller. This prevents common +bugs in exception handling code caused by an innocent-looking function +overwriting the exception being handled; it also reduces the often unwanted +lifetime extension for objects that are referenced by the stack frames in the +traceback. + +As a general principle, a function that calls another function to perform some +task should check whether the called function raised an exception, and if so, +pass the exception state on to its caller. It should discard any object +references that it owns, and return an error indicator, but it should *not* set +another exception --- that would overwrite the exception that was just raised, +and lose important information about the exact cause of the error. + +.. index:: single: sum_sequence() + +A simple example of detecting exceptions and passing them on is shown in the +:cfunc:`sum_sequence` example above. It so happens that that example doesn't +need to clean up any owned references when it detects an error. 
The following +example function shows some error cleanup. First, to remind you why you like +Python, we show the equivalent Python code:: + + def incr_item(dict, key): + try: + item = dict[key] + except KeyError: + item = 0 + dict[key] = item + 1 + +.. index:: single: incr_item() + +Here is the corresponding C code, in all its glory:: + + int + incr_item(PyObject *dict, PyObject *key) + { + /* Objects all initialized to NULL for Py_XDECREF */ + PyObject *item = NULL, *const_one = NULL, *incremented_item = NULL; + int rv = -1; /* Return value initialized to -1 (failure) */ + + item = PyObject_GetItem(dict, key); + if (item == NULL) { + /* Handle KeyError only: */ + if (!PyErr_ExceptionMatches(PyExc_KeyError)) + goto error; + + /* Clear the error and use zero: */ + PyErr_Clear(); + item = PyInt_FromLong(0L); + if (item == NULL) + goto error; + } + const_one = PyInt_FromLong(1L); + if (const_one == NULL) + goto error; + + incremented_item = PyNumber_Add(item, const_one); + if (incremented_item == NULL) + goto error; + + if (PyObject_SetItem(dict, key, incremented_item) < 0) + goto error; + rv = 0; /* Success */ + /* Continue with cleanup code */ + + error: + /* Cleanup code, shared by success and failure path */ + + /* Use Py_XDECREF() to ignore NULL references */ + Py_XDECREF(item); + Py_XDECREF(const_one); + Py_XDECREF(incremented_item); + + return rv; /* -1 for error, 0 for success */ + } + +.. index:: single: incr_item() + +.. index:: + single: PyErr_ExceptionMatches() + single: PyErr_Clear() + single: Py_XDECREF() + +This example represents an endorsed use of the :keyword:`goto` statement in C! +It illustrates the use of :cfunc:`PyErr_ExceptionMatches` and +:cfunc:`PyErr_Clear` to handle specific exceptions, and the use of +:cfunc:`Py_XDECREF` to dispose of owned references that may be *NULL* (note the +``'X'`` in the name; :cfunc:`Py_DECREF` would crash when confronted with a +*NULL* reference). 
It is important that the variables used to hold owned +references are initialized to *NULL* for this to work; likewise, the proposed +return value is initialized to ``-1`` (failure) and only set to success after +the final call made is successful. + + +.. _api-embedding: + +Embedding Python +================ + +The one important task that only embedders (as opposed to extension writers) of +the Python interpreter have to worry about is the initialization, and possibly +the finalization, of the Python interpreter. Most functionality of the +interpreter can only be used after the interpreter has been initialized. + +.. index:: + single: Py_Initialize() + module: __builtin__ + module: __main__ + module: sys + module: exceptions + triple: module; search; path + single: path (in module sys) + +The basic initialization function is :cfunc:`Py_Initialize`. This initializes +the table of loaded modules, and creates the fundamental modules +:mod:`__builtin__`, :mod:`__main__`, :mod:`sys`, and :mod:`exceptions`. It also +initializes the module search path (``sys.path``). + +.. index:: single: PySys_SetArgv() + +:cfunc:`Py_Initialize` does not set the "script argument list" (``sys.argv``). +If this variable is needed by Python code that will be executed later, it must +be set explicitly with a call to ``PySys_SetArgv(argc, argv)`` subsequent to +the call to :cfunc:`Py_Initialize`. + +On most systems (in particular, on Unix and Windows, although the details are +slightly different), :cfunc:`Py_Initialize` calculates the module search path +based upon its best guess for the location of the standard Python interpreter +executable, assuming that the Python library is found in a fixed location +relative to the Python interpreter executable. In particular, it looks for a +directory named :file:`lib/python{X.Y}` relative to the parent directory +where the executable named :file:`python` is found on the shell command search +path (the environment variable :envvar:`PATH`). 
+ +For instance, if the Python executable is found in +:file:`/usr/local/bin/python`, it will assume that the libraries are in +:file:`/usr/local/lib/python{X.Y}`. (In fact, this particular path is also +the "fallback" location, used when no executable file named :file:`python` is +found along :envvar:`PATH`.) The user can override this behavior by setting the +environment variable :envvar:`PYTHONHOME`, or insert additional directories in +front of the standard path by setting :envvar:`PYTHONPATH`. + +.. index:: + single: Py_SetProgramName() + single: Py_GetPath() + single: Py_GetPrefix() + single: Py_GetExecPrefix() + single: Py_GetProgramFullPath() + +The embedding application can steer the search by calling +``Py_SetProgramName(file)`` *before* calling :cfunc:`Py_Initialize`. Note that +:envvar:`PYTHONHOME` still overrides this and :envvar:`PYTHONPATH` is still +inserted in front of the standard path. An application that requires total +control has to provide its own implementation of :cfunc:`Py_GetPath`, +:cfunc:`Py_GetPrefix`, :cfunc:`Py_GetExecPrefix`, and +:cfunc:`Py_GetProgramFullPath` (all defined in :file:`Modules/getpath.c`). + +.. index:: single: Py_IsInitialized() + +Sometimes, it is desirable to "uninitialize" Python. For instance, the +application may want to start over (make another call to +:cfunc:`Py_Initialize`) or the application is simply done with its use of +Python and wants to free memory allocated by Python. This can be accomplished +by calling :cfunc:`Py_Finalize`. The function :cfunc:`Py_IsInitialized` returns +true if Python is currently in the initialized state. More information about +these functions is given in a later chapter. Notice that :cfunc:`Py_Finalize` +does *not* free all memory allocated by the Python interpreter, e.g. memory +allocated by extension modules currently cannot be released. + + +.. 
_api-debugging: + +Debugging Builds +================ + +Python can be built with several macros to enable extra checks of the +interpreter and extension modules. These checks tend to add a large amount of +overhead to the runtime so they are not enabled by default. + +A full list of the various types of debugging builds is in the file +:file:`Misc/SpecialBuilds.txt` in the Python source distribution. Builds are +available that support tracing of reference counts, debugging the memory +allocator, or low-level profiling of the main interpreter loop. Only the most +frequently-used builds will be described in the remainder of this section. + +Compiling the interpreter with the :cmacro:`Py_DEBUG` macro defined produces +what is generally meant by "a debug build" of Python. :cmacro:`Py_DEBUG` is +enabled in the Unix build by adding :option:`--with-pydebug` to the +:file:`configure` command. It is also implied by the presence of the +not-Python-specific :cmacro:`_DEBUG` macro. When :cmacro:`Py_DEBUG` is enabled +in the Unix build, compiler optimization is disabled. + +In addition to the reference count debugging described below, the following +extra checks are performed: + +* Extra checks are added to the object allocator. + +* Extra checks are added to the parser and compiler. + +* Downcasts from wide types to narrow types are checked for loss of information. + +* A number of assertions are added to the dictionary and set implementations. + In addition, the set object acquires a :meth:`test_c_api` method. + +* Sanity checks of the input arguments are added to frame creation. + +* The storage for long ints is initialized with a known invalid pattern to catch + reference to uninitialized digits. + +* Low-level tracing and extra exception checking are added to the runtime + virtual machine. + +* Extra checks are added to the memory arena implementation. + +* Extra debugging is added to the thread module. + +There may be additional checks not mentioned here. 
+ +Defining :cmacro:`Py_TRACE_REFS` enables reference tracing. When defined, a +circular doubly linked list of active objects is maintained by adding two extra +fields to every :ctype:`PyObject`. Total allocations are tracked as well. Upon +exit, all existing references are printed. (In interactive mode this happens +after every statement run by the interpreter.) Implied by :cmacro:`Py_DEBUG`. + +Please refer to :file:`Misc/SpecialBuilds.txt` in the Python source distribution +for more detailed information. + diff --git a/Doc/c-api/memory.rst b/Doc/c-api/memory.rst new file mode 100644 index 0000000..1dcb115 --- /dev/null +++ b/Doc/c-api/memory.rst @@ -0,0 +1,207 @@ +.. highlightlang:: c + + +.. _memory: + +***************** +Memory Management +***************** + +.. sectionauthor:: Vladimir Marangozov + + + +.. _memoryoverview: + +Overview +======== + +Memory management in Python involves a private heap containing all Python +objects and data structures. The management of this private heap is ensured +internally by the *Python memory manager*. The Python memory manager has +different components which deal with various dynamic storage management aspects, +like sharing, segmentation, preallocation or caching. + +At the lowest level, a raw memory allocator ensures that there is enough room in +the private heap for storing all Python-related data by interacting with the +memory manager of the operating system. On top of the raw memory allocator, +several object-specific allocators operate on the same heap and implement +distinct memory management policies adapted to the peculiarities of every object +type. For example, integer objects are managed differently within the heap than +strings, tuples or dictionaries because integers imply different storage +requirements and speed/space tradeoffs. The Python memory manager thus delegates +some of the work to the object-specific allocators, but ensures that the latter +operate within the bounds of the private heap. 
+ +It is important to understand that the management of the Python heap is +performed by the interpreter itself and that the user has no control over it, +even if she regularly manipulates object pointers to memory blocks inside that +heap. The allocation of heap space for Python objects and other internal +buffers is performed on demand by the Python memory manager through the Python/C +API functions listed in this document. + +.. index:: + single: malloc() + single: calloc() + single: realloc() + single: free() + +To avoid memory corruption, extension writers should never try to operate on +Python objects with the functions exported by the C library: :cfunc:`malloc`, +:cfunc:`calloc`, :cfunc:`realloc` and :cfunc:`free`. This will result in mixed +calls between the C allocator and the Python memory manager with fatal +consequences, because they implement different algorithms and operate on +different heaps. However, one may safely allocate and release memory blocks +with the C library allocator for individual purposes, as shown in the following +example:: + + PyObject *res; + char *buf = (char *) malloc(BUFSIZ); /* for I/O */ + + if (buf == NULL) + return PyErr_NoMemory(); + ...Do some I/O operation involving buf... + res = PyString_FromString(buf); + free(buf); /* malloc'ed */ + return res; + +In this example, the memory request for the I/O buffer is handled by the C +library allocator. The Python memory manager is involved only in the allocation +of the string object returned as a result. + +In most situations, however, it is recommended to allocate memory from the +Python heap specifically because the latter is under control of the Python +memory manager. For example, this is required when the interpreter is extended +with new object types written in C. Another reason for using the Python heap is +the desire to *inform* the Python memory manager about the memory needs of the +extension module. 
Even when the requested memory is used exclusively for +internal, highly-specific purposes, delegating all memory requests to the Python +memory manager causes the interpreter to have a more accurate image of its +memory footprint as a whole. Consequently, under certain circumstances, the +Python memory manager may or may not trigger appropriate actions, like garbage +collection, memory compaction or other preventive procedures. Note that by using +the C library allocator as shown in the previous example, the allocated memory +for the I/O buffer escapes completely the Python memory manager. + + +.. _memoryinterface: + +Memory Interface +================ + +The following function sets, modeled after the ANSI C standard, but specifying +behavior when requesting zero bytes, are available for allocating and releasing +memory from the Python heap: + + +.. cfunction:: void* PyMem_Malloc(size_t n) + + Allocates *n* bytes and returns a pointer of type :ctype:`void\*` to the + allocated memory, or *NULL* if the request fails. Requesting zero bytes returns + a distinct non-*NULL* pointer if possible, as if :cfunc:`PyMem_Malloc(1)` had + been called instead. The memory will not have been initialized in any way. + + +.. cfunction:: void* PyMem_Realloc(void *p, size_t n) + + Resizes the memory block pointed to by *p* to *n* bytes. The contents will be + unchanged to the minimum of the old and the new sizes. If *p* is *NULL*, the + call is equivalent to :cfunc:`PyMem_Malloc(n)`; else if *n* is equal to zero, + the memory block is resized but is not freed, and the returned pointer is + non-*NULL*. Unless *p* is *NULL*, it must have been returned by a previous call + to :cfunc:`PyMem_Malloc` or :cfunc:`PyMem_Realloc`. If the request fails, + :cfunc:`PyMem_Realloc` returns *NULL* and *p* remains a valid pointer to the + previous memory area. + + +.. 
cfunction:: void PyMem_Free(void *p) + + Frees the memory block pointed to by *p*, which must have been returned by a + previous call to :cfunc:`PyMem_Malloc` or :cfunc:`PyMem_Realloc`. Otherwise, or + if :cfunc:`PyMem_Free(p)` has been called before, undefined behavior occurs. If + *p* is *NULL*, no operation is performed. + +The following type-oriented macros are provided for convenience. Note that +*TYPE* refers to any C type. + + +.. cfunction:: TYPE* PyMem_New(TYPE, size_t n) + + Same as :cfunc:`PyMem_Malloc`, but allocates ``(n * sizeof(TYPE))`` bytes of + memory. Returns a pointer cast to :ctype:`TYPE\*`. The memory will not have + been initialized in any way. + + +.. cfunction:: TYPE* PyMem_Resize(void *p, TYPE, size_t n) + + Same as :cfunc:`PyMem_Realloc`, but the memory block is resized to ``(n * + sizeof(TYPE))`` bytes. Returns a pointer cast to :ctype:`TYPE\*`. On return, + *p* will be a pointer to the new memory area, or *NULL* in the event of failure. + + +.. cfunction:: void PyMem_Del(void *p) + + Same as :cfunc:`PyMem_Free`. + +In addition, the following macro sets are provided for calling the Python memory +allocator directly, without involving the C API functions listed above. However, +note that their use does not preserve binary compatibility across Python +versions and is therefore deprecated in extension modules. + +:cfunc:`PyMem_MALLOC`, :cfunc:`PyMem_REALLOC`, :cfunc:`PyMem_FREE`. + +:cfunc:`PyMem_NEW`, :cfunc:`PyMem_RESIZE`, :cfunc:`PyMem_DEL`. + + +.. _memoryexamples: + +Examples +======== + +Here is the example from section :ref:`memoryoverview`, rewritten so that the +I/O buffer is allocated from the Python heap by using the first function set:: + + PyObject *res; + char *buf = (char *) PyMem_Malloc(BUFSIZ); /* for I/O */ + + if (buf == NULL) + return PyErr_NoMemory(); + /* ...Do some I/O operation involving buf... 
*/ + res = PyString_FromString(buf); + PyMem_Free(buf); /* allocated with PyMem_Malloc */ + return res; + +The same code using the type-oriented function set:: + + PyObject *res; + char *buf = PyMem_New(char, BUFSIZ); /* for I/O */ + + if (buf == NULL) + return PyErr_NoMemory(); + /* ...Do some I/O operation involving buf... */ + res = PyString_FromString(buf); + PyMem_Del(buf); /* allocated with PyMem_New */ + return res; + +Note that in the two examples above, the buffer is always manipulated via +functions belonging to the same set. Indeed, it is required to use the same +memory API family for a given memory block, so that the risk of mixing different +allocators is reduced to a minimum. The following code sequence contains two +errors, one of which is labeled as *fatal* because it mixes two different +allocators operating on different heaps. :: + + char *buf1 = PyMem_New(char, BUFSIZ); + char *buf2 = (char *) malloc(BUFSIZ); + char *buf3 = (char *) PyMem_Malloc(BUFSIZ); + ... + PyMem_Del(buf3); /* Wrong -- should be PyMem_Free() */ + free(buf2); /* Right -- allocated via malloc() */ + free(buf1); /* Fatal -- should be PyMem_Del() */ + +In addition to the functions aimed at handling raw memory blocks from the Python +heap, objects in Python are allocated and released with :cfunc:`PyObject_New`, +:cfunc:`PyObject_NewVar` and :cfunc:`PyObject_Del`. + +These will be explained in the next chapter on defining and implementing new +object types in C. + diff --git a/Doc/c-api/newtypes.rst b/Doc/c-api/newtypes.rst new file mode 100644 index 0000000..5933f99 --- /dev/null +++ b/Doc/c-api/newtypes.rst @@ -0,0 +1,1740 @@ +.. highlightlang:: c + + +.. _newtypes: + +***************************** +Object Implementation Support +***************************** + +This chapter describes the functions, types, and macros used when defining new +object types. + + +.. _allocating-objects: + +Allocating Objects on the Heap +============================== + + +.. 
cfunction:: PyObject* _PyObject_New(PyTypeObject *type) + + +.. cfunction:: PyVarObject* _PyObject_NewVar(PyTypeObject *type, Py_ssize_t size) + + +.. cfunction:: void _PyObject_Del(PyObject *op) + + +.. cfunction:: PyObject* PyObject_Init(PyObject *op, PyTypeObject *type) + + Initialize a newly-allocated object *op* with its type and initial reference. + Returns the initialized object. If *type* indicates that the object + participates in the cyclic garbage detector, it is added to the detector's set + of observed objects. Other fields of the object are not affected. + + +.. cfunction:: PyVarObject* PyObject_InitVar(PyVarObject *op, PyTypeObject *type, Py_ssize_t size) + + This does everything :cfunc:`PyObject_Init` does, and also initializes the + length information for a variable-size object. + + +.. cfunction:: TYPE* PyObject_New(TYPE, PyTypeObject *type) + + Allocate a new Python object using the C structure type *TYPE* and the Python + type object *type*. Fields not defined by the Python object header are not + initialized; the object's reference count will be one. The size of the memory + allocation is determined from the :attr:`tp_basicsize` field of the type object. + + +.. cfunction:: TYPE* PyObject_NewVar(TYPE, PyTypeObject *type, Py_ssize_t size) + + Allocate a new Python object using the C structure type *TYPE* and the Python + type object *type*. Fields not defined by the Python object header are not + initialized. The allocated memory allows for the *TYPE* structure plus *size* + fields of the size given by the :attr:`tp_itemsize` field of *type*. This is + useful for implementing objects like tuples, which are able to determine their + size at construction time. Embedding the array of fields into the same + allocation decreases the number of allocations, improving the memory management + efficiency. + + +.. 
cfunction:: void PyObject_Del(PyObject *op) + + Releases memory allocated to an object using :cfunc:`PyObject_New` or + :cfunc:`PyObject_NewVar`. This is normally called from the :attr:`tp_dealloc` + handler specified in the object's type. The fields of the object should not be + accessed after this call as the memory is no longer a valid Python object. + + +.. cfunction:: PyObject* Py_InitModule(char *name, PyMethodDef *methods) + + Create a new module object based on a name and table of functions, returning the + new module object. + + .. versionchanged:: 2.3 + Older versions of Python did not support *NULL* as the value for the *methods* + argument. + + +.. cfunction:: PyObject* Py_InitModule3(char *name, PyMethodDef *methods, char *doc) + + Create a new module object based on a name and table of functions, returning the + new module object. If *doc* is non-*NULL*, it will be used to define the + docstring for the module. + + .. versionchanged:: 2.3 + Older versions of Python did not support *NULL* as the value for the *methods* + argument. + + +.. cfunction:: PyObject* Py_InitModule4(char *name, PyMethodDef *methods, char *doc, PyObject *self, int apiver) + + Create a new module object based on a name and table of functions, returning the + new module object. If *doc* is non-*NULL*, it will be used to define the + docstring for the module. If *self* is non-*NULL*, it will be passed to the + functions of the module as their (otherwise *NULL*) first parameter. (This was + added as an experimental feature, and there are no known uses in the current + version of Python.) For *apiver*, the only value which should be passed is + defined by the constant :const:`PYTHON_API_VERSION`. + + .. note:: + + Most uses of this function should probably be using the :cfunc:`Py_InitModule3` + instead; only use this if you are sure you need it. + + .. versionchanged:: 2.3 + Older versions of Python did not support *NULL* as the value for the *methods* + argument. + + +..
cvar:: PyObject _Py_NoneStruct + + Object which is visible in Python as ``None``. This should only be accessed + using the ``Py_None`` macro, which evaluates to a pointer to this object. + + +.. _common-structs: + +Common Object Structures +======================== + +There are a large number of structures which are used in the definition of +object types for Python. This section describes these structures and how they +are used. + +All Python objects ultimately share a small number of fields at the beginning of +the object's representation in memory. These are represented by the +:ctype:`PyObject` and :ctype:`PyVarObject` types, which are defined, in turn, by +the expansions of some macros also used, whether directly or indirectly, in the +definition of all other Python objects. + + +.. ctype:: PyObject + + All object types are extensions of this type. This is a type which contains the + information Python needs to treat a pointer to an object as an object. In a + normal "release" build, it contains only the object's reference count and a + pointer to the corresponding type object. It corresponds to the fields defined + by the expansion of the ``PyObject_HEAD`` macro. + + +.. ctype:: PyVarObject + + This is an extension of :ctype:`PyObject` that adds the :attr:`ob_size` field. + This is only used for objects that have some notion of *length*. This type does + not often appear in the Python/C API. It corresponds to the fields defined by + the expansion of the ``PyObject_VAR_HEAD`` macro. + +These macros are used in the definition of :ctype:`PyObject` and +:ctype:`PyVarObject`: + + +.. cmacro:: PyObject_HEAD + + This is a macro which expands to the declarations of the fields of the + :ctype:`PyObject` type; it is used when declaring new types which represent + objects without a varying length. The specific fields it expands to depend on + the definition of :cmacro:`Py_TRACE_REFS`.
By default, that macro is not + defined, and :cmacro:`PyObject_HEAD` expands to:: + + Py_ssize_t ob_refcnt; + PyTypeObject *ob_type; + + When :cmacro:`Py_TRACE_REFS` is defined, it expands to:: + + PyObject *_ob_next, *_ob_prev; + Py_ssize_t ob_refcnt; + PyTypeObject *ob_type; + + +.. cmacro:: PyObject_VAR_HEAD + + This is a macro which expands to the declarations of the fields of the + :ctype:`PyVarObject` type; it is used when declaring new types which represent + objects with a length that varies from instance to instance. This macro always + expands to:: + + PyObject_HEAD + Py_ssize_t ob_size; + + Note that :cmacro:`PyObject_HEAD` is part of the expansion, and that its own + expansion varies depending on the definition of :cmacro:`Py_TRACE_REFS`. + +PyObject_HEAD_INIT + + +.. ctype:: PyCFunction + + Type of the functions used to implement most Python callables in C. Functions of + this type take two :ctype:`PyObject\*` parameters and return one such value. If + the return value is *NULL*, an exception shall have been set. If not *NULL*, + the return value is interpreted as the return value of the function as exposed + in Python. The function must return a new reference. + + +.. ctype:: PyMethodDef + + Structure used to describe a method of an extension type. 
This structure has + four fields: + + +------------------+-------------+-------------------------------+ + | Field | C Type | Meaning | + +==================+=============+===============================+ + | :attr:`ml_name` | char \* | name of the method | + +------------------+-------------+-------------------------------+ + | :attr:`ml_meth` | PyCFunction | pointer to the C | + | | | implementation | + +------------------+-------------+-------------------------------+ + | :attr:`ml_flags` | int | flag bits indicating how the | + | | | call should be constructed | + +------------------+-------------+-------------------------------+ + | :attr:`ml_doc` | char \* | points to the contents of the | + | | | docstring | + +------------------+-------------+-------------------------------+ + +The :attr:`ml_meth` is a C function pointer. The functions may be of different +types, but they always return :ctype:`PyObject\*`. If the function is not of +the :ctype:`PyCFunction` type, the compiler will require a cast in the method table. +Even though :ctype:`PyCFunction` defines the first parameter as +:ctype:`PyObject\*`, it is common that the method implementation uses the +specific C type of the *self* object. + +The :attr:`ml_flags` field is a bitfield which can include the following flags. +The individual flags indicate either a calling convention or a binding +convention. Of the calling convention flags, only :const:`METH_VARARGS` and +:const:`METH_KEYWORDS` can be combined (but note that :const:`METH_KEYWORDS` +alone is equivalent to ``METH_VARARGS | METH_KEYWORDS``). Any of the calling +convention flags can be combined with a binding flag. + + +.. data:: METH_VARARGS + + This is the typical calling convention, where the methods have the type + :ctype:`PyCFunction`. The function expects two :ctype:`PyObject\*` values.
The + first one is the *self* object for methods; for module functions, it has the + value given to :cfunc:`Py_InitModule4` (or *NULL* if :cfunc:`Py_InitModule` was + used). The second parameter (often called *args*) is a tuple object + representing all arguments. This parameter is typically processed using + :cfunc:`PyArg_ParseTuple` or :cfunc:`PyArg_UnpackTuple`. + + +.. data:: METH_KEYWORDS + + Methods with these flags must be of type :ctype:`PyCFunctionWithKeywords`. The + function expects three parameters: *self*, *args*, and a dictionary of all the + keyword arguments. The flag is typically combined with :const:`METH_VARARGS`, + and the parameters are typically processed using + :cfunc:`PyArg_ParseTupleAndKeywords`. + + +.. data:: METH_NOARGS + + Methods without parameters don't need to check whether arguments are given if + they are listed with the :const:`METH_NOARGS` flag. They need to be of type + :ctype:`PyCFunction`. When used with object methods, the first parameter is + typically named ``self`` and will hold a reference to the object instance. In + all cases the second parameter will be *NULL*. + + +.. data:: METH_O + + Methods with a single object argument can be listed with the :const:`METH_O` + flag, instead of invoking :cfunc:`PyArg_ParseTuple` with a ``"O"`` argument. + They have the type :ctype:`PyCFunction`, with the *self* parameter, and a + :ctype:`PyObject\*` parameter representing the single argument. + + +.. data:: METH_OLDARGS + + This calling convention is deprecated. The method must be of type + :ctype:`PyCFunction`. The second argument is *NULL* if no arguments are given, + a single object if exactly one argument is given, and a tuple of objects if more + than one argument is given. There is no way for a function using this + convention to distinguish between a call with multiple arguments and a call with + a tuple as the only argument. 
+ +These two constants are not used to indicate the calling convention but the +binding when used with methods of classes. These may not be used for functions +defined for modules. At most one of these flags may be set for any given +method. + + +.. data:: METH_CLASS + + .. index:: builtin: classmethod + + The method will be passed the type object as the first parameter rather than an + instance of the type. This is used to create *class methods*, similar to what + is created when using the :func:`classmethod` built-in function. + + .. versionadded:: 2.3 + + +.. data:: METH_STATIC + + .. index:: builtin: staticmethod + + The method will be passed *NULL* as the first parameter rather than an instance + of the type. This is used to create *static methods*, similar to what is + created when using the :func:`staticmethod` built-in function. + + .. versionadded:: 2.3 + +One other constant controls whether a method is loaded in place of another +definition with the same method name. + + +.. data:: METH_COEXIST + + The method will be loaded in place of existing definitions. Without + *METH_COEXIST*, the default is to skip repeated definitions. Since slot + wrappers are loaded before the method table, the existence of a *sq_contains* + slot, for example, would generate a wrapped method named :meth:`__contains__` + and preclude the loading of a corresponding PyCFunction with the same name. + With the flag defined, the PyCFunction will be loaded in place of the wrapper + object and will co-exist with the slot. This is helpful because calls to + PyCFunctions are optimized more than wrapper object calls. + + .. versionadded:: 2.4 + + +.. cfunction:: PyObject* Py_FindMethod(PyMethodDef table[], PyObject *ob, char *name) + + Return a bound method object for an extension type implemented in C. This can + be useful in the implementation of a :attr:`tp_getattro` or :attr:`tp_getattr` + handler that does not use the :cfunc:`PyObject_GenericGetAttr` function. + + +..
_type-structs: + +Type Objects +============ + +Perhaps one of the most important structures of the Python object system is the +structure that defines a new type: the :ctype:`PyTypeObject` structure. Type +objects can be handled using any of the :cfunc:`PyObject_\*` or +:cfunc:`PyType_\*` functions, but do not offer much that's interesting to most +Python applications. These objects are fundamental to how objects behave, so +they are very important to the interpreter itself and to any extension module +that implements new types. + +Type objects are fairly large compared to most of the standard types. The reason +for the size is that each type object stores a large number of values, mostly C +function pointers, each of which implements a small part of the type's +functionality. The fields of the type object are examined in detail in this +section. The fields will be described in the order in which they occur in the +structure. + +Typedefs: unaryfunc, binaryfunc, ternaryfunc, inquiry, coercion, intargfunc, +intintargfunc, intobjargproc, intintobjargproc, objobjargproc, destructor, +freefunc, printfunc, getattrfunc, getattrofunc, setattrfunc, setattrofunc, +cmpfunc, reprfunc, hashfunc + +The structure definition for :ctype:`PyTypeObject` can be found in +:file:`Include/object.h`. For convenience of reference, this repeats the +definition found there: + +.. literalinclude:: ../includes/typestruct.h + + +The type object structure extends the :ctype:`PyVarObject` structure. The +:attr:`ob_size` field is used for dynamic types (created by :func:`type_new`, +usually called from a class statement). Note that :cdata:`PyType_Type` (the +metatype) initializes :attr:`tp_itemsize`, which means that its instances (i.e. +type objects) *must* have the :attr:`ob_size` field. + + +.. cmember:: PyObject* PyObject._ob_next + PyObject* PyObject._ob_prev + + These fields are only present when the macro ``Py_TRACE_REFS`` is defined. 
+ Their initialization to *NULL* is taken care of by the ``PyObject_HEAD_INIT`` + macro. For statically allocated objects, these fields always remain *NULL*. + For dynamically allocated objects, these two fields are used to link the object + into a doubly-linked list of *all* live objects on the heap. This could be used + for various debugging purposes; currently the only use is to print the objects + that are still alive at the end of a run when the environment variable + :envvar:`PYTHONDUMPREFS` is set. + + These fields are not inherited by subtypes. + + +.. cmember:: Py_ssize_t PyObject.ob_refcnt + + This is the type object's reference count, initialized to ``1`` by the + ``PyObject_HEAD_INIT`` macro. Note that for statically allocated type objects, + the type's instances (objects whose :attr:`ob_type` points back to the type) do + *not* count as references. But for dynamically allocated type objects, the + instances *do* count as references. + + This field is not inherited by subtypes. + + +.. cmember:: PyTypeObject* PyObject.ob_type + + This is the type's type, in other words its metatype. It is initialized by the + argument to the ``PyObject_HEAD_INIT`` macro, and its value should normally be + ``&PyType_Type``. However, for dynamically loadable extension modules that must + be usable on Windows (at least), the compiler complains that this is not a valid + initializer. Therefore, the convention is to pass *NULL* to the + ``PyObject_HEAD_INIT`` macro and to initialize this field explicitly at the + start of the module's initialization function, before doing anything else. This + is typically done like this:: + + Foo_Type.ob_type = &PyType_Type; + + This should be done before any instances of the type are created. + :cfunc:`PyType_Ready` checks if :attr:`ob_type` is *NULL*, and if so, + initializes it: in Python 2.2, it is set to ``&PyType_Type``; in Python 2.2.1 + and later it is initialized to the :attr:`ob_type` field of the base class. 
+ :cfunc:`PyType_Ready` will not change this field if it is non-zero. + + In Python 2.2, this field is not inherited by subtypes. In 2.2.1, and in 2.3 + and beyond, it is inherited by subtypes. + + +.. cmember:: Py_ssize_t PyVarObject.ob_size + + For statically allocated type objects, this should be initialized to zero. For + dynamically allocated type objects, this field has a special internal meaning. + + This field is not inherited by subtypes. + + +.. cmember:: char* PyTypeObject.tp_name + + Pointer to a NUL-terminated string containing the name of the type. For types + that are accessible as module globals, the string should be the full module + name, followed by a dot, followed by the type name; for built-in types, it + should be just the type name. If the module is a submodule of a package, the + full package name is part of the full module name. For example, a type named + :class:`T` defined in module :mod:`M` in subpackage :mod:`Q` in package :mod:`P` + should have the :attr:`tp_name` initializer ``"P.Q.M.T"``. + + For dynamically allocated type objects, this should just be the type name, and + the module name explicitly stored in the type dict as the value for key + ``'__module__'``. + + For statically allocated type objects, the tp_name field should contain a dot. + Everything before the last dot is made accessible as the :attr:`__module__` + attribute, and everything after the last dot is made accessible as the + :attr:`__name__` attribute. + + If no dot is present, the entire :attr:`tp_name` field is made accessible as the + :attr:`__name__` attribute, and the :attr:`__module__` attribute is undefined + (unless explicitly set in the dictionary, as explained above). This means your + type will be impossible to pickle. + + This field is not inherited by subtypes. + + +.. cmember:: Py_ssize_t PyTypeObject.tp_basicsize + Py_ssize_t PyTypeObject.tp_itemsize + + These fields allow calculating the size in bytes of instances of the type. 
+ + There are two kinds of types: types with fixed-length instances have a zero + :attr:`tp_itemsize` field, types with variable-length instances have a non-zero + :attr:`tp_itemsize` field. For a type with fixed-length instances, all + instances have the same size, given in :attr:`tp_basicsize`. + + For a type with variable-length instances, the instances must have an + :attr:`ob_size` field, and the instance size is :attr:`tp_basicsize` plus N + times :attr:`tp_itemsize`, where N is the "length" of the object. The value of + N is typically stored in the instance's :attr:`ob_size` field. There are + exceptions: for example, long ints use a negative :attr:`ob_size` to indicate a + negative number, and N is ``abs(ob_size)`` there. Also, the presence of an + :attr:`ob_size` field in the instance layout doesn't mean that the instance + structure is variable-length (for example, the structure for the list type has + fixed-length instances, yet those instances have a meaningful :attr:`ob_size` + field). + + The basic size includes the fields in the instance declared by the macro + :cmacro:`PyObject_HEAD` or :cmacro:`PyObject_VAR_HEAD` (whichever is used to + declare the instance struct) and this in turn includes the :attr:`_ob_prev` and + :attr:`_ob_next` fields if they are present. This means that the only correct + way to get an initializer for the :attr:`tp_basicsize` is to use the + :keyword:`sizeof` operator on the struct used to declare the instance layout. + The basic size does not include the GC header size (this is new in Python 2.2; + in 2.1 and 2.0, the GC header size was included in :attr:`tp_basicsize`). + + These fields are inherited separately by subtypes. If the base type has a + non-zero :attr:`tp_itemsize`, it is generally not safe to set + :attr:`tp_itemsize` to a different non-zero value in a subtype (though this + depends on the implementation of the base type). 
+ + A note about alignment: if the variable items require a particular alignment, + this should be taken care of by the value of :attr:`tp_basicsize`. Example: + suppose a type implements an array of ``double``. :attr:`tp_itemsize` is + ``sizeof(double)``. It is the programmer's responsibility that + :attr:`tp_basicsize` is a multiple of ``sizeof(double)`` (assuming this is the + alignment requirement for ``double``). + + +.. cmember:: destructor PyTypeObject.tp_dealloc + + A pointer to the instance destructor function. This function must be defined + unless the type guarantees that its instances will never be deallocated (as is + the case for the singletons ``None`` and ``Ellipsis``). + + The destructor function is called by the :cfunc:`Py_DECREF` and + :cfunc:`Py_XDECREF` macros when the new reference count is zero. At this point, + the instance is still in existence, but there are no references to it. The + destructor function should free all references which the instance owns, free all + memory buffers owned by the instance (using the freeing function corresponding + to the allocation function used to allocate the buffer), and finally (as its + last action) call the type's :attr:`tp_free` function. If the type is not + subtypable (doesn't have the :const:`Py_TPFLAGS_BASETYPE` flag bit set), it is + permissible to call the object deallocator directly instead of via + :attr:`tp_free`. The object deallocator should be the one used to allocate the + instance; this is normally :cfunc:`PyObject_Del` if the instance was allocated + using :cfunc:`PyObject_New` or :cfunc:`PyObject_VarNew`, or + :cfunc:`PyObject_GC_Del` if the instance was allocated using + :cfunc:`PyObject_GC_New` or :cfunc:`PyObject_GC_VarNew`. + + This field is inherited by subtypes. + + +.. cmember:: printfunc PyTypeObject.tp_print + + An optional pointer to the instance print function. 
+ + The print function is only called when the instance is printed to a *real* file; + when it is printed to a pseudo-file (like a :class:`StringIO` instance), the + instance's :attr:`tp_repr` or :attr:`tp_str` function is called to convert it to + a string. These are also called when the type's :attr:`tp_print` field is + *NULL*. A type should never implement :attr:`tp_print` in a way that produces + different output than :attr:`tp_repr` or :attr:`tp_str` would. + + The print function is called with the same signature as :cfunc:`PyObject_Print`: + ``int tp_print(PyObject *self, FILE *file, int flags)``. The *self* argument is + the instance to be printed. The *file* argument is the stdio file to which it + is to be printed. The *flags* argument is composed of flag bits. The only flag + bit currently defined is :const:`Py_PRINT_RAW`. When the :const:`Py_PRINT_RAW` + flag bit is set, the instance should be printed the same way as :attr:`tp_str` + would format it; when the :const:`Py_PRINT_RAW` flag bit is clear, the instance + should be printed the same way as :attr:`tp_repr` would format it. It should + return ``-1`` and set an exception condition when an error occurred during + printing. + + It is possible that the :attr:`tp_print` field will be deprecated. In any case, + it is recommended not to define :attr:`tp_print`, but instead to rely on + :attr:`tp_repr` and :attr:`tp_str` for printing. + + This field is inherited by subtypes. + + +.. cmember:: getattrfunc PyTypeObject.tp_getattr + + An optional pointer to the get-attribute-string function. + + This field is deprecated. When it is defined, it should point to a function + that acts the same as the :attr:`tp_getattro` function, but taking a C string + instead of a Python string object to give the attribute name. The signature is + the same as for :cfunc:`PyObject_GetAttrString`.
+ + This field is inherited by subtypes together with :attr:`tp_getattro`: a subtype + inherits both :attr:`tp_getattr` and :attr:`tp_getattro` from its base type when + the subtype's :attr:`tp_getattr` and :attr:`tp_getattro` are both *NULL*. + + +.. cmember:: setattrfunc PyTypeObject.tp_setattr + + An optional pointer to the set-attribute-string function. + + This field is deprecated. When it is defined, it should point to a function + that acts the same as the :attr:`tp_setattro` function, but taking a C string + instead of a Python string object to give the attribute name. The signature is + the same as for :cfunc:`PyObject_SetAttrString`. + + This field is inherited by subtypes together with :attr:`tp_setattro`: a subtype + inherits both :attr:`tp_setattr` and :attr:`tp_setattro` from its base type when + the subtype's :attr:`tp_setattr` and :attr:`tp_setattro` are both *NULL*. + + +.. cmember:: cmpfunc PyTypeObject.tp_compare + + An optional pointer to the three-way comparison function. + + The signature is the same as for :cfunc:`PyObject_Compare`. The function should + return ``1`` if *self* is greater than *other*, ``0`` if *self* is equal to + *other*, and ``-1`` if *self* is less than *other*. It should return ``-1`` and + set an exception condition when an error occurred during the comparison. + + This field is inherited by subtypes together with :attr:`tp_richcompare` and + :attr:`tp_hash`: a subtype inherits all three of :attr:`tp_compare`, + :attr:`tp_richcompare`, and :attr:`tp_hash` when the subtype's + :attr:`tp_compare`, :attr:`tp_richcompare`, and :attr:`tp_hash` are all *NULL*. + + +.. cmember:: reprfunc PyTypeObject.tp_repr + + .. index:: builtin: repr + + An optional pointer to a function that implements the built-in function + :func:`repr`. + + The signature is the same as for :cfunc:`PyObject_Repr`; it must return a string + or a Unicode object.
Ideally, this function should return a string that, when + passed to :func:`eval`, given a suitable environment, returns an object with the + same value. If this is not feasible, it should return a string starting with + ``'<'`` and ending with ``'>'`` from which both the type and the value of the + object can be deduced. + + When this field is not set, a string of the form ``<%s object at %p>`` is + returned, where ``%s`` is replaced by the type name, and ``%p`` by the object's + memory address. + + This field is inherited by subtypes. + +.. cmember:: PyNumberMethods *tp_as_number; + + XXX + +.. cmember:: PySequenceMethods *tp_as_sequence; + + XXX + +.. cmember:: PyMappingMethods *tp_as_mapping; + + XXX + + +.. cmember:: hashfunc PyTypeObject.tp_hash + + .. index:: builtin: hash + + An optional pointer to a function that implements the built-in function + :func:`hash`. + + The signature is the same as for :cfunc:`PyObject_Hash`; it must return a C + long. The value ``-1`` should not be returned as a normal return value; when an + error occurs during the computation of the hash value, the function should set + an exception and return ``-1``. + + When this field is not set, two possibilities exist: if the :attr:`tp_compare` + and :attr:`tp_richcompare` fields are both *NULL*, a default hash value based on + the object's address is returned; otherwise, a :exc:`TypeError` is raised. + + This field is inherited by subtypes together with :attr:`tp_richcompare` and + :attr:`tp_compare`: a subtype inherits all three of :attr:`tp_compare`, + :attr:`tp_richcompare`, and :attr:`tp_hash`, when the subtype's + :attr:`tp_compare`, :attr:`tp_richcompare` and :attr:`tp_hash` are all *NULL*. + + +.. cmember:: ternaryfunc PyTypeObject.tp_call + + An optional pointer to a function that implements calling the object. This + should be *NULL* if the object is not callable. The signature is the same as + for :cfunc:`PyObject_Call`. + + This field is inherited by subtypes. + + +..
cmember:: reprfunc PyTypeObject.tp_str + + An optional pointer to a function that implements the built-in operation + :func:`str`. (Note that :class:`str` is a type now, and :func:`str` calls the + constructor for that type. This constructor calls :cfunc:`PyObject_Str` to do + the actual work, and :cfunc:`PyObject_Str` will call this handler.) + + The signature is the same as for :cfunc:`PyObject_Str`; it must return a string + or a Unicode object. This function should return a "friendly" string + representation of the object, as this is the representation that will be used by + the print statement. + + When this field is not set, :cfunc:`PyObject_Repr` is called to return a string + representation. + + This field is inherited by subtypes. + + +.. cmember:: getattrofunc PyTypeObject.tp_getattro + + An optional pointer to the get-attribute function. + + The signature is the same as for :cfunc:`PyObject_GetAttr`. It is usually + convenient to set this field to :cfunc:`PyObject_GenericGetAttr`, which + implements the normal way of looking for object attributes. + + This field is inherited by subtypes together with :attr:`tp_getattr`: a subtype + inherits both :attr:`tp_getattr` and :attr:`tp_getattro` from its base type when + the subtype's :attr:`tp_getattr` and :attr:`tp_getattro` are both *NULL*. + + +.. cmember:: setattrofunc PyTypeObject.tp_setattro + + An optional pointer to the set-attribute function. + + The signature is the same as for :cfunc:`PyObject_SetAttr`. It is usually + convenient to set this field to :cfunc:`PyObject_GenericSetAttr`, which + implements the normal way of setting object attributes. + + This field is inherited by subtypes together with :attr:`tp_setattr`: a subtype + inherits both :attr:`tp_setattr` and :attr:`tp_setattro` from its base type when + the subtype's :attr:`tp_setattr` and :attr:`tp_setattro` are both *NULL*. + + +.. 
cmember:: PyBufferProcs* PyTypeObject.tp_as_buffer + + Pointer to an additional structure that contains fields relevant only to objects + which implement the buffer interface. These fields are documented in + :ref:`buffer-structs`. + + The :attr:`tp_as_buffer` field is not inherited, but the contained fields are + inherited individually. + + +.. cmember:: long PyTypeObject.tp_flags + + This field is a bit mask of various flags. Some flags indicate variant + semantics for certain situations; others are used to indicate that certain + fields in the type object (or in the extension structures referenced via + :attr:`tp_as_number`, :attr:`tp_as_sequence`, :attr:`tp_as_mapping`, and + :attr:`tp_as_buffer`) that were historically not always present are valid; if + such a flag bit is clear, the type fields it guards must not be accessed and + must be considered to have a zero or *NULL* value instead. + + Inheritance of this field is complicated. Most flag bits are inherited + individually, i.e. if the base type has a flag bit set, the subtype inherits + this flag bit. The flag bits that pertain to extension structures are strictly + inherited if the extension structure is inherited, i.e. the base type's value of + the flag bit is copied into the subtype together with a pointer to the extension + structure. The :const:`Py_TPFLAGS_HAVE_GC` flag bit is inherited together with + the :attr:`tp_traverse` and :attr:`tp_clear` fields, i.e. all three are + inherited from the base type if the + :const:`Py_TPFLAGS_HAVE_GC` flag bit is clear in the subtype and the + :attr:`tp_traverse` and :attr:`tp_clear` fields in the subtype exist (as + indicated by the :const:`Py_TPFLAGS_HAVE_RICHCOMPARE` flag bit) and have *NULL* + values. + + The following bit masks are currently defined; these can be or-ed together using + the ``|`` operator to form the value of the :attr:`tp_flags` field. The macro + :cfunc:`PyType_HasFeature` takes a type and a flags value, *tp* and *f*, and + checks whether ``tp->tp_flags & f`` is non-zero. + + + ..
data:: Py_TPFLAGS_HAVE_GETCHARBUFFER + + If this bit is set, the :ctype:`PyBufferProcs` struct referenced by + :attr:`tp_as_buffer` has the :attr:`bf_getcharbuffer` field. + + + .. data:: Py_TPFLAGS_HAVE_SEQUENCE_IN + + If this bit is set, the :ctype:`PySequenceMethods` struct referenced by + :attr:`tp_as_sequence` has the :attr:`sq_contains` field. + + + .. data:: Py_TPFLAGS_GC + + This bit is obsolete. The bit it used to name is no longer in use. The symbol + is now defined as zero. + + + .. data:: Py_TPFLAGS_HAVE_INPLACEOPS + + If this bit is set, the :ctype:`PySequenceMethods` struct referenced by + :attr:`tp_as_sequence` and the :ctype:`PyNumberMethods` structure referenced by + :attr:`tp_as_number` contain the fields for in-place operators. In particular, + this means that the :ctype:`PyNumberMethods` structure has the fields + :attr:`nb_inplace_add`, :attr:`nb_inplace_subtract`, + :attr:`nb_inplace_multiply`, :attr:`nb_inplace_divide`, + :attr:`nb_inplace_remainder`, :attr:`nb_inplace_power`, + :attr:`nb_inplace_lshift`, :attr:`nb_inplace_rshift`, :attr:`nb_inplace_and`, + :attr:`nb_inplace_xor`, and :attr:`nb_inplace_or`; and the + :ctype:`PySequenceMethods` struct has the fields :attr:`sq_inplace_concat` and + :attr:`sq_inplace_repeat`. + + + .. data:: Py_TPFLAGS_CHECKTYPES + + If this bit is set, the binary and ternary operations in the + :ctype:`PyNumberMethods` structure referenced by :attr:`tp_as_number` accept + arguments of arbitrary object types, and do their own type conversions if + needed. If this bit is clear, those operations require that all arguments have + the current type as their type, and the caller is supposed to perform a coercion + operation first. This applies to :attr:`nb_add`, :attr:`nb_subtract`, + :attr:`nb_multiply`, :attr:`nb_divide`, :attr:`nb_remainder`, :attr:`nb_divmod`, + :attr:`nb_power`, :attr:`nb_lshift`, :attr:`nb_rshift`, :attr:`nb_and`, + :attr:`nb_xor`, and :attr:`nb_or`. + + + .. 
data:: Py_TPFLAGS_HAVE_RICHCOMPARE + + If this bit is set, the type object has the :attr:`tp_richcompare` field, as + well as the :attr:`tp_traverse` and the :attr:`tp_clear` fields. + + + .. data:: Py_TPFLAGS_HAVE_WEAKREFS + + If this bit is set, the :attr:`tp_weaklistoffset` field is defined. Instances + of a type are weakly referenceable if the type's :attr:`tp_weaklistoffset` field + has a value greater than zero. + + + .. data:: Py_TPFLAGS_HAVE_ITER + + If this bit is set, the type object has the :attr:`tp_iter` and + :attr:`tp_iternext` fields. + + + .. data:: Py_TPFLAGS_HAVE_CLASS + + If this bit is set, the type object has several new fields defined starting in + Python 2.2: :attr:`tp_methods`, :attr:`tp_members`, :attr:`tp_getset`, + :attr:`tp_base`, :attr:`tp_dict`, :attr:`tp_descr_get`, :attr:`tp_descr_set`, + :attr:`tp_dictoffset`, :attr:`tp_init`, :attr:`tp_alloc`, :attr:`tp_new`, + :attr:`tp_free`, :attr:`tp_is_gc`, :attr:`tp_bases`, :attr:`tp_mro`, + :attr:`tp_cache`, :attr:`tp_subclasses`, and :attr:`tp_weaklist`. + + + .. data:: Py_TPFLAGS_HEAPTYPE + + This bit is set when the type object itself is allocated on the heap. In this + case, the :attr:`ob_type` field of its instances is considered a reference to + the type, and the type object is INCREF'ed when a new instance is created, and + DECREF'ed when an instance is destroyed (this does not apply to instances of + subtypes; only the type referenced by the instance's ob_type gets INCREF'ed or + DECREF'ed). + + + .. data:: Py_TPFLAGS_BASETYPE + + This bit is set when the type can be used as the base type of another type. If + this bit is clear, the type cannot be subtyped (similar to a "final" class in + Java). + + + .. data:: Py_TPFLAGS_READY + + This bit is set when the type object has been fully initialized by + :cfunc:`PyType_Ready`. + + + .. data:: Py_TPFLAGS_READYING + + This bit is set while :cfunc:`PyType_Ready` is in the process of initializing + the type object. + + + .. 
data:: Py_TPFLAGS_HAVE_GC + + This bit is set when the object supports garbage collection. If this bit + is set, instances must be created using :cfunc:`PyObject_GC_New` and + destroyed using :cfunc:`PyObject_GC_Del`. More information in section + :ref:`supporting-cycle-detection`. This bit also implies that the + GC-related fields :attr:`tp_traverse` and :attr:`tp_clear` are present in + the type object; but those fields also exist when + :const:`Py_TPFLAGS_HAVE_GC` is clear but + :const:`Py_TPFLAGS_HAVE_RICHCOMPARE` is set. + + + .. data:: Py_TPFLAGS_DEFAULT + + This is a bitmask of all the bits that pertain to the existence of certain + fields in the type object and its extension structures. Currently, it includes + the following bits: :const:`Py_TPFLAGS_HAVE_GETCHARBUFFER`, + :const:`Py_TPFLAGS_HAVE_SEQUENCE_IN`, :const:`Py_TPFLAGS_HAVE_INPLACEOPS`, + :const:`Py_TPFLAGS_HAVE_RICHCOMPARE`, :const:`Py_TPFLAGS_HAVE_WEAKREFS`, + :const:`Py_TPFLAGS_HAVE_ITER`, and :const:`Py_TPFLAGS_HAVE_CLASS`. + + +.. cmember:: char* PyTypeObject.tp_doc + + An optional pointer to a NUL-terminated C string giving the docstring for this + type object. This is exposed as the :attr:`__doc__` attribute on the type and + instances of the type. + + This field is *not* inherited by subtypes. + +The following three fields only exist if the +:const:`Py_TPFLAGS_HAVE_RICHCOMPARE` flag bit is set. + + +.. cmember:: traverseproc PyTypeObject.tp_traverse + + An optional pointer to a traversal function for the garbage collector. This is + only used if the :const:`Py_TPFLAGS_HAVE_GC` flag bit is set. More information + about Python's garbage collection scheme can be found in section + :ref:`supporting-cycle-detection`. + + The :attr:`tp_traverse` pointer is used by the garbage collector to detect + reference cycles. A typical implementation of a :attr:`tp_traverse` function + simply calls :cfunc:`Py_VISIT` on each of the instance's members that are Python + objects. 
For example, this is the function :cfunc:`local_traverse` from the + :mod:`thread` extension module:: + + static int + local_traverse(localobject *self, visitproc visit, void *arg) + { + Py_VISIT(self->args); + Py_VISIT(self->kw); + Py_VISIT(self->dict); + return 0; + } + + Note that :cfunc:`Py_VISIT` is called only on those members that can participate + in reference cycles. Although there is also a ``self->key`` member, it can only + be *NULL* or a Python string and therefore cannot be part of a reference cycle. + + On the other hand, even if you know a member can never be part of a cycle, as a + debugging aid you may want to visit it anyway just so the :mod:`gc` module's + :func:`get_referents` function will include it. + + Note that :cfunc:`Py_VISIT` requires the *visit* and *arg* parameters to + :cfunc:`local_traverse` to have these specific names; don't name them just + anything. + + This field is inherited by subtypes together with :attr:`tp_clear` and the + :const:`Py_TPFLAGS_HAVE_GC` flag bit: the flag bit, :attr:`tp_traverse`, and + :attr:`tp_clear` are all inherited from the base type if they are all zero in + the subtype *and* the subtype has the :const:`Py_TPFLAGS_HAVE_RICHCOMPARE` flag + bit set. + + +.. cmember:: inquiry PyTypeObject.tp_clear + + An optional pointer to a clear function for the garbage collector. This is only + used if the :const:`Py_TPFLAGS_HAVE_GC` flag bit is set. + + The :attr:`tp_clear` member function is used to break reference cycles in cyclic + garbage detected by the garbage collector. Taken together, all :attr:`tp_clear` + functions in the system must combine to break all reference cycles. This is + subtle, and if in any doubt supply a :attr:`tp_clear` function. For example, + the tuple type does not implement a :attr:`tp_clear` function, because it's + possible to prove that no reference cycle can be composed entirely of tuples.
+ Therefore the :attr:`tp_clear` functions of other types must be sufficient to + break any cycle containing a tuple. This isn't immediately obvious, and there's + rarely a good reason to avoid implementing :attr:`tp_clear`. + + Implementations of :attr:`tp_clear` should drop the instance's references to + those of its members that may be Python objects, and set its pointers to those + members to *NULL*, as in the following example:: + + static int + local_clear(localobject *self) + { + Py_CLEAR(self->key); + Py_CLEAR(self->args); + Py_CLEAR(self->kw); + Py_CLEAR(self->dict); + return 0; + } + + The :cfunc:`Py_CLEAR` macro should be used, because clearing references is + delicate: the reference to the contained object must not be decremented until + after the pointer to the contained object is set to *NULL*. This is because + decrementing the reference count may cause the contained object to become trash, + triggering a chain of reclamation activity that may include invoking arbitrary + Python code (due to finalizers, or weakref callbacks, associated with the + contained object). If it's possible for such code to reference *self* again, + it's important that the pointer to the contained object be *NULL* at that time, + so that *self* knows the contained object can no longer be used. The + :cfunc:`Py_CLEAR` macro performs the operations in a safe order. + + Because the goal of :attr:`tp_clear` functions is to break reference cycles, + it's not necessary to clear contained objects like Python strings or Python + integers, which can't participate in reference cycles. On the other hand, it may + be convenient to clear all contained Python objects, and write the type's + :attr:`tp_dealloc` function to invoke :attr:`tp_clear`. + + More information about Python's garbage collection scheme can be found in + section :ref:`supporting-cycle-detection`. 
+ + This field is inherited by subtypes together with :attr:`tp_traverse` and the + :const:`Py_TPFLAGS_HAVE_GC` flag bit: the flag bit, :attr:`tp_traverse`, and + :attr:`tp_clear` are all inherited from the base type if they are all zero in + the subtype *and* the subtype has the :const:`Py_TPFLAGS_HAVE_RICHCOMPARE` flag + bit set. + + +.. cmember:: richcmpfunc PyTypeObject.tp_richcompare + + An optional pointer to the rich comparison function. + + The signature is the same as for :cfunc:`PyObject_RichCompare`. The function + should return the result of the comparison (usually ``Py_True`` or + ``Py_False``). If the comparison is undefined, it must return + ``Py_NotImplemented``, if another error occurred it must return ``NULL`` and set + an exception condition. + + This field is inherited by subtypes together with :attr:`tp_compare` and + :attr:`tp_hash`: a subtype inherits all three of :attr:`tp_compare`, + :attr:`tp_richcompare`, and :attr:`tp_hash`, when the subtype's + :attr:`tp_compare`, :attr:`tp_richcompare`, and :attr:`tp_hash` are all *NULL*. + + The following constants are defined to be used as the third argument for + :attr:`tp_richcompare` and for :cfunc:`PyObject_RichCompare`: + + +----------------+------------+ + | Constant | Comparison | + +================+============+ + | :const:`Py_LT` | ``<`` | + +----------------+------------+ + | :const:`Py_LE` | ``<=`` | + +----------------+------------+ + | :const:`Py_EQ` | ``==`` | + +----------------+------------+ + | :const:`Py_NE` | ``!=`` | + +----------------+------------+ + | :const:`Py_GT` | ``>`` | + +----------------+------------+ + | :const:`Py_GE` | ``>=`` | + +----------------+------------+ + +The next field only exists if the :const:`Py_TPFLAGS_HAVE_WEAKREFS` flag bit is +set. + + +.. 
cmember:: long PyTypeObject.tp_weaklistoffset + + If the instances of this type are weakly referenceable, this field is greater + than zero and contains the offset in the instance structure of the weak + reference list head (ignoring the GC header, if present); this offset is used by + :cfunc:`PyObject_ClearWeakRefs` and the :cfunc:`PyWeakref_\*` functions. The + instance structure needs to include a field of type :ctype:`PyObject\*` which is + initialized to *NULL*. + + Do not confuse this field with :attr:`tp_weaklist`; that is the list head for + weak references to the type object itself. + + This field is inherited by subtypes, but see the rules listed below. A subtype + may override this offset; this means that the subtype uses a different weak + reference list head than the base type. Since the list head is always found via + :attr:`tp_weaklistoffset`, this should not be a problem. + + When a type defined by a class statement has no :attr:`__slots__` declaration, + and none of its base types are weakly referenceable, the type is made weakly + referenceable by adding a weak reference list head slot to the instance layout + and setting the :attr:`tp_weaklistoffset` to that slot's offset. + + When a type's :attr:`__slots__` declaration contains a slot named + :attr:`__weakref__`, that slot becomes the weak reference list head for + instances of the type, and the slot's offset is stored in the type's + :attr:`tp_weaklistoffset`. + + When a type's :attr:`__slots__` declaration does not contain a slot named + :attr:`__weakref__`, the type inherits its :attr:`tp_weaklistoffset` from its + base type. + +The next two fields only exist if the :const:`Py_TPFLAGS_HAVE_ITER` flag bit is +set. + + +.. cmember:: getiterfunc PyTypeObject.tp_iter + + An optional pointer to a function that returns an iterator for the object.
Its + presence normally signals that the instances of this type are iterable (although + sequences may be iterable without this function, and classic instances always + have this function, even if they don't define an :meth:`__iter__` method). + + This function has the same signature as :cfunc:`PyObject_GetIter`. + + This field is inherited by subtypes. + + +.. cmember:: iternextfunc PyTypeObject.tp_iternext + + An optional pointer to a function that returns the next item in an iterator, or + raises :exc:`StopIteration` when the iterator is exhausted. Its presence + normally signals that the instances of this type are iterators (although classic + instances always have this function, even if they don't define a + :meth:`__next__` method). + + Iterator types should also define the :attr:`tp_iter` function, and that + function should return the iterator instance itself (not a new iterator + instance). + + This function has the same signature as :cfunc:`PyIter_Next`. + + This field is inherited by subtypes. + +The next fields, up to and including :attr:`tp_weaklist`, only exist if the +:const:`Py_TPFLAGS_HAVE_CLASS` flag bit is set. + + +.. cmember:: struct PyMethodDef* PyTypeObject.tp_methods + + An optional pointer to a static *NULL*-terminated array of :ctype:`PyMethodDef` + structures, declaring regular methods of this type. + + For each entry in the array, an entry is added to the type's dictionary (see + :attr:`tp_dict` below) containing a method descriptor. + + This field is not inherited by subtypes (methods are inherited through a + different mechanism). + + +.. cmember:: struct PyMemberDef* PyTypeObject.tp_members + + An optional pointer to a static *NULL*-terminated array of :ctype:`PyMemberDef` + structures, declaring regular data members (fields or slots) of instances of + this type. + + For each entry in the array, an entry is added to the type's dictionary (see + :attr:`tp_dict` below) containing a member descriptor. 
+ + This field is not inherited by subtypes (members are inherited through a + different mechanism). + + +.. cmember:: struct PyGetSetDef* PyTypeObject.tp_getset + + An optional pointer to a static *NULL*-terminated array of :ctype:`PyGetSetDef` + structures, declaring computed attributes of instances of this type. + + For each entry in the array, an entry is added to the type's dictionary (see + :attr:`tp_dict` below) containing a getset descriptor. + + This field is not inherited by subtypes (computed attributes are inherited + through a different mechanism). + + Docs for PyGetSetDef (XXX belong elsewhere):: + + typedef PyObject *(*getter)(PyObject *, void *); + typedef int (*setter)(PyObject *, PyObject *, void *); + + typedef struct PyGetSetDef { + char *name; /* attribute name */ + getter get; /* C function to get the attribute */ + setter set; /* C function to set the attribute */ + char *doc; /* optional doc string */ + void *closure; /* optional additional data for getter and setter */ + } PyGetSetDef; + + +.. cmember:: PyTypeObject* PyTypeObject.tp_base + + An optional pointer to a base type from which type properties are inherited. At + this level, only single inheritance is supported; multiple inheritance requires + dynamically creating a type object by calling the metatype. + + This field is not inherited by subtypes (obviously), but it defaults to + ``&PyBaseObject_Type`` (which to Python programmers is known as the type + :class:`object`). + + +.. cmember:: PyObject* PyTypeObject.tp_dict + + The type's dictionary is stored here by :cfunc:`PyType_Ready`. + + This field should normally be initialized to *NULL* before :cfunc:`PyType_Ready` is + called; it may also be initialized to a dictionary containing initial attributes + for the type. Once :cfunc:`PyType_Ready` has initialized the type, extra + attributes for the type may be added to this dictionary only if they don't + correspond to overloaded operations (like :meth:`__add__`).
+ + This field is not inherited by subtypes (though the attributes defined in here + are inherited through a different mechanism). + + +.. cmember:: descrgetfunc PyTypeObject.tp_descr_get + + An optional pointer to a "descriptor get" function. + + The function signature is :: + + PyObject * tp_descr_get(PyObject *self, PyObject *obj, PyObject *type); + + XXX blah, blah. + + This field is inherited by subtypes. + + +.. cmember:: descrsetfunc PyTypeObject.tp_descr_set + + An optional pointer to a "descriptor set" function. + + The function signature is :: + + int tp_descr_set(PyObject *self, PyObject *obj, PyObject *value); + + This field is inherited by subtypes. + + XXX blah, blah. + + +.. cmember:: long PyTypeObject.tp_dictoffset + + If the instances of this type have a dictionary containing instance variables, + this field is non-zero and contains the offset in the instances of the type of + the instance variable dictionary; this offset is used by + :cfunc:`PyObject_GenericGetAttr`. + + Do not confuse this field with :attr:`tp_dict`; that is the dictionary for + attributes of the type object itself. + + If the value of this field is greater than zero, it specifies the offset from + the start of the instance structure. If the value is less than zero, it + specifies the offset from the *end* of the instance structure. A negative + offset is more expensive to use, and should only be used when the instance + structure contains a variable-length part. This is used for example to add an + instance variable dictionary to subtypes of :class:`str` or :class:`tuple`. Note + that the :attr:`tp_basicsize` field should account for the dictionary added to + the end in that case, even though the dictionary is not included in the basic + object layout. On a system with a pointer size of 4 bytes, + :attr:`tp_dictoffset` should be set to ``-4`` to indicate that the dictionary is + at the very end of the structure. 
+ + The real dictionary offset in an instance can be computed from a negative + :attr:`tp_dictoffset` as follows:: + + dictoffset = tp_basicsize + abs(ob_size)*tp_itemsize + tp_dictoffset + if dictoffset is not aligned on sizeof(void*): + round up to sizeof(void*) + + where :attr:`tp_basicsize`, :attr:`tp_itemsize` and :attr:`tp_dictoffset` are + taken from the type object, and :attr:`ob_size` is taken from the instance. The + absolute value is taken because long ints use the sign of :attr:`ob_size` to + store the sign of the number. (There's never a need to do this calculation + yourself; it is done for you by :cfunc:`_PyObject_GetDictPtr`.) + + This field is inherited by subtypes, but see the rules listed below. A subtype + may override this offset; this means that the subtype instances store the + dictionary at a different offset than the base type. Since the dictionary is + always found via :attr:`tp_dictoffset`, this should not be a problem. + + When a type defined by a class statement has no :attr:`__slots__` declaration, + and none of its base types has an instance variable dictionary, a dictionary + slot is added to the instance layout and the :attr:`tp_dictoffset` is set to + that slot's offset. + + When a type defined by a class statement has a :attr:`__slots__` declaration, + the type inherits its :attr:`tp_dictoffset` from its base type. + + (Adding a slot named :attr:`__dict__` to the :attr:`__slots__` declaration does + not have the expected effect, it just causes confusion. Maybe this should be + added as a feature just like :attr:`__weakref__` though.) + + +.. cmember:: initproc PyTypeObject.tp_init + + An optional pointer to an instance initialization function. + + This function corresponds to the :meth:`__init__` method of classes. Like + :meth:`__init__`, it is possible to create an instance without calling + :meth:`__init__`, and it is possible to reinitialize an instance by calling its + :meth:`__init__` method again.
+ + The function signature is :: + + int tp_init(PyObject *self, PyObject *args, PyObject *kwds) + + The self argument is the instance to be initialized; the *args* and *kwds* + arguments represent positional and keyword arguments of the call to + :meth:`__init__`. + + The :attr:`tp_init` function, if not *NULL*, is called when an instance is + created normally by calling its type, after the type's :attr:`tp_new` function + has returned an instance of the type. If the :attr:`tp_new` function returns an + instance of some other type that is not a subtype of the original type, no + :attr:`tp_init` function is called; if :attr:`tp_new` returns an instance of a + subtype of the original type, the subtype's :attr:`tp_init` is called. (VERSION + NOTE: described here is what is implemented in Python 2.2.1 and later. In + Python 2.2, the :attr:`tp_init` of the type of the object returned by + :attr:`tp_new` was always called, if not *NULL*.) + + This field is inherited by subtypes. + + +.. cmember:: allocfunc PyTypeObject.tp_alloc + + An optional pointer to an instance allocation function. + + The function signature is :: + + PyObject *tp_alloc(PyTypeObject *self, Py_ssize_t nitems) + + The purpose of this function is to separate memory allocation from memory + initialization. It should return a pointer to a block of memory of adequate + length for the instance, suitably aligned, and initialized to zeros, but with + :attr:`ob_refcnt` set to ``1`` and :attr:`ob_type` set to the type argument. If + the type's :attr:`tp_itemsize` is non-zero, the object's :attr:`ob_size` field + should be initialized to *nitems* and the length of the allocated memory block + should be ``tp_basicsize + nitems*tp_itemsize``, rounded up to a multiple of + ``sizeof(void*)``; otherwise, *nitems* is not used and the length of the block + should be :attr:`tp_basicsize`. 
+ + Do not use this function to do any other instance initialization, not even to + allocate additional memory; that should be done by :attr:`tp_new`. + + This field is inherited by static subtypes, but not by dynamic subtypes + (subtypes created by a class statement); in the latter, this field is always set + to :cfunc:`PyType_GenericAlloc`, to force a standard heap allocation strategy. + That is also the recommended value for statically defined types. + + +.. cmember:: newfunc PyTypeObject.tp_new + + An optional pointer to an instance creation function. + + If this function is *NULL* for a particular type, that type cannot be called to + create new instances; presumably there is some other way to create instances, + like a factory function. + + The function signature is :: + + PyObject *tp_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) + + The subtype argument is the type of the object being created; the *args* and + *kwds* arguments represent positional and keyword arguments of the call to the + type. Note that subtype doesn't have to equal the type whose :attr:`tp_new` + function is called; it may be a subtype of that type (but not an unrelated + type). + + The :attr:`tp_new` function should call ``subtype->tp_alloc(subtype, nitems)`` + to allocate space for the object, and then do only as much further + initialization as is absolutely necessary. Initialization that can safely be + ignored or repeated should be placed in the :attr:`tp_init` handler. A good + rule of thumb is that for immutable types, all initialization should take place + in :attr:`tp_new`, while for mutable types, most initialization should be + deferred to :attr:`tp_init`. + + This field is inherited by subtypes, except it is not inherited by static types + whose :attr:`tp_base` is *NULL* or ``&PyBaseObject_Type``. The latter exception + is a precaution so that old extension types don't become callable simply by + being linked with Python 2.2. + + +.. 
cmember:: destructor PyTypeObject.tp_free + + An optional pointer to an instance deallocation function. + + The signature of this function has changed slightly: in Python 2.2 and 2.2.1, + its signature is :ctype:`destructor`:: + + void tp_free(PyObject *) + + In Python 2.3 and beyond, its signature is :ctype:`freefunc`:: + + void tp_free(void *) + + The only initializer that is compatible with both versions is ``_PyObject_Del``, + whose definition was suitably adapted in Python 2.3. + + This field is inherited by static subtypes, but not by dynamic subtypes + (subtypes created by a class statement); in the latter, this field is set to a + deallocator suitable to match :cfunc:`PyType_GenericAlloc` and the value of the + :const:`Py_TPFLAGS_HAVE_GC` flag bit. + + +.. cmember:: inquiry PyTypeObject.tp_is_gc + + An optional pointer to a function called by the garbage collector. + + The garbage collector needs to know whether a particular object is collectible + or not. Normally, it is sufficient to look at the object's type's + :attr:`tp_flags` field, and check the :const:`Py_TPFLAGS_HAVE_GC` flag bit. But + some types have a mixture of statically and dynamically allocated instances, and + the statically allocated instances are not collectible. Such types should + define this function; it should return ``1`` for a collectible instance, and + ``0`` for a non-collectible instance. The signature is :: + + int tp_is_gc(PyObject *self) + + (The only example of this are types themselves. The metatype, + :cdata:`PyType_Type`, defines this function to distinguish between statically + and dynamically allocated types.) + + This field is inherited by subtypes. (VERSION NOTE: in Python 2.2, it was not + inherited. It is inherited in 2.2.1 and later versions.) + + +.. cmember:: PyObject* PyTypeObject.tp_bases + + Tuple of base types. + + This is set for types created by a class statement. It should be *NULL* for + statically defined types. + + This field is not inherited. + + +..
cmember:: PyObject* PyTypeObject.tp_mro + + Tuple containing the expanded set of base types, starting with the type itself + and ending with :class:`object`, in Method Resolution Order. + + This field is not inherited; it is calculated fresh by :cfunc:`PyType_Ready`. + + +.. cmember:: PyObject* PyTypeObject.tp_cache + + Unused. Not inherited. Internal use only. + + +.. cmember:: PyObject* PyTypeObject.tp_subclasses + + List of weak references to subclasses. Not inherited. Internal use only. + + +.. cmember:: PyObject* PyTypeObject.tp_weaklist + + Weak reference list head, for weak references to this type object. Not + inherited. Internal use only. + +The remaining fields are only defined if the feature test macro +:const:`COUNT_ALLOCS` is defined, and are for internal use only. They are +documented here for completeness. None of these fields are inherited by +subtypes. + + +.. cmember:: Py_ssize_t PyTypeObject.tp_allocs + + Number of allocations. + + +.. cmember:: Py_ssize_t PyTypeObject.tp_frees + + Number of frees. + + +.. cmember:: Py_ssize_t PyTypeObject.tp_maxalloc + + Maximum simultaneously allocated objects. + + +.. cmember:: PyTypeObject* PyTypeObject.tp_next + + Pointer to the next type object with a non-zero :attr:`tp_allocs` field. + +Also, note that, in a garbage collected Python, tp_dealloc may be called from +any Python thread, not just the thread which created the object (if the object +becomes part of a refcount cycle, that cycle might be collected by a garbage +collection on any thread). This is not a problem for Python API calls, since +the thread on which tp_dealloc is called will own the Global Interpreter Lock +(GIL). However, if the object being destroyed in turn destroys objects from some +other C or C++ library, care should be taken to ensure that destroying those +objects on the thread which called tp_dealloc will not violate any assumptions +of the library. + + +.. 
_mapping-structs: + +Mapping Object Structures +========================= + + +.. ctype:: PyMappingMethods + + Structure used to hold pointers to the functions used to implement the mapping + protocol for an extension type. + + +.. _number-structs: + +Number Object Structures +======================== + + +.. ctype:: PyNumberMethods + + Structure used to hold pointers to the functions an extension type uses to + implement the number protocol. + + +.. _sequence-structs: + +Sequence Object Structures +========================== + + +.. ctype:: PySequenceMethods + + Structure used to hold pointers to the functions which an object uses to + implement the sequence protocol. + + +.. _buffer-structs: + +Buffer Object Structures +======================== + +.. sectionauthor:: Greg J. Stein + + +The buffer interface exports a model where an object can expose its internal +data as a set of chunks of data, where each chunk is specified as a +pointer/length pair. These chunks are called :dfn:`segments` and are presumed +to be non-contiguous in memory. + +If an object does not export the buffer interface, then its :attr:`tp_as_buffer` +member in the :ctype:`PyTypeObject` structure should be *NULL*. Otherwise, the +:attr:`tp_as_buffer` will point to a :ctype:`PyBufferProcs` structure. + +.. note:: + + It is very important that your :ctype:`PyTypeObject` structure uses + :const:`Py_TPFLAGS_DEFAULT` for the value of the :attr:`tp_flags` member rather + than ``0``. This tells the Python runtime that your :ctype:`PyBufferProcs` + structure contains the :attr:`bf_getcharbuffer` slot. Older versions of Python + did not have this member, so a new Python interpreter using an old extension + needs to be able to test for its presence before using it. + + +.. ctype:: PyBufferProcs + + Structure used to hold the function pointers which define an implementation of + the buffer protocol. + + The first slot is :attr:`bf_getreadbuffer`, of type :ctype:`getreadbufferproc`. 
+ If this slot is *NULL*, then the object does not support reading from the + internal data. This is non-sensical, so implementors should fill this in, but + callers should test that the slot contains a non-*NULL* value. + + The next slot is :attr:`bf_getwritebuffer` having type + :ctype:`getwritebufferproc`. This slot may be *NULL* if the object does not + allow writing into its returned buffers. + + The third slot is :attr:`bf_getsegcount`, with type :ctype:`getsegcountproc`. + This slot must not be *NULL* and is used to inform the caller how many segments + the object contains. Simple objects such as :ctype:`PyString_Type` and + :ctype:`PyBuffer_Type` objects contain a single segment. + + .. index:: single: PyType_HasFeature() + + The last slot is :attr:`bf_getcharbuffer`, of type :ctype:`getcharbufferproc`. + This slot will only be present if the :const:`Py_TPFLAGS_HAVE_GETCHARBUFFER` + flag is present in the :attr:`tp_flags` field of the object's + :ctype:`PyTypeObject`. Before using this slot, the caller should test whether it + is present by using the :cfunc:`PyType_HasFeature` function. If the flag is + present, :attr:`bf_getcharbuffer` may be *NULL*, indicating that the object's + contents cannot be used as *8-bit characters*. The slot function may also raise + an error if the object's contents cannot be interpreted as 8-bit characters. + For example, if the object is an array which is configured to hold floating + point values, an exception may be raised if a caller attempts to use + :attr:`bf_getcharbuffer` to fetch a sequence of 8-bit characters. This notion of + exporting the internal buffers as "text" is used to distinguish between objects + that are binary in nature, and those which have character-based content. + + .. note:: + + The current policy seems to state that these characters may be multi-byte + characters. This implies that a buffer size of *N* does not mean there are *N* + characters present. + + +.. 
data:: Py_TPFLAGS_HAVE_GETCHARBUFFER + + Flag bit set in the type structure to indicate that the :attr:`bf_getcharbuffer` + slot is known. This being set does not indicate that the object supports the + buffer interface or that the :attr:`bf_getcharbuffer` slot is non-*NULL*. + + +.. ctype:: Py_ssize_t (*readbufferproc) (PyObject *self, Py_ssize_t segment, void **ptrptr) + + Return a pointer to a readable segment of the buffer in ``*ptrptr``. This + function is allowed to raise an exception, in which case it must return ``-1``. + The *segment* which is specified must be zero or positive, and strictly less + than the number of segments returned by the :attr:`bf_getsegcount` slot + function. On success, it returns the length of the segment, and sets + ``*ptrptr`` to a pointer to that memory. + + +.. ctype:: Py_ssize_t (*writebufferproc) (PyObject *self, Py_ssize_t segment, void **ptrptr) + + Return a pointer to a writable memory buffer in ``*ptrptr``, and the length of + that segment as the function return value. The memory buffer must correspond to + buffer segment *segment*. Must return ``-1`` and set an exception on error. + :exc:`TypeError` should be raised if the object only supports read-only buffers, + and :exc:`SystemError` should be raised when *segment* specifies a segment that + doesn't exist. + + .. % Why doesn't it raise ValueError for this one? + .. % GJS: because you shouldn't be calling it with an invalid + .. % segment. That indicates a blatant programming error in the C + .. % code. + + +.. ctype:: Py_ssize_t (*segcountproc) (PyObject *self, Py_ssize_t *lenp) + + Return the number of memory segments which comprise the buffer. If *lenp* is + not *NULL*, the implementation must report the sum of the sizes (in bytes) of + all segments in ``*lenp``. The function cannot fail. + + +.. ctype:: Py_ssize_t (*charbufferproc) (PyObject *self, Py_ssize_t segment, const char **ptrptr) + + Return the size of the segment *segment* that *ptrptr* is set to. 
``*ptrptr`` + is set to the memory buffer. Returns ``-1`` on error. + + +.. _supporting-iteration: + +Supporting the Iterator Protocol +================================ + + +.. _supporting-cycle-detection: + +Supporting Cyclic Garbage Collection +==================================== + +Python's support for detecting and collecting garbage which involves circular +references requires support from object types which are "containers" for other +objects which may also be containers. Types which do not store references to +other objects, or which only store references to atomic types (such as numbers +or strings), do not need to provide any explicit support for garbage collection. + +.. An example showing the use of these interfaces can be found in "Supporting the +.. Cycle Collector (XXX not found: ../ext/example-cycle-support.html)". + +To create a container type, the :attr:`tp_flags` field of the type object must +include the :const:`Py_TPFLAGS_HAVE_GC` and provide an implementation of the +:attr:`tp_traverse` handler. If instances of the type are mutable, a +:attr:`tp_clear` implementation must also be provided. + + +.. data:: Py_TPFLAGS_HAVE_GC + + Objects with a type with this flag set must conform with the rules documented + here. For convenience these objects will be referred to as container objects. + +Constructors for container types must conform to two rules: + +#. The memory for the object must be allocated using :cfunc:`PyObject_GC_New` or + :cfunc:`PyObject_GC_NewVar`. + +#. Once all the fields which may contain references to other containers are + initialized, it must call :cfunc:`PyObject_GC_Track`. + + +.. cfunction:: TYPE* PyObject_GC_New(TYPE, PyTypeObject *type) + + Analogous to :cfunc:`PyObject_New` but for container objects with the + :const:`Py_TPFLAGS_HAVE_GC` flag set. + + +..
cfunction:: TYPE* PyObject_GC_NewVar(TYPE, PyTypeObject *type, Py_ssize_t size) + + Analogous to :cfunc:`PyObject_NewVar` but for container objects with the + :const:`Py_TPFLAGS_HAVE_GC` flag set. + + +.. cfunction:: PyVarObject * PyObject_GC_Resize(PyVarObject *op, Py_ssize_t) + + Resize an object allocated by :cfunc:`PyObject_NewVar`. Returns the resized + object or *NULL* on failure. + + +.. cfunction:: void PyObject_GC_Track(PyObject *op) + + Adds the object *op* to the set of container objects tracked by the collector. + The collector can run at unexpected times so objects must be valid while being + tracked. This should be called once all the fields followed by the + :attr:`tp_traverse` handler become valid, usually near the end of the + constructor. + + +.. cfunction:: void _PyObject_GC_TRACK(PyObject *op) + + A macro version of :cfunc:`PyObject_GC_Track`. It should not be used for + extension modules. + +Similarly, the deallocator for the object must conform to a similar pair of +rules: + +#. Before fields which refer to other containers are invalidated, + :cfunc:`PyObject_GC_UnTrack` must be called. + +#. The object's memory must be deallocated using :cfunc:`PyObject_GC_Del`. + + +.. cfunction:: void PyObject_GC_Del(void *op) + + Releases memory allocated to an object using :cfunc:`PyObject_GC_New` or + :cfunc:`PyObject_GC_NewVar`. + + +.. cfunction:: void PyObject_GC_UnTrack(void *op) + + Remove the object *op* from the set of container objects tracked by the + collector. Note that :cfunc:`PyObject_GC_Track` can be called again on this + object to add it back to the set of tracked objects. The deallocator + (:attr:`tp_dealloc` handler) should call this for the object before any of the + fields used by the :attr:`tp_traverse` handler become invalid. + + +.. cfunction:: void _PyObject_GC_UNTRACK(PyObject *op) + + A macro version of :cfunc:`PyObject_GC_UnTrack`. It should not be used for + extension modules. 
+ +The :attr:`tp_traverse` handler accepts a function parameter of this type: + + +.. ctype:: int (*visitproc)(PyObject *object, void *arg) + + Type of the visitor function passed to the :attr:`tp_traverse` handler. The + function should be called with an object to traverse as *object* and the third + parameter to the :attr:`tp_traverse` handler as *arg*. The Python core uses + several visitor functions to implement cyclic garbage detection; it's not + expected that users will need to write their own visitor functions. + +The :attr:`tp_traverse` handler must have the following type: + + +.. ctype:: int (*traverseproc)(PyObject *self, visitproc visit, void *arg) + + Traversal function for a container object. Implementations must call the + *visit* function for each object directly contained by *self*, with the + parameters to *visit* being the contained object and the *arg* value passed to + the handler. The *visit* function must not be called with a *NULL* object + argument. If *visit* returns a non-zero value that value should be returned + immediately. + +To simplify writing :attr:`tp_traverse` handlers, a :cfunc:`Py_VISIT` macro is +provided. In order to use this macro, the :attr:`tp_traverse` implementation +must name its arguments exactly *visit* and *arg*: + + +.. cfunction:: void Py_VISIT(PyObject *o) + + Call the *visit* callback, with arguments *o* and *arg*. If *visit* returns a + non-zero value, then return it. Using this macro, :attr:`tp_traverse` handlers + look like:: + + static int + my_traverse(Noddy *self, visitproc visit, void *arg) + { + Py_VISIT(self->foo); + Py_VISIT(self->bar); + return 0; + } + + .. versionadded:: 2.4 + +The :attr:`tp_clear` handler must be of the :ctype:`inquiry` type, or *NULL* if +the object is immutable. + + +.. ctype:: int (*inquiry)(PyObject *self) + + Drop references that may have created reference cycles. Immutable objects do + not have to define this method since they can never directly create reference + cycles. 
Note that the object must still be valid after calling this method + (don't just call :cfunc:`Py_DECREF` on a reference). The collector will call + this method if it detects that this object is involved in a reference cycle. + diff --git a/Doc/c-api/refcounting.rst b/Doc/c-api/refcounting.rst new file mode 100644 index 0000000..9dc357f --- /dev/null +++ b/Doc/c-api/refcounting.rst @@ -0,0 +1,74 @@ +.. highlightlang:: c + + +.. _countingrefs: + +****************** +Reference Counting +****************** + +The macros in this section are used for managing reference counts of Python +objects. + + +.. cfunction:: void Py_INCREF(PyObject *o) + + Increment the reference count for object *o*. The object must not be *NULL*; if + you aren't sure that it isn't *NULL*, use :cfunc:`Py_XINCREF`. + + +.. cfunction:: void Py_XINCREF(PyObject *o) + + Increment the reference count for object *o*. The object may be *NULL*, in + which case the macro has no effect. + + +.. cfunction:: void Py_DECREF(PyObject *o) + + Decrement the reference count for object *o*. The object must not be *NULL*; if + you aren't sure that it isn't *NULL*, use :cfunc:`Py_XDECREF`. If the reference + count reaches zero, the object's type's deallocation function (which must not be + *NULL*) is invoked. + + .. warning:: + + The deallocation function can cause arbitrary Python code to be invoked (e.g. + when a class instance with a :meth:`__del__` method is deallocated). While + exceptions in such code are not propagated, the executed code has free access to + all Python global variables. This means that any object that is reachable from + a global variable should be in a consistent state before :cfunc:`Py_DECREF` is + invoked. For example, code to delete an object from a list should copy a + reference to the deleted object in a temporary variable, update the list data + structure, and then call :cfunc:`Py_DECREF` for the temporary variable. + + +.. 
cfunction:: void Py_XDECREF(PyObject *o) + + Decrement the reference count for object *o*. The object may be *NULL*, in + which case the macro has no effect; otherwise the effect is the same as for + :cfunc:`Py_DECREF`, and the same warning applies. + + +.. cfunction:: void Py_CLEAR(PyObject *o) + + Decrement the reference count for object *o*. The object may be *NULL*, in + which case the macro has no effect; otherwise the effect is the same as for + :cfunc:`Py_DECREF`, except that the argument is also set to *NULL*. The warning + for :cfunc:`Py_DECREF` does not apply with respect to the object passed because + the macro carefully uses a temporary variable and sets the argument to *NULL* + before decrementing its reference count. + + It is a good idea to use this macro whenever decrementing the value of a + variable that might be traversed during garbage collection. + + .. versionadded:: 2.4 + +The following functions are for runtime dynamic embedding of Python: +``Py_IncRef(PyObject *o)``, ``Py_DecRef(PyObject *o)``. They are +simply exported function versions of :cfunc:`Py_XINCREF` and +:cfunc:`Py_XDECREF`, respectively. + +The following functions or macros are only for use within the interpreter core: +:cfunc:`_Py_Dealloc`, :cfunc:`_Py_ForgetReference`, :cfunc:`_Py_NewReference`, +as well as the global variable :cdata:`_Py_RefTotal`. + diff --git a/Doc/c-api/utilities.rst b/Doc/c-api/utilities.rst new file mode 100644 index 0000000..01c1ceb --- /dev/null +++ b/Doc/c-api/utilities.rst @@ -0,0 +1,1030 @@ +.. highlightlang:: c + + +.. _utilities: + +********* +Utilities +********* + +The functions in this chapter perform various utility tasks, ranging from +helping C code be more portable across platforms, using Python modules from C, +and parsing function arguments and constructing Python values from C values. + + +.. _os: + +Operating System Utilities +========================== + + +..
cfunction:: int Py_FdIsInteractive(FILE *fp, const char *filename) + + Return true (nonzero) if the standard I/O file *fp* with name *filename* is + deemed interactive. This is the case for files for which ``isatty(fileno(fp))`` + is true. If the global flag :cdata:`Py_InteractiveFlag` is true, this function + also returns true if the *filename* pointer is *NULL* or if the name is equal to + one of the strings ``'<stdin>'`` or ``'???'``. + + +.. cfunction:: long PyOS_GetLastModificationTime(char *filename) + + Return the time of last modification of the file *filename*. The result is + encoded in the same way as the timestamp returned by the standard C library + function :cfunc:`time`. + + +.. cfunction:: void PyOS_AfterFork() + + Function to update some internal state after a process fork; this should be + called in the new process if the Python interpreter will continue to be used. + If a new executable is loaded into the new process, this function does not need + to be called. + + +.. cfunction:: int PyOS_CheckStack() + + Return true when the interpreter runs out of stack space. This is a reliable + check, but is only available when :const:`USE_STACKCHECK` is defined (currently + on Windows using the Microsoft Visual C++ compiler). :const:`USE_STACKCHECK` + will be defined automatically; you should never change the definition in your + own code. + + +.. cfunction:: PyOS_sighandler_t PyOS_getsig(int i) + + Return the current signal handler for signal *i*. This is a thin wrapper around + either :cfunc:`sigaction` or :cfunc:`signal`. Do not call those functions + directly! :ctype:`PyOS_sighandler_t` is a typedef alias for :ctype:`void + (\*)(int)`. + + +.. cfunction:: PyOS_sighandler_t PyOS_setsig(int i, PyOS_sighandler_t h) + + Set the signal handler for signal *i* to be *h*; return the old signal handler. + This is a thin wrapper around either :cfunc:`sigaction` or :cfunc:`signal`. Do + not call those functions directly!
:ctype:`PyOS_sighandler_t` is a typedef + alias for :ctype:`void (\*)(int)`. + + +.. _processcontrol: + +Process Control +=============== + + +.. cfunction:: void Py_FatalError(const char *message) + + .. index:: single: abort() + + Print a fatal error message and kill the process. No cleanup is performed. + This function should only be invoked when a condition is detected that would + make it dangerous to continue using the Python interpreter; e.g., when the + object administration appears to be corrupted. On Unix, the standard C library + function :cfunc:`abort` is called which will attempt to produce a :file:`core` + file. + + +.. cfunction:: void Py_Exit(int status) + + .. index:: + single: Py_Finalize() + single: exit() + + Exit the current process. This calls :cfunc:`Py_Finalize` and then calls the + standard C library function ``exit(status)``. + + +.. cfunction:: int Py_AtExit(void (*func) ()) + + .. index:: + single: Py_Finalize() + single: cleanup functions + + Register a cleanup function to be called by :cfunc:`Py_Finalize`. The cleanup + function will be called with no arguments and should return no value. At most + 32 cleanup functions can be registered. When the registration is successful, + :cfunc:`Py_AtExit` returns ``0``; on failure, it returns ``-1``. The cleanup + function registered last is called first. Each cleanup function will be called + at most once. Since Python's internal finalization will have completed before + the cleanup function, no Python APIs should be called by *func*. + + +.. _importing: + +Importing Modules +================= + + +.. cfunction:: PyObject* PyImport_ImportModule(const char *name) + + .. index:: + single: package variable; __all__ + single: __all__ (package variable) + + This is a simplified interface to :cfunc:`PyImport_ImportModuleEx` below, + leaving the *globals* and *locals* arguments set to *NULL*. 
When the *name* + argument contains a dot (when it specifies a submodule of a package), the + *fromlist* argument is set to the list ``['*']`` so that the return value is the + named module rather than the top-level package containing it as would otherwise + be the case. (Unfortunately, this has an additional side effect when *name* in + fact specifies a subpackage instead of a submodule: the submodules specified in + the package's ``__all__`` variable are loaded.) Return a new reference to the + imported module, or *NULL* with an exception set on failure. Before Python 2.4, + the module may still be created in the failure case --- examine ``sys.modules`` + to find out. Starting with Python 2.4, a failing import of a module no longer + leaves the module in ``sys.modules``. + + .. versionchanged:: 2.4 + failing imports remove incomplete module objects. + + .. index:: single: modules (in module sys) + + +.. cfunction:: PyObject* PyImport_ImportModuleEx(char *name, PyObject *globals, PyObject *locals, PyObject *fromlist) + + .. index:: builtin: __import__ + + Import a module. This is best described by referring to the built-in Python + function :func:`__import__`, as the standard :func:`__import__` function calls + this function directly. + + The return value is a new reference to the imported module or top-level package, + or *NULL* with an exception set on failure (before Python 2.4, the module may + still be created in this case). Like for :func:`__import__`, the return value + when a submodule of a package was requested is normally the top-level package, + unless a non-empty *fromlist* was given. + + .. versionchanged:: 2.4 + failing imports remove incomplete module objects. + + +.. cfunction:: PyObject* PyImport_Import(PyObject *name) + + .. index:: + module: rexec + module: ihooks + + This is a higher-level interface that calls the current "import hook function". + It invokes the :func:`__import__` function from the ``__builtins__`` of the + current globals. 
This means that the import is done using whatever import hooks + are installed in the current environment, e.g. by :mod:`rexec` or :mod:`ihooks`. + + +.. cfunction:: PyObject* PyImport_ReloadModule(PyObject *m) + + Reload a module. Return a new reference to the reloaded module, or *NULL* with + an exception set on failure (the module still exists in this case). + + +.. cfunction:: PyObject* PyImport_AddModule(const char *name) + + Return the module object corresponding to a module name. The *name* argument + may be of the form ``package.module``. First check the modules dictionary if + there's one there, and if not, create a new one and insert it in the modules + dictionary. Return *NULL* with an exception set on failure. + + .. note:: + + This function does not load or import the module; if the module wasn't already + loaded, you will get an empty module object. Use :cfunc:`PyImport_ImportModule` + or one of its variants to import a module. Package structures implied by a + dotted name for *name* are not created if not already present. + + +.. cfunction:: PyObject* PyImport_ExecCodeModule(char *name, PyObject *co) + + .. index:: builtin: compile + + Given a module name (possibly of the form ``package.module``) and a code object + read from a Python bytecode file or obtained from the built-in function + :func:`compile`, load the module. Return a new reference to the module object, + or *NULL* with an exception set if an error occurred. Before Python 2.4, the + module could still be created in error cases. Starting with Python 2.4, *name* + is removed from ``sys.modules`` in error cases, and even if *name* was already + in ``sys.modules`` on entry to :cfunc:`PyImport_ExecCodeModule`. Leaving + incompletely initialized modules in ``sys.modules`` is dangerous, as imports of + such modules have no way to know that the module object is in an unknown (and + probably damaged with respect to the module author's intents) state. 
+ + This function will reload the module if it was already imported. See + :cfunc:`PyImport_ReloadModule` for the intended way to reload a module. + + If *name* points to a dotted name of the form ``package.module``, any package + structures not already created will still not be created. + + .. versionchanged:: 2.4 + *name* is removed from ``sys.modules`` in error cases. + + +.. cfunction:: long PyImport_GetMagicNumber() + + Return the magic number for Python bytecode files (a.k.a. :file:`.pyc` and + :file:`.pyo` files). The magic number should be present in the first four bytes + of the bytecode file, in little-endian byte order. + + +.. cfunction:: PyObject* PyImport_GetModuleDict() + + Return the dictionary used for the module administration (a.k.a. + ``sys.modules``). Note that this is a per-interpreter variable. + + +.. cfunction:: void _PyImport_Init() + + Initialize the import mechanism. For internal use only. + + +.. cfunction:: void PyImport_Cleanup() + + Empty the module table. For internal use only. + + +.. cfunction:: void _PyImport_Fini() + + Finalize the import mechanism. For internal use only. + + +.. cfunction:: PyObject* _PyImport_FindExtension(char *, char *) + + For internal use only. + + +.. cfunction:: PyObject* _PyImport_FixupExtension(char *, char *) + + For internal use only. + + +.. cfunction:: int PyImport_ImportFrozenModule(char *name) + + Load a frozen module named *name*. Return ``1`` for success, ``0`` if the + module is not found, and ``-1`` with an exception set if the initialization + failed. To access the imported module on a successful load, use + :cfunc:`PyImport_ImportModule`. (Note the misnomer --- this function would + reload the module if it was already imported.) + + +.. ctype:: struct _frozen + + .. index:: single: freeze utility + + This is the structure type definition for frozen module descriptors, as + generated by the :program:`freeze` utility (see :file:`Tools/freeze/` in the + Python source distribution). 
Its definition, found in :file:`Include/import.h`, + is:: + + struct _frozen { + char *name; + unsigned char *code; + int size; + }; + + +.. cvar:: struct _frozen* PyImport_FrozenModules + + This pointer is initialized to point to an array of :ctype:`struct _frozen` + records, terminated by one whose members are all *NULL* or zero. When a frozen + module is imported, it is searched in this table. Third-party code could play + tricks with this to provide a dynamically created collection of frozen modules. + + +.. cfunction:: int PyImport_AppendInittab(char *name, void (*initfunc)(void)) + + Add a single module to the existing table of built-in modules. This is a + convenience wrapper around :cfunc:`PyImport_ExtendInittab`, returning ``-1`` if + the table could not be extended. The new module can be imported by the name + *name*, and uses the function *initfunc* as the initialization function called + on the first attempted import. This should be called before + :cfunc:`Py_Initialize`. + + +.. ctype:: struct _inittab + + Structure describing a single entry in the list of built-in modules. Each of + these structures gives the name and initialization function for a module built + into the interpreter. Programs which embed Python may use an array of these + structures in conjunction with :cfunc:`PyImport_ExtendInittab` to provide + additional built-in modules. The structure is defined in + :file:`Include/import.h` as:: + + struct _inittab { + char *name; + void (*initfunc)(void); + }; + + +.. cfunction:: int PyImport_ExtendInittab(struct _inittab *newtab) + + Add a collection of modules to the table of built-in modules. The *newtab* + array must end with a sentinel entry which contains *NULL* for the :attr:`name` + field; failure to provide the sentinel value can result in a memory fault. + Returns ``0`` on success or ``-1`` if insufficient memory could be allocated to + extend the internal table. In the event of failure, no modules are added to the + internal table. 
This should be called before :cfunc:`Py_Initialize`. + + +.. _marshalling-utils: + +Data marshalling support +======================== + +These routines allow C code to work with serialized objects using the same data +format as the :mod:`marshal` module. There are functions to write data into the +serialization format, and additional functions that can be used to read the data +back. Files used to store marshalled data must be opened in binary mode. + +Numeric values are stored with the least significant byte first. + +The module supports two versions of the data format: version 0 is the historical +version, version 1 (new in Python 2.4) shares interned strings in the file, and +upon unmarshalling. *Py_MARSHAL_VERSION* indicates the current file format +(currently 1). + + +.. cfunction:: void PyMarshal_WriteLongToFile(long value, FILE *file, int version) + + Marshal a :ctype:`long` integer, *value*, to *file*. This will only write the + least-significant 32 bits of *value*; regardless of the size of the native + :ctype:`long` type. + + .. versionchanged:: 2.4 + *version* indicates the file format. + + +.. cfunction:: void PyMarshal_WriteObjectToFile(PyObject *value, FILE *file, int version) + + Marshal a Python object, *value*, to *file*. + + .. versionchanged:: 2.4 + *version* indicates the file format. + + +.. cfunction:: PyObject* PyMarshal_WriteObjectToString(PyObject *value, int version) + + Return a string object containing the marshalled representation of *value*. + + .. versionchanged:: 2.4 + *version* indicates the file format. + + +The following functions allow marshalled values to be read back in. + +XXX What about error detection? It appears that reading past the end of the +file will always result in a negative numeric value (where that's relevant), but +it's not clear that negative values won't be handled properly when there's no +error. What's the right way to tell? Should only non-negative values be written +using these routines? + + +.. 
cfunction:: long PyMarshal_ReadLongFromFile(FILE *file) + + Return a C :ctype:`long` from the data stream in a :ctype:`FILE\*` opened for + reading. Only a 32-bit value can be read in using this function, regardless of + the native size of :ctype:`long`. + + +.. cfunction:: int PyMarshal_ReadShortFromFile(FILE *file) + + Return a C :ctype:`short` from the data stream in a :ctype:`FILE\*` opened for + reading. Only a 16-bit value can be read in using this function, regardless of + the native size of :ctype:`short`. + + +.. cfunction:: PyObject* PyMarshal_ReadObjectFromFile(FILE *file) + + Return a Python object from the data stream in a :ctype:`FILE\*` opened for + reading. On error, sets the appropriate exception (:exc:`EOFError` or + :exc:`TypeError`) and returns *NULL*. + + +.. cfunction:: PyObject* PyMarshal_ReadLastObjectFromFile(FILE *file) + + Return a Python object from the data stream in a :ctype:`FILE\*` opened for + reading. Unlike :cfunc:`PyMarshal_ReadObjectFromFile`, this function assumes + that no further objects will be read from the file, allowing it to aggressively + load file data into memory so that the de-serialization can operate from data in + memory rather than reading a byte at a time from the file. Only use this + variant if you are certain that you won't be reading anything else from the + file. On error, sets the appropriate exception (:exc:`EOFError` or + :exc:`TypeError`) and returns *NULL*. + + +.. cfunction:: PyObject* PyMarshal_ReadObjectFromString(char *string, Py_ssize_t len) + + Return a Python object from the data stream in a character buffer containing + *len* bytes pointed to by *string*. On error, sets the appropriate exception + (:exc:`EOFError` or :exc:`TypeError`) and returns *NULL*. + + +.. _arg-parsing: + +Parsing arguments and building values +===================================== + +These functions are useful when creating your own extension functions and +methods. 
Additional information and examples are available in +:ref:`extending-index`. + +The first three of these functions described, :cfunc:`PyArg_ParseTuple`, +:cfunc:`PyArg_ParseTupleAndKeywords`, and :cfunc:`PyArg_Parse`, all use *format +strings* which are used to tell the function about the expected arguments. The +format strings use the same syntax for each of these functions. + +A format string consists of zero or more "format units." A format unit +describes one Python object; it is usually a single character or a parenthesized +sequence of format units. With a few exceptions, a format unit that is not a +parenthesized sequence normally corresponds to a single address argument to +these functions. In the following description, the quoted form is the format +unit; the entry in (round) parentheses is the Python object type that matches +the format unit; and the entry in [square] brackets is the type of the C +variable(s) whose address should be passed. + +``s`` (string or Unicode object) [const char \*] + Convert a Python string or Unicode object to a C pointer to a character string. + You must not provide storage for the string itself; a pointer to an existing + string is stored into the character pointer variable whose address you pass. + The C string is NUL-terminated. The Python string must not contain embedded NUL + bytes; if it does, a :exc:`TypeError` exception is raised. Unicode objects are + converted to C strings using the default encoding. If this conversion fails, a + :exc:`UnicodeError` is raised. + +``s#`` (string, Unicode or any read buffer compatible object) [const char \*, int] + This variant on ``s`` stores into two C variables, the first one a pointer to a + character string, the second one its length. In this case the Python string may + contain embedded null bytes. Unicode objects pass back a pointer to the default + encoded string version of the object if such a conversion is possible. 
All + other read-buffer compatible objects pass back a reference to the raw internal + data representation. + +``y`` (bytes object) [const char \*] + This variant on ``s`` converts a Python bytes object to a C pointer to a + character string. The bytes object must not contain embedded NUL bytes; if it + does, a :exc:`TypeError` exception is raised. + +``y#`` (bytes object) [const char \*, int] + This variant on ``s#`` stores into two C variables, the first one a pointer to a + character string, the second one its length. This only accepts bytes objects. + +``z`` (string or ``None``) [const char \*] + Like ``s``, but the Python object may also be ``None``, in which case the C + pointer is set to *NULL*. + +``z#`` (string or ``None`` or any read buffer compatible object) [const char \*, int] + This is to ``s#`` as ``z`` is to ``s``. + +``u`` (Unicode object) [Py_UNICODE \*] + Convert a Python Unicode object to a C pointer to a NUL-terminated buffer of + 16-bit Unicode (UTF-16) data. As with ``s``, there is no need to provide + storage for the Unicode data buffer; a pointer to the existing Unicode data is + stored into the :ctype:`Py_UNICODE` pointer variable whose address you pass. + +``u#`` (Unicode object) [Py_UNICODE \*, int] + This variant on ``u`` stores into two C variables, the first one a pointer to a + Unicode data buffer, the second one its length. Non-Unicode objects are handled + by interpreting their read-buffer pointer as pointer to a :ctype:`Py_UNICODE` + array. + +``es`` (string, Unicode object or character buffer compatible object) [const char \*encoding, char \*\*buffer] + This variant on ``s`` is used for encoding Unicode and objects convertible to + Unicode into a character buffer. It only works for encoded data without embedded + NUL bytes. + + This format requires two arguments. 
The first is only used as input, and + must be a :ctype:`const char\*` which points to the name of an encoding as a + NUL-terminated string, or *NULL*, in which case the default encoding is used. + An exception is raised if the named encoding is not known to Python. The + second argument must be a :ctype:`char\*\*`; the value of the pointer it + references will be set to a buffer with the contents of the argument text. + The text will be encoded in the encoding specified by the first argument. + + :cfunc:`PyArg_ParseTuple` will allocate a buffer of the needed size, copy the + encoded data into this buffer and adjust *\*buffer* to reference the newly + allocated storage. The caller is responsible for calling :cfunc:`PyMem_Free` to + free the allocated buffer after use. + +``et`` (string, Unicode object or character buffer compatible object) [const char \*encoding, char \*\*buffer] + Same as ``es`` except that 8-bit string objects are passed through without + recoding them. Instead, the implementation assumes that the string object uses + the encoding passed in as parameter. + +``es#`` (string, Unicode object or character buffer compatible object) [const char \*encoding, char \*\*buffer, int \*buffer_length] + This variant on ``s#`` is used for encoding Unicode and objects convertible to + Unicode into a character buffer. Unlike the ``es`` format, this variant allows + input data which contains NUL characters. + + It requires three arguments. The first is only used as input, and must be a + :ctype:`const char\*` which points to the name of an encoding as a + NUL-terminated string, or *NULL*, in which case the default encoding is used. + An exception is raised if the named encoding is not known to Python. The + second argument must be a :ctype:`char\*\*`; the value of the pointer it + references will be set to a buffer with the contents of the argument text. + The text will be encoded in the encoding specified by the first argument. 
+ The third argument must be a pointer to an integer; the referenced integer + will be set to the number of bytes in the output buffer. + + There are two modes of operation: + + If *\*buffer* points to a *NULL* pointer, the function will allocate a buffer of + the needed size, copy the encoded data into this buffer and set *\*buffer* to + reference the newly allocated storage. The caller is responsible for calling + :cfunc:`PyMem_Free` to free the allocated buffer after usage. + + If *\*buffer* points to a non-*NULL* pointer (an already allocated buffer), + :cfunc:`PyArg_ParseTuple` will use this location as the buffer and interpret the + initial value of *\*buffer_length* as the buffer size. It will then copy the + encoded data into the buffer and NUL-terminate it. If the buffer is not large + enough, a :exc:`ValueError` will be set. + + In both cases, *\*buffer_length* is set to the length of the encoded data + without the trailing NUL byte. + +``et#`` (string, Unicode object or character buffer compatible object) [const char \*encoding, char \*\*buffer, int \*buffer_length] + Same as ``es#`` except that string objects are passed through without recoding + them. Instead, the implementation assumes that the string object uses the + encoding passed in as parameter. + +``b`` (integer) [char] + Convert a Python integer to a tiny int, stored in a C :ctype:`char`. + +``B`` (integer) [unsigned char] + Convert a Python integer to a tiny int without overflow checking, stored in a C + :ctype:`unsigned char`. + + .. versionadded:: 2.3 + +``h`` (integer) [short int] + Convert a Python integer to a C :ctype:`short int`. + +``H`` (integer) [unsigned short int] + Convert a Python integer to a C :ctype:`unsigned short int`, without overflow + checking. + + .. versionadded:: 2.3 + +``i`` (integer) [int] + Convert a Python integer to a plain C :ctype:`int`. + +``I`` (integer) [unsigned int] + Convert a Python integer to a C :ctype:`unsigned int`, without overflow + checking. + + .. 
versionadded:: 2.3 + +``l`` (integer) [long int] + Convert a Python integer to a C :ctype:`long int`. + +``k`` (integer) [unsigned long] + Convert a Python integer or long integer to a C :ctype:`unsigned long` without + overflow checking. + + .. versionadded:: 2.3 + +``L`` (integer) [PY_LONG_LONG] + Convert a Python integer to a C :ctype:`long long`. This format is only + available on platforms that support :ctype:`long long` (or :ctype:`_int64` on + Windows). + +``K`` (integer) [unsigned PY_LONG_LONG] + Convert a Python integer or long integer to a C :ctype:`unsigned long long` + without overflow checking. This format is only available on platforms that + support :ctype:`unsigned long long` (or :ctype:`unsigned _int64` on Windows). + + .. versionadded:: 2.3 + +``n`` (integer) [Py_ssize_t] + Convert a Python integer or long integer to a C :ctype:`Py_ssize_t`. + + .. versionadded:: 2.5 + +``c`` (string of length 1) [char] + Convert a Python character, represented as a string of length 1, to a C + :ctype:`char`. + +``f`` (float) [float] + Convert a Python floating point number to a C :ctype:`float`. + +``d`` (float) [double] + Convert a Python floating point number to a C :ctype:`double`. + +``D`` (complex) [Py_complex] + Convert a Python complex number to a C :ctype:`Py_complex` structure. + +``O`` (object) [PyObject \*] + Store a Python object (without any conversion) in a C object pointer. The C + program thus receives the actual object that was passed. The object's reference + count is not increased. The pointer stored is not *NULL*. + +``O!`` (object) [*typeobject*, PyObject \*] + Store a Python object in a C object pointer. This is similar to ``O``, but + takes two C arguments: the first is the address of a Python type object, the + second is the address of the C variable (of type :ctype:`PyObject\*`) into which + the object pointer is stored. If the Python object does not have the required + type, :exc:`TypeError` is raised. 
+ +``O&`` (object) [*converter*, *anything*] + Convert a Python object to a C variable through a *converter* function. This + takes two arguments: the first is a function, the second is the address of a C + variable (of arbitrary type), converted to :ctype:`void \*`. The *converter* + function in turn is called as follows:: + + status = converter(object, address); + + where *object* is the Python object to be converted and *address* is the + :ctype:`void\*` argument that was passed to the :cfunc:`PyArg_Parse\*` function. + The returned *status* should be ``1`` for a successful conversion and ``0`` if + the conversion has failed. When the conversion fails, the *converter* function + should raise an exception. + +``S`` (string) [PyStringObject \*] + Like ``O`` but requires that the Python object is a string object. Raises + :exc:`TypeError` if the object is not a string object. The C variable may also + be declared as :ctype:`PyObject\*`. + +``U`` (Unicode string) [PyUnicodeObject \*] + Like ``O`` but requires that the Python object is a Unicode object. Raises + :exc:`TypeError` if the object is not a Unicode object. The C variable may also + be declared as :ctype:`PyObject\*`. + +``t#`` (read-only character buffer) [char \*, int] + Like ``s#``, but accepts any object which implements the read-only buffer + interface. The :ctype:`char\*` variable is set to point to the first byte of + the buffer, and the :ctype:`int` is set to the length of the buffer. Only + single-segment buffer objects are accepted; :exc:`TypeError` is raised for all + others. + +``w`` (read-write character buffer) [char \*] + Similar to ``s``, but accepts any object which implements the read-write buffer + interface. The caller must determine the length of the buffer by other means, + or use ``w#`` instead. Only single-segment buffer objects are accepted; + :exc:`TypeError` is raised for all others. 
+ +``w#`` (read-write character buffer) [char \*, int] + Like ``s#``, but accepts any object which implements the read-write buffer + interface. The :ctype:`char \*` variable is set to point to the first byte of + the buffer, and the :ctype:`int` is set to the length of the buffer. Only + single-segment buffer objects are accepted; :exc:`TypeError` is raised for all + others. + +``(items)`` (tuple) [*matching-items*] + The object must be a Python sequence whose length is the number of format units + in *items*. The C arguments must correspond to the individual format units in + *items*. Format units for sequences may be nested. + + .. note:: + + Prior to Python version 1.5.2, this format specifier only accepted a tuple + containing the individual parameters, not an arbitrary sequence. Code which + previously caused :exc:`TypeError` to be raised here may now proceed without an + exception. This is not expected to be a problem for existing code. + +It is possible to pass Python long integers where integers are requested; +however no proper range checking is done --- the most significant bits are +silently truncated when the receiving field is too small to receive the value +(actually, the semantics are inherited from downcasts in C --- your mileage may +vary). + +A few other characters have a meaning in a format string. These may not occur +inside nested parentheses. They are: + +``|`` + Indicates that the remaining arguments in the Python argument list are optional. + The C variables corresponding to optional arguments should be initialized to + their default value --- when an optional argument is not specified, + :cfunc:`PyArg_ParseTuple` does not touch the contents of the corresponding C + variable(s). + +``:`` + The list of format units ends here; the string after the colon is used as the + function name in error messages (the "associated value" of the exception that + :cfunc:`PyArg_ParseTuple` raises). 
+ +``;`` + The list of format units ends here; the string after the semicolon is used as + the error message *instead* of the default error message. Clearly, ``:`` and + ``;`` mutually exclude each other. + +Note that any Python object references which are provided to the caller are +*borrowed* references; do not decrement their reference count! + +Additional arguments passed to these functions must be addresses of variables +whose type is determined by the format string; these are used to store values +from the input tuple. There are a few cases, as described in the list of format +units above, where these parameters are used as input values; they should match +what is specified for the corresponding format unit in that case. + +For the conversion to succeed, the *arg* object must match the format and the +format must be exhausted. On success, the :cfunc:`PyArg_Parse\*` functions +return true, otherwise they return false and raise an appropriate exception. + + +.. cfunction:: int PyArg_ParseTuple(PyObject *args, const char *format, ...) + + Parse the parameters of a function that takes only positional parameters into + local variables. Returns true on success; on failure, it returns false and + raises the appropriate exception. + + +.. cfunction:: int PyArg_VaParse(PyObject *args, const char *format, va_list vargs) + + Identical to :cfunc:`PyArg_ParseTuple`, except that it accepts a va_list rather + than a variable number of arguments. + + +.. cfunction:: int PyArg_ParseTupleAndKeywords(PyObject *args, PyObject *kw, const char *format, char *keywords[], ...) + + Parse the parameters of a function that takes both positional and keyword + parameters into local variables. Returns true on success; on failure, it + returns false and raises the appropriate exception. + + +.. 
cfunction:: int PyArg_VaParseTupleAndKeywords(PyObject *args, PyObject *kw, const char *format, char *keywords[], va_list vargs) + + Identical to :cfunc:`PyArg_ParseTupleAndKeywords`, except that it accepts a + va_list rather than a variable number of arguments. + + +.. cfunction:: int PyArg_Parse(PyObject *args, const char *format, ...) + + Function used to deconstruct the argument lists of "old-style" functions --- + these are functions which use the :const:`METH_OLDARGS` parameter parsing + method. This is not recommended for use in parameter parsing in new code, and + most code in the standard interpreter has been modified to no longer use this + for that purpose. It does remain a convenient way to decompose other tuples, + however, and may continue to be used for that purpose. + + +.. cfunction:: int PyArg_UnpackTuple(PyObject *args, const char *name, Py_ssize_t min, Py_ssize_t max, ...) + + A simpler form of parameter retrieval which does not use a format string to + specify the types of the arguments. Functions which use this method to retrieve + their parameters should be declared as :const:`METH_VARARGS` in function or + method tables. The tuple containing the actual parameters should be passed as + *args*; it must actually be a tuple. The length of the tuple must be at least + *min* and no more than *max*; *min* and *max* may be equal. Additional + arguments must be passed to the function, each of which should be a pointer to a + :ctype:`PyObject\*` variable; these will be filled in with the values from + *args*; they will contain borrowed references. The variables which correspond + to optional parameters not given by *args* will not be filled in; these should + be initialized by the caller. This function returns true on success and false if + *args* is not a tuple or contains the wrong number of elements; an exception + will be set if there was a failure. 
+ + This is an example of the use of this function, taken from the sources for the + :mod:`_weakref` helper module for weak references:: + + static PyObject * + weakref_ref(PyObject *self, PyObject *args) + { + PyObject *object; + PyObject *callback = NULL; + PyObject *result = NULL; + + if (PyArg_UnpackTuple(args, "ref", 1, 2, &object, &callback)) { + result = PyWeakref_NewRef(object, callback); + } + return result; + } + + The call to :cfunc:`PyArg_UnpackTuple` in this example is entirely equivalent to + this call to :cfunc:`PyArg_ParseTuple`:: + + PyArg_ParseTuple(args, "O|O:ref", &object, &callback) + + .. versionadded:: 2.2 + + +.. cfunction:: PyObject* Py_BuildValue(const char *format, ...) + + Create a new value based on a format string similar to those accepted by the + :cfunc:`PyArg_Parse\*` family of functions and a sequence of values. Returns + the value or *NULL* in the case of an error; an exception will be raised if + *NULL* is returned. + + :cfunc:`Py_BuildValue` does not always build a tuple. It builds a tuple only if + its format string contains two or more format units. If the format string is + empty, it returns ``None``; if it contains exactly one format unit, it returns + whatever object is described by that format unit. To force it to return a tuple + of size 0 or one, parenthesize the format string. + + When memory buffers are passed as parameters to supply data to build objects, as + for the ``s`` and ``s#`` formats, the required data is copied. Buffers provided + by the caller are never referenced by the objects created by + :cfunc:`Py_BuildValue`. In other words, if your code invokes :cfunc:`malloc` + and passes the allocated memory to :cfunc:`Py_BuildValue`, your code is + responsible for calling :cfunc:`free` for that memory once + :cfunc:`Py_BuildValue` returns. 
+ + In the following description, the quoted form is the format unit; the entry in + (round) parentheses is the Python object type that the format unit will return; + and the entry in [square] brackets is the type of the C value(s) to be passed. + + The characters space, tab, colon and comma are ignored in format strings (but + not within format units such as ``s#``). This can be used to make long format + strings a tad more readable. + + ``s`` (string) [char \*] + Convert a null-terminated C string to a Python object. If the C string pointer + is *NULL*, ``None`` is used. + + ``s#`` (string) [char \*, int] + Convert a C string and its length to a Python object. If the C string pointer + is *NULL*, the length is ignored and ``None`` is returned. + + ``z`` (string or ``None``) [char \*] + Same as ``s``. + + ``z#`` (string or ``None``) [char \*, int] + Same as ``s#``. + + ``u`` (Unicode string) [Py_UNICODE \*] + Convert a null-terminated buffer of Unicode (UCS-2 or UCS-4) data to a Python + Unicode object. If the Unicode buffer pointer is *NULL*, ``None`` is returned. + + ``u#`` (Unicode string) [Py_UNICODE \*, int] + Convert a Unicode (UCS-2 or UCS-4) data buffer and its length to a Python + Unicode object. If the Unicode buffer pointer is *NULL*, the length is ignored + and ``None`` is returned. + + ``U`` (string) [char \*] + Convert a null-terminated C string to a Python unicode object. If the C string + pointer is *NULL*, ``None`` is used. + + ``U#`` (string) [char \*, int] + Convert a C string and its length to a Python unicode object. If the C string + pointer is *NULL*, the length is ignored and ``None`` is returned. + + ``i`` (integer) [int] + Convert a plain C :ctype:`int` to a Python integer object. + + ``b`` (integer) [char] + Convert a plain C :ctype:`char` to a Python integer object. + + ``h`` (integer) [short int] + Convert a plain C :ctype:`short int` to a Python integer object. 
+ + ``l`` (integer) [long int] + Convert a C :ctype:`long int` to a Python integer object. + + ``B`` (integer) [unsigned char] + Convert a C :ctype:`unsigned char` to a Python integer object. + + ``H`` (integer) [unsigned short int] + Convert a C :ctype:`unsigned short int` to a Python integer object. + + ``I`` (integer/long) [unsigned int] + Convert a C :ctype:`unsigned int` to a Python integer object or a Python long + integer object, if it is larger than ``sys.maxint``. + + ``k`` (integer/long) [unsigned long] + Convert a C :ctype:`unsigned long` to a Python integer object or a Python long + integer object, if it is larger than ``sys.maxint``. + + ``L`` (long) [PY_LONG_LONG] + Convert a C :ctype:`long long` to a Python long integer object. Only available + on platforms that support :ctype:`long long`. + + ``K`` (long) [unsigned PY_LONG_LONG] + Convert a C :ctype:`unsigned long long` to a Python long integer object. Only + available on platforms that support :ctype:`unsigned long long`. + + ``n`` (int) [Py_ssize_t] + Convert a C :ctype:`Py_ssize_t` to a Python integer or long integer. + + .. versionadded:: 2.5 + + ``c`` (string of length 1) [char] + Convert a C :ctype:`int` representing a character to a Python string of length + 1. + + ``d`` (float) [double] + Convert a C :ctype:`double` to a Python floating point number. + + ``f`` (float) [float] + Same as ``d``. + + ``D`` (complex) [Py_complex \*] + Convert a C :ctype:`Py_complex` structure to a Python complex number. + + ``O`` (object) [PyObject \*] + Pass a Python object untouched (except for its reference count, which is + incremented by one). If the object passed in is a *NULL* pointer, it is assumed + that this was caused because the call producing the argument found an error and + set an exception. Therefore, :cfunc:`Py_BuildValue` will return *NULL* but won't + raise an exception. If no exception has been raised yet, :exc:`SystemError` is + set. + + ``S`` (object) [PyObject \*] + Same as ``O``. 
+ + ``N`` (object) [PyObject \*] + Same as ``O``, except it doesn't increment the reference count on the object. + Useful when the object is created by a call to an object constructor in the + argument list. + + ``O&`` (object) [*converter*, *anything*] + Convert *anything* to a Python object through a *converter* function. The + function is called with *anything* (which should be compatible with :ctype:`void + \*`) as its argument and should return a "new" Python object, or *NULL* if an + error occurred. + + ``(items)`` (tuple) [*matching-items*] + Convert a sequence of C values to a Python tuple with the same number of items. + + ``[items]`` (list) [*matching-items*] + Convert a sequence of C values to a Python list with the same number of items. + + ``{items}`` (dictionary) [*matching-items*] + Convert a sequence of C values to a Python dictionary. Each pair of consecutive + C values adds one item to the dictionary, serving as key and value, + respectively. + + If there is an error in the format string, the :exc:`SystemError` exception is + set and *NULL* returned. + + +.. _string-conversion: + +String conversion and formatting +================================ + +Functions for number conversion and formatted string output. + + +.. cfunction:: int PyOS_snprintf(char *str, size_t size, const char *format, ...) + + Output not more than *size* bytes to *str* according to the format string + *format* and the extra arguments. See the Unix man page :manpage:`snprintf(3)`. + + +.. cfunction:: int PyOS_vsnprintf(char *str, size_t size, const char *format, va_list va) + + Output not more than *size* bytes to *str* according to the format string + *format* and the variable argument list *va*. See the Unix man page + :manpage:`vsnprintf(3)`. + +:cfunc:`PyOS_snprintf` and :cfunc:`PyOS_vsnprintf` wrap the Standard C library +functions :cfunc:`snprintf` and :cfunc:`vsnprintf`. Their purpose is to +guarantee consistent behavior in corner cases, which the Standard C functions do +not.
+ +The wrappers ensure that *str*[*size*-1] is always ``'\0'`` upon return. They +never write more than *size* bytes (including the trailing ``'\0'``) into *str*. +Both functions require that ``str != NULL``, ``size > 0`` and ``format != +NULL``. + +If the platform doesn't have :cfunc:`vsnprintf` and the buffer size needed to +avoid truncation exceeds *size* by more than 512 bytes, Python aborts with a +*Py_FatalError*. + +The return value (*rv*) for these functions should be interpreted as follows: + +* When ``0 <= rv < size``, the output conversion was successful and *rv* + characters were written to *str* (excluding the trailing ``'\0'`` byte at + *str*[*rv*]). + +* When ``rv >= size``, the output conversion was truncated and a buffer with + ``rv + 1`` bytes would have been needed to succeed. *str*[*size*-1] is ``'\0'`` + in this case. + +* When ``rv < 0``, "something bad happened." *str*[*size*-1] is ``'\0'`` in + this case too, but the rest of *str* is undefined. The exact cause of the error + depends on the underlying platform. + +The following functions provide locale-independent string to number conversions. + + +.. cfunction:: double PyOS_ascii_strtod(const char *nptr, char **endptr) + + Convert a string to a :ctype:`double`. This function behaves like the Standard C + function :cfunc:`strtod` does in the C locale. It does this without changing the + current locale, since that would not be thread-safe. + + :cfunc:`PyOS_ascii_strtod` should typically be used for reading configuration + files or other non-user input that should be locale independent. + + .. versionadded:: 2.4 + + See the Unix man page :manpage:`strtod(3)` for details. + + +.. cfunction:: char * PyOS_ascii_formatd(char *buffer, size_t buf_len, const char *format, double d) + + Convert a :ctype:`double` to a string using ``'.'`` as the decimal + separator. *format* is a :cfunc:`printf`\ -style format string specifying the + number format.
Allowed conversion characters are ``'e'``, ``'E'``, ``'f'``, + ``'F'``, ``'g'`` and ``'G'``. + + The return value is a pointer to *buffer* with the converted string or *NULL* if + the conversion failed. + + .. versionadded:: 2.4 + + +.. cfunction:: double PyOS_ascii_atof(const char *nptr) + + Convert a string to a :ctype:`double` in a locale-independent way. + + .. versionadded:: 2.4 + + See the Unix man page :manpage:`atof(3)` for details. + diff --git a/Doc/c-api/veryhigh.rst b/Doc/c-api/veryhigh.rst new file mode 100644 index 0000000..4b26da6 --- /dev/null +++ b/Doc/c-api/veryhigh.rst @@ -0,0 +1,278 @@ +.. highlightlang:: c + + +.. _veryhigh: + +************************* +The Very High Level Layer +************************* + +The functions in this chapter will let you execute Python source code given in a +file or a buffer, but they will not let you interact in a more detailed way with +the interpreter. + +Several of these functions accept a start symbol from the grammar as a +parameter. The available start symbols are :const:`Py_eval_input`, +:const:`Py_file_input`, and :const:`Py_single_input`. These are described +following the functions which accept them as parameters. + +Note also that several of these functions take :ctype:`FILE\*` parameters. One +particular issue which needs to be handled carefully is that the :ctype:`FILE` +structure for different C libraries can be different and incompatible. Under +Windows (at least), it is possible for dynamically linked extensions to actually +use different libraries, so care should be taken that :ctype:`FILE\*` parameters +are only passed to these functions if it is certain that they were created by +the same library that the Python runtime is using. + + +.. cfunction:: int Py_Main(int argc, char **argv) + + The main program for the standard interpreter. This is made available for + programs which embed Python.
The *argc* and *argv* parameters should be + prepared exactly as those which are passed to a C program's :cfunc:`main` + function. It is important to note that the argument list may be modified (but + the contents of the strings pointed to by the argument list are not). The return + value will be the integer passed to the :func:`sys.exit` function, ``1`` if the + interpreter exits due to an exception, or ``2`` if the parameter list does not + represent a valid Python command line. + + +.. cfunction:: int PyRun_AnyFile(FILE *fp, const char *filename) + + This is a simplified interface to :cfunc:`PyRun_AnyFileExFlags` below, leaving + *closeit* set to ``0`` and *flags* set to *NULL*. + + +.. cfunction:: int PyRun_AnyFileFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) + + This is a simplified interface to :cfunc:`PyRun_AnyFileExFlags` below, leaving + the *closeit* argument set to ``0``. + + +.. cfunction:: int PyRun_AnyFileEx(FILE *fp, const char *filename, int closeit) + + This is a simplified interface to :cfunc:`PyRun_AnyFileExFlags` below, leaving + the *flags* argument set to *NULL*. + + +.. cfunction:: int PyRun_AnyFileExFlags(FILE *fp, const char *filename, int closeit, PyCompilerFlags *flags) + + If *fp* refers to a file associated with an interactive device (console or + terminal input or Unix pseudo-terminal), return the value of + :cfunc:`PyRun_InteractiveLoop`, otherwise return the result of + :cfunc:`PyRun_SimpleFile`. If *filename* is *NULL*, this function uses + ``"???"`` as the filename. + + +.. cfunction:: int PyRun_SimpleString(const char *command) + + This is a simplified interface to :cfunc:`PyRun_SimpleStringFlags` below, + leaving the *PyCompilerFlags\** argument set to NULL. + + +.. cfunction:: int PyRun_SimpleStringFlags(const char *command, PyCompilerFlags *flags) + + Executes the Python source code from *command* in the :mod:`__main__` module + according to the *flags* argument. 
If :mod:`__main__` does not already exist, it + is created. Returns ``0`` on success or ``-1`` if an exception was raised. If + there was an error, there is no way to get the exception information. For the + meaning of *flags*, see below. + + +.. cfunction:: int PyRun_SimpleFile(FILE *fp, const char *filename) + + This is a simplified interface to :cfunc:`PyRun_SimpleFileExFlags` below, + leaving *closeit* set to ``0`` and *flags* set to *NULL*. + + +.. cfunction:: int PyRun_SimpleFileFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) + + This is a simplified interface to :cfunc:`PyRun_SimpleFileExFlags` below, + leaving *closeit* set to ``0``. + + +.. cfunction:: int PyRun_SimpleFileEx(FILE *fp, const char *filename, int closeit) + + This is a simplified interface to :cfunc:`PyRun_SimpleFileExFlags` below, + leaving *flags* set to *NULL*. + + +.. cfunction:: int PyRun_SimpleFileExFlags(FILE *fp, const char *filename, int closeit, PyCompilerFlags *flags) + + Similar to :cfunc:`PyRun_SimpleStringFlags`, but the Python source code is read + from *fp* instead of an in-memory string. *filename* should be the name of the + file. If *closeit* is true, the file is closed before PyRun_SimpleFileExFlags + returns. + + +.. cfunction:: int PyRun_InteractiveOne(FILE *fp, const char *filename) + + This is a simplified interface to :cfunc:`PyRun_InteractiveOneFlags` below, + leaving *flags* set to *NULL*. + + +.. cfunction:: int PyRun_InteractiveOneFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) + + Read and execute a single statement from a file associated with an interactive + device according to the *flags* argument. If *filename* is *NULL*, ``"???"`` is + used instead. The user will be prompted using ``sys.ps1`` and ``sys.ps2``. + Returns ``0`` when the input was executed successfully, ``-1`` if there was an + exception, or an error code from the :file:`errcode.h` include file distributed + as part of Python if there was a parse error. 
(Note that :file:`errcode.h` is + not included by :file:`Python.h`, so must be included specifically if needed.) + + +.. cfunction:: int PyRun_InteractiveLoop(FILE *fp, const char *filename) + + This is a simplified interface to :cfunc:`PyRun_InteractiveLoopFlags` below, + leaving *flags* set to *NULL*. + + +.. cfunction:: int PyRun_InteractiveLoopFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) + + Read and execute statements from a file associated with an interactive device + until EOF is reached. If *filename* is *NULL*, ``"???"`` is used instead. The + user will be prompted using ``sys.ps1`` and ``sys.ps2``. Returns ``0`` at EOF. + + +.. cfunction:: struct _node* PyParser_SimpleParseString(const char *str, int start) + + This is a simplified interface to + :cfunc:`PyParser_SimpleParseStringFlagsFilename` below, leaving *filename* set + to *NULL* and *flags* set to ``0``. + + +.. cfunction:: struct _node* PyParser_SimpleParseStringFlags( const char *str, int start, int flags) + + This is a simplified interface to + :cfunc:`PyParser_SimpleParseStringFlagsFilename` below, leaving *filename* set + to *NULL*. + + +.. cfunction:: struct _node* PyParser_SimpleParseStringFlagsFilename( const char *str, const char *filename, int start, int flags) + + Parse Python source code from *str* using the start token *start* according to + the *flags* argument. The result can be used to create a code object which can + be evaluated efficiently. This is useful if a code fragment must be evaluated + many times. + + +.. cfunction:: struct _node* PyParser_SimpleParseFile(FILE *fp, const char *filename, int start) + + This is a simplified interface to :cfunc:`PyParser_SimpleParseFileFlags` below, + leaving *flags* set to ``0``. + + +..
cfunction:: struct _node* PyParser_SimpleParseFileFlags(FILE *fp, const char *filename, int start, int flags) + + Similar to :cfunc:`PyParser_SimpleParseStringFlagsFilename`, but the Python + source code is read from *fp* instead of an in-memory string. + + +.. cfunction:: PyObject* PyRun_String(const char *str, int start, PyObject *globals, PyObject *locals) + + This is a simplified interface to :cfunc:`PyRun_StringFlags` below, leaving + *flags* set to *NULL*. + + +.. cfunction:: PyObject* PyRun_StringFlags(const char *str, int start, PyObject *globals, PyObject *locals, PyCompilerFlags *flags) + + Execute Python source code from *str* in the context specified by the + dictionaries *globals* and *locals* with the compiler flags specified by + *flags*. The parameter *start* specifies the start token that should be used to + parse the source code. + + Returns the result of executing the code as a Python object, or *NULL* if an + exception was raised. + + +.. cfunction:: PyObject* PyRun_File(FILE *fp, const char *filename, int start, PyObject *globals, PyObject *locals) + + This is a simplified interface to :cfunc:`PyRun_FileExFlags` below, leaving + *closeit* set to ``0`` and *flags* set to *NULL*. + + +.. cfunction:: PyObject* PyRun_FileEx(FILE *fp, const char *filename, int start, PyObject *globals, PyObject *locals, int closeit) + + This is a simplified interface to :cfunc:`PyRun_FileExFlags` below, leaving + *flags* set to *NULL*. + + +.. cfunction:: PyObject* PyRun_FileFlags(FILE *fp, const char *filename, int start, PyObject *globals, PyObject *locals, PyCompilerFlags *flags) + + This is a simplified interface to :cfunc:`PyRun_FileExFlags` below, leaving + *closeit* set to ``0``. + + +.. 
cfunction:: PyObject* PyRun_FileExFlags(FILE *fp, const char *filename, int start, PyObject *globals, PyObject *locals, int closeit, PyCompilerFlags *flags) + + Similar to :cfunc:`PyRun_StringFlags`, but the Python source code is read from + *fp* instead of an in-memory string. *filename* should be the name of the file. + If *closeit* is true, the file is closed before :cfunc:`PyRun_FileExFlags` + returns. + + +.. cfunction:: PyObject* Py_CompileString(const char *str, const char *filename, int start) + + This is a simplified interface to :cfunc:`Py_CompileStringFlags` below, leaving + *flags* set to *NULL*. + + +.. cfunction:: PyObject* Py_CompileStringFlags(const char *str, const char *filename, int start, PyCompilerFlags *flags) + + Parse and compile the Python source code in *str*, returning the resulting code + object. The start token is given by *start*; this can be used to constrain the + code which can be compiled and should be :const:`Py_eval_input`, + :const:`Py_file_input`, or :const:`Py_single_input`. The filename specified by + *filename* is used to construct the code object and may appear in tracebacks or + :exc:`SyntaxError` exception messages. This returns *NULL* if the code cannot + be parsed or compiled. + + +.. cvar:: int Py_eval_input + + .. index:: single: Py_CompileString() + + The start symbol from the Python grammar for isolated expressions; for use with + :cfunc:`Py_CompileString`. + + +.. cvar:: int Py_file_input + + .. index:: single: Py_CompileString() + + The start symbol from the Python grammar for sequences of statements as read + from a file or other source; for use with :cfunc:`Py_CompileString`. This is + the symbol to use when compiling arbitrarily long Python source code. + + +.. cvar:: int Py_single_input + + .. index:: single: Py_CompileString() + + The start symbol from the Python grammar for a single statement; for use with + :cfunc:`Py_CompileString`. This is the symbol used for the interactive + interpreter loop. + + +.. 
ctype:: struct PyCompilerFlags + + This is the structure used to hold compiler flags. In cases where code is only + being compiled, it is passed as ``int flags``, and in cases where code is being + executed, it is passed as ``PyCompilerFlags *flags``. In this case, ``from + __future__ import`` can modify *flags*. + + Whenever ``PyCompilerFlags *flags`` is *NULL*, :attr:`cf_flags` is treated as + equal to ``0``, and any modification due to ``from __future__ import`` is + discarded. :: + + struct PyCompilerFlags { + int cf_flags; + } + + +.. cvar:: int CO_FUTURE_DIVISION + + This bit can be set in *flags* to cause division operator ``/`` to be + interpreted as "true division" according to :pep:`238`. + diff --git a/Doc/conf.py b/Doc/conf.py new file mode 100644 index 0000000..6736f53 --- /dev/null +++ b/Doc/conf.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +# +# Python documentation build configuration file +# +# The contents of this file are pickled, so don't put values in the namespace +# that aren't pickleable (module imports are okay, they're removed automatically). +# + +# The default replacements for |version| and |release|. +# If 'auto', Sphinx looks for the Include/patchlevel.h file in the current Python +# source tree and replaces the values accordingly. +# +# The short X.Y version. +# version = '2.6' +version = 'auto' +# The full version, including alpha/beta/rc tags. +# release = '2.6a0' +release = 'auto' + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +today = '' +# Else, today_fmt is used as the format for a strftime call. +today_fmt = '%B %d, %Y' + +# The base URL for download links. +download_base_url = 'http://docs.python.org/ftp/python/doc/' + +# List of files that shouldn't be included in the build. 
+unused_files = [ + 'whatsnew/2.0.rst', + 'whatsnew/2.1.rst', + 'whatsnew/2.2.rst', + 'whatsnew/2.3.rst', + 'whatsnew/2.4.rst', + 'whatsnew/2.5.rst', + 'whatsnew/2.6.rst', + 'maclib/scrap.rst', + 'library/xmllib.rst', + 'library/xml.etree.rst', +] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +last_updated_format = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +use_smartypants = True + +# If true, '()' will be appended to :func: etc. cross-reference text. +add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +add_module_names = True diff --git a/Doc/contents.rst b/Doc/contents.rst new file mode 100644 index 0000000..59a72fa --- /dev/null +++ b/Doc/contents.rst @@ -0,0 +1,21 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + Python Documentation contents +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +.. toctree:: + + whatsnew/3.0.rst + tutorial/index.rst + reference/index.rst + library/index.rst + extending/index.rst + c-api/index.rst + distutils/index.rst + install/index.rst + documenting/index.rst + howto/index.rst + + about.rst + bugs.rst + copyright.rst + license.rst diff --git a/Doc/copyright.rst b/Doc/copyright.rst new file mode 100644 index 0000000..9a472af --- /dev/null +++ b/Doc/copyright.rst @@ -0,0 +1,19 @@ +********* +Copyright +********* + +Python and this documentation is: + +Copyright © 2001-2007 Python Software Foundation. All rights reserved. + +Copyright © 2000 BeOpen.com. All rights reserved. + +Copyright © 1995-2000 Corporation for National Research Initiatives. All rights +reserved. + +Copyright © 1991-1995 Stichting Mathematisch Centrum. All rights reserved. + +------- + +See :ref:`history-and-license` for complete license and permissions information. 
+ diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat new file mode 100644 index 0000000..5cb0b7a --- /dev/null +++ b/Doc/data/refcounts.dat @@ -0,0 +1,1747 @@ +# Created by Skip Montanaro . + +# Format: +# function ':' type ':' [param name] ':' [refcount effect] ':' [comment] +# If the param name slot is empty, that line corresponds to the function's +# return value, otherwise it's the type of the named parameter. + +# The first line of a function block gives type/refcount information for the +# function's return value. Successive lines with the same function name +# correspond to the function's parameter list and appear in the order the +# parameters appear in the function's prototype. + +# For readability, each function's lines are surrounded by a blank line. +# The blocks are sorted alphabetically by function name. + +# Refcount behavior is given for all PyObject* types: 0 (no change), +1 +# (increment) and -1 (decrement). A blank refcount field indicates the +# parameter or function value is not a PyObject* and is therefore not +# subject to reference counting. A special case for the value "null" +# (without quotes) is used for functions which return a PyObject* type but +# always return NULL. This is used by some of the PyErr_*() functions, in +# particular. + +# XXX NOTE: the 0/+1/-1 refcount information for arguments is +# confusing! Much more useful would be to indicate whether the +# function "steals" a reference to the argument or not. Take for +# example PyList_SetItem(list, i, item). This lists as a 0 change for +# both the list and the item arguments. However, in fact it steals a +# reference to the item argument! + +# The parameter names are as they appear in the API manual, not the source +# code. 
+ +PyBool_FromLong:PyObject*::+1: +PyBool_FromLong:long:v:0: + +PyBuffer_FromObject:PyObject*::+1: +PyBuffer_FromObject:PyObject*:base:+1: +PyBuffer_FromObject:int:offset:: +PyBuffer_FromObject:int:size:: + +PyBuffer_FromReadWriteObject:PyObject*::+1: +PyBuffer_FromReadWriteObject:PyObject*:base:+1: +PyBuffer_FromReadWriteObject:int:offset:: +PyBuffer_FromReadWriteObject:int:size:: + +PyBuffer_FromMemory:PyObject*::+1: +PyBuffer_FromMemory:void*:ptr:: +PyBuffer_FromMemory:int:size:: + +PyBuffer_FromReadWriteMemory:PyObject*::+1: +PyBuffer_FromReadWriteMemory:void*:ptr:: +PyBuffer_FromReadWriteMemory:int:size:: + +PyBuffer_New:PyObject*::+1: +PyBuffer_New:int:size:: + +PyCObject_AsVoidPtr:void*::: +PyCObject_AsVoidPtr:PyObject*:self:0: + +PyCObject_FromVoidPtr:PyObject*::+1: +PyCObject_FromVoidPtr:void*:cobj:: +PyCObject_FromVoidPtr::void (* destr)(void* ):: + +PyCObject_FromVoidPtrAndDesc:PyObject*::+1: +PyCObject_FromVoidPtrAndDesc:void*:cobj:: +PyCObject_FromVoidPtrAndDesc:void*:desc:: +PyCObject_FromVoidPtrAndDesc:void(*)(void*,void*):destr:: + +PyCObject_GetDesc:void*::: +PyCObject_GetDesc:PyObject*:self:0: + +PyCell_New:PyObject*::+1: +PyCell_New:PyObject*:ob:0: + +PyCell_GET:PyObject*::0: +PyCell_GET:PyObject*:ob:0: + +PyCell_Get:PyObject*::+1: +PyCell_Get:PyObject*:cell:0: + +PyCell_SET:void::: +PyCell_SET:PyObject*:cell:0: +PyCell_SET:PyObject*:value:0: + +PyCell_Set:int::: +PyCell_Set:PyObject*:cell:0: +PyCell_Set:PyObject*:value:0: + +PyCallIter_New:PyObject*::+1: +PyCallIter_New:PyObject*:callable:: +PyCallIter_New:PyObject*:sentinel:: + +PyCallable_Check:int::: +PyCallable_Check:PyObject*:o:0: + +PyComplex_AsCComplex:Py_complex::: +PyComplex_AsCComplex:PyObject*:op:0: + +PyComplex_Check:int::: +PyComplex_Check:PyObject*:p:0: + +PyComplex_FromCComplex:PyObject*::+1: +PyComplex_FromCComplex::Py_complex v:: + +PyComplex_FromDoubles:PyObject*::+1: +PyComplex_FromDoubles::double real:: +PyComplex_FromDoubles::double imag:: + +PyComplex_ImagAsDouble:double::: 
+PyComplex_ImagAsDouble:PyObject*:op:0: + +PyComplex_RealAsDouble:double::: +PyComplex_RealAsDouble:PyObject*:op:0: + +PyDate_FromDate:PyObject*::+1: +PyDate_FromDate:int:year:: +PyDate_FromDate:int:month:: +PyDate_FromDate:int:day:: + +PyDate_FromTimestamp:PyObject*::+1: +PyDate_FromTimestamp:PyObject*:args:0: + +PyDateTime_FromDateAndTime:PyObject*::+1: +PyDateTime_FromDateAndTime:int:year:: +PyDateTime_FromDateAndTime:int:month:: +PyDateTime_FromDateAndTime:int:day:: +PyDateTime_FromDateAndTime:int:hour:: +PyDateTime_FromDateAndTime:int:minute:: +PyDateTime_FromDateAndTime:int:second:: +PyDateTime_FromDateAndTime:int:usecond:: + +PyDateTime_FromTimestamp:PyObject*::+1: +PyDateTime_FromTimestamp:PyObject*:args:0: + +PyDelta_FromDSU:PyObject*::+1: +PyDelta_FromDSU:int:days:: +PyDelta_FromDSU:int:seconds:: +PyDelta_FromDSU:int:useconds:: + +PyDescr_NewClassMethod:PyObject*::+1: +PyDescr_NewClassMethod:PyTypeObject*:type:: +PyDescr_NewClassMethod:PyMethodDef*:method:: + +PyDescr_NewGetSet:PyObject*::+1: +PyDescr_NewGetSet:PyTypeObject*:type:: +PyDescr_NewGetSet:PyGetSetDef*:getset:: + +PyDescr_NewMember:PyObject*::+1: +PyDescr_NewMember:PyTypeObject*:type:: +PyDescr_NewMember:PyMemberDef*:member:: + +PyDescr_NewMethod:PyObject*::+1: +PyDescr_NewMethod:PyTypeObject*:type:: +PyDescr_NewMethod:PyMethodDef*:meth:: + +PyDescr_NewWrapper:PyObject*::+1: +PyDescr_NewWrapper:PyTypeObject*:type:: +PyDescr_NewWrapper:struct wrapperbase*:base:: +PyDescr_NewWrapper:void*:wrapped:: + +PyDict_Check:int::: +PyDict_Check:PyObject*:p:0: + +PyDict_Clear:void::: +PyDict_Clear:PyObject*:p:0: + +PyDict_DelItem:int::: +PyDict_DelItem:PyObject*:p:0: +PyDict_DelItem:PyObject*:key:0: + +PyDict_DelItemString:int::: +PyDict_DelItemString:PyObject*:p:0: +PyDict_DelItemString:char*:key:: + +PyDict_GetItem:PyObject*::0:0 +PyDict_GetItem:PyObject*:p:0: +PyDict_GetItem:PyObject*:key:0: + +PyDict_GetItemString:PyObject*::0: +PyDict_GetItemString:PyObject*:p:0: +PyDict_GetItemString:char*:key:: + 
+PyDict_Items:PyObject*::+1: +PyDict_Items:PyObject*:p:0: + +PyDict_Keys:PyObject*::+1: +PyDict_Keys:PyObject*:p:0: + +PyDict_New:PyObject*::+1: + +PyDict_Copy:PyObject*::+1: +PyDict_Copy:PyObject*:p:0: + +PyDict_Next:int::: +PyDict_Next:PyObject*:p:0: +PyDict_Next:int:ppos:: +PyDict_Next:PyObject**:pkey:0: +PyDict_Next:PyObject**:pvalue:0: + +PyDict_SetItem:int::: +PyDict_SetItem:PyObject*:p:0: +PyDict_SetItem:PyObject*:key:+1: +PyDict_SetItem:PyObject*:val:+1: + +PyDict_SetItemString:int::: +PyDict_SetItemString:PyObject*:p:0: +PyDict_SetItemString:char*:key:: +PyDict_SetItemString:PyObject*:val:+1: + +PyDict_Size:int::: +PyDict_Size:PyObject*:p:: + +PyDict_Values:PyObject*::+1: +PyDict_Values:PyObject*:p:0: + +PyDictProxy_New:PyObject*::+1: +PyDictProxy_New:PyObject*:dict:0: + +PyErr_BadArgument:int::: + +PyErr_BadInternalCall:void::: + +PyErr_CheckSignals:int::: + +PyErr_Clear:void::: + +PyErr_ExceptionMatches:int::: +PyErr_ExceptionMatches:PyObject*:exc:0: + +PyErr_Fetch:void::: +PyErr_Fetch:PyObject**:ptype:0: +PyErr_Fetch:PyObject**:pvalue:0: +PyErr_Fetch:PyObject**:ptraceback:0: + +PyErr_GivenExceptionMatches:int::: +PyErr_GivenExceptionMatches:PyObject*:given:0: +PyErr_GivenExceptionMatches:PyObject*:exc:0: + +PyErr_NewException:PyObject*::+1: +PyErr_NewException:char*:name:: +PyErr_NewException:PyObject*:base:0: +PyErr_NewException:PyObject*:dict:0: + +PyErr_NoMemory:PyObject*::null: + +PyErr_NormalizeException:void::: +PyErr_NormalizeException:PyObject**:exc::??? +PyErr_NormalizeException:PyObject**:val::??? +PyErr_NormalizeException:PyObject**:tb::??? 
+ +PyErr_Occurred:PyObject*::0: + +PyErr_Print:void::: + +PyErr_Restore:void::: +PyErr_Restore:PyObject*:type:-1: +PyErr_Restore:PyObject*:value:-1: +PyErr_Restore:PyObject*:traceback:-1: + +PyErr_SetExcFromWindowsErr:PyObject*::null: +PyErr_SetExcFromWindowsErr:PyObject*:type:0: +PyErr_SetExcFromWindowsErr:int:ierr:: + +PyErr_SetExcFromWindowsErrWithFilename:PyObject*::null: +PyErr_SetExcFromWindowsErrWithFilename:PyObject*:type:0: +PyErr_SetExcFromWindowsErrWithFilename:int:ierr:: +PyErr_SetExcFromWindowsErrWithFilename:char*:filename:: + +PyErr_SetFromErrno:PyObject*::null: +PyErr_SetFromErrno:PyObject*:type:0: + +PyErr_SetFromErrnoWithFilename:PyObject*::null: +PyErr_SetFromErrnoWithFilename:PyObject*:type:0: +PyErr_SetFromErrnoWithFilename:char*:filename:: + +PyErr_SetFromWindowsErr:PyObject*::null: +PyErr_SetFromWindowsErr:int:ierr:: + +PyErr_SetFromWindowsErrWithFilename:PyObject*::null: +PyErr_SetFromWindowsErrWithFilename:int:ierr:: +PyErr_SetFromWindowsErrWithFilename:char*:filename:: + +PyErr_SetInterrupt:void::: + +PyErr_SetNone:void::: +PyErr_SetNone:PyObject*:type:+1: + +PyErr_SetObject:void::: +PyErr_SetObject:PyObject*:type:+1: +PyErr_SetObject:PyObject*:value:+1: + +PyErr_SetString:void::: +PyErr_SetString:PyObject*:type:+1: +PyErr_SetString:char*:message:: + +PyErr_Format:PyObject*::null: +PyErr_Format:PyObject*:exception:+1: +PyErr_Format:char*:format:: +PyErr_Format::...:: + +PyErr_WarnEx:int::: +PyErr_WarnEx:PyObject*:category:0: +PyErr_WarnEx:const char*:message:: +PyErr_WarnEx:Py_ssize_t:stack_level:: + +PyEval_AcquireLock:void::: + +PyEval_AcquireThread:void::: +PyEval_AcquireThread:PyThreadState*:tstate:: + +PyEval_InitThreads:void::: + +PyEval_ReleaseLock:void::: + +PyEval_ReleaseThread:void::: +PyEval_ReleaseThread:PyThreadState*:tstate:: + +PyEval_RestoreThread:void::: +PyEval_RestoreThread:PyThreadState*:tstate:: + +PyEval_SaveThread:PyThreadState*::: + +PyEval_EvalCode:PyObject*::+1: +PyEval_EvalCode:PyCodeObject*:co:0: 
+PyEval_EvalCode:PyObject*:globals:0: +PyEval_EvalCode:PyObject*:locals:0: + +PyFile_AsFile:FILE*::: +PyFile_AsFile:PyFileObject*:p:0: + +PyFile_Check:int::: +PyFile_Check:PyObject*:p:0: + +PyFile_FromFile:PyObject*::+1: +PyFile_FromFile:FILE*:fp:: +PyFile_FromFile:char*:name:: +PyFile_FromFile:char*:mode:: +PyFile_FromFile:int(*:close):: + +PyFile_FromString:PyObject*::+1: +PyFile_FromString:char*:name:: +PyFile_FromString:char*:mode:: + +PyFile_GetLine:PyObject*::+1: +PyFile_GetLine:PyObject*:p:: +PyFile_GetLine:int:n:: + +PyFile_Name:PyObject*::0: +PyFile_Name:PyObject*:p:0: + +PyFile_SetBufSize:void::: +PyFile_SetBufSize:PyFileObject*:p:0: +PyFile_SetBufSize:int:n:: + +PyFile_SoftSpace:int::: +PyFile_SoftSpace:PyFileObject*:p:0: +PyFile_SoftSpace:int:newflag:: + +PyFile_WriteObject:int::: +PyFile_WriteObject:PyObject*:obj:0: +PyFile_WriteObject:PyFileObject*:p:0: +PyFile_WriteObject:int:flags:: + +PyFile_WriteString:int::: +PyFile_WriteString:const char*:s:: +PyFile_WriteString:PyFileObject*:p:0: +PyFile_WriteString:int:flags:: + +PyFloat_AS_DOUBLE:double::: +PyFloat_AS_DOUBLE:PyObject*:pyfloat:0: + +PyFloat_AsDouble:double::: +PyFloat_AsDouble:PyObject*:pyfloat:0: + +PyFloat_Check:int::: +PyFloat_Check:PyObject*:p:0: + +PyFloat_FromDouble:PyObject*::+1: +PyFloat_FromDouble:double:v:: + +PyFloat_FromString:PyObject*::+1: +PyFloat_FromString:PyObject*:str:0: + +PyFrozenSet_New:PyObject*::+1: +PyFrozenSet_New:PyObject*:iterable:0: + +PyFunction_GetClosure:PyObject*::0: +PyFunction_GetClosure:PyObject*:op:0: + +PyFunction_GetCode:PyObject*::0: +PyFunction_GetCode:PyObject*:op:0: + +PyFunction_GetDefaults:PyObject*::0: +PyFunction_GetDefaults:PyObject*:op:0: + +PyFunction_GetGlobals:PyObject*::0: +PyFunction_GetGlobals:PyObject*:op:0: + +PyFunction_GetModule:PyObject*::0: +PyFunction_GetModule:PyObject*:op:0: + +PyFunction_New:PyObject*::+1: +PyFunction_New:PyObject*:code:+1: +PyFunction_New:PyObject*:globals:+1: + +PyFunction_SetClosure:int::: 
+PyFunction_SetClosure:PyObject*:op:0: +PyFunction_SetClosure:PyObject*:closure:+1: + +PyFunction_SetDefaults:int::: +PyFunction_SetDefaults:PyObject*:op:0: +PyFunction_SetDefaults:PyObject*:defaults:+1: + +PyGen_New:PyObject*::+1: +PyGen_New:PyFrameObject*:frame:0: + +Py_InitModule:PyObject*::0: +Py_InitModule:char*:name:: +Py_InitModule:PyMethodDef[]:methods:: + +Py_InitModule3:PyObject*::0: +Py_InitModule3:char*:name:: +Py_InitModule3:PyMethodDef[]:methods:: +Py_InitModule3:char*:doc:: + +Py_InitModule4:PyObject*::0: +Py_InitModule4:char*:name:: +Py_InitModule4:PyMethodDef[]:methods:: +Py_InitModule4:char*:doc:: +Py_InitModule4:PyObject*:self:: +Py_InitModule4:int:apiver::usually provided by Py_InitModule or Py_InitModule3 + +PyImport_AddModule:PyObject*::0:reference borrowed from sys.modules +PyImport_AddModule:char*:name:: + +PyImport_Cleanup:void::: + +PyImport_ExecCodeModule:PyObject*::+1: +PyImport_ExecCodeModule:char*:name:: +PyImport_ExecCodeModule:PyObject*:co:0: + +PyImport_GetMagicNumber:long::: + +PyImport_GetModuleDict:PyObject*::0: + +PyImport_Import:PyObject*::+1: +PyImport_Import:PyObject*:name:0: + +PyImport_ImportFrozenModule:int::: +PyImport_ImportFrozenModule:char*::: + +PyImport_ImportModule:PyObject*::+1: +PyImport_ImportModule:char*:name:: + +PyImport_ImportModuleEx:PyObject*::+1: +PyImport_ImportModuleEx:char*:name:: +PyImport_ImportModuleEx:PyObject*:globals:0:??? +PyImport_ImportModuleEx:PyObject*:locals:0:??? +PyImport_ImportModuleEx:PyObject*:fromlist:0:??? 
+ +PyImport_ReloadModule:PyObject*::+1: +PyImport_ReloadModule:PyObject*:m:0: + +PyInstance_New:PyObject*::+1: +PyInstance_New:PyObject*:klass:+1: +PyInstance_New:PyObject*:arg:0: +PyInstance_New:PyObject*:kw:0: + +PyInstance_NewRaw:PyObject*::+1: +PyInstance_NewRaw:PyObject*:klass:+1: +PyInstance_NewRaw:PyObject*:dict:+1: + +PyInt_AS_LONG:long::: +PyInt_AS_LONG:PyIntObject*:io:0: + +PyInt_AsLong:long::: +PyInt_AsLong:PyObject*:io:0: + +PyInt_Check:int::: +PyInt_Check:PyObject*:op:0: + +PyInt_FromLong:PyObject*::+1: +PyInt_FromLong:long:ival:: + +PyInt_FromString:PyObject*::+1: +PyInt_FromString:char*:str:0: +PyInt_FromString:char**:pend:0: +PyInt_FromString:int:base:0: + +PyInt_FromSsize_t:PyObject*::+1: +PyInt_FromSsize_t:Py_ssize_t:ival:: + +PyInt_GetMax:long::: + +PyInterpreterState_Clear:void::: +PyInterpreterState_Clear:PyInterpreterState*:interp:: + +PyInterpreterState_Delete:void::: +PyInterpreterState_Delete:PyInterpreterState*:interp:: + +PyInterpreterState_New:PyInterpreterState*::: + +PyIter_Check:int:o:0: + +PyIter_Next:PyObject*::+1: +PyIter_Next:PyObject*:o:0: + +PyList_Append:int::: +PyList_Append:PyObject*:list:0: +PyList_Append:PyObject*:item:+1: + +PyList_AsTuple:PyObject*::+1: +PyList_AsTuple:PyObject*:list:0: + +PyList_Check:int::: +PyList_Check:PyObject*:p:0: + +PyList_GET_ITEM:PyObject*::0: +PyList_GET_ITEM:PyObject*:list:0: +PyList_GET_ITEM:int:i:0: + +PyList_GET_SIZE:int::: +PyList_GET_SIZE:PyObject*:list:0: + +PyList_GetItem:PyObject*::0: +PyList_GetItem:PyObject*:list:0: +PyList_GetItem:int:index:: + +PyList_GetSlice:PyObject*::+1: +PyList_GetSlice:PyObject*:list:0: +PyList_GetSlice:int:low:: +PyList_GetSlice:int:high:: + +PyList_Insert:int::: +PyList_Insert:PyObject*:list:0: +PyList_Insert:int:index:: +PyList_Insert:PyObject*:item:+1: + +PyList_New:PyObject*::+1: +PyList_New:int:len:: + +PyList_Reverse:int::: +PyList_Reverse:PyObject*:list:0: + +PyList_SET_ITEM:void::: +PyList_SET_ITEM:PyObject*:list:0: +PyList_SET_ITEM:int:i:: 
+PyList_SET_ITEM:PyObject*:o:0: + +PyList_SetItem:int::: +PyList_SetItem:PyObject*:list:0: +PyList_SetItem:int:index:: +PyList_SetItem:PyObject*:item:0: + +PyList_SetSlice:int::: +PyList_SetSlice:PyObject*:list:0: +PyList_SetSlice:int:low:: +PyList_SetSlice:int:high:: +PyList_SetSlice:PyObject*:itemlist:0:but increfs its elements? + +PyList_Size:int::: +PyList_Size:PyObject*:list:0: + +PyList_Sort:int::: +PyList_Sort:PyObject*:list:0: + +PyLong_AsDouble:double::: +PyLong_AsDouble:PyObject*:pylong:0: + +PyLong_AsLong:long::: +PyLong_AsLong:PyObject*:pylong:0: + +PyLong_AsUnsignedLong:unsigned long::: +PyLong_AsUnsignedLong:PyObject*:pylong:0: + +PyLong_Check:int::: +PyLong_Check:PyObject*:p:0: + +PyLong_FromDouble:PyObject*::+1: +PyLong_FromDouble:double:v:: + +PyLong_FromLong:PyObject*::+1: +PyLong_FromLong:long:v:: + +PyLong_FromLongLong:PyObject*::+1: +PyLong_FromLongLong:long long:v:: + +PyLong_FromUnsignedLongLong:PyObject*::+1: +PyLong_FromUnsignedLongLong:unsigned long long:v:: + +PyLong_FromString:PyObject*::+1: +PyLong_FromString:char*:str:: +PyLong_FromString:char**:pend:: +PyLong_FromString:int:base:: + +PyLong_FromUnicode:PyObject*::+1: +PyLong_FromUnicode:Py_UNICODE:u:: +PyLong_FromUnicode:int:length:: +PyLong_FromUnicode:int:base:: + +PyLong_FromUnsignedLong:PyObject*::+1: +PyLong_FromUnsignedLong:unsignedlong:v:: + +PyLong_FromVoidPtr:PyObject*::+1: +PyLong_FromVoidPtr:void*:p:: + +PyMapping_Check:int::: +PyMapping_Check:PyObject*:o:0: + +PyMapping_DelItem:int::: +PyMapping_DelItem:PyObject*:o:0: +PyMapping_DelItem:PyObject*:key:0: + +PyMapping_DelItemString:int::: +PyMapping_DelItemString:PyObject*:o:0: +PyMapping_DelItemString:char*:key:: + +PyMapping_GetItemString:PyObject*::+1: +PyMapping_GetItemString:PyObject*:o:0: +PyMapping_GetItemString:char*:key:: + +PyMapping_HasKey:int::: +PyMapping_HasKey:PyObject*:o:0: +PyMapping_HasKey:PyObject*:key:: + +PyMapping_HasKeyString:int::: +PyMapping_HasKeyString:PyObject*:o:0: 
+PyMapping_HasKeyString:char*:key:: + +PyMapping_Items:PyObject*::+1: +PyMapping_Items:PyObject*:o:0: + +PyMapping_Keys:PyObject*::+1: +PyMapping_Keys:PyObject*:o:0: + +PyMapping_Length:int::: +PyMapping_Length:PyObject*:o:0: + +PyMapping_SetItemString:int::: +PyMapping_SetItemString:PyObject*:o:0: +PyMapping_SetItemString:char*:key:: +PyMapping_SetItemString:PyObject*:v:+1: + +PyMapping_Values:PyObject*::+1: +PyMapping_Values:PyObject*:o:0: + +PyMarshal_ReadLastObjectFromFile:PyObject*::+1: +PyMarshal_ReadLastObjectFromFile:FILE*:file:: + +PyMarshal_ReadObjectFromFile:PyObject*::+1: +PyMarshal_ReadObjectFromFile:FILE*:file:: + +PyMarshal_ReadObjectFromString:PyObject*::+1: +PyMarshal_ReadObjectFromString:char*:string:: +PyMarshal_ReadObjectFromString:int:len:: + +PyMarshal_WriteObjectToString:PyObject*::+1: +PyMarshal_WriteObjectToString:PyObject*:value:0: + +PyMethod_Class:PyObject*::0: +PyMethod_Class:PyObject*:im:0: + +PyMethod_Function:PyObject*::0: +PyMethod_Function:PyObject*:im:0: + +PyMethod_GET_CLASS:PyObject*::0: +PyMethod_GET_CLASS:PyObject*:im:0: + +PyMethod_GET_FUNCTION:PyObject*::0: +PyMethod_GET_FUNCTION:PyObject*:im:0: + +PyMethod_GET_SELF:PyObject*::0: +PyMethod_GET_SELF:PyObject*:im:0: + +PyMethod_New:PyObject*::+1: +PyMethod_New:PyObject*:func:0: +PyMethod_New:PyObject*:self:0: +PyMethod_New:PyObject*:class:0: + +PyMethod_Self:PyObject*::0: +PyMethod_Self:PyObject*:im:0: + +PyModule_GetDict:PyObject*::0: +PyModule_GetDict::PyObject* module:0: + +PyModule_GetFilename:char*::: +PyModule_GetFilename:PyObject*:module:0: + +PyModule_GetName:char*::: +PyModule_GetName:PyObject*:module:0: + +PyModule_New:PyObject*::+1: +PyModule_New::char* name:: + +PyNumber_Absolute:PyObject*::+1: +PyNumber_Absolute:PyObject*:o:0: + +PyNumber_Add:PyObject*::+1: +PyNumber_Add:PyObject*:o1:0: +PyNumber_Add:PyObject*:o2:0: + +PyNumber_And:PyObject*::+1: +PyNumber_And:PyObject*:o1:0: +PyNumber_And:PyObject*:o2:0: + +PyNumber_Check:PyObject*:o:0: +PyNumber_Check:int::: + 
+PyNumber_Divide:PyObject*::+1: +PyNumber_Divide:PyObject*:o1:0: +PyNumber_Divide:PyObject*:o2:0: + +PyNumber_Divmod:PyObject*::+1: +PyNumber_Divmod:PyObject*:o1:0: +PyNumber_Divmod:PyObject*:o2:0: + +PyNumber_Float:PyObject*::+1: +PyNumber_Float:PyObject*:o:0: + +PyNumber_FloorDivide:PyObject*::+1: +PyNumber_FloorDivide:PyObject*:v:0: +PyNumber_FloorDivide:PyObject*:w:0: + +PyNumber_InPlaceAdd:PyObject*::+1: +PyNumber_InPlaceAdd:PyObject*:v:0: +PyNumber_InPlaceAdd:PyObject*:w:0: + +PyNumber_InPlaceAnd:PyObject*::+1: +PyNumber_InPlaceAnd:PyObject*:v:0: +PyNumber_InPlaceAnd:PyObject*:w:0: + +PyNumber_InPlaceDivide:PyObject*::+1: +PyNumber_InPlaceDivide:PyObject*:v:0: +PyNumber_InPlaceDivide:PyObject*:w:0: + +PyNumber_InPlaceFloorDivide:PyObject*::+1: +PyNumber_InPlaceFloorDivide:PyObject*:v:0: +PyNumber_InPlaceFloorDivide:PyObject*:w:0: + +PyNumber_InPlaceLshift:PyObject*::+1: +PyNumber_InPlaceLshift:PyObject*:v:0: +PyNumber_InPlaceLshift:PyObject*:w:0: + +PyNumber_InPlaceMultiply:PyObject*::+1: +PyNumber_InPlaceMultiply:PyObject*:v:0: +PyNumber_InPlaceMultiply:PyObject*:w:0: + +PyNumber_InPlaceOr:PyObject*::+1: +PyNumber_InPlaceOr:PyObject*:v:0: +PyNumber_InPlaceOr:PyObject*:w:0: + +PyNumber_InPlacePower:PyObject*::+1: +PyNumber_InPlacePower:PyObject*:v:0: +PyNumber_InPlacePower:PyObject*:w:0: +PyNumber_InPlacePower:PyObject*:z:0: + +PyNumber_InPlaceRemainder:PyObject*::+1: +PyNumber_InPlaceRemainder:PyObject*:v:0: +PyNumber_InPlaceRemainder:PyObject*:w:0: + +PyNumber_InPlaceRshift:PyObject*::+1: +PyNumber_InPlaceRshift:PyObject*:v:0: +PyNumber_InPlaceRshift:PyObject*:w:0: + +PyNumber_InPlaceSubtract:PyObject*::+1: +PyNumber_InPlaceSubtract:PyObject*:v:0: +PyNumber_InPlaceSubtract:PyObject*:w:0: + +PyNumber_InPlaceTrueDivide:PyObject*::+1: +PyNumber_InPlaceTrueDivide:PyObject*:v:0: +PyNumber_InPlaceTrueDivide:PyObject*:w:0: + +PyNumber_InPlaceXor:PyObject*::+1: +PyNumber_InPlaceXor:PyObject*:v:0: +PyNumber_InPlaceXor:PyObject*:w:0: + +PyNumber_Int:PyObject*::+1: 
+PyNumber_Int:PyObject*:o:0: + +PyNumber_Invert:PyObject*::+1: +PyNumber_Invert:PyObject*:o:0: + +PyNumber_Long:PyObject*::+1: +PyNumber_Long:PyObject*:o:0: + +PyNumber_Lshift:PyObject*::+1: +PyNumber_Lshift:PyObject*:o1:0: +PyNumber_Lshift:PyObject*:o2:0: + +PyNumber_Multiply:PyObject*::+1: +PyNumber_Multiply:PyObject*:o1:0: +PyNumber_Multiply:PyObject*:o2:0: + +PyNumber_Negative:PyObject*::+1: +PyNumber_Negative:PyObject*:o:0: + +PyNumber_Or:PyObject*::+1: +PyNumber_Or:PyObject*:o1:0: +PyNumber_Or:PyObject*:o2:0: + +PyNumber_Positive:PyObject*::+1: +PyNumber_Positive:PyObject*:o:0: + +PyNumber_Power:PyObject*::+1: +PyNumber_Power:PyObject*:o1:0: +PyNumber_Power:PyObject*:o2:0: +PyNumber_Power:PyObject*:o3:0: + +PyNumber_Remainder:PyObject*::+1: +PyNumber_Remainder:PyObject*:o1:0: +PyNumber_Remainder:PyObject*:o2:0: + +PyNumber_Rshift:PyObject*::+1: +PyNumber_Rshift:PyObject*:o1:0: +PyNumber_Rshift:PyObject*:o2:0: + +PyNumber_Subtract:PyObject*::+1: +PyNumber_Subtract:PyObject*:o1:0: +PyNumber_Subtract:PyObject*:o2:0: + +PyNumber_TrueDivide:PyObject*::+1: +PyNumber_TrueDivide:PyObject*:v:0: +PyNumber_TrueDivide:PyObject*:w:0: + +PyNumber_Xor:PyObject*::+1: +PyNumber_Xor:PyObject*:o1:0: +PyNumber_Xor:PyObject*:o2:0: + +PyOS_GetLastModificationTime:long::: +PyOS_GetLastModificationTime:char*:filename:: + +PyObject_AsFileDescriptor:int::: +PyObject_AsFileDescriptor:PyObject*:o:0: + +PyObject_Call:PyObject*::+1: +PyObject_Call:PyObject*:callable_object:0: +PyObject_Call:PyObject*:args:0: +PyObject_Call:PyObject*:kw:0: + +PyObject_CallFunction:PyObject*::+1: +PyObject_CallFunction:PyObject*:callable_object:0: +PyObject_CallFunction:char*:format:: +PyObject_CallFunction::...:: + +PyObject_CallFunctionObjArgs:PyObject*::+1: +PyObject_CallFunctionObjArgs:PyObject*:callable:0: +PyObject_CallFunctionObjArgs::...:: + +PyObject_CallMethod:PyObject*::+1: +PyObject_CallMethod:PyObject*:o:0: +PyObject_CallMethod:char*:m:: +PyObject_CallMethod:char*:format:: 
+PyObject_CallMethod::...:: + +PyObject_CallMethodObjArgs:PyObject*::+1: +PyObject_CallMethodObjArgs:PyObject*:o:0: +PyObject_CallMethodObjArgs:char*:name:: +PyObject_CallMethodObjArgs::...:: + +PyObject_CallObject:PyObject*::+1: +PyObject_CallObject:PyObject*:callable_object:0: +PyObject_CallObject:PyObject*:args:0: + +PyObject_Cmp:int::: +PyObject_Cmp:PyObject*:o1:0: +PyObject_Cmp:PyObject*:o2:0: +PyObject_Cmp:int*:result:: + +PyObject_Compare:int::: +PyObject_Compare:PyObject*:o1:0: +PyObject_Compare:PyObject*:o2:0: + +PyObject_DelAttr:int::: +PyObject_DelAttr:PyObject*:o:0: +PyObject_DelAttr:PyObject*:attr_name:0: + +PyObject_DelAttrString:int::: +PyObject_DelAttrString:PyObject*:o:0: +PyObject_DelAttrString:char*:attr_name:: + +PyObject_DelItem:int::: +PyObject_DelItem:PyObject*:o:0: +PyObject_DelItem:PyObject*:key:0: + +PyObject_Dir:PyObject*::+1: +PyObject_Dir:PyObject*:o:0: + +PyObject_GetAttr:PyObject*::+1: +PyObject_GetAttr:PyObject*:o:0: +PyObject_GetAttr:PyObject*:attr_name:0: + +PyObject_GetAttrString:PyObject*::+1: +PyObject_GetAttrString:PyObject*:o:0: +PyObject_GetAttrString:char*:attr_name:: + +PyObject_GetItem:PyObject*::+1: +PyObject_GetItem:PyObject*:o:0: +PyObject_GetItem:PyObject*:key:0: + +PyObject_GetIter:PyObject*::+1: +PyObject_GetIter:PyObject*:o:0: + +PyObject_HasAttr:int::: +PyObject_HasAttr:PyObject*:o:0: +PyObject_HasAttr:PyObject*:attr_name:0: + +PyObject_HasAttrString:int::: +PyObject_HasAttrString:PyObject*:o:0: +PyObject_HasAttrString:char*:attr_name:0: + +PyObject_Hash:int::: +PyObject_Hash:PyObject*:o:0: + +PyObject_IsTrue:int::: +PyObject_IsTrue:PyObject*:o:0: + +PyObject_Init:PyObject*::0: +PyObject_Init:PyObject*:op:0: + +PyObject_InitVar:PyVarObject*::0: +PyObject_InitVar:PyVarObject*:op:0: + +PyObject_Length:int::: +PyObject_Length:PyObject*:o:0: + +PyObject_NEW:PyObject*::+1: + +PyObject_New:PyObject*::+1: + +PyObject_NEW_VAR:PyObject*::+1: + +PyObject_NewVar:PyObject*::+1: + +PyObject_Print:int::: 
+PyObject_Print:PyObject*:o:0: +PyObject_Print:FILE*:fp:: +PyObject_Print:int:flags:: + +PyObject_Repr:PyObject*::+1: +PyObject_Repr:PyObject*:o:0: + +PyObject_RichCompare:PyObject*::+1: +PyObject_RichCompare:PyObject*:o1:0: +PyObject_RichCompare:PyObject*:o2:0: +PyObject_RichCompare:int:opid:: + +PyObject_RichCompareBool:int::: +PyObject_RichCompareBool:PyObject*:o1:0: +PyObject_RichCompareBool:PyObject*:o2:0: +PyObject_RichCompareBool:int:opid:: + +PyObject_SetAttr:int::: +PyObject_SetAttr:PyObject*:o:0: +PyObject_SetAttr:PyObject*:attr_name:0: +PyObject_SetAttr:PyObject*:v:+1: + +PyObject_SetAttrString:int::: +PyObject_SetAttrString:PyObject*:o:0: +PyObject_SetAttrString:char*:attr_name:: +PyObject_SetAttrString:PyObject*:v:+1: + +PyObject_SetItem:int::: +PyObject_SetItem:PyObject*:o:0: +PyObject_SetItem:PyObject*:key:0: +PyObject_SetItem:PyObject*:v:+1: + +PyObject_Str:PyObject*::+1: +PyObject_Str:PyObject*:o:0: + +PyObject_Type:PyObject*::+1: +PyObject_Type:PyObject*:o:0: + +PyObject_Unicode:PyObject*::+1: +PyObject_Unicode:PyObject*:o:0: + +PyParser_SimpleParseFile:struct _node*::: +PyParser_SimpleParseFile:FILE*:fp:: +PyParser_SimpleParseFile:char*:filename:: +PyParser_SimpleParseFile:int:start:: + +PyParser_SimpleParseString:struct _node*::: +PyParser_SimpleParseString:char*:str:: +PyParser_SimpleParseString:int:start:: + +PyRun_AnyFile:int::: +PyRun_AnyFile:FILE*:fp:: +PyRun_AnyFile:char*:filename:: + +PyRun_File:PyObject*::+1:??? -- same as eval_code2() +PyRun_File:FILE*:fp:: +PyRun_File:char*:filename:: +PyRun_File:int:start:: +PyRun_File:PyObject*:globals:0: +PyRun_File:PyObject*:locals:0: + +PyRun_FileEx:PyObject*::+1:??? -- same as eval_code2() +PyRun_FileEx:FILE*:fp:: +PyRun_FileEx:char*:filename:: +PyRun_FileEx:int:start:: +PyRun_FileEx:PyObject*:globals:0: +PyRun_FileEx:PyObject*:locals:0: +PyRun_FileEx:int:closeit:: + +PyRun_FileFlags:PyObject*::+1:??? 
-- same as eval_code2() +PyRun_FileFlags:FILE*:fp:: +PyRun_FileFlags:char*:filename:: +PyRun_FileFlags:int:start:: +PyRun_FileFlags:PyObject*:globals:0: +PyRun_FileFlags:PyObject*:locals:0: +PyRun_FileFlags:PyCompilerFlags*:flags:: + +PyRun_FileExFlags:PyObject*::+1:??? -- same as eval_code2() +PyRun_FileExFlags:FILE*:fp:: +PyRun_FileExFlags:char*:filename:: +PyRun_FileExFlags:int:start:: +PyRun_FileExFlags:PyObject*:globals:0: +PyRun_FileExFlags:PyObject*:locals:0: +PyRun_FileExFlags:int:closeit:: +PyRun_FileExFlags:PyCompilerFlags*:flags:: + +PyRun_InteractiveLoop:int::: +PyRun_InteractiveLoop:FILE*:fp:: +PyRun_InteractiveLoop:char*:filename:: + +PyRun_InteractiveOne:int::: +PyRun_InteractiveOne:FILE*:fp:: +PyRun_InteractiveOne:char*:filename:: + +PyRun_SimpleFile:int::: +PyRun_SimpleFile:FILE*:fp:: +PyRun_SimpleFile:char*:filename:: + +PyRun_SimpleString:int::: +PyRun_SimpleString:char*:command:: + +PyRun_String:PyObject*::+1:??? -- same as eval_code2() +PyRun_String:char*:str:: +PyRun_String:int:start:: +PyRun_String:PyObject*:globals:0: +PyRun_String:PyObject*:locals:0: + +PyRun_StringFlags:PyObject*::+1:??? 
-- same as eval_code2() +PyRun_StringFlags:char*:str:: +PyRun_StringFlags:int:start:: +PyRun_StringFlags:PyObject*:globals:0: +PyRun_StringFlags:PyObject*:locals:0: +PyRun_StringFlags:PyCompilerFlags*:flags:: + +PySeqIter_New:PyObject*::+1: +PySeqIter_New:PyObject*:seq:: + +PySequence_Check:int::: +PySequence_Check:PyObject*:o:0: + +PySequence_Concat:PyObject*::+1: +PySequence_Concat:PyObject*:o1:0: +PySequence_Concat:PyObject*:o2:0: + +PySequence_Count:int::: +PySequence_Count:PyObject*:o:0: +PySequence_Count:PyObject*:value:0: + +PySequence_DelItem:int::: +PySequence_DelItem:PyObject*:o:0: +PySequence_DelItem:int:i:: + +PySequence_DelSlice:int::: +PySequence_DelSlice:PyObject*:o:0: +PySequence_DelSlice:int:i1:: +PySequence_DelSlice:int:i2:: + +PySequence_Fast:PyObject*::+1: +PySequence_Fast:PyObject*:v:0: +PySequence_Fast:const char*:m:: + +PySequence_Fast_GET_ITEM:PyObject*::0: +PySequence_Fast_GET_ITEM:PyObject*:o:0: +PySequence_Fast_GET_ITEM:int:i:: + +PySequence_GetItem:PyObject*::+1: +PySequence_GetItem:PyObject*:o:0: +PySequence_GetItem:int:i:: + +PySequence_GetSlice:PyObject*::+1: +PySequence_GetSlice:PyObject*:o:0: +PySequence_GetSlice:int:i1:: +PySequence_GetSlice:int:i2:: + +PySequence_In:int::: +PySequence_In:PyObject*:o:0: +PySequence_In:PyObject*:value:0: + +PySequence_Index:int::: +PySequence_Index:PyObject*:o:0: +PySequence_Index:PyObject*:value:0: + +PySequence_InPlaceConcat:PyObject*::+1: +PySequence_InPlaceConcat:PyObject*:s:0: +PySequence_InPlaceConcat:PyObject*:o:0: + +PySequence_InPlaceRepeat:PyObject*::+1: +PySequence_InPlaceRepeat:PyObject*:s:0: +PySequence_InPlaceRepeat:PyObject*:o:0: + +PySequence_ITEM:PyObject*::+1: +PySequence_ITEM:PyObject*:o:0: +PySequence_ITEM:int:i:: + +PySequence_Repeat:PyObject*::+1: +PySequence_Repeat:PyObject*:o:0: +PySequence_Repeat:int:count:: + +PySequence_SetItem:int::: +PySequence_SetItem:PyObject*:o:0: +PySequence_SetItem:int:i:: +PySequence_SetItem:PyObject*:v:+1: + +PySequence_SetSlice:int::: 
+PySequence_SetSlice:PyObject*:o:0: +PySequence_SetSlice:int:i1:: +PySequence_SetSlice:int:i2:: +PySequence_SetSlice:PyObject*:v:+1: + +PySequence_List:PyObject*::+1: +PySequence_List:PyObject*:o:0: + +PySequence_Tuple:PyObject*::+1: +PySequence_Tuple:PyObject*:o:0: + +PySet_Append:int::: +PySet_Append:PyObject*:set:0: +PySet_Append:PyObject*:key:+1: + +PySet_Contains:int::: +PySet_Contains:PyObject*:anyset:0: +PySet_Contains:PyObject*:key:0: + +PySet_Discard:int::: +PySet_Discard:PyObject*:set:0: +PySet_Discard:PyObject*:key:-1:no effect if key not found + +PySet_New:PyObject*::+1: +PySet_New:PyObject*:iterable:0: + +PySet_Pop:PyObject*::+1:or returns NULL and raises KeyError if set is empty +PySet_Pop:PyObject*:set:0: + +PySet_Size:int::: +PySet_Size:PyObject*:anyset:0: + +PySlice_Check:int::: +PySlice_Check:PyObject*:ob:0: + +PySlice_New:PyObject*::+1: +PySlice_New:PyObject*:start:0: +PySlice_New:PyObject*:stop:0: +PySlice_New:PyObject*:step:0: + +PyString_AS_STRING:char*::: +PyString_AS_STRING:PyObject*:string:0: + +PyString_AsDecodedObject:PyObject*::+1: +PyString_AsDecodedObject:PyObject*:str:0: +PyString_AsDecodedObject:const char*:encoding:: +PyString_AsDecodedObject:const char*:errors:: + +PyString_AsEncodedObject:PyObject*::+1: +PyString_AsEncodedObject:PyObject*:str:0: +PyString_AsEncodedObject:const char*:encoding:: +PyString_AsEncodedObject:const char*:errors:: + +PyString_AsString:char*::: +PyString_AsString:PyObject*:string:0: + +PyString_AsStringAndSize:int::: +PyString_AsStringAndSize:PyObject*:obj:0: +PyString_AsStringAndSize:char**:buffer:: +PyString_AsStringAndSize:int*:length:: + +PyString_Check:int::: +PyString_Check:PyObject*:o:0: + +PyString_Concat:void::: +PyString_Concat:PyObject**:string:0:??? -- replaces w/ new string or NULL +PyString_Concat:PyObject*:newpart:0: + +PyString_ConcatAndDel:void::: +PyString_ConcatAndDel:PyObject**:string:0:??? 
-- replaces w/ new string or NULL +PyString_ConcatAndDel:PyObject*:newpart:-1: + +PyString_Format:PyObject*::+1: +PyString_Format:PyObject*:format:0: +PyString_Format:PyObject*:args:0: + +PyString_FromString:PyObject*::+1: +PyString_FromString:const char*:v:: + +PyString_FromStringAndSize:PyObject*::+1: +PyString_FromStringAndSize:const char*:v:: +PyString_FromStringAndSize:int:len:: + +PyString_FromFormat:PyObject*::+1: +PyString_FromFormat:const char*:format:: +PyString_FromFormat::...:: + +PyString_FromFormatV:PyObject*::+1: +PyString_FromFormatV:const char*:format:: +PyString_FromFormatV:va_list:vargs:: + +PyString_GET_SIZE:int::: +PyString_GET_SIZE:PyObject*:string:0: + +PyString_InternFromString:PyObject*::+1: +PyString_InternFromString:const char*:v:: + +PyString_InternInPlace:void::: +PyString_InternInPlace:PyObject**:string:+1:??? + +PyString_Size:int::: +PyString_Size:PyObject*:string:0: + +PyString_Decode:PyObject*::+1: +PyString_Decode:const char*:s:: +PyString_Decode:int:size:: +PyString_Decode:const char*:encoding:: +PyString_Decode:const char*:errors:: + +PyString_Encode:PyObject*::+1: +PyString_Encode:const char*:s:: +PyString_Encode:int:size:: +PyString_Encode:const char*:encoding:: +PyString_Encode:const char*:errors:: + +PyString_AsEncodedString:PyObject*::+1: +PyString_AsEncodedString:PyObject*:str:: +PyString_AsEncodedString:const char*:encoding:: +PyString_AsEncodedString:const char*:errors:: + +PySys_SetArgv:int::: +PySys_SetArgv:int:argc:: +PySys_SetArgv:char**:argv:: + +PyThreadState_Clear:void::: +PyThreadState_Clear:PyThreadState*:tstate:: + +PyThreadState_Delete:void::: +PyThreadState_Delete:PyThreadState*:tstate:: + +PyThreadState_Get:PyThreadState*::: + +PyThreadState_GetDict:PyObject*::0: + +PyThreadState_New:PyThreadState*::: +PyThreadState_New:PyInterpreterState*:interp:: + +PyThreadState_Swap:PyThreadState*::: +PyThreadState_Swap:PyThreadState*:tstate:: + +PyTime_FromTime:PyObject*::+1: +PyTime_FromTime:int:hour:: 
+PyTime_FromTime:int:minute:: +PyTime_FromTime:int:second:: +PyTime_FromTime:int:usecond:: + +PyTuple_Check:int::: +PyTuple_Check:PyObject*:p:0: + +PyTuple_GET_ITEM:PyObject*::0: +PyTuple_GET_ITEM:PyTupleObject*:p:0: +PyTuple_GET_ITEM:int:pos:: + +PyTuple_GetItem:PyObject*::0: +PyTuple_GetItem:PyTupleObject*:p:0: +PyTuple_GetItem:int:pos:: + +PyTuple_GetSlice:PyObject*::+1: +PyTuple_GetSlice:PyTupleObject*:p:0: +PyTuple_GetSlice:int:low:: +PyTuple_GetSlice:int:high:: + +PyTuple_New:PyObject*::+1: +PyTuple_New:int:len:: + +PyTuple_Pack:PyObject*::+1: +PyTuple_Pack:int:len:: +PyTuple_Pack:PyObject*:...:0: + +PyTuple_SET_ITEM:void::: +PyTuple_SET_ITEM:PyTupleObject*:p:0: +PyTuple_SET_ITEM:int:pos:: +PyTuple_SET_ITEM:PyObject*:o:0: + +PyTuple_SetItem:int::: +PyTuple_SetItem:PyTupleObject*:p:0: +PyTuple_SetItem:int:pos:: +PyTuple_SetItem:PyObject*:o:0: + +PyTuple_Size:int::: +PyTuple_Size:PyTupleObject*:p:0: + +PyType_GenericAlloc:PyObject*::+1: +PyType_GenericAlloc:PyObject*:type:0: +PyType_GenericAlloc:int:nitems:0: + +PyType_GenericNew:PyObject*::+1: +PyType_GenericNew:PyObject*:type:0: +PyType_GenericNew:PyObject*:args:0: +PyType_GenericNew:PyObject*:kwds:0: + +PyUnicode_Check:int::: +PyUnicode_Check:PyObject*:o:0: + +PyUnicode_GET_SIZE:int::: +PyUnicode_GET_SIZE:PyObject*:o:0: + +PyUnicode_GET_DATA_SIZE:int::: +PyUnicode_GET_DATA_SIZE:PyObject*:o:0: + +PyUnicode_AS_UNICODE:Py_UNICODE*::: +PyUnicode_AS_UNICODE:PyObject*:o:0: + +PyUnicode_AS_DATA:const char*::: +PyUnicode_AS_DATA:PyObject*:o:0: + +Py_UNICODE_ISSPACE:int::: +Py_UNICODE_ISSPACE:Py_UNICODE:ch:: + +Py_UNICODE_ISLOWER:int::: +Py_UNICODE_ISLOWER:Py_UNICODE:ch:: + +Py_UNICODE_ISUPPER:int::: +Py_UNICODE_ISUPPER:Py_UNICODE:ch:: + +Py_UNICODE_ISTITLE:int::: +Py_UNICODE_ISTITLE:Py_UNICODE:ch:: + +Py_UNICODE_ISLINEBREAK:int::: +Py_UNICODE_ISLINEBREAK:Py_UNICODE:ch:: + +Py_UNICODE_ISDECIMAL:int::: +Py_UNICODE_ISDECIMAL:Py_UNICODE:ch:: + +Py_UNICODE_ISDIGIT:int::: +Py_UNICODE_ISDIGIT:Py_UNICODE:ch:: + 
+Py_UNICODE_ISNUMERIC:int::: +Py_UNICODE_ISNUMERIC:Py_UNICODE:ch:: + +Py_UNICODE_TOLOWER:Py_UNICODE::: +Py_UNICODE_TOLOWER:Py_UNICODE:ch:: + +Py_UNICODE_TOUPPER:Py_UNICODE::: +Py_UNICODE_TOUPPER:Py_UNICODE:ch:: + +Py_UNICODE_TOTITLE:Py_UNICODE::: +Py_UNICODE_TOTITLE:Py_UNICODE:ch:: + +Py_UNICODE_TODECIMAL:int::: +Py_UNICODE_TODECIMAL:Py_UNICODE:ch:: + +Py_UNICODE_TODIGIT:int::: +Py_UNICODE_TODIGIT:Py_UNICODE:ch:: + +Py_UNICODE_TONUMERIC:double::: +Py_UNICODE_TONUMERIC:Py_UNICODE:ch:: + +PyUnicode_FromUnicode:PyObject*::+1: +PyUnicode_FromUnicode:const Py_UNICODE*:u:: +PyUnicode_FromUnicode:int:size:: + +PyUnicode_AsUnicode:Py_UNICODE*::: +PyUnicode_AsUnicode:PyObject :*unicode:0: + +PyUnicode_GetSize:int::: +PyUnicode_GetSize:PyObject :*unicode:0: + +PyUnicode_FromObject:PyObject*::+1: +PyUnicode_FromObject:PyObject*:*obj:0: + +PyUnicode_FromEncodedObject:PyObject*::+1: +PyUnicode_FromEncodedObject:PyObject*:*obj:0: +PyUnicode_FromEncodedObject:const char*:encoding:: +PyUnicode_FromEncodedObject:const char*:errors:: + +PyUnicode_FromWideChar:PyObject*::+1: +PyUnicode_FromWideChar:const wchar_t*:w:: +PyUnicode_FromWideChar:int:size:: + +PyUnicode_AsWideChar:int::: +PyUnicode_AsWideChar:PyObject*:*unicode:0: +PyUnicode_AsWideChar:wchar_t*:w:: +PyUnicode_AsWideChar:int:size:: + +PyUnicode_Decode:PyObject*::+1: +PyUnicode_Decode:const char*:s:: +PyUnicode_Decode:int:size:: +PyUnicode_Decode:const char*:encoding:: +PyUnicode_Decode:const char*:errors:: + +PyUnicode_DecodeUTF16Stateful:PyObject*::+1: +PyUnicode_DecodeUTF16Stateful:const char*:s:: +PyUnicode_DecodeUTF16Stateful:int:size:: +PyUnicode_DecodeUTF16Stateful:const char*:errors:: +PyUnicode_DecodeUTF16Stateful:int*:byteorder:: +PyUnicode_DecodeUTF16Stateful:int*:consumed:: + +PyUnicode_DecodeUTF8Stateful:PyObject*::+1: +PyUnicode_DecodeUTF8Stateful:const char*:s:: +PyUnicode_DecodeUTF8Stateful:int:size:: +PyUnicode_DecodeUTF8Stateful:const char*:errors:: +PyUnicode_DecodeUTF8Stateful:int*:consumed:: + 
+PyUnicode_Encode:PyObject*::+1: +PyUnicode_Encode:const Py_UNICODE*:s:: +PyUnicode_Encode:int:size:: +PyUnicode_Encode:const char*:encoding:: +PyUnicode_Encode:const char*:errors:: + +PyUnicode_AsEncodedString:PyObject*::+1: +PyUnicode_AsEncodedString:PyObject*:unicode:: +PyUnicode_AsEncodedString:const char*:encoding:: +PyUnicode_AsEncodedString:const char*:errors:: + +PyUnicode_DecodeUTF8:PyObject*::+1: +PyUnicode_DecodeUTF8:const char*:s:: +PyUnicode_DecodeUTF8:int:size:: +PyUnicode_DecodeUTF8:const char*:errors:: + +PyUnicode_EncodeUTF8:PyObject*::+1: +PyUnicode_EncodeUTF8:const Py_UNICODE*:s:: +PyUnicode_EncodeUTF8:int:size:: +PyUnicode_EncodeUTF8:const char*:errors:: + +PyUnicode_AsUTF8String:PyObject*::+1: +PyUnicode_AsUTF8String:PyObject*:unicode:: + +PyUnicode_DecodeUTF16:PyObject*::+1: +PyUnicode_DecodeUTF16:const char*:s:: +PyUnicode_DecodeUTF16:int:size:: +PyUnicode_DecodeUTF16:const char*:errors:: +PyUnicode_DecodeUTF16:int*:byteorder:: + +PyUnicode_EncodeUTF16:PyObject*::+1: +PyUnicode_EncodeUTF16:const Py_UNICODE*:s:: +PyUnicode_EncodeUTF16:int:size:: +PyUnicode_EncodeUTF16:const char*:errors:: +PyUnicode_EncodeUTF16:int:byteorder:: + +PyUnicode_AsUTF16String:PyObject*::+1: +PyUnicode_AsUTF16String:PyObject*:unicode:: + +PyUnicode_DecodeUnicodeEscape:PyObject*::+1: +PyUnicode_DecodeUnicodeEscape:const char*:s:: +PyUnicode_DecodeUnicodeEscape:int:size:: +PyUnicode_DecodeUnicodeEscape:const char*:errors:: + +PyUnicode_EncodeUnicodeEscape:PyObject*::+1: +PyUnicode_EncodeUnicodeEscape:const Py_UNICODE*:s:: +PyUnicode_EncodeUnicodeEscape:int:size:: +PyUnicode_EncodeUnicodeEscape:const char*:errors:: + +PyUnicode_AsUnicodeEscapeString:PyObject*::+1: +PyUnicode_AsUnicodeEscapeString:PyObject*:unicode:: + +PyUnicode_DecodeRawUnicodeEscape:PyObject*::+1: +PyUnicode_DecodeRawUnicodeEscape:const char*:s:: +PyUnicode_DecodeRawUnicodeEscape:int:size:: +PyUnicode_DecodeRawUnicodeEscape:const char*:errors:: + +PyUnicode_EncodeRawUnicodeEscape:PyObject*::+1: 
+PyUnicode_EncodeRawUnicodeEscape:const Py_UNICODE*:s:: +PyUnicode_EncodeRawUnicodeEscape:int:size:: +PyUnicode_EncodeRawUnicodeEscape:const char*:errors:: + +PyUnicode_AsRawUnicodeEscapeString:PyObject*::+1: +PyUnicode_AsRawUnicodeEscapeString:PyObject*:unicode:: + +PyUnicode_DecodeLatin1:PyObject*::+1: +PyUnicode_DecodeLatin1:const char*:s:: +PyUnicode_DecodeLatin1:int:size:: +PyUnicode_DecodeLatin1:const char*:errors:: + +PyUnicode_EncodeLatin1:PyObject*::+1: +PyUnicode_EncodeLatin1:const Py_UNICODE*:s:: +PyUnicode_EncodeLatin1:int:size:: +PyUnicode_EncodeLatin1:const char*:errors:: + +PyUnicode_AsLatin1String:PyObject*::+1: +PyUnicode_AsLatin1String:PyObject*:unicode:: + +PyUnicode_DecodeASCII:PyObject*::+1: +PyUnicode_DecodeASCII:const char*:s:: +PyUnicode_DecodeASCII:int:size:: +PyUnicode_DecodeASCII:const char*:errors:: + +PyUnicode_EncodeASCII:PyObject*::+1: +PyUnicode_EncodeASCII:const Py_UNICODE*:s:: +PyUnicode_EncodeASCII:int:size:: +PyUnicode_EncodeASCII:const char*:errors:: + +PyUnicode_AsASCIIString:PyObject*::+1: +PyUnicode_AsASCIIString:PyObject*:unicode:: + +PyUnicode_DecodeCharmap:PyObject*::+1: +PyUnicode_DecodeCharmap:const char*:s:: +PyUnicode_DecodeCharmap:int:size:: +PyUnicode_DecodeCharmap:PyObject*:mapping:0: +PyUnicode_DecodeCharmap:const char*:errors:: + +PyUnicode_EncodeCharmap:PyObject*::+1: +PyUnicode_EncodeCharmap:const Py_UNICODE*:s:: +PyUnicode_EncodeCharmap:int:size:: +PyUnicode_EncodeCharmap:PyObject*:mapping:0: +PyUnicode_EncodeCharmap:const char*:errors:: + +PyUnicode_AsCharmapString:PyObject*::+1: +PyUnicode_AsCharmapString:PyObject*:unicode:0: +PyUnicode_AsCharmapString:PyObject*:mapping:0: + +PyUnicode_TranslateCharmap:PyObject*::+1: +PyUnicode_TranslateCharmap:const Py_UNICODE*:s:: +PyUnicode_TranslateCharmap:int:size:: +PyUnicode_TranslateCharmap:PyObject*:table:0: +PyUnicode_TranslateCharmap:const char*:errors:: + +PyUnicode_DecodeMBCS:PyObject*::+1: +PyUnicode_DecodeMBCS:const char*:s:: +PyUnicode_DecodeMBCS:int:size:: 
+PyUnicode_DecodeMBCS:const char*:errors:: + +PyUnicode_EncodeMBCS:PyObject*::+1: +PyUnicode_EncodeMBCS:const Py_UNICODE*:s:: +PyUnicode_EncodeMBCS:int:size:: +PyUnicode_EncodeMBCS:const char*:errors:: + +PyUnicode_AsMBCSString:PyObject*::+1: +PyUnicode_AsMBCSString:PyObject*:unicode:: + +PyUnicode_Concat:PyObject*::+1: +PyUnicode_Concat:PyObject*:left:0: +PyUnicode_Concat:PyObject*:right:0: + +PyUnicode_Split:PyObject*::+1: +PyUnicode_Split:PyObject*:left:0: +PyUnicode_Split:PyObject*:right:0: +PyUnicode_Split:int:maxsplit:: + +PyUnicode_Splitlines:PyObject*::+1: +PyUnicode_Splitlines:PyObject*:s:0: +PyUnicode_Splitlines:int:maxsplit:: + +PyUnicode_Translate:PyObject*::+1: +PyUnicode_Translate:PyObject*:str:0: +PyUnicode_Translate:PyObject*:table:0: +PyUnicode_Translate:const char*:errors:: + +PyUnicode_Join:PyObject*::+1: +PyUnicode_Join:PyObject*:separator:0: +PyUnicode_Join:PyObject*:seq:0: + +PyUnicode_Tailmatch:PyObject*::+1: +PyUnicode_Tailmatch:PyObject*:str:0: +PyUnicode_Tailmatch:PyObject*:substr:0: +PyUnicode_Tailmatch:int:start:: +PyUnicode_Tailmatch:int:end:: +PyUnicode_Tailmatch:int:direction:: + +PyUnicode_Find:int::: +PyUnicode_Find:PyObject*:str:0: +PyUnicode_Find:PyObject*:substr:0: +PyUnicode_Find:int:start:: +PyUnicode_Find:int:end:: +PyUnicode_Find:int:direction:: + +PyUnicode_Count:int::: +PyUnicode_Count:PyObject*:str:0: +PyUnicode_Count:PyObject*:substr:0: +PyUnicode_Count:int:start:: +PyUnicode_Count:int:end:: + +PyUnicode_Replace:PyObject*::+1: +PyUnicode_Replace:PyObject*:str:0: +PyUnicode_Replace:PyObject*:substr:0: +PyUnicode_Replace:PyObject*:replstr:0: +PyUnicode_Replace:int:maxcount:: + +PyUnicode_Compare:int::: +PyUnicode_Compare:PyObject*:left:0: +PyUnicode_Compare:PyObject*:right:0: + +PyUnicode_Format:PyObject*::+1: +PyUnicode_Format:PyObject*:format:0: +PyUnicode_Format:PyObject*:args:0: + +PyUnicode_Contains:int::: +PyUnicode_Contains:PyObject*:container:0: +PyUnicode_Contains:PyObject*:element:0: + 
+PyWeakref_GET_OBJECT:PyObject*::0: +PyWeakref_GET_OBJECT:PyObject*:ref:0: + +PyWeakref_GetObject:PyObject*::0: +PyWeakref_GetObject:PyObject*:ref:0: + +PyWeakref_NewProxy:PyObject*::+1: +PyWeakref_NewProxy:PyObject*:ob:0: +PyWeakref_NewProxy:PyObject*:callback:0: + +PyWeakref_NewRef:PyObject*::+1: +PyWeakref_NewRef:PyObject*:ob:0: +PyWeakref_NewRef:PyObject*:callback:0: + +PyWrapper_New:PyObject*::+1: +PyWrapper_New:PyObject*:d:0: +PyWrapper_New:PyObject*:self:0: + +Py_AtExit:int::: +Py_AtExit:void (*)():func:: + +Py_BuildValue:PyObject*::+1: +Py_BuildValue:char*:format:: + +Py_CompileString:PyObject*::+1: +Py_CompileString:char*:str:: +Py_CompileString:char*:filename:: +Py_CompileString:int:start:: + +Py_CompileStringFlags:PyObject*::+1: +Py_CompileStringFlags:char*:str:: +Py_CompileStringFlags:char*:filename:: +Py_CompileStringFlags:int:start:: +Py_CompileStringFlags:PyCompilerFlags*:flags:: + +Py_DECREF:void::: +Py_DECREF:PyObject*:o:-1: + +Py_EndInterpreter:void::: +Py_EndInterpreter:PyThreadState*:tstate:: + +Py_Exit:void::: +Py_Exit:int:status:: + +Py_FatalError:void::: +Py_FatalError:char*:message:: + +Py_FdIsInteractive:int::: +Py_FdIsInteractive:FILE*:fp:: +Py_FdIsInteractive:char*:filename:: + +Py_Finalize:void::: + +Py_FindMethod:PyObject*::+1: +Py_FindMethod:PyMethodDef[]:methods:: +Py_FindMethod:PyObject*:self:+1: +Py_FindMethod:char*:name:: + +Py_GetBuildInfoconst:char*::: + +Py_GetCompilerconst:char*::: + +Py_GetCopyrightconst:char*::: + +Py_GetExecPrefix:char*::: + +Py_GetPath:char*::: + +Py_GetPlatformconst:char*::: + +Py_GetPrefix:char*::: + +Py_GetProgramFullPath:char*::: + +Py_GetProgramName:char*::: + +Py_GetVersionconst:char*::: + +Py_INCREF:void::: +Py_INCREF:PyObject*:o:+1: + +Py_Initialize:void::: + +Py_IsInitialized:int::: + +Py_NewInterpreter:PyThreadState*::: + +Py_SetProgramName:void::: +Py_SetProgramName:char*:name:: + +Py_XDECREF:void::: +Py_XDECREF:PyObject*:o:-1:if o is not NULL + +Py_XINCREF:void::: +Py_XINCREF:PyObject*:o:+1:if o 
is not NULL + +_PyImport_FindExtension:PyObject*::0:??? see PyImport_AddModule +_PyImport_FindExtension:char*::: +_PyImport_FindExtension:char*::: + +_PyImport_Fini:void::: + +_PyImport_FixupExtension:PyObject*:::??? +_PyImport_FixupExtension:char*::: +_PyImport_FixupExtension:char*::: + +_PyImport_Init:void::: + +_PyObject_Del:void::: +_PyObject_Del:PyObject*:op:0: + +_PyObject_New:PyObject*::+1: +_PyObject_New:PyTypeObject*:type:0: + +_PyObject_NewVar:PyObject*::+1: +_PyObject_NewVar:PyTypeObject*:type:0: +_PyObject_NewVar:int:size:: + +_PyString_Resize:int::: +_PyString_Resize:PyObject**:string:+1: +_PyString_Resize:int:newsize:: + +_PyTuple_Resize:int::: +_PyTuple_Resize:PyTupleObject**:p:+1: +_PyTuple_Resize:int:new:: + +_Py_c_diff:Py_complex::: +_Py_c_diff:Py_complex:left:: +_Py_c_diff:Py_complex:right:: + +_Py_c_neg:Py_complex::: +_Py_c_neg:Py_complex:complex:: + +_Py_c_pow:Py_complex::: +_Py_c_pow:Py_complex:num:: +_Py_c_pow:Py_complex:exp:: + +_Py_c_prod:Py_complex::: +_Py_c_prod:Py_complex:left:: +_Py_c_prod:Py_complex:right:: + +_Py_c_quot:Py_complex::: +_Py_c_quot:Py_complex:dividend:: +_Py_c_quot:Py_complex:divisor:: + +_Py_c_sum:Py_complex::: +_Py_c_sum:Py_complex:left:: +_Py_c_sum:Py_complex:right:: diff --git a/Doc/distutils/apiref.rst b/Doc/distutils/apiref.rst new file mode 100644 index 0000000..c8e57fd --- /dev/null +++ b/Doc/distutils/apiref.rst @@ -0,0 +1,1976 @@ +.. _api-reference: + +************* +API Reference +************* + + +:mod:`distutils.core` --- Core Distutils functionality +====================================================== + +.. module:: distutils.core + :synopsis: The core Distutils functionality + + +The :mod:`distutils.core` module is the only module that needs to be installed +to use the Distutils. It provides the :func:`setup` (which is called from the +setup script). Indirectly provides the :class:`distutils.dist.Distribution` and +:class:`distutils.cmd.Command` class. + + +.. 
function:: setup(arguments) + + The basic do-everything function that does most everything you could ever ask + for from a Distutils method. See XXXXX + + The setup function takes a large number of arguments. These are laid out in the + following table. + + +--------------------+--------------------------------+-------------------------------------------------------------+ + | argument name | value | type | + +====================+================================+=============================================================+ + | *name* | The name of the package | a string | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *version* | The version number of the | See :mod:`distutils.version` | + | | package | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *description* | A single line describing the | a string | + | | package | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *long_description* | Longer description of the | a string | + | | package | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *author* | The name of the package author | a string | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *author_email* | The email address of the | a string | + | | package author | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *maintainer* | The name of the current | a string | + | | maintainer, if different from | | + | | the author | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *maintainer_email* | The email address of the | | + | | 
current maintainer, if | | + | | different from the author | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *url* | A URL for the package | a URL | + | | (homepage) | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *download_url* | A URL to download the package | a URL | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *packages* | A list of Python packages that | a list of strings | + | | distutils will manipulate | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *py_modules* | A list of Python modules that | a list of strings | + | | distutils will manipulate | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *scripts* | A list of standalone script | a list of strings | + | | files to be built and | | + | | installed | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *ext_modules* | A list of Python extensions to | A list of instances of | + | | be built | :class:`distutils.core.Extension` | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *classifiers* | A list of categories for the | The list of available | + | | package | categorizations is at | + | | | http://cheeseshop.python.org/pypi?:action=list_classifiers. 
| + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *distclass* | the :class:`Distribution` | A subclass of | + | | class to use | :class:`distutils.core.Distribution` | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *script_name* | The name of the setup.py | a string | + | | script - defaults to | | + | | ``sys.argv[0]`` | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *script_args* | Arguments to supply to the | a list of strings | + | | setup script | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *options* | default options for the setup | a string | + | | script | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *license* | The license for the package | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *keywords* | Descriptive meta-data. See | | + | | :pep:`314` | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *platforms* | | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + | *cmdclass* | A mapping of command names to | a dictionary | + | | :class:`Command` subclasses | | + +--------------------+--------------------------------+-------------------------------------------------------------+ + + +.. function:: run_setup(script_name[, script_args=None, stop_after='run']) + + Run a setup script in a somewhat controlled environment, and return the + :class:`distutils.dist.Distribution` instance that drives things. 
This is + useful if you need to find out the distribution meta-data (passed as keyword + args from *script_name* to :func:`setup`), or the contents of the config files or + command-line. + + *script_name* is a file that will be read and run with :func:`exec`. ``sys.argv[0]`` + will be replaced with *script_name* for the duration of the call. *script_args* is a + list of strings; if supplied, ``sys.argv[1:]`` will be replaced by *script_args* + for the duration of the call. + + *stop_after* tells :func:`setup` when to stop processing; possible values: + + +---------------+---------------------------------------------+ + | value | description | + +===============+=============================================+ + | *init* | Stop after the :class:`Distribution` | + | | instance has been created and populated | + | | with the keyword arguments to :func:`setup` | + +---------------+---------------------------------------------+ + | *config* | Stop after config files have been parsed | + | | (and their data stored in the | + | | :class:`Distribution` instance) | + +---------------+---------------------------------------------+ + | *commandline* | Stop after the command-line | + | | (``sys.argv[1:]`` or *script_args*) have | + | | been parsed (and the data stored in the | + | | :class:`Distribution` instance.) | + +---------------+---------------------------------------------+ + | *run* | Stop after all commands have been run (the | + | | same as if :func:`setup` had been called | + | | in the usual way). This is the default | + | | value. | + +---------------+---------------------------------------------+ + +In addition, the :mod:`distutils.core` module exposes a number of classes that +live elsewhere. + +* :class:`Extension` from :mod:`distutils.extension` + +* :class:`Command` from :mod:`distutils.cmd` + +* :class:`Distribution` from :mod:`distutils.dist` + +A short description of each of these follows, but see the relevant module for +the full reference. + + +.. 
class:: Extension + + The Extension class describes a single C or C++ extension module in a setup + script. It accepts the following keyword arguments in its constructor: + + +------------------------+--------------------------------+---------------------------+ + | argument name | value | type | + +========================+================================+===========================+ + | *name* | the full name of the | string | + | | extension, including any | | + | | packages --- ie. *not* a | | + | | filename or pathname, but | | + | | Python dotted name | | + +------------------------+--------------------------------+---------------------------+ + | *sources* | list of source filenames, | string | + | | relative to the distribution | | + | | root (where the setup script | | + | | lives), in Unix form (slash- | | + | | separated) for portability. | | + | | Source files may be C, C++, | | + | | SWIG (.i), platform-specific | | + | | resource files, or whatever | | + | | else is recognized by the | | + | | :command:`build_ext` command | | + | | as source for a Python | | + | | extension. 
| | + +------------------------+--------------------------------+---------------------------+ + | *include_dirs* | list of directories to search | string | + | | for C/C++ header files (in | | + | | Unix form for portability) | | + +------------------------+--------------------------------+---------------------------+ + | *define_macros* | list of macros to define; each | (string,string) tuple or | + | | macro is defined using a | (name,``None``) | + | | 2-tuple, where 'value' is | | + | | either the string to define it | | + | | to or ``None`` to define it | | + | | without a particular value | | + | | (equivalent of ``#define FOO`` | | + | | in source or :option:`-DFOO` | | + | | on Unix C compiler command | | + | | line) | | + +------------------------+--------------------------------+---------------------------+ + | *undef_macros* | list of macros to undefine | string | + | | explicitly | | + +------------------------+--------------------------------+---------------------------+ + | *library_dirs* | list of directories to search | string | + | | for C/C++ libraries at link | | + | | time | | + +------------------------+--------------------------------+---------------------------+ + | *libraries* | list of library names (not | string | + | | filenames or paths) to link | | + | | against | | + +------------------------+--------------------------------+---------------------------+ + | *runtime_library_dirs* | list of directories to search | string | + | | for C/C++ libraries at run | | + | | time (for shared extensions, | | + | | this is when the extension is | | + | | loaded) | | + +------------------------+--------------------------------+---------------------------+ + | *extra_objects* | list of extra files to link | string | + | | with (eg. object files not | | + | | implied by 'sources', static | | + | | library that must be | | + | | explicitly specified, binary | | + | | resource files, etc.) 
| | + +------------------------+--------------------------------+---------------------------+ + | *extra_compile_args* | any extra platform- and | string | + | | compiler-specific information | | + | | to use when compiling the | | + | | source files in 'sources'. For | | + | | platforms and compilers where | | + | | a command line makes sense, | | + | | this is typically a list of | | + | | command-line arguments, but | | + | | for other platforms it could | | + | | be anything. | | + +------------------------+--------------------------------+---------------------------+ + | *extra_link_args* | any extra platform- and | string | + | | compiler-specific information | | + | | to use when linking object | | + | | files together to create the | | + | | extension (or to create a new | | + | | static Python interpreter). | | + | | Similar interpretation as for | | + | | 'extra_compile_args'. | | + +------------------------+--------------------------------+---------------------------+ + | *export_symbols* | list of symbols to be exported | string | + | | from a shared extension. Not | | + | | used on all platforms, and not | | + | | generally necessary for Python | | + | | extensions, which typically | | + | | export exactly one symbol: | | + | | ``init`` + extension_name. | | + +------------------------+--------------------------------+---------------------------+ + | *depends* | list of files that the | string | + | | extension depends on | | + +------------------------+--------------------------------+---------------------------+ + | *language* | extension language (i.e. | string | + | | ``'c'``, ``'c++'``, | | + | | ``'objc'``). Will be detected | | + | | from the source extensions if | | + | | not provided. | | + +------------------------+--------------------------------+---------------------------+ + + +.. class:: Distribution + + A :class:`Distribution` describes how to build, install and package up a Python + software package. 
+ + See the :func:`setup` function for a list of keyword arguments accepted by the + Distribution constructor. :func:`setup` creates a Distribution instance. + + +.. class:: Command + + A :class:`Command` class (or rather, an instance of one of its subclasses) + implements a single distutils command. + + +:mod:`distutils.ccompiler` --- CCompiler base class +=================================================== + +.. module:: distutils.ccompiler + :synopsis: Abstract CCompiler class + + +This module provides the abstract base class for the :class:`CCompiler` +classes. A :class:`CCompiler` instance can be used for all the compile and +link steps needed to build a single project. Methods are provided to set +options for the compiler --- macro definitions, include directories, link path, +libraries and the like. + +This module provides the following functions. + + +.. function:: gen_lib_options(compiler, library_dirs, runtime_library_dirs, libraries) + + Generate linker options for searching library directories and linking with + specific libraries. *libraries* and *library_dirs* are, respectively, lists of + library names (not filenames!) and search directories. Returns a list of + command-line options suitable for use with some compiler (depending on the two + format strings passed in). + + +.. function:: gen_preprocess_options(macros, include_dirs) + + Generate C pre-processor options (:option:`-D`, :option:`-U`, :option:`-I`) as + used by at least two types of compilers: the typical Unix compiler and Visual + C++. *macros* is the usual thing, a list of 1- or 2-tuples, where ``(name,)`` + means undefine (:option:`-U`) macro *name*, and ``(name, value)`` means define + (:option:`-D`) macro *name* to *value*. *include_dirs* is just a list of + directory names to be added to the header file search path (:option:`-I`). + Returns a list of command-line options suitable for either Unix compilers or + Visual C++. + + +.. 
function:: get_default_compiler(osname, platform) + + Determine the default compiler to use for the given platform. + + *osname* should be one of the standard Python OS names (i.e. the ones returned + by ``os.name``) and *platform* the common value returned by ``sys.platform`` for + the platform in question. + + The default values are ``os.name`` and ``sys.platform`` in case the parameters + are not given. + + +.. function:: new_compiler(plat=None, compiler=None, verbose=0, dry_run=0, force=0) + + Factory function to generate an instance of some CCompiler subclass for the + supplied platform/compiler combination. *plat* defaults to ``os.name`` (eg. + ``'posix'``, ``'nt'``), and *compiler* defaults to the default compiler for + that platform. Currently only ``'posix'`` and ``'nt'`` are supported, and the + default compilers are "traditional Unix interface" (:class:`UnixCCompiler` + class) and Visual C++ (:class:`MSVCCompiler` class). Note that it's perfectly + possible to ask for a Unix compiler object under Windows, and a Microsoft + compiler object under Unix --- if you supply a value for *compiler*, *plat* is + ignored. + + .. % Is the posix/nt only thing still true? Mac OS X seems to work, and + .. % returns a UnixCCompiler instance. How to document this... hmm. + + +.. function:: show_compilers() + + Print list of available compilers (used by the :option:`--help-compiler` options + to :command:`build`, :command:`build_ext`, :command:`build_clib`). + + +.. class:: CCompiler([verbose=0, dry_run=0, force=0]) + + The abstract base class :class:`CCompiler` defines the interface that must be + implemented by real compiler classes. The class also has some utility methods + used by several compiler classes. + + The basic idea behind a compiler abstraction class is that each instance can be + used for all the compile/link steps in building a single project. 
Thus, + attributes common to all of those compile and link steps --- include + directories, macros to define, libraries to link against, etc. --- are + attributes of the compiler instance. To allow for variability in how individual + files are treated, most of those attributes may be varied on a per-compilation + or per-link basis. + + The constructor for each subclass creates an instance of the Compiler object. + Flags are *verbose* (show verbose output), *dry_run* (don't actually execute the + steps) and *force* (rebuild everything, regardless of dependencies). All of + these flags default to ``0`` (off). Note that you probably don't want to + instantiate :class:`CCompiler` or one of its subclasses directly - use the + :func:`distutils.ccompiler.new_compiler` factory function instead. + + The following methods allow you to manually alter compiler options for the + instance of the Compiler class. + + + .. method:: CCompiler.add_include_dir(dir) + + Add *dir* to the list of directories that will be searched for header files. + The compiler is instructed to search directories in the order in which they are + supplied by successive calls to :meth:`add_include_dir`. + + + .. method:: CCompiler.set_include_dirs(dirs) + + Set the list of directories that will be searched to *dirs* (a list of strings). + Overrides any preceding calls to :meth:`add_include_dir`; subsequent calls to + :meth:`add_include_dir` add to the list passed to :meth:`set_include_dirs`. + This does not affect any list of standard include directories that the compiler + may search by default. + + + .. method:: CCompiler.add_library(libname) + + Add *libname* to the list of libraries that will be included in all links driven + by this compiler object. Note that *libname* should *not* be the name of a + file containing a library, but the name of the library itself: the actual + filename will be inferred by the linker, the compiler, or the compiler class + (depending on the platform). 
+ + The linker will be instructed to link against libraries in the order they were + supplied to :meth:`add_library` and/or :meth:`set_libraries`. It is perfectly + valid to duplicate library names; the linker will be instructed to link against + libraries as many times as they are mentioned. + + + .. method:: CCompiler.set_libraries(libnames) + + Set the list of libraries to be included in all links driven by this compiler + object to *libnames* (a list of strings). This does not affect any standard + system libraries that the linker may include by default. + + + .. method:: CCompiler.add_library_dir(dir) + + Add *dir* to the list of directories that will be searched for libraries + specified to :meth:`add_library` and :meth:`set_libraries`. The linker will be + instructed to search for libraries in the order they are supplied to + :meth:`add_library_dir` and/or :meth:`set_library_dirs`. + + + .. method:: CCompiler.set_library_dirs(dirs) + + Set the list of library search directories to *dirs* (a list of strings). This + does not affect any standard library search path that the linker may search by + default. + + + .. method:: CCompiler.add_runtime_library_dir(dir) + + Add *dir* to the list of directories that will be searched for shared libraries + at runtime. + + + .. method:: CCompiler.set_runtime_library_dirs(dirs) + + Set the list of directories to search for shared libraries at runtime to *dirs* + (a list of strings). This does not affect any standard search path that the + runtime linker may search by default. + + + .. method:: CCompiler.define_macro(name[, value=None]) + + Define a preprocessor macro for all compilations driven by this compiler object. + The optional parameter *value* should be a string; if it is not supplied, then + the macro will be defined without an explicit value and the exact outcome + depends on the compiler used (XXX true? does ANSI say anything about this?) + + + .. 
method:: CCompiler.undefine_macro(name) + + Undefine a preprocessor macro for all compilations driven by this compiler + object. If the same macro is defined by :meth:`define_macro` and + undefined by :meth:`undefine_macro` the last call takes precedence + (including multiple redefinitions or undefinitions). If the macro is + redefined/undefined on a per-compilation basis (ie. in the call to + :meth:`compile`), then that takes precedence. + + + .. method:: CCompiler.add_link_object(object) + + Add *object* to the list of object files (or analogues, such as explicitly named + library files or the output of "resource compilers") to be included in every + link driven by this compiler object. + + + .. method:: CCompiler.set_link_objects(objects) + + Set the list of object files (or analogues) to be included in every link to + *objects*. This does not affect any standard object files that the linker may + include by default (such as system libraries). + + The following methods implement methods for autodetection of compiler options, + providing some functionality similar to GNU :program:`autoconf`. + + + .. method:: CCompiler.detect_language(sources) + + Detect the language of a given file, or list of files. Uses the instance + attributes :attr:`language_map` (a dictionary), and :attr:`language_order` (a + list) to do the job. + + + .. method:: CCompiler.find_library_file(dirs, lib[, debug=0]) + + Search the specified list of directories for a static or shared library file + *lib* and return the full path to that file. If *debug* is true, look for a + debugging version (if that makes sense on the current platform). Return + ``None`` if *lib* wasn't found in any of the specified directories. + + + .. method:: CCompiler.has_function(funcname [, includes=None, include_dirs=None, libraries=None, library_dirs=None]) + + Return a boolean indicating whether *funcname* is supported on the current + platform. 
The optional arguments can be used to augment the compilation + environment by providing additional include files and paths and libraries and + paths. + + + .. method:: CCompiler.library_dir_option(dir) + + Return the compiler option to add *dir* to the list of directories searched for + libraries. + + + .. method:: CCompiler.library_option(lib) + + Return the compiler option to add *lib* to the list of libraries linked into the + shared library or executable. + + + .. method:: CCompiler.runtime_library_dir_option(dir) + + Return the compiler option to add *dir* to the list of directories searched for + runtime libraries. + + + .. method:: CCompiler.set_executables(**args) + + Define the executables (and options for them) that will be run to perform the + various stages of compilation. The exact set of executables that may be + specified here depends on the compiler class (via the 'executables' class + attribute), but most will have: + + +--------------+------------------------------------------+ + | attribute | description | + +==============+==========================================+ + | *compiler* | the C/C++ compiler | + +--------------+------------------------------------------+ + | *linker_so* | linker used to create shared objects and | + | | libraries | + +--------------+------------------------------------------+ + | *linker_exe* | linker used to create binary executables | + +--------------+------------------------------------------+ + | *archiver* | static library creator | + +--------------+------------------------------------------+ + + On platforms with a command-line (Unix, DOS/Windows), each of these is a string + that will be split into executable name and (optional) list of arguments. + (Splitting the string is done similarly to how Unix shells operate: words are + delimited by spaces, but quotes and backslashes can override this. See + :func:`distutils.util.split_quoted`.) + + The following methods invoke stages in the build process. + + + .. 
method:: CCompiler.compile(sources[, output_dir=None, macros=None, include_dirs=None, debug=0, extra_preargs=None, extra_postargs=None, depends=None]) + + Compile one or more source files. Generates object files (e.g. transforms a + :file:`.c` file to a :file:`.o` file.) + + *sources* must be a list of filenames, most likely C/C++ files, but in reality + anything that can be handled by a particular compiler and compiler class (eg. + :class:`MSVCCompiler` can handle resource files in *sources*). Return a list of + object filenames, one per source filename in *sources*. Depending on the + implementation, not all source files will necessarily be compiled, but all + corresponding object filenames will be returned. + + If *output_dir* is given, object files will be put under it, while retaining + their original path component. That is, :file:`foo/bar.c` normally compiles to + :file:`foo/bar.o` (for a Unix implementation); if *output_dir* is *build*, then + it would compile to :file:`build/foo/bar.o`. + + *macros*, if given, must be a list of macro definitions. A macro definition is + either a ``(name, value)`` 2-tuple or a ``(name,)`` 1-tuple. The former defines + a macro; if the value is ``None``, the macro is defined without an explicit + value. The 1-tuple case undefines a macro. Later + definitions/redefinitions/undefinitions take precedence. + + *include_dirs*, if given, must be a list of strings, the directories to add to + the default include file search path for this compilation only. + + *debug* is a boolean; if true, the compiler will be instructed to output debug + symbols in (or alongside) the object file(s). + + *extra_preargs* and *extra_postargs* are implementation-dependent. On platforms + that have the notion of a command-line (e.g. Unix, DOS/Windows), they are most + likely lists of strings: extra command-line arguments to prepend/append to the + compiler command line. On other platforms, consult the implementation class + documentation. 
In any event, they are intended as an escape hatch for those + occasions when the abstract compiler framework doesn't cut the mustard. + + *depends*, if given, is a list of filenames that all targets depend on. If a + source file is older than any file in depends, then the source file will be + recompiled. This supports dependency tracking, but only at a coarse + granularity. + + Raises :exc:`CompileError` on failure. + + + .. method:: CCompiler.create_static_lib(objects, output_libname[, output_dir=None, debug=0, target_lang=None]) + + Link a bunch of stuff together to create a static library file. The "bunch of + stuff" consists of the list of object files supplied as *objects*, the extra + object files supplied to :meth:`add_link_object` and/or + :meth:`set_link_objects`, the libraries supplied to :meth:`add_library` and/or + :meth:`set_libraries`, and the libraries supplied as *libraries* (if any). + + *output_libname* should be a library name, not a filename; the filename will be + inferred from the library name. *output_dir* is the directory where the library + file will be put. XXX defaults to what? + + *debug* is a boolean; if true, debugging information will be included in the + library (note that on most platforms, it is the compile step where this matters: + the *debug* flag is included here just for consistency). + + *target_lang* is the target language for which the given objects are being + compiled. This allows specific linkage time treatment of certain languages. + + Raises :exc:`LibError` on failure. + + + .. method:: CCompiler.link(target_desc, objects, output_filename[, output_dir=None, libraries=None, library_dirs=None, runtime_library_dirs=None, export_symbols=None, debug=0, extra_preargs=None, extra_postargs=None, build_temp=None, target_lang=None]) + + Link a bunch of stuff together to create an executable or shared library file. + + The "bunch of stuff" consists of the list of object files supplied as *objects*. 
+ *output_filename* should be a filename. If *output_dir* is supplied, + *output_filename* is relative to it (i.e. *output_filename* can provide + directory components if needed). + + *libraries* is a list of libraries to link against. These are library names, + not filenames, since they're translated into filenames in a platform-specific + way (eg. *foo* becomes :file:`libfoo.a` on Unix and :file:`foo.lib` on + DOS/Windows). However, they can include a directory component, which means the + linker will look in that specific directory rather than searching all the normal + locations. + + *library_dirs*, if supplied, should be a list of directories to search for + libraries that were specified as bare library names (ie. no directory + component). These are on top of the system default and those supplied to + :meth:`add_library_dir` and/or :meth:`set_library_dirs`. *runtime_library_dirs* + is a list of directories that will be embedded into the shared library and used + to search for other shared libraries that \*it\* depends on at run-time. (This + may only be relevant on Unix.) + + *export_symbols* is a list of symbols that the shared library will export. + (This appears to be relevant only on Windows.) + + *debug* is as for :meth:`compile` and :meth:`create_static_lib`, with the + slight distinction that it actually matters on most platforms (as opposed to + :meth:`create_static_lib`, which includes a *debug* flag mostly for form's + sake). + + *extra_preargs* and *extra_postargs* are as for :meth:`compile` (except of + course that they supply command-line arguments for the particular linker being + used). + + *target_lang* is the target language for which the given objects are being + compiled. This allows specific linkage time treatment of certain languages. + + Raises :exc:`LinkError` on failure. + + + .. 
method:: CCompiler.link_executable(objects, output_progname[, output_dir=None, libraries=None, library_dirs=None, runtime_library_dirs=None, debug=0, extra_preargs=None, extra_postargs=None, target_lang=None]) + + Link an executable. *output_progname* is the name of the executable file, while + *objects* is a list of object filenames to link in. Other arguments are as for + the :meth:`link` method. + + + .. method:: CCompiler.link_shared_lib(objects, output_libname[, output_dir=None, libraries=None, library_dirs=None, runtime_library_dirs=None, export_symbols=None, debug=0, extra_preargs=None, extra_postargs=None, build_temp=None, target_lang=None]) + + Link a shared library. *output_libname* is the name of the output library, + while *objects* is a list of object filenames to link in. Other arguments are + as for the :meth:`link` method. + + + .. method:: CCompiler.link_shared_object(objects, output_filename[, output_dir=None, libraries=None, library_dirs=None, runtime_library_dirs=None, export_symbols=None, debug=0, extra_preargs=None, extra_postargs=None, build_temp=None, target_lang=None]) + + Link a shared object. *output_filename* is the name of the shared object that + will be created, while *objects* is a list of object filenames to link in. + Other arguments are as for the :meth:`link` method. + + + .. method:: CCompiler.preprocess(source[, output_file=None, macros=None, include_dirs=None, extra_preargs=None, extra_postargs=None]) + + Preprocess a single C/C++ source file, named in *source*. Output will be written + to the file named *output_file*, or to *stdout* if *output_file* is not supplied. + *macros* is a list of macro definitions as for :meth:`compile`, which will + augment the macros set with :meth:`define_macro` and :meth:`undefine_macro`. + *include_dirs* is a list of directory names that will be added to the default + list, in the same way as :meth:`add_include_dir`. + + Raises :exc:`PreprocessError` on failure.
+ + The following utility methods are defined by the :class:`CCompiler` class, for + use by the various concrete subclasses. + + + .. method:: CCompiler.executable_filename(basename[, strip_dir=0, output_dir='']) + + Returns the filename of the executable for the given *basename*. Typically for + non-Windows platforms this is the same as the basename, while Windows will get + a :file:`.exe` added. + + + .. method:: CCompiler.library_filename(libname[, lib_type='static', strip_dir=0, output_dir='']) + + Returns the filename for the given library name on the current platform. On Unix + a library with *lib_type* of ``'static'`` will typically be of the form + :file:`liblibname.a`, while a *lib_type* of ``'dynamic'`` will be of the form + :file:`liblibname.so`. + + + .. method:: CCompiler.object_filenames(source_filenames[, strip_dir=0, output_dir='']) + + Returns the name of the object files for the given source files. + *source_filenames* should be a list of filenames. + + + .. method:: CCompiler.shared_object_filename(basename[, strip_dir=0, output_dir='']) + + Returns the name of a shared object file for the given file name *basename*. + + + .. method:: CCompiler.execute(func, args[, msg=None, level=1]) + + Invokes :func:`distutils.util.execute`. This method invokes a Python function + *func* with the given arguments *args*, after logging and taking into account + the *dry_run* flag. XXX see also. + + + .. method:: CCompiler.spawn(cmd) + + Invokes :func:`distutils.util.spawn`. This invokes an external process to run + the given command. XXX see also. + + + .. method:: CCompiler.mkpath(name[, mode=511]) + + Invokes :func:`distutils.dir_util.mkpath`. This creates a directory and any + missing ancestor directories. XXX see also. + + + .. method:: CCompiler.move_file(src, dst) + + Invokes :func:`distutils.file_util.move_file`. Renames *src* to *dst*. XXX see + also. + + + ..
method:: CCompiler.announce(msg[, level=1]) + + Write a message using :func:`distutils.log.debug`. XXX see also. + + + .. method:: CCompiler.warn(msg) + + Write a warning message *msg* to standard error. + + + .. method:: CCompiler.debug_print(msg) + + If the *debug* flag is set on this :class:`CCompiler` instance, print *msg* to + standard output, otherwise do nothing. + +.. % \subsection{Compiler-specific modules} +.. % +.. % The following modules implement concrete subclasses of the abstract +.. % \class{CCompiler} class. They should not be instantiated directly, but should +.. % be created using \function{distutils.ccompiler.new_compiler()} factory +.. % function. + + +:mod:`distutils.unixccompiler` --- Unix C Compiler +================================================== + +.. module:: distutils.unixccompiler + :synopsis: UNIX C Compiler + + +This module provides the :class:`UnixCCompiler` class, a subclass of +:class:`CCompiler` that handles the typical Unix-style command-line C compiler: + +* macros defined with :option:`-Dname[=value]` + +* macros undefined with :option:`-Uname` + +* include search directories specified with :option:`-Idir` + +* libraries specified with :option:`-llib` + +* library search directories specified with :option:`-Ldir` + +* compile handled by :program:`cc` (or similar) executable with :option:`-c` + option: compiles :file:`.c` to :file:`.o` + +* link static library handled by :program:`ar` command (possibly with + :program:`ranlib`) + +* link shared library handled by :program:`cc` :option:`-shared` + + +:mod:`distutils.msvccompiler` --- Microsoft Compiler +==================================================== + +.. module:: distutils.msvccompiler + :synopsis: Microsoft Compiler + + +This module provides :class:`MSVCCompiler`, an implementation of the abstract +:class:`CCompiler` class for Microsoft Visual Studio. Typically, extension +modules need to be compiled with the same compiler that was used to compile +Python. 
For Python 2.3 and earlier, the compiler was Visual Studio 6. For Python +2.4 and 2.5, the compiler is Visual Studio .NET 2003. The AMD64 and Itanium +binaries are created using the Platform SDK. + +:class:`MSVCCompiler` will normally choose the right compiler, linker etc. on +its own. To override this choice, the environment variables *DISTUTILS_USE_SDK* +and *MSSdk* must both be set. *MSSdk* indicates that the current environment has +been set up by the SDK's ``SetEnv.Cmd`` script, or that the environment variables +had been registered when the SDK was installed; *DISTUTILS_USE_SDK* indicates +that the distutils user has made an explicit choice to override the compiler +selection by :class:`MSVCCompiler`. + + +:mod:`distutils.bcppcompiler` --- Borland Compiler +================================================== + +.. module:: distutils.bcppcompiler + + +This module provides :class:`BorlandCCompiler`, a subclass of the abstract +:class:`CCompiler` class for the Borland C++ compiler. + + +:mod:`distutils.cygwinccompiler` --- Cygwin Compiler +==================================================== + +.. module:: distutils.cygwinccompiler + + +This module provides the :class:`CygwinCCompiler` class, a subclass of +:class:`UnixCCompiler` that handles the Cygwin port of the GNU C compiler to +Windows. It also contains the Mingw32CCompiler class which handles the mingw32 +port of GCC (same as cygwin in no-cygwin mode). + + +:mod:`distutils.emxccompiler` --- OS/2 EMX Compiler +=================================================== + +.. module:: distutils.emxccompiler + :synopsis: OS/2 EMX Compiler support + + +This module provides the EMXCCompiler class, a subclass of +:class:`UnixCCompiler` that handles the EMX port of the GNU C compiler to OS/2. + + +:mod:`distutils.mwerkscompiler` --- Metrowerks CodeWarrior support +================================================================== + +..
module:: distutils.mwerkscompiler + :synopsis: Metrowerks CodeWarrior support + + +Contains :class:`MWerksCompiler`, an implementation of the abstract +:class:`CCompiler` class for MetroWerks CodeWarrior on the pre-Mac OS X +Macintosh. Needs work to support CW on Windows or Mac OS X. + +.. % \subsection{Utility modules} +.. % +.. % The following modules all provide general utility functions. They haven't +.. % all been documented yet. + + +:mod:`distutils.archive_util` --- Archiving utilities +====================================================== + +.. module:: distutils.archive_util + :synopsis: Utility functions for creating archive files (tarballs, zip files, ...) + + +This module provides a few functions for creating archive files, such as +tarballs or zipfiles. + + +.. function:: make_archive(base_name, format[, root_dir=None, base_dir=None, verbose=0, dry_run=0]) + + Create an archive file (eg. ``zip`` or ``tar``). *base_name* is the name of + the file to create, minus any format-specific extension; *format* is the + archive format: one of ``zip``, ``tar``, ``ztar``, or ``gztar``. *root_dir* is + a directory that will be the root directory of the archive; ie. we typically + ``chdir`` into *root_dir* before creating the archive. *base_dir* is the + directory where we start archiving from; ie. *base_dir* will be the common + prefix of all files and directories in the archive. *root_dir* and *base_dir* + both default to the current directory. Returns the name of the archive file. + + .. warning:: + + This should be changed to support bz2 files + + +.. function:: make_tarball(base_name, base_dir[, compress='gzip', verbose=0, dry_run=0]) + + Create an (optionally compressed) archive as a tar file from all files in and + under *base_dir*. *compress* must be ``'gzip'`` (the default), ``'compress'``, + ``'bzip2'``, or ``None``.
Both :program:`tar` and the compression utility named + by *compress* must be on the default program search path, so this is probably + Unix-specific. The output tar file will be named :file:`base_name.tar`, + possibly plus the appropriate compression extension (:file:`.gz`, :file:`.bz2` + or :file:`.Z`). Return the output filename. + + .. warning:: + + This should be replaced with calls to the :mod:`tarfile` module. + + +.. function:: make_zipfile(base_name, base_dir[, verbose=0, dry_run=0]) + + Create a zip file from all files in and under *base_dir*. The output zip file + will be named *base_name* + :file:`.zip`. Uses either the :mod:`zipfile` Python + module (if available) or the InfoZIP :file:`zip` utility (if installed and + found on the default search path). If neither tool is available, raises + :exc:`DistutilsExecError`. Returns the name of the output zip file. + + +:mod:`distutils.dep_util` --- Dependency checking +================================================= + +.. module:: distutils.dep_util + :synopsis: Utility functions for simple dependency checking + + +This module provides functions for performing simple, timestamp-based +dependency analysis of files and groups of files; also, functions based entirely on such +timestamp dependency analysis. + + +.. function:: newer(source, target) + + Return true if *source* exists and is more recently modified than *target*, or + if *source* exists and *target* doesn't. Return false if both exist and *target* + is the same age or newer than *source*. Raise :exc:`DistutilsFileError` if + *source* does not exist. + + +.. function:: newer_pairwise(sources, targets) + + Walk two filename lists in parallel, testing if each source is newer than its + corresponding target. Return a pair of lists (*sources*, *targets*) where + source is newer than target, according to the semantics of :func:`newer`. + + .. % % equivalent to a listcomp... + + +..
function:: newer_group(sources, target[, missing='error']) + + Return true if *target* is out-of-date with respect to any file listed in + *sources*. In other words, if *target* exists and is newer than every file in + *sources*, return false; otherwise return true. *missing* controls what we do + when a source file is missing; the default (``'error'``) is to blow up with an + :exc:`OSError` from inside :func:`os.stat`; if it is ``'ignore'``, we silently + drop any missing source files; if it is ``'newer'``, any missing source files + make us assume that *target* is out-of-date (this is handy in "dry-run" mode: + it'll make you pretend to carry out commands that wouldn't work because inputs + are missing, but that doesn't matter because you're not actually going to run + the commands). + + +:mod:`distutils.dir_util` --- Directory tree operations +======================================================= + +.. module:: distutils.dir_util + :synopsis: Utility functions for operating on directories and directory trees + + +This module provides functions for operating on directories and trees of +directories. + + +.. function:: mkpath(name[, mode=0777, verbose=0, dry_run=0]) + + Create a directory and any missing ancestor directories. If the directory + already exists (or if *name* is the empty string, which means the current + directory, which of course exists), then do nothing. Raise + :exc:`DistutilsFileError` if unable to create some directory along the way (eg. + some sub-path exists, but is a file rather than a directory). If *verbose* is + true, print a one-line summary of each mkdir to stdout. Return the list of + directories actually created. + + +.. function:: create_tree(base_dir, files[, mode=0777, verbose=0, dry_run=0]) + + Create all the empty directories under *base_dir* needed to put *files* there. + *base_dir* is just the name of a directory which doesn't necessarily exist + yet; *files* is a list of filenames to be interpreted relative to *base_dir*.
+ *base_dir* + the directory portion of every file in *files* will be created if + it doesn't already exist. *mode*, *verbose* and *dry_run* flags are as for + :func:`mkpath`. + + +.. function:: copy_tree(src, dst[, preserve_mode=1, preserve_times=1, preserve_symlinks=0, update=0, verbose=0, dry_run=0]) + + Copy an entire directory tree *src* to a new location *dst*. Both *src* and + *dst* must be directory names. If *src* is not a directory, raise + :exc:`DistutilsFileError`. If *dst* does not exist, it is created with + :func:`mkpath`. The end result of the copy is that every file in *src* is + copied to *dst*, and directories under *src* are recursively copied to *dst*. + Return the list of files that were copied or might have been copied, using their + output name. The return value is unaffected by *update* or *dry_run*: it is + simply the list of all files under *src*, with the names changed to be under + *dst*. + + *preserve_mode* and *preserve_times* are the same as for :func:`copy_file` in + :mod:`distutils.file_util`; note that they only apply to regular files, not to + directories. If *preserve_symlinks* is true, symlinks will be copied as + symlinks (on platforms that support them!); otherwise (the default), the + destination of the symlink will be copied. *update* and *verbose* are the same + as for :func:`copy_file`. + + +.. function:: remove_tree(directory[, verbose=0, dry_run=0]) + + Recursively remove *directory* and all files and directories underneath it. Any + errors are ignored (apart from being reported to ``sys.stdout`` if *verbose* is + true). + +**\*\*** Some of this could be replaced with the shutil module? **\*\*** + + +:mod:`distutils.file_util` --- Single file operations +===================================================== + +.. module:: distutils.file_util + :synopsis: Utility functions for operating on single files + + +This module contains some utility functions for operating on individual files. + + +.. 
function:: copy_file(src, dst[, preserve_mode=1, preserve_times=1, update=0, link=None, verbose=0, dry_run=0]) + + Copy file *src* to *dst*. If *dst* is a directory, then *src* is copied there + with the same name; otherwise, it must be a filename. (If the file exists, it + will be ruthlessly clobbered.) If *preserve_mode* is true (the default), the + file's mode (type and permission bits, or whatever is analogous on the + current platform) is copied. If *preserve_times* is true (the default), the + last-modified and last-access times are copied as well. If *update* is true, + *src* will only be copied if *dst* does not exist, or if *dst* does exist but + is older than *src*. + + *link* allows you to make hard links (using :func:`os.link`) or symbolic links + (using :func:`os.symlink`) instead of copying: set it to ``'hard'`` or + ``'sym'``; if it is ``None`` (the default), files are copied. Don't set *link* + on systems that don't support it: :func:`copy_file` doesn't check if hard or + symbolic linking is available. It uses :func:`_copy_file_contents` to copy file + contents. + + Return a tuple ``(dest_name, copied)``: *dest_name* is the actual name of the + output file, and *copied* is true if the file was copied (or would have been + copied, if *dry_run* is true). + + .. % XXX if the destination file already exists, we clobber it if + .. % copying, but blow up if linking. Hmmm. And I don't know what + .. % macostools.copyfile() does. Should definitely be consistent, and + .. % should probably blow up if destination exists and we would be + .. % changing it (ie. it's not already a hard/soft link to src OR + .. % (not update) and (src newer than dst)). + + +.. function:: move_file(src, dst[, verbose, dry_run]) + + Move file *src* to *dst*. If *dst* is a directory, the file will be moved into + it with the same name; otherwise, *src* is just renamed to *dst*. Returns the + new full name of the file. + + ..
warning:: + + Handles cross-device moves on Unix using :func:`copy_file`. What about other + systems??? + + +.. function:: write_file(filename, contents) + + Create a file called *filename* and write *contents* (a sequence of strings + without line terminators) to it. + + +:mod:`distutils.util` --- Miscellaneous other utility functions +=============================================================== + +.. module:: distutils.util + :synopsis: Miscellaneous other utility functions + + +This module contains other assorted bits and pieces that don't fit into any +other utility module. + + +.. function:: get_platform() + + Return a string that identifies the current platform. This is used mainly to + distinguish platform-specific build directories and platform-specific built + distributions. Typically includes the OS name and version and the architecture + (as supplied by ``os.uname()``), although the exact information included depends + on the OS; eg. for IRIX the architecture isn't particularly important (IRIX only + runs on SGI hardware), but for Linux the kernel version isn't particularly + important. + + Examples of returned values: + + * ``linux-i586`` + * ``linux-alpha`` + * ``solaris-2.6-sun4u`` + * ``irix-5.3`` + * ``irix64-6.2`` + + For non-POSIX platforms, currently just returns ``sys.platform``. + + .. % XXX isn't this also provided by some other non-distutils module? + + +.. function:: convert_path(pathname) + + Return *pathname* as a name that will work on the native filesystem, i.e. split + it on ``'/'`` and put it back together again using the current directory separator. + Needed because filenames in the setup script are always supplied in Unix style, + and have to be converted to the local convention before we can actually use them + in the filesystem. Raises :exc:`ValueError` on non-Unix-ish systems if + *pathname* either starts or ends with a slash. + + +.. function:: change_root(new_root, pathname) + + Return *pathname* with *new_root* prepended.
If *pathname* is relative, this is + equivalent to ``os.path.join(new_root,pathname)``. Otherwise, it requires making + *pathname* relative and then joining the two, which is tricky on DOS/Windows. + + +.. function:: check_environ() + + Ensure that ``os.environ`` has all the environment variables we guarantee that + users can use in config files, command-line options, etc. Currently this + includes: + + * :envvar:`HOME` - user's home directory (Unix only) + * :envvar:`PLAT` - description of the current platform, including hardware and + OS (see :func:`get_platform`) + + +.. function:: subst_vars(s, local_vars) + + Perform shell/Perl-style variable substitution on *s*. Every occurrence of + ``$`` followed by a name is considered a variable, and the variable is substituted + by the value found in the *local_vars* dictionary, or in ``os.environ`` if it's + not in *local_vars*. ``os.environ`` is first checked/augmented to guarantee that + it contains certain values: see :func:`check_environ`. Raise :exc:`ValueError` + for any variables not found in either *local_vars* or ``os.environ``. + + Note that this is not a fully-fledged string interpolation function. A valid + ``$variable`` can consist only of upper and lower case letters, numbers and an + underscore. No { } or ( ) style quoting is available. + + +.. function:: grok_environment_error(exc[, prefix='error: ']) + + Generate a useful error message from an :exc:`EnvironmentError` (:exc:`IOError` + or :exc:`OSError`) exception object. Handles Python 1.5.1 and later styles, + and does what it can to deal with exception objects that don't have a filename + (which happens when the error is due to a two-file operation, such as + :func:`rename` or :func:`link`). Returns the error message as a string + prefixed with *prefix*. + + +.. function:: split_quoted(s) + + Split a string up according to Unix shell-like rules for quotes and backslashes.
+ In short: words are delimited by spaces, as long as those spaces are not escaped + by a backslash, or inside a quoted string. Single and double quotes are + equivalent, and the quote characters can be backslash-escaped. The backslash is + stripped from any two-character escape sequence, leaving only the escaped + character. The quote characters are stripped from any quoted string. Returns a + list of words. + + .. % Should probably be moved into the standard library. + + +.. function:: execute(func, args[, msg=None, verbose=0, dry_run=0]) + + Perform some action that affects the outside world (for instance, writing to the + filesystem). Such actions are special because they are disabled by the + *dry_run* flag. This method takes care of all that bureaucracy for you; all + you have to do is supply the function to call and an argument tuple for it (to + embody the "external action" being performed), and an optional message to print. + + +.. function:: strtobool(val) + + Convert a string representation of truth to true (1) or false (0). + + True values are ``y``, ``yes``, ``t``, ``true``, ``on`` and ``1``; false values + are ``n``, ``no``, ``f``, ``false``, ``off`` and ``0``. Raises + :exc:`ValueError` if *val* is anything else. + + +.. function:: byte_compile(py_files[, optimize=0, force=0, prefix=None, base_dir=None, verbose=1, dry_run=0, direct=None]) + + Byte-compile a collection of Python source files to either :file:`.pyc` or + :file:`.pyo` files in the same directory. *py_files* is a list of files to + compile; any files that don't end in :file:`.py` are silently skipped. + *optimize* must be one of the following: + + * ``0`` - don't optimize (generate :file:`.pyc`) + * ``1`` - normal optimization (like ``python -O``) + * ``2`` - extra optimization (like ``python -OO``) + + If *force* is true, all files are recompiled regardless of timestamps. 
+ + The source filename encoded in each bytecode file defaults to the filenames + listed in *py_files*; you can modify these with *prefix* and *base_dir*. + *prefix* is a string that will be stripped off of each source filename, and + *base_dir* is a directory name that will be prepended (after *prefix* is + stripped). You can supply either or both (or neither) of *prefix* and + *base_dir*, as you wish. + + If *dry_run* is true, doesn't actually do anything that would affect the + filesystem. + + Byte-compilation is either done directly in this interpreter process with the + standard :mod:`py_compile` module, or indirectly by writing a temporary script + and executing it. Normally, you should let :func:`byte_compile` figure out whether to + use direct compilation or not (see the source for details). The *direct* flag + is used by the script generated in indirect mode; unless you know what you're + doing, leave it set to ``None``. + + +.. function:: rfc822_escape(header) + + Return a version of *header* escaped for inclusion in an :rfc:`822` header, by + ensuring there are 8 spaces after each newline. Note that it does no other + modification of the string. + + .. % this _can_ be replaced + +.. % \subsection{Distutils objects} + + +:mod:`distutils.dist` --- The Distribution class +================================================ + +.. module:: distutils.dist + :synopsis: Provides the Distribution class, which represents the module distribution being + built/installed/distributed + + +This module provides the :class:`Distribution` class, which represents the +module distribution being built/installed/distributed. + + +:mod:`distutils.extension` --- The Extension class +================================================== + +.. module:: distutils.extension + :synopsis: Provides the Extension class, used to describe C/C++ extension modules in setup + scripts + + +This module provides the :class:`Extension` class, used to describe C/C++ +extension modules in setup scripts.
+ +.. % \subsection{Ungrouped modules} +.. % The following haven't been moved into a more appropriate section yet. + + +:mod:`distutils.debug` --- Distutils debug mode +=============================================== + +.. module:: distutils.debug + :synopsis: Provides the debug flag for distutils + + +This module provides the DEBUG flag. + + +:mod:`distutils.errors` --- Distutils exceptions +================================================ + +.. module:: distutils.errors + :synopsis: Provides standard distutils exceptions + + +Provides exceptions used by the Distutils modules. Note that Distutils modules +may raise standard exceptions; in particular, SystemExit is usually raised for +errors that are obviously the end-user's fault (eg. bad command-line arguments). + +This module is safe to use in ``from ... import *`` mode; it only exports +symbols whose names start with ``Distutils`` and end with ``Error``. + + +:mod:`distutils.fancy_getopt` --- Wrapper around the standard getopt module +=========================================================================== + +.. module:: distutils.fancy_getopt + :synopsis: Additional getopt functionality + + +This module provides a wrapper around the standard :mod:`getopt` module that +provides the following additional features: + +* short and long options are tied together + +* options have help strings, so :func:`fancy_getopt` could potentially create a + complete usage summary + +* options set attributes of a passed-in object + +* boolean options can have "negative aliases" --- eg. if :option:`--quiet` is + the "negative alias" of :option:`--verbose`, then :option:`--quiet` on the + command line sets *verbose* to false. + +**\*\*** Should be replaced with :mod:`optik` (which is also now known as +:mod:`optparse` in Python 2.3 and later). **\*\*** + + +.. function:: fancy_getopt(options, negative_opt, object, args) + + Wrapper function. 
*options* is a list of ``(long_option, short_option, + help_string)`` 3-tuples as described in the constructor for + :class:`FancyGetopt`. *negative_opt* should be a dictionary mapping option names + to option names, both the key and value should be in the *options* list. + *object* is an object which will be used to store values (see the :meth:`getopt` + method of the :class:`FancyGetopt` class). *args* is the argument list. Will use + ``sys.argv[1:]`` if you pass ``None`` as *args*. + + +.. function:: wrap_text(text, width) + + Wraps *text* to less than *width* wide. + + .. warning:: + + Should be replaced with :mod:`textwrap` (which is available in Python 2.3 and + later). + + +.. class:: FancyGetopt([option_table=None]) + + The option_table is a list of 3-tuples: ``(long_option, short_option, + help_string)`` + + If an option takes an argument, its *long_option* should have ``'='`` appended; + *short_option* should just be a single character, no ``':'`` in any case. + *short_option* should be ``None`` if a *long_option* doesn't have a + corresponding *short_option*. All option tuples must have long options. + +The :class:`FancyGetopt` class provides the following methods: + + +.. method:: FancyGetopt.getopt([args=None, object=None]) + + Parse command-line options in args. Store as attributes on *object*. + + If *args* is ``None`` or not supplied, uses ``sys.argv[1:]``. If *object* is + ``None`` or not supplied, creates a new :class:`OptionDummy` instance, stores + option values there, and returns a tuple ``(args, object)``. If *object* is + supplied, it is modified in place and :func:`getopt` just returns *args*; in + both cases, the returned *args* is a modified copy of the passed-in *args* list, + which is left untouched. + + .. % and args returned are? + + +.. 
method:: FancyGetopt.get_option_order() + + Returns the list of ``(option, value)`` tuples processed by the previous run of + :meth:`getopt`. Raises :exc:`RuntimeError` if :meth:`getopt` hasn't been called + yet. + + +.. method:: FancyGetopt.generate_help([header=None]) + + Generate help text (a list of strings, one per suggested line of output) from + the option table for this :class:`FancyGetopt` object. + + If supplied, prints the supplied *header* at the top of the help. + + +:mod:`distutils.filelist` --- The FileList class +================================================ + +.. module:: distutils.filelist + :synopsis: The FileList class, used for poking about the file system and building lists of + files. + + +This module provides the :class:`FileList` class, used for poking about the +filesystem and building lists of files. + + +:mod:`distutils.log` --- Simple PEP 282-style logging +===================================================== + +.. module:: distutils.log + :synopsis: A simple logging mechanism, 282-style + + +.. warning:: + + Should be replaced with standard :mod:`logging` module. + +.. % \subsubsection{\module{} --- } +.. % \declaremodule{standard}{distutils.magic} +.. % \modulesynopsis{ } + + +:mod:`distutils.spawn` --- Spawn a sub-process +============================================== + +.. module:: distutils.spawn + :synopsis: Provides the spawn() function + + +This module provides the :func:`spawn` function, a front-end to various +platform-specific functions for launching another program in a sub-process. +Also provides :func:`find_executable` to search the path for a given executable +name. + + +:mod:`distutils.sysconfig` --- System configuration information +=============================================================== + +.. module:: distutils.sysconfig + :synopsis: Low-level access to configuration information of the Python interpreter. +.. moduleauthor:: Fred L. Drake, Jr. +.. moduleauthor:: Greg Ward +.. sectionauthor:: Fred L.
Drake, Jr. + + +The :mod:`distutils.sysconfig` module provides access to Python's low-level +configuration information. The specific configuration variables available +depend heavily on the platform and configuration. The specific variables depend +on the build process for the specific version of Python being run; the variables +are those found in the :file:`Makefile` and configuration header that are +installed with Python on Unix systems. The configuration header is called +:file:`pyconfig.h` for Python versions starting with 2.2, and :file:`config.h` +for earlier versions of Python. + +Some additional functions are provided which perform some useful manipulations +for other parts of the :mod:`distutils` package. + + +.. data:: PREFIX + + The result of ``os.path.normpath(sys.prefix)``. + + +.. data:: EXEC_PREFIX + + The result of ``os.path.normpath(sys.exec_prefix)``. + + +.. function:: get_config_var(name) + + Return the value of a single variable. This is equivalent to + ``get_config_vars().get(name)``. + + +.. function:: get_config_vars(...) + + Return a set of variable definitions. If there are no arguments, this returns a + dictionary mapping names of configuration variables to values. If arguments are + provided, they should be strings, and the return value will be a sequence giving + the associated values. If a given name does not have a corresponding value, + ``None`` will be included for that variable. + + +.. function:: get_config_h_filename() + + Return the full path name of the configuration header. For Unix, this will be + the header generated by the :program:`configure` script; for other platforms the + header will have been supplied directly by the Python source distribution. The + file is a platform-specific text file. + + +.. function:: get_makefile_filename() + + Return the full path name of the :file:`Makefile` used to build Python. 
For + Unix, this will be a file generated by the :program:`configure` script; the + meaning for other platforms will vary. The file is a platform-specific text + file, if it exists. This function is only useful on POSIX platforms. + + +.. function:: get_python_inc([plat_specific[, prefix]]) + + Return the directory for either the general or platform-dependent C include + files. If *plat_specific* is true, the platform-dependent include directory is + returned; if false or omitted, the platform-independent directory is returned. + If *prefix* is given, it is used as either the prefix instead of + :const:`PREFIX`, or as the exec-prefix instead of :const:`EXEC_PREFIX` if + *plat_specific* is true. + + +.. function:: get_python_lib([plat_specific[, standard_lib[, prefix]]]) + + Return the directory for either the general or platform-dependent library + installation. If *plat_specific* is true, the platform-dependent library + directory is returned; if false or omitted, the platform-independent directory + is returned. If *prefix* is given, it is used as either the prefix instead of + :const:`PREFIX`, or as the exec-prefix instead of :const:`EXEC_PREFIX` if + *plat_specific* is true. If *standard_lib* is true, the directory for the + standard library is returned rather than the directory for the installation of + third-party extensions. + +The following function is only intended for use within the :mod:`distutils` +package. + + +.. function:: customize_compiler(compiler) + + Do any platform-specific customization of a + :class:`distutils.ccompiler.CCompiler` instance. + + This function is only needed on Unix at this time, but should be called + consistently to support forward-compatibility. It inserts the information that + varies across Unix flavors and is stored in Python's :file:`Makefile`. This + information includes the selected compiler, compiler and linker options, and the + extension used by the linker for shared objects.
+ +This function is even more special-purpose, and should only be used from +Python's own build procedures. + + +.. function:: set_python_build() + + Inform the :mod:`distutils.sysconfig` module that it is being used as part of + the build process for Python. This changes a lot of relative locations for + files, allowing them to be located in the build area rather than in an installed + Python. + + +:mod:`distutils.text_file` --- The TextFile class +================================================= + +.. module:: distutils.text_file + :synopsis: provides the TextFile class, a simple interface to text files + + +This module provides the :class:`TextFile` class, which gives an interface to +text files that (optionally) takes care of stripping comments, ignoring blank +lines, and joining lines with backslashes. + + +.. class:: TextFile([filename=None, file=None, **options]) + + This class provides a file-like object that takes care of all the things you + commonly want to do when processing a text file that has some line-by-line + syntax: strip comments (as long as ``#`` is your comment character), skip blank + lines, join adjacent lines by escaping the newline (ie. backslash at end of + line), strip leading and/or trailing whitespace. All of these are optional and + independently controllable. + + The class provides a :meth:`warn` method so you can generate warning messages + that report physical line number, even if the logical line in question spans + multiple physical lines. Also provides :meth:`unreadline` for implementing + line-at-a-time lookahead. + + :class:`TextFile` instances are created with either *filename*, *file*, or both. + :exc:`RuntimeError` is raised if both are ``None``. *filename* should be a + string, and *file* a file object (or something that provides :meth:`readline` + and :meth:`close` methods). It is recommended that you supply at least + *filename*, so that :class:`TextFile` can include it in warning messages.
If + *file* is not supplied, :class:`TextFile` creates its own using the + :func:`open` built-in function. + + The options are all boolean, and affect the values returned by :meth:`readline` + + +------------------+--------------------------------+---------+ + | option name | description | default | + +==================+================================+=========+ + | *strip_comments* | strip from ``'#'`` to end-of- | true | + | | line, as well as any | | + | | whitespace leading up to the | | + | | ``'#'``\ ---unless it is | | + | | escaped by a backslash | | + +------------------+--------------------------------+---------+ + | *lstrip_ws* | strip leading whitespace from | false | + | | each line before returning it | | + +------------------+--------------------------------+---------+ + | *rstrip_ws* | strip trailing whitespace | true | + | | (including line terminator!) | | + | | from each line before | | + | | returning it. | | + +------------------+--------------------------------+---------+ + | *skip_blanks* | skip lines that are empty | true | + | | \*after\* stripping comments | | + | | and whitespace. (If both | | + | | lstrip_ws and rstrip_ws are | | + | | false, then some lines may | | + | | consist of solely whitespace: | | + | | these will \*not\* be skipped, | | + | | even if *skip_blanks* is | | + | | true.) | | + +------------------+--------------------------------+---------+ + | *join_lines* | if a backslash is the last | false | + | | non-newline character on a | | + | | line after stripping comments | | + | | and whitespace, join the | | + | | following line to it to form | | + | | one logical line; if N | | + | | consecutive lines end with a | | + | | backslash, then N+1 physical | | + | | lines will be joined to form | | + | | one logical line. 
| | + +------------------+--------------------------------+---------+ + | *collapse_join* | strip leading whitespace from | false | + | | lines that are joined to their | | + | | predecessor; only matters if | | + | | ``(join_lines and not | | + | | lstrip_ws)`` | | + +------------------+--------------------------------+---------+ + + Note that since *rstrip_ws* can strip the trailing newline, the semantics of + :meth:`readline` must differ from those of the builtin file object's + :meth:`readline` method! In particular, :meth:`readline` returns ``None`` for + end-of-file: an empty string might just be a blank line (or an all-whitespace + line), if *rstrip_ws* is true but *skip_blanks* is not. + + + .. method:: TextFile.open(filename) + + Open a new file *filename*. This overrides any *file* or *filename* constructor + arguments. + + + .. method:: TextFile.close() + + Close the current file and forget everything we know about it (including the + filename and the current line number). + + + .. method:: TextFile.warn(msg[,line=None]) + + Print (to stderr) a warning message tied to the current logical line in the + current file. If the current logical line in the file spans multiple physical + lines, the warning refers to the whole range, such as ``"lines 3-5"``. If + *line* is supplied, it overrides the current line number; it may be a list or + tuple to indicate a range of physical lines, or an integer for a single + physical line. + + + .. method:: TextFile.readline() + + Read and return a single logical line from the current file (or from an internal + buffer if lines have previously been "unread" with :meth:`unreadline`). If the + *join_lines* option is true, this may involve reading multiple physical lines + concatenated into a single string. Updates the current line number, so calling + :meth:`warn` after :meth:`readline` emits a warning about the physical line(s) + just read. 
Returns ``None`` on end-of-file, since the empty string can occur + if *rstrip_ws* is true but *skip_blanks* is not. + + + .. method:: TextFile.readlines() + + Read and return the list of all logical lines remaining in the current file. + This updates the current line number to the last line of the file. + + + .. method:: TextFile.unreadline(line) + + Push *line* (a string) onto an internal buffer that will be checked by future + :meth:`readline` calls. Handy for implementing a parser with line-at-a-time + lookahead. Note that lines that are "unread" with :meth:`unreadline` are not + subsequently re-cleansed (whitespace stripped, or whatever) when read with + :meth:`readline`. If multiple calls are made to :meth:`unreadline` before a call + to :meth:`readline`, the lines will be returned in most-recent-first order. + + +:mod:`distutils.version` --- Version number classes +=================================================== + +.. module:: distutils.version + :synopsis: implements classes that represent module version numbers. + + +.. % todo +.. % \section{Distutils Commands} +.. % +.. % This part of Distutils implements the various Distutils commands, such +.. % as \code{build}, \code{install} \&c. Each command is implemented as a +.. % separate module, with the command name as the name of the module. + + +:mod:`distutils.cmd` --- Abstract base class for Distutils commands +=================================================================== + +.. module:: distutils.cmd + :synopsis: This module provides the abstract base class Command. This class is subclassed + by the modules in the distutils.command subpackage. + + +This module supplies the abstract base class :class:`Command`. + + +.. class:: Command(dist) + + Abstract base class for defining command classes, the "worker bees" of the + Distutils. A useful analogy for command classes is to think of them as + subroutines with local variables called *options*.
The options are declared in + :meth:`initialize_options` and defined (given their final values) in + :meth:`finalize_options`, both of which must be defined by every command class. + The distinction between the two is necessary because option values might come + from the outside world (command line, config file, ...), and any options + dependent on other options must be computed after these outside influences have + been processed --- hence :meth:`finalize_options`. The body of the subroutine, + where it does all its work based on the values of its options, is the + :meth:`run` method, which must also be implemented by every command class. + + The class constructor takes a single argument *dist*, a :class:`Distribution` + instance. + + +:mod:`distutils.command` --- Individual Distutils commands +========================================================== + +.. module:: distutils.command + :synopsis: This subpackage contains one module for each standard Distutils command. + + +.. % \subsubsection{Individual Distutils commands} +.. % todo + + +:mod:`distutils.command.bdist` --- Build a binary installer +=========================================================== + +.. module:: distutils.command.bdist + :synopsis: Build a binary installer for a package + + +.. % todo + + +:mod:`distutils.command.bdist_packager` --- Abstract base class for packagers +============================================================================= + +.. module:: distutils.command.bdist_packager + :synopsis: Abstract base class for packagers + + +.. % todo + + +:mod:`distutils.command.bdist_dumb` --- Build a "dumb" installer +================================================================ + +.. module:: distutils.command.bdist_dumb + :synopsis: Build a "dumb" installer - a simple archive of files + + +.. % todo + + +:mod:`distutils.command.bdist_msi` --- Build a Microsoft Installer binary package +================================================================================= + +.. 
module:: distutils.command.bdist_msi + :synopsis: Build a binary distribution as a Windows MSI file + + +.. % todo + + +:mod:`distutils.command.bdist_rpm` --- Build a binary distribution as a Redhat RPM and SRPM +=========================================================================================== + +.. module:: distutils.command.bdist_rpm + :synopsis: Build a binary distribution as a Redhat RPM and SRPM + + +.. % todo + + +:mod:`distutils.command.bdist_wininst` --- Build a Windows installer +==================================================================== + +.. module:: distutils.command.bdist_wininst + :synopsis: Build a Windows installer + + +.. % todo + + +:mod:`distutils.command.sdist` --- Build a source distribution +============================================================== + +.. module:: distutils.command.sdist + :synopsis: Build a source distribution + + +.. % todo + + +:mod:`distutils.command.build` --- Build all files of a package +=============================================================== + +.. module:: distutils.command.build + :synopsis: Build all files of a package + + +.. % todo + + +:mod:`distutils.command.build_clib` --- Build any C libraries in a package +========================================================================== + +.. module:: distutils.command.build_clib + :synopsis: Build any C libraries in a package + + +.. % todo + + +:mod:`distutils.command.build_ext` --- Build any extensions in a package +======================================================================== + +.. module:: distutils.command.build_ext + :synopsis: Build any extensions in a package + + +.. % todo + + +:mod:`distutils.command.build_py` --- Build the .py/.pyc files of a package +=========================================================================== + +.. module:: distutils.command.build_py + :synopsis: Build the .py/.pyc files of a package + + +.. 
% todo + + +:mod:`distutils.command.build_scripts` --- Build the scripts of a package +========================================================================= + +.. module:: distutils.command.build_scripts + :synopsis: Build the scripts of a package + + +.. % todo + + +:mod:`distutils.command.clean` --- Clean a package build area +============================================================= + +.. module:: distutils.command.clean + :synopsis: Clean a package build area + + +.. % todo + + +:mod:`distutils.command.config` --- Perform package configuration +================================================================= + +.. module:: distutils.command.config + :synopsis: Perform package configuration + + +.. % todo + + +:mod:`distutils.command.install` --- Install a package +====================================================== + +.. module:: distutils.command.install + :synopsis: Install a package + + +.. % todo + + +:mod:`distutils.command.install_data` --- Install data files from a package +=========================================================================== + +.. module:: distutils.command.install_data + :synopsis: Install data files from a package + + +.. % todo + + +:mod:`distutils.command.install_headers` --- Install C/C++ header files from a package +====================================================================================== + +.. module:: distutils.command.install_headers + :synopsis: Install C/C++ header files from a package + + +.. % todo + + +:mod:`distutils.command.install_lib` --- Install library files from a package +============================================================================= + +.. module:: distutils.command.install_lib + :synopsis: Install library files from a package + + +.. % todo + + +:mod:`distutils.command.install_scripts` --- Install script files from a package +================================================================================ + +.. 
module:: distutils.command.install_scripts + :synopsis: Install script files from a package + + +.. % todo + + +:mod:`distutils.command.register` --- Register a module with the Python Package Index +===================================================================================== + +.. module:: distutils.command.register + :synopsis: Register a module with the Python Package Index + + +The ``register`` command registers the package with the Python Package Index. +This is described in more detail in :pep:`301`. + +.. % todo + + +Creating a new Distutils command +================================ + +This section outlines the steps to create a new Distutils command. + +A new command lives in a module in the :mod:`distutils.command` package. There +is a sample template in that directory called :file:`command_template`. Copy +this file to a new module with the same name as the new command you're +implementing. This module should implement a class with the same name as the +module (and the command). So, for instance, to create the command +``peel_banana`` (so that users can run ``setup.py peel_banana``), you'd copy +:file:`command_template` to :file:`distutils/command/peel_banana.py`, then edit +it so that it's implementing the class :class:`peel_banana`, a subclass of +:class:`distutils.cmd.Command`. + +Subclasses of :class:`Command` must define the following methods. + + +.. method:: Command.initialize_options() + + Set default values for all the options that this command supports. Note that + these defaults may be overridden by other commands, by the setup script, by + config files, or by the command-line. Thus, this is not the place to code + dependencies between options; generally, :meth:`initialize_options` + implementations are just a bunch of ``self.foo = None`` assignments. + + +.. method:: Command.finalize_options() + + Set final values for all the options that this command supports. This is + always called as late as possible, ie.
after any option assignments from the + command-line or from other commands have been done. Thus, this is the place + to code option dependencies: if *foo* depends on *bar*, then it is safe to + set *foo* from *bar* as long as *foo* still has the same value it was + assigned in :meth:`initialize_options`. + + +.. method:: Command.run() + + A command's raison d'etre: carry out the action it exists to perform, controlled + by the options initialized in :meth:`initialize_options`, customized by other + commands, the setup script, the command-line, and config files, and finalized in + :meth:`finalize_options`. All terminal output and filesystem interaction should + be done by :meth:`run`. + +*sub_commands* formalizes the notion of a "family" of commands, eg. ``install`` +as the parent with sub-commands ``install_lib``, ``install_headers``, etc. The +parent of a family of commands defines *sub_commands* as a class attribute; it's +a list of 2-tuples ``(command_name, predicate)``, with *command_name* a string +and *predicate* an unbound method, a string or None. *predicate* is a method of +the parent command that determines whether the corresponding command is +applicable in the current situation. (Eg. ``install_headers`` is only +applicable if we have any C header files to install.) If *predicate* is None, +that command is always applicable. + +*sub_commands* is usually defined at the \*end\* of a class, because predicates +can be unbound methods, so they must already have been defined. The canonical +example is the :command:`install` command. diff --git a/Doc/distutils/builtdist.rst b/Doc/distutils/builtdist.rst new file mode 100644 index 0000000..b40ddeb --- /dev/null +++ b/Doc/distutils/builtdist.rst @@ -0,0 +1,405 @@ +..
_built-dist: + +**************************** +Creating Built Distributions +**************************** + +A "built distribution" is what you're probably used to thinking of either as a +"binary package" or an "installer" (depending on your background). It's not +necessarily binary, though, because it might contain only Python source code +and/or byte-code; and we don't call it a package, because that word is already +spoken for in Python. (And "installer" is a term specific to the world of +mainstream desktop systems.) + +A built distribution is how you make life as easy as possible for installers of +your module distribution: for users of RPM-based Linux systems, it's a binary +RPM; for Windows users, it's an executable installer; for Debian-based Linux +users, it's a Debian package; and so forth. Obviously, no one person will be +able to create built distributions for every platform under the sun, so the +Distutils are designed to enable module developers to concentrate on their +specialty---writing code and creating source distributions---while an +intermediary species called *packagers* springs up to turn source distributions +into built distributions for as many platforms as there are packagers. + +Of course, the module developer could be his own packager; or the packager could +be a volunteer "out there" somewhere who has access to a platform which the +original developer does not; or it could be software periodically grabbing new +source distributions and turning them into built distributions for as many +platforms as the software has access to. Regardless of who they are, a packager +uses the setup script and the :command:`bdist` command family to generate built +distributions. 
+ +As a simple example, if I run the following command in the Distutils source +tree:: + + python setup.py bdist + +then the Distutils builds my module distribution (the Distutils itself in this +case), does a "fake" installation (also in the :file:`build` directory), and +creates the default type of built distribution for my platform. The default +format for built distributions is a "dumb" tar file on Unix, and a simple +executable installer on Windows. (That tar file is considered "dumb" because it +has to be unpacked in a specific location to work.) + +Thus, the above command on a Unix system creates +:file:`Distutils-1.0.{plat}.tar.gz`; unpacking this tarball from the right place +installs the Distutils just as though you had downloaded the source distribution +and run ``python setup.py install``. (The "right place" is either the root of +the filesystem or Python's :file:`{prefix}` directory, depending on the options +given to the :command:`bdist_dumb` command; the default is to make dumb +distributions relative to :file:`{prefix}`.) + +Obviously, for pure Python distributions, this isn't any simpler than just +running ``python setup.py install``\ ---but for non-pure distributions, which +include extensions that would need to be compiled, it can mean the difference +between someone being able to use your extensions or not. And creating "smart" +built distributions, such as an RPM package or an executable installer for +Windows, is far more convenient for users even if your distribution doesn't +include any extensions. + +The :command:`bdist` command has a :option:`--formats` option, similar to the +:command:`sdist` command, which you can use to select the types of built +distribution to generate: for example, :: + + python setup.py bdist --formats=zip + +would, when run on a Unix system, create :file:`Distutils-1.0.{plat}.zip`\ +---again, this archive would be unpacked from the root directory to install the +Distutils.
+ +The available formats for built distributions are: + ++-------------+------------------------------+---------+ +| Format | Description | Notes | ++=============+==============================+=========+ +| ``gztar`` | gzipped tar file | (1),(3) | +| | (:file:`.tar.gz`) | | ++-------------+------------------------------+---------+ +| ``ztar`` | compressed tar file | \(3) | +| | (:file:`.tar.Z`) | | ++-------------+------------------------------+---------+ +| ``tar`` | tar file (:file:`.tar`) | \(3) | ++-------------+------------------------------+---------+ +| ``zip`` | zip file (:file:`.zip`) | \(4) | ++-------------+------------------------------+---------+ +| ``rpm`` | RPM | \(5) | ++-------------+------------------------------+---------+ +| ``pkgtool`` | Solaris :program:`pkgtool` | | ++-------------+------------------------------+---------+ +| ``sdux`` | HP-UX :program:`swinstall` | | ++-------------+------------------------------+---------+ +| ``rpm`` | RPM | \(5) | ++-------------+------------------------------+---------+ +| ``wininst`` | self-extracting ZIP file for | (2),(4) | +| | Windows | | ++-------------+------------------------------+---------+ + +Notes: + +(1) + default on Unix + +(2) + default on Windows + + **\*\*** to-do! **\*\*** + +(3) + requires external utilities: :program:`tar` and possibly one of :program:`gzip`, + :program:`bzip2`, or :program:`compress` + +(4) + requires either external :program:`zip` utility or :mod:`zipfile` module (part + of the standard Python library since Python 1.6) + +(5) + requires external :program:`rpm` utility, version 3.0.4 or better (use ``rpm + --version`` to find out which version you have) + +You don't have to use the :command:`bdist` command with the :option:`--formats` +option; you can also use the command that directly implements the format you're +interested in. 
Some of these :command:`bdist` "sub-commands" actually generate +several similar formats; for instance, the :command:`bdist_dumb` command +generates all the "dumb" archive formats (``tar``, ``ztar``, ``gztar``, and +``zip``), and :command:`bdist_rpm` generates both binary and source RPMs. The +:command:`bdist` sub-commands, and the formats generated by each, are: + ++--------------------------+-----------------------+ +| Command | Formats | ++==========================+=======================+ +| :command:`bdist_dumb` | tar, ztar, gztar, zip | ++--------------------------+-----------------------+ +| :command:`bdist_rpm` | rpm, srpm | ++--------------------------+-----------------------+ +| :command:`bdist_wininst` | wininst | ++--------------------------+-----------------------+ + +The following sections give details on the individual :command:`bdist_\*` +commands. + + +.. _creating-dumb: + +Creating dumb built distributions +================================= + +**\*\*** Need to document absolute vs. prefix-relative packages here, but first +I have to implement it! **\*\*** + + +.. _creating-rpms: + +Creating RPM packages +===================== + +The RPM format is used by many popular Linux distributions, including Red Hat, +SuSE, and Mandrake. If one of these (or any of the other RPM-based Linux +distributions) is your usual environment, creating RPM packages for other users +of that same distribution is trivial. Depending on the complexity of your module +distribution and differences between Linux distributions, you may also be able +to create RPMs that work on different RPM-based distributions. 
+ +The usual way to create an RPM of your module distribution is to run the +:command:`bdist_rpm` command:: + + python setup.py bdist_rpm + +or the :command:`bdist` command with the :option:`--formats` option:: + + python setup.py bdist --formats=rpm + +The former allows you to specify RPM-specific options; the latter allows you to +easily specify multiple formats in one run. If you need to do both, you can +explicitly specify multiple :command:`bdist_\*` commands and their options:: + + python setup.py bdist_rpm --packager="John Doe <jdoe@example.org>" \ + bdist_wininst --target-version="2.0" + +Creating RPM packages is driven by a :file:`.spec` file, much as using the +Distutils is driven by the setup script. To make your life easier, the +:command:`bdist_rpm` command normally creates a :file:`.spec` file based on the +information you supply in the setup script, on the command line, and in any +Distutils configuration files. Various options and sections in the +:file:`.spec` file are derived from options in the setup script as follows: + ++------------------------------------------+----------------------------------------------+ +| RPM :file:`.spec` file option or section | Distutils setup script option | ++==========================================+==============================================+ +| Name | :option:`name` | ++------------------------------------------+----------------------------------------------+ +| Summary (in preamble) | :option:`description` | ++------------------------------------------+----------------------------------------------+ +| Version | :option:`version` | ++------------------------------------------+----------------------------------------------+ +| Vendor | :option:`author` and :option:`author_email`, | +| | or --- & :option:`maintainer` and | +| | :option:`maintainer_email` | ++------------------------------------------+----------------------------------------------+ +| Copyright | :option:`licence` |
++------------------------------------------+----------------------------------------------+ +| Url | :option:`url` | ++------------------------------------------+----------------------------------------------+ +| %description (section) | :option:`long_description` | ++------------------------------------------+----------------------------------------------+ + +Additionally, there are many options in :file:`.spec` files that don't have +corresponding options in the setup script. Most of these are handled through +options to the :command:`bdist_rpm` command as follows: + ++-------------------------------+-----------------------------+-------------------------+ +| RPM :file:`.spec` file option | :command:`bdist_rpm` option | default value | +| or section | | | ++===============================+=============================+=========================+ +| Release | :option:`release` | "1" | ++-------------------------------+-----------------------------+-------------------------+ +| Group | :option:`group` | "Development/Libraries" | ++-------------------------------+-----------------------------+-------------------------+ +| Vendor | :option:`vendor` | (see above) | ++-------------------------------+-----------------------------+-------------------------+ +| Packager | :option:`packager` | (none) | ++-------------------------------+-----------------------------+-------------------------+ +| Provides | :option:`provides` | (none) | ++-------------------------------+-----------------------------+-------------------------+ +| Requires | :option:`requires` | (none) | ++-------------------------------+-----------------------------+-------------------------+ +| Conflicts | :option:`conflicts` | (none) | ++-------------------------------+-----------------------------+-------------------------+ +| Obsoletes | :option:`obsoletes` | (none) | ++-------------------------------+-----------------------------+-------------------------+ +| Distribution | :option:`distribution_name` | 
(none) | ++-------------------------------+-----------------------------+-------------------------+ +| BuildRequires | :option:`build_requires` | (none) | ++-------------------------------+-----------------------------+-------------------------+ +| Icon | :option:`icon` | (none) | ++-------------------------------+-----------------------------+-------------------------+ + +Obviously, supplying even a few of these options on the command-line would be +tedious and error-prone, so it's usually best to put them in the setup +configuration file, :file:`setup.cfg`\ ---see section :ref:`setup-config`. If +you distribute or package many Python module distributions, you might want to +put options that apply to all of them in your personal Distutils configuration +file (:file:`~/.pydistutils.cfg`). + +There are three steps to building a binary RPM package, all of which are +handled automatically by the Distutils: + +#. create a :file:`.spec` file, which describes the package (analogous to the + Distutils setup script; in fact, much of the information in the setup script + winds up in the :file:`.spec` file) + +#. create the source RPM + +#. create the "binary" RPM (which may or may not contain binary code, depending + on whether your module distribution contains Python extensions) + +Normally, RPM bundles the last two steps together; when you use the Distutils, +all three steps are typically bundled together. + +If you wish, you can separate these three steps. You can use the +:option:`--spec-only` option to make :command:`bdist_rpm` just create the +:file:`.spec` file and exit; in this case, the :file:`.spec` file will be +written to the "distribution directory"---normally :file:`dist/`, but +customizable with the :option:`--dist-dir` option. (Normally, the :file:`.spec` +file winds up deep in the "build tree," in a temporary directory created by +:command:`bdist_rpm`.) + +.. % \XXX{this isn't implemented yet---is it needed?!} +.. 
% You can also specify a custom \file{.spec} file with the +.. % \longprogramopt{spec-file} option; used in conjunction with +.. % \longprogramopt{spec-only}, this gives you an opportunity to customize +.. % the \file{.spec} file manually: +.. % +.. % \ begin{verbatim} +.. % > python setup.py bdist_rpm --spec-only +.. % # ...edit dist/FooBar-1.0.spec +.. % > python setup.py bdist_rpm --spec-file=dist/FooBar-1.0.spec +.. % \ end{verbatim} +.. % +.. % (Although a better way to do this is probably to override the standard +.. % \command{bdist\_rpm} command with one that writes whatever else you want +.. % to the \file{.spec} file.) + + +.. _creating-wininst: + +Creating Windows Installers +=========================== + +Executable installers are the natural format for binary distributions on +Windows. They display a nice graphical user interface, display some information +about the module distribution to be installed taken from the metadata in the +setup script, let the user select a few options, and start or cancel the +installation. + +Since the metadata is taken from the setup script, creating Windows installers +is usually as easy as running:: + + python setup.py bdist_wininst + +or the :command:`bdist` command with the :option:`--formats` option:: + + python setup.py bdist --formats=wininst + +If you have a pure module distribution (only containing pure Python modules and +packages), the resulting installer will be version independent and have a name +like :file:`foo-1.0.win32.exe`. These installers can even be created on Unix or +Mac OS platforms. + +If you have a non-pure distribution, the extensions can only be created on a +Windows platform, and will be Python version dependent. The installer filename +will reflect this and now has the form :file:`foo-1.0.win32-py2.0.exe`. You +have to create a separate installer for every Python version you want to +support. 
+ +The installer will try to compile pure modules into bytecode after installation +on the target system in normal and optimizing mode. If you don't want this to +happen for some reason, you can run the :command:`bdist_wininst` command with +the :option:`--no-target-compile` and/or the :option:`--no-target-optimize` +option. + +By default the installer will display the cool "Python Powered" logo when it is +run, but you can also supply your own bitmap which must be a Windows +:file:`.bmp` file with the :option:`--bitmap` option. + +The installer will also display a large title on the desktop background window +when it is run, which is constructed from the name of your distribution and the +version number. This can be changed to another text by using the +:option:`--title` option. + +The installer file will be written to the "distribution directory" --- normally +:file:`dist/`, but customizable with the :option:`--dist-dir` option. + + +.. _postinstallation-script: + +The Postinstallation script +--------------------------- + +Starting with Python 2.3, a postinstallation script can be specified with the +:option:`--install-script` option. The basename of the script must be +specified, and the script filename must also be listed in the scripts argument +to the setup function. + +This script will be run at installation time on the target system after all the +files have been copied, with ``argv[1]`` set to :option:`-install`, and again at +uninstallation time before the files are removed with ``argv[1]`` set to +:option:`-remove`. + +The installation script runs embedded in the Windows installer; all output +(``sys.stdout``, ``sys.stderr``) is redirected into a buffer and will be +displayed in the GUI after the script has finished. + +Some functions especially useful in this context are available as additional +built-in functions in the installation script. + + +..
function:: directory_created(path) + file_created(path) + + These functions should be called when a directory or file is created by the + postinstall script at installation time. It will register *path* with the + uninstaller, so that it will be removed when the distribution is uninstalled. + To be safe, directories are only removed if they are empty. + + +.. function:: get_special_folder_path(csidl_string) + + This function can be used to retrieve special folder locations on Windows like + the Start Menu or the Desktop. It returns the full path to the folder. + *csidl_string* must be one of the following strings:: + + "CSIDL_APPDATA" + + "CSIDL_COMMON_STARTMENU" + "CSIDL_STARTMENU" + + "CSIDL_COMMON_DESKTOPDIRECTORY" + "CSIDL_DESKTOPDIRECTORY" + + "CSIDL_COMMON_STARTUP" + "CSIDL_STARTUP" + + "CSIDL_COMMON_PROGRAMS" + "CSIDL_PROGRAMS" + + "CSIDL_FONTS" + + If the folder cannot be retrieved, :exc:`OSError` is raised. + + Which folders are available depends on the exact Windows version, and probably + also the configuration. For details refer to Microsoft's documentation of the + :cfunc:`SHGetSpecialFolderPath` function. + + +.. function:: create_shortcut(target, description, filename[, arguments[, workdir[, iconpath[, iconindex]]]]) + + This function creates a shortcut. *target* is the path to the program to be + started by the shortcut. *description* is the description of the shortcut. + *filename* is the title of the shortcut that the user will see. *arguments* + specifies the command line arguments, if any. *workdir* is the working directory + for the program. *iconpath* is the file containing the icon for the shortcut, + and *iconindex* is the index of the icon in the file *iconpath*. Again, for + details consult the Microsoft documentation for the :class:`IShellLink` + interface. 
+ + diff --git a/Doc/distutils/commandref.rst b/Doc/distutils/commandref.rst new file mode 100644 index 0000000..f5f0220 --- /dev/null +++ b/Doc/distutils/commandref.rst @@ -0,0 +1,104 @@ +.. _reference: + +***************** +Command Reference +***************** + +.. % \section{Building modules: the \protect\command{build} command family} +.. % \label{build-cmds} +.. % \subsubsection{\protect\command{build}} +.. % \label{build-cmd} +.. % \subsubsection{\protect\command{build\_py}} +.. % \label{build-py-cmd} +.. % \subsubsection{\protect\command{build\_ext}} +.. % \label{build-ext-cmd} +.. % \subsubsection{\protect\command{build\_clib}} +.. % \label{build-clib-cmd} + + +.. _install-cmd: + +Installing modules: the :command:`install` command family +========================================================= + +The install command ensures that the build commands have been run and then runs +the subcommands :command:`install_lib`, :command:`install_data` and +:command:`install_scripts`. + +.. % \subsubsection{\protect\command{install\_lib}} +.. % \label{install-lib-cmd} + + +.. _install-data-cmd: + +:command:`install_data` +----------------------- + +This command installs all data files provided with the distribution. + + +.. _install-scripts-cmd: + +:command:`install_scripts` +-------------------------- + +This command installs all (Python) scripts in the distribution. + +.. % \subsection{Cleaning up: the \protect\command{clean} command} +.. % \label{clean-cmd} + + +.. _sdist-cmd: + +Creating a source distribution: the :command:`sdist` command +============================================================ + +**\*\*** fragment moved down from above: needs context! 
**\*\*** + +The manifest template commands are: + ++-------------------------------------------+-----------------------------------------------+ +| Command | Description | ++===========================================+===============================================+ +| :command:`include pat1 pat2 ...` | include all files matching any of the listed | +| | patterns | ++-------------------------------------------+-----------------------------------------------+ +| :command:`exclude pat1 pat2 ...` | exclude all files matching any of the listed | +| | patterns | ++-------------------------------------------+-----------------------------------------------+ +| :command:`recursive-include dir pat1 pat2 | include all files under *dir* matching any of | +| ...` | the listed patterns | ++-------------------------------------------+-----------------------------------------------+ +| :command:`recursive-exclude dir pat1 pat2 | exclude all files under *dir* matching any of | +| ...` | the listed patterns | ++-------------------------------------------+-----------------------------------------------+ +| :command:`global-include pat1 pat2 ...` | include all files anywhere in the source tree | +| | matching any of the listed patterns | ++-------------------------------------------+-----------------------------------------------+ +| :command:`global-exclude pat1 pat2 ...` | exclude all files anywhere in the source tree | +| | matching any of the listed patterns | ++-------------------------------------------+-----------------------------------------------+ +| :command:`prune dir` | exclude all files under *dir* | ++-------------------------------------------+-----------------------------------------------+ +| :command:`graft dir` | include all files under *dir* | ++-------------------------------------------+-----------------------------------------------+ + +The patterns here are Unix-style "glob" patterns: ``*`` matches any sequence of +regular filename characters,
``?`` matches any single regular filename +character, and ``[range]`` matches any of the characters in *range* (e.g., +``a-z``, ``a-zA-Z``, ``a-f0-9_.``). The definition of "regular filename +character" is platform-specific: on Unix it is anything except slash; on Windows +anything except backslash or colon; on Mac OS 9 anything except colon. + +**\*\*** Windows support not there yet **\*\*** + +.. % \section{Creating a built distribution: the +.. % \protect\command{bdist} command family} +.. % \label{bdist-cmds} + +.. % \subsection{\protect\command{bdist}} +.. % \subsection{\protect\command{bdist\_dumb}} +.. % \subsection{\protect\command{bdist\_rpm}} +.. % \subsection{\protect\command{bdist\_wininst}} + + diff --git a/Doc/distutils/configfile.rst b/Doc/distutils/configfile.rst new file mode 100644 index 0000000..0ccd5fd --- /dev/null +++ b/Doc/distutils/configfile.rst @@ -0,0 +1,130 @@ +.. _setup-config: + +************************************ +Writing the Setup Configuration File +************************************ + +Often, it's not possible to write down everything needed to build a distribution +*a priori*: you may need to get some information from the user, or from the +user's system, in order to proceed. As long as that information is fairly +simple---a list of directories to search for C header files or libraries, for +example---then providing a configuration file, :file:`setup.cfg`, for users to +edit is a cheap and easy way to solicit it. Configuration files also let you +provide default values for any command option, which the installer can then +override either on the command-line or by editing the config file. + +The setup configuration file is a useful middle-ground between the setup script +---which, ideally, would be opaque to installers [#]_---and the command-line to +the setup script, which is outside of your control and entirely up to the +installer. 
In fact, :file:`setup.cfg` (and any other Distutils configuration +files present on the target system) are processed after the contents of the +setup script, but before the command-line. This has several useful +consequences: + +.. % (If you have more advanced needs, such as determining which extensions +.. % to build based on what capabilities are present on the target system, +.. % then you need the Distutils ``auto-configuration'' facility. This +.. % started to appear in Distutils 0.9 but, as of this writing, isn't mature +.. % or stable enough yet for real-world use.) + +* installers can override some of what you put in :file:`setup.py` by editing + :file:`setup.cfg` + +* you can provide non-standard defaults for options that are not easily set in + :file:`setup.py` + +* installers can override anything in :file:`setup.cfg` using the command-line + options to :file:`setup.py` + +The basic syntax of the configuration file is simple:: + + [command] + option=value + ... + +where *command* is one of the Distutils commands (e.g. :command:`build_py`, +:command:`install`), and *option* is one of the options that command supports. +Any number of options can be supplied for each command, and any number of +command sections can be included in the file. Blank lines are ignored, as are +comments, which run from a ``'#'`` character until the end of the line. Long +option values can be split across multiple lines simply by indenting the +continuation lines. + +You can find out the list of options supported by a particular command with the +universal :option:`--help` option, e.g. :: + + > python setup.py --help build_ext + [...] 
+ Options for 'build_ext' command: + --build-lib (-b) directory for compiled extension modules + --build-temp (-t) directory for temporary files (build by-products) + --inplace (-i) ignore build-lib and put compiled extensions into the + source directory alongside your pure Python modules + --include-dirs (-I) list of directories to search for header files + --define (-D) C preprocessor macros to define + --undef (-U) C preprocessor macros to undefine + --swig-opts list of SWIG command line options + [...] + +Note that an option spelled :option:`--foo-bar` on the command-line is spelled +:option:`foo_bar` in configuration files. + +For example, say you want your extensions to be built "in-place"---that is, you +have an extension :mod:`pkg.ext`, and you want the compiled extension file +(:file:`ext.so` on Unix, say) to be put in the same source directory as your +pure Python modules :mod:`pkg.mod1` and :mod:`pkg.mod2`. You can always use the +:option:`--inplace` option on the command-line to ensure this:: + + python setup.py build_ext --inplace + +But this requires that you always specify the :command:`build_ext` command +explicitly, and remember to provide :option:`--inplace`. An easier way is to +"set and forget" this option, by encoding it in :file:`setup.cfg`, the +configuration file for this distribution:: + + [build_ext] + inplace=1 + +This will affect all builds of this module distribution, whether or not you +explicitly specify :command:`build_ext`. If you include :file:`setup.cfg` in +your source distribution, it will also affect end-user builds---which is +probably a bad idea for this option, since always building extensions in-place +would break installation of the module distribution. In certain peculiar cases, +though, modules are built right in their installation directory, so this is +conceivably a useful ability. (Distributing extensions that expect to be built +in their installation directory is almost always a bad idea, though.) 
+ +Another example: certain commands take a lot of options that don't change from +run to run; for example, :command:`bdist_rpm` needs to know everything required +to generate a "spec" file for creating an RPM distribution. Some of this +information comes from the setup script, and some is automatically generated by +the Distutils (such as the list of files installed). But some of it has to be +supplied as options to :command:`bdist_rpm`, which would be very tedious to do +on the command-line for every run. Hence, here is a snippet from the Distutils' +own :file:`setup.cfg`:: + + [bdist_rpm] + release = 1 + packager = Greg Ward + doc_files = CHANGES.txt + README.txt + USAGE.txt + doc/ + examples/ + +Note that the :option:`doc_files` option is simply a whitespace-separated string +split across multiple lines for readability. + + +.. seealso:: + + :ref:`inst-config-syntax` in "Installing Python Modules" + More information on the configuration files is available in the manual for + system administrators. + + +.. rubric:: Footnotes + +.. [#] This ideal probably won't be achieved until auto-configuration is fully + supported by the Distutils. + diff --git a/Doc/distutils/examples.rst b/Doc/distutils/examples.rst new file mode 100644 index 0000000..4e4adc5 --- /dev/null +++ b/Doc/distutils/examples.rst @@ -0,0 +1,241 @@ +.. _examples: + +******** +Examples +******** + +This chapter provides a number of basic examples to help get started with +distutils. Additional information about using distutils can be found in the +Distutils Cookbook. + + +.. seealso:: + + `Distutils Cookbook `_ + Collection of recipes showing how to achieve more control over distutils. + + +.. _pure-mod: + +Pure Python distribution (by module) +==================================== + +If you're just distributing a couple of modules, especially if they don't live +in a particular package, you can specify them individually using the +:option:`py_modules` option in the setup script. 
+ +In the simplest case, you'll have two files to worry about: a setup script and +the single module you're distributing, :file:`foo.py` in this example:: + + <root>/ + setup.py + foo.py + +(In all diagrams in this section, *<root>* will refer to the distribution root +directory.) A minimal setup script to describe this situation would be:: + + from distutils.core import setup + setup(name='foo', + version='1.0', + py_modules=['foo'], + ) + +Note that the name of the distribution is specified independently with the +:option:`name` option, and there's no rule that says it has to be the same as +the name of the sole module in the distribution (although that's probably a good +convention to follow). However, the distribution name is used to generate +filenames, so you should stick to letters, digits, underscores, and hyphens. + +Since :option:`py_modules` is a list, you can of course specify multiple +modules, e.g. if you're distributing modules :mod:`foo` and :mod:`bar`, your +setup might look like this:: + + <root>/ + setup.py + foo.py + bar.py + +and the setup script might be :: + + from distutils.core import setup + setup(name='foobar', + version='1.0', + py_modules=['foo', 'bar'], + ) + +You can put module source files into another directory, but if you have enough +modules to do that, it's probably easier to specify modules by package rather +than listing them individually. + + +.. _pure-pkg: + +Pure Python distribution (by package) +===================================== + +If you have more than a couple of modules to distribute, especially if they are +in multiple packages, it's probably easier to specify whole packages rather than +individual modules. This works even if your modules are not in a package; you +can just tell the Distutils to process modules from the root package, and that +works the same as any other package (except that you don't have to have an +:file:`__init__.py` file).
+ +The setup script from the last example could also be written as :: + + from distutils.core import setup + setup(name='foobar', + version='1.0', + packages=[''], + ) + +(The empty string stands for the root package.) + +If those two files are moved into a subdirectory, but remain in the root +package, e.g.:: + + <root>/ + setup.py + src/ foo.py + bar.py + +then you would still specify the root package, but you have to tell the +Distutils where source files in the root package live:: + + from distutils.core import setup + setup(name='foobar', + version='1.0', + package_dir={'': 'src'}, + packages=[''], + ) + +More typically, though, you will want to distribute multiple modules in the same +package (or in sub-packages). For example, if the :mod:`foo` and :mod:`bar` +modules belong in package :mod:`foobar`, one way to lay out your source tree is +:: + + <root>/ + setup.py + foobar/ + __init__.py + foo.py + bar.py + +This is in fact the default layout expected by the Distutils, and the one that +requires the least work to describe in your setup script:: + + from distutils.core import setup + setup(name='foobar', + version='1.0', + packages=['foobar'], + ) + +If you want to put modules in directories not named for their package, then you +need to use the :option:`package_dir` option again. For example, if the +:file:`src` directory holds modules in the :mod:`foobar` package:: + + <root>/ + setup.py + src/ + __init__.py + foo.py + bar.py + +an appropriate setup script would be :: + + from distutils.core import setup + setup(name='foobar', + version='1.0', + package_dir={'foobar': 'src'}, + packages=['foobar'], + ) + +Or, you might put modules from your main package right in the distribution +root:: + + <root>/ + setup.py + __init__.py + foo.py + bar.py + +in which case your setup script would be :: + + from distutils.core import setup + setup(name='foobar', + version='1.0', + package_dir={'foobar': ''}, + packages=['foobar'], + ) + +(The empty string also stands for the current directory.)
+ +If you have sub-packages, they must be explicitly listed in :option:`packages`, +but any entries in :option:`package_dir` automatically extend to sub-packages. +(In other words, the Distutils does *not* scan your source tree, trying to +figure out which directories correspond to Python packages by looking for +:file:`__init__.py` files.) Thus, if the default layout grows a sub-package:: + + <root>/ + setup.py + foobar/ + __init__.py + foo.py + bar.py + subfoo/ + __init__.py + blah.py + +then the corresponding setup script would be :: + + from distutils.core import setup + setup(name='foobar', + version='1.0', + packages=['foobar', 'foobar.subfoo'], + ) + +(Again, the empty string in :option:`package_dir` stands for the current +directory.) + + +.. _single-ext: + +Single extension module +======================= + +Extension modules are specified using the :option:`ext_modules` option. +:option:`package_dir` has no effect on where extension source files are found; +it only affects the source for pure Python modules. The simplest case, a +single extension module in a single C source file, is:: + + <root>/ + setup.py + foo.c + +If the :mod:`foo` extension belongs in the root package, the setup script for +this could be :: + + from distutils.core import setup + from distutils.extension import Extension + setup(name='foobar', + version='1.0', + ext_modules=[Extension('foo', ['foo.c'])], + ) + +If the extension actually belongs in a package, say :mod:`foopkg`, then +with exactly the same source tree layout, this extension can be put in the +:mod:`foopkg` package simply by changing the name of the +extension:: + + from distutils.core import setup + from distutils.extension import Extension + setup(name='foobar', + version='1.0', + ext_modules=[Extension('foopkg.foo', ['foo.c'])], + ) + +.. % \section{Multiple extension modules} +.. % \label{multiple-ext} + +..
% \section{Putting it all together} + + diff --git a/Doc/distutils/extending.rst b/Doc/distutils/extending.rst new file mode 100644 index 0000000..a2930c7 --- /dev/null +++ b/Doc/distutils/extending.rst @@ -0,0 +1,96 @@ +.. _extending: + +******************* +Extending Distutils +******************* + +Distutils can be extended in various ways. Most extensions take the form of new +commands or replacements for existing commands. New commands may be written to +support new types of platform-specific packaging, for example, while +replacements for existing commands may be made to modify details of how the +command operates on a package. + +Most extensions of the distutils are made within :file:`setup.py` scripts that +want to modify existing commands; many simply add a few file extensions that +should be copied into packages in addition to :file:`.py` files as a +convenience. + +Most distutils command implementations are subclasses of the :class:`Command` +class from :mod:`distutils.cmd`. New commands may directly inherit from +:class:`Command`, while replacements often derive from :class:`Command` +indirectly, directly subclassing the command they are replacing. Commands are +required to derive from :class:`Command`. + +.. % \section{Extending existing commands} +.. % \label{extend-existing} + +.. % \section{Writing new commands} +.. % \label{new-commands} +.. % \XXX{Would an uninstall command be a good example here?} + + +Integrating new commands +======================== + +There are different ways to integrate new command implementations into +distutils. The most difficult is to lobby for the inclusion of the new features +in distutils itself, and wait for (and require) a version of Python that +provides that support. This is really hard for many reasons. 
+ +The most common, and possibly the most reasonable for most needs, is to include +the new implementations with your :file:`setup.py` script, and cause the +:func:`distutils.core.setup` function to use them:: + + from distutils.command.build_py import build_py as _build_py + from distutils.core import setup + + class build_py(_build_py): + """Specialized Python source builder.""" + + # implement whatever needs to be different... + + setup(cmdclass={'build_py': build_py}, + ...) + +This approach is most valuable if the new implementations must be used to use a +particular package, as everyone interested in the package will need to have the +new command implementation. + +Beginning with Python 2.4, a third option is available, intended to allow new +commands to be added which can support existing :file:`setup.py` scripts without +requiring modifications to the Python installation. This is expected to allow +third-party extensions to provide support for additional packaging systems, but +the commands can be used for anything distutils commands can be used for. A new +configuration option, :option:`command_packages` (command-line option +:option:`--command-packages`), can be used to specify additional packages to be +searched for modules implementing commands. Like all distutils options, this +can be specified on the command line or in a configuration file. This option +can only be set in the ``[global]`` section of a configuration file, or before +any commands on the command line. If set in a configuration file, it can be +overridden from the command line; setting it to an empty string on the command +line causes the default to be used. This should never be set in a configuration +file provided with a package. + +This new option can be used to add any number of packages to the list of +packages searched for command implementations; multiple package names should be +separated by commas.
When not specified, the search is only performed in the +:mod:`distutils.command` package. When :file:`setup.py` is run with the option +:option:`--command-packages` :option:`distcmds,buildcmds`, however, the packages +:mod:`distutils.command`, :mod:`distcmds`, and :mod:`buildcmds` will be searched +in that order. New commands are expected to be implemented in modules of the +same name as the command by classes sharing the same name. Given the example +command line option above, the command :command:`bdist_openpkg` could be +implemented by the class :class:`distcmds.bdist_openpkg.bdist_openpkg` or +:class:`buildcmds.bdist_openpkg.bdist_openpkg`. + + +Adding new distribution types +============================= + +Commands that create distributions (files in the :file:`dist/` directory) need +to add ``(command, filename)`` pairs to ``self.distribution.dist_files`` so that +:command:`upload` can upload it to PyPI. The *filename* in the pair contains no +path information, only the name of the file itself. In dry-run mode, pairs +should still be added to represent what would have been created. + + diff --git a/Doc/distutils/index.rst b/Doc/distutils/index.rst new file mode 100644 index 0000000..6d82c84 --- /dev/null +++ b/Doc/distutils/index.rst @@ -0,0 +1,30 @@ +.. _distutils-index: + +############################### + Distributing Python Modules +############################### + +:Authors: Greg Ward, Anthony Baxter +:Email: distutils-sig@python.org +:Release: |version| +:Date: |today| + +This document describes the Python Distribution Utilities ("Distutils") from +the module developer's point of view, describing how to use the Distutils to +make Python modules and extensions easily available to a wider audience with +very little overhead for build/release/install mechanics. + +.. 
toctree:: + :maxdepth: 2 + + introduction.rst + setupscript.rst + configfile.rst + sourcedist.rst + builtdist.rst + packageindex.rst + uploading.rst + examples.rst + extending.rst + commandref.rst + apiref.rst diff --git a/Doc/distutils/introduction.rst b/Doc/distutils/introduction.rst new file mode 100644 index 0000000..b772b01 --- /dev/null +++ b/Doc/distutils/introduction.rst @@ -0,0 +1,208 @@ +.. _distutils-intro: + +**************************** +An Introduction to Distutils +**************************** + +This document covers using the Distutils to distribute your Python modules, +concentrating on the role of developer/distributor: if you're looking for +information on installing Python modules, you should refer to the +:ref:`install-index` chapter. + + +.. _distutils-concepts: + +Concepts & Terminology +====================== + +Using the Distutils is quite simple, both for module developers and for +users/administrators installing third-party modules. As a developer, your +responsibilities (apart from writing solid, well-documented and well-tested +code, of course!) are: + +* write a setup script (:file:`setup.py` by convention) + +* (optional) write a setup configuration file + +* create a source distribution + +* (optional) create one or more built (binary) distributions + +Each of these tasks is covered in this document. + +Not all module developers have access to a multitude of platforms, so it's not +always feasible to expect them to create a multitude of built distributions. It +is hoped that a class of intermediaries, called *packagers*, will arise to +address this need. Packagers will take source distributions released by module +developers, build them on one or more platforms, and release the resulting built +distributions. Thus, users on the most popular platforms will be able to +install most popular Python module distributions in the most natural way for +their platform, without having to run a single setup script or compile a line of +code. 
+ + +.. _distutils-simple-example: + +A Simple Example +================ + +The setup script is usually quite simple, although since it's written in Python, +there are no arbitrary limits to what you can do with it, though you should be +careful about putting arbitrarily expensive operations in your setup script. +Unlike, say, Autoconf-style configure scripts, the setup script may be run +multiple times in the course of building and installing your module +distribution. + +If all you want to do is distribute a module called :mod:`foo`, contained in a +file :file:`foo.py`, then your setup script can be as simple as this:: + + from distutils.core import setup + setup(name='foo', + version='1.0', + py_modules=['foo'], + ) + +Some observations: + +* most information that you supply to the Distutils is supplied as keyword + arguments to the :func:`setup` function + +* those keyword arguments fall into two categories: package metadata (name, + version number) and information about what's in the package (a list of pure + Python modules, in this case) + +* modules are specified by module name, not filename (the same will hold true + for packages and extensions) + +* it's recommended that you supply a little more metadata, in particular your + name, email address and a URL for the project (see section :ref:`setup-script` + for an example) + +To create a source distribution for this module, you would create a setup +script, :file:`setup.py`, containing the above code, and run:: + + python setup.py sdist + +which will create an archive file (e.g., tarball on Unix, ZIP file on Windows) +containing your setup script :file:`setup.py`, and your module :file:`foo.py`. +The archive file will be named :file:`foo-1.0.tar.gz` (or :file:`.zip`), and +will unpack into a directory :file:`foo-1.0`. 
+ +If an end-user wishes to install your :mod:`foo` module, all they have to do is +download :file:`foo-1.0.tar.gz` (or :file:`.zip`), unpack it, and---from the +:file:`foo-1.0` directory---run :: + + python setup.py install + +which will ultimately copy :file:`foo.py` to the appropriate directory for +third-party modules in their Python installation. + +This simple example demonstrates some fundamental concepts of the Distutils. +First, both developers and installers have the same basic user interface, i.e. +the setup script. The difference is which Distutils *commands* they use: the +:command:`sdist` command is almost exclusively for module developers, while +:command:`install` is more often for installers (although most developers will +want to install their own code occasionally). + +If you want to make things really easy for your users, you can create one or +more built distributions for them. For instance, if you are running on a +Windows machine, and want to make things easy for other Windows users, you can +create an executable installer (the most appropriate type of built distribution +for this platform) with the :command:`bdist_wininst` command. For example:: + + python setup.py bdist_wininst + +will create an executable installer, :file:`foo-1.0.win32.exe`, in the current +directory. + +Other useful built distribution formats are RPM, implemented by the +:command:`bdist_rpm` command, Solaris :program:`pkgtool` +(:command:`bdist_pkgtool`), and HP-UX :program:`swinstall` +(:command:`bdist_sdux`). For example, the following command will create an RPM +file called :file:`foo-1.0.noarch.rpm`:: + + python setup.py bdist_rpm + +(The :command:`bdist_rpm` command uses the :command:`rpm` executable, therefore +this has to be run on an RPM-based system such as Red Hat Linux, SuSE Linux, or +Mandrake Linux.) + +You can find out what distribution formats are available at any time by running +:: + + python setup.py bdist --help-formats + + +.. 
_python-terms: + +General Python terminology +========================== + +If you're reading this document, you probably have a good idea of what modules, +extensions, and so forth are. Nevertheless, just to be sure that everyone is +operating from a common starting point, we offer the following glossary of +common Python terms: + +module + the basic unit of code reusability in Python: a block of code imported by some + other code. Three types of modules concern us here: pure Python modules, + extension modules, and packages. + +pure Python module + a module written in Python and contained in a single :file:`.py` file (and + possibly associated :file:`.pyc` and/or :file:`.pyo` files). Sometimes referred + to as a "pure module." + +extension module + a module written in the low-level language of the Python implementation: C/C++ + for Python, Java for Jython. Typically contained in a single dynamically + loadable pre-compiled file, e.g. a shared object (:file:`.so`) file for Python + extensions on Unix, a DLL (given the :file:`.pyd` extension) for Python + extensions on Windows, or a Java class file for Jython extensions. (Note that + currently, the Distutils only handles C/C++ extensions for Python.) + +package + a module that contains other modules; typically contained in a directory in the + filesystem and distinguished from other directories by the presence of a file + :file:`__init__.py`. + +root package + the root of the hierarchy of packages. (This isn't really a package, since it + doesn't have an :file:`__init__.py` file. But we have to call it something.) + The vast majority of the standard library is in the root package, as are many + small, standalone third-party modules that don't belong to a larger module + collection. Unlike regular packages, modules in the root package can be found in + many directories: in fact, every directory listed in ``sys.path`` contributes + modules to the root package. + + +.. 
_distutils-term: + +Distutils-specific terminology +============================== + +The following terms apply more specifically to the domain of distributing Python +modules using the Distutils: + +module distribution + a collection of Python modules distributed together as a single downloadable + resource and meant to be installed *en masse*. Examples of some well-known + module distributions are Numeric Python, PyXML, PIL (the Python Imaging + Library), or mxBase. (This would be called a *package*, except that term is + already taken in the Python context: a single module distribution may contain + zero, one, or many Python packages.) + +pure module distribution + a module distribution that contains only pure Python modules and packages. + Sometimes referred to as a "pure distribution." + +non-pure module distribution + a module distribution that contains at least one extension module. Sometimes + referred to as a "non-pure distribution." + +distribution root + the top-level directory of your source tree (or source distribution); the + directory where :file:`setup.py` exists. Generally :file:`setup.py` will be + run from this directory. + + diff --git a/Doc/distutils/packageindex.rst b/Doc/distutils/packageindex.rst new file mode 100644 index 0000000..f0f886b --- /dev/null +++ b/Doc/distutils/packageindex.rst @@ -0,0 +1,65 @@ +.. _package-index: + +********************************** +Registering with the Package Index +********************************** + +The Python Package Index (PyPI) holds meta-data describing distributions +packaged with distutils. The distutils command :command:`register` is used to +submit your distribution's meta-data to the index. It is invoked as follows:: + + python setup.py register + +Distutils will respond with the following prompt:: + + running register + We need to know who you are, so please choose either: + 1. use your existing login, + 2. register as a new user, + 3. 
have the server generate a new password for you (and email it to you), or + 4. quit + Your selection [default 1]: + +Note: if your username and password are saved locally, you will not see this +menu. + +If you have not registered with PyPI, then you will need to do so now. You +should choose option 2, and enter your details as required. Soon after +submitting your details, you will receive an email which will be used to confirm +your registration. + +Once you are registered, you may choose option 1 from the menu. You will be +prompted for your PyPI username and password, and :command:`register` will then +submit your meta-data to the index. + +You may submit any number of versions of your distribution to the index. If you +alter the meta-data for a particular version, you may submit it again and the +index will be updated. + +PyPI holds a record for each (name, version) combination submitted. The first +user to submit information for a given name is designated the Owner of that +name. They may submit changes through the :command:`register` command or through +the web interface. They may also designate other users as Owners or Maintainers. +Maintainers may edit the package information, but not designate other Owners or +Maintainers. + +By default PyPI will list all versions of a given package. To hide certain +versions, the Hidden property should be set to yes. This must be edited through +the web interface. + + +.. _pypirc: + +The .pypirc file +================ + +The :file:`.pypirc` file is formatted as follows:: + + [server-login] + repository: <repository-url> + username: <username> + password: <password> + +*repository* can be omitted and defaults to ``http://www.python.org/pypi``. + + diff --git a/Doc/distutils/setupscript.rst b/Doc/distutils/setupscript.rst new file mode 100644 index 0000000..26f50e6 --- /dev/null +++ b/Doc/distutils/setupscript.rst @@ -0,0 +1,669 @@ +.. 
_setup-script: + +************************ +Writing the Setup Script +************************ + +The setup script is the centre of all activity in building, distributing, and +installing modules using the Distutils. The main purpose of the setup script is +to describe your module distribution to the Distutils, so that the various +commands that operate on your modules do the right thing. As we saw in section +:ref:`distutils-simple-example` above, the setup script consists mainly of a call to +:func:`setup`, and most information supplied to the Distutils by the module +developer is supplied as keyword arguments to :func:`setup`. + +Here's a slightly more involved example, which we'll follow for the next couple +of sections: the Distutils' own setup script. (Keep in mind that although the +Distutils are included with Python 1.6 and later, they also have an independent +existence so that Python 1.5.2 users can use them to install other module +distributions. The Distutils' own setup script, shown here, is used to install +the package into Python 1.5.2.) :: + + #!/usr/bin/env python + + from distutils.core import setup + + setup(name='Distutils', + version='1.0', + description='Python Distribution Utilities', + author='Greg Ward', + author_email='gward@python.net', + url='http://www.python.org/sigs/distutils-sig/', + packages=['distutils', 'distutils.command'], + ) + +There are only two differences between this and the trivial one-file +distribution presented in section :ref:`distutils-simple-example`: more metadata, and the +specification of pure Python modules by package, rather than by module. This is +important since the Distutils consist of a couple of dozen modules split into +(so far) two packages; an explicit list of every module would be tedious to +generate and difficult to maintain. For more information on the additional +meta-data, see section :ref:`meta-data`. 
+ +Note that any pathnames (files or directories) supplied in the setup script +should be written using the Unix convention, i.e. slash-separated. The +Distutils will take care of converting this platform-neutral representation into +whatever is appropriate on your current platform before actually using the +pathname. This makes your setup script portable across operating systems, which +of course is one of the major goals of the Distutils. In this spirit, all +pathnames in this document are slash-separated. (Mac OS 9 programmers should +keep in mind that the *absence* of a leading slash indicates a relative path, +the opposite of the Mac OS convention with colons.) + +This, of course, only applies to pathnames given to Distutils functions. If +you, for example, use standard Python functions such as :func:`glob.glob` or +:func:`os.listdir` to specify files, you should be careful to write portable +code instead of hardcoding path separators:: + + glob.glob(os.path.join('mydir', 'subdir', '*.html')) + os.listdir(os.path.join('mydir', 'subdir')) + + +.. _listing-packages: + +Listing whole packages +====================== + +The :option:`packages` option tells the Distutils to process (build, distribute, +install, etc.) all pure Python modules found in each package mentioned in the +:option:`packages` list. In order to do this, of course, there has to be a +correspondence between package names and directories in the filesystem. The +default correspondence is the most obvious one, i.e. package :mod:`distutils` is +found in the directory :file:`distutils` relative to the distribution root. +Thus, when you say ``packages = ['foo']`` in your setup script, you are +promising that the Distutils will find a file :file:`foo/__init__.py` (which +might be spelled differently on your system, but you get the idea) relative to +the directory where your setup script lives. If you break this promise, the +Distutils will issue a warning but still process the broken package anyway. 
+ +If you use a different convention to lay out your source directory, that's no +problem: you just have to supply the :option:`package_dir` option to tell the +Distutils about your convention. For example, say you keep all Python source +under :file:`lib`, so that modules in the "root package" (i.e., not in any +package at all) are in :file:`lib`, modules in the :mod:`foo` package are in +:file:`lib/foo`, and so forth. Then you would put :: + + package_dir = {'': 'lib'} + +in your setup script. The keys to this dictionary are package names, and an +empty package name stands for the root package. The values are directory names +relative to your distribution root. In this case, when you say ``packages = +['foo']``, you are promising that the file :file:`lib/foo/__init__.py` exists. + +Another possible convention is to put the :mod:`foo` package right in +:file:`lib`, the :mod:`foo.bar` package in :file:`lib/bar`, etc. This would be +written in the setup script as :: + + package_dir = {'foo': 'lib'} + +A ``package: dir`` entry in the :option:`package_dir` dictionary implicitly +applies to all packages below *package*, so the :mod:`foo.bar` case is +automatically handled here. In this example, having ``packages = ['foo', +'foo.bar']`` tells the Distutils to look for :file:`lib/__init__.py` and +:file:`lib/bar/__init__.py`. (Keep in mind that although :option:`package_dir` +applies recursively, you must explicitly list all packages in +:option:`packages`: the Distutils will *not* recursively scan your source tree +looking for any directory with an :file:`__init__.py` file.) + + +.. _listing-modules: + +Listing individual modules +========================== + +For a small module distribution, you might prefer to list all modules rather +than listing packages---especially the case of a single module that goes in the +"root package" (i.e., no package at all). 
This simplest case was shown in +section :ref:`distutils-simple-example`; here is a slightly more involved example:: + + py_modules = ['mod1', 'pkg.mod2'] + +This describes two modules, one of them in the "root" package, the other in the +:mod:`pkg` package. Again, the default package/directory layout implies that +these two modules can be found in :file:`mod1.py` and :file:`pkg/mod2.py`, and +that :file:`pkg/__init__.py` exists as well. And again, you can override the +package/directory correspondence using the :option:`package_dir` option. + + +.. _describing-extensions: + +Describing extension modules +============================ + +Just as writing Python extension modules is a bit more complicated than writing +pure Python modules, describing them to the Distutils is a bit more complicated. +Unlike pure modules, it's not enough just to list modules or packages and expect +the Distutils to go out and find the right files; you have to specify the +extension name, source file(s), and any compile/link requirements (include +directories, libraries to link with, etc.). + +.. % XXX read over this section + +All of this is done through another keyword argument to :func:`setup`, the +:option:`ext_modules` option. :option:`ext_modules` is just a list of +:class:`Extension` instances, each of which describes a single extension module. +Suppose your distribution includes a single extension, called :mod:`foo` and +implemented by :file:`foo.c`. If no additional instructions to the +compiler/linker are needed, describing this extension is quite simple:: + + Extension('foo', ['foo.c']) + +The :class:`Extension` class can be imported from :mod:`distutils.core` along +with :func:`setup`. 
Thus, the setup script for a module distribution that +contains only this one extension and nothing else might be:: + + from distutils.core import setup, Extension + setup(name='foo', + version='1.0', + ext_modules=[Extension('foo', ['foo.c'])], + ) + +The :class:`Extension` class (actually, the underlying extension-building +machinery implemented by the :command:`build_ext` command) supports a great deal +of flexibility in describing Python extensions, which is explained in the +following sections. + + +Extension names and packages +---------------------------- + +The first argument to the :class:`Extension` constructor is always the name of +the extension, including any package names. For example, :: + + Extension('foo', ['src/foo1.c', 'src/foo2.c']) + +describes an extension that lives in the root package, while :: + + Extension('pkg.foo', ['src/foo1.c', 'src/foo2.c']) + +describes the same extension in the :mod:`pkg` package. The source files and +resulting object code are identical in both cases; the only difference is where +in the filesystem (and therefore where in Python's namespace hierarchy) the +resulting extension lives. + +If you have a number of extensions all in the same package (or all under the +same base package), use the :option:`ext_package` keyword argument to +:func:`setup`. For example, :: + + setup(... + ext_package='pkg', + ext_modules=[Extension('foo', ['foo.c']), + Extension('subpkg.bar', ['bar.c'])], + ) + +will compile :file:`foo.c` to the extension :mod:`pkg.foo`, and :file:`bar.c` to +:mod:`pkg.subpkg.bar`. + + +Extension source files +---------------------- + +The second argument to the :class:`Extension` constructor is a list of source +files. Since the Distutils currently only support C, C++, and Objective-C +extensions, these are normally C/C++/Objective-C source files. (Be sure to use +appropriate extensions to distinguish C++\ source files: :file:`.cc` and +:file:`.cpp` seem to be recognized by both Unix and Windows compilers.) 
+ +However, you can also include SWIG interface (:file:`.i`) files in the list; the +:command:`build_ext` command knows how to deal with SWIG extensions: it will run +SWIG on the interface file and compile the resulting C/C++ file into your +extension. + +**\*\*** SWIG support is rough around the edges and largely untested! **\*\*** + +This warning notwithstanding, options to SWIG can be currently passed like +this:: + + setup(... + ext_modules=[Extension('_foo', ['foo.i'], + swig_opts=['-modern', '-I../include'])], + py_modules=['foo'], + ) + +Or on the commandline like this:: + + > python setup.py build_ext --swig-opts="-modern -I../include" + +On some platforms, you can include non-source files that are processed by the +compiler and included in your extension. Currently, this just means Windows +message text (:file:`.mc`) files and resource definition (:file:`.rc`) files for +Visual C++. These will be compiled to binary resource (:file:`.res`) files and +linked into the executable. + + +Preprocessor options +-------------------- + +Three optional arguments to :class:`Extension` will help if you need to specify +include directories to search or preprocessor macros to define/undefine: +``include_dirs``, ``define_macros``, and ``undef_macros``. 
+ +For example, if your extension requires header files in the :file:`include` +directory under your distribution root, use the ``include_dirs`` option:: + + Extension('foo', ['foo.c'], include_dirs=['include']) + +You can specify absolute directories there; if you know that your extension will +only be built on Unix systems with X11R6 installed to :file:`/usr`, you can get +away with :: + + Extension('foo', ['foo.c'], include_dirs=['/usr/include/X11']) + +You should avoid this sort of non-portable usage if you plan to distribute your +code: it's probably better to write C code like :: + + #include <X11/Xlib.h> + +If you need to include header files from some other Python extension, you can +take advantage of the fact that header files are installed in a consistent way +by the Distutils :command:`install_headers` command. For example, the Numerical +Python header files are installed (on a standard Unix installation) to +:file:`/usr/local/include/python1.5/Numerical`. (The exact location will differ +according to your platform and Python installation.) Since the Python include +directory---\ :file:`/usr/local/include/python1.5` in this case---is always +included in the search path when building Python extensions, the best approach +is to write C code like :: + + #include <Numerical/arrayobject.h> + +If you must put the :file:`Numerical` include directory right into your header +search path, though, you can find that directory using the Distutils +:mod:`distutils.sysconfig` module:: + + from distutils.sysconfig import get_python_inc + incdir = os.path.join(get_python_inc(plat_specific=1), 'Numerical') + setup(..., + Extension(..., include_dirs=[incdir]), + ) + +Even though this is quite portable---it will work on any Python installation, +regardless of platform---it's probably easier to just write your C code in the +sensible way. + +You can define and undefine pre-processor macros with the ``define_macros`` and +``undef_macros`` options. 
``define_macros`` takes a list of ``(name, value)`` +tuples, where ``name`` is the name of the macro to define (a string) and +``value`` is its value: either a string or ``None``. (Defining a macro ``FOO`` +to ``None`` is the equivalent of a bare ``#define FOO`` in your C source: with +most compilers, this sets ``FOO`` to the string ``1``.) ``undef_macros`` is +just a list of macros to undefine. + +For example:: + + Extension(..., + define_macros=[('NDEBUG', '1'), + ('HAVE_STRFTIME', None)], + undef_macros=['HAVE_FOO', 'HAVE_BAR']) + +is the equivalent of having this at the top of every C source file:: + + #define NDEBUG 1 + #define HAVE_STRFTIME + #undef HAVE_FOO + #undef HAVE_BAR + + +Library options +--------------- + +You can also specify the libraries to link against when building your extension, +and the directories to search for those libraries. The ``libraries`` option is +a list of libraries to link against, ``library_dirs`` is a list of directories +to search for libraries at link-time, and ``runtime_library_dirs`` is a list of +directories to search for shared (dynamically loaded) libraries at run-time. + +For example, if you need to link against libraries known to be in the standard +library search path on target systems :: + + Extension(..., + libraries=['gdbm', 'readline']) + +If you need to link with libraries in a non-standard location, you'll have to +include the location in ``library_dirs``:: + + Extension(..., + library_dirs=['/usr/X11R6/lib'], + libraries=['X11', 'Xt']) + +(Again, this sort of non-portable construct should be avoided if you intend to +distribute your code.) + +**\*\*** Should mention clib libraries here or somewhere else! **\*\*** + + +Other options +------------- + +There are still some other options which can be used to handle special cases. + +The :option:`extra_objects` option is a list of object files to be passed to the +linker. These files must not have extensions, as the default extension for the +compiler is used. 
+ +:option:`extra_compile_args` and :option:`extra_link_args` can be used to +specify additional command line options for the respective compiler and linker +command lines. + +:option:`export_symbols` is only useful on Windows. It can contain a list of +symbols (functions or variables) to be exported. This option is not needed when +building compiled extensions: Distutils will automatically add ``initmodule`` +to the list of exported symbols. + + +Relationships between Distributions and Packages +================================================ + +A distribution may relate to packages in three specific ways: + +#. It can require packages or modules. + +#. It can provide packages or modules. + +#. It can obsolete packages or modules. + +These relationships can be specified using keyword arguments to the +:func:`distutils.core.setup` function. + +Dependencies on other Python modules and packages can be specified by supplying +the *requires* keyword argument to :func:`setup`. The value must be a list of +strings. Each string specifies a package that is required, and optionally what +versions are sufficient. + +To specify that any version of a module or package is required, the string +should consist entirely of the module or package name. Examples include +``'mymodule'`` and ``'xml.parsers.expat'``. + +If specific versions are required, a sequence of qualifiers can be supplied in +parentheses. Each qualifier may consist of a comparison operator and a version +number. The accepted comparison operators are:: + + < > == + <= >= != + +These can be combined by using multiple qualifiers separated by commas (and +optional whitespace). In this case, all of the qualifiers must be matched; a +logical AND is used to combine the evaluations. 
+ +Let's look at a bunch of examples: + ++-------------------------+----------------------------------------------+ +| Requires Expression | Explanation | ++=========================+==============================================+ +| ``==1.0`` | Only version ``1.0`` is compatible | ++-------------------------+----------------------------------------------+ +| ``>1.0, !=1.5.1, <2.0`` | Any version after ``1.0`` and before ``2.0`` | +| | is compatible, except ``1.5.1`` | ++-------------------------+----------------------------------------------+ + +Now that we can specify dependencies, we also need to be able to specify what we +provide that other distributions can require. This is done using the *provides* +keyword argument to :func:`setup`. The value for this keyword is a list of +strings, each of which names a Python module or package, and optionally +identifies the version. If the version is not specified, it is assumed to match +that of the distribution. + +Some examples: + ++---------------------+----------------------------------------------+ +| Provides Expression | Explanation | ++=====================+==============================================+ +| ``mypkg`` | Provide ``mypkg``, using the distribution | +| | version | ++---------------------+----------------------------------------------+ +| ``mypkg (1.1)`` | Provide ``mypkg`` version 1.1, regardless of | +| | the distribution version | ++---------------------+----------------------------------------------+ + +A package can declare that it obsoletes other packages using the *obsoletes* +keyword argument. The value for this is similar to that of the *requires* +keyword: a list of strings giving module or package specifiers. Each specifier +consists of a module or package name optionally followed by one or more version +qualifiers. Version qualifiers are given in parentheses after the module or +package name. 
+ +The versions identified by the qualifiers are those that are obsoleted by the +distribution being described. If no qualifiers are given, all versions of the +named module or package are understood to be obsoleted. + + +Installing Scripts +================== + +So far we have been dealing with pure and non-pure Python modules, which are +usually not run by themselves but imported by scripts. + +Scripts are files containing Python source code, intended to be started from the +command line. Scripts don't require Distutils to do anything very complicated. +The only clever feature is that if the first line of the script starts with +``#!`` and contains the word "python", the Distutils will adjust the first line +to refer to the current interpreter location by default. The +:option:`--executable` (or :option:`-e`) option allows the interpreter path to +be explicitly overridden. + +The :option:`scripts` option is simply a list of files to be handled in this +way. From the PyXML setup script:: + + setup(... + scripts=['scripts/xmlproc_parse', 'scripts/xmlproc_val'] + ) + + +Installing Package Data +======================= + +Often, additional files need to be installed into a package. These files are +often data that's closely related to the package's implementation, or text files +containing documentation that might be of interest to programmers using the +package. These files are called :dfn:`package data`. + +Package data can be added to packages using the ``package_data`` keyword +argument to the :func:`setup` function. The value must be a mapping from +package name to a list of relative path names that should be copied into the +package. The paths are interpreted as relative to the directory containing the +package (information from the ``package_dir`` mapping is used if appropriate); +that is, the files are expected to be part of the package in the source +directories. 
They may contain glob patterns as well. + +The path names may contain directory portions; any necessary directories will be +created in the installation. + +For example, if a package should contain a subdirectory with several data files, +the files can be arranged like this in the source tree:: + + setup.py + src/ + mypkg/ + __init__.py + module.py + data/ + tables.dat + spoons.dat + forks.dat + +The corresponding call to :func:`setup` might be:: + + setup(..., + packages=['mypkg'], + package_dir={'mypkg': 'src/mypkg'}, + package_data={'mypkg': ['data/*.dat']}, + ) + +.. versionadded:: 2.4 + + +Installing Additional Files +=========================== + +The :option:`data_files` option can be used to specify additional files needed +by the module distribution: configuration files, message catalogs, data files, +anything which doesn't fit in the previous categories. + +:option:`data_files` specifies a sequence of (*directory*, *files*) pairs in the +following way:: + + setup(... + data_files=[('bitmaps', ['bm/b1.gif', 'bm/b2.gif']), + ('config', ['cfg/data.cfg']), + ('/etc/init.d', ['init-script'])] + ) + +Note that you can specify the directory names where the data files will be +installed, but you cannot rename the data files themselves. + +Each (*directory*, *files*) pair in the sequence specifies the installation +directory and the files to install there. If *directory* is a relative path, it +is interpreted relative to the installation prefix (Python's ``sys.prefix`` for +pure-Python packages, ``sys.exec_prefix`` for packages that contain extension +modules). Each file name in *files* is interpreted relative to the +:file:`setup.py` script at the top of the package source distribution. No +directory information from *files* is used to determine the final location of +the installed file; only the name of the file is used. 
+ +You can specify the :option:`data_files` options as a simple sequence of files +without specifying a target directory, but this is not recommended, and the +:command:`install` command will print a warning in this case. To install data +files directly in the target directory, an empty string should be given as the +directory. + + +.. _meta-data: + +Additional meta-data +==================== + +The setup script may include additional meta-data beyond the name and version. +This information includes: + ++----------------------+---------------------------+-----------------+--------+ +| Meta-Data | Description | Value | Notes | ++======================+===========================+=================+========+ +| ``name`` | name of the package | short string | \(1) | ++----------------------+---------------------------+-----------------+--------+ +| ``version`` | version of this release | short string | (1)(2) | ++----------------------+---------------------------+-----------------+--------+ +| ``author`` | package author's name | short string | \(3) | ++----------------------+---------------------------+-----------------+--------+ +| ``author_email`` | email address of the | email address | \(3) | +| | package author | | | ++----------------------+---------------------------+-----------------+--------+ +| ``maintainer`` | package maintainer's name | short string | \(3) | ++----------------------+---------------------------+-----------------+--------+ +| ``maintainer_email`` | email address of the | email address | \(3) | +| | package maintainer | | | ++----------------------+---------------------------+-----------------+--------+ +| ``url`` | home page for the package | URL | \(1) | ++----------------------+---------------------------+-----------------+--------+ +| ``description`` | short, summary | short string | | +| | description of the | | | +| | package | | | ++----------------------+---------------------------+-----------------+--------+ +| ``long_description`` | 
longer description of the | long string | | +| | package | | | ++----------------------+---------------------------+-----------------+--------+ +| ``download_url`` | location where the | URL | \(4) | +| | package may be downloaded | | | ++----------------------+---------------------------+-----------------+--------+ +| ``classifiers`` | a list of classifiers | list of strings | \(4) | ++----------------------+---------------------------+-----------------+--------+ + +Notes: + +(1) + These fields are required. + +(2) + It is recommended that versions take the form *major.minor[.patch[.sub]]*. + +(3) + Either the author or the maintainer must be identified. + +(4) + These fields should not be used if your package is to be compatible with Python + versions prior to 2.2.3 or 2.3. The list is available from the `PyPI website + `_. + +'short string' + A single line of text, not more than 200 characters. + +'long string' + Multiple lines of plain text in reStructuredText format (see + http://docutils.sf.net/). + +'list of strings' + See below. + +None of the string values may be Unicode. + +Encoding the version information is an art in itself. Python packages generally +adhere to the version format *major.minor[.patch][sub]*. The major number is 0 +for initial, experimental releases of software. It is incremented for releases +that represent major milestones in a package. The minor number is incremented +when important new features are added to the package. The patch number +increments when bug-fix releases are made. Additional trailing version +information is sometimes used to indicate sub-releases. These are +"a1,a2,...,aN" (for alpha releases, where functionality and API may change), +"b1,b2,...,bN" (for beta releases, which only fix bugs) and "pr1,pr2,...,prN" +(for final pre-release release testing). 
Some examples: + +0.1.0 + the first, experimental release of a package + +1.0.1a2 + the second alpha release of the first patch version of 1.0 + +:option:`classifiers` are specified in a Python list:: + + setup(... + classifiers=[ + 'Development Status :: 4 - Beta', + 'Environment :: Console', + 'Environment :: Web Environment', + 'Intended Audience :: End Users/Desktop', + 'Intended Audience :: Developers', + 'Intended Audience :: System Administrators', + 'License :: OSI Approved :: Python Software Foundation License', + 'Operating System :: MacOS :: MacOS X', + 'Operating System :: Microsoft :: Windows', + 'Operating System :: POSIX', + 'Programming Language :: Python', + 'Topic :: Communications :: Email', + 'Topic :: Office/Business', + 'Topic :: Software Development :: Bug Tracking', + ], + ) + +If you wish to include classifiers in your :file:`setup.py` file and also wish +to remain backwards-compatible with Python releases prior to 2.2.3, then you can +include the following code fragment in your :file:`setup.py` before the +:func:`setup` call. :: + + # patch distutils if it can't cope with the "classifiers" or + # "download_url" keywords + from sys import version + if version < '2.2.3': + from distutils.dist import DistributionMetadata + DistributionMetadata.classifiers = None + DistributionMetadata.download_url = None + + +Debugging the setup script +========================== + +Sometimes things go wrong, and the setup script doesn't do what the developer +wants. + +Distutils catches any exceptions when running the setup script, and prints a +simple error message before the script is terminated. The motivation for this +behaviour is to not confuse administrators who don't know much about Python and +are trying to install a package.
If they get a big long traceback from deep +inside the guts of Distutils, they may think the package or the Python +installation is broken because they don't read all the way down to the bottom +and see that it's a permission problem. + +On the other hand, this doesn't help the developer to find the cause of the +failure. For this purpose, the DISTUTILS_DEBUG environment variable can be set +to anything except an empty string, and distutils will now print detailed +information about what it is doing, and print the full traceback in case an exception +occurs. + + diff --git a/Doc/distutils/sourcedist.rst b/Doc/distutils/sourcedist.rst new file mode 100644 index 0000000..9f15870 --- /dev/null +++ b/Doc/distutils/sourcedist.rst @@ -0,0 +1,207 @@ +.. _source-dist: + +****************************** +Creating a Source Distribution +****************************** + +As shown in section :ref:`distutils-simple-example`, you use the :command:`sdist` command +to create a source distribution. In the simplest case, :: + + python setup.py sdist + +(assuming you haven't specified any :command:`sdist` options in the setup script +or config file), :command:`sdist` creates the archive of the default format for +the current platform. The default format is a gzip'ed tar file +(:file:`.tar.gz`) on Unix, and a ZIP file on Windows. + +You can specify as many formats as you like using the :option:`--formats` +option, for example:: + + python setup.py sdist --formats=gztar,zip + +to create a gzipped tarball and a zip file.
The available formats are: + ++-----------+-------------------------+---------+ +| Format | Description | Notes | ++===========+=========================+=========+ +| ``zip`` | zip file (:file:`.zip`) | (1),(3) | ++-----------+-------------------------+---------+ +| ``gztar`` | gzip'ed tar file | (2),(4) | +| | (:file:`.tar.gz`) | | ++-----------+-------------------------+---------+ +| ``bztar`` | bzip2'ed tar file | \(4) | +| | (:file:`.tar.bz2`) | | ++-----------+-------------------------+---------+ +| ``ztar`` | compressed tar file | \(4) | +| | (:file:`.tar.Z`) | | ++-----------+-------------------------+---------+ +| ``tar`` | tar file (:file:`.tar`) | \(4) | ++-----------+-------------------------+---------+ + +Notes: + +(1) + default on Windows + +(2) + default on Unix + +(3) + requires either external :program:`zip` utility or :mod:`zipfile` module (part + of the standard Python library since Python 1.6) + +(4) + requires external utilities: :program:`tar` and possibly one of :program:`gzip`, + :program:`bzip2`, or :program:`compress` + + +.. _manifest: + +Specifying the files to distribute +================================== + +If you don't supply an explicit list of files (or instructions on how to +generate one), the :command:`sdist` command puts a minimal default set into the +source distribution: + +* all Python source files implied by the :option:`py_modules` and + :option:`packages` options + +* all C source files mentioned in the :option:`ext_modules` or + :option:`libraries` options ( + + **\*\*** getting C library sources currently broken---no + :meth:`get_source_files` method in :file:`build_clib.py`! 
**\*\***) + +* scripts identified by the :option:`scripts` option + +* anything that looks like a test script: :file:`test/test\*.py` (currently, the + Distutils don't do anything with test scripts except include them in source + distributions, but in the future there will be a standard for testing Python + module distributions) + +* :file:`README.txt` (or :file:`README`), :file:`setup.py` (or whatever you + called your setup script), and :file:`setup.cfg` + +Sometimes this is enough, but usually you will want to specify additional files +to distribute. The typical way to do this is to write a *manifest template*, +called :file:`MANIFEST.in` by default. The manifest template is just a list of +instructions for how to generate your manifest file, :file:`MANIFEST`, which is +the exact list of files to include in your source distribution. The +:command:`sdist` command processes this template and generates a manifest based +on its instructions and what it finds in the filesystem. + +If you prefer to roll your own manifest file, the format is simple: one filename +per line, regular files (or symlinks to them) only. If you do supply your own +:file:`MANIFEST`, you must specify everything: the default set of files +described above does not apply in this case. + +The manifest template has one command per line, where each command specifies a +set of files to include or exclude from the source distribution. For an +example, again we turn to the Distutils' own manifest template:: + + include *.txt + recursive-include examples *.txt *.py + prune examples/sample?/build + +The meanings should be fairly clear: include all files in the distribution root +matching :file:`\*.txt`, all files anywhere under the :file:`examples` directory +matching :file:`\*.txt` or :file:`\*.py`, and exclude all directories matching +:file:`examples/sample?/build`. 
All of this is done *after* the standard +include set, so you can exclude files from the standard set with explicit +instructions in the manifest template. (Or, you can use the +:option:`--no-defaults` option to disable the standard set entirely.) There are +several other commands available in the manifest template mini-language; see +section :ref:`sdist-cmd`. + +The order of commands in the manifest template matters: initially, we have the +list of default files as described above, and each command in the template adds +to or removes from that list of files. Once we have fully processed the +manifest template, we remove files that should not be included in the source +distribution: + +* all files in the Distutils "build" tree (default :file:`build/`) + +* all files in directories named :file:`RCS`, :file:`CVS` or :file:`.svn` + +Now we have our complete list of files, which is written to the manifest for +future reference, and then used to build the source distribution archive(s). + +You can disable the default set of included files with the +:option:`--no-defaults` option, and you can disable the standard exclude set +with :option:`--no-prune`. + +Following the Distutils' own manifest template, let's trace how the +:command:`sdist` command builds the list of files to include in the Distutils +source distribution: + +#. include all Python source files in the :file:`distutils` and + :file:`distutils/command` subdirectories (because packages corresponding to + those two directories were mentioned in the :option:`packages` option in the + setup script---see section :ref:`setup-script`) + +#. include :file:`README.txt`, :file:`setup.py`, and :file:`setup.cfg` (standard + files) + +#. include :file:`test/test\*.py` (standard files) + +#. include :file:`\*.txt` in the distribution root (this will find + :file:`README.txt` a second time, but such redundancies are weeded out later) + +#. 
include anything matching :file:`\*.txt` or :file:`\*.py` in the sub-tree + under :file:`examples`, + +#. exclude all files in the sub-trees starting at directories matching + :file:`examples/sample?/build`\ ---this may exclude files included by the + previous two steps, so it's important that the ``prune`` command in the manifest + template comes after the ``recursive-include`` command + +#. exclude the entire :file:`build` tree, and any :file:`RCS`, :file:`CVS` and + :file:`.svn` directories + +Just like in the setup script, file and directory names in the manifest template +should always be slash-separated; the Distutils will take care of converting +them to the standard representation on your platform. That way, the manifest +template is portable across operating systems. + + +.. _manifest-options: + +Manifest-related options +======================== + +The normal course of operations for the :command:`sdist` command is as follows: + +* if the manifest file, :file:`MANIFEST` doesn't exist, read :file:`MANIFEST.in` + and create the manifest + +* if neither :file:`MANIFEST` nor :file:`MANIFEST.in` exist, create a manifest + with just the default file set + +* if either :file:`MANIFEST.in` or the setup script (:file:`setup.py`) are more + recent than :file:`MANIFEST`, recreate :file:`MANIFEST` by reading + :file:`MANIFEST.in` + +* use the list of files now in :file:`MANIFEST` (either just generated or read + in) to create the source distribution archive(s) + +There are a couple of options that modify this behaviour. First, use the +:option:`--no-defaults` and :option:`--no-prune` to disable the standard +"include" and "exclude" sets. 
+ +Second, you might want to force the manifest to be regenerated---for example, if +you have added or removed files or directories that match an existing pattern in +the manifest template, you should regenerate the manifest:: + + python setup.py sdist --force-manifest + +Or, you might just want to (re)generate the manifest, but not create a source +distribution:: + + python setup.py sdist --manifest-only + +:option:`--manifest-only` implies :option:`--force-manifest`. :option:`-o` is a +shortcut for :option:`--manifest-only`, and :option:`-f` for +:option:`--force-manifest`. + + diff --git a/Doc/distutils/uploading.rst b/Doc/distutils/uploading.rst new file mode 100644 index 0000000..0b82184 --- /dev/null +++ b/Doc/distutils/uploading.rst @@ -0,0 +1,37 @@ +.. _package-upload: + +*************************************** +Uploading Packages to the Package Index +*************************************** + +.. versionadded:: 2.5 + +The Python Package Index (PyPI) not only stores the package info, but also the +package data if the author of the package wishes to. The distutils command +:command:`upload` pushes the distribution files to PyPI. + +The command is invoked immediately after building one or more distribution +files. For example, the command :: + + python setup.py sdist bdist_wininst upload + +will cause the source distribution and the Windows installer to be uploaded to +PyPI. Note that these will be uploaded even if they are built using an earlier +invocation of :file:`setup.py`, but that only distributions named on the command +line for the invocation including the :command:`upload` command are uploaded. + +The :command:`upload` command uses the username, password, and repository URL +from the :file:`$HOME/.pypirc` file (see section :ref:`pypirc` for more on this +file). + +You can use the :option:`--sign` option to tell :command:`upload` to sign each +uploaded file using GPG (GNU Privacy Guard). 
The :program:`gpg` program must +be available for execution on the system :envvar:`PATH`. You can also specify +which key to use for signing using the :option:`--identity=*name*` option. + +Other :command:`upload` options include :option:`--repository=*url*` (which +lets you override the repository setting from :file:`$HOME/.pypirc`), and +:option:`--show-response` (which displays the full response text from the PyPI +server for help in debugging upload problems). + + diff --git a/Doc/documenting/fromlatex.rst b/Doc/documenting/fromlatex.rst new file mode 100644 index 0000000..67abe8a --- /dev/null +++ b/Doc/documenting/fromlatex.rst @@ -0,0 +1,192 @@ +.. highlightlang:: rest + +Differences to the LaTeX markup +=============================== + +Though the markup language is different, most of the concepts and markup types +of the old LaTeX docs have been kept -- environments as reST directives, inline +commands as reST roles and so forth. + +However, there are some differences in the way these work, partly due to the +differences in the markup languages, partly due to improvements in Sphinx. This +section lists these differences, in order to give those familiar with the old +format a quick overview of what they might run into. + +Inline markup +------------- + +These changes have been made to inline markup: + +* **Cross-reference roles** + + Most of the following semantic roles existed previously as inline commands, + but didn't do anything except formatting the content as code. 
Now, they + cross-reference to known targets (some names have also been shortened): + + | *mod* (previously *refmodule* or *module*) + | *func* (previously *function*) + | *data* (new) + | *const* + | *class* + | *meth* (previously *method*) + | *attr* (previously *member*) + | *exc* (previously *exception*) + | *cdata* + | *cfunc* (previously *cfunction*) + | *cmacro* (previously *csimplemacro*) + | *ctype* + + Also different is the handling of *func* and *meth*: while previously + parentheses were added to the callable name (like ``\func{str()}``), they are + now appended by the build system -- appending them in the source will result + in double parentheses. This also means that ``:func:`str(object)``` will not + work as expected -- use ````str(object)```` instead! + +* **Inline commands implemented as directives** + + These were inline commands in LaTeX, but are now directives in reST: + + | *deprecated* + | *versionadded* + | *versionchanged* + + These are used like so:: + + .. deprecated:: 2.5 + Reason of deprecation. + + Also, no period is appended to the text for *versionadded* and + *versionchanged*. + + | *note* + | *warning* + + These are used like so:: + + .. note:: + + Content of note. + +* **Otherwise changed commands** + + The *samp* command previously formatted code and added quotation marks around + it. 
The *samp* role, however, features a new highlighting system just like + *file* does: + + ``:samp:`open({filename}, {mode})``` results in :samp:`open({filename}, {mode})` + +* **Dropped commands** + + These were commands in LaTeX, but are not available as roles: + + | *bfcode* + | *character* (use :samp:`\`\`'c'\`\``) + | *citetitle* (use ```Title <URL>`_``) + | *code* (use ````code````) + | *email* (just write the address in body text) + | *filenq* + | *filevar* (use the ``{...}`` highlighting feature of *file*) + | *programopt*, *longprogramopt* (use *option*) + | *ulink* (use ```Title <URL>`_``) + | *url* (just write the URL in body text) + | *var* (use ``*var*``) + | *infinity*, *plusminus* (use the Unicode character) + | *shortversion*, *version* (use the ``|version|`` and ``|release|`` substitutions) + | *emph*, *strong* (use the reST markup) + +* **Backslash escaping** + + In reST, a backslash must be escaped in normal text, and in the content of + roles. However, in code literals and literal blocks, it must not be escaped. + Example: ``:file:`C:\\Temp\\my.tmp``` vs. ````open("C:\Temp\my.tmp")````. + + +Information units +----------------- + +Information units (*...desc* environments) have been made reST directives. +These changes to information units should be noted: + +* **New names** + + "desc" has been removed from every name. Additionally, these directives have + new names: + + | *cfunction* (previously *cfuncdesc*) + | *cmacro* (previously *csimplemacrodesc*) + | *exception* (previously *excdesc*) + | *function* (previously *funcdesc*) + | *attribute* (previously *memberdesc*) + + The *classdesc\** and *excclassdesc* environments have been dropped; the + *class* and *exception* directives support classes documented with and without + constructor arguments. + +* **Multiple objects** + + The equivalent of the *...line* commands is:: + + .. function:: do_foo(bar) + do_bar(baz) + + Description of the functions.
+ + In other words, just give one signature per line, at the same indentation level. + +* **Arguments** + + There is no *optional* command. Just give function signatures like they + should appear in the output:: + + .. function:: open(filename[, mode[, buffering]]) + + Description. + + Note: markup in the signature is not supported. + +* **Indexing** + + The *...descni* environments have been dropped. To mark an information unit + as unsuitable for index entry generation, use the *noindex* option like so:: + + .. function:: foo_* + :noindex: + + Description. + +* **New information unit** + + There is a new generic information unit called "describe" which can be used + to document things that are not covered by the other units:: + + .. describe:: a == b + + The equals operator. + + +Structure +--------- + +The LaTeX docs were split into several toplevel manuals. Now, all files +are part of the same documentation tree, as indicated by the *toctree* +directives in the sources. Every *toctree* directive embeds other files +as subdocuments of the current file (this structure is not necessarily +mirrored in the filesystem layout). The toplevel file is +:file:`contents.rst`. + +However, most of the old directory structure has been kept, with the +directories renamed as follows: + +* :file:`api` -> :file:`c-api` +* :file:`dist` -> :file:`distutils`, with the single TeX file split up +* :file:`doc` -> :file:`documenting` +* :file:`ext` -> :file:`extending` +* :file:`inst` -> :file:`install` +* :file:`lib` -> :file:`library` +* :file:`mac` -> merged into :file:`library`, with :file:`mac/using.tex` + moved to :file:`howto/pythonmac.rst` +* :file:`ref` -> :file:`reference` +* :file:`tut` -> :file:`tutorial`, with the single TeX file split up + + +.. XXX more (index-generating, production lists, ...) diff --git a/Doc/documenting/index.rst b/Doc/documenting/index.rst new file mode 100644 index 0000000..1a3778b --- /dev/null +++ b/Doc/documenting/index.rst @@ -0,0 +1,33 @@ +..
_documenting-index: + +###################### + Documenting Python +###################### + + +The Python language has a substantial body of documentation, much of it +contributed by various authors. The markup used for the Python documentation is +`reStructuredText`_, developed by the `docutils`_ project, amended by custom +directives and using a toolset named *Sphinx* to postprocess the HTML output. + +This document describes the style guide for our documentation, the custom +reStructuredText markup introduced to support Python documentation and how it +should be used, as well as the Sphinx build system. + +.. _reStructuredText: http://docutils.sf.net/rst.html +.. _docutils: http://docutils.sf.net/ + +If you're interested in contributing to Python's documentation, there's no need +to write reStructuredText if you're not so inclined; plain text contributions +are more than welcome as well. + +.. toctree:: + + intro.rst + style.rst + rest.rst + markup.rst + sphinx.rst + +.. XXX add credits, thanks etc. + diff --git a/Doc/documenting/intro.rst b/Doc/documenting/intro.rst new file mode 100644 index 0000000..e02ad7d --- /dev/null +++ b/Doc/documenting/intro.rst @@ -0,0 +1,29 @@ +Introduction +============ + +Python's documentation has long been considered to be good for a free +programming language. There are a number of reasons for this, the most +important being the early commitment of Python's creator, Guido van Rossum, to +providing documentation on the language and its libraries, and the continuing +involvement of the user community in providing assistance for creating and +maintaining documentation. + +The involvement of the community takes many forms, from authoring to bug reports +to just plain complaining when the documentation could be more complete or +easier to use. + +This document is aimed at authors and potential authors of documentation for +Python. 
More specifically, it is for people contributing to the standard +documentation and developing additional documents using the same tools as the +standard documents. This guide will be less useful for authors using the Python +documentation tools for topics other than Python, and less useful still for +authors not using the tools at all. + +If your interest is in contributing to the Python documentation, but you don't +have the time or inclination to learn reStructuredText and the markup structures +documented here, there's a welcoming place for you among the Python contributors +as well. Any time you feel that you can clarify existing documentation or +provide documentation that's missing, the existing documentation team will +gladly work with you to integrate your text, dealing with the markup for you. +Please don't let the material in this document stand between the documentation +and your desire to help out! \ No newline at end of file diff --git a/Doc/documenting/markup.rst b/Doc/documenting/markup.rst new file mode 100644 index 0000000..831fad9 --- /dev/null +++ b/Doc/documenting/markup.rst @@ -0,0 +1,775 @@ +.. highlightlang:: rest + +Additional Markup Constructs +============================ + +Sphinx adds a lot of new directives and interpreted text roles to standard reST +markup. This section contains the reference material for these facilities. +Documentation for "standard" reST constructs is not included here, though +they are used in the Python documentation. + +File-wide metadata +------------------ + +reST has the concept of "field lists"; these are a sequence of fields marked up +like this:: + + :Field name: Field content + +A field list at the very top of a file is parsed as the "docinfo", which in +normal documents can be used to record the author, date of publication and +other metadata. In Sphinx, the docinfo is used as metadata, too, but not +displayed in the output. 
+ +At the moment, only one metadata field is recognized: + +``nocomments`` + If set, the web application won't display a comment form for a page generated + from this source file. + + +Meta-information markup +----------------------- + +.. describe:: sectionauthor + + Identifies the author of the current section. The argument should include + the author's name such that it can be used for presentation (though it isn't) + and email address. The domain name portion of the address should be lower + case. Example:: + + .. sectionauthor:: Guido van Rossum <guido@python.org> + + Currently, this markup isn't reflected in the output in any way, but it helps + keep track of contributions. + + +Module-specific markup +---------------------- + +The markup described in this section is used to provide information about a +module being documented. Each module should be documented in its own file. +Normally this markup appears after the title heading of that file; a typical +file might start like this:: + + :mod:`parrot` -- Dead parrot access + =================================== + + .. module:: parrot + :platform: Unix, Windows + :synopsis: Analyze and reanimate dead parrots. + .. moduleauthor:: Eric Cleese <eric@python.invalid> + .. moduleauthor:: John Idle <john@python.invalid> + +As you can see, the module-specific markup consists of two directives, the +``module`` directive and the ``moduleauthor`` directive. + +.. describe:: module + + This directive marks the beginning of the description of a module (or package + submodule, in which case the name should be fully qualified, including the + package name). + + The ``platform`` option, if present, is a comma-separated list of the + platforms on which the module is available (if it is available on all + platforms, the option should be omitted). The keys are short identifiers; + examples that are in use include "IRIX", "Mac", "Windows", and "Unix". It is + important to use a key which has already been used when applicable.
+ + The ``synopsis`` option should consist of one sentence describing the + module's purpose -- it is currently only used in the Global Module Index. + +.. describe:: moduleauthor + + The ``moduleauthor`` directive, which can appear multiple times, names the + authors of the module code, just like ``sectionauthor`` names the author(s) + of a piece of documentation. It too does not result in any output currently. + + +.. note:: + + It is important to make the section title of a module-describing file + meaningful since that value will be inserted in the table-of-contents trees + in overview files. + + +Information units +----------------- + +There are a number of directives used to describe specific features provided by +modules. Each directive requires one or more signatures to provide basic +information about what is being described, and the content should be the +description. The basic version makes entries in the general index; if no index +entry is desired, you can give the directive option flag ``:noindex:``. The +following example shows all of the features of this directive type:: + + .. function:: spam(eggs) + ham(eggs) + :noindex: + + Spam or ham the foo. + +The signatures of object methods or data attributes should always include the +type name (``.. method:: FileInput.input(...)``), even if it is obvious from the +context which type they belong to; this is to enable consistent +cross-references. If you describe methods belonging to an abstract protocol, +such as "context managers", include a (pseudo-)type name too to make the +index entries more informative. + +The directives are: + +.. describe:: cfunction + + Describes a C function. The signature should be given as in C, e.g.:: + + .. cfunction:: PyObject* PyType_GenericAlloc(PyTypeObject *type, Py_ssize_t nitems) + + This is also used to describe function-like preprocessor macros. The names + of the arguments should be given so they may be used in the description. 
+ + Note that you don't have to backslash-escape asterisks in the signature, + as it is not parsed by the reST inliner. + +.. describe:: cmember + + Describes a C struct member. Example signature:: + + .. cmember:: PyObject* PyTypeObject.tp_bases + + The text of the description should include the range of values allowed, how + the value should be interpreted, and whether the value can be changed. + References to structure members in text should use the ``member`` role. + +.. describe:: cmacro + + Describes a "simple" C macro. Simple macros are macros which are used + for code expansion, but which do not take arguments so cannot be described as + functions. This is not to be used for simple constant definitions. Examples + of its use in the Python documentation include :cmacro:`PyObject_HEAD` and + :cmacro:`Py_BEGIN_ALLOW_THREADS`. + +.. describe:: ctype + + Describes a C type. The signature should just be the type name. + +.. describe:: cvar + + Describes a global C variable. The signature should include the type, such + as:: + + .. cvar:: PyObject* PyClass_Type + +.. describe:: data + + Describes global data in a module, including both variables and values used + as "defined constants." Class and object attributes are not documented + using this environment. + +.. describe:: exception + + Describes an exception class. The signature can, but need not include + parentheses with constructor arguments. + +.. describe:: function + + Describes a module-level function. The signature should include the + parameters, enclosing optional parameters in brackets. Default values can be + given if it enhances clarity. For example:: + + .. function:: Timer.repeat([repeat=3[, number=1000000]]) + + Object methods are not documented using this directive. Bound object methods + placed in the module namespace as part of the public interface of the module + are documented using this, as they are equivalent to normal functions for + most purposes. 
+ + The description should include information about the parameters required and + how they are used (especially whether mutable objects passed as parameters + are modified), side effects, and possible exceptions. A small example may be + provided. + +.. describe:: class + + Describes a class. The signature can include parentheses with parameters + which will be shown as the constructor arguments. + +.. describe:: attribute + + Describes an object data attribute. The description should include + information about the type of the data to be expected and whether it may be + changed directly. + +.. describe:: method + + Describes an object method. The parameters should not include the ``self`` + parameter. The description should include similar information to that + described for ``function``. + +.. describe:: opcode + + Describes a Python bytecode instruction. + + +There is also a generic version of these directives: + +.. describe:: describe + + This directive produces the same formatting as the specific ones explained + above but does not create index entries or cross-referencing targets. It is + used, for example, to describe the directives in this document. Example:: + + .. describe:: opcode + + Describes a Python bytecode instruction. + + +Showing code examples +--------------------- + +Examples of Python source code or interactive sessions are represented using +standard reST literal blocks. They are started by a ``::`` at the end of the +preceding paragraph and delimited by indentation. + +Representing an interactive session requires including the prompts and output +along with the Python code. No special markup is required for interactive +sessions. After the last line of input or output presented, there should not be +an "unused" primary prompt; this is an example of what *not* to do:: + + >>> 1 + 1 + 2 + >>> + +Syntax highlighting is handled in a smart way: + +* There is a "highlighting language" for each source file. 
Per default, + this is ``'python'`` as the majority of files will have to highlight Python + snippets. + +* Within Python highlighting mode, interactive sessions are recognized + automatically and highlighted appropriately. + +* The highlighting language can be changed using the ``highlightlang`` + directive, used as follows:: + + .. highlightlang:: c + + This language is used until the next ``highlightlang`` directive is + encountered. + +* The valid values for the highlighting language are: + + * ``python`` (the default) + * ``c`` + * ``rest`` + * ``none`` (no highlighting) + +* If highlighting with the current language fails, the block is not highlighted + in any way. + +Longer displays of verbatim text may be included by storing the example text in +an external file containing only plain text. The file may be included using the +``literalinclude`` directive. [1]_ For example, to include the Python source file +:file:`example.py`, use:: + + .. literalinclude:: example.py + +The file name is relative to the current file's path. Documentation-specific +include files should be placed in the ``Doc/includes`` subdirectory. + + +Inline markup +------------- + +As said before, Sphinx uses interpreted text roles to insert semantic markup in +documents. + +Variable names are an exception, they should be marked simply with ``*var*``. + +For all other roles, you have to write ``:rolename:`content```. + +The following roles refer to objects in modules and are possibly hyperlinked if +a matching identifier is found: + +.. describe:: mod + + The name of a module; a dotted name may be used. This should also be used for + package names. + +.. describe:: func + + The name of a Python function; dotted names may be used. The role text + should include trailing parentheses to enhance readability. The parentheses + are stripped when searching for identifiers. + +.. describe:: data + + The name of a module-level variable. + +.. describe:: const + + The name of a "defined" constant. 
This may be a C-language ``#define`` + or a Python variable that is not intended to be changed. + +.. describe:: class + + A class name; a dotted name may be used. + +.. describe:: meth + + The name of a method of an object. The role text should include the type + name, method name and the trailing parentheses. A dotted name may be used. + +.. describe:: attr + + The name of a data attribute of an object. + +.. describe:: exc + + The name of an exception. A dotted name may be used. + +The name enclosed in this markup can include a module name and/or a class name. +For example, ``:func:`filter``` could refer to a function named ``filter`` in +the current module, or the built-in function of that name. In contrast, +``:func:`foo.filter``` clearly refers to the ``filter`` function in the ``foo`` +module. + +A similar heuristic is used to determine whether the name is an attribute of +the currently documented class. + +The following roles create cross-references to C-language constructs if they +are defined in the API documentation: + +.. describe:: cdata + + The name of a C-language variable. + +.. describe:: cfunc + + The name of a C-language function. Should include trailing parentheses. + +.. describe:: cmacro + + The name of a "simple" C macro, as defined above. + +.. describe:: ctype + + The name of a C-language type. + + +The following role does possibly create a cross-reference, but does not refer +to objects: + +.. describe:: token + + The name of a grammar token (used in the reference manual to create links + between production displays). + +--------- + +The following roles don't do anything special except formatting the text +in a different style: + +.. describe:: command + + The name of an OS-level command, such as ``rm``. + +.. describe:: dfn + + Mark the defining instance of a term in the text. (No index entries are + generated.) + +.. describe:: envvar + + An environment variable. Index entries are generated. + +.. 
describe:: file + + The name of a file or directory. Within the contents, you can use curly + braces to indicate a "variable" part, for example:: + + ... is installed in :file:`/usr/lib/python2.{x}/site-packages` ... + + In the built documentation, the ``x`` will be displayed differently to + indicate that it is to be replaced by the Python minor version. + +.. describe:: guilabel + + Labels presented as part of an interactive user interface should be marked + using ``guilabel``. This includes labels from text-based interfaces such as + those created using :mod:`curses` or other text-based libraries. Any label + used in the interface should be marked with this role, including button + labels, window titles, field names, menu and menu selection names, and even + values in selection lists. + +.. describe:: kbd + + Mark a sequence of keystrokes. What form the key sequence takes may depend + on platform- or application-specific conventions. When there are no relevant + conventions, the names of modifier keys should be spelled out, to improve + accessibility for new users and non-native speakers. For example, an + *xemacs* key sequence may be marked like ``:kbd:`C-x C-f```, but without + reference to a specific application or platform, the same sequence should be + marked as ``:kbd:`Control-x Control-f```. + +.. describe:: keyword + + The name of a keyword in a programming language. + +.. describe:: mailheader + + The name of an RFC 822-style mail header. This markup does not imply that + the header is being used in an email message, but can be used to refer to any + header of the same "style." This is also used for headers defined by the + various MIME specifications. The header name should be entered in the same + way it would normally be found in practice, with the camel-casing conventions + being preferred where there is more than one common usage. For example: + ``:mailheader:`Content-Type```. + +.. describe:: makevar + + The name of a :command:`make` variable. 
+ +.. describe:: manpage + + A reference to a Unix manual page including the section, + e.g. ``:manpage:`ls(1)```. + +.. describe:: menuselection + + Menu selections should be marked using the ``menuselection`` role. This is + used to mark a complete sequence of menu selections, including selecting + submenus and choosing a specific operation, or any subsequence of such a + sequence. The names of individual selections should be separated by + ``-->``. + + For example, to mark the selection "Start > Programs", use this markup:: + + :menuselection:`Start --> Programs` + + When including a selection that includes some trailing indicator, such as the + ellipsis some operating systems use to indicate that the command opens a + dialog, the indicator should be omitted from the selection name. + +.. describe:: mimetype + + The name of a MIME type, or a component of a MIME type (the major or minor + portion, taken alone). + +.. describe:: newsgroup + + The name of a Usenet newsgroup. + +.. describe:: option + + A command-line option to an executable program. The leading hyphen(s) must + be included. + +.. describe:: program + + The name of an executable program. This may differ from the file name for + the executable for some platforms. In particular, the ``.exe`` (or other) + extension should be omitted for Windows programs. + +.. describe:: regexp + + A regular expression. Quotes should not be included. + +.. describe:: samp + + A piece of literal text, such as code. Within the contents, you can use + curly braces to indicate a "variable" part, as in ``:file:``. + + If you don't need the "variable part" indication, use the standard + ````code```` instead. + +.. describe:: var + + A Python or C variable or parameter name. + + +The following roles generate external links: + +.. describe:: pep + + A reference to a Python Enhancement Proposal. This generates appropriate + index entries. 
The text "PEP *number*\ " is generated; in the HTML output, + this text is a hyperlink to an online copy of the specified PEP. + +.. describe:: rfc + + A reference to an Internet Request for Comments. This generates appropriate + index entries. The text "RFC *number*\ " is generated; in the HTML output, + this text is a hyperlink to an online copy of the specified RFC. + + +Note that there are no special roles for including hyperlinks as you can use +the standard reST markup for that purpose. + + +.. _doc-ref-role: + +Cross-linking markup +-------------------- + +To support cross-referencing to arbitrary sections in the documentation, the +standard reST labels are "abused" a bit: Every label must precede a section +title; and every label name must be unique throughout the entire documentation +source. + +You can then reference to these sections using the ``:ref:`label-name``` role. + +Example:: + + .. _my-reference-label: + + Section to cross-reference + -------------------------- + + This is the text of the section. + + It refers to the section itself, see :ref:`my-reference-label`. + +The ``:ref:`` invocation is replaced with the section title. + + +Paragraph-level markup +---------------------- + +These directives create short paragraphs and can be used inside information +units as well as normal text: + +.. describe:: note + + An especially important bit of information about an API that a user should be + aware of when using whatever bit of API the note pertains to. The content of + the directive should be written in complete sentences and include all + appropriate punctuation. + + Example:: + + .. note:: + + This function is not suitable for sending spam e-mails. + +.. describe:: warning + + An important bit of information about an API that a user should be very aware + of when using whatever bit of API the warning pertains to. The content of + the directive should be written in complete sentences and include all + appropriate punctuation. 
This differs from ``note`` in that it is recommended + over ``note`` for information regarding security. + +.. describe:: versionadded + + This directive documents the version of Python which added the described + feature to the library or C API. When this applies to an entire module, it + should be placed at the top of the module section before any prose. + + The first argument must be given and is the version in question; you can add + a second argument consisting of a *brief* explanation of the change. + + Example:: + + .. versionadded:: 2.5 + The `spam` parameter. + + Note that there must be no blank line between the directive head and the + explanation; this is to make these blocks visually continuous in the markup. + +.. describe:: versionchanged + + Similar to ``versionadded``, but describes when and what changed in the named + feature in some way (new parameters, changed side effects, etc.). + +-------------- + +.. describe:: seealso + + Many sections include a list of references to module documentation or + external documents. These lists are created using the ``seealso`` directive. + + The ``seealso`` directive is typically placed in a section just before any + sub-sections. For the HTML output, it is shown boxed off from the main flow + of the text. + + The content of the ``seealso`` directive should be a reST definition list. + Example:: + + .. seealso:: + + Module :mod:`zipfile` + Documentation of the :mod:`zipfile` standard module. + + `GNU tar manual, Basic Tar Format `_ + Documentation for tar archive files, including GNU tar extensions. + +.. describe:: rubric + + This directive creates a paragraph heading that is not used to create a + table of contents node. It is currently used for the "Footnotes" caption. + +.. describe:: centered + + This directive creates a centered boldfaced paragraph. Use it as follows:: + + .. centered:: + + Paragraph contents. 
+ + +Table-of-contents markup +------------------------ + +Since reST does not have facilities to interconnect several documents, or split +documents into multiple output files, Sphinx uses a custom directive to add +relations between the single files the documentation is made of, as well as +tables of contents. The ``toctree`` directive is the central element. + +.. describe:: toctree + + This directive inserts a "TOC tree" at the current location, using the + individual TOCs (including "sub-TOC trees") of the files given in the + directive body. A numeric ``maxdepth`` option may be given to indicate the + depth of the tree; by default, all levels are included. + + Consider this example (taken from the library reference index):: + + .. toctree:: + :maxdepth: 2 + + intro.rst + strings.rst + datatypes.rst + numeric.rst + (many more files listed here) + + This accomplishes two things: + + * Tables of contents from all those files are inserted, with a maximum depth + of two, that means one nested heading. ``toctree`` directives in those + files are also taken into account. + * Sphinx knows the relative order of the files ``intro.rst``, + ``strings.rst`` and so forth, and it knows that they are children of the + shown file, the library index. From this information it generates "next + chapter", "previous chapter" and "parent chapter" links. + + In the end, all files included in the build process must occur in one + ``toctree`` directive; Sphinx will emit a warning if it finds a file that is + not included, because that means that this file will not be reachable through + standard navigation. + + The special file ``contents.rst`` at the root of the source directory is the + "root" of the TOC tree hierarchy; from it the "Contents" page is generated. + + +Index-generating markup +----------------------- + +Sphinx automatically creates index entries from all information units (like +functions, classes or attributes) as discussed before.
+ +However, there is also an explicit directive available, to make the index more +comprehensive and enable index entries in documents where information is not +mainly contained in information units, such as the language reference. + +The directive is ``index`` and contains one or more index entries. Each entry +consists of a type and a value, separated by a colon. + +For example:: + + .. index:: + single: execution!context + module: __main__ + module: sys + triple: module; search; path + +This directive contains five entries, which will be converted to entries in the +generated index which link to the exact location of the index statement (or, in +case of offline media, the corresponding page number). + +The possible entry types are: + +single + Creates a single index entry. Can be made a subentry by separating the + subentry text with a semicolon (this is also used below to describe what + entries are created). +pair + ``pair: loop; statement`` is a shortcut that creates two index entries, + namely ``loop; statement`` and ``statement; loop``. +triple + Likewise, ``triple: module; search; path`` is a shortcut that creates three + index entries, which are ``module; search path``, ``search; path, module`` and + ``path; module search``. +module, keyword, operator, object, exception, statement, builtin + These all create two index entries. For example, ``module: hashlib`` creates + the entries ``module; hashlib`` and ``hashlib; module``. + + +Grammar production displays +--------------------------- + +Special markup is available for displaying the productions of a formal grammar. +The markup is simple and does not attempt to model all aspects of BNF (or any +derived forms), but provides enough to allow context-free grammars to be +displayed in a way that causes uses of a symbol to be rendered as hyperlinks to +the definition of the symbol. There is this directive: + +.. describe:: productionlist + + This directive is used to enclose a group of productions. 
Each production is + given on a single line and consists of a name, separated by a colon from the + following definition. If the definition spans multiple lines, each + continuation line must begin with a colon placed at the same column as in the + first line. + + Blank lines are not allowed within ``productionlist`` directive arguments. + + The definition can contain token names which are marked as interpreted text + (e.g. ``sum ::= `integer` "+" `integer```) -- this generates cross-references + to the productions of these tokens. + + Note that no further reST parsing is done in the production, so that you + don't have to escape ``*`` or ``|`` characters. + + +.. XXX describe optional first parameter + +The following is an example taken from the Python Reference Manual:: + + .. productionlist:: + try_stmt: try1_stmt | try2_stmt + try1_stmt: "try" ":" `suite` + : ("except" [`expression` ["," `target`]] ":" `suite`)+ + : ["else" ":" `suite`] + : ["finally" ":" `suite`] + try2_stmt: "try" ":" `suite` + : "finally" ":" `suite` + + +Substitutions +------------- + +The documentation system provides three substitutions that are defined by default. +They are set in the build configuration file, see :ref:`doc-build-config`. + +.. describe:: |release| + + Replaced by the Python release the documentation refers to. This is the full + version string including alpha/beta/release candidate tags, e.g. ``2.5.2b3``. + +.. describe:: |version| + + Replaced by the Python version the documentation refers to. This consists + only of the major and minor version parts, e.g. ``2.5``, even for version + 2.5.1. + +.. describe:: |today| + + Replaced by either today's date, or the date set in the build configuration + file. Normally has the format ``April 14, 2007``. + + +.. rubric:: Footnotes + +.. [1] There is a standard ``.. include`` directive, but it raises errors if the + file is not found. This one only emits a warning. 
diff --git a/Doc/documenting/rest.rst b/Doc/documenting/rest.rst new file mode 100644 index 0000000..8a4fc3d --- /dev/null +++ b/Doc/documenting/rest.rst @@ -0,0 +1,251 @@ +.. highlightlang:: rest + +reStructuredText Primer +======================= + +This section is a brief introduction to reStructuredText (reST) concepts and +syntax, intended to provide authors with enough information to author documents +productively. Since reST was designed to be a simple, unobtrusive markup +language, this will not take too long. + +.. seealso:: + + The authoritative `reStructuredText User + Documentation <http://docutils.sourceforge.net/rst.html>`_. + + +Paragraphs +---------- + +The paragraph is the most basic block in a reST document. Paragraphs are simply +chunks of text separated by one or more blank lines. As in Python, indentation +is significant in reST, so all lines of the same paragraph must be left-aligned +to the same level of indentation. + + +Inline markup +------------- + +The standard reST inline markup is quite simple: use + +* one asterisk: ``*text*`` for emphasis (italics), +* two asterisks: ``**text**`` for strong emphasis (boldface), and +* backquotes: ````text```` for code samples. + +If asterisks or backquotes appear in running text and could be confused with +inline markup delimiters, they have to be escaped with a backslash. + +Be aware of some restrictions of this markup: + +* it may not be nested, +* content may not start or end with whitespace: ``* text*`` is wrong, +* it must be separated from surrounding text by non-word characters. Use a + backslash escaped space to work around that: ``thisis\ *one*\ word``. + +These restrictions may be lifted in future versions of the docutils. + +reST also allows for custom "interpreted text roles", which signify that the +enclosed text should be interpreted in a specific way. Sphinx uses this to +provide semantic markup and cross-referencing of identifiers, as described in +the appropriate section. The general syntax is ``:rolename:`content```.
+ + +Lists and Quotes +---------------- + +List markup is natural: just place an asterisk at the start of a paragraph and +indent properly. The same goes for numbered lists; they can also be +autonumbered using a ``#`` sign:: + + * This is a bulleted list. + * It has two items, the second + item uses two lines. + + 1. This is a numbered list. + 2. It has two items too. + + #. This is a numbered list. + #. It has two items too. + +Note that Sphinx disables the use of enumerated lists introduced by alphabetic +or roman numerals, such as :: + + A. First item + B. Second item + + +Nested lists are possible, but be aware that they must be separated from the +parent list items by blank lines:: + + * this is + * a list + + * with a nested list + * and some subitems + + * and here the parent list continues + +Definition lists are created as follows:: + + term (up to a line of text) + Definition of the term, which must be indented + + and can even consist of multiple paragraphs + + next term + Description. + + +Paragraphs are quoted by just indenting them more than the surrounding +paragraphs. + + +Source Code +----------- + +Literal code blocks are introduced by ending a paragraph with the special marker +``::``. The literal block must be indented, to be able to include blank lines:: + + This is a normal text paragraph. The next paragraph is a code sample:: + + It is not processed in any way, except + that the indentation is removed. + + It can span multiple lines. + + This is a normal text paragraph again. + +The handling of the ``::`` marker is smart: + +* If it occurs as a paragraph of its own, that paragraph is completely left + out of the document. +* If it is preceded by whitespace, the marker is removed. +* If it is preceded by non-whitespace, the marker is replaced by a single + colon. + +That way, the second sentence in the above example's first paragraph would be +rendered as "The next paragraph is a code sample:". 
+ + +Hyperlinks +---------- + +External links +^^^^^^^^^^^^^^ + +Use ```Link text <http://target>`_`` for inline web links. If the link text +should be the web address, you don't need special markup at all, the parser +finds links and mail addresses in ordinary text. + +Internal links +^^^^^^^^^^^^^^ + +Internal linking is done via a special reST role, see the section on specific +markup, :ref:`doc-ref-role`. + + +Sections +-------- + +Section headers are created by underlining (and optionally overlining) the +section title with a punctuation character, at least as long as the text:: + + ================= + This is a heading + ================= + +Normally, there are no heading levels assigned to certain characters as the +structure is determined from the succession of headings. However, for the +Python documentation, we use this convention: + +* ``#`` with overline, for parts +* ``*`` with overline, for chapters +* ``=``, for sections +* ``-``, for subsections +* ``^``, for subsubsections +* ``"``, for paragraphs + + +Explicit Markup +--------------- + +"Explicit markup" is used in reST for most constructs that need special +handling, such as footnotes, specially-highlighted paragraphs, comments, and +generic directives. + +An explicit markup block begins with a line starting with ``..`` followed by +whitespace and is terminated by the next paragraph at the same level of +indentation. (There needs to be a blank line between explicit markup and normal +paragraphs. This may all sound a bit complicated, but it is intuitive enough +when you write it.) + + +Directives +---------- + +A directive is a generic block of explicit markup. Besides roles, it is one of +the extension mechanisms of reST, and Sphinx makes heavy use of it. + +Basically, a directive consists of a name, arguments, options and content. (Keep +this terminology in mind, it is used in the next chapter describing custom +directives.) Looking at this example, :: + + ..
function:: foo(x) + foo(y, z) + :bar: no + + Return a line of text input from the user. + +``function`` is the directive name. It is given two arguments here, the +remainder of the first line and the second line, as well as one option ``bar`` +(as you can see, options are given in the lines immediately following the +arguments and indicated by the colons). + +The directive content follows after a blank line and is indented relative to the +directive start. + + +Footnotes +--------- + +For footnotes, use ``[#]_`` to mark the footnote location, and add the footnote +body at the bottom of the document after a "Footnotes" rubric heading, like so:: + + Lorem ipsum [#]_ dolor sit amet ... [#]_ + + .. rubric:: Footnotes + + .. [#] Text of the first footnote. + .. [#] Text of the second footnote. + +You can also explicitly number the footnotes for better context. + + +Comments +-------- + +Every explicit markup block which isn't a valid markup construct (like the +footnotes above) is regarded as a comment. + + +Source encoding +--------------- + +Since the easiest way to include special characters like em dashes or copyright +signs in reST is to directly write them as Unicode characters, one has to +specify an encoding: + +All Python documentation source files must be in UTF-8 encoding, and the HTML +documents written from them will be in that encoding as well. + + +Gotchas +------- + +There are some problems one commonly runs into while authoring reST documents: + +* **Separation of inline markup:** As said above, inline markup spans must be + separated from the surrounding text by non-word characters, you have to use + an escaped space to get around that. + +.. XXX more? diff --git a/Doc/documenting/sphinx.rst b/Doc/documenting/sphinx.rst new file mode 100644 index 0000000..85e8b5e --- /dev/null +++ b/Doc/documenting/sphinx.rst @@ -0,0 +1,60 @@ +.. highlightlang:: rest + +The Sphinx build system +======================= + +.. XXX: intro... + +.. 
_doc-build-config: + +The build configuration file +---------------------------- + +The documentation root, that is the ``Doc`` subdirectory of the source +distribution, contains a file named ``conf.py``. This file is called the "build +configuration file", and it contains several variables that are read and used +during a build run. + +These variables are: + +version : string + A string that is used as a replacement for the ``|version|`` reST + substitution. It should be the Python version the documentation refers to. + This consists only of the major and minor version parts, e.g. ``2.5``, even + for version 2.5.1. + +release : string + A string that is used as a replacement for the ``|release|`` reST + substitution. It should be the full version string including + alpha/beta/release candidate tags, e.g. ``2.5.2b3``. + +Both ``release`` and ``version`` can be ``'auto'``, which means that they are +determined at runtime from the ``Include/patchlevel.h`` file, if a complete +Python source distribution can be found, or else from the interpreter running +Sphinx. + +today_fmt : string + A ``strftime`` format that is used to format a replacement for the + ``|today|`` reST substitution. + +today : string + A string that can contain a date that should be written to the documentation + output literally. If this is nonzero, it is used instead of + ``strftime(today_fmt)``. + +unused_files : list of strings + A list of reST filenames that are to be disregarded during building. This + could be docs for temporarily disabled modules or documentation that's not + yet ready for public consumption. + +last_updated_format : string + If this is not an empty string, it will be given to ``time.strftime()`` and + written to each generated output file after "last updated on:". + +use_smartypants : bool + If true, use SmartyPants to convert quotes and dashes to the typographically + correct entities. 
+ +add_function_parentheses : bool + If true, ``()`` will be appended to the content of ``:func:``, ``:meth:`` and + ``:cfunc:`` cross-references. \ No newline at end of file diff --git a/Doc/documenting/style.rst b/Doc/documenting/style.rst new file mode 100644 index 0000000..5d0ccb7 --- /dev/null +++ b/Doc/documenting/style.rst @@ -0,0 +1,70 @@ +.. highlightlang:: rest + +Style Guide +=========== + +The Python documentation should follow the `Apple Publications Style Guide`_ +wherever possible. This particular style guide was selected mostly because it +seems reasonable and is easy to get online. + +Topics which are not covered in Apple's style guide will be discussed in +this document. + +All reST files use an indentation of 3 spaces. The maximum line length is 80 +characters for normal text, but tables, deeply indented code samples and long +links may extend beyond that. + +Make generous use of blank lines where applicable; they help group things +together. + +A sentence-ending period may be followed by one or two spaces; while reST +ignores the second space, it is customarily put in by some users, for example +to aid Emacs' auto-fill mode. + +Footnotes are generally discouraged, though they may be used when they are the +best way to present specific information. When a footnote reference is added at +the end of the sentence, it should follow the sentence-ending punctuation. The +reST markup should appear something like this:: + + This sentence has a footnote reference. [#]_ This is the next sentence. + +Footnotes should be gathered at the end of a file, or if the file is very long, +at the end of a section. The docutils will automatically create backlinks to +the footnote reference. + +Footnotes may appear in the middle of sentences where appropriate. + +Many special names are used in the Python documentation, including the names of +operating systems, programming languages, standards bodies, and the like.
Most +of these entities are not assigned any special markup, but the preferred +spellings are given here to aid authors in maintaining the consistency of +presentation in the Python documentation. + +Other terms and words deserve special mention as well; these conventions should +be used to ensure consistency throughout the documentation: + +CPU + For "central processing unit." Many style guides say this should be spelled + out on the first use (and if you must use it, do so!). For the Python + documentation, this abbreviation should be avoided since there's no + reasonable way to predict which occurrence will be the first seen by the + reader. It is better to use the word "processor" instead. + +POSIX + The name assigned to a particular group of standards. This is always + uppercase. + +Python + The name of our favorite programming language is always capitalized. + +Unicode + The name of a character set and matching encoding. This is always written + capitalized. + +Unix + The name of the operating system developed at AT&T Bell Labs in the early + 1970s. + + +.. _Apple Publications Style Guide: http://developer.apple.com/documentation/UserExperience/Conceptual/APStyleGuide/AppleStyleGuide2003.pdf + diff --git a/Doc/extending/building.rst b/Doc/extending/building.rst new file mode 100644 index 0000000..5e1dec8 --- /dev/null +++ b/Doc/extending/building.rst @@ -0,0 +1,131 @@ +.. highlightlang:: c + + +.. _building: + +******************************************** +Building C and C++ Extensions with distutils +******************************************** + +.. sectionauthor:: Martin v. Löwis + + +Starting in Python 1.4, Python provides, on Unix, a special make file for +building make files for building dynamically-linked extensions and custom +interpreters. Starting with Python 2.0, this mechanism (known as related to +Makefile.pre.in, and Setup files) is no longer supported. 
Building custom +interpreters was rarely used, and extension modules can be built using +distutils. + +Building an extension module using distutils requires that distutils is +installed on the build machine, which is included in Python 2.x and available +separately for Python 1.5. Since distutils also supports creation of binary +packages, users don't necessarily need a compiler and distutils to install the +extension. + +A distutils package contains a driver script, :file:`setup.py`. This is a plain +Python file, which, in the simplest case, could look like this:: + + from distutils.core import setup, Extension + + module1 = Extension('demo', + sources = ['demo.c']) + + setup (name = 'PackageName', + version = '1.0', + description = 'This is a demo package', + ext_modules = [module1]) + + +With this :file:`setup.py`, and a file :file:`demo.c`, running :: + + python setup.py build + +will compile :file:`demo.c`, and produce an extension module named ``demo`` in +the :file:`build` directory. Depending on the system, the module file will end +up in a subdirectory :file:`build/lib.system`, and may have a name like +:file:`demo.so` or :file:`demo.pyd`. + +In the :file:`setup.py`, all execution is performed by calling the ``setup`` +function. This takes a variable number of keyword arguments, of which the +example above uses only a subset. Specifically, the example specifies +meta-information to build packages, and it specifies the contents of the +package. Normally, a package will contain additional modules, like Python +source modules, documentation, subpackages, etc. Please refer to the distutils +documentation in :ref:`distutils-index` to learn more about the features of +distutils; this section explains building extension modules only. + +It is common to pre-compute arguments to :func:`setup`, to better structure the +driver script.
In the example above, the\ ``ext_modules`` argument to +:func:`setup` is a list of extension modules, each of which is an instance of +the :class:`Extension`. In the example, the instance defines an extension named +``demo`` which is built by compiling a single source file, :file:`demo.c`. + +In many cases, building an extension is more complex, since additional +preprocessor defines and libraries may be needed. This is demonstrated in the +example below. :: + + from distutils.core import setup, Extension + + module1 = Extension('demo', + define_macros = [('MAJOR_VERSION', '1'), + ('MINOR_VERSION', '0')], + include_dirs = ['/usr/local/include'], + libraries = ['tcl83'], + library_dirs = ['/usr/local/lib'], + sources = ['demo.c']) + + setup (name = 'PackageName', + version = '1.0', + description = 'This is a demo package', + author = 'Martin v. Loewis', + author_email = 'martin@v.loewis.de', + url = 'http://www.python.org/doc/current/ext/building.html', + long_description = ''' + This is really just a demo package. + ''', + ext_modules = [module1]) + + +In this example, :func:`setup` is called with additional meta-information, which +is recommended when distribution packages have to be built. For the extension +itself, it specifies preprocessor defines, include directories, library +directories, and libraries. Depending on the compiler, distutils passes this +information in different ways to the compiler. For example, on Unix, this may +result in the compilation commands :: + + gcc -DNDEBUG -g -O3 -Wall -Wstrict-prototypes -fPIC -DMAJOR_VERSION=1 -DMINOR_VERSION=0 -I/usr/local/include -I/usr/local/include/python2.2 -c demo.c -o build/temp.linux-i686-2.2/demo.o + + gcc -shared build/temp.linux-i686-2.2/demo.o -L/usr/local/lib -ltcl83 -o build/lib.linux-i686-2.2/demo.so + +These lines are for demonstration purposes only; distutils users should trust +that distutils gets the invocations right. + + +..
_distributing: + +Distributing your extension modules +=================================== + +When an extension has been successfully built, there are three ways to use it. + +End-users will typically want to install the module; they do so by running :: + + python setup.py install + +Module maintainers should produce source packages; to do so, they run :: + + python setup.py sdist + +In some cases, additional files need to be included in a source distribution; +this is done through a :file:`MANIFEST.in` file; see the distutils documentation +for details. + +If the source distribution has been built successfully, maintainers can also +create binary distributions. Depending on the platform, one of the following +commands can be used to do so. :: + + python setup.py bdist_wininst + python setup.py bdist_rpm + python setup.py bdist_dumb + diff --git a/Doc/extending/embedding.rst b/Doc/extending/embedding.rst new file mode 100644 index 0000000..b9a567c --- /dev/null +++ b/Doc/extending/embedding.rst @@ -0,0 +1,297 @@ +.. highlightlang:: c + + +.. _embedding: + +*************************************** +Embedding Python in Another Application +*************************************** + +The previous chapters discussed how to extend Python, that is, how to extend the +functionality of Python by attaching a library of C functions to it. It is also +possible to do it the other way around: enrich your C/C++ application by +embedding Python in it. Embedding provides your application with the ability to +implement some of the functionality of your application in Python rather than C +or C++. This can be used for many purposes; one example would be to allow users +to tailor the application to their needs by writing some scripts in Python. You +can also use it yourself if some of the functionality can be written in Python +more easily. + +Embedding Python is similar to extending it, but not quite. The difference is
The difference is +that when you extend Python, the main program of the application is still the +Python interpreter, while if you embed Python, the main program may have nothing +to do with Python --- instead, some parts of the application occasionally call +the Python interpreter to run some Python code. + +So if you are embedding Python, you are providing your own main program. One of +the things this main program has to do is initialize the Python interpreter. At +the very least, you have to call the function :cfunc:`Py_Initialize` (on Mac OS, +call :cfunc:`PyMac_Initialize` instead). There are optional calls to pass +command line arguments to Python. Then later you can call the interpreter from +any part of the application. + +There are several different ways to call the interpreter: you can pass a string +containing Python statements to :cfunc:`PyRun_SimpleString`, or you can pass a +stdio file pointer and a file name (for identification in error messages only) +to :cfunc:`PyRun_SimpleFile`. You can also call the lower-level operations +described in the previous chapters to construct and use Python objects. + +A simple demo of embedding Python can be found in the directory +:file:`Demo/embed/` of the source distribution. + + +.. seealso:: + + :ref:`c-api-index` + The details of Python's C interface are given in this manual. A great deal of + necessary information can be found here. + + +.. _high-level-embedding: + +Very High Level Embedding +========================= + +The simplest form of embedding Python is the use of the very high level +interface. This interface is intended to execute a Python script without needing +to interact with the application directly. This can for example be used to +perform some operation on a file. 
:: + + #include <Python.h> + + int + main(int argc, char *argv[]) + { + Py_Initialize(); + PyRun_SimpleString("from time import time,ctime\n" + "print 'Today is',ctime(time())\n"); + Py_Finalize(); + return 0; + } + +The above code first initializes the Python interpreter with +:cfunc:`Py_Initialize`, followed by the execution of a hard-coded Python script +that prints the date and time. Afterwards, the :cfunc:`Py_Finalize` call shuts +the interpreter down, followed by the end of the program. In a real program, +you may want to get the Python script from another source, perhaps a text-editor +routine, a file, or a database. Getting the Python code from a file can better +be done by using the :cfunc:`PyRun_SimpleFile` function, which saves you the +trouble of allocating memory space and loading the file contents. + + +.. _lower-level-embedding: + +Beyond Very High Level Embedding: An overview +============================================= + +The high level interface gives you the ability to execute arbitrary pieces of +Python code from your application, but exchanging data values is quite +cumbersome to say the least. If you want that, you should use lower level calls. +At the cost of having to write more C code, you can achieve almost anything. + +It should be noted that extending Python and embedding Python is quite the same +activity, despite the different intent. Most topics discussed in the previous +chapters are still valid. To show this, consider what the extension code from +Python to C really does: + +#. Convert data values from Python to C, + +#. Perform a function call to a C routine using the converted values, and + +#. Convert the data values from the call from C to Python. + +When embedding Python, the interface code does: + +#. Convert data values from C to Python, + +#. Perform a function call to a Python interface routine using the converted + values, and + +#. Convert the data values from the call from Python to C.
+ +As you can see, the data conversion steps are simply swapped to accommodate the +different direction of the cross-language transfer. The only difference is the +routine that you call between both data conversions. When extending, you call a +C routine, when embedding, you call a Python routine. + +This chapter will not discuss how to convert data from Python to C and vice +versa. Also, proper use of references and dealing with errors is assumed to be +understood. Since these aspects do not differ from extending the interpreter, +you can refer to earlier chapters for the required information. + + +.. _pure-embedding: + +Pure Embedding +============== + +The first program aims to execute a function in a Python script. Like in the +section about the very high level interface, the Python interpreter does not +directly interact with the application (but that will change in the next +section). + +The code to run a function defined in a Python script is: + +.. literalinclude:: ../includes/run-func.c + + +This code loads a Python script using ``argv[1]``, and calls the function named +in ``argv[2]``. Its integer arguments are the other values of the ``argv`` +array. If you compile and link this program (let's call the finished executable +:program:`call`), and use it to execute a Python script, such as:: + + def multiply(a,b): + print "Will compute", a, "times", b + c = 0 + for i in range(0, a): + c = c + b + return c + +then the result should be:: + + $ call multiply multiply 3 2 + Will compute 3 times 2 + Result of call: 6 + +Although the program is quite large for its functionality, most of the code is +for data conversion between Python and C, and for error reporting. The +interesting part with respect to embedding Python starts with + +.. 
% $ + +:: + + Py_Initialize(); + pName = PyString_FromString(argv[1]); + /* Error checking of pName left out */ + pModule = PyImport_Import(pName); + +After initializing the interpreter, the script is loaded using +:cfunc:`PyImport_Import`. This routine needs a Python string as its argument, +which is constructed using the :cfunc:`PyString_FromString` data conversion +routine. :: + + pFunc = PyObject_GetAttrString(pModule, argv[2]); + /* pFunc is a new reference */ + + if (pFunc && PyCallable_Check(pFunc)) { + ... + } + Py_XDECREF(pFunc); + +Once the script is loaded, the name we're looking for is retrieved using +:cfunc:`PyObject_GetAttrString`. If the name exists, and the object returned is +callable, you can safely assume that it is a function. The program then +proceeds by constructing a tuple of arguments as normal. The call to the Python +function is then made with:: + + pValue = PyObject_CallObject(pFunc, pArgs); + +Upon return of the function, ``pValue`` is either *NULL* or it contains a +reference to the return value of the function. Be sure to release the reference +after examining the value. + + +.. _extending-with-embedding: + +Extending Embedded Python +========================= + +Until now, the embedded Python interpreter had no access to functionality from +the application itself. The Python API allows this by extending the embedded +interpreter. That is, the embedded interpreter gets extended with routines +provided by the application. While it sounds complex, it is not so bad. Simply +forget for a while that the application starts the Python interpreter. Instead, +consider the application to be a set of subroutines, and write some glue code +that gives Python access to those routines, just like you would write a normal +Python extension. 
For example:: + + static int numargs=0; + + /* Return the number of arguments of the application command line */ + static PyObject* + emb_numargs(PyObject *self, PyObject *args) + { + if(!PyArg_ParseTuple(args, ":numargs")) + return NULL; + return Py_BuildValue("i", numargs); + } + + static PyMethodDef EmbMethods[] = { + {"numargs", emb_numargs, METH_VARARGS, + "Return the number of arguments received by the process."}, + {NULL, NULL, 0, NULL} + }; + +Insert the above code just above the :cfunc:`main` function. Also, insert the +following two statements directly after :cfunc:`Py_Initialize`:: + + numargs = argc; + Py_InitModule("emb", EmbMethods); + +These two lines initialize the ``numargs`` variable, and make the +:func:`emb.numargs` function accessible to the embedded Python interpreter. +With these extensions, the Python script can do things like :: + + import emb + print "Number of arguments", emb.numargs() + +In a real application, the methods will expose an API of the application to +Python. + +.. % \section{For the future} +.. % +.. % You don't happen to have a nice library to get textual +.. % equivalents of numeric values do you :-) ? +.. % Callbacks here ? (I may be using information from that section +.. % ?!) +.. % threads +.. % code examples do not really behave well if errors happen +.. % (what to watch out for) + + +.. _embeddingincplusplus: + +Embedding Python in C++ +======================= + +It is also possible to embed Python in a C++ program; precisely how this is done +will depend on the details of the C++ system used; in general you will need to +write the main program in C++, and use the C++ compiler to compile and link your +program. There is no need to recompile Python itself using C++. + + +.. 
_link-reqs: + +Linking Requirements +==================== + +While the :program:`configure` script shipped with the Python sources will +correctly build Python to export the symbols needed by dynamically linked +extensions, this is not automatically inherited by applications which embed the +Python library statically, at least on Unix. This is an issue when the +application is linked to the static runtime library (:file:`libpython.a`) and +needs to load dynamic extensions (implemented as :file:`.so` files). + +The problem is that some entry points are defined by the Python runtime solely +for extension modules to use. If the embedding application does not use any of +these entry points, some linkers will not include those entries in the symbol +table of the finished executable. Some additional options are needed to inform +the linker not to remove these symbols. + +Determining the right options to use for any given platform can be quite +difficult, but fortunately the Python configuration already has those values. +To retrieve them from an installed Python interpreter, start an interactive +interpreter and have a short session like this:: + + >>> import distutils.sysconfig + >>> distutils.sysconfig.get_config_var('LINKFORSHARED') + '-Xlinker -export-dynamic' + +.. index:: module: distutils.sysconfig + +The contents of the string presented will be the options that should be used. +If the string is empty, there's no need to add any additional options. The +:const:`LINKFORSHARED` definition corresponds to the variable of the same name +in Python's top-level :file:`Makefile`. + diff --git a/Doc/extending/extending.rst b/Doc/extending/extending.rst new file mode 100644 index 0000000..bf48c49 --- /dev/null +++ b/Doc/extending/extending.rst @@ -0,0 +1,1273 @@ +.. highlightlang:: c + + +.. 
_extending-intro: + +****************************** +Extending Python with C or C++ +****************************** + +It is quite easy to add new built-in modules to Python, if you know how to +program in C. Such :dfn:`extension modules` can do two things that can't be +done directly in Python: they can implement new built-in object types, and they +can call C library functions and system calls. + +To support extensions, the Python API (Application Programmers Interface) +defines a set of functions, macros and variables that provide access to most +aspects of the Python run-time system. The Python API is incorporated in a C +source file by including the header ``"Python.h"``. + +The compilation of an extension module depends on its intended use as well as on +your system setup; details are given in later chapters. + + +.. _extending-simpleexample: + +A Simple Example +================ + +Let's create an extension module called ``spam`` (the favorite food of Monty +Python fans...) and let's say we want to create a Python interface to the C +library function :cfunc:`system`. [#]_ This function takes a null-terminated +character string as argument and returns an integer. We want this function to +be callable from Python as follows:: + + >>> import spam + >>> status = spam.system("ls -l") + +Begin by creating a file :file:`spammodule.c`. (Historically, if a module is +called ``spam``, the C file containing its implementation is called +:file:`spammodule.c`; if the module name is very long, like ``spammify``, the +module name can be just :file:`spammify.c`.) + +The first line of our file can be:: + + #include <Python.h> + +which pulls in the Python API (you can add a comment describing the purpose of +the module and a copyright notice if you like). + +.. warning:: + + Since Python may define some pre-processor definitions which affect the standard + headers on some systems, you *must* include :file:`Python.h` before any standard + headers are included.
+ +All user-visible symbols defined by :file:`Python.h` have a prefix of ``Py`` or +``PY``, except those defined in standard header files. For convenience, and +since they are used extensively by the Python interpreter, ``"Python.h"`` +includes a few standard header files: ``<stdio.h>``, ``<string.h>``, +``<errno.h>``, and ``<stdlib.h>``. If the latter header file does not exist on +your system, it declares the functions :cfunc:`malloc`, :cfunc:`free` and +:cfunc:`realloc` directly. + +The next thing we add to our module file is the C function that will be called +when the Python expression ``spam.system(string)`` is evaluated (we'll see +shortly how it ends up being called):: + + static PyObject * + spam_system(PyObject *self, PyObject *args) + { + const char *command; + int sts; + + if (!PyArg_ParseTuple(args, "s", &command)) + return NULL; + sts = system(command); + return Py_BuildValue("i", sts); + } + +There is a straightforward translation from the argument list in Python (for +example, the single expression ``"ls -l"``) to the arguments passed to the C +function. The C function always has two arguments, conventionally named *self* +and *args*. + +The *self* argument is only used when the C function implements a built-in +method, not a function. In the example, *self* will always be a *NULL* pointer, +since we are defining a function, not a method. (This is done so that the +interpreter doesn't have to understand two different types of C functions.) + +The *args* argument will be a pointer to a Python tuple object containing the +arguments. Each item of the tuple corresponds to an argument in the call's +argument list. The arguments are Python objects --- in order to do anything +with them in our C function we have to convert them to C values. The function +:cfunc:`PyArg_ParseTuple` in the Python API checks the argument types and +converts them to C values.
It uses a template string to determine the required +types of the arguments as well as the types of the C variables into which to +store the converted values. More about this later. + +:cfunc:`PyArg_ParseTuple` returns true (nonzero) if all arguments have the right +type and its components have been stored in the variables whose addresses are +passed. It returns false (zero) if an invalid argument list was passed. In the +latter case it also raises an appropriate exception so the calling function can +return *NULL* immediately (as we saw in the example). + + +.. _extending-errors: + +Intermezzo: Errors and Exceptions +================================= + +An important convention throughout the Python interpreter is the following: when +a function fails, it should set an exception condition and return an error value +(usually a *NULL* pointer). Exceptions are stored in a static global variable +inside the interpreter; if this variable is *NULL* no exception has occurred. A +second global variable stores the "associated value" of the exception (the +second argument to :keyword:`raise`). A third variable contains the stack +traceback in case the error originated in Python code. These three variables +are the C equivalents of the result in Python of :meth:`sys.exc_info` (see the +section on module :mod:`sys` in the Python Library Reference). It is important +to know about them to understand how errors are passed around. + +The Python API defines a number of functions to set various types of exceptions. + +The most common one is :cfunc:`PyErr_SetString`. Its arguments are an exception +object and a C string. The exception object is usually a predefined object like +:cdata:`PyExc_ZeroDivisionError`. The C string indicates the cause of the error +and is converted to a Python string object and stored as the "associated value" +of the exception. 
+ +Another useful function is :cfunc:`PyErr_SetFromErrno`, which only takes an +exception argument and constructs the associated value by inspection of the +global variable :cdata:`errno`. The most general function is +:cfunc:`PyErr_SetObject`, which takes two object arguments, the exception and +its associated value. You don't need to :cfunc:`Py_INCREF` the objects passed +to any of these functions. + +You can test non-destructively whether an exception has been set with +:cfunc:`PyErr_Occurred`. This returns the current exception object, or *NULL* +if no exception has occurred. You normally don't need to call +:cfunc:`PyErr_Occurred` to see whether an error occurred in a function call, +since you should be able to tell from the return value. + +When a function *f* that calls another function *g* detects that the latter +fails, *f* should itself return an error value (usually *NULL* or ``-1``). It +should *not* call one of the :cfunc:`PyErr_\*` functions --- one has already +been called by *g*. *f*'s caller is then supposed to also return an error +indication to *its* caller, again *without* calling :cfunc:`PyErr_\*`, and so on +--- the most detailed cause of the error was already reported by the function +that first detected it. Once the error reaches the Python interpreter's main +loop, this aborts the currently executing Python code and tries to find an +exception handler specified by the Python programmer. + +(There are situations where a module can actually give a more detailed error +message by calling another :cfunc:`PyErr_\*` function, and in such cases it is +fine to do so. As a general rule, however, this is not necessary, and can cause +information about the cause of the error to be lost: most operations can fail +for a variety of reasons.) + +To ignore an exception set by a function call that failed, the exception +condition must be cleared explicitly by calling :cfunc:`PyErr_Clear`. 
The only +time C code should call :cfunc:`PyErr_Clear` is if it doesn't want to pass the +error on to the interpreter but wants to handle it completely by itself +(possibly by trying something else, or pretending nothing went wrong). + +Every failing :cfunc:`malloc` call must be turned into an exception --- the +direct caller of :cfunc:`malloc` (or :cfunc:`realloc`) must call +:cfunc:`PyErr_NoMemory` and return a failure indicator itself. All the +object-creating functions (for example, :cfunc:`PyInt_FromLong`) already do +this, so this note is only relevant to those who call :cfunc:`malloc` directly. + +Also note that, with the important exception of :cfunc:`PyArg_ParseTuple` and +friends, functions that return an integer status usually return a positive value +or zero for success and ``-1`` for failure, like Unix system calls. + +Finally, be careful to clean up garbage (by making :cfunc:`Py_XDECREF` or +:cfunc:`Py_DECREF` calls for objects you have already created) when you return +an error indicator! + +The choice of which exception to raise is entirely yours. There are predeclared +C objects corresponding to all built-in Python exceptions, such as +:cdata:`PyExc_ZeroDivisionError`, which you can use directly. Of course, you +should choose exceptions wisely --- don't use :cdata:`PyExc_TypeError` to mean +that a file couldn't be opened (that should probably be :cdata:`PyExc_IOError`). +If something's wrong with the argument list, the :cfunc:`PyArg_ParseTuple` +function usually raises :cdata:`PyExc_TypeError`. If you have an argument whose +value must be in a particular range or must satisfy other conditions, +:cdata:`PyExc_ValueError` is appropriate. + +You can also define a new exception that is unique to your module. 
For this, you +usually declare a static object variable at the beginning of your file:: + + static PyObject *SpamError; + +and initialize it in your module's initialization function (:cfunc:`initspam`) +with an exception object (leaving out the error checking for now):: + + PyMODINIT_FUNC + initspam(void) + { + PyObject *m; + + m = Py_InitModule("spam", SpamMethods); + if (m == NULL) + return; + + SpamError = PyErr_NewException("spam.error", NULL, NULL); + Py_INCREF(SpamError); + PyModule_AddObject(m, "error", SpamError); + } + +Note that the Python name for the exception object is :exc:`spam.error`. The +:cfunc:`PyErr_NewException` function may create a class with the base class +being :exc:`Exception` (unless another class is passed in instead of *NULL*), +described in :ref:`bltin-exceptions`. + +Note also that the :cdata:`SpamError` variable retains a reference to the newly +created exception class; this is intentional! Since the exception could be +removed from the module by external code, an owned reference to the class is +needed to ensure that it will not be discarded, causing :cdata:`SpamError` to +become a dangling pointer. Should it become a dangling pointer, C code which +raises the exception could cause a core dump or other unintended side effects. + +We discuss the use of PyMODINIT_FUNC as a function return type later in this +sample. + + +.. _backtoexample: + +Back to the Example +=================== + +Going back to our example function, you should now be able to understand this +statement:: + + if (!PyArg_ParseTuple(args, "s", &command)) + return NULL; + +It returns *NULL* (the error indicator for functions returning object pointers) +if an error is detected in the argument list, relying on the exception set by +:cfunc:`PyArg_ParseTuple`. Otherwise the string value of the argument has been +copied to the local variable :cdata:`command`. 
This is a pointer assignment and +you are not supposed to modify the string to which it points (so in Standard C, +the variable :cdata:`command` should properly be declared as ``const char +*command``). + +The next statement is a call to the Unix function :cfunc:`system`, passing it +the string we just got from :cfunc:`PyArg_ParseTuple`:: + + sts = system(command); + +Our :func:`spam.system` function must return the value of :cdata:`sts` as a +Python object. This is done using the function :cfunc:`Py_BuildValue`, which is +something like the inverse of :cfunc:`PyArg_ParseTuple`: it takes a format +string and an arbitrary number of C values, and returns a new Python object. +More info on :cfunc:`Py_BuildValue` is given later. :: + + return Py_BuildValue("i", sts); + +In this case, it will return an integer object. (Yes, even integers are objects +on the heap in Python!) + +If you have a C function that returns no useful argument (a function returning +:ctype:`void`), the corresponding Python function must return ``None``. You +need this idiom to do so (which is implemented by the :cmacro:`Py_RETURN_NONE` +macro):: + + Py_INCREF(Py_None); + return Py_None; + +:cdata:`Py_None` is the C name for the special Python object ``None``. It is a +genuine Python object rather than a *NULL* pointer, which means "error" in most +contexts, as we have seen. + + +.. _methodtable: + +The Module's Method Table and Initialization Function +===================================================== + +I promised to show how :cfunc:`spam_system` is called from Python programs. +First, we need to list its name and address in a "method table":: + + static PyMethodDef SpamMethods[] = { + ... + {"system", spam_system, METH_VARARGS, + "Execute a shell command."}, + ... + {NULL, NULL, 0, NULL} /* Sentinel */ + }; + +Note the third entry (``METH_VARARGS``). This is a flag telling the interpreter +the calling convention to be used for the C function. 
It should normally always +be ``METH_VARARGS`` or ``METH_VARARGS | METH_KEYWORDS``; a value of ``0`` means +that an obsolete variant of :cfunc:`PyArg_ParseTuple` is used. + +When using only ``METH_VARARGS``, the function should expect the Python-level +parameters to be passed in as a tuple acceptable for parsing via +:cfunc:`PyArg_ParseTuple`; more information on this function is provided below. + +The :const:`METH_KEYWORDS` bit may be set in the third field if keyword +arguments should be passed to the function. In this case, the C function should +accept a third ``PyObject *`` parameter which will be a dictionary of keywords. +Use :cfunc:`PyArg_ParseTupleAndKeywords` to parse the arguments to such a +function. + +The method table must be passed to the interpreter in the module's +initialization function. The initialization function must be named +:cfunc:`initname`, where *name* is the name of the module, and should be the +only non-\ :keyword:`static` item defined in the module file:: + + PyMODINIT_FUNC + initspam(void) + { + (void) Py_InitModule("spam", SpamMethods); + } + +Note that PyMODINIT_FUNC declares the function as ``void`` return type, +declares any special linkage declarations required by the platform, and for C++ +declares the function as ``extern "C"``. + +When the Python program imports module :mod:`spam` for the first time, +:cfunc:`initspam` is called. (See below for comments about embedding Python.) +It calls :cfunc:`Py_InitModule`, which creates a "module object" (which is +inserted in the dictionary ``sys.modules`` under the key ``"spam"``), and +inserts built-in function objects into the newly created module based upon the +table (an array of :ctype:`PyMethodDef` structures) that was passed as its +second argument. :cfunc:`Py_InitModule` returns a pointer to the module object +that it creates (which is unused here). It may abort with a fatal error for +certain errors, or return *NULL* if the module could not be initialized +satisfactorily. 
+ +When embedding Python, the :cfunc:`initspam` function is not called +automatically unless there's an entry in the :cdata:`_PyImport_Inittab` table. +The easiest way to handle this is to statically initialize your +statically-linked modules by directly calling :cfunc:`initspam` after the call +to :cfunc:`Py_Initialize`:: + + int + main(int argc, char *argv[]) + { + /* Pass argv[0] to the Python interpreter */ + Py_SetProgramName(argv[0]); + + /* Initialize the Python interpreter. Required. */ + Py_Initialize(); + + /* Add a static module */ + initspam(); + +An example may be found in the file :file:`Demo/embed/demo.c` in the Python +source distribution. + +.. note:: + + Removing entries from ``sys.modules`` or importing compiled modules into + multiple interpreters within a process (or following a :cfunc:`fork` without an + intervening :cfunc:`exec`) can create problems for some extension modules. + Extension module authors should exercise caution when initializing internal data + structures. + +A more substantial example module is included in the Python source distribution +as :file:`Modules/xxmodule.c`. This file may be used as a template or simply +read as an example. The :program:`modulator.py` script included in the source +distribution or Windows install provides a simple graphical user interface for +declaring the functions and objects which a module should implement, and can +generate a template which can be filled in. The script lives in the +:file:`Tools/modulator/` directory; see the :file:`README` file there for more +information. + + +.. _compilation: + +Compilation and Linkage +======================= + +There are two more things to do before you can use your new extension: compiling +and linking it with the Python system. 
If you use dynamic loading, the details +may depend on the style of dynamic loading your system uses; see the chapters +about building extension modules (chapter :ref:`building`) and additional +information that pertains only to building on Windows (chapter +:ref:`building-on-windows`) for more information about this. + +If you can't use dynamic loading, or if you want to make your module a permanent +part of the Python interpreter, you will have to change the configuration setup +and rebuild the interpreter. Luckily, this is very simple on Unix: just place +your file (:file:`spammodule.c` for example) in the :file:`Modules/` directory +of an unpacked source distribution, add a line to the file +:file:`Modules/Setup.local` describing your file:: + + spam spammodule.o + +and rebuild the interpreter by running :program:`make` in the toplevel +directory. You can also run :program:`make` in the :file:`Modules/` +subdirectory, but then you must first rebuild :file:`Makefile` there by running +':program:`make` Makefile'. (This is necessary each time you change the +:file:`Setup` file.) + +If your module requires additional libraries to link with, these can be listed +on the line in the configuration file as well, for instance:: + + spam spammodule.o -lX11 + + +.. _callingpython: + +Calling Python Functions from C +=============================== + +So far we have concentrated on making C functions callable from Python. The +reverse is also useful: calling Python functions from C. This is especially the +case for libraries that support so-called "callback" functions. If a C +interface makes use of callbacks, the equivalent Python often needs to provide a +callback mechanism to the Python programmer; the implementation will require +calling the Python callback functions from a C callback. Other uses are also +imaginable. + +Fortunately, the Python interpreter is easily called recursively, and there is a +standard interface to call a Python function. 
(I won't dwell on how to call the +Python parser with a particular string as input --- if you're interested, have a +look at the implementation of the :option:`-c` command line option in +:file:`Modules/main.c` from the Python source code.) + +Calling a Python function is easy. First, the Python program must somehow pass +you the Python function object. You should provide a function (or some other +interface) to do this. When this function is called, save a pointer to the +Python function object (be careful to :cfunc:`Py_INCREF` it!) in a global +variable --- or wherever you see fit. For example, the following function might +be part of a module definition:: + + static PyObject *my_callback = NULL; + + static PyObject * + my_set_callback(PyObject *dummy, PyObject *args) + { + PyObject *result = NULL; + PyObject *temp; + + if (PyArg_ParseTuple(args, "O:set_callback", &temp)) { + if (!PyCallable_Check(temp)) { + PyErr_SetString(PyExc_TypeError, "parameter must be callable"); + return NULL; + } + Py_XINCREF(temp); /* Add a reference to new callback */ + Py_XDECREF(my_callback); /* Dispose of previous callback */ + my_callback = temp; /* Remember new callback */ + /* Boilerplate to return "None" */ + Py_INCREF(Py_None); + result = Py_None; + } + return result; + } + +This function must be registered with the interpreter using the +:const:`METH_VARARGS` flag; this is described in section :ref:`methodtable`. The +:cfunc:`PyArg_ParseTuple` function and its arguments are documented in section +:ref:`parsetuple`. + +The macros :cfunc:`Py_XINCREF` and :cfunc:`Py_XDECREF` increment/decrement the +reference count of an object and are safe in the presence of *NULL* pointers +(but note that *temp* will not be *NULL* in this context). More information on +them can be found in section :ref:`refcounts`. + +.. index:: single: PyEval_CallObject() + +Later, when it is time to call the function, you call the C function +:cfunc:`PyEval_CallObject`. 
This function has two arguments, both pointers to +arbitrary Python objects: the Python function, and the argument list. The +argument list must always be a tuple object, whose length is the number of +arguments. To call the Python function with no arguments, pass an empty tuple; +to call it with one argument, pass a singleton tuple. :cfunc:`Py_BuildValue` +returns a tuple when its format string consists of zero or more format codes +between parentheses. For example:: + + int arg; + PyObject *arglist; + PyObject *result; + ... + arg = 123; + ... + /* Time to call the callback */ + arglist = Py_BuildValue("(i)", arg); + result = PyEval_CallObject(my_callback, arglist); + Py_DECREF(arglist); + +:cfunc:`PyEval_CallObject` returns a Python object pointer: this is the return +value of the Python function. :cfunc:`PyEval_CallObject` is +"reference-count-neutral" with respect to its arguments. In the example a new +tuple was created to serve as the argument list, which is :cfunc:`Py_DECREF`\ +-ed immediately after the call. + +The return value of :cfunc:`PyEval_CallObject` is "new": either it is a brand +new object, or it is an existing object whose reference count has been +incremented. So, unless you want to save it in a global variable, you should +somehow :cfunc:`Py_DECREF` the result, even (especially!) if you are not +interested in its value. + +Before you do this, however, it is important to check that the return value +isn't *NULL*. If it is, the Python function terminated by raising an exception. +If the C code that called :cfunc:`PyEval_CallObject` is called from Python, it +should now return an error indication to its Python caller, so the interpreter +can print a stack trace, or the calling Python code can handle the exception. +If this is not possible or desirable, the exception should be cleared by calling +:cfunc:`PyErr_Clear`. For example:: + + if (result == NULL) + return NULL; /* Pass error back */ + ...use result... 
+ Py_DECREF(result); + +Depending on the desired interface to the Python callback function, you may also +have to provide an argument list to :cfunc:`PyEval_CallObject`. In some cases +the argument list is also provided by the Python program, through the same +interface that specified the callback function. It can then be saved and used +in the same manner as the function object. In other cases, you may have to +construct a new tuple to pass as the argument list. The simplest way to do this +is to call :cfunc:`Py_BuildValue`. For example, if you want to pass an integral +event code, you might use the following code:: + + PyObject *arglist; + ... + arglist = Py_BuildValue("(l)", eventcode); + result = PyEval_CallObject(my_callback, arglist); + Py_DECREF(arglist); + if (result == NULL) + return NULL; /* Pass error back */ + /* Here maybe use the result */ + Py_DECREF(result); + +Note the placement of ``Py_DECREF(arglist)`` immediately after the call, before +the error check! Also note that, strictly speaking, this code is not complete: +:cfunc:`Py_BuildValue` may run out of memory, and this should be checked. + + +.. _parsetuple: + +Extracting Parameters in Extension Functions +============================================ + +.. index:: single: PyArg_ParseTuple() + +The :cfunc:`PyArg_ParseTuple` function is declared as follows:: + + int PyArg_ParseTuple(PyObject *arg, char *format, ...); + +The *arg* argument must be a tuple object containing an argument list passed +from Python to a C function. The *format* argument must be a format string, +whose syntax is explained in :ref:`arg-parsing` in the Python/C API Reference +Manual. The remaining arguments must be addresses of variables whose type is +determined by the format string. 
+ +Note that while :cfunc:`PyArg_ParseTuple` checks that the Python arguments have +the required types, it cannot check the validity of the addresses of C variables +passed to the call: if you make mistakes there, your code will probably crash or +at least overwrite random bits in memory. So be careful! + +Note that any Python object references which are provided to the caller are +*borrowed* references; do not decrement their reference count! + +Some example calls:: + + int ok; + int i, j; + long k, l; + const char *s; + int size; + + ok = PyArg_ParseTuple(args, ""); /* No arguments */ + /* Python call: f() */ + +:: + + ok = PyArg_ParseTuple(args, "s", &s); /* A string */ + /* Possible Python call: f('whoops!') */ + +:: + + ok = PyArg_ParseTuple(args, "lls", &k, &l, &s); /* Two longs and a string */ + /* Possible Python call: f(1, 2, 'three') */ + +:: + + ok = PyArg_ParseTuple(args, "(ii)s#", &i, &j, &s, &size); + /* A pair of ints and a string, whose size is also returned */ + /* Possible Python call: f((1, 2), 'three') */ + +:: + + { + const char *file; + const char *mode = "r"; + int bufsize = 0; + ok = PyArg_ParseTuple(args, "s|si", &file, &mode, &bufsize); + /* A string, and optionally another string and an integer */ + /* Possible Python calls: + f('spam') + f('spam', 'w') + f('spam', 'wb', 100000) */ + } + +:: + + { + int left, top, right, bottom, h, v; + ok = PyArg_ParseTuple(args, "((ii)(ii))(ii)", + &left, &top, &right, &bottom, &h, &v); + /* A rectangle and a point */ + /* Possible Python call: + f(((0, 0), (400, 300)), (10, 10)) */ + } + +:: + + { + Py_complex c; + ok = PyArg_ParseTuple(args, "D:myfunction", &c); + /* a complex, also providing a function name for errors */ + /* Possible Python call: myfunction(1+2j) */ + } + + +.. _parsetupleandkeywords: + +Keyword Parameters for Extension Functions +========================================== + +.. 
index:: single: PyArg_ParseTupleAndKeywords() + +The :cfunc:`PyArg_ParseTupleAndKeywords` function is declared as follows:: + + int PyArg_ParseTupleAndKeywords(PyObject *arg, PyObject *kwdict, + char *format, char *kwlist[], ...); + +The *arg* and *format* parameters are identical to those of the +:cfunc:`PyArg_ParseTuple` function. The *kwdict* parameter is the dictionary of +keywords received as the third parameter from the Python runtime. The *kwlist* +parameter is a *NULL*-terminated list of strings which identify the parameters; +the names are matched with the type information from *format* from left to +right. On success, :cfunc:`PyArg_ParseTupleAndKeywords` returns true, otherwise +it returns false and raises an appropriate exception. + +.. note:: + + Nested tuples cannot be parsed when using keyword arguments! Keyword parameters + passed in which are not present in the *kwlist* will cause :exc:`TypeError` to + be raised. + +.. index:: single: Philbrick, Geoff + +Here is an example module which uses keywords, based on an example by Geoff +Philbrick (philbrick@hks.com): + +.. % + +:: + + #include "Python.h" + + static PyObject * + keywdarg_parrot(PyObject *self, PyObject *args, PyObject *keywds) + { + int voltage; + char *state = "a stiff"; + char *action = "voom"; + char *type = "Norwegian Blue"; + + static char *kwlist[] = {"voltage", "state", "action", "type", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, keywds, "i|sss", kwlist, + &voltage, &state, &action, &type)) + return NULL; + + printf("-- This parrot wouldn't %s if you put %i Volts through it.\n", + action, voltage); + printf("-- Lovely plumage, the %s -- It's %s!\n", type, state); + + Py_INCREF(Py_None); + + return Py_None; + } + + static PyMethodDef keywdarg_methods[] = { + /* The cast of the function is necessary since PyCFunction values + * only take two PyObject* parameters, and keywdarg_parrot() takes + * three. 
+ */ + {"parrot", (PyCFunction)keywdarg_parrot, METH_VARARGS | METH_KEYWORDS, + "Print a lovely skit to standard output."}, + {NULL, NULL, 0, NULL} /* sentinel */ + }; + +:: + + void + initkeywdarg(void) + { + /* Create the module and add the functions */ + Py_InitModule("keywdarg", keywdarg_methods); + } + + +.. _buildvalue: + +Building Arbitrary Values +========================= + +This function is the counterpart to :cfunc:`PyArg_ParseTuple`. It is declared +as follows:: + + PyObject *Py_BuildValue(char *format, ...); + +It recognizes a set of format units similar to the ones recognized by +:cfunc:`PyArg_ParseTuple`, but the arguments (which are input to the function, +not output) must not be pointers, just values. It returns a new Python object, +suitable for returning from a C function called from Python. + +One difference with :cfunc:`PyArg_ParseTuple`: while the latter requires its +first argument to be a tuple (since Python argument lists are always represented +as tuples internally), :cfunc:`Py_BuildValue` does not always build a tuple. It +builds a tuple only if its format string contains two or more format units. If +the format string is empty, it returns ``None``; if it contains exactly one +format unit, it returns whatever object is described by that format unit. To +force it to return a tuple of size 0 or one, parenthesize the format string. 
+ +Examples (to the left the call, to the right the resulting Python value):: + + Py_BuildValue("") None + Py_BuildValue("i", 123) 123 + Py_BuildValue("iii", 123, 456, 789) (123, 456, 789) + Py_BuildValue("s", "hello") 'hello' + Py_BuildValue("y", "hello") b'hello' + Py_BuildValue("ss", "hello", "world") ('hello', 'world') + Py_BuildValue("s#", "hello", 4) 'hell' + Py_BuildValue("y#", "hello", 4) b'hell' + Py_BuildValue("()") () + Py_BuildValue("(i)", 123) (123,) + Py_BuildValue("(ii)", 123, 456) (123, 456) + Py_BuildValue("(i,i)", 123, 456) (123, 456) + Py_BuildValue("[i,i]", 123, 456) [123, 456] + Py_BuildValue("{s:i,s:i}", + "abc", 123, "def", 456) {'abc': 123, 'def': 456} + Py_BuildValue("((ii)(ii)) (ii)", + 1, 2, 3, 4, 5, 6) (((1, 2), (3, 4)), (5, 6)) + + +.. _refcounts: + +Reference Counts +================ + +In languages like C or C++, the programmer is responsible for dynamic allocation +and deallocation of memory on the heap. In C, this is done using the functions +:cfunc:`malloc` and :cfunc:`free`. In C++, the operators :keyword:`new` and +:keyword:`delete` are used with essentially the same meaning and we'll restrict +the following discussion to the C case. + +Every block of memory allocated with :cfunc:`malloc` should eventually be +returned to the pool of available memory by exactly one call to :cfunc:`free`. +It is important to call :cfunc:`free` at the right time. If a block's address +is forgotten but :cfunc:`free` is not called for it, the memory it occupies +cannot be reused until the program terminates. This is called a :dfn:`memory +leak`. On the other hand, if a program calls :cfunc:`free` for a block and then +continues to use the block, it creates a conflict with re-use of the block +through another :cfunc:`malloc` call. This is called :dfn:`using freed memory`. +It has the same bad consequences as referencing uninitialized data --- core +dumps, wrong results, mysterious crashes. 
+ +Common causes of memory leaks are unusual paths through the code. For instance, +a function may allocate a block of memory, do some calculation, and then free +the block again. Now a change in the requirements for the function may add a +test to the calculation that detects an error condition and can return +prematurely from the function. It's easy to forget to free the allocated memory +block when taking this premature exit, especially when it is added later to the +code. Such leaks, once introduced, often go undetected for a long time: the +error exit is taken only in a small fraction of all calls, and most modern +machines have plenty of virtual memory, so the leak only becomes apparent in a +long-running process that uses the leaking function frequently. Therefore, it's +important to prevent leaks from happening by having a coding convention or +strategy that minimizes this kind of error. + +Since Python makes heavy use of :cfunc:`malloc` and :cfunc:`free`, it needs a +strategy to avoid memory leaks as well as the use of freed memory. The chosen +method is called :dfn:`reference counting`. The principle is simple: every +object contains a counter, which is incremented when a reference to the object +is stored somewhere, and which is decremented when a reference to it is deleted. +When the counter reaches zero, the last reference to the object has been deleted +and the object is freed. + +An alternative strategy is called :dfn:`automatic garbage collection`. +(Sometimes, reference counting is also referred to as a garbage collection +strategy, hence my use of "automatic" to distinguish the two.) The big +advantage of automatic garbage collection is that the user doesn't need to call +:cfunc:`free` explicitly. (Another claimed advantage is an improvement in speed +or memory usage --- this is no hard fact however.) 
The disadvantage is that for +C, there is no truly portable automatic garbage collector, while reference +counting can be implemented portably (as long as the functions :cfunc:`malloc` +and :cfunc:`free` are available --- which the C Standard guarantees). Maybe some +day a sufficiently portable automatic garbage collector will be available for C. +Until then, we'll have to live with reference counts. + +While Python uses the traditional reference counting implementation, it also +offers a cycle detector that works to detect reference cycles. This allows +applications to not worry about creating direct or indirect circular references; +these are the weakness of garbage collection implemented using only reference +counting. Reference cycles consist of objects which contain (possibly indirect) +references to themselves, so that each object in the cycle has a reference count +which is non-zero. Typical reference counting implementations are not able to +reclaim the memory belonging to any objects in a reference cycle, or referenced +from the objects in the cycle, even though there are no further references to +the cycle itself. + +The cycle detector is able to detect garbage cycles and can reclaim them so long +as there are no finalizers implemented in Python (:meth:`__del__` methods). +When there are such finalizers, the detector exposes the cycles through the +:mod:`gc` module (specifically, the +``garbage`` variable in that module). The :mod:`gc` module also exposes a way +to run the detector (the :func:`collect` function), as well as configuration +interfaces and the ability to disable the detector at runtime. The cycle +detector is considered an optional component; though it is included by default, +it can be disabled at build time using the :option:`--without-cycle-gc` option +to the :program:`configure` script on Unix platforms (including Mac OS X) or by +removing the definition of ``WITH_CYCLE_GC`` in the :file:`pyconfig.h` header on +other platforms. 
If the cycle detector is disabled in this way, the :mod:`gc` +module will not be available. + + +.. _refcountsinpython: + +Reference Counting in Python +---------------------------- + +There are two macros, ``Py_INCREF(x)`` and ``Py_DECREF(x)``, which handle the +incrementing and decrementing of the reference count. :cfunc:`Py_DECREF` also +frees the object when the count reaches zero. For flexibility, it doesn't call +:cfunc:`free` directly --- rather, it makes a call through a function pointer in +the object's :dfn:`type object`. For this purpose (and others), every object +also contains a pointer to its type object. + +The big question now remains: when to use ``Py_INCREF(x)`` and ``Py_DECREF(x)``? +Let's first introduce some terms. Nobody "owns" an object; however, you can +:dfn:`own a reference` to an object. An object's reference count is now defined +as the number of owned references to it. The owner of a reference is +responsible for calling :cfunc:`Py_DECREF` when the reference is no longer +needed. Ownership of a reference can be transferred. There are three ways to +dispose of an owned reference: pass it on, store it, or call :cfunc:`Py_DECREF`. +Forgetting to dispose of an owned reference creates a memory leak. + +It is also possible to :dfn:`borrow` [#]_ a reference to an object. The +borrower of a reference should not call :cfunc:`Py_DECREF`. The borrower must +not hold on to the object longer than the owner from which it was borrowed. +Using a borrowed reference after the owner has disposed of it risks using freed +memory and should be avoided completely. [#]_ + +The advantage of borrowing over owning a reference is that you don't need to +take care of disposing of the reference on all possible paths through the code +--- in other words, with a borrowed reference you don't run the risk of leaking +when a premature exit is taken. 
The disadvantage of borrowing over owning is +that there are some subtle situations where in seemingly correct code a borrowed +reference can be used after the owner from which it was borrowed has in fact +disposed of it. + +A borrowed reference can be changed into an owned reference by calling +:cfunc:`Py_INCREF`. This does not affect the status of the owner from which the +reference was borrowed --- it creates a new owned reference, and gives full +owner responsibilities (the new owner must dispose of the reference properly, as +well as the previous owner). + + +.. _ownershiprules: + +Ownership Rules +--------------- + +Whenever an object reference is passed into or out of a function, it is part of +the function's interface specification whether ownership is transferred with the +reference or not. + +Most functions that return a reference to an object pass on ownership with the +reference. In particular, all functions whose function it is to create a new +object, such as :cfunc:`PyInt_FromLong` and :cfunc:`Py_BuildValue`, pass +ownership to the receiver. Even if the object is not actually new, you still +receive ownership of a new reference to that object. For instance, +:cfunc:`PyInt_FromLong` maintains a cache of popular values and can return a +reference to a cached item. + +Many functions that extract objects from other objects also transfer ownership +with the reference, for instance :cfunc:`PyObject_GetAttrString`. The picture +is less clear, here, however, since a few common routines are exceptions: +:cfunc:`PyTuple_GetItem`, :cfunc:`PyList_GetItem`, :cfunc:`PyDict_GetItem`, and +:cfunc:`PyDict_GetItemString` all return references that you borrow from the +tuple, list or dictionary. + +The function :cfunc:`PyImport_AddModule` also returns a borrowed reference, even +though it may actually create the object it returns: this is possible because an +owned reference to the object is stored in ``sys.modules``. 
+ +When you pass an object reference into another function, in general, the +function borrows the reference from you --- if it needs to store it, it will use +:cfunc:`Py_INCREF` to become an independent owner. There are exactly two +important exceptions to this rule: :cfunc:`PyTuple_SetItem` and +:cfunc:`PyList_SetItem`. These functions take over ownership of the item passed +to them --- even if they fail! (Note that :cfunc:`PyDict_SetItem` and friends +don't take over ownership --- they are "normal.") + +When a C function is called from Python, it borrows references to its arguments +from the caller. The caller owns a reference to the object, so the borrowed +reference's lifetime is guaranteed until the function returns. Only when such a +borrowed reference must be stored or passed on, it must be turned into an owned +reference by calling :cfunc:`Py_INCREF`. + +The object reference returned from a C function that is called from Python must +be an owned reference --- ownership is transferred from the function to its +caller. + + +.. _thinice: + +Thin Ice +-------- + +There are a few situations where seemingly harmless use of a borrowed reference +can lead to problems. These all have to do with implicit invocations of the +interpreter, which can cause the owner of a reference to dispose of it. + +The first and most important case to know about is using :cfunc:`Py_DECREF` on +an unrelated object while borrowing a reference to a list item. For instance:: + + void + bug(PyObject *list) + { + PyObject *item = PyList_GetItem(list, 0); + + PyList_SetItem(list, 1, PyInt_FromLong(0L)); + PyObject_Print(item, stdout, 0); /* BUG! */ + } + +This function first borrows a reference to ``list[0]``, then replaces +``list[1]`` with the value ``0``, and finally prints the borrowed reference. +Looks harmless, right? But it's not! + +Let's follow the control flow into :cfunc:`PyList_SetItem`. 
The list owns +references to all its items, so when item 1 is replaced, it has to dispose of +the original item 1. Now let's suppose the original item 1 was an instance of a +user-defined class, and let's further suppose that the class defined a +:meth:`__del__` method. If this class instance has a reference count of 1, +disposing of it will call its :meth:`__del__` method. + +Since it is written in Python, the :meth:`__del__` method can execute arbitrary +Python code. Could it perhaps do something to invalidate the reference to +``item`` in :cfunc:`bug`? You bet! Assuming that the list passed into +:cfunc:`bug` is accessible to the :meth:`__del__` method, it could execute a +statement to the effect of ``del list[0]``, and assuming this was the last +reference to that object, it would free the memory associated with it, thereby +invalidating ``item``. + +The solution, once you know the source of the problem, is easy: temporarily +increment the reference count. The correct version of the function reads:: + + void + no_bug(PyObject *list) + { + PyObject *item = PyList_GetItem(list, 0); + + Py_INCREF(item); + PyList_SetItem(list, 1, PyInt_FromLong(0L)); + PyObject_Print(item, stdout, 0); + Py_DECREF(item); + } + +This is a true story. An older version of Python contained variants of this bug +and someone spent a considerable amount of time in a C debugger to figure out +why his :meth:`__del__` methods would fail... + +The second case of problems with a borrowed reference is a variant involving +threads. Normally, multiple threads in the Python interpreter can't get in each +other's way, because there is a global lock protecting Python's entire object +space. However, it is possible to temporarily release this lock using the macro +:cmacro:`Py_BEGIN_ALLOW_THREADS`, and to re-acquire it using +:cmacro:`Py_END_ALLOW_THREADS`. This is common around blocking I/O calls, to +let other threads use the processor while waiting for the I/O to complete. 
+Obviously, the following function has the same problem as the previous one:: + + void + bug(PyObject *list) + { + PyObject *item = PyList_GetItem(list, 0); + Py_BEGIN_ALLOW_THREADS + ...some blocking I/O call... + Py_END_ALLOW_THREADS + PyObject_Print(item, stdout, 0); /* BUG! */ + } + + +.. _nullpointers: + +NULL Pointers +------------- + +In general, functions that take object references as arguments do not expect you +to pass them *NULL* pointers, and will dump core (or cause later core dumps) if +you do so. Functions that return object references generally return *NULL* only +to indicate that an exception occurred. The reason for not testing for *NULL* +arguments is that functions often pass the objects they receive on to other +functions --- if each function were to test for *NULL*, there would be a lot of +redundant tests and the code would run more slowly. + +It is better to test for *NULL* only at the "source": when a pointer that may be +*NULL* is received, for example, from :cfunc:`malloc` or from a function that +may raise an exception. + +The macros :cfunc:`Py_INCREF` and :cfunc:`Py_DECREF` do not check for *NULL* +pointers --- however, their variants :cfunc:`Py_XINCREF` and :cfunc:`Py_XDECREF` +do. + +The macros for checking for a particular object type (``Pytype_Check()``) don't +check for *NULL* pointers --- again, there is much code that calls several of +these in a row to test an object against various different expected types, and +this would generate redundant tests. There are no variants with *NULL* +checking. + +The C function calling mechanism guarantees that the argument list passed to C +functions (``args`` in the examples) is never *NULL* --- in fact it guarantees +that it is always a tuple. [#]_ + +It is a severe error to ever let a *NULL* pointer "escape" to the Python user. + +.. % Frank Stajano: +.. % A pedagogically buggy example, along the lines of the previous listing, +.. 
% would be helpful here -- showing in more concrete terms what sort of +.. % actions could cause the problem. I can't very well imagine it from the +.. % description. + + +.. _cplusplus: + +Writing Extensions in C++ +========================= + +It is possible to write extension modules in C++. Some restrictions apply. If +the main program (the Python interpreter) is compiled and linked by the C +compiler, global or static objects with constructors cannot be used. This is +not a problem if the main program is linked by the C++ compiler. Functions that +will be called by the Python interpreter (in particular, module initialization +functions) have to be declared using ``extern "C"``. It is unnecessary to +enclose the Python header files in ``extern "C" {...}`` --- they use this form +already if the symbol ``__cplusplus`` is defined (all recent C++ compilers +define this symbol). + + +.. _using-cobjects: + +Providing a C API for an Extension Module +========================================= + +.. sectionauthor:: Konrad Hinsen + + +Many extension modules just provide new functions and types to be used from +Python, but sometimes the code in an extension module can be useful for other +extension modules. For example, an extension module could implement a type +"collection" which works like lists without order. Just like the standard Python +list type has a C API which permits extension modules to create and manipulate +lists, this new collection type should have a set of C functions for direct +manipulation from other extension modules. + +At first sight this seems easy: just write the functions (without declaring them +:keyword:`static`, of course), provide an appropriate header file, and document +the C API. And in fact this would work if all extension modules were always +linked statically with the Python interpreter. When modules are used as shared +libraries, however, the symbols defined in one module may not be visible to +another module. 
The details of visibility depend on the operating system; some +systems use one global namespace for the Python interpreter and all extension +modules (Windows, for example), whereas others require an explicit list of +imported symbols at module link time (AIX is one example), or offer a choice of +different strategies (most Unices). And even if symbols are globally visible, +the module whose functions one wishes to call might not have been loaded yet! + +Portability therefore requires making no assumptions about symbol +visibility. This means that all symbols in extension modules should be declared +:keyword:`static`, except for the module's initialization function, in order to +avoid name clashes with other extension modules (as discussed in section +:ref:`methodtable`). And it means that symbols that *should* be accessible from +other extension modules must be exported in a different way. + +Python provides a special mechanism to pass C-level information (pointers) from +one extension module to another one: CObjects. A CObject is a Python data type +which stores a pointer (:ctype:`void \*`). CObjects can only be created and +accessed via their C API, but they can be passed around like any other Python +object. In particular, they can be assigned to a name in an extension module's +namespace. Other extension modules can then import this module, retrieve the +value of this name, and then retrieve the pointer from the CObject. + +There are many ways in which CObjects can be used to export the C API of an +extension module. Each name could get its own CObject, or all C API pointers +could be stored in an array whose address is published in a CObject. And the +various tasks of storing and retrieving the pointers can be distributed in +different ways between the module providing the code and the client modules. 
+ +The following example demonstrates an approach that puts most of the burden on +the writer of the exporting module, which is appropriate for commonly used +library modules. It stores all C API pointers (just one in the example!) in an +array of :ctype:`void` pointers which becomes the value of a CObject. The header +file corresponding to the module provides a macro that takes care of importing +the module and retrieving its C API pointers; client modules only have to call +this macro before accessing the C API. + +The exporting module is a modification of the :mod:`spam` module from section +:ref:`extending-simpleexample`. The function :func:`spam.system` does not call +the C library function :cfunc:`system` directly, but a function +:cfunc:`PySpam_System`, which would of course do something more complicated in +reality (such as adding "spam" to every command). This function +:cfunc:`PySpam_System` is also exported to other extension modules. + +The function :cfunc:`PySpam_System` is a plain C function, declared +:keyword:`static` like everything else:: + + static int + PySpam_System(const char *command) + { + return system(command); + } + +The function :cfunc:`spam_system` is modified in a trivial way:: + + static PyObject * + spam_system(PyObject *self, PyObject *args) + { + const char *command; + int sts; + + if (!PyArg_ParseTuple(args, "s", &command)) + return NULL; + sts = PySpam_System(command); + return Py_BuildValue("i", sts); + } + +In the beginning of the module, right after the line :: + + #include "Python.h" + +two more lines must be added:: + + #define SPAM_MODULE + #include "spammodule.h" + +The ``#define`` is used to tell the header file that it is being included in the +exporting module, not a client module. 
Finally, the module's initialization +function must take care of initializing the C API pointer array:: + + PyMODINIT_FUNC + initspam(void) + { + PyObject *m; + static void *PySpam_API[PySpam_API_pointers]; + PyObject *c_api_object; + + m = Py_InitModule("spam", SpamMethods); + if (m == NULL) + return; + + /* Initialize the C API pointer array */ + PySpam_API[PySpam_System_NUM] = (void *)PySpam_System; + + /* Create a CObject containing the API pointer array's address */ + c_api_object = PyCObject_FromVoidPtr((void *)PySpam_API, NULL); + + if (c_api_object != NULL) + PyModule_AddObject(m, "_C_API", c_api_object); + } + +Note that ``PySpam_API`` is declared :keyword:`static`; otherwise the pointer +array would disappear when :func:`initspam` terminates! + +The bulk of the work is in the header file :file:`spammodule.h`, which looks +like this:: + + #ifndef Py_SPAMMODULE_H + #define Py_SPAMMODULE_H + #ifdef __cplusplus + extern "C" { + #endif + + /* Header file for spammodule */ + + /* C API functions */ + #define PySpam_System_NUM 0 + #define PySpam_System_RETURN int + #define PySpam_System_PROTO (const char *command) + + /* Total number of C API pointers */ + #define PySpam_API_pointers 1 + + + #ifdef SPAM_MODULE + /* This section is used when compiling spammodule.c */ + + static PySpam_System_RETURN PySpam_System PySpam_System_PROTO; + + #else + /* This section is used in modules that use spammodule's API */ + + static void **PySpam_API; + + #define PySpam_System \ + (*(PySpam_System_RETURN (*)PySpam_System_PROTO) PySpam_API[PySpam_System_NUM]) + + /* Return -1 and set exception on error, 0 on success. 
*/ + static int + import_spam(void) + { + PyObject *module = PyImport_ImportModule("spam"); + + if (module != NULL) { + PyObject *c_api_object = PyObject_GetAttrString(module, "_C_API"); + if (c_api_object == NULL) + return -1; + if (PyCObject_Check(c_api_object)) + PySpam_API = (void **)PyCObject_AsVoidPtr(c_api_object); + Py_DECREF(c_api_object); + } + return 0; + } + + #endif + + #ifdef __cplusplus + } + #endif + + #endif /* !defined(Py_SPAMMODULE_H) */ + +All that a client module must do in order to have access to the function +:cfunc:`PySpam_System` is to call the function (or rather macro) +:cfunc:`import_spam` in its initialization function:: + + PyMODINIT_FUNC + initclient(void) + { + PyObject *m; + + m = Py_InitModule("client", ClientMethods); + if (m == NULL) + return; + if (import_spam() < 0) + return; + /* additional initialization can happen here */ + } + +The main disadvantage of this approach is that the file :file:`spammodule.h` is +rather complicated. However, the basic structure is the same for each function +that is exported, so it has to be learned only once. + +Finally it should be mentioned that CObjects offer additional functionality, +which is especially useful for memory allocation and deallocation of the pointer +stored in a CObject. The details are described in the Python/C API Reference +Manual in the section :ref:`cobjects` and in the implementation of CObjects (files +:file:`Include/cobject.h` and :file:`Objects/cobject.c` in the Python source +code distribution). + +.. rubric:: Footnotes + +.. [#] An interface for this function already exists in the standard module :mod:`os` + --- it was chosen as a simple and straightforward example. + +.. [#] The metaphor of "borrowing" a reference is not completely correct: the owner + still has a copy of the reference. + +.. [#] Checking that the reference count is at least 1 **does not work** --- the + reference count itself could be in freed memory and may thus be reused for + another object! 
+ +.. [#] These guarantees don't hold when you use the "old" style calling convention --- + this is still found in much existing code. + diff --git a/Doc/extending/index.rst b/Doc/extending/index.rst new file mode 100644 index 0000000..6e8cf79 --- /dev/null +++ b/Doc/extending/index.rst @@ -0,0 +1,34 @@ +.. _extending-index: + +################################################## + Extending and Embedding the Python Interpreter +################################################## + +:Release: |version| +:Date: |today| + +This document describes how to write modules in C or C++ to extend the Python +interpreter with new modules. Those modules can define new functions but also +new object types and their methods. The document also describes how to embed +the Python interpreter in another application, for use as an extension language. +Finally, it shows how to compile and link extension modules so that they can be +loaded dynamically (at run time) into the interpreter, if the underlying +operating system supports this feature. + +This document assumes basic knowledge about Python. For an informal +introduction to the language, see :ref:`tutorial-index`. :ref:`reference-index` +gives a more formal definition of the language. :ref:`library-index` documents +the existing object types, functions and modules (both built-in and written in +Python) that give the language its wide application range. + +For a detailed description of the whole Python/C API, see the separate +:ref:`c-api-index`. + +.. toctree:: + :maxdepth: 2 + + extending.rst + newtypes.rst + building.rst + windows.rst + embedding.rst diff --git a/Doc/extending/newtypes.rst b/Doc/extending/newtypes.rst new file mode 100644 index 0000000..72aaf1b --- /dev/null +++ b/Doc/extending/newtypes.rst @@ -0,0 +1,1580 @@ +.. highlightlang:: c + + +.. _defining-new-types: + +****************** +Defining New Types +****************** + +.. sectionauthor:: Michael Hudson +.. sectionauthor:: Dave Kuhlman +.. 
sectionauthor:: Jim Fulton + + +As mentioned in the last chapter, Python allows the writer of an extension +module to define new types that can be manipulated from Python code, much like +strings and lists in core Python. + +This is not hard; the code for all extension types follows a pattern, but there +are some details that you need to understand before you can get started. + +.. note:: + + The way new types are defined changed dramatically (and for the better) in + Python 2.2. This document documents how to define new types for Python 2.2 and + later. If you need to support older versions of Python, you will need to refer + to `older versions of this documentation + <http://www.python.org/doc/versions/>`_. + + +.. _dnt-basics: + +The Basics +========== + +The Python runtime sees all Python objects as variables of type +:ctype:`PyObject\*`. A :ctype:`PyObject` is not a very magnificent object - it +just contains the refcount and a pointer to the object's "type object". This is +where the action is; the type object determines which (C) functions get called +when, for instance, an attribute gets looked up on an object or it is multiplied +by another object. These C functions are called "type methods" to distinguish +them from things like ``[].append`` (which we call "object methods"). + +So, if you want to define a new object type, you need to create a new type +object. + +This sort of thing can only be explained by example, so here's a minimal, but +complete, module that defines a new type: + +.. literalinclude:: ../includes/noddy.c + + +Now that's quite a bit to take in at once, but hopefully bits will seem familiar +from the last chapter. + +The first bit that will be new is:: + + typedef struct { + PyObject_HEAD + } noddy_NoddyObject; + +This is what a Noddy object will contain---in this case, nothing more than every +Python object contains, namely a refcount and a pointer to a type object. These +are the fields the ``PyObject_HEAD`` macro brings in.
The reason for the macro +is to standardize the layout and to enable special debugging fields in debug +builds. Note that there is no semicolon after the ``PyObject_HEAD`` macro; one +is included in the macro definition. Be wary of adding one by accident; it's +easy to do from habit, and your compiler might not complain, but someone else's +probably will! (On Windows, MSVC is known to call this an error and refuse to +compile the code.) + +For contrast, let's take a look at the corresponding definition for standard +Python integers:: + + typedef struct { + PyObject_HEAD + long ob_ival; + } PyIntObject; + +Moving on, we come to the crunch --- the type object. :: + + static PyTypeObject noddy_NoddyType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "noddy.Noddy", /*tp_name*/ + sizeof(noddy_NoddyObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + "Noddy objects", /* tp_doc */ + }; + +Now if you go and look up the definition of :ctype:`PyTypeObject` in +:file:`object.h` you'll see that it has many more fields than the definition +above. The remaining fields will be filled with zeros by the C compiler, and +it's common practice to not specify them explicitly unless you need them. + +This is so important that we're going to pick the top of it apart still +further:: + + PyObject_HEAD_INIT(NULL) + +This line is a bit of a wart; what we'd like to write is:: + + PyObject_HEAD_INIT(&PyType_Type) + +as the type of a type object is "type", but this isn't strictly conforming C and +some compilers complain. Fortunately, this member will be filled in for us by +:cfunc:`PyType_Ready`.
:: + + 0, /* ob_size */ + +The :attr:`ob_size` field of the header is not used; its presence in the type +structure is a historical artifact that is maintained for binary compatibility +with extension modules compiled for older versions of Python. Always set this +field to zero. :: + + "noddy.Noddy", /* tp_name */ + +The name of our type. This will appear in the default textual representation of +our objects and in some error messages, for example:: + + >>> "" + noddy.new_noddy() + Traceback (most recent call last): + File "<stdin>", line 1, in ? + TypeError: cannot add type "noddy.Noddy" to string + +Note that the name is a dotted name that includes both the module name and the +name of the type within the module. The module in this case is :mod:`noddy` and +the type is :class:`Noddy`, so we set the type name to :class:`noddy.Noddy`. :: + + sizeof(noddy_NoddyObject), /* tp_basicsize */ + +This is so that Python knows how much memory to allocate when you call +:cfunc:`PyObject_New`. + +.. note:: + + If you want your type to be subclassable from Python, and your type has the same + :attr:`tp_basicsize` as its base type, you may have problems with multiple + inheritance. A Python subclass of your type will have to list your type first + in its :attr:`__bases__`, or else it will not be able to call your type's + :meth:`__new__` method without getting an error. You can avoid this problem by + ensuring that your type has a larger value for :attr:`tp_basicsize` than its + base type does. Most of the time, this will be true anyway, because either your + base type will be :class:`object`, or else you will be adding data members to + your base type, and therefore increasing its size. + +:: + + 0, /* tp_itemsize */ + +This has to do with variable length objects like lists and strings. Ignore this +for now. + +Skipping a number of type methods that we don't provide, we set the class flags +to :const:`Py_TPFLAGS_DEFAULT`.
:: + + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + +All types should include this constant in their flags. It enables all of the +members defined by the current version of Python. + +We provide a doc string for the type in :attr:`tp_doc`. :: + + "Noddy objects", /* tp_doc */ + +Now we get into the type methods, the things that make your objects different +from the others. We aren't going to implement any of these in this version of +the module. We'll expand this example later to have more interesting behavior. + +For now, all we want to be able to do is to create new :class:`Noddy` objects. +To enable object creation, we have to provide a :attr:`tp_new` implementation. +In this case, we can just use the default implementation provided by the API +function :cfunc:`PyType_GenericNew`. We'd like to just assign this to the +:attr:`tp_new` slot, but we can't, for portability's sake. On some platforms or +compilers, we can't statically initialize a structure member with a function +defined in another C module, so, instead, we'll assign the :attr:`tp_new` slot +in the module initialization function just before calling +:cfunc:`PyType_Ready`:: + + noddy_NoddyType.tp_new = PyType_GenericNew; + if (PyType_Ready(&noddy_NoddyType) < 0) + return; + +All the other type methods are *NULL*, so we'll go over them later --- that's +for a later section! + +Everything else in the file should be familiar, except for some code in +:cfunc:`initnoddy`:: + + if (PyType_Ready(&noddy_NoddyType) < 0) + return; + +This initializes the :class:`Noddy` type, filling in a number of members, +including :attr:`ob_type` that we initially set to *NULL*. :: + + PyModule_AddObject(m, "Noddy", (PyObject *)&noddy_NoddyType); + +This adds the type to the module dictionary. This allows us to create +:class:`Noddy` instances by calling the :class:`Noddy` class:: + + >>> import noddy + >>> mynoddy = noddy.Noddy() + +That's it!
All that remains is to build it; put the above code in a file called +:file:`noddy.c` and :: + + from distutils.core import setup, Extension + setup(name="noddy", version="1.0", + ext_modules=[Extension("noddy", ["noddy.c"])]) + +in a file called :file:`setup.py`; then typing :: + + $ python setup.py build + +at a shell should produce a file :file:`noddy.so` in a subdirectory; move to +that directory and fire up Python --- you should be able to ``import noddy`` and +play around with Noddy objects. + +.. % $ <-- bow to font-lock ;-( + +That wasn't so hard, was it? + +Of course, the current Noddy type is pretty uninteresting. It has no data and +doesn't do anything. It can't even be subclassed. + + +Adding data and methods to the Basic example +-------------------------------------------- + +Let's extend the basic example to add some data and methods. Let's also make +the type usable as a base class. We'll create a new module, :mod:`noddy2` that +adds these capabilities: + +.. literalinclude:: ../includes/noddy2.c + + +This version of the module has a number of changes. + +We've added an extra include:: + + #include "structmember.h" + +This include provides declarations that we use to handle attributes, as +described a bit later. + +The name of the :class:`Noddy` object structure has been shortened to +:class:`Noddy`. The type object name has been shortened to :class:`NoddyType`. + +The :class:`Noddy` type now has three data attributes, *first*, *last*, and +*number*. The *first* and *last* variables are Python strings containing first +and last names. The *number* attribute is an integer. + +The object structure is updated accordingly:: + + typedef struct { + PyObject_HEAD + PyObject *first; + PyObject *last; + int number; + } Noddy; + +Because we now have data to manage, we have to be more careful about object +allocation and deallocation.
At a minimum, we need a deallocation method:: + + static void + Noddy_dealloc(Noddy* self) + { + Py_XDECREF(self->first); + Py_XDECREF(self->last); + self->ob_type->tp_free((PyObject*)self); + } + +which is assigned to the :attr:`tp_dealloc` member:: + + (destructor)Noddy_dealloc, /*tp_dealloc*/ + +This method decrements the reference counts of the two Python attributes. We use +:cfunc:`Py_XDECREF` here because the :attr:`first` and :attr:`last` members +could be *NULL*. It then calls the :attr:`tp_free` member of the object's type +to free the object's memory. Note that the object's type might not be +:class:`NoddyType`, because the object may be an instance of a subclass. + +We want to make sure that the first and last names are initialized to empty +strings, so we provide a new method:: + + static PyObject * + Noddy_new(PyTypeObject *type, PyObject *args, PyObject *kwds) + { + Noddy *self; + + self = (Noddy *)type->tp_alloc(type, 0); + if (self != NULL) { + self->first = PyString_FromString(""); + if (self->first == NULL) + { + Py_DECREF(self); + return NULL; + } + + self->last = PyString_FromString(""); + if (self->last == NULL) + { + Py_DECREF(self); + return NULL; + } + + self->number = 0; + } + + return (PyObject *)self; + } + +and install it in the :attr:`tp_new` member:: + + Noddy_new, /* tp_new */ + +The new member is responsible for creating (as opposed to initializing) objects +of the type. It is exposed in Python as the :meth:`__new__` method. See the +paper titled "Unifying types and classes in Python" for a detailed discussion of +the :meth:`__new__` method. One reason to implement a new method is to assure +the initial values of instance variables. In this case, we use the new method +to make sure that the initial values of the members :attr:`first` and +:attr:`last` are not *NULL*. If we didn't care whether the initial values were +*NULL*, we could have used :cfunc:`PyType_GenericNew` as our new method, as we +did before. 
:cfunc:`PyType_GenericNew` initializes all of the instance variable +members to *NULL*. + +The new method is a static method that is passed the type being instantiated and +any arguments passed when the type was called, and that returns the new object +created. New methods always accept positional and keyword arguments, but they +often ignore the arguments, leaving the argument handling to initializer +methods. Note that if the type supports subclassing, the type passed may not be +the type being defined. The new method calls the tp_alloc slot to allocate +memory. We don't fill the :attr:`tp_alloc` slot ourselves. Rather +:cfunc:`PyType_Ready` fills it for us by inheriting it from our base class, +which is :class:`object` by default. Most types use the default allocation. + +.. note:: + + If you are creating a co-operative :attr:`tp_new` (one that calls a base type's + :attr:`tp_new` or :meth:`__new__`), you must *not* try to determine what method + to call using method resolution order at runtime. Always statically determine + what type you are going to call, and call its :attr:`tp_new` directly, or via + ``type->tp_base->tp_new``. If you do not do this, Python subclasses of your + type that also inherit from other Python-defined classes may not work correctly. + (Specifically, you may not be able to create instances of such subclasses + without getting a :exc:`TypeError`.) + +We provide an initialization function:: + + static int + Noddy_init(Noddy *self, PyObject *args, PyObject *kwds) + { + PyObject *first=NULL, *last=NULL, *tmp; + + static char *kwlist[] = {"first", "last", "number", NULL}; + + if (! PyArg_ParseTupleAndKeywords(args, kwds, "|OOi", kwlist, + &first, &last, + &self->number)) + return -1; + + if (first) { + tmp = self->first; + Py_INCREF(first); + self->first = first; + Py_XDECREF(tmp); + } + + if (last) { + tmp = self->last; + Py_INCREF(last); + self->last = last; + Py_XDECREF(tmp); + } + + return 0; + } + +by filling the :attr:`tp_init` slot. 
:: + + (initproc)Noddy_init, /* tp_init */ + +The :attr:`tp_init` slot is exposed in Python as the :meth:`__init__` method. It +is used to initialize an object after it's created. Unlike the new method, we +can't guarantee that the initializer is called. The initializer isn't called +when unpickling objects and it can be overridden. Our initializer accepts +arguments to provide initial values for our instance. Initializers always accept +positional and keyword arguments. + +Initializers can be called multiple times. Anyone can call the :meth:`__init__` +method on our objects. For this reason, we have to be extra careful when +assigning the new values. We might be tempted, for example, to assign the +:attr:`first` member like this:: + + if (first) { + Py_XDECREF(self->first); + Py_INCREF(first); + self->first = first; + } + +But this would be risky. Our type doesn't restrict the type of the +:attr:`first` member, so it could be any kind of object. It could have a +destructor that causes code to be executed that tries to access the +:attr:`first` member. To be paranoid and protect ourselves against this +possibility, we almost always reassign members before decrementing their +reference counts. When don't we have to do this? + +* when we absolutely know that the reference count is greater than 1 + +* when we know that deallocation of the object [#]_ will not cause any calls + back into our type's code + +* when decrementing a reference count in a :attr:`tp_dealloc` handler when + garbage collection is not supported [#]_ + +We want to expose our instance variables as attributes. There are a +number of ways to do that.
The simplest way is to define member definitions:: + + static PyMemberDef Noddy_members[] = { + {"first", T_OBJECT_EX, offsetof(Noddy, first), 0, + "first name"}, + {"last", T_OBJECT_EX, offsetof(Noddy, last), 0, + "last name"}, + {"number", T_INT, offsetof(Noddy, number), 0, + "noddy number"}, + {NULL} /* Sentinel */ + }; + +and put the definitions in the :attr:`tp_members` slot:: + + Noddy_members, /* tp_members */ + +Each member definition has a member name, type, offset, access flags and +documentation string. See the "Generic Attribute Management" section below for +details. + +A disadvantage of this approach is that it doesn't provide a way to restrict the +types of objects that can be assigned to the Python attributes. We expect the +first and last names to be strings, but any Python objects can be assigned. +Further, the attributes can be deleted, setting the C pointers to *NULL*. Even +though we can make sure the members are initialized to non-*NULL* values, the +members can be set to *NULL* if the attributes are deleted. + +We define a single method, :meth:`name`, that outputs the object's name as the +concatenation of the first and last names. :: + + static PyObject * + Noddy_name(Noddy* self) + { + static PyObject *format = NULL; + PyObject *args, *result; + + if (format == NULL) { + format = PyString_FromString("%s %s"); + if (format == NULL) + return NULL; + } + + if (self->first == NULL) { + PyErr_SetString(PyExc_AttributeError, "first"); + return NULL; + } + + if (self->last == NULL) { + PyErr_SetString(PyExc_AttributeError, "last"); + return NULL; + } + + args = Py_BuildValue("OO", self->first, self->last); + if (args == NULL) + return NULL; + + result = PyString_Format(format, args); + Py_DECREF(args); + + return result; + } + +The method is implemented as a C function that takes a :class:`Noddy` (or +:class:`Noddy` subclass) instance as the first argument. Methods always take an +instance as the first argument.
Methods often take positional and keyword +arguments as well, but in this case we don't take any and don't need to accept +a positional argument tuple or keyword argument dictionary. This method is +equivalent to the Python method:: + + def name(self): + return "%s %s" % (self.first, self.last) + +Note that we have to check for the possibility that our :attr:`first` and +:attr:`last` members are *NULL*. This is because they can be deleted, in which +case they are set to *NULL*. It would be better to prevent deletion of these +attributes and to restrict the attribute values to be strings. We'll see how to +do that in the next section. + +Now that we've defined the method, we need to create an array of method +definitions:: + + static PyMethodDef Noddy_methods[] = { + {"name", (PyCFunction)Noddy_name, METH_NOARGS, + "Return the name, combining the first and last name" + }, + {NULL} /* Sentinel */ + }; + +and assign them to the :attr:`tp_methods` slot:: + + Noddy_methods, /* tp_methods */ + +Note that we used the :const:`METH_NOARGS` flag to indicate that the method is +passed no arguments. + +Finally, we'll make our type usable as a base class. We've written our methods +carefully so far so that they don't make any assumptions about the type of the +object being created or used, so all we need to do is to add the +:const:`Py_TPFLAGS_BASETYPE` to our class flag definition:: + + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + +We rename :cfunc:`initnoddy` to :cfunc:`initnoddy2` and update the module name +passed to :cfunc:`Py_InitModule3`.
+ +Finally, we update our :file:`setup.py` file to build the new module:: + + from distutils.core import setup, Extension + setup(name="noddy", version="1.0", + ext_modules=[ + Extension("noddy", ["noddy.c"]), + Extension("noddy2", ["noddy2.c"]), + ]) + + +Providing finer control over data attributes +-------------------------------------------- + +In this section, we'll provide finer control over how the :attr:`first` and +:attr:`last` attributes are set in the :class:`Noddy` example. In the previous +version of our module, the instance variables :attr:`first` and :attr:`last` +could be set to non-string values or even deleted. We want to make sure that +these attributes always contain strings. + +.. literalinclude:: ../includes/noddy3.c + + +To provide greater control over the :attr:`first` and :attr:`last` attributes, +we'll use custom getter and setter functions. Here are the functions for +getting and setting the :attr:`first` attribute:: + + Noddy_getfirst(Noddy *self, void *closure) + { + Py_INCREF(self->first); + return self->first; + } + + static int + Noddy_setfirst(Noddy *self, PyObject *value, void *closure) + { + if (value == NULL) { + PyErr_SetString(PyExc_TypeError, "Cannot delete the first attribute"); + return -1; + } + + if (! PyString_Check(value)) { + PyErr_SetString(PyExc_TypeError, + "The first attribute value must be a string"); + return -1; + } + + Py_DECREF(self->first); + Py_INCREF(value); + self->first = value; + + return 0; + } + +The getter function is passed a :class:`Noddy` object and a "closure", which is a +void pointer. In this case, the closure is ignored. (The closure supports an +advanced usage in which definition data is passed to the getter and setter. This +could, for example, be used to allow a single set of getter and setter functions +that decide the attribute to get or set based on data in the closure.) + +The setter function is passed the :class:`Noddy` object, the new value, and the +closure.
The new value may be *NULL*, in which case the attribute is being +deleted. In our setter, we raise an error if the attribute is deleted or if the +attribute value is not a string. + +We create an array of :ctype:`PyGetSetDef` structures:: + + static PyGetSetDef Noddy_getseters[] = { + {"first", + (getter)Noddy_getfirst, (setter)Noddy_setfirst, + "first name", + NULL}, + {"last", + (getter)Noddy_getlast, (setter)Noddy_setlast, + "last name", + NULL}, + {NULL} /* Sentinel */ + }; + +and register it in the :attr:`tp_getset` slot:: + + Noddy_getseters, /* tp_getset */ + +to register our attribute getters and setters. + +The last item in a :ctype:`PyGetSetDef` structure is the closure mentioned +above. In this case, we aren't using the closure, so we just pass *NULL*. + +We also remove the member definitions for these attributes:: + + static PyMemberDef Noddy_members[] = { + {"number", T_INT, offsetof(Noddy, number), 0, + "noddy number"}, + {NULL} /* Sentinel */ + }; + +We also need to update the :attr:`tp_init` handler to only allow strings [#]_ to +be passed:: + + static int + Noddy_init(Noddy *self, PyObject *args, PyObject *kwds) + { + PyObject *first=NULL, *last=NULL, *tmp; + + static char *kwlist[] = {"first", "last", "number", NULL}; + + if (! PyArg_ParseTupleAndKeywords(args, kwds, "|SSi", kwlist, + &first, &last, + &self->number)) + return -1; + + if (first) { + tmp = self->first; + Py_INCREF(first); + self->first = first; + Py_DECREF(tmp); + } + + if (last) { + tmp = self->last; + Py_INCREF(last); + self->last = last; + Py_DECREF(tmp); + } + + return 0; + } + +With these changes, we can assure that the :attr:`first` and :attr:`last` +members are never *NULL* so we can remove checks for *NULL* values in almost all +cases. This means that most of the :cfunc:`Py_XDECREF` calls can be converted to +:cfunc:`Py_DECREF` calls.
The only place we can't change these calls is in the +deallocator, where there is the possibility that the initialization of these +members failed in the constructor. + +We also rename the module initialization function and module name in the +initialization function, as we did before, and we add an extra definition to the +:file:`setup.py` file. + + +Supporting cyclic garbage collection +------------------------------------ + +Python has a cyclic-garbage collector that can identify unneeded objects even +when their reference counts are not zero. This can happen when objects are +involved in cycles. For example, consider:: + + >>> l = [] + >>> l.append(l) + >>> del l + +In this example, we create a list that contains itself. When we delete it, it +still has a reference from itself. Its reference count doesn't drop to zero. +Fortunately, Python's cyclic-garbage collector will eventually figure out that +the list is garbage and free it. + +In the second version of the :class:`Noddy` example, we allowed any kind of +object to be stored in the :attr:`first` or :attr:`last` attributes. [#]_ This +means that :class:`Noddy` objects can participate in cycles:: + + >>> import noddy2 + >>> n = noddy2.Noddy() + >>> l = [n] + >>> n.first = l + +This is pretty silly, but it gives us an excuse to add support for the +cyclic-garbage collector to the :class:`Noddy` example. To support cyclic +garbage collection, types need to fill two slots and set a class flag that +enables these slots: + +.. 
literalinclude:: ../includes/noddy4.c + + +The traversal method provides access to subobjects that could participate in +cycles:: + + static int + Noddy_traverse(Noddy *self, visitproc visit, void *arg) + { + int vret; + + if (self->first) { + vret = visit(self->first, arg); + if (vret != 0) + return vret; + } + if (self->last) { + vret = visit(self->last, arg); + if (vret != 0) + return vret; + } + + return 0; + } + +For each subobject that can participate in cycles, we need to call the +:cfunc:`visit` function, which is passed to the traversal method. The +:cfunc:`visit` function takes as arguments the subobject and the extra argument +*arg* passed to the traversal method. It returns an integer value that must be +returned if it is non-zero. + +Python 2.4 and higher provide a :cfunc:`Py_VISIT` macro that automates calling +visit functions. With :cfunc:`Py_VISIT`, :cfunc:`Noddy_traverse` can be +simplified:: + + static int + Noddy_traverse(Noddy *self, visitproc visit, void *arg) + { + Py_VISIT(self->first); + Py_VISIT(self->last); + return 0; + } + +.. note:: + + Note that the :attr:`tp_traverse` implementation must name its arguments exactly + *visit* and *arg* in order to use :cfunc:`Py_VISIT`. This is to encourage + uniformity across these boring implementations. + +We also need to provide a method for clearing any subobjects that can +participate in cycles. We implement the method and reimplement the deallocator +to use it:: + + static int + Noddy_clear(Noddy *self) + { + PyObject *tmp; + + tmp = self->first; + self->first = NULL; + Py_XDECREF(tmp); + + tmp = self->last; + self->last = NULL; + Py_XDECREF(tmp); + + return 0; + } + + static void + Noddy_dealloc(Noddy* self) + { + Noddy_clear(self); + self->ob_type->tp_free((PyObject*)self); + } + +Notice the use of a temporary variable in :cfunc:`Noddy_clear`. We use the +temporary variable so that we can set each member to *NULL* before decrementing +its reference count. 
We do this because, as was discussed earlier, if the +reference count drops to zero, we might cause code to run that calls back into +the object. In addition, because we now support garbage collection, we also +have to worry about code being run that triggers garbage collection. If garbage +collection is run, our :attr:`tp_traverse` handler could get called. We can't +take a chance of having :cfunc:`Noddy_traverse` called when a member's reference +count has dropped to zero and its value hasn't been set to *NULL*. + +Python 2.4 and higher provide a :cfunc:`Py_CLEAR` that automates the careful +decrementing of reference counts. With :cfunc:`Py_CLEAR`, the +:cfunc:`Noddy_clear` function can be simplified:: + + static int + Noddy_clear(Noddy *self) + { + Py_CLEAR(self->first); + Py_CLEAR(self->last); + return 0; + } + +Finally, we add the :const:`Py_TPFLAGS_HAVE_GC` flag to the class flags:: + + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + +That's pretty much it. If we had written custom :attr:`tp_alloc` or +:attr:`tp_free` slots, we'd need to modify them for cyclic-garbage collection. +Most extensions will use the versions automatically provided. + + +Subclassing other types +----------------------- + +It is possible to create new extension types that are derived from existing +types. It is easiest to inherit from the built in types, since an extension can +easily use the :class:`PyTypeObject` it needs. It can be difficult to share +these :class:`PyTypeObject` structures between extension modules. + +In this example we will create a :class:`Shoddy` type that inherits from the +builtin :class:`list` type. The new type will be completely compatible with +regular lists, but will have an additional :meth:`increment` method that +increases an internal counter. :: + + >>> import shoddy + >>> s = shoddy.Shoddy(range(3)) + >>> s.extend(s) + >>> print len(s) + 6 + >>> print s.increment() + 1 + >>> print s.increment() + 2 + +.. 
literalinclude:: ../includes/shoddy.c + + +As you can see, the source code closely resembles the :class:`Noddy` examples in +previous sections. We will break down the main differences between them. :: + + typedef struct { + PyListObject list; + int state; + } Shoddy; + +The primary difference for derived type objects is that the base type's object +structure must be the first value. The base type will already include the +:cfunc:`PyObject_HEAD` at the beginning of its structure. + +When a Python object is a :class:`Shoddy` instance, its *PyObject\** pointer can +be safely cast to both *PyListObject\** and *Shoddy\**. :: + + static int + Shoddy_init(Shoddy *self, PyObject *args, PyObject *kwds) + { + if (PyList_Type.tp_init((PyObject *)self, args, kwds) < 0) + return -1; + self->state = 0; + return 0; + } + +In the :attr:`__init__` method for our type, we can see how to call through to +the :attr:`__init__` method of the base type. + +This pattern is important when writing a type with custom :attr:`new` and +:attr:`dealloc` methods. The :attr:`new` method should not actually create the +memory for the object with :attr:`tp_alloc`, that will be handled by the base +class when calling its :attr:`tp_new`. + +When filling out the :cfunc:`PyTypeObject` for the :class:`Shoddy` type, you see +a slot for :cfunc:`tp_base`. Due to cross platform compiler issues, you can't +fill that field directly with the :cfunc:`PyList_Type`; it can be done later in +the module's :cfunc:`init` function. :: + + PyMODINIT_FUNC + initshoddy(void) + { + PyObject *m; + + ShoddyType.tp_base = &PyList_Type; + if (PyType_Ready(&ShoddyType) < 0) + return; + + m = Py_InitModule3("shoddy", NULL, "Shoddy module"); + if (m == NULL) + return; + + Py_INCREF(&ShoddyType); + PyModule_AddObject(m, "Shoddy", (PyObject *) &ShoddyType); + } + +Before calling :cfunc:`PyType_Ready`, the type structure must have the +:attr:`tp_base` slot filled in. 
When we are deriving a new type, it is not +necessary to fill out the :attr:`tp_alloc` slot with :cfunc:`PyType_GenericNew` +-- the allocate function from the base type will be inherited. + +After that, calling :cfunc:`PyType_Ready` and adding the type object to the +module is the same as with the basic :class:`Noddy` examples. + + +.. _dnt-type-methods: + +Type Methods +============ + +This section aims to give a quick fly-by on the various type methods you can +implement and what they do. + +Here is the definition of :ctype:`PyTypeObject`, with some fields only used in +debug builds omitted: + +.. literalinclude:: ../includes/typestruct.h + + +Now that's a *lot* of methods. Don't worry too much though - if you have a type +you want to define, the chances are very good that you will only implement a +handful of these. + +As you probably expect by now, we're going to go over this and give more +information about the various handlers. We won't go in the order they are +defined in the structure, because there is a lot of historical baggage that +impacts the ordering of the fields; be sure your type initialization keeps the +fields in the right order! It's often easiest to find an example that includes +all the fields you need (even if they're initialized to ``0``) and then change +the values to suit your new type. :: + + char *tp_name; /* For printing */ + +The name of the type - as mentioned in the last section, this will appear in +various places, almost entirely for diagnostic purposes. Try to choose something +that will be helpful in such a situation! :: + + int tp_basicsize, tp_itemsize; /* For allocation */ + +These fields tell the runtime how much memory to allocate when new objects of +this type are created. Python has some built-in support for variable length +structures (think: strings, lists) which is where the :attr:`tp_itemsize` field +comes in. This will be dealt with later. 
:: + + char *tp_doc; + +Here you can put a string (or its address) that you want returned when the +Python script references ``obj.__doc__`` to retrieve the doc string. + +Now we come to the basic type methods---the ones most extension types will +implement. + + +Finalization and De-allocation +------------------------------ + +.. index:: + single: object; deallocation + single: deallocation, object + single: object; finalization + single: finalization, of objects + +:: + + destructor tp_dealloc; + +This function is called when the reference count of the instance of your type is +reduced to zero and the Python interpreter wants to reclaim it. If your type +has memory to free or other clean-up to perform, put it here. The object itself +needs to be freed here as well. Here is an example of this function:: + + static void + newdatatype_dealloc(newdatatypeobject * obj) + { + free(obj->obj_UnderlyingDatatypePtr); + obj->ob_type->tp_free(obj); + } + +.. index:: + single: PyErr_Fetch() + single: PyErr_Restore() + +One important requirement of the deallocator function is that it leaves any +pending exceptions alone. This is important since deallocators are frequently +called as the interpreter unwinds the Python stack; when the stack is unwound +due to an exception (rather than normal returns), nothing is done to protect the +deallocators from seeing that an exception has already been set. Any actions +which a deallocator performs which may cause additional Python code to be +executed may detect that an exception has been set. This can lead to misleading +errors from the interpreter. The proper way to protect against this is to save +a pending exception before performing the unsafe action, and restoring it when +done. 
This can be done using the :cfunc:`PyErr_Fetch` and +:cfunc:`PyErr_Restore` functions:: + + static void + my_dealloc(PyObject *obj) + { + MyObject *self = (MyObject *) obj; + PyObject *cbresult; + + if (self->my_callback != NULL) { + PyObject *err_type, *err_value, *err_traceback; + int have_error = PyErr_Occurred() ? 1 : 0; + + if (have_error) + PyErr_Fetch(&err_type, &err_value, &err_traceback); + + cbresult = PyObject_CallObject(self->my_callback, NULL); + if (cbresult == NULL) + PyErr_WriteUnraisable(self->my_callback); + else + Py_DECREF(cbresult); + + if (have_error) + PyErr_Restore(err_type, err_value, err_traceback); + + Py_DECREF(self->my_callback); + } + obj->ob_type->tp_free((PyObject*)self); + } + + +Object Presentation +------------------- + +.. index:: + builtin: repr + builtin: str + +In Python, there are two ways to generate a textual representation of an object: +the :func:`repr` function, and the :func:`str` function. (The :func:`print` +function just calls :func:`str`.) These handlers are both optional. + +:: + + reprfunc tp_repr; + reprfunc tp_str; + +The :attr:`tp_repr` handler should return a string object containing a +representation of the instance for which it is called. Here is a simple +example:: + + static PyObject * + newdatatype_repr(newdatatypeobject * obj) + { + return PyString_FromFormat("Repr-ified_newdatatype{{size:%d}}", + obj->obj_UnderlyingDatatypePtr->size); + } + +If no :attr:`tp_repr` handler is specified, the interpreter will supply a +representation that uses the type's :attr:`tp_name` and a uniquely-identifying +value for the object. + +The :attr:`tp_str` handler is to :func:`str` what the :attr:`tp_repr` handler +described above is to :func:`repr`; that is, it is called when Python code calls +:func:`str` on an instance of your object. Its implementation is very similar +to the :attr:`tp_repr` function, but the resulting string is intended for human +consumption.
If :attr:`tp_str` is not specified, the :attr:`tp_repr` handler is +used instead. + +Here is a simple example:: + + static PyObject * + newdatatype_str(newdatatypeobject * obj) + { + return PyString_FromFormat("Stringified_newdatatype{{size:%d}}", + obj->obj_UnderlyingDatatypePtr->size); + } + +The print function will be called whenever Python needs to "print" an instance +of the type. For example, if 'node' is an instance of type TreeNode, then the +print function is called when Python code calls:: + + print node + +There is a flags argument and one flag, :const:`Py_PRINT_RAW`, and it suggests +that you print without string quotes and possibly without interpreting escape +sequences. + +The print function receives a file object as an argument. You will likely want +to write to that file object. + +Here is a sample print function:: + + static int + newdatatype_print(newdatatypeobject *obj, FILE *fp, int flags) + { + if (flags & Py_PRINT_RAW) { + fprintf(fp, "<{newdatatype object--size: %d}>", + obj->obj_UnderlyingDatatypePtr->size); + } + else { + fprintf(fp, "\"<{newdatatype object--size: %d}>\"", + obj->obj_UnderlyingDatatypePtr->size); + } + return 0; + } + + +Attribute Management +-------------------- + +For every object which can support attributes, the corresponding type must +provide the functions that control how the attributes are resolved. There needs +to be a function which can retrieve attributes (if any are defined), and another +to set attributes (if setting attributes is allowed). Removing an attribute is +a special case, for which the new value passed to the handler is *NULL*. + +Python supports two pairs of attribute handlers; a type that supports attributes +only needs to implement the functions for one pair. The difference is that one +pair takes the name of the attribute as a :ctype:`char\*`, while the other +accepts a :ctype:`PyObject\*`. Each type can use whichever pair makes more +sense for the implementation's convenience.
:: + + getattrfunc tp_getattr; /* char * version */ + setattrfunc tp_setattr; + /* ... */ + getattrofunc tp_getattro; /* PyObject * version */ + setattrofunc tp_setattro; + +If accessing attributes of an object is always a simple operation (this will be +explained shortly), there are generic implementations which can be used to +provide the :ctype:`PyObject\*` version of the attribute management functions. +The actual need for type-specific attribute handlers almost completely +disappeared starting with Python 2.2, though there are many examples which have +not been updated to use some of the new generic mechanism that is available. + + +Generic Attribute Management +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. versionadded:: 2.2 + +Most extension types only use *simple* attributes. So, what makes the +attributes simple? There are only a couple of conditions that must be met: + +#. The name of the attributes must be known when :cfunc:`PyType_Ready` is + called. + +#. No special processing is needed to record that an attribute was looked up or + set, nor do actions need to be taken based on the value. + +Note that this list does not place any restrictions on the values of the +attributes, when the values are computed, or how relevant data is stored. + +When :cfunc:`PyType_Ready` is called, it uses three tables referenced by the +type object to create *descriptors* which are placed in the dictionary of the +type object. Each descriptor controls access to one attribute of the instance +object. Each of the tables is optional; if all three are *NULL*, instances of +the type will only have attributes that are inherited from their base type, and +should leave the :attr:`tp_getattro` and :attr:`tp_setattro` fields *NULL* as +well, allowing the base type to handle attributes.
+ +The tables are declared as three fields of the type object:: + + struct PyMethodDef *tp_methods; + struct PyMemberDef *tp_members; + struct PyGetSetDef *tp_getset; + +If :attr:`tp_methods` is not *NULL*, it must refer to an array of +:ctype:`PyMethodDef` structures. Each entry in the table is an instance of this +structure:: + + typedef struct PyMethodDef { + char *ml_name; /* method name */ + PyCFunction ml_meth; /* implementation function */ + int ml_flags; /* flags */ + char *ml_doc; /* docstring */ + } PyMethodDef; + +One entry should be defined for each method provided by the type; no entries are +needed for methods inherited from a base type. One additional entry is needed +at the end; it is a sentinel that marks the end of the array. The +:attr:`ml_name` field of the sentinel must be *NULL*. + +XXX Need to refer to some unified discussion of the structure fields, shared +with the next section. + +The second table is used to define attributes which map directly to data stored +in the instance. A variety of primitive C types are supported, and access may +be read-only or read-write. The structures in the table are defined as:: + + typedef struct PyMemberDef { + char *name; + int type; + int offset; + int flags; + char *doc; + } PyMemberDef; + +For each entry in the table, a descriptor will be constructed and added to the +type which will be able to extract a value from the instance structure. The +:attr:`type` field should contain one of the type codes defined in the +:file:`structmember.h` header; the value will be used to determine how to +convert Python values to and from C values. The :attr:`flags` field is used to +store flags which control how the attribute can be accessed. + +XXX Need to move some of this to a shared section! + +The following flag constants are defined in :file:`structmember.h`; they may be +combined using bitwise-OR. 
+ ++---------------------------+----------------------------------------------+ +| Constant | Meaning | ++===========================+==============================================+ +| :const:`READONLY` | Never writable. | ++---------------------------+----------------------------------------------+ +| :const:`RO` | Shorthand for :const:`READONLY`. | ++---------------------------+----------------------------------------------+ +| :const:`READ_RESTRICTED` | Not readable in restricted mode. | ++---------------------------+----------------------------------------------+ +| :const:`WRITE_RESTRICTED` | Not writable in restricted mode. | ++---------------------------+----------------------------------------------+ +| :const:`RESTRICTED` | Not readable or writable in restricted mode. | ++---------------------------+----------------------------------------------+ + +.. index:: + single: READONLY + single: RO + single: READ_RESTRICTED + single: WRITE_RESTRICTED + single: RESTRICTED + +An interesting advantage of using the :attr:`tp_members` table to build +descriptors that are used at runtime is that any attribute defined this way can +have an associated doc string simply by providing the text in the table. An +application can use the introspection API to retrieve the descriptor from the +class object, and get the doc string using its :attr:`__doc__` attribute. + +As with the :attr:`tp_methods` table, a sentinel entry with a :attr:`name` value +of *NULL* is required. + +.. % XXX Descriptors need to be explained in more detail somewhere, but +.. % not here. +.. % +.. % Descriptor objects have two handler functions which correspond to +.. % the \member{tp_getattro} and \member{tp_setattro} handlers. The +.. % \method{__get__()} handler is a function which is passed the +.. % descriptor, instance, and type objects, and returns the value of the +.. % attribute, or it returns \NULL{} and sets an exception. The +.. 
% \method{__set__()} handler is passed the descriptor, instance, type, +.. % and new value; + + +Type-specific Attribute Management +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For simplicity, only the :ctype:`char\*` version will be demonstrated here; the +type of the name parameter is the only difference between the :ctype:`char\*` +and :ctype:`PyObject\*` flavors of the interface. This example effectively does +the same thing as the generic example above, but does not use the generic +support added in Python 2.2. The value in showing this is two-fold: it +demonstrates how basic attribute management can be done in a way that is +portable to older versions of Python, and explains how the handler functions are +called, so that if you do need to extend their functionality, you'll understand +what needs to be done. + +The :attr:`tp_getattr` handler is called when the object requires an attribute +look-up. It is called in the same situations where the :meth:`__getattr__` +method of a class would be called. + +A likely way to handle this is (1) to implement a set of functions (such as +:cfunc:`newdatatype_getSize` and :cfunc:`newdatatype_setSize` in the example +below), (2) provide a method table listing these functions, and (3) provide a +getattr function that returns the result of a lookup in that table. The method +table uses the same structure as the :attr:`tp_methods` field of the type +object. 
+ +Here is an example:: + + static PyMethodDef newdatatype_methods[] = { + {"getSize", (PyCFunction)newdatatype_getSize, METH_VARARGS, + "Return the current size."}, + {"setSize", (PyCFunction)newdatatype_setSize, METH_VARARGS, + "Set the size."}, + {NULL, NULL, 0, NULL} /* sentinel */ + }; + + static PyObject * + newdatatype_getattr(newdatatypeobject *obj, char *name) + { + return Py_FindMethod(newdatatype_methods, (PyObject *)obj, name); + } + +The :attr:`tp_setattr` handler is called when the :meth:`__setattr__` or +:meth:`__delattr__` method of a class instance would be called. When an +attribute should be deleted, the third parameter will be *NULL*. Here is an +example that simply raises an exception; if this were really all you wanted, the +:attr:`tp_setattr` handler should be set to *NULL*. :: + + static int + newdatatype_setattr(newdatatypeobject *obj, char *name, PyObject *v) + { + (void)PyErr_Format(PyExc_RuntimeError, "Read-only attribute: %s", name); + return -1; + } + + +Object Comparison +----------------- + +:: + + cmpfunc tp_compare; + +The :attr:`tp_compare` handler is called when comparisons are needed and the +object does not implement the specific rich comparison method which matches the +requested comparison. (It is always used if defined and the +:cfunc:`PyObject_Compare` or :cfunc:`PyObject_Cmp` functions are used, or if +:func:`cmp` is used from Python.) It is analogous to the :meth:`__cmp__` method. +This function should return ``-1`` if *obj1* is less than *obj2*, ``0`` if they +are equal, and ``1`` if *obj1* is greater than *obj2*. (It was previously +allowed to return arbitrary negative or positive integers for less than and +greater than, respectively; as of Python 2.2, this is no longer allowed. In the +future, other return values may be assigned a different meaning.) + +A :attr:`tp_compare` handler may raise an exception. In this case it should +return a negative value.
The caller has to test for the exception using +:cfunc:`PyErr_Occurred`. + +Here is a sample implementation:: + + static int + newdatatype_compare(newdatatypeobject * obj1, newdatatypeobject * obj2) + { + long result; + + if (obj1->obj_UnderlyingDatatypePtr->size < + obj2->obj_UnderlyingDatatypePtr->size) { + result = -1; + } + else if (obj1->obj_UnderlyingDatatypePtr->size > + obj2->obj_UnderlyingDatatypePtr->size) { + result = 1; + } + else { + result = 0; + } + return result; + } + + +Abstract Protocol Support +------------------------- + +Python supports a variety of *abstract* 'protocols;' the specific interfaces +provided to use these interfaces are documented in :ref:`abstract`. + + +A number of these abstract interfaces were defined early in the development of +the Python implementation. In particular, the number, mapping, and sequence +protocols have been part of Python since the beginning. Other protocols have +been added over time. For protocols which depend on several handler routines +from the type implementation, the older protocols have been defined as optional +blocks of handlers referenced by the type object. For newer protocols there are +additional slots in the main type object, with a flag bit being set to indicate +that the slots are present and should be checked by the interpreter. (The flag +bit does not indicate that the slot values are non-*NULL*. The flag may be set +to indicate the presence of a slot, but a slot may still be unfilled.) :: + + PyNumberMethods tp_as_number; + PySequenceMethods tp_as_sequence; + PyMappingMethods tp_as_mapping; + +If you wish your object to be able to act like a number, a sequence, or a +mapping object, then you place the address of a structure that implements the C +type :ctype:`PyNumberMethods`, :ctype:`PySequenceMethods`, or +:ctype:`PyMappingMethods`, respectively. It is up to you to fill in this +structure with appropriate values. 
You can find examples of the use of each of +these in the :file:`Objects` directory of the Python source distribution. :: + + hashfunc tp_hash; + +This function, if you choose to provide it, should return a hash number for an +instance of your data type. Here is a moderately pointless example:: + + static long + newdatatype_hash(newdatatypeobject *obj) + { + long result; + result = obj->obj_UnderlyingDatatypePtr->size; + result = result * 3; + return result; + } + +:: + + ternaryfunc tp_call; + +This function is called when an instance of your data type is "called", for +example, if ``obj1`` is an instance of your data type and the Python script +contains ``obj1('hello')``, the :attr:`tp_call` handler is invoked. + +This function takes three arguments: + +#. *arg1* is the instance of the data type which is the subject of the call. If + the call is ``obj1('hello')``, then *arg1* is ``obj1``. + +#. *arg2* is a tuple containing the arguments to the call. You can use + :cfunc:`PyArg_ParseTuple` to extract the arguments. + +#. *arg3* is a dictionary of keyword arguments that were passed. If this is + non-*NULL* and you support keyword arguments, use + :cfunc:`PyArg_ParseTupleAndKeywords` to extract the arguments. If you do not + want to support keyword arguments and this is non-*NULL*, raise a + :exc:`TypeError` with a message saying that keyword arguments are not supported. + +Here is a desultory example of the implementation of the call function. :: + + /* Implement the call function. + * obj1 is the instance receiving the call. + * obj2 is a tuple containing the arguments to the call, in this + * case 3 strings. 
+ */ + static PyObject * + newdatatype_call(newdatatypeobject *obj, PyObject *args, PyObject *other) + { + PyObject *result; + char *arg1; + char *arg2; + char *arg3; + + if (!PyArg_ParseTuple(args, "sss:call", &arg1, &arg2, &arg3)) { + return NULL; + } + result = PyString_FromFormat( + "Returning -- value: [%d] arg1: [%s] arg2: [%s] arg3: [%s]\n", + obj->obj_UnderlyingDatatypePtr->size, + arg1, arg2, arg3); + printf("%s", PyString_AS_STRING(result)); + return result; + } + +XXX some fields need to be added here... :: + + /* Added in release 2.2 */ + /* Iterators */ + getiterfunc tp_iter; + iternextfunc tp_iternext; + +These functions provide support for the iterator protocol. Any object which +wishes to support iteration over its contents (which may be generated during +iteration) must implement the ``tp_iter`` handler. Objects which are returned +by a ``tp_iter`` handler must implement both the ``tp_iter`` and ``tp_iternext`` +handlers. Both handlers take exactly one parameter, the instance for which they +are being called, and return a new reference. In the case of an error, they +should set an exception and return *NULL*. + +For an object which represents an iterable collection, the ``tp_iter`` handler +must return an iterator object. The iterator object is responsible for +maintaining the state of the iteration. For collections which can support +multiple iterators which do not interfere with each other (as lists and tuples +do), a new iterator should be created and returned. Objects which can only be +iterated over once (usually due to side effects of iteration) should implement +this handler by returning a new reference to themselves, and should also +implement the ``tp_iternext`` handler. File objects are an example of such an +iterator. + +Iterator objects should implement both handlers.
The ``tp_iter`` handler should +return a new reference to the iterator (this is the same as the ``tp_iter`` +handler for objects which can only be iterated over destructively). The +``tp_iternext`` handler should return a new reference to the next object in the +iteration if there is one. If the iteration has reached the end, it may return +*NULL* without setting an exception or it may set :exc:`StopIteration`; avoiding +the exception can yield slightly better performance. If an actual error occurs, +it should set an exception and return *NULL*. + + +.. _weakref-support: + +Weak Reference Support +---------------------- + +One of the goals of Python's weak-reference implementation is to allow any type +to participate in the weak reference mechanism without incurring the overhead on +those objects which do not benefit by weak referencing (such as numbers). + +For an object to be weakly referencable, the extension must include a +:ctype:`PyObject\*` field in the instance structure for the use of the weak +reference mechanism; it must be initialized to *NULL* by the object's +constructor. It must also set the :attr:`tp_weaklistoffset` field of the +corresponding type object to the offset of the field. For example, the instance +type is defined with the following structure:: + + typedef struct { + PyObject_HEAD + PyClassObject *in_class; /* The class object */ + PyObject *in_dict; /* A dictionary */ + PyObject *in_weakreflist; /* List of weak references */ + } PyInstanceObject; + +The statically-declared type object for instances is defined this way:: + + PyTypeObject PyInstance_Type = { + PyObject_HEAD_INIT(&PyType_Type) + 0, + "module.instance", + + /* Lots of stuff omitted for brevity... 
*/ + + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + offsetof(PyInstanceObject, in_weakreflist), /* tp_weaklistoffset */ + }; + +The type constructor is responsible for initializing the weak reference list to +*NULL*:: + + static PyObject * + instance_new() { + /* Other initialization stuff omitted for brevity */ + + self->in_weakreflist = NULL; + + return (PyObject *) self; + } + +The only further addition is that the destructor needs to call the weak +reference manager to clear any weak references. This should be done before any +other parts of the destruction have occurred, but is only required if the weak +reference list is non-*NULL*:: + + static void + instance_dealloc(PyInstanceObject *inst) + { + /* Allocate temporaries if needed, but do not begin + destruction just yet. + */ + + if (inst->in_weakreflist != NULL) + PyObject_ClearWeakRefs((PyObject *) inst); + + /* Proceed with object destruction normally. */ + } + + +More Suggestions +---------------- + +Remember that you can omit most of these functions, in which case you provide +``0`` as a value. There are type definitions for each of the functions you must +provide. They are in :file:`object.h` in the Python include directory that +comes with the source distribution of Python. + +In order to learn how to implement any specific method for your new data type, +do the following: Download and unpack the Python source distribution. Go to the +:file:`Objects` directory, then search the C source files for ``tp_`` plus the +function you want (for example, ``tp_compare``). You will find examples of the +function you want to implement. + +When you need to verify that an object is an instance of the type you are +implementing, use the :cfunc:`PyObject_TypeCheck` function. A sample of its use +might be something like the following:: + + if (!
PyObject_TypeCheck(some_object, &MyType)) { + PyErr_SetString(PyExc_TypeError, "arg #1 not a mything"); + return NULL; + } + +.. rubric:: Footnotes + +.. [#] This is true when we know that the object is a basic type, like a string or a + float. + +.. [#] We relied on this in the :attr:`tp_dealloc` handler in this example, because our + type doesn't support garbage collection. Even if a type supports garbage + collection, there are calls that can be made to "untrack" the object from + garbage collection, however, these calls are advanced and not covered here. + +.. [#] We now know that the first and last members are strings, so perhaps we could be + less careful about decrementing their reference counts, however, we accept + instances of string subclasses. Even though deallocating normal strings won't + call back into our objects, we can't guarantee that deallocating an instance of + a string subclass won't call back into our objects. + +.. [#] Even in the third version, we aren't guaranteed to avoid cycles. Instances of + string subclasses are allowed and string subclasses could allow cycles even if + normal strings don't. + diff --git a/Doc/extending/windows.rst b/Doc/extending/windows.rst new file mode 100644 index 0000000..7a66afe --- /dev/null +++ b/Doc/extending/windows.rst @@ -0,0 +1,280 @@ +.. highlightlang:: c + + +.. _building-on-windows: + +**************************************** +Building C and C++ Extensions on Windows +**************************************** + +.. % + +This chapter briefly explains how to create a Windows extension module for +Python using Microsoft Visual C++, and follows with more detailed background +information on how it works. The explanatory material is useful for both the +Windows programmer learning to build Python extensions and the Unix programmer +interested in producing software which can be successfully built on both Unix +and Windows.
+ +Module authors are encouraged to use the distutils approach for building +extension modules, instead of the one described in this section. You will still +need the C compiler that was used to build Python; typically Microsoft Visual +C++. + +.. note:: + + This chapter mentions a number of filenames that include an encoded Python + version number. These filenames are represented with the version number shown + as ``XY``; in practice, ``'X'`` will be the major version number and ``'Y'`` + will be the minor version number of the Python release you're working with. For + example, if you are using Python 2.2.1, ``XY`` will actually be ``22``. + + +.. _win-cookbook: + +A Cookbook Approach +=================== + +There are two approaches to building extension modules on Windows, just as there +are on Unix: use the :mod:`distutils` package to control the build process, or +do things manually. The distutils approach works well for most extensions; +documentation on using :mod:`distutils` to build and package extension modules +is available in :ref:`distutils-index`. This section describes the manual +approach to building Python extensions written in C or C++. + +To build extensions using these instructions, you need to have a copy of the +Python sources of the same version as your installed Python. You will need +Microsoft Visual C++ "Developer Studio"; project files are supplied for VC++ +version 7.1, but you can use older versions of VC++. Notice that you should use +the same version of VC++ that was used to build Python itself. The example files +described here are distributed with the Python sources in the +:file:`PC\\example_nt\\` directory. + +#. **Copy the example files** --- The :file:`example_nt` directory is a + subdirectory of the :file:`PC` directory, in order to keep all the PC-specific + files under the same directory in the source distribution. However, the + :file:`example_nt` directory can't actually be used from this location.
You + first need to copy or move it up one level, so that :file:`example_nt` is a + sibling of the :file:`PC` and :file:`Include` directories. Do all your work + from within this new location. + +#. **Open the project** --- From VC++, use the :menuselection:`File --> Open + Solution` dialog (not :menuselection:`File --> Open`!). Navigate to and select + the file :file:`example.sln`, in the *copy* of the :file:`example_nt` directory + you made above. Click Open. + +#. **Build the example DLL** --- In order to check that everything is set up + right, try building: + +#. Select a configuration. This step is optional. Choose + :menuselection:`Build --> Configuration Manager --> Active Solution Configuration` + and select either :guilabel:`Release` or :guilabel:`Debug`. If you skip this + step, VC++ will use the Debug configuration by default. + +#. Build the DLL. Choose :menuselection:`Build --> Build Solution`. This + creates all intermediate and result files in a subdirectory called either + :file:`Debug` or :file:`Release`, depending on which configuration you selected + in the preceding step. + +#. **Testing the debug-mode DLL** --- Once the Debug build has succeeded, bring + up a DOS box, and change to the :file:`example_nt\\Debug` directory. You should + now be able to repeat the following session (``C>`` is the DOS prompt, ``>>>`` + is the Python prompt; note that build information and various debug output from + Python may not match this screen dump exactly):: + + C>..\..\PCbuild\python_d + Adding parser accelerators ... + Done. + Python 2.2 (#28, Dec 19 2001, 23:26:37) [MSC 32 bit (Intel)] on win32 + Type "copyright", "credits" or "license" for more information. + >>> import example + [4897 refs] + >>> example.foo() + Hello, world + [4903 refs] + >>> + + Congratulations! You've successfully built your first Python extension module. + +#. **Creating your own project** --- Choose a name and create a directory for + it. Copy your C sources into it. 
Note that the module source file name does + not necessarily have to match the module name, but the name of the + initialization function should match the module name --- you can only import a + module :mod:`spam` if its initialization function is called :cfunc:`initspam`, + and it should call :cfunc:`Py_InitModule` with the string ``"spam"`` as its + first argument (use the minimal :file:`example.c` in this directory as a guide). + By convention, it lives in a file called :file:`spam.c` or :file:`spammodule.c`. + The output file should be called :file:`spam.dll` or :file:`spam.pyd` (the + latter is supported to avoid confusion with a system library :file:`spam.dll` to + which your module could be a Python interface) in Release mode, or + :file:`spam_d.dll` or :file:`spam_d.pyd` in Debug mode. + + Now your options are: + +#. Copy :file:`example.sln` and :file:`example.vcproj`, rename them to + :file:`spam.\*`, and edit them by hand, or + +#. Create a brand new project; instructions are below. + + In either case, copy :file:`example_nt\\example.def` to :file:`spam\\spam.def`, + and edit the new :file:`spam.def` so its second line contains the string + '``initspam``'. If you created a new project yourself, add the file + :file:`spam.def` to the project now. (This is an annoying little file with only + two lines. An alternative approach is to forget about the :file:`.def` file, + and add the option :option:`/export:initspam` somewhere to the Link settings, by + manually editing the setting in Project Properties dialog). + +#. **Creating a brand new project** --- Use the :menuselection:`File --> New + --> Project` dialog to create a new Project Workspace. Select :guilabel:`Visual + C++ Projects/Win32/ Win32 Project`, enter the name (``spam``), and make sure the + Location is set to parent of the :file:`spam` directory you have created (which + should be a direct subdirectory of the Python build tree, a sibling of + :file:`Include` and :file:`PC`). 
Select Win32 as the platform (in my version, + this is the only choice). Make sure the Create new workspace radio button is + selected. Click OK. + + You should now create the file :file:`spam.def` as instructed in the previous + section. Add the source files to the project, using :menuselection:`Project --> + Add Existing Item`. Set the pattern to ``*.*`` and select both :file:`spam.c` + and :file:`spam.def` and click OK. (Inserting them one by one is fine too.) + + Now open the :menuselection:`Project --> spam properties` dialog. You only need + to change a few settings. Make sure :guilabel:`All Configurations` is selected + from the :guilabel:`Settings for:` dropdown list. Select the C/C++ tab. Choose + the General category in the popup menu at the top. Type the following text in + the entry box labeled :guilabel:`Additional Include Directories`:: + + ..\Include,..\PC + + Then, choose the General category in the Linker tab, and enter :: + + ..\PCbuild + + in the text box labelled :guilabel:`Additional library Directories`. + + Now you need to add some mode-specific settings: + + Select :guilabel:`Release` in the :guilabel:`Configuration` dropdown list. + Choose the :guilabel:`Link` tab, choose the :guilabel:`Input` category, and + append ``pythonXY.lib`` to the list in the :guilabel:`Additional Dependencies` + box. + + Select :guilabel:`Debug` in the :guilabel:`Configuration` dropdown list, and + append ``pythonXY_d.lib`` to the list in the :guilabel:`Additional Dependencies` + box. Then click the C/C++ tab, select :guilabel:`Code Generation`, and select + :guilabel:`Multi-threaded Debug DLL` from the :guilabel:`Runtime library` + dropdown list. + + Select :guilabel:`Release` again from the :guilabel:`Configuration` dropdown + list. Select :guilabel:`Multi-threaded DLL` from the :guilabel:`Runtime + library` dropdown list. 
+ +If your module creates a new type, you may have trouble with this line:: + + PyObject_HEAD_INIT(&PyType_Type) + +Change it to:: + + PyObject_HEAD_INIT(NULL) + +and add the following to the module initialization function:: + + MyObject_Type.ob_type = &PyType_Type; + +Refer to section 3 of the `Python FAQ <http://www.python.org/doc/faq>`_ for +details on why you must do this. + + +.. _dynamic-linking: + +Differences Between Unix and Windows +==================================== + +.. sectionauthor:: Chris Phoenix + + +Unix and Windows use completely different paradigms for run-time loading of +code. Before you try to build a module that can be dynamically loaded, be aware +of how your system works. + +In Unix, a shared object (:file:`.so`) file contains code to be used by the +program, and also the names of functions and data that it expects to find in the +program. When the file is joined to the program, all references to those +functions and data in the file's code are changed to point to the actual +locations in the program where the functions and data are placed in memory. +This is basically a link operation. + +In Windows, a dynamic-link library (:file:`.dll`) file has no dangling +references. Instead, an access to functions or data goes through a lookup +table. So the DLL code does not have to be fixed up at runtime to refer to the +program's memory; instead, the code already uses the DLL's lookup table, and the +lookup table is modified at runtime to point to the functions and data. + +In Unix, there is only one type of library file (:file:`.a`) which contains code +from several object files (:file:`.o`). During the link step to create a shared +object file (:file:`.so`), the linker may find that it doesn't know where an +identifier is defined. The linker will look for it in the object files in the +libraries; if it finds it, it will include all the code from that object file.
+ +In Windows, there are two types of library, a static library and an import +library (both called :file:`.lib`). A static library is like a Unix :file:`.a` +file; it contains code to be included as necessary. An import library is +basically used only to reassure the linker that a certain identifier is legal, +and will be present in the program when the DLL is loaded. So the linker uses +the information from the import library to build the lookup table for using +identifiers that are not included in the DLL. When an application or a DLL is +linked, an import library may be generated, which will need to be used for all +future DLLs that depend on the symbols in the application or DLL. + +Suppose you are building two dynamic-load modules, B and C, which should share +another block of code A. On Unix, you would *not* pass :file:`A.a` to the +linker for :file:`B.so` and :file:`C.so`; that would cause it to be included +twice, so that B and C would each have their own copy. In Windows, building +:file:`A.dll` will also build :file:`A.lib`. You *do* pass :file:`A.lib` to the +linker for B and C. :file:`A.lib` does not contain code; it just contains +information which will be used at runtime to access A's code. + +In Windows, using an import library is sort of like using ``import spam``; it +gives you access to spam's names, but does not create a separate copy. On Unix, +linking with a library is more like ``from spam import *``; it does create a +separate copy. + + +.. _win-dlls: + +Using DLLs in Practice +====================== + +.. sectionauthor:: Chris Phoenix + + +Windows Python is built in Microsoft Visual C++; using other compilers may or +may not work (though Borland seems to). The rest of this section is MSVC++ +specific. + +When creating DLLs in Windows, you must pass :file:`pythonXY.lib` to the linker. 
+To build two DLLs, spam and ni (which uses C functions found in spam), you could +use these commands:: + + cl /LD /I/python/include spam.c ../libs/pythonXY.lib + cl /LD /I/python/include ni.c spam.lib ../libs/pythonXY.lib + +The first command created three files: :file:`spam.obj`, :file:`spam.dll` and +:file:`spam.lib`. :file:`Spam.dll` does not contain any Python functions (such +as :cfunc:`PyArg_ParseTuple`), but it does know how to find the Python code +thanks to :file:`pythonXY.lib`. + +The second command created :file:`ni.dll` (and :file:`.obj` and :file:`.lib`), +which knows how to find the necessary functions from spam, and also from the +Python executable. + +Not every identifier is exported to the lookup table. If you want any other +modules (including Python) to be able to see your identifiers, you have to say +``_declspec(dllexport)``, as in ``void _declspec(dllexport) initspam(void)`` or +``PyObject _declspec(dllexport) *NiGetSpamData(void)``. + +Developer Studio will throw in a lot of import libraries that you do not really +need, adding about 100K to your executable. To get rid of them, use the Project +Settings dialog, Link tab, to specify *ignore default libraries*. Add the +correct :file:`msvcrtxx.lib` to the list of libraries. + diff --git a/Doc/howto/advocacy.rst b/Doc/howto/advocacy.rst new file mode 100644 index 0000000..1f1754a --- /dev/null +++ b/Doc/howto/advocacy.rst @@ -0,0 +1,356 @@ +************************* + Python Advocacy HOWTO +************************* + +:Author: A.M. Kuchling +:Release: 0.03 + + +.. topic:: Abstract + + It's usually difficult to get your management to accept open source software, + and Python is no exception to this rule. This document discusses reasons to use + Python, strategies for winning acceptance, facts and arguments you can use, and + cases where you *shouldn't* try to use Python. 
+ + +Reasons to Use Python +===================== + +There are several reasons to incorporate a scripting language into your +development process, and this section will discuss them, and why Python has some +properties that make it a particularly good choice. + + +Programmability +--------------- + +Programs are often organized in a modular fashion. Lower-level operations are +grouped together, and called by higher-level functions, which may in turn be +used as basic operations by still further upper levels. + +For example, the lowest level might define a very low-level set of functions for +accessing a hash table. The next level might use hash tables to store the +headers of a mail message, mapping a header name like ``Date`` to a value such +as ``Tue, 13 May 1997 20:00:54 -0400``. A yet higher level may operate on +message objects, without knowing or caring that message headers are stored in a +hash table, and so forth. + +Often, the lowest levels do very simple things; they implement a data structure +such as a binary tree or hash table, or they perform some simple computation, +such as converting a date string to a number. The higher levels then contain +logic connecting these primitive operations. Using the approach, the primitives +can be seen as basic building blocks which are then glued together to produce +the complete product. + +Why is this design approach relevant to Python? Because Python is well suited +to functioning as such a glue language. A common approach is to write a Python +module that implements the lower level operations; for the sake of speed, the +implementation might be in C, Java, or even Fortran. Once the primitives are +available to Python programs, the logic underlying higher level operations is +written in the form of Python code. The high-level logic is then more +understandable, and easier to modify. 
+ +John Ousterhout wrote a paper that explains this idea at greater length, +entitled "Scripting: Higher Level Programming for the 21st Century". I +recommend that you read this paper; see the references for the URL. Ousterhout +is the inventor of the Tcl language, and therefore argues that Tcl should be +used for this purpose; he only briefly refers to other languages such as Python, +Perl, and Lisp/Scheme, but in reality, Ousterhout's argument applies to +scripting languages in general, since you could equally write extensions for any +of the languages mentioned above. + + +Prototyping +----------- + +In *The Mythical Man-Month*, Frederick Brooks suggests the following rule when +planning software projects: "Plan to throw one away; you will anyway." Brooks +is saying that the first attempt at a software design often turns out to be +wrong; unless the problem is very simple or you're an extremely good designer, +you'll find that new requirements and features become apparent once development +has actually started. If these new requirements can't be cleanly incorporated +into the program's structure, you're presented with two unpleasant choices: +hammer the new features into the program somehow, or scrap everything and write +a new version of the program, taking the new features into account from the +beginning. + +Python provides you with a good environment for quickly developing an initial +prototype. That lets you get the overall program structure and logic right, and +you can fine-tune small details in the fast development cycle that Python +provides. Once you're satisfied with the GUI interface or program output, you +can translate the Python code into C++, Fortran, Java, or some other compiled +language. + +Prototyping means you have to be careful not to use too many Python features +that are hard to implement in your other language.
Using ``eval()``, or regular +expressions, or the :mod:`pickle` module, means that you're going to need C or +Java libraries for formula evaluation, regular expressions, and serialization, +for example. But it's not hard to avoid such tricky code, and in the end the +translation usually isn't very difficult. The resulting code can be rapidly +debugged, because any serious logical errors will have been removed from the +prototype, leaving only more minor slip-ups in the translation to track down. + +This strategy builds on the earlier discussion of programmability. Using Python +as glue to connect lower-level components has obvious relevance for constructing +prototype systems. In this way Python can help you with development, even if +end users never come in contact with Python code at all. If the performance of +the Python version is adequate and corporate politics allow it, you may not need +to do a translation into C or Java, but it can still be faster to develop a +prototype and then translate it, instead of attempting to produce the final +version immediately. + +One example of this development strategy is Microsoft Merchant Server. Version +1.0 was written in pure Python, by a company that subsequently was purchased by +Microsoft. Version 2.0 began to translate the code into C++, shipping with some +C++ code and some Python code. Version 3.0 didn't contain any Python at all; all +the code had been translated into C++. Even though the product doesn't contain +a Python interpreter, the Python language has still served a useful purpose by +speeding up development. + +This is a very common use for Python. Past conference papers have also +described this approach for developing high-level numerical algorithms; see +David M. Beazley and Peter S. Lomdahl's paper "Feeding a Large-scale Physics +Application to Python" in the references for a good example.
If an algorithm's +basic operations are things like "Take the inverse of this 4000x4000 matrix", +and are implemented in some lower-level language, then Python has almost no +additional performance cost; the extra time required for Python to evaluate an +expression like ``m.invert()`` is dwarfed by the cost of the actual computation. +It's particularly good for applications where seemingly endless tweaking is +required to get things right. GUI interfaces and Web sites are prime examples. + +The Python code is also shorter and faster to write (once you're familiar with +Python), so it's easier to throw it away if you decide your approach was wrong; +if you'd spent two weeks working on it instead of just two hours, you might +waste time trying to patch up what you've got out of a natural reluctance to +admit that those two weeks were wasted. Truthfully, those two weeks haven't +been wasted, since you've learnt something about the problem and the technology +you're using to solve it, but it's human nature to view this as a failure of +some sort. + + +Simplicity and Ease of Understanding +------------------------------------ + +Python is definitely *not* a toy language that's only usable for small tasks. +The language features are general and powerful enough to enable it to be used +for many different purposes. It's useful at the small end, for 10- or 20-line +scripts, but it also scales up to larger systems that contain thousands of lines +of code. + +However, this expressiveness doesn't come at the cost of an obscure or tricky +syntax. While Python has some dark corners that can lead to obscure code, there +are relatively few such corners, and proper design can isolate their use to only +a few classes or modules. It's certainly possible to write confusing code by +using too many features with too little concern for clarity, but most Python +code can look a lot like a slightly-formalized version of human-understandable +pseudocode. 
+ +In *The New Hacker's Dictionary*, Eric S. Raymond gives the following definition +for "compact": + +.. epigraph:: + + Compact *adj.* Of a design, describes the valuable property that it can all be + apprehended at once in one's head. This generally means the thing created from + the design can be used with greater facility and fewer errors than an equivalent + tool that is not compact. Compactness does not imply triviality or lack of + power; for example, C is compact and FORTRAN is not, but C is more powerful than + FORTRAN. Designs become non-compact through accreting features and cruft that + don't merge cleanly into the overall design scheme (thus, some fans of Classic C + maintain that ANSI C is no longer compact). + + (From http://www.catb.org/~esr/jargon/html/C/compact.html) + +In this sense of the word, Python is quite compact, because the language has +just a few ideas, which are used in lots of places. Take namespaces, for +example. Import a module with ``import math``, and you create a new namespace +called ``math``. Classes are also namespaces that share many of the properties +of modules, and have a few of their own; for example, you can create instances +of a class. Instances? They're yet another namespace. Namespaces are currently +implemented as Python dictionaries, so they have the same methods as the +standard dictionary data type: ``.keys()`` returns all the keys, and so forth. + +This simplicity arises from Python's development history. The language syntax +derives from different sources; ABC, a relatively obscure teaching language, is +one primary influence, and Modula-3 is another. (For more information about ABC +and Modula-3, consult their respective Web sites at +http://www.cwi.nl/~steven/abc/ and http://www.m3.org.) Other features have come from C, Icon, +Algol-68, and even Perl.
Python hasn't really innovated very much, but instead +has tried to keep the language small and easy to learn, building on ideas that +have been tried in other languages and found useful. + +Simplicity is a virtue that should not be underestimated. It lets you learn the +language more quickly, and then rapidly write code, code that often works the +first time you run it. + + +Java Integration +---------------- + +If you're working with Java, Jython (http://www.jython.org/) is definitely worth +your attention. Jython is a re-implementation of Python in Java that compiles +Python code into Java bytecodes. The resulting environment has very tight, +almost seamless, integration with Java. It's trivial to access Java classes +from Python, and you can write Python classes that subclass Java classes. +Jython can be used for prototyping Java applications in much the same way +CPython is used, and it can also be used for test suites for Java code, or +embedded in a Java application to add scripting capabilities. + + +Arguments and Rebuttals +======================= + +Let's say that you've decided upon Python as the best choice for your +application. How can you convince your management, or your fellow developers, +to use Python? This section lists some common arguments against using Python, +and provides some possible rebuttals. + +**Python is freely available software that doesn't cost anything. How good can +it be?** + +Very good, indeed. These days Linux and Apache, two other pieces of open source +software, are becoming more respected as alternatives to commercial software, +but Python hasn't had all the publicity. + +Python has been around for several years, with many users and developers. +Accordingly, the interpreter has been used by many people, and has gotten most +of the bugs shaken out of it. 
While bugs are still discovered at intervals, +they're usually either quite obscure (they'd have to be, for no one to have run +into them before) or they involve interfaces to external libraries. The +internals of the language itself are quite stable. + +Having the source code should be viewed as making the software available for +peer review; people can examine the code, suggest (and implement) improvements, +and track down bugs. To find out more about the idea of open source code, along +with arguments and case studies supporting it, go to http://www.opensource.org. + +**Who's going to support it?** + +Python has a sizable community of developers, and the number is still growing. +The Internet community surrounding the language is an active one, and is worth +being considered another one of Python's advantages. Most questions posted to +the comp.lang.python newsgroup are quickly answered by someone. + +Should you need to dig into the source code, you'll find it's clear and +well-organized, so it's not very difficult to write extensions and track down +bugs yourself. If you'd prefer to pay for support, there are companies and +individuals who offer commercial support for Python. + +**Who uses Python for serious work?** + +Lots of people; one interesting thing about Python is the surprising diversity +of applications that it's been used for. People are using Python to: + +* Run Web sites + +* Write GUI interfaces + +* Control number-crunching code on supercomputers + +* Make a commercial application scriptable by embedding the Python interpreter + inside it + +* Process large XML data sets + +* Build test suites for C or Java code + +Whatever your application domain is, there's probably someone who's used Python +for something similar. Yet, despite being useable for such high-end +applications, Python's still simple enough to use for little jobs. + +See http://wiki.python.org/moin/OrganizationsUsingPython for a list of some of +the organizations that use Python. 
+ +**What are the restrictions on Python's use?** + +They're practically nonexistent. Consult the :file:`Misc/COPYRIGHT` file in the +source distribution, or http://www.python.org/doc/Copyright.html for the full +language, but it boils down to three conditions. + +* You have to leave the copyright notice on the software; if you don't include + the source code in a product, you have to put the copyright notice in the + supporting documentation. + +* Don't claim that the institutions that have developed Python endorse your + product in any way. + +* If something goes wrong, you can't sue for damages. Practically all software + licences contain this condition. + +Notice that you don't have to provide source code for anything that contains +Python or is built with it. Also, the Python interpreter and accompanying +documentation can be modified and redistributed in any way you like, and you +don't have to pay anyone any licensing fees at all. + +**Why should we use an obscure language like Python instead of well-known +language X?** + +I hope this HOWTO, and the documents listed in the final section, will help +convince you that Python isn't obscure, and has a healthily growing user base. +One word of advice: always present Python's positive advantages, instead of +concentrating on language X's failings. People want to know why a solution is +good, rather than why all the other solutions are bad. So instead of attacking +a competing solution on various grounds, simply show how Python's virtues can +help. + + +Useful Resources +================ + +http://www.pythonology.com/success + The Python Success Stories are a collection of stories from successful users of + Python, with the emphasis on business and corporate users. + +.. % \term{\url{http://www.fsbassociates.com/books/pythonchpt1.htm}} +.. % The first chapter of \emph{Internet Programming with Python} also +.. % examines some of the reasons for using Python. The book is well worth +.. 
% buying, but the publishers have made the first chapter available on +.. % the Web. + +http://home.pacbell.net/ouster/scripting.html + John Ousterhout's white paper on scripting is a good argument for the utility of + scripting languages, though naturally enough, he emphasizes Tcl, the language he + developed. Most of the arguments would apply to any scripting language. + +http://www.python.org/workshops/1997-10/proceedings/beazley.html + The authors, David M. Beazley and Peter S. Lomdahl, describe their use of + Python at Los Alamos National Laboratory. It's another good example of how + Python can help get real work done. This quotation from the paper has been + echoed by many people: + + .. epigraph:: + + Originally developed as a large monolithic application for massively parallel + processing systems, we have used Python to transform our application into a + flexible, highly modular, and extremely powerful system for performing + simulation, data analysis, and visualization. In addition, we describe how + Python has solved a number of important problems related to the development, + debugging, deployment, and maintenance of scientific software. + +http://pythonjournal.cognizor.com/pyj1/Everitt-Feit_interview98-V1.html + This interview with Andy Feit, discussing Infoseek's use of Python, can be used + to show that choosing Python didn't introduce any difficulties into a company's + development process, and provided some substantial benefits. + +.. % \term{\url{http://www.python.org/psa/Commercial.html}} +.. % Robin Friedrich wrote this document on how to support Python's use in +.. % commercial projects. + +http://www.python.org/workshops/1997-10/proceedings/stein.ps + For the 6th Python conference, Greg Stein presented a paper that traced Python's + adoption and usage at a startup called eShop, and later at Microsoft. + +http://www.opensource.org + Management may be doubtful of the reliability and usefulness of software that + wasn't written commercially. 
This site presents arguments that show how open + source software can have considerable advantages over closed-source software. + +http://sunsite.unc.edu/LDP/HOWTO/mini/Advocacy.html + The Linux Advocacy mini-HOWTO was the inspiration for this document, and is also + well worth reading for general suggestions on winning acceptance for a new + technology, such as Linux or Python. In general, you won't make much progress + by simply attacking existing systems and complaining about their inadequacies; + this often ends up looking like unfocused whining. It's much better to point + out some of the many areas where Python is an improvement over other systems. + diff --git a/Doc/howto/curses.rst b/Doc/howto/curses.rst new file mode 100644 index 0000000..e16d07a --- /dev/null +++ b/Doc/howto/curses.rst @@ -0,0 +1,434 @@ +********************************** + Curses Programming with Python +********************************** + +:Author: A.M. Kuchling, Eric S. Raymond +:Release: 2.02 + + +.. topic:: Abstract + + This document describes how to write text-mode programs with Python 2.x, using + the :mod:`curses` extension module to control the display. + + +What is curses? +=============== + +The curses library supplies a terminal-independent screen-painting and +keyboard-handling facility for text-based terminals; such terminals include +VT100s, the Linux console, and the simulated terminal provided by X11 programs +such as xterm and rxvt. Display terminals support various control codes to +perform common operations such as moving the cursor, scrolling the screen, and +erasing areas. Different terminals use widely differing codes, and often have +their own minor quirks. + +In a world of X displays, one might ask "why bother"? It's true that +character-cell display terminals are an obsolete technology, but there are +niches in which being able to do fancy things with them is still valuable. One +is on small-footprint or embedded Unixes that don't carry an X server.
Another +is for tools like OS installers and kernel configurators that may have to run +before X is available. + +The curses library hides all the details of different terminals, and provides +the programmer with an abstraction of a display, containing multiple +non-overlapping windows. The contents of a window can be changed in various +ways--adding text, erasing it, changing its appearance--and the curses library +will automagically figure out what control codes need to be sent to the terminal +to produce the right output. + +The curses library was originally written for BSD Unix; the later System V +versions of Unix from AT&T added many enhancements and new functions. BSD curses +is no longer maintained, having been replaced by ncurses, which is an +open-source implementation of the AT&T interface. If you're using an +open-source Unix such as Linux or FreeBSD, your system almost certainly uses +ncurses. Since most current commercial Unix versions are based on System V +code, all the functions described here will probably be available. The older +versions of curses carried by some proprietary Unixes may not support +everything, though. + +No one has made a Windows port of the curses module. On a Windows platform, try +the Console module written by Fredrik Lundh. The Console module provides +cursor-addressable text output, plus full support for mouse and keyboard input, +and is available from http://effbot.org/efflib/console. + + +The Python curses module +------------------------ + +The Python module is a fairly simple wrapper over the C functions provided by +curses; if you're already familiar with curses programming in C, it's really +easy to transfer that knowledge to Python. The biggest difference is that the +Python interface makes things simpler, by merging different C functions such as +:func:`addstr`, :func:`mvaddstr`, :func:`mvwaddstr`, into a single +:meth:`addstr` method. You'll see this covered in more detail later.
+ +This HOWTO is simply an introduction to writing text-mode programs with curses +and Python. It doesn't attempt to be a complete guide to the curses API; for +that, see the Python library guide's section on ncurses, and the C manual pages +for ncurses. It will, however, give you the basic ideas. + + +Starting and ending a curses application +======================================== + +Before doing anything, curses must be initialized. This is done by calling the +:func:`initscr` function, which will determine the terminal type, send any +required setup codes to the terminal, and create various internal data +structures. If successful, :func:`initscr` returns a window object representing +the entire screen; this is usually called ``stdscr``, after the name of the +corresponding C variable. :: + + import curses + stdscr = curses.initscr() + +Usually curses applications turn off automatic echoing of keys to the screen, in +order to be able to read keys and only display them under certain circumstances. +This requires calling the :func:`noecho` function. :: + + curses.noecho() + +Applications will also commonly need to react to keys instantly, without +requiring the Enter key to be pressed; this is called cbreak mode, as opposed to +the usual buffered input mode. :: + + curses.cbreak() + +Terminals usually return special keys, such as the cursor keys or navigation +keys such as Page Up and Home, as a multibyte escape sequence. While you could +write your application to expect such sequences and process them accordingly, +curses can do it for you, returning a special value such as +:const:`curses.KEY_LEFT`. To get curses to do the job, you'll have to enable +keypad mode. :: + + stdscr.keypad(1) + +Terminating a curses application is much easier than starting one. You'll need +to call :: + + curses.nocbreak(); stdscr.keypad(0); curses.echo() + +to reverse the curses-friendly terminal settings. 
Then call the :func:`endwin` +function to restore the terminal to its original operating mode. :: + + curses.endwin() + +A common problem when debugging a curses application is to get your terminal +messed up when the application dies without restoring the terminal to its +previous state. In Python this commonly happens when your code is buggy and +raises an uncaught exception. Keys are no longer echoed to the screen when +you type them, for example, which makes using the shell difficult. + +In Python you can avoid these complications and make debugging much easier by +importing the module :mod:`curses.wrapper`. It supplies a :func:`wrapper` +function that takes a callable. It does the initializations described above, +and also initializes colors if color support is present. It then runs your +provided callable and finally deinitializes appropriately. The callable is +called inside a try-except clause which catches exceptions, performs curses +deinitialization, and then passes the exception upwards. Thus, your terminal +won't be left in a funny state on exception. + + +Windows and Pads +================ + +Windows are the basic abstraction in curses. A window object represents a +rectangular area of the screen, and supports various methods to display text, +erase it, allow the user to input strings, and so forth. + +The ``stdscr`` object returned by the :func:`initscr` function is a window +object that covers the entire screen. Many programs may need only this single +window, but you might wish to divide the screen into smaller windows, in order +to redraw or clear them separately. The :func:`newwin` function creates a new +window of a given size, returning the new window object. :: + + begin_x = 20 ; begin_y = 7 + height = 5 ; width = 40 + win = curses.newwin(height, width, begin_y, begin_x) + +A word about the coordinate system used in curses: coordinates are always passed +in the order *y,x*, and the top-left corner of a window is coordinate (0,0). 
+This breaks a common convention for handling coordinates, where the *x* +coordinate usually comes first. This is an unfortunate difference from most +other computer applications, but it's been part of curses since it was first +written, and it's too late to change things now. + +When you call a method to display or erase text, the effect doesn't immediately +show up on the display. This is because curses was originally written with slow +300-baud terminal connections in mind; with these terminals, minimizing the time +required to redraw the screen is very important. This lets curses accumulate +changes to the screen, and display them in the most efficient manner. For +example, if your program displays some characters in a window, and then clears +the window, there's no need to send the original characters because they'd never +be visible. + +Accordingly, curses requires that you explicitly tell it to redraw windows, +using the :func:`refresh` method of window objects. In practice, this doesn't +really complicate programming with curses much. Most programs go into a flurry +of activity, and then pause waiting for a keypress or some other action on the +part of the user. All you have to do is to be sure that the screen has been +redrawn before pausing to wait for user input, by simply calling +``stdscr.refresh()`` or the :func:`refresh` method of some other relevant +window. + +A pad is a special case of a window; it can be larger than the actual display +screen, and only a portion of it displayed at a time. Creating a pad simply +requires the pad's height and width, while refreshing a pad requires giving the +coordinates of the on-screen area where a subsection of the pad will be +displayed. 
:: + + pad = curses.newpad(100, 100) + # These loops fill the pad with letters; this is + # explained in the next section + for y in range(0, 100): + for x in range(0, 100): + try: pad.addch(y,x, ord('a') + (x*x+y*y) % 26 ) + except curses.error: pass + + # Displays a section of the pad in the middle of the screen + pad.refresh( 0,0, 5,5, 20,75) + +The :func:`refresh` call displays a section of the pad in the rectangle +extending from coordinate (5,5) to coordinate (20,75) on the screen; the upper +left corner of the displayed section is coordinate (0,0) on the pad. Beyond +that difference, pads are exactly like ordinary windows and support the same +methods. + +If you have multiple windows and pads on screen there is a more efficient way to +go, which will prevent annoying screen flicker at refresh time. Use the +:meth:`noutrefresh` method of each window to update the data structure +representing the desired state of the screen; then change the physical screen to +match the desired state in one go with the function :func:`doupdate`. The +normal :meth:`refresh` method calls :func:`doupdate` as its last act. + + +Displaying Text +=============== + +From a C programmer's point of view, curses may sometimes look like a twisty +maze of functions, all subtly different. For example, :func:`addstr` displays a +string at the current cursor location in the ``stdscr`` window, while +:func:`mvaddstr` moves to a given y,x coordinate first before displaying the +string. :func:`waddstr` is just like :func:`addstr`, but allows specifying a +window to use, instead of using ``stdscr`` by default. :func:`mvwaddstr` follows +similarly. + +Fortunately the Python interface hides all these details; ``stdscr`` is a window +object like any other, and methods like :func:`addstr` accept multiple argument +forms. Usually there are four different forms. 
+ ++---------------------------------+-----------------------------------------------+ +| Form | Description | ++=================================+===============================================+ +| *str* or *ch* | Display the string *str* or character *ch* at | +| | the current position | ++---------------------------------+-----------------------------------------------+ +| *str* or *ch*, *attr* | Display the string *str* or character *ch*, | +| | using attribute *attr* at the current | +| | position | ++---------------------------------+-----------------------------------------------+ +| *y*, *x*, *str* or *ch* | Move to position *y,x* within the window, and | +| | display *str* or *ch* | ++---------------------------------+-----------------------------------------------+ +| *y*, *x*, *str* or *ch*, *attr* | Move to position *y,x* within the window, and | +| | display *str* or *ch*, using attribute *attr* | ++---------------------------------+-----------------------------------------------+ + +Attributes allow displaying text in highlighted forms, such as in boldface, +underline, reverse code, or in color. They'll be explained in more detail in +the next subsection. + +The :func:`addstr` function takes a Python string as the value to be displayed, +while the :func:`addch` functions take a character, which can be either a Python +string of length 1 or an integer. If it's a string, you're limited to +displaying characters between 0 and 255. SVr4 curses provides constants for +extension characters; these constants are integers greater than 255. For +example, :const:`ACS_PLMINUS` is a +/- symbol, and :const:`ACS_ULCORNER` is the +upper left corner of a box (handy for drawing borders). + +Windows remember where the cursor was left after the last operation, so if you +leave out the *y,x* coordinates, the string or character will be displayed +wherever the last operation left off. You can also move the cursor with the +``move(y,x)`` method. 
Because some terminals always display a flashing cursor, +you may want to ensure that the cursor is positioned in some location where it +won't be distracting; it can be confusing to have the cursor blinking at some +apparently random location. + +If your application doesn't need a blinking cursor at all, you can call +``curs_set(0)`` to make it invisible. Equivalently, and for compatibility with +older curses versions, there's a ``leaveok(bool)`` function. When *bool* is +true, the curses library will attempt to suppress the flashing cursor, and you +won't need to worry about leaving it in odd locations. + + +Attributes and Color +-------------------- + +Characters can be displayed in different ways. Status lines in a text-based +application are commonly shown in reverse video; a text viewer may need to +highlight certain words. curses supports this by allowing you to specify an +attribute for each cell on the screen. + +An attribute is an integer, each bit representing a different attribute. You can +try to display text with multiple attribute bits set, but curses doesn't +guarantee that all the possible combinations are available, or that they're all +visually distinct. That depends on the ability of the terminal being used, so +it's safest to stick to the most commonly available attributes, listed here. 
+ ++----------------------+--------------------------------------+ +| Attribute | Description | ++======================+======================================+ +| :const:`A_BLINK` | Blinking text | ++----------------------+--------------------------------------+ +| :const:`A_BOLD` | Extra bright or bold text | ++----------------------+--------------------------------------+ +| :const:`A_DIM` | Half bright text | ++----------------------+--------------------------------------+ +| :const:`A_REVERSE` | Reverse-video text | ++----------------------+--------------------------------------+ +| :const:`A_STANDOUT` | The best highlighting mode available | ++----------------------+--------------------------------------+ +| :const:`A_UNDERLINE` | Underlined text | ++----------------------+--------------------------------------+ + +So, to display a reverse-video status line on the top line of the screen, you +could code:: + + stdscr.addstr(0, 0, "Current mode: Typing mode", + curses.A_REVERSE) + stdscr.refresh() + +The curses library also supports color on those terminals that provide it. The +most common such terminal is probably the Linux console, followed by color +xterms. + +To use color, you must call the :func:`start_color` function soon after calling +:func:`initscr`, to initialize the default color set (the +:func:`curses.wrapper.wrapper` function does this automatically). Once that's +done, the :func:`has_colors` function returns TRUE if the terminal in use can +actually display color. (Note: curses uses the American spelling 'color', +instead of the Canadian/British spelling 'colour'. If you're used to the +British spelling, you'll have to resign yourself to misspelling it for the sake +of these functions.) + +The curses library maintains a finite number of color pairs, containing a +foreground (or text) color and a background color. 
You can get the attribute +value corresponding to a color pair with the :func:`color_pair` function; this +can be bitwise-OR'ed with other attributes such as :const:`A_REVERSE`, but +again, such combinations are not guaranteed to work on all terminals. + +An example, which displays a line of text using color pair 1:: + + stdscr.addstr( "Pretty text", curses.color_pair(1) ) + stdscr.refresh() + +As I said before, a color pair consists of a foreground and background color. +:func:`start_color` initializes 8 basic colors when it activates color mode. +They are: 0:black, 1:red, 2:green, 3:yellow, 4:blue, 5:magenta, 6:cyan, and +7:white. The curses module defines named constants for each of these colors: +:const:`curses.COLOR_BLACK`, :const:`curses.COLOR_RED`, and so forth. + +The ``init_pair(n, f, b)`` function changes the definition of color pair *n*, to +foreground color *f* and background color *b*. Color pair 0 is hard-wired to white +on black, and cannot be changed. + +Let's put all this together. To change color pair 1 to red text on a white +background, you would call:: + + curses.init_pair(1, curses.COLOR_RED, curses.COLOR_WHITE) + +When you change a color pair, any text already displayed using that color pair +will change to the new colors. You can also display new text in this color +with:: + + stdscr.addstr(0,0, "RED ALERT!", curses.color_pair(1) ) + +Very fancy terminals can change the definitions of the actual colors to a given +RGB value. This lets you change color 1, which is usually red, to purple or +blue or any other color you like. Unfortunately, the Linux console doesn't +support this, so I'm unable to try it out, and can't provide any examples. You +can check if your terminal can do this by calling :func:`can_change_color`, +which returns TRUE if the capability is there. If you're lucky enough to have +such a talented terminal, consult your system's man pages for more information. 
+ + +User Input +========== + +The curses library itself offers only very simple input mechanisms. Python's +support adds a text-input widget that makes up for some of the lack. + +The most common way to get input to a window is to use its :meth:`getch` method. +:meth:`getch` pauses and waits for the user to hit a key, displaying it if +:func:`echo` has been called earlier. You can optionally specify a coordinate +to which the cursor should be moved before pausing. + +It's possible to change this behavior with the method :meth:`nodelay`. After +``nodelay(1)``, :meth:`getch` for the window becomes non-blocking and returns +``curses.ERR`` (a value of -1) when no input is ready. There's also a +:func:`halfdelay` function, which can be used to (in effect) set a timer on each +:meth:`getch`; if no input becomes available within the delay +specified as the argument to :func:`halfdelay` (measured in tenths of a second), curses raises an exception. + +The :meth:`getch` method returns an integer; if it's between 0 and 255, it +represents the ASCII code of the key pressed. Values greater than 255 are +special keys such as Page Up, Home, or the cursor keys. You can compare the +value returned to constants such as :const:`curses.KEY_PPAGE`, +:const:`curses.KEY_HOME`, or :const:`curses.KEY_LEFT`. Usually the main loop of +your program will look something like this:: + + while 1: + c = stdscr.getch() + if c == ord('p'): PrintDocument() + elif c == ord('q'): break # Exit the while() + elif c == curses.KEY_HOME: x = y = 0 + +The :mod:`curses.ascii` module supplies ASCII class membership functions that +take either integer or 1-character-string arguments; these may be useful in +writing more readable tests for your command interpreters. It also supplies +conversion functions that take either integer or 1-character-string arguments +and return the same type. For example, :func:`curses.ascii.ctrl` returns the +control character corresponding to its argument. 
+ +There's also a method to retrieve an entire string, :meth:`getstr`. It isn't +used very often, because its functionality is quite limited; the only editing +keys available are the backspace key and the Enter key, which terminates the +string. It can optionally be limited to a fixed number of characters. :: + + curses.echo() # Enable echoing of characters + + # Get a 15-character string, with the cursor on the top line + s = stdscr.getstr(0,0, 15) + +The Python :mod:`curses.textpad` module supplies something better. With it, you +can turn a window into a text box that supports an Emacs-like set of +keybindings. Various methods of the :class:`Textbox` class support editing with +input validation and gathering the edit results either with or without trailing +spaces. See the library documentation on :mod:`curses.textpad` for the +details. + + +For More Information +==================== + +This HOWTO didn't cover some advanced topics, such as screen-scraping or +capturing mouse events from an xterm instance. But the Python library page for +the curses modules is now pretty complete. You should browse it next. + +If you're in doubt about the detailed behavior of any of the ncurses entry +points, consult the manual pages for your curses implementation, whether it's +ncurses or a proprietary Unix vendor's. The manual pages will document any +quirks, and provide complete lists of all the functions, attributes, and +:const:`ACS_\*` characters available to you. + +Because the curses API is so large, some functions aren't supported in the +Python interface, not because they're difficult to implement, but because no one +has needed them yet. Feel free to add them and then submit a patch. Also, we +don't yet have support for the menus or panels libraries associated with +ncurses; feel free to add that. + +If you write an interesting little program, feel free to contribute it as +another demo. We can always use more of them! 
+ +The ncurses FAQ: http://dickey.his.com/ncurses/ncurses.faq.html + diff --git a/Doc/howto/doanddont.rst b/Doc/howto/doanddont.rst new file mode 100644 index 0000000..a322c53 --- /dev/null +++ b/Doc/howto/doanddont.rst @@ -0,0 +1,308 @@ +************************************ + Idioms and Anti-Idioms in Python +************************************ + +:Author: Moshe Zadka + +This document is placed in the public domain. + + +.. topic:: Abstract + + This document can be considered a companion to the tutorial. It shows how to use + Python, and even more importantly, how *not* to use Python. + + +Language Constructs You Should Not Use +====================================== + +While Python has relatively few gotchas compared to other languages, it still +has some constructs which are only useful in corner cases, or are plain +dangerous. + + +from module import \* +--------------------- + + +Inside Function Definitions +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``from module import *`` is *invalid* inside function definitions. While many +versions of Python do not check for the invalidity, it does not make it more +valid, no more than having a smart lawyer makes a man innocent. Do not use it +like that ever. Even in versions where it was accepted, it made the function +execution slower, because the compiler could not be certain which names are +local and which are global. In Python 2.1 this construct causes warnings, and +sometimes even errors. + + +At Module Level +^^^^^^^^^^^^^^^ + +While it is valid to use ``from module import *`` at module level it is usually +a bad idea. For one, this loses an important property Python otherwise has --- +you can know where each toplevel name is defined by a simple "search" function +in your favourite editor. You also open yourself to trouble in the future, if +some module grows additional functions or classes. + +One of the most awful questions asked on the newsgroup is why this code:: + + f = open("www") + f.read() + +does not work. 
Of course, it works just fine (assuming you have a file called +"www".) But it does not work if somewhere in the module, the statement ``from os +import *`` is present. The :mod:`os` module has a function called :func:`open` +which returns an integer. While it is very useful, shadowing builtins is one of +its least useful properties. + +Remember, you can never know for sure what names a module exports, so either +take what you need --- ``from module import name1, name2``, or keep them in the +module and access on a per-need basis --- ``import module;print module.name``. + + +When It Is Just Fine +^^^^^^^^^^^^^^^^^^^^ + +There are situations in which ``from module import *`` is just fine: + +* The interactive prompt. For example, ``from math import *`` makes Python an + amazing scientific calculator. + +* When extending a module in C with a module in Python. + +* When the module advertises itself as ``from import *`` safe. + + +Unadorned :keyword:`exec` and friends +------------------------------------- + +The word "unadorned" refers to the use without an explicit dictionary, in which +case those constructs evaluate code in the *current* environment. This is +dangerous for the same reasons ``from import *`` is dangerous --- it might step +over variables you are counting on and mess up things for the rest of your code. +Simply do not do that. + +Bad examples:: + + >>> for name in sys.argv[1:]: + >>> exec "%s=1" % name + >>> def func(s, **kw): + >>> for var, val in kw.items(): + >>> exec "s.%s=val" % var # invalid! 
+ >>> exec(open("handler.py").read()) + >>> handle() + +Good examples:: + + >>> d = {} + >>> for name in sys.argv[1:]: + >>> d[name] = 1 + >>> def func(s, **kw): + >>> for var, val in kw.items(): + >>> setattr(s, var, val) + >>> d={} + >>> exec(open("handler.py").read(), d, d) + >>> handle = d['handle'] + >>> handle() + + +from module import name1, name2 +------------------------------- + +This is a "don't" which is much weaker than the previous "don't"s but is still +something you should not do if you don't have good reasons to do that. The +reason it is usually a bad idea is because you suddenly have an object which lives +in two separate namespaces. When the binding in one namespace changes, the +binding in the other will not, so there will be a discrepancy between them. This +happens when, for example, one module is reloaded, or changes the definition of +a function at runtime. + +Bad example:: + + # foo.py + a = 1 + + # bar.py + from foo import a + if something(): + a = 2 # danger: foo.a != a + +Good example:: + + # foo.py + a = 1 + + # bar.py + import foo + if something(): + foo.a = 2 + + +except: +------- + +Python has the ``except:`` clause, which catches all exceptions. Since *every* +error in Python raises an exception, this makes many programming errors look +like runtime problems, and hinders the debugging process. + +The following code shows a great example:: + + try: + foo = opne("file") # misspelled "open" + except: + sys.exit("could not open file!") + +The second line triggers a :exc:`NameError` which is caught by the except +clause. The program will exit, and you will have no idea that this has nothing +to do with the readability of ``"file"``. 
+ +The example above is better written :: + + try: + foo = opne("file") # will be changed to "open" as soon as we run it + except IOError: + sys.exit("could not open file") + +There are some situations in which the ``except:`` clause is useful: for +example, in a framework when running callbacks, it is good not to let any +callback disturb the framework. + + +Exceptions +========== + +Exceptions are a useful feature of Python. You should learn to raise them +whenever something unexpected occurs, and catch them only where you can do +something about them. + +The following is a very popular anti-idiom :: + + def get_status(file): + if not os.path.exists(file): + print "file not found" + sys.exit(1) + return open(file).readline() + +Consider the case where the file gets deleted between the time the call to +:func:`os.path.exists` is made and the time :func:`open` is called. That means +the last line will throw an :exc:`IOError`. The same would happen if *file* +exists but has no read permission. Since testing this on a normal machine on +existing and non-existing files makes it seem bugless, that means in testing the +results will seem fine, and the code will get shipped. Then an unhandled +:exc:`IOError` escapes to the user, who has to watch the ugly traceback. + +Here is a better way to do it. :: + + def get_status(file): + try: + return open(file).readline() + except (IOError, OSError): + print "file not found" + sys.exit(1) + +In this version, \*either\* the file gets opened and the line is read (so it +works even on flaky NFS or SMB connections), or the message is printed and the +application aborted. + +Still, :func:`get_status` makes too many assumptions --- that it will only be +used in a short running script, and not, say, in a long running server. 
Sure, +the caller could do something like :: + + try: + status = get_status(log) + except SystemExit: + status = None + +So, try to make as few ``except`` clauses in your code as possible --- those will usually be +a catch-all in the :func:`main`, or inside calls which should always succeed. + +So, the best version is probably :: + + def get_status(file): + return open(file).readline() + +The caller can deal with the exception if it wants (for example, if it tries +several files in a loop), or just let the exception filter upwards to *its* +caller. + +The last version is not very good either --- due to implementation details, the +file would not be closed when an exception is raised until the handler finishes, +and perhaps not at all in non-C implementations (e.g., Jython). A better +version closes the file explicitly :: + + def get_status(file): + fp = open(file) + try: + return fp.readline() + finally: + fp.close() + + +Using the Batteries +=================== + +Every so often, people seem to be writing stuff in the Python library again, +usually poorly. While the occasional module has a poor interface, it is usually +much better to use the rich standard library and data types that come with +Python than inventing your own. + +A useful module very few people know about is :mod:`os.path`. It always has the +correct path arithmetic for your operating system, and will usually be much +better than whatever you come up with yourself. + +Compare:: + + # ugh! + return dir+"/"+file + # better + return os.path.join(dir, file) + +More useful functions in :mod:`os.path`: :func:`basename`, :func:`dirname` and +:func:`splitext`. + +There are also many useful builtin functions people seem not to be aware of for +some reason: :func:`min` and :func:`max` can find the minimum/maximum of any +sequence with comparable semantics, for example, yet many people write their own +:func:`max`/:func:`min`. Another highly useful function is :func:`reduce`. 
A +classical use of :func:`reduce` is something like :: + + import sys, operator + nums = map(float, sys.argv[1:]) + print reduce(operator.add, nums)/len(nums) + +This cute little script prints the average of all numbers given on the command +line. The :func:`reduce` adds up all the numbers, and the rest is just some +pre- and postprocessing. + +On the same note, note that :func:`float`, :func:`int` and :func:`long` all +accept arguments of type string, and so are suited to parsing --- assuming you +are ready to deal with the :exc:`ValueError` they raise. + + +Using Backslash to Continue Statements +====================================== + +Since Python treats a newline as a statement terminator, and since statements +are often more than is comfortable to put in one line, many people do:: + + if foo.bar()['first'][0] == baz.quux(1, 2)[5:9] and \ + calculate_number(10, 20) != forbulate(500, 360): + pass + +You should realize that this is dangerous: a stray space after the ``\`` would +make this line wrong, and stray spaces are notoriously hard to see in editors. +In this case, at least it would be a syntax error, but if the code was:: + + value = foo.bar()['first'][0]*baz.quux(1, 2)[5:9] \ + + calculate_number(10, 20)*forbulate(500, 360) + +then it would just be subtly wrong. + +It is usually much better to use the implicit continuation inside parentheses: + +This version is bulletproof:: + + value = (foo.bar()['first'][0]*baz.quux(1, 2)[5:9] + + calculate_number(10, 20)*forbulate(500, 360)) + diff --git a/Doc/howto/functional.rst b/Doc/howto/functional.rst new file mode 100644 index 0000000..bc12793 --- /dev/null +++ b/Doc/howto/functional.rst @@ -0,0 +1,1400 @@ +******************************** + Functional Programming HOWTO +******************************** + +:Author: \A. M. Kuchling +:Release: 0.30 + +(This is a first draft. Please send comments/error reports/suggestions to +amk@amk.ca. 
This URL is probably not going to be the final location of the +document, so be careful about linking to it -- you may want to add a +disclaimer.) + +In this document, we'll take a tour of Python's features suitable for +implementing programs in a functional style. After an introduction to the +concepts of functional programming, we'll look at language features such as +iterators and generators and relevant library modules such as :mod:`itertools` +and :mod:`functools`. + + +Introduction +============ + +This section explains the basic concept of functional programming; if you're +just interested in learning about Python language features, skip to the next +section. + +Programming languages support decomposing problems in several different ways: + +* Most programming languages are **procedural**: programs are lists of + instructions that tell the computer what to do with the program's input. C, + Pascal, and even Unix shells are procedural languages. + +* In **declarative** languages, you write a specification that describes the + problem to be solved, and the language implementation figures out how to + perform the computation efficiently. SQL is the declarative language you're + most likely to be familiar with; a SQL query describes the data set you want + to retrieve, and the SQL engine decides whether to scan tables or use indexes, + which subclauses should be performed first, etc. + +* **Object-oriented** programs manipulate collections of objects. Objects have + internal state and support methods that query or modify this internal state in + some way. Smalltalk and Java are object-oriented languages. C++ and Python + are languages that support object-oriented programming, but don't force the + use of object-oriented features. + +* **Functional** programming decomposes a problem into a set of functions. + Ideally, functions only take inputs and produce outputs, and don't have any + internal state that affects the output produced for a given input. 
Well-known + functional languages include the ML family (Standard ML, OCaml, and other + variants) and Haskell. + +The designers of some computer languages choose to emphasize one particular +approach to programming. This often makes it difficult to write programs that use a +different approach. Other languages are multi-paradigm languages that support +several different approaches. Lisp, C++, and Python are multi-paradigm; you can +write programs or libraries that are largely procedural, object-oriented, or +functional in all of these languages. In a large program, different sections +might be written using different approaches; the GUI might be object-oriented +while the processing logic is procedural or functional, for example. + +In a functional program, input flows through a set of functions. Each function +operates on its input and produces some output. Functional style frowns upon +functions with side effects that modify internal state or make other changes +that aren't visible in the function's return value. Functions that have no side +effects at all are called **purely functional**. Avoiding side effects means +not using data structures that get updated as a program runs; every function's +output must only depend on its input. + +Some languages are very strict about purity and don't even have assignment +statements such as ``a=3`` or ``c = a + b``, but it's difficult to avoid all +side effects. Printing to the screen or writing to a disk file are side +effects. For example, in Python a ``print`` statement or a +``time.sleep(1)`` both return no useful value; they're only called for their +side effects of sending some text to the screen or pausing execution for a +second. + +Python programs written in functional style usually won't go to the extreme of +avoiding all I/O or all assignments; instead, they'll provide a +functional-appearing interface but will use non-functional features internally. 
+For example, the implementation of a function will still use assignments to +local variables, but won't modify global variables or have other side effects. + +Functional programming can be considered the opposite of object-oriented +programming. Objects are little capsules containing some internal state along +with a collection of method calls that let you modify this state, and programs +consist of making the right set of state changes. Functional programming wants +to avoid state changes as much as possible and works with data flowing between +functions. In Python you might combine the two approaches by writing functions +that take and return instances representing objects in your application (e-mail +messages, transactions, etc.). + +Functional design may seem like an odd constraint to work under. Why should you +avoid objects and side effects? There are theoretical and practical advantages +to the functional style: + +* Formal provability. +* Modularity. +* Composability. +* Ease of debugging and testing. + +Formal provability +------------------ + +A theoretical benefit is that it's easier to construct a mathematical proof that +a functional program is correct. + +For a long time researchers have been interested in finding ways to +mathematically prove programs correct. This is different from testing a program +on numerous inputs and concluding that its output is usually correct, or reading +a program's source code and concluding that the code looks right; the goal is +instead a rigorous proof that a program produces the right result for all +possible inputs. + +The technique used to prove programs correct is to write down **invariants**, +properties of the input data and of the program's variables that are always +true. For each line of code, you then show that if invariants X and Y are true +**before** the line is executed, the slightly different invariants X' and Y' are +true **after** the line is executed. 
This continues until you reach the end of +the program, at which point the invariants should match the desired conditions +on the program's output. + +Functional programming's avoidance of assignments arose because assignments are +difficult to handle with this technique; assignments can break invariants that +were true before the assignment without producing any new invariants that can be +propagated onward. + +Unfortunately, proving programs correct is largely impractical and not relevant +to Python software. Even trivial programs require proofs that are several pages +long; the proof of correctness for a moderately complicated program would be +enormous, and few or none of the programs you use daily (the Python interpreter, +your XML parser, your web browser) could be proven correct. Even if you wrote +down or generated a proof, there would then be the question of verifying the +proof; maybe there's an error in it, and you wrongly believe you've proved the +program correct. + +Modularity +---------- + +A more practical benefit of functional programming is that it forces you to +break apart your problem into small pieces. Programs are more modular as a +result. It's easier to specify and write a small function that does one thing +than a large function that performs a complicated transformation. Small +functions are also easier to read and to check for errors. + + +Ease of debugging and testing +----------------------------- + +Testing and debugging a functional-style program is easier. + +Debugging is simplified because functions are generally small and clearly +specified. When a program doesn't work, each function is an interface point +where you can check that the data are correct. You can look at the intermediate +inputs and outputs to quickly isolate the function that's responsible for a bug. + +Testing is easier because each function is a potential subject for a unit test. 
+Functions don't depend on system state that needs to be replicated before +running a test; instead you only have to synthesize the right input and then +check that the output matches expectations. + + + +Composability +------------- + +As you work on a functional-style program, you'll write a number of functions +with varying inputs and outputs. Some of these functions will be unavoidably +specialized to a particular application, but others will be useful in a wide +variety of programs. For example, a function that takes a directory path and +returns all the XML files in the directory, or a function that takes a filename +and returns its contents, can be applied to many different situations. + +Over time you'll form a personal library of utilities. Often you'll assemble +new programs by arranging existing functions in a new configuration and writing +a few functions specialized for the current task. + + + +Iterators +========= + +I'll start by looking at a Python language feature that's an important +foundation for writing functional-style programs: iterators. + +An iterator is an object representing a stream of data; this object returns the +data one element at a time. A Python iterator must support a method called +``next()`` that takes no arguments and always returns the next element of the +stream. If there are no more elements in the stream, ``next()`` must raise the +``StopIteration`` exception. Iterators don't have to be finite, though; it's +perfectly reasonable to write an iterator that produces an infinite stream of +data. + +The built-in :func:`iter` function takes an arbitrary object and tries to return +an iterator that will return the object's contents or elements, raising +:exc:`TypeError` if the object doesn't support iteration. Several of Python's +built-in data types support iteration, the most common being lists and +dictionaries. An object is called an **iterable** object if you can get an +iterator for it. 
+ +You can experiment with the iteration interface manually:: + + >>> L = [1,2,3] + >>> it = iter(L) + >>> print it + <listiterator object at 0x...> + >>> it.next() + 1 + >>> it.next() + 2 + >>> it.next() + 3 + >>> it.next() + Traceback (most recent call last): + File "<stdin>", line 1, in ? + StopIteration + >>> + +Python expects iterable objects in several different contexts, the most +important being the ``for`` statement. In the statement ``for X in Y``, Y must +be an iterator or some object for which ``iter()`` can create an iterator. +These two statements are equivalent:: + + for i in iter(obj): + print i + + for i in obj: + print i + +Iterators can be materialized as lists or tuples by using the :func:`list` or +:func:`tuple` constructor functions:: + + >>> L = [1,2,3] + >>> iterator = iter(L) + >>> t = tuple(iterator) + >>> t + (1, 2, 3) + +Sequence unpacking also supports iterators: if you know an iterator will return +N elements, you can unpack them into an N-tuple:: + + >>> L = [1,2,3] + >>> iterator = iter(L) + >>> a,b,c = iterator + >>> a,b,c + (1, 2, 3) + +Built-in functions such as :func:`max` and :func:`min` can take a single +iterator argument and will return the largest or smallest element. The ``"in"`` +and ``"not in"`` operators also support iterators: ``X in iterator`` is true if +X is found in the stream returned by the iterator. You'll run into obvious +problems if the iterator is infinite; ``max()``, ``min()``, and ``"not in"`` +will never return, and if the element X never appears in the stream, the +``"in"`` operator won't return either. + +Note that you can only go forward in an iterator; there's no way to get the +previous element, reset the iterator, or make a copy of it. Iterator objects +can optionally provide these additional capabilities, but the iterator protocol +only specifies the ``next()`` method. Functions may therefore consume all of +the iterator's output, and if you need to do something different with the same +stream, you'll have to create a new iterator.
+ + + +Data Types That Support Iterators +--------------------------------- + +We've already seen how lists and tuples support iterators. In fact, any Python +sequence type, such as strings, will automatically support creation of an +iterator. + +Calling :func:`iter` on a dictionary returns an iterator that will loop over the +dictionary's keys:: + + >>> m = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6, + ... 'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12} + >>> for key in m: + ... print key, m[key] + Mar 3 + Feb 2 + Aug 8 + Sep 9 + May 5 + Jun 6 + Jul 7 + Jan 1 + Apr 4 + Nov 11 + Dec 12 + Oct 10 + +Note that the order is essentially random, because it's based on the hash +ordering of the objects in the dictionary. + +Applying ``iter()`` to a dictionary always loops over the keys, but dictionaries +have methods that return other iterators. If you want to iterate over keys, +values, or key/value pairs, you can explicitly call the ``iterkeys()``, +``itervalues()``, or ``iteritems()`` methods to get an appropriate iterator. + +The :func:`dict` constructor can accept an iterator that returns a finite stream +of ``(key, value)`` tuples:: + + >>> L = [('Italy', 'Rome'), ('France', 'Paris'), ('US', 'Washington DC')] + >>> dict(iter(L)) + {'Italy': 'Rome', 'US': 'Washington DC', 'France': 'Paris'} + +Files also support iteration by calling the ``readline()`` method until there +are no more lines in the file. This means you can read each line of a file like +this:: + + for line in file: + # do something for each line + ... + +Sets can take their contents from an iterable and let you iterate over the set's +elements:: + + S = set((2, 3, 5, 7, 11, 13)) + for i in S: + print i + + + +Generator expressions and list comprehensions +============================================= + +Two common operations on an iterator's output are 1) performing some operation +for every element, 2) selecting a subset of elements that meet some condition. 
+For example, given a list of strings, you might want to strip off trailing +whitespace from each line or extract all the strings containing a given +substring. + +List comprehensions and generator expressions (short form: "listcomps" and +"genexps") are a concise notation for such operations, borrowed from the +functional programming language Haskell (http://www.haskell.org). You can strip +all the whitespace from a stream of strings with the following code:: + + line_list = [' line 1\n', 'line 2 \n', ...] + + # Generator expression -- returns iterator + stripped_iter = (line.strip() for line in line_list) + + # List comprehension -- returns list + stripped_list = [line.strip() for line in line_list] + +You can select only certain elements by adding an ``"if"`` condition:: + + stripped_list = [line.strip() for line in line_list + if line != ""] + +With a list comprehension, you get back a Python list; ``stripped_list`` is a +list containing the resulting lines, not an iterator. Generator expressions +return an iterator that computes the values as necessary, not needing to +materialize all the values at once. This means that list comprehensions aren't +useful if you're working with iterators that return an infinite stream or a very +large amount of data. Generator expressions are preferable in these situations. + +Generator expressions are surrounded by parentheses ("()") and list +comprehensions are surrounded by square brackets ("[]"). Generator expressions +have the form:: + + ( expression for expr1 in sequence1 + if condition1 + for expr2 in sequence2 + if condition2 + for expr3 in sequence3 ... + if condition3 + for exprN in sequenceN + if conditionN ) + +Again, for a list comprehension only the outside brackets are different (square +brackets instead of parentheses). + +The elements of the generated output will be the successive values of +``expression``.
The ``if`` clauses are all optional; if present, ``expression`` +is only evaluated and added to the result when ``condition`` is true. + +Generator expressions always have to be written inside parentheses, but the +parentheses signalling a function call also count. If you want to create an +iterator that will be immediately passed to a function you can write:: + + obj_total = sum(obj.count for obj in list_all_objects()) + +The ``for...in`` clauses contain the sequences to be iterated over. The +sequences do not have to be the same length, because they are iterated over from +left to right, **not** in parallel. For each element in ``sequence1``, +``sequence2`` is looped over from the beginning. ``sequence3`` is then looped +over for each resulting pair of elements from ``sequence1`` and ``sequence2``. + +To put it another way, a list comprehension or generator expression is +equivalent to the following Python code:: + + for expr1 in sequence1: + if not (condition1): + continue # Skip this element + for expr2 in sequence2: + if not (condition2): + continue # Skip this element + ... + for exprN in sequenceN: + if not (conditionN): + continue # Skip this element + + # Output the value of + # the expression. + +This means that when there are multiple ``for...in`` clauses but no ``if`` +clauses, the length of the resulting output will be equal to the product of the +lengths of all the sequences. If you have two lists of length 3, the output +list is 9 elements long:: + + seq1 = 'abc' + seq2 = (1,2,3) + >>> [ (x,y) for x in seq1 for y in seq2] + [('a', 1), ('a', 2), ('a', 3), + ('b', 1), ('b', 2), ('b', 3), + ('c', 1), ('c', 2), ('c', 3)] + +To avoid introducing an ambiguity into Python's grammar, if ``expression`` is +creating a tuple, it must be surrounded with parentheses. 
The first list +comprehension below is a syntax error, while the second one is correct:: + + # Syntax error + [ x,y for x in seq1 for y in seq2] + # Correct + [ (x,y) for x in seq1 for y in seq2] + + +Generators +========== + +Generators are a special class of functions that simplify the task of writing +iterators. Regular functions compute a value and return it, but generators +return an iterator that returns a stream of values. + +You're doubtless familiar with how regular function calls work in Python or C. +When you call a function, it gets a private namespace where its local variables +are created. When the function reaches a ``return`` statement, the local +variables are destroyed and the value is returned to the caller. A later call +to the same function creates a new private namespace and a fresh set of local +variables. But, what if the local variables weren't thrown away on exiting a +function? What if you could later resume the function where it left off? This +is what generators provide; they can be thought of as resumable functions. + +Here's the simplest example of a generator function:: + + def generate_ints(N): + for i in range(N): + yield i + +Any function containing a ``yield`` keyword is a generator function; this is +detected by Python's bytecode compiler which compiles the function specially as +a result. + +When you call a generator function, it doesn't return a single value; instead it +returns a generator object that supports the iterator protocol. On executing +the ``yield`` expression, the generator outputs the value of ``i``, similar to a +``return`` statement. The big difference between ``yield`` and a ``return`` +statement is that on reaching a ``yield`` the generator's state of execution is +suspended and local variables are preserved. On the next call to the +generator's ``.next()`` method, the function will resume executing. 
+ +Here's a sample usage of the ``generate_ints()`` generator:: + + >>> gen = generate_ints(3) + >>> gen + <generator object at 0x...> + >>> gen.next() + 0 + >>> gen.next() + 1 + >>> gen.next() + 2 + >>> gen.next() + Traceback (most recent call last): + File "<stdin>", line 1, in ? + File "<stdin>", line 2, in generate_ints + StopIteration + +You could equally write ``for i in generate_ints(5)``, or ``a,b,c = +generate_ints(3)``. + +Inside a generator function, the ``return`` statement can only be used without a +value, and signals the end of the procession of values; after executing a +``return`` the generator cannot return any further values. ``return`` with a +value, such as ``return 5``, is a syntax error inside a generator function. The +end of the generator's results can also be indicated by raising +``StopIteration`` manually, or by just letting the flow of execution fall off +the bottom of the function. + +You could achieve the effect of generators manually by writing your own class +and storing all the local variables of the generator as instance variables. For +example, returning a list of integers could be done by setting ``self.count`` to +0, and having the ``next()`` method increment ``self.count`` and return it. +However, for a moderately complicated generator, writing a corresponding class +can be much messier. + +The test suite included with Python's library, ``test_generators.py``, contains +a number of more interesting examples. Here's one generator that implements an +in-order traversal of a tree using generators recursively. + +:: + + # A recursive generator that generates Tree leaves in in-order.
+ def inorder(t): + if t: + for x in inorder(t.left): + yield x + + yield t.label + + for x in inorder(t.right): + yield x + +Two other examples in ``test_generators.py`` produce solutions for the N-Queens +problem (placing N queens on an NxN chess board so that no queen threatens +another) and the Knight's Tour (finding a route that takes a knight to every +square of an NxN chessboard without visiting any square twice). + + + +Passing values into a generator +------------------------------- + +In Python 2.4 and earlier, generators only produced output. Once a generator's +code was invoked to create an iterator, there was no way to pass any new +information into the function when its execution is resumed. You could hack +together this ability by making the generator look at a global variable or by +passing in some mutable object that callers then modify, but these approaches +are messy. + +In Python 2.5 there's a simple way to pass values into a generator. +:keyword:`yield` became an expression, returning a value that can be assigned to +a variable or otherwise operated on:: + + val = (yield i) + +I recommend that you **always** put parentheses around a ``yield`` expression +when you're doing something with the returned value, as in the above example. +The parentheses aren't always necessary, but it's easier to always add them +instead of having to remember when they're needed. + +(PEP 342 explains the exact rules, which are that a ``yield``-expression must +always be parenthesized except when it occurs at the top-level expression on the +right-hand side of an assignment. This means you can write ``val = yield i`` +but have to use parentheses when there's an operation, as in ``val = (yield i) ++ 12``.) + +Values are sent into a generator by calling its ``send(value)`` method. This +method resumes the generator's code and the ``yield`` expression returns the +specified value. If the regular ``next()`` method is called, the ``yield`` +returns ``None``. 
+ +Here's a simple counter that increments by 1 and allows changing the value of +the internal counter. + +:: + + def counter (maximum): + i = 0 + while i < maximum: + val = (yield i) + # If value provided, change counter + if val is not None: + i = val + else: + i += 1 + +And here's an example of changing the counter:: + + >>> it = counter(10) + >>> print it.next() + 0 + >>> print it.next() + 1 + >>> print it.send(8) + 8 + >>> print it.next() + 9 + >>> print it.next() + Traceback (most recent call last): + File "t.py", line 15, in ? + print it.next() + StopIteration + +Because ``yield`` will often be returning ``None``, you should always check for +this case. Don't just use its value in expressions unless you're sure that the +``send()`` method will be the only method used to resume your generator function. + +In addition to ``send()``, there are two other new methods on generators: + +* ``throw(type, value=None, traceback=None)`` is used to raise an exception + inside the generator; the exception is raised by the ``yield`` expression + where the generator's execution is paused. + +* ``close()`` raises a :exc:`GeneratorExit` exception inside the generator to + terminate the iteration. On receiving this exception, the generator's code + must either raise :exc:`GeneratorExit` or :exc:`StopIteration`; catching the + exception and doing anything else is illegal and will trigger a + :exc:`RuntimeError`. ``close()`` will also be called by Python's garbage + collector when the generator is garbage-collected. + + If you need to run cleanup code when a :exc:`GeneratorExit` occurs, I suggest + using a ``try: ... finally:`` suite instead of catching :exc:`GeneratorExit`. + +The cumulative effect of these changes is to turn generators from one-way +producers of information into both producers and consumers. + +Generators also become **coroutines**, a more generalized form of subroutines.
+Subroutines are entered at one point and exited at another point (the top of the +function, and a ``return`` statement), but coroutines can be entered, exited, +and resumed at many different points (the ``yield`` statements). + + +Built-in functions +================== + +Let's look in more detail at built-in functions often used with iterators. + +Two of Python's built-in functions, :func:`map` and :func:`filter`, are somewhat +obsolete; they duplicate the features of list comprehensions but return actual +lists instead of iterators. + +``map(f, iterA, iterB, ...)`` returns a list containing ``f(iterA[0], iterB[0]), +f(iterA[1], iterB[1]), f(iterA[2], iterB[2]), ...``. + +:: + + def upper(s): + return s.upper() + map(upper, ['sentence', 'fragment']) => + ['SENTENCE', 'FRAGMENT'] + + [upper(s) for s in ['sentence', 'fragment']] => + ['SENTENCE', 'FRAGMENT'] + +As shown above, you can achieve the same effect with a list comprehension. The +:func:`itertools.imap` function does the same thing but can handle infinite +iterators; it'll be discussed later, in the section on the :mod:`itertools` module. + +``filter(predicate, iter)`` returns a list that contains all the sequence +elements that meet a certain condition, and is similarly duplicated by list +comprehensions. A **predicate** is a function that returns the truth value of +some condition; for use with :func:`filter`, the predicate must take a single +value. + +:: + + def is_even(x): + return (x % 2) == 0 + + filter(is_even, range(10)) => + [0, 2, 4, 6, 8] + +This can also be written as a list comprehension:: + + >>> [x for x in range(10) if is_even(x)] + [0, 2, 4, 6, 8] + +:func:`filter` also has a counterpart in the :mod:`itertools` module, +:func:`itertools.ifilter`, that returns an iterator and can therefore handle +infinite sequences just as :func:`itertools.imap` can.
+ +``reduce(func, iter, [initial_value])`` doesn't have a counterpart in the +:mod:`itertools` module because it cumulatively performs an operation on all the +iterable's elements and therefore can't be applied to infinite iterables. +``func`` must be a function that takes two elements and returns a single value. +:func:`reduce` takes the first two elements A and B returned by the iterator and +calculates ``func(A, B)``. It then requests the third element, C, calculates +``func(func(A, B), C)``, combines this result with the fourth element returned, +and continues until the iterable is exhausted. If the iterable returns no +values at all, a :exc:`TypeError` exception is raised. If the initial value is +supplied, it's used as a starting point and ``func(initial_value, A)`` is the +first calculation. + +:: + + import operator + reduce(operator.concat, ['A', 'BB', 'C']) => + 'ABBC' + reduce(operator.concat, []) => + TypeError: reduce() of empty sequence with no initial value + reduce(operator.mul, [1,2,3], 1) => + 6 + reduce(operator.mul, [], 1) => + 1 + +If you use :func:`operator.add` with :func:`reduce`, you'll add up all the +elements of the iterable. This case is so common that there's a special +built-in called :func:`sum` to compute it:: + + reduce(operator.add, [1,2,3,4], 0) => + 10 + sum([1,2,3,4]) => + 10 + sum([]) => + 0 + +For many uses of :func:`reduce`, though, it can be clearer to just write the +obvious :keyword:`for` loop:: + + # Instead of: + product = reduce(operator.mul, [1,2,3], 1) + + # You can write: + product = 1 + for i in [1,2,3]: + product *= i + + +``enumerate(iter)`` counts off the elements in the iterable, returning 2-tuples +containing the count and each element. 
+ +:: + + enumerate(['subject', 'verb', 'object']) => + (0, 'subject'), (1, 'verb'), (2, 'object') + +:func:`enumerate` is often used when looping through a list and recording the +indexes at which certain conditions are met:: + + f = open('data.txt', 'r') + for i, line in enumerate(f): + if line.strip() == '': + print 'Blank line at line #%i' % i + +``sorted(iterable, [cmp=None], [key=None], [reverse=False])`` collects all the +elements of the iterable into a list, sorts the list, and returns the sorted +result. The ``cmp``, ``key``, and ``reverse`` arguments are passed through to +the constructed list's ``.sort()`` method. + +:: + + import random + # Generate 8 random numbers between [0, 10000) + rand_list = random.sample(range(10000), 8) + rand_list => + [769, 7953, 9828, 6431, 8442, 9878, 6213, 2207] + sorted(rand_list) => + [769, 2207, 6213, 6431, 7953, 8442, 9828, 9878] + sorted(rand_list, reverse=True) => + [9878, 9828, 8442, 7953, 6431, 6213, 2207, 769] + +(For a more detailed discussion of sorting, see the Sorting mini-HOWTO in the +Python wiki at http://wiki.python.org/moin/HowTo/Sorting.) + +The ``any(iter)`` and ``all(iter)`` built-ins look at the truth values of an +iterable's contents. :func:`any` returns True if any element in the iterable is +a true value, and :func:`all` returns True if all of the elements are true +values:: + + any([0,1,0]) => + True + any([0,0,0]) => + False + any([1,1,1]) => + True + all([0,1,0]) => + False + all([0,0,0]) => + False + all([1,1,1]) => + True + + +Small functions and the lambda expression +========================================= + +When writing functional-style programs, you'll often need little functions that +act as predicates or that combine elements in some way.
+ +If there's a Python built-in or a module function that's suitable, you don't +need to define a new function at all:: + + stripped_lines = [line.strip() for line in lines] + existing_files = filter(os.path.exists, file_list) + +If the function you need doesn't exist, you need to write it. One way to write +small functions is to use the ``lambda`` expression. ``lambda`` takes a number +of parameters and an expression combining these parameters, and creates a small +function that returns the value of the expression:: + + lowercase = lambda x: x.lower() + + print_assign = lambda name, value: name + '=' + str(value) + + adder = lambda x, y: x+y + +An alternative is to just use the ``def`` statement and define a function in the +usual way:: + + def lowercase(x): + return x.lower() + + def print_assign(name, value): + return name + '=' + str(value) + + def adder(x,y): + return x + y + +Which alternative is preferable? That's a style question; my usual course is to +avoid using ``lambda``. + +One reason for my preference is that ``lambda`` is quite limited in the +functions it can define. The result has to be computable as a single +expression, which means you can't have multiway ``if... elif... else`` +comparisons or ``try... except`` statements. If you try to do too much in a +``lambda`` expression, you'll end up with an overly complicated expression that's +hard to read. Quick, what's the following code doing? + +:: + + total = reduce(lambda a, b: (0, a[1] + b[1]), items)[1] + +You can figure it out, but it takes time to disentangle the expression to figure +out what's going on.
Using a short nested ``def`` statement makes things a +little bit better:: + + def combine (a, b): + return 0, a[1] + b[1] + + total = reduce(combine, items)[1] + +But it would be best of all if I had simply used a ``for`` loop:: + + total = 0 + for a, b in items: + total += b + +Or the :func:`sum` built-in and a generator expression:: + + total = sum(b for a,b in items) + +Many uses of :func:`reduce` are clearer when written as ``for`` loops. + +Fredrik Lundh once suggested the following set of rules for refactoring uses of +``lambda``: + +1) Write a lambda function. +2) Write a comment explaining what the heck that lambda does. +3) Study the comment for a while, and think of a name that captures the essence + of the comment. +4) Convert the lambda to a def statement, using that name. +5) Remove the comment. + +I really like these rules, but you're free to disagree that this lambda-free +style is better. + + +The itertools module +==================== + +The :mod:`itertools` module contains a number of commonly-used iterators as well +as functions for combining several iterators. This section will introduce the +module's contents by showing small examples. + +The module's functions fall into a few broad classes: + +* Functions that create a new iterator based on an existing iterator. +* Functions for treating an iterator's elements as function arguments. +* Functions for selecting portions of an iterator's output. +* A function for grouping an iterator's output. + +Creating new iterators +---------------------- + +``itertools.count(n)`` returns an infinite stream of integers, increasing by 1 +each time. You can optionally supply the starting number, which defaults to 0:: + + itertools.count() => + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... + itertools.count(10) => + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ... + +``itertools.cycle(iter)`` saves a copy of the contents of a provided iterable +and returns a new iterator that returns its elements from first to last.
The +new iterator will repeat these elements infinitely. + +:: + + itertools.cycle([1,2,3,4,5]) => + 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, ... + +``itertools.repeat(elem, [n])`` returns the provided element ``n`` times, or +returns the element endlessly if ``n`` is not provided. + +:: + + itertools.repeat('abc') => + abc, abc, abc, abc, abc, abc, abc, abc, abc, abc, ... + itertools.repeat('abc', 5) => + abc, abc, abc, abc, abc + +``itertools.chain(iterA, iterB, ...)`` takes an arbitrary number of iterables as +input, and returns all the elements of the first iterator, then all the elements +of the second, and so on, until all of the iterables have been exhausted. + +:: + + itertools.chain(['a', 'b', 'c'], (1, 2, 3)) => + a, b, c, 1, 2, 3 + +``itertools.izip(iterA, iterB, ...)`` takes one element from each iterable and +returns them in a tuple:: + + itertools.izip(['a', 'b', 'c'], (1, 2, 3)) => + ('a', 1), ('b', 2), ('c', 3) + +It's similar to the built-in :func:`zip` function, but doesn't construct an +in-memory list and exhaust all the input iterators before returning; instead +tuples are constructed and returned only if they're requested. (The technical +term for this behaviour is `lazy evaluation +<http://en.wikipedia.org/wiki/Lazy_evaluation>`__.) + +This iterator is intended to be used with iterables that are all of the same +length. If the iterables are of different lengths, the resulting stream will be +the same length as the shortest iterable. + +:: + + itertools.izip(['a', 'b'], (1, 2, 3)) => + ('a', 1), ('b', 2) + +You should avoid doing this, though, because an element may be taken from the +longer iterators and discarded. This means you can't go on to use the iterators +further because you risk skipping a discarded element. + +``itertools.islice(iter, [start], stop, [step])`` returns a stream that's a +slice of the iterator. With a single ``stop`` argument, it will return the +first ``stop`` elements.
If you supply a starting index, you'll get +``stop-start`` elements, and if you supply a value for ``step``, elements will +be skipped accordingly. Unlike Python's string and list slicing, you can't use +negative values for ``start``, ``stop``, or ``step``. + +:: + + itertools.islice(range(10), 8) => + 0, 1, 2, 3, 4, 5, 6, 7 + itertools.islice(range(10), 2, 8) => + 2, 3, 4, 5, 6, 7 + itertools.islice(range(10), 2, 8, 2) => + 2, 4, 6 + +``itertools.tee(iter, [n])`` replicates an iterator; it returns ``n`` +independent iterators that will all return the contents of the source iterator. +If you don't supply a value for ``n``, the default is 2. Replicating iterators +requires saving some of the contents of the source iterator, so this can consume +significant memory if the iterator is large and one of the new iterators is +consumed more than the others. + +:: + + itertools.tee( itertools.count() ) => + iterA, iterB + + where iterA -> + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... + + and iterB -> + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... + + +Calling functions on elements +----------------------------- + +Two functions are used for calling other functions on the contents of an +iterable. + +``itertools.imap(f, iterA, iterB, ...)`` returns a stream containing +``f(iterA[0], iterB[0]), f(iterA[1], iterB[1]), f(iterA[2], iterB[2]), ...``:: + + itertools.imap(operator.add, [5, 6, 5], [1, 2, 3]) => + 6, 8, 8 + +The ``operator`` module contains a set of functions corresponding to Python's +operators. Some examples are ``operator.add(a, b)`` (adds two values), +``operator.ne(a, b)`` (same as ``a!=b``), and ``operator.attrgetter('id')`` +(returns a callable that fetches the ``"id"`` attribute). 
+ +``itertools.starmap(func, iter)`` assumes that the iterable will return a stream +of tuples, and calls ``func()`` using these tuples as the arguments:: + + itertools.starmap(os.path.join, + [('/usr', 'bin', 'java'), ('/bin', 'python'), + ('/usr', 'bin', 'perl'),('/usr', 'bin', 'ruby')]) + => + /usr/bin/java, /bin/python, /usr/bin/perl, /usr/bin/ruby + + +Selecting elements +------------------ + +Another group of functions chooses a subset of an iterator's elements based on a +predicate. + +``itertools.ifilter(predicate, iter)`` returns all the elements for which the +predicate returns true:: + + def is_even(x): + return (x % 2) == 0 + + itertools.ifilter(is_even, itertools.count()) => + 0, 2, 4, 6, 8, 10, 12, 14, ... + +``itertools.ifilterfalse(predicate, iter)`` is the opposite, returning all +elements for which the predicate returns false:: + + itertools.ifilterfalse(is_even, itertools.count()) => + 1, 3, 5, 7, 9, 11, 13, 15, ... + +``itertools.takewhile(predicate, iter)`` returns elements for as long as the +predicate returns true. Once the predicate returns false, the iterator will +signal the end of its results. + +:: + + def less_than_10(x): + return (x < 10) + + itertools.takewhile(less_than_10, itertools.count()) => + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 + + itertools.takewhile(is_even, itertools.count()) => + 0 + +``itertools.dropwhile(predicate, iter)`` discards elements while the predicate +returns true, and then returns the rest of the iterable's results. + +:: + + itertools.dropwhile(less_than_10, itertools.count()) => + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ... + + itertools.dropwhile(is_even, itertools.count()) => + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ... + + +Grouping elements +----------------- + +The last function I'll discuss, ``itertools.groupby(iter, key_func=None)``, is +the most complicated. ``key_func(elem)`` is a function that can compute a key +value for each element returned by the iterable.
If you don't supply a key +function, the key is simply each element itself. + +``groupby()`` collects all the consecutive elements from the underlying iterable +that have the same key value, and returns a stream of 2-tuples containing a key +value and an iterator for the elements with that key. + +:: + + city_list = [('Decatur', 'AL'), ('Huntsville', 'AL'), ('Selma', 'AL'), + ('Anchorage', 'AK'), ('Nome', 'AK'), + ('Flagstaff', 'AZ'), ('Phoenix', 'AZ'), ('Tucson', 'AZ'), + ... + ] + + def get_state ((city, state)): + return state + + itertools.groupby(city_list, get_state) => + ('AL', iterator-1), + ('AK', iterator-2), + ('AZ', iterator-3), ... + + where + iterator-1 => + ('Decatur', 'AL'), ('Huntsville', 'AL'), ('Selma', 'AL') + iterator-2 => + ('Anchorage', 'AK'), ('Nome', 'AK') + iterator-3 => + ('Flagstaff', 'AZ'), ('Phoenix', 'AZ'), ('Tucson', 'AZ') + +``groupby()`` assumes that the underlying iterable's contents will already be +sorted based on the key. Note that the returned iterators also use the +underlying iterable, so you have to consume the results of iterator-1 before +requesting iterator-2 and its corresponding key. + + +The functools module +==================== + +The :mod:`functools` module in Python 2.5 contains some higher-order functions. +A **higher-order function** takes one or more functions as input and returns a +new function. The most useful tool in this module is the +:func:`functools.partial` function. + +For programs written in a functional style, you'll sometimes want to construct +variants of existing functions that have some of the parameters filled in. +Consider a Python function ``f(a, b, c)``; you may wish to create a new function +``g(b, c)`` that's equivalent to ``f(1, b, c)``; you're filling in a value for +one of ``f()``'s parameters. This is called "partial function application". + +The constructor for ``partial`` takes the arguments ``(function, arg1, arg2, +... kwarg1=value1, kwarg2=value2)``. 
The resulting object is callable, so you +can just call it to invoke ``function`` with the filled-in arguments. + +Here's a small but realistic example:: + + import functools + + def log (message, subsystem): + "Write the contents of 'message' to the specified subsystem." + print '%s: %s' % (subsystem, message) + ... + + server_log = functools.partial(log, subsystem='server') + server_log('Unable to open socket') + + +The operator module +------------------- + +The :mod:`operator` module was mentioned earlier. It contains a set of +functions corresponding to Python's operators. These functions are often useful +in functional-style code because they save you from writing trivial functions +that perform a single operation. + +Some of the functions in this module are: + +* Math operations: ``add()``, ``sub()``, ``mul()``, ``div()``, ``floordiv()``, + ``abs()``, ... +* Logical operations: ``not_()``, ``truth()``. +* Bitwise operations: ``and_()``, ``or_()``, ``invert()``. +* Comparisons: ``eq()``, ``ne()``, ``lt()``, ``le()``, ``gt()``, and ``ge()``. +* Object identity: ``is_()``, ``is_not()``. + +Consult the operator module's documentation for a complete list. + + + +The functional module +--------------------- + +Collin Winter's `functional module `__ +provides a number of more advanced tools for functional programming. It also +reimplements several Python built-ins, trying to make them more intuitive to +those used to functional programming in other languages. + +This section contains an introduction to some of the most important functions in +``functional``; full documentation can be found at `the project's website +`__. + +``compose(outer, inner, unpack=False)`` + +The ``compose()`` function implements function composition. In other words, it +returns a wrapper around the ``outer`` and ``inner`` callables, such that the +return value from ``inner`` is fed directly to ``outer``. That is, + +:: + + >>> def add(a, b): + ... return a + b + ... 
+ >>> def double(a): + ... return 2 * a + ... + >>> compose(double, add)(5, 6) + 22 + +is equivalent to + +:: + + >>> double(add(5, 6)) + 22 + +The ``unpack`` keyword is provided to work around the fact that Python functions +are not always `fully curried `__. By +default, it is expected that the ``inner`` function will return a single object +and that the ``outer`` function will take a single argument. Setting the +``unpack`` argument causes ``compose`` to expect a tuple from ``inner`` which +will be expanded before being passed to ``outer``. Put simply, + +:: + + compose(f, g)(5, 6) + +is equivalent to:: + + f(g(5, 6)) + +while + +:: + + compose(f, g, unpack=True)(5, 6) + +is equivalent to:: + + f(*g(5, 6)) + +Even though ``compose()`` only accepts two functions, it's trivial to build up a +version that will compose any number of functions. We'll use ``reduce()``, +``compose()`` and ``partial()`` (the last of which is provided by both +``functional`` and ``functools``). + +:: + + from functional import compose, partial + + multi_compose = partial(reduce, compose) + + +We can also use ``map()``, ``compose()`` and ``partial()`` to craft a version of +``"".join(...)`` that converts its arguments to string:: + + from functional import compose, partial + + join = compose("".join, partial(map, str)) + + +``flip(func)`` + +``flip()`` wraps the callable in ``func`` and causes it to receive its +non-keyword arguments in reverse order. + +:: + + >>> def triple(a, b, c): + ... return (a, b, c) + ... + >>> triple(5, 6, 7) + (5, 6, 7) + >>> + >>> flipped_triple = flip(triple) + >>> flipped_triple(5, 6, 7) + (7, 6, 5) + +``foldl(func, start, iterable)`` + +``foldl()`` takes a binary function, a starting value (usually some kind of +'zero'), and an iterable. The function is applied to the starting value and the +first element of the list, then the result of that and the second element of the +list, then the result of that and the third element of the list, and so on. 
+ +This means that a call such as:: + + foldl(f, 0, [1, 2, 3]) + +is equivalent to:: + + f(f(f(0, 1), 2), 3) + + +``foldl()`` is roughly equivalent to the following recursive function:: + + def foldl(func, start, seq): + if len(seq) == 0: + return start + + return foldl(func, func(start, seq[0]), seq[1:]) + +Speaking of equivalence, the above ``foldl`` call can be expressed in terms of +the built-in ``reduce`` like so:: + + reduce(f, [1, 2, 3], 0) + + +We can use ``foldl()``, ``operator.concat()`` and ``partial()`` to write a +cleaner, more aesthetically-pleasing version of Python's ``"".join(...)`` +idiom:: + + from functional import foldl, partial + from operator import concat + + join = partial(foldl, concat, "") + + +Revision History and Acknowledgements +===================================== + +The author would like to thank the following people for offering suggestions, +corrections and assistance with various drafts of this article: Ian Bicking, +Nick Coghlan, Nick Efford, Raymond Hettinger, Jim Jewett, Mike Krell, Leandro +Lameiro, Jussi Salmela, Collin Winter, Blake Winton. + +Version 0.1: posted June 30 2006. + +Version 0.11: posted July 1 2006. Typo fixes. + +Version 0.2: posted July 10 2006. Merged genexp and listcomp sections into one. +Typo fixes. + +Version 0.21: Added more references suggested on the tutor mailing list. + +Version 0.30: Adds a section on the ``functional`` module written by Collin +Winter; adds short section on the operator module; a few other edits. + + +References +========== + +General +------- + +**Structure and Interpretation of Computer Programs**, by Harold Abelson and +Gerald Jay Sussman with Julie Sussman. Full text at +http://mitpress.mit.edu/sicp/. In this classic textbook of computer science, +chapters 2 and 3 discuss the use of sequences and streams to organize the data +flow inside a program. 
The book uses Scheme for its examples, but many of the +design approaches described in these chapters are applicable to functional-style +Python code. + +http://www.defmacro.org/ramblings/fp.html: A general introduction to functional +programming that uses Java examples and has a lengthy historical introduction. + +http://en.wikipedia.org/wiki/Functional_programming: General Wikipedia entry +describing functional programming. + +http://en.wikipedia.org/wiki/Coroutine: Entry for coroutines. + +http://en.wikipedia.org/wiki/Currying: Entry for the concept of currying. + +Python-specific +--------------- + +http://gnosis.cx/TPiP/: The first chapter of David Mertz's book +:title-reference:`Text Processing in Python` discusses functional programming +for text processing, in the section titled "Utilizing Higher-Order Functions in +Text Processing". + +Mertz also wrote a 3-part series of articles on functional programming +for IBM's DeveloperWorks site; see +`part 1 `__, +`part 2 `__, and +`part 3 `__, + + +Python documentation +-------------------- + +Documentation for the :mod:`itertools` module. + +Documentation for the :mod:`operator` module. + +:pep:`289`: "Generator Expressions" + +:pep:`342`: "Coroutines via Enhanced Generators" describes the new generator +features in Python 2.5. + +.. comment + + Topics to place + ----------------------------- + + XXX os.walk() + + XXX Need a large example. + + But will an example add much? I'll post a first draft and see + what the comments say. + +.. comment + + Original outline: + Introduction + Idea of FP + Programs built out of functions + Functions are strictly input-output, no internal state + Opposed to OO programming, where objects have state + + Why FP? 
+ Formal provability + Assignment is difficult to reason about + Not very relevant to Python + Modularity + Small functions that do one thing + Debuggability: + Easy to test due to lack of state + Easy to verify output from intermediate steps + Composability + You assemble a toolbox of functions that can be mixed + + Tackling a problem + Need a significant example + + Iterators + Generators + The itertools module + List comprehensions + Small functions and the lambda statement + Built-in functions + map + filter + reduce + +.. comment + + Handy little function for printing part of an iterator -- used + while writing this document. + + import itertools + def print_iter(it): + slice = itertools.islice(it, 10) + for elem in slice[:-1]: + sys.stdout.write(str(elem)) + sys.stdout.write(', ') + print elem[-1] + + diff --git a/Doc/howto/index.rst b/Doc/howto/index.rst new file mode 100644 index 0000000..e668856 --- /dev/null +++ b/Doc/howto/index.rst @@ -0,0 +1,25 @@ +*************** + Python HOWTOs +*************** + +Python HOWTOs are documents that cover a single, specific topic, +and attempt to cover it fairly completely. Modelled on the Linux +Documentation Project's HOWTO collection, this collection is an +effort to foster documentation that's more detailed than the +Python Library Reference. + +Currently, the HOWTOs are: + +.. toctree:: + :maxdepth: 1 + + advocacy.rst + pythonmac.rst + curses.rst + doanddont.rst + functional.rst + regex.rst + sockets.rst + unicode.rst + urllib2.rst + diff --git a/Doc/howto/pythonmac.rst b/Doc/howto/pythonmac.rst new file mode 100644 index 0000000..7811f37 --- /dev/null +++ b/Doc/howto/pythonmac.rst @@ -0,0 +1,202 @@ + +.. 
_using-on-mac: + +*************************** +Using Python on a Macintosh +*************************** + +:Author: Bob Savage + + +Python on a Macintosh running Mac OS X is in principle very similar to Python on +any other Unix platform, but there are a number of additional features such as +the IDE and the Package Manager that are worth pointing out. + +The Mac-specific modules are documented in :ref:`mac-specific-services`. + +Python on Mac OS 9 or earlier can be quite different from Python on Unix or +Windows, but is beyond the scope of this manual, as that platform is no longer +supported, starting with Python 2.4. See http://www.cwi.nl/~jack/macpython for +installers for the latest 2.3 release for Mac OS 9 and related documentation. + + +.. _getting-osx: + +Getting and Installing MacPython +================================ + +Mac OS X 10.4 comes with Python 2.3 pre-installed by Apple. However, you are +encouraged to install the most recent version of Python from the Python website +(http://www.python.org). A "universal binary" build of Python 2.5, which runs +natively on the Mac's new Intel and legacy PPC CPU's, is available there. + +What you get after installing is a number of things: + +* A :file:`MacPython 2.5` folder in your :file:`Applications` folder. In here + you find IDLE, the development environment that is a standard part of official + Python distributions; PythonLauncher, which handles double-clicking Python + scripts from the Finder; and the "Build Applet" tool, which allows you to + package Python scripts as standalone applications on your system. + +* A framework :file:`/Library/Frameworks/Python.framework`, which includes the + Python executable and libraries. The installer adds this location to your shell + path. To uninstall MacPython, you can simply remove these three things. A + symlink to the Python executable is placed in /usr/local/bin/. 
+ +The Apple-provided build of Python is installed in +:file:`/System/Library/Frameworks/Python.framework` and :file:`/usr/bin/python`, +respectively. You should never modify or delete these, as they are +Apple-controlled and are used by Apple- or third-party software. + +IDLE includes a help menu that allows you to access Python documentation. If you +are completely new to Python you should start reading the tutorial introduction +in that document. + +If you are familiar with Python on other Unix platforms you should read the +section on running Python scripts from the Unix shell. + + +How to run a Python script +-------------------------- + +Your best way to get started with Python on Mac OS X is through the IDLE +integrated development environment, see section :ref:`ide` and use the Help menu +when the IDE is running. + +If you want to run Python scripts from the Terminal window command line or from +the Finder you first need an editor to create your script. Mac OS X comes with a +number of standard Unix command line editors, :program:`vim` and +:program:`emacs` among them. If you want a more Mac-like editor, +:program:`BBEdit` or :program:`TextWrangler` from Bare Bones Software (see +http://www.barebones.com/products/bbedit/index.shtml) are good choices, as is +:program:`TextMate` (see http://macromates.com/). Other editors include +:program:`Gvim` (http://macvim.org) and :program:`Aquamacs` +(http://aquamacs.org). + +To run your script from the Terminal window you must make sure that +:file:`/usr/local/bin` is in your shell search path. + +To run your script from the Finder you have two options: + +* Drag it to :program:`PythonLauncher` + +* Select :program:`PythonLauncher` as the default application to open your + script (or any .py script) through the finder Info window and double-click it. + :program:`PythonLauncher` has various preferences to control how your script is + launched. 
Option-dragging allows you to change these for one invocation, or use + its Preferences menu to change things globally. + + +.. _osx-gui-scripts: + +Running scripts with a GUI +-------------------------- + +With older versions of Python, there is one Mac OS X quirk that you need to be +aware of: programs that talk to the Aqua window manager (in other words, +anything that has a GUI) need to be run in a special way. Use :program:`pythonw` +instead of :program:`python` to start such scripts. + +With Python 2.5, you can use either :program:`python` or :program:`pythonw`. + + +Configuration +------------- + +Python on OS X honors all standard Unix environment variables such as +:envvar:`PYTHONPATH`, but setting these variables for programs started from the +Finder is non-standard as the Finder does not read your :file:`.profile` or +:file:`.cshrc` at startup. You need to create a file +:file:`~/.MacOSX/environment.plist`. See Apple's Technical Document QA1067 for details. + +For more information on installing Python packages in MacPython, see section +:ref:`mac-package-manager`. + + +.. _ide: + +The IDE +======= + +MacPython ships with the standard IDLE development environment. A good +introduction to using IDLE can be found at +http://hkn.eecs.berkeley.edu/~dyoo/python/idle_intro/index.html. + + +.. _mac-package-manager: + +Installing Additional Python Packages +===================================== + +There are several methods to install additional Python packages: + +* http://pythonmac.org/packages/ contains selected compiled packages for Python + 2.5, 2.4, and 2.3. + +* Packages can be installed via the standard Python distutils mode (``python + setup.py install``). + +* Many packages can also be installed via the :program:`setuptools` extension. + + +GUI Programming on the Mac +========================== + +There are several options for building GUI applications on the Mac with Python. 
+ +*PyObjC* is a Python binding to Apple's Objective-C/Cocoa framework, which is +the foundation of most modern Mac development. Information on PyObjC is +available from http://pyobjc.sourceforge.net. + +The standard Python GUI toolkit is :mod:`Tkinter`, based on the cross-platform +Tk toolkit (http://www.tcl.tk). An Aqua-native version of Tk is bundled with OS +X by Apple, and the latest version can be downloaded and installed from +http://www.activestate.com; it can also be built from source. + +*wxPython* is another popular cross-platform GUI toolkit that runs natively on +Mac OS X. Packages and documentation are available from http://www.wxpython.org. + +*PyQt* is another popular cross-platform GUI toolkit that runs natively on Mac +OS X. More information can be found at +http://www.riverbankcomputing.co.uk/pyqt/. + + +Distributing Python Applications on the Mac +=========================================== + +The "Build Applet" tool that is placed in the MacPython 2.5 folder is fine for +packaging small Python scripts on your own machine to run as a standard Mac +application. This tool, however, is not robust enough to distribute Python +applications to other users. + +The standard tool for deploying standalone Python applications on the Mac is +:program:`py2app`. More information on installing and using py2app can be found +at http://undefined.org/python/#py2app. + + +Application Scripting +===================== + +Python can also be used to script other Mac applications via Apple's Open +Scripting Architecture (OSA); see http://appscript.sourceforge.net. Appscript is +a high-level, user-friendly Apple event bridge that allows you to control +scriptable Mac OS X applications using ordinary Python scripts. Appscript makes +Python a serious alternative to Apple's own *AppleScript* language for +automating your Mac. 
A related package, *PyOSA*, is an OSA language component +for the Python scripting language, allowing Python code to be executed by any +OSA-enabled application (Script Editor, Mail, iTunes, etc.). PyOSA makes Python +a full peer to AppleScript. + + +Other Resources +=============== + +The MacPython mailing list is an excellent support resource for Python users and +developers on the Mac: + +http://www.python.org/community/sigs/current/pythonmac-sig/ + +Another useful resource is the MacPython wiki: + +http://wiki.python.org/moin/MacPython + diff --git a/Doc/howto/regex.rst b/Doc/howto/regex.rst new file mode 100644 index 0000000..b200764 --- /dev/null +++ b/Doc/howto/regex.rst @@ -0,0 +1,1377 @@ +**************************** + Regular Expression HOWTO +**************************** + +:Author: A.M. Kuchling +:Release: 0.05 + +.. % TODO: +.. % Document lookbehind assertions +.. % Better way of displaying a RE, a string, and what it matches +.. % Mention optional argument to match.groups() +.. % Unicode (at least a reference) + + +.. topic:: Abstract + + This document is an introductory tutorial to using regular expressions in Python + with the :mod:`re` module. It provides a gentler introduction than the + corresponding section in the Library Reference. + + +Introduction +============ + +The :mod:`re` module was added in Python 1.5, and provides Perl-style regular +expression patterns. Earlier versions of Python came with the :mod:`regex` +module, which provided Emacs-style patterns. The :mod:`regex` module was +removed completely in Python 2.5. + +Regular expressions (called REs, or regexes, or regex patterns) are essentially +a tiny, highly specialized programming language embedded inside Python and made +available through the :mod:`re` module. Using this little language, you specify +the rules for the set of possible strings that you want to match; this set might +contain English sentences, or e-mail addresses, or TeX commands, or anything you +like. 
You can then ask questions such as "Does this string match the pattern?", +or "Is there a match for the pattern anywhere in this string?". You can also +use REs to modify a string or to split it apart in various ways. + +Regular expression patterns are compiled into a series of bytecodes which are +then executed by a matching engine written in C. For advanced use, it may be +necessary to pay careful attention to how the engine will execute a given RE, +and write the RE in a certain way in order to produce bytecode that runs faster. +Optimization isn't covered in this document, because it requires that you have a +good understanding of the matching engine's internals. + +The regular expression language is relatively small and restricted, so not all +possible string processing tasks can be done using regular expressions. There +are also tasks that *can* be done with regular expressions, but the expressions +turn out to be very complicated. In these cases, you may be better off writing +Python code to do the processing; while Python code will be slower than an +elaborate regular expression, it will also probably be more understandable. + + +Simple Patterns +=============== + +We'll start by learning about the simplest possible regular expressions. Since +regular expressions are used to operate on strings, we'll begin with the most +common task: matching characters. + +For a detailed explanation of the computer science underlying regular +expressions (deterministic and non-deterministic finite automata), you can refer +to almost any textbook on writing compilers. + + +Matching Characters +------------------- + +Most letters and characters will simply match themselves. For example, the +regular expression ``test`` will match the string ``test`` exactly. (You can +enable a case-insensitive mode that would let this RE match ``Test`` or ``TEST`` +as well; more about this later.) 
+ +There are exceptions to this rule; some characters are special +:dfn:`metacharacters`, and don't match themselves. Instead, they signal that +some out-of-the-ordinary thing should be matched, or they affect other portions +of the RE by repeating them or changing their meaning. Much of this document is +devoted to discussing various metacharacters and what they do. + +Here's a complete list of the metacharacters; their meanings will be discussed +in the rest of this HOWTO. :: + + . ^ $ * + ? { [ ] \ | ( ) + +The first metacharacters we'll look at are ``[`` and ``]``. They're used for +specifying a character class, which is a set of characters that you wish to +match. Characters can be listed individually, or a range of characters can be +indicated by giving two characters and separating them by a ``'-'``. For +example, ``[abc]`` will match any of the characters ``a``, ``b``, or ``c``; this +is the same as ``[a-c]``, which uses a range to express the same set of +characters. If you wanted to match only lowercase letters, your RE would be +``[a-z]``. + +.. % $ + +Metacharacters are not active inside classes. For example, ``[akm$]`` will +match any of the characters ``'a'``, ``'k'``, ``'m'``, or ``'$'``; ``'$'`` is +usually a metacharacter, but inside a character class it's stripped of its +special nature. + +You can match the characters not listed within the class by :dfn:`complementing` +the set. This is indicated by including a ``'^'`` as the first character of the +class; a ``'^'`` elsewhere in a character class will simply match the ``'^'`` +character. For example, ``[^5]`` will match any character except ``'5'``. + +Perhaps the most important metacharacter is the backslash, ``\``. As in Python +string literals, the backslash can be followed by various characters to signal +various special sequences. 
It's also used to escape all the metacharacters so +you can still match them in patterns; for example, if you need to match a ``[`` +or ``\``, you can precede them with a backslash to remove their special +meaning: ``\[`` or ``\\``. + +Some of the special sequences beginning with ``'\'`` represent predefined sets +of characters that are often useful, such as the set of digits, the set of +letters, or the set of anything that isn't whitespace. The following predefined +special sequences are available: + +``\d`` + Matches any decimal digit; this is equivalent to the class ``[0-9]``. + +``\D`` + Matches any non-digit character; this is equivalent to the class ``[^0-9]``. + +``\s`` + Matches any whitespace character; this is equivalent to the class ``[ + \t\n\r\f\v]``. + +``\S`` + Matches any non-whitespace character; this is equivalent to the class ``[^ + \t\n\r\f\v]``. + +``\w`` + Matches any alphanumeric character; this is equivalent to the class + ``[a-zA-Z0-9_]``. + +``\W`` + Matches any non-alphanumeric character; this is equivalent to the class + ``[^a-zA-Z0-9_]``. + +These sequences can be included inside a character class. For example, +``[\s,.]`` is a character class that will match any whitespace character, or +``','`` or ``'.'``. + +The final metacharacter in this section is ``.``. It matches anything except a +newline character, and there's an alternate mode (``re.DOTALL``) where it will +match even a newline. ``'.'`` is often used where you want to match "any +character". + + +Repeating Things +---------------- + +Being able to match varying sets of characters is the first thing regular +expressions can do that isn't already possible with the methods available on +strings. However, if that was the only additional capability of regexes, they +wouldn't be much of an advance. Another capability is that you can specify that +portions of the RE must be repeated a certain number of times. 
+ +The first metacharacter for repeating things that we'll look at is ``*``. ``*`` +doesn't match the literal character ``*``; instead, it specifies that the +previous character can be matched zero or more times, instead of exactly once. + +For example, ``ca*t`` will match ``ct`` (0 ``a`` characters), ``cat`` (1 ``a``), +``caaat`` (3 ``a`` characters), and so forth. The RE engine has various +internal limitations stemming from the size of C's ``int`` type that will +prevent it from matching over 2 billion ``a`` characters; you probably don't +have enough memory to construct a string that large, so you shouldn't run into +that limit. + +Repetitions such as ``*`` are :dfn:`greedy`; when repeating a RE, the matching +engine will try to repeat it as many times as possible. If later portions of the +pattern don't match, the matching engine will then back up and try again with +fewer repetitions. + +A step-by-step example will make this more obvious. Let's consider the +expression ``a[bcd]*b``. This matches the letter ``'a'``, zero or more letters +from the class ``[bcd]``, and finally ends with a ``'b'``. Now imagine matching +this RE against the string ``abcbd``. + ++------+-----------+---------------------------------+ +| Step | Matched | Explanation | ++======+===========+=================================+ +| 1 | ``a`` | The ``a`` in the RE matches. | ++------+-----------+---------------------------------+ +| 2 | ``abcbd`` | The engine matches ``[bcd]*``, | +| | | going as far as it can, which | +| | | is to the end of the string. | ++------+-----------+---------------------------------+ +| 3 | *Failure* | The engine tries to match | +| | | ``b``, but the current position | +| | | is at the end of the string, so | +| | | it fails. | ++------+-----------+---------------------------------+ +| 4 | ``abcb`` | Back up, so that ``[bcd]*`` | +| | | matches one less character. 
| ++------+-----------+---------------------------------+ +| 5 | *Failure* | Try ``b`` again, but the | +| | | current position is at the last | +| | | character, which is a ``'d'``. | ++------+-----------+---------------------------------+ +| 6 | ``abc`` | Back up again, so that | +| | | ``[bcd]*`` is only matching | +| | | ``bc``. | ++------+-----------+---------------------------------+ +| 7 | ``abcb`` | Try ``b`` again. This time | +| | | the character at the | +| | | current position is ``'b'``, so | +| | | it succeeds. | ++------+-----------+---------------------------------+ + +The end of the RE has now been reached, and it has matched ``abcb``. This +demonstrates how the matching engine goes as far as it can at first, and if no +match is found it will then progressively back up and retry the rest of the RE +again and again. It will back up until it has tried zero matches for +``[bcd]*``, and if that subsequently fails, the engine will conclude that the +string doesn't match the RE at all. + +Another repeating metacharacter is ``+``, which matches one or more times. Pay +careful attention to the difference between ``*`` and ``+``; ``*`` matches +*zero* or more times, so whatever's being repeated may not be present at all, +while ``+`` requires at least *one* occurrence. To use a similar example, +``ca+t`` will match ``cat`` (1 ``a``), ``caaat`` (3 ``a``'s), but won't match +``ct``. + +There are two more repeating qualifiers. The question mark character, ``?``, +matches either once or zero times; you can think of it as marking something as +being optional. For example, ``home-?brew`` matches either ``homebrew`` or +``home-brew``. + +The most complicated repeated qualifier is ``{m,n}``, where *m* and *n* are +decimal integers. This qualifier means there must be at least *m* repetitions, +and at most *n*. For example, ``a/{1,3}b`` will match ``a/b``, ``a//b``, and +``a///b``. It won't match ``ab``, which has no slashes, or ``a////b``, which +has four. 
+ +You can omit either *m* or *n*; in that case, a reasonable value is assumed for +the missing value. Omitting *m* is interpreted as a lower limit of 0, while +omitting *n* results in an upper bound of infinity --- actually, the upper bound +is the 2-billion limit mentioned earlier, but that might as well be infinity. + +Readers of a reductionist bent may notice that the three other qualifiers can +all be expressed using this notation. ``{0,}`` is the same as ``*``, ``{1,}`` +is equivalent to ``+``, and ``{0,1}`` is the same as ``?``. It's better to use +``*``, ``+``, or ``?`` when you can, simply because they're shorter and easier +to read. + + +Using Regular Expressions +========================= + +Now that we've looked at some simple regular expressions, how do we actually use +them in Python? The :mod:`re` module provides an interface to the regular +expression engine, allowing you to compile REs into objects and then perform +matches with them. + + +Compiling Regular Expressions +----------------------------- + +Regular expressions are compiled into :class:`RegexObject` instances, which have +methods for various operations such as searching for pattern matches or +performing string substitutions. :: + + >>> import re + >>> p = re.compile('ab*') + >>> print p + <re.RegexObject instance at 80b4150> + +:func:`re.compile` also accepts an optional *flags* argument, used to enable +various special features and syntax variations. We'll go over the available +settings later, but for now a single example will do:: + + >>> p = re.compile('ab*', re.IGNORECASE) + +The RE is passed to :func:`re.compile` as a string. REs are handled as strings +because regular expressions aren't part of the core Python language, and no +special syntax was created for expressing them. (There are applications that +don't need REs at all, so there's no need to bloat the language specification by +including them.) 
Instead, the :mod:`re` module is simply a C extension module +included with Python, just like the :mod:`socket` or :mod:`zlib` modules. + +Putting REs in strings keeps the Python language simpler, but has one +disadvantage which is the topic of the next section. + + +The Backslash Plague +-------------------- + +As stated earlier, regular expressions use the backslash character (``'\'``) to +indicate special forms or to allow special characters to be used without +invoking their special meaning. This conflicts with Python's usage of the same +character for the same purpose in string literals. + +Let's say you want to write a RE that matches the string ``\section``, which +might be found in a LaTeX file. To figure out what to write in the program +code, start with the desired string to be matched. Next, you must escape any +backslashes and other metacharacters by preceding them with a backslash, +resulting in the string ``\\section``. The resulting string that must be passed +to :func:`re.compile` must be ``\\section``. However, to express this as a +Python string literal, both backslashes must be escaped *again*. + ++-------------------+------------------------------------------+ +| Characters | Stage | ++===================+==========================================+ +| ``\section`` | Text string to be matched | ++-------------------+------------------------------------------+ +| ``\\section`` | Escaped backslash for :func:`re.compile` | ++-------------------+------------------------------------------+ +| ``"\\\\section"`` | Escaped backslashes for a string literal | ++-------------------+------------------------------------------+ + +In short, to match a literal backslash, one has to write ``'\\\\'`` as the RE +string, because the regular expression must be ``\\``, and each backslash must +be expressed as ``\\`` inside a regular Python string literal. 
In REs that +feature backslashes repeatedly, this leads to lots of repeated backslashes and +makes the resulting strings difficult to understand. + +The solution is to use Python's raw string notation for regular expressions; +backslashes are not handled in any special way in a string literal prefixed with +``'r'``, so ``r"\n"`` is a two-character string containing ``'\'`` and ``'n'``, +while ``"\n"`` is a one-character string containing a newline. Regular +expressions will often be written in Python code using this raw string notation. + ++-------------------+------------------+ +| Regular String | Raw string | ++===================+==================+ +| ``"ab*"`` | ``r"ab*"`` | ++-------------------+------------------+ +| ``"\\\\section"`` | ``r"\\section"`` | ++-------------------+------------------+ +| ``"\\w+\\s+\\1"`` | ``r"\w+\s+\1"`` | ++-------------------+------------------+ + + +Performing Matches +------------------ + +Once you have an object representing a compiled regular expression, what do you +do with it? :class:`RegexObject` instances have several methods and attributes. +Only the most significant ones will be covered here; consult the :mod:`re` +module's section of the Library Reference for a complete +listing. + ++------------------+-----------------------------------------------+ +| Method/Attribute | Purpose | ++==================+===============================================+ +| ``match()`` | Determine if the RE matches at the beginning | +| | of the string. | ++------------------+-----------------------------------------------+ +| ``search()`` | Scan through a string, looking for any | +| | location where this RE matches. | ++------------------+-----------------------------------------------+ +| ``findall()`` | Find all substrings where the RE matches, and | +| | return them as a list. | ++------------------+-----------------------------------------------+ +| ``finditer()`` | Find all substrings where the RE matches, and | +| | return them as an iterator. 
| ++------------------+-----------------------------------------------+ + +:meth:`match` and :meth:`search` return ``None`` if no match can be found. If +they're successful, a ``MatchObject`` instance is returned, containing +information about the match: where it starts and ends, the substring it matched, +and more. + +You can learn about this by interactively experimenting with the :mod:`re` +module. If you have Tkinter available, you may also want to look at +:file:`Tools/scripts/redemo.py`, a demonstration program included with the +Python distribution. It allows you to enter REs and strings, and displays +whether the RE matches or fails. :file:`redemo.py` can be quite useful when +trying to debug a complicated RE. Phil Schwartz's `Kodos +<http://kodos.sourceforge.net/>`_ is also an interactive tool for +developing and testing RE patterns. + +This HOWTO uses the standard Python interpreter for its examples. First, run the +Python interpreter, import the :mod:`re` module, and compile a RE:: + + Python 2.2.2 (#1, Feb 10 2003, 12:57:01) + >>> import re + >>> p = re.compile('[a-z]+') + >>> p + <_sre.SRE_Pattern object at 80c3c28> + +Now, you can try matching various strings against the RE ``[a-z]+``. An empty +string shouldn't match at all, since ``+`` means 'one or more repetitions'. +:meth:`match` should return ``None`` in this case, which will cause the +interpreter to print no output. You can explicitly print the result of +:meth:`match` to make this clear. :: + + >>> p.match("") + >>> print p.match("") + None + +Now, let's try it on a string that it should match, such as ``tempo``. In this +case, :meth:`match` will return a :class:`MatchObject`, so you should store the +result in a variable for later use. :: + + >>> m = p.match('tempo') + >>> print m + <_sre.SRE_Match object at 80c4f68> + +Now you can query the :class:`MatchObject` for information about the matching +string. 
:class:`MatchObject` instances also have several methods and +attributes; the most important ones are: + ++------------------+--------------------------------------------+ +| Method/Attribute | Purpose | ++==================+============================================+ +| ``group()`` | Return the string matched by the RE | ++------------------+--------------------------------------------+ +| ``start()`` | Return the starting position of the match | ++------------------+--------------------------------------------+ +| ``end()`` | Return the ending position of the match | ++------------------+--------------------------------------------+ +| ``span()`` | Return a tuple containing the (start, end) | +| | positions of the match | ++------------------+--------------------------------------------+ + +Trying these methods will soon clarify their meaning:: + + >>> m.group() + 'tempo' + >>> m.start(), m.end() + (0, 5) + >>> m.span() + (0, 5) + +:meth:`group` returns the substring that was matched by the RE. :meth:`start` +and :meth:`end` return the starting and ending index of the match. :meth:`span` +returns both start and end indexes in a single tuple. Since the :meth:`match` +method only checks if the RE matches at the start of a string, :meth:`start` +will always be zero. However, the :meth:`search` method of :class:`RegexObject` +instances scans through the string, so the match may not start at zero in that +case. :: + + >>> print p.match('::: message') + None + >>> m = p.search('::: message') ; print m + + >>> m.group() + 'message' + >>> m.span() + (4, 11) + +In actual programs, the most common style is to store the :class:`MatchObject` +in a variable, and then check if it was ``None``. This usually looks like:: + + p = re.compile( ... ) + m = p.match( 'string goes here' ) + if m: + print 'Match found: ', m.group() + else: + print 'No match' + +Two :class:`RegexObject` methods return all of the matches for a pattern. 
+:meth:`findall` returns a list of matching strings:: + + >>> p = re.compile('\d+') + >>> p.findall('12 drummers drumming, 11 pipers piping, 10 lords a-leaping') + ['12', '11', '10'] + +:meth:`findall` has to create the entire list before it can be returned as the +result. The :meth:`finditer` method returns a sequence of :class:`MatchObject` +instances as an iterator. [#]_ :: + + >>> iterator = p.finditer('12 drummers drumming, 11 ... 10 ...') + >>> iterator + + >>> for match in iterator: + ... print match.span() + ... + (0, 2) + (22, 24) + (29, 31) + + +Module-Level Functions +---------------------- + +You don't have to create a :class:`RegexObject` and call its methods; the +:mod:`re` module also provides top-level functions called :func:`match`, +:func:`search`, :func:`findall`, :func:`sub`, and so forth. These functions +take the same arguments as the corresponding :class:`RegexObject` method, with +the RE string added as the first argument, and still return either ``None`` or a +:class:`MatchObject` instance. :: + + >>> print re.match(r'From\s+', 'Fromage amk') + None + >>> re.match(r'From\s+', 'From amk Thu May 14 19:12:10 1998') + + +Under the hood, these functions simply produce a :class:`RegexObject` for you +and call the appropriate method on it. They also store the compiled object in a +cache, so future calls using the same RE are faster. + +Should you use these module-level functions, or should you get the +:class:`RegexObject` and call its methods yourself? That choice depends on how +frequently the RE will be used, and on your personal coding style. If the RE is +being used at only one point in the code, then the module functions are probably +more convenient. If a program contains a lot of regular expressions, or re-uses +the same ones in several locations, then it might be worthwhile to collect all +the definitions in one place, in a section of code that compiles all the REs +ahead of time. 
To take an example from the standard library, here's an extract +from :file:`xmllib.py`:: + + ref = re.compile( ... ) + entityref = re.compile( ... ) + charref = re.compile( ... ) + starttagopen = re.compile( ... ) + +I generally prefer to work with the compiled object, even for one-time uses, but +few people will be as much of a purist about this as I am. + + +Compilation Flags +----------------- + +Compilation flags let you modify some aspects of how regular expressions work. +Flags are available in the :mod:`re` module under two names, a long name such as +:const:`IGNORECASE` and a short, one-letter form such as :const:`I`. (If you're +familiar with Perl's pattern modifiers, the one-letter forms use the same +letters; the short form of :const:`re.VERBOSE` is :const:`re.X`, for example.) +Multiple flags can be specified by bitwise OR-ing them; ``re.I | re.M`` sets +both the :const:`I` and :const:`M` flags, for example. + +Here's a table of the available flags, followed by a more detailed explanation +of each one. + ++---------------------------------+--------------------------------------------+ +| Flag | Meaning | ++=================================+============================================+ +| :const:`DOTALL`, :const:`S` | Make ``.`` match any character, including | +| | newlines | ++---------------------------------+--------------------------------------------+ +| :const:`IGNORECASE`, :const:`I` | Do case-insensitive matches | ++---------------------------------+--------------------------------------------+ +| :const:`LOCALE`, :const:`L` | Do a locale-aware match | ++---------------------------------+--------------------------------------------+ +| :const:`MULTILINE`, :const:`M` | Multi-line matching, affecting ``^`` and | +| | ``$`` | ++---------------------------------+--------------------------------------------+ +| :const:`VERBOSE`, :const:`X` | Enable verbose REs, which can be organized | +| | more cleanly and understandably. 
| ++---------------------------------+--------------------------------------------+ + + +.. data:: I + IGNORECASE + :noindex: + + Perform case-insensitive matching; character classes and literal strings will + match letters by ignoring case. For example, ``[A-Z]`` will match lowercase + letters, too, and ``Spam`` will match ``Spam``, ``spam``, or ``spAM``. This + lowercasing doesn't take the current locale into account; it will if you also + set the :const:`LOCALE` flag. + + +.. data:: L + LOCALE + :noindex: + + Make ``\w``, ``\W``, ``\b``, and ``\B`` dependent on the current locale. + + Locales are a feature of the C library intended to help in writing programs that + take account of language differences. For example, if you're processing French + text, you'd want to be able to write ``\w+`` to match words, but ``\w`` only + matches the character class ``[A-Za-z]``; it won't match ``'é'`` or ``'ç'``. If + your system is configured properly and a French locale is selected, certain C + functions will tell the program that ``'é'`` should also be considered a letter. + Setting the :const:`LOCALE` flag when compiling a regular expression will cause + the resulting compiled object to use these C functions for ``\w``; this is + slower, but also enables ``\w+`` to match French words as you'd expect. + + +.. data:: M + MULTILINE + :noindex: + + (``^`` and ``$`` haven't been explained yet; they'll be introduced in section + :ref:`more-metacharacters`.) + + Usually ``^`` matches only at the beginning of the string, and ``$`` matches + only at the end of the string and immediately before the newline (if any) at the + end of the string. When this flag is specified, ``^`` matches at the beginning + of the string and at the beginning of each line within the string, immediately + following each newline. Similarly, the ``$`` metacharacter matches either at + the end of the string or at the end of each line (immediately preceding each + newline). + + +.. 
data:: S + DOTALL + :noindex: + + Makes the ``'.'`` special character match any character at all, including a + newline; without this flag, ``'.'`` will match anything *except* a newline. + + +.. data:: X + VERBOSE + :noindex: + + This flag allows you to write regular expressions that are more readable by + granting you more flexibility in how you can format them. When this flag has + been specified, whitespace within the RE string is ignored, except when the + whitespace is in a character class or preceded by an unescaped backslash; this + lets you organize and indent the RE more clearly. This flag also lets you put + comments within a RE that will be ignored by the engine; comments are marked by + a ``'#'`` that's neither in a character class nor preceded by an unescaped + backslash. + + For example, here's a RE that uses :const:`re.VERBOSE`; see how much easier it + is to read? :: + + charref = re.compile(r""" + &[#] # Start of a numeric entity reference + ( + 0[0-7]+ # Octal form + | [0-9]+ # Decimal form + | x[0-9a-fA-F]+ # Hexadecimal form + ) + ; # Trailing semicolon + """, re.VERBOSE) + + Without the verbose setting, the RE would look like this:: + + charref = re.compile("&#(0[0-7]+" + "|[0-9]+" + "|x[0-9a-fA-F]+);") + + In the above example, Python's automatic concatenation of string literals has + been used to break up the RE into smaller pieces, but it's still more difficult + to understand than the version using :const:`re.VERBOSE`. + + +More Pattern Power +================== + +So far we've only covered a part of the features of regular expressions. In +this section, we'll cover some new metacharacters, and how to use groups to +retrieve portions of the text that was matched. + + +.. _more-metacharacters: + +More Metacharacters +------------------- + +There are some metacharacters that we haven't covered yet. Most of them will be +covered in this section. + +Some of the remaining metacharacters to be discussed are :dfn:`zero-width +assertions`. 
They don't cause the engine to advance through the string; +instead, they consume no characters at all, and simply succeed or fail. For +example, ``\b`` is an assertion that the current position is located at a word +boundary; the position isn't changed by the ``\b`` at all. This means that +zero-width assertions should never be repeated, because if they match once at a +given location, they can obviously be matched an infinite number of times. + +``|`` + Alternation, or the "or" operator. If A and B are regular expressions, + ``A|B`` will match any string that matches either ``A`` or ``B``. ``|`` has very + low precedence in order to make it work reasonably when you're alternating + multi-character strings. ``Crow|Servo`` will match either ``Crow`` or ``Servo``, + not ``Cro``, a ``'w'`` or an ``'S'``, and ``ervo``. + + To match a literal ``'|'``, use ``\|``, or enclose it inside a character class, + as in ``[|]``. + +``^`` + Matches at the beginning of lines. Unless the :const:`MULTILINE` flag has been + set, this will only match at the beginning of the string. In :const:`MULTILINE` + mode, this also matches immediately after each newline within the string. + + For example, if you wish to match the word ``From`` only at the beginning of a + line, the RE to use is ``^From``. :: + + >>> print re.search('^From', 'From Here to Eternity') + + >>> print re.search('^From', 'Reciting From Memory') + None + + .. % To match a literal \character{\^}, use \regexp{\e\^} or enclose it + .. % inside a character class, as in \regexp{[{\e}\^]}. + +``$`` + Matches at the end of a line, which is defined as either the end of the string, + or any location followed by a newline character. :: + + >>> print re.search('}$', '{block}') + + >>> print re.search('}$', '{block} ') + None + >>> print re.search('}$', '{block}\n') + + + To match a literal ``'$'``, use ``\$`` or enclose it inside a character class, + as in ``[$]``. + + .. % $ + +``\A`` + Matches only at the start of the string. 
When not in :const:`MULTILINE` mode, + ``\A`` and ``^`` are effectively the same. In :const:`MULTILINE` mode, they're + different: ``\A`` still matches only at the beginning of the string, but ``^`` + may match at any location inside the string that follows a newline character. + +``\Z`` + Matches only at the end of the string. + +``\b`` + Word boundary. This is a zero-width assertion that matches only at the + beginning or end of a word. A word is defined as a sequence of alphanumeric + characters, so the end of a word is indicated by whitespace or a + non-alphanumeric character. + + The following example matches ``class`` only when it's a complete word; it won't + match when it's contained inside another word. :: + + >>> p = re.compile(r'\bclass\b') + >>> print p.search('no class at all') + + >>> print p.search('the declassified algorithm') + None + >>> print p.search('one subclass is') + None + + There are two subtleties you should remember when using this special sequence. + First, this is the worst collision between Python's string literals and regular + expression sequences. In Python's string literals, ``\b`` is the backspace + character, ASCII value 8. If you're not using raw strings, then Python will + convert the ``\b`` to a backspace, and your RE won't match as you expect it to. + The following example looks the same as our previous RE, but omits the ``'r'`` + in front of the RE string. :: + + >>> p = re.compile('\bclass\b') + >>> print p.search('no class at all') + None + >>> print p.search('\b' + 'class' + '\b') + + + Second, inside a character class, where there's no use for this assertion, + ``\b`` represents the backspace character, for compatibility with Python's + string literals. + +``\B`` + Another zero-width assertion, this is the opposite of ``\b``, only matching when + the current position is not at a word boundary. + + +Grouping +-------- + +Frequently you need to obtain more information than just whether the RE matched +or not. 
Regular expressions are often used to dissect strings by writing a RE +divided into several subgroups which match different components of interest. +For example, an RFC-822 header line is divided into a header name and a value, +separated by a ``':'``, like this:: + + From: author@example.com + User-Agent: Thunderbird 1.5.0.9 (X11/20061227) + MIME-Version: 1.0 + To: editor@example.com + +This can be handled by writing a regular expression which matches an entire +header line, and has one group which matches the header name, and another group +which matches the header's value. + +Groups are marked by the ``'('``, ``')'`` metacharacters. ``'('`` and ``')'`` +have much the same meaning as they do in mathematical expressions; they group +together the expressions contained inside them, and you can repeat the contents +of a group with a repeating qualifier, such as ``*``, ``+``, ``?``, or +``{m,n}``. For example, ``(ab)*`` will match zero or more repetitions of +``ab``. :: + + >>> p = re.compile('(ab)*') + >>> print p.match('ababababab').span() + (0, 10) + +Groups indicated with ``'('``, ``')'`` also capture the starting and ending +index of the text that they match; this can be retrieved by passing an argument +to :meth:`group`, :meth:`start`, :meth:`end`, and :meth:`span`. Groups are +numbered starting with 0. Group 0 is always present; it's the whole RE, so +:class:`MatchObject` methods all have group 0 as their default argument. Later +we'll see how to express groups that don't capture the span of text that they +match. :: + + >>> p = re.compile('(a)b') + >>> m = p.match('ab') + >>> m.group() + 'ab' + >>> m.group(0) + 'ab' + +Subgroups are numbered from left to right, from 1 upward. Groups can be nested; +to determine the number, just count the opening parenthesis characters, going +from left to right. 
:: + + >>> p = re.compile('(a(b)c)d') + >>> m = p.match('abcd') + >>> m.group(0) + 'abcd' + >>> m.group(1) + 'abc' + >>> m.group(2) + 'b' + +:meth:`group` can be passed multiple group numbers at a time, in which case it +will return a tuple containing the corresponding values for those groups. :: + + >>> m.group(2,1,2) + ('b', 'abc', 'b') + +The :meth:`groups` method returns a tuple containing the strings for all the +subgroups, from 1 up to however many there are. :: + + >>> m.groups() + ('abc', 'b') + +Backreferences in a pattern allow you to specify that the contents of an earlier +capturing group must also be found at the current location in the string. For +example, ``\1`` will succeed if the exact contents of group 1 can be found at +the current position, and fails otherwise. Remember that Python's string +literals also use a backslash followed by numbers to allow including arbitrary +characters in a string, so be sure to use a raw string when incorporating +backreferences in a RE. + +For example, the following RE detects doubled words in a string. :: + + >>> p = re.compile(r'(\b\w+)\s+\1') + >>> p.search('Paris in the the spring').group() + 'the the' + +Backreferences like this aren't often useful for just searching through a string +--- there are few text formats which repeat data in this way --- but you'll soon +find out that they're *very* useful when performing string substitutions. + + +Non-capturing and Named Groups +------------------------------ + +Elaborate REs may use many groups, both to capture substrings of interest, and +to group and structure the RE itself. In complex REs, it becomes difficult to +keep track of the group numbers. There are two features which help with this +problem. Both of them use a common syntax for regular expression extensions, so +we'll look at that first. + +Perl 5 added several additional features to standard regular expressions, and +the Python :mod:`re` module supports most of them. 
It would have been +difficult to choose new single-keystroke metacharacters or new special sequences +beginning with ``\`` to represent the new features without making Perl's regular +expressions confusingly different from standard REs. If you chose ``&`` as a +new metacharacter, for example, old expressions would be assuming that ``&`` was +a regular character and wouldn't have escaped it by writing ``\&`` or ``[&]``. + +The solution chosen by the Perl developers was to use ``(?...)`` as the +extension syntax. ``?`` immediately after a parenthesis was a syntax error +because the ``?`` would have nothing to repeat, so this didn't introduce any +compatibility problems. The characters immediately after the ``?`` indicate +what extension is being used, so ``(?=foo)`` is one thing (a positive lookahead +assertion) and ``(?:foo)`` is something else (a non-capturing group containing +the subexpression ``foo``). + +Python adds an extension syntax to Perl's extension syntax. If the first +character after the question mark is a ``P``, you know that it's an extension +that's specific to Python. Currently there are two such extensions: +``(?P<name>...)`` defines a named group, and ``(?P=name)`` is a backreference to +a named group. If future versions of Perl 5 add similar features using a +different syntax, the :mod:`re` module will be changed to support the new +syntax, while preserving the Python-specific syntax for compatibility's sake. + +Now that we've looked at the general extension syntax, we can return to the +features that simplify working with groups in complex REs. Since groups are +numbered from left to right and a complex expression may use many groups, it can +become difficult to keep track of the correct numbering. Modifying such a +complex RE is annoying, too: insert a new group near the beginning and you +change the numbers of everything that follows it. 
+ +Sometimes you'll want to use a group to collect a part of a regular expression, +but aren't interested in retrieving the group's contents. You can make this fact +explicit by using a non-capturing group: ``(?:...)``, where you can replace the +``...`` with any other regular expression. :: + + >>> m = re.match("([abc])+", "abc") + >>> m.groups() + ('c',) + >>> m = re.match("(?:[abc])+", "abc") + >>> m.groups() + () + +Except for the fact that you can't retrieve the contents of what the group +matched, a non-capturing group behaves exactly the same as a capturing group; +you can put anything inside it, repeat it with a repetition metacharacter such +as ``*``, and nest it within other groups (capturing or non-capturing). +``(?:...)`` is particularly useful when modifying an existing pattern, since you +can add new groups without changing how all the other groups are numbered. It +should be mentioned that there's no performance difference in searching between +capturing and non-capturing groups; neither form is any faster than the other. + +A more significant feature is named groups: instead of referring to them by +numbers, groups can be referenced by a name. + +The syntax for a named group is one of the Python-specific extensions: +``(?P<name>...)``. *name* is, obviously, the name of the group. Named groups +also behave exactly like capturing groups, and additionally associate a name +with a group. The :class:`MatchObject` methods that deal with capturing groups +all accept either integers that refer to the group by number or strings that +contain the desired group's name. Named groups are still given numbers, so you +can retrieve information about a group in two ways:: + + >>> p = re.compile(r'(?P<word>\b\w+\b)') + >>> m = p.search( '(((( Lots of punctuation )))' ) + >>> m.group('word') + 'Lots' + >>> m.group(1) + 'Lots' + +Named groups are handy because they let you use easily-remembered names, instead +of having to remember numbers. 
Here's an example RE from the :mod:`imaplib` +module:: + + InternalDate = re.compile(r'INTERNALDATE "' + r'(?P<day>[ 123][0-9])-(?P<mon>[A-Z][a-z][a-z])-' + r'(?P<year>[0-9][0-9][0-9][0-9])' + r' (?P<hour>[0-9][0-9]):(?P<min>[0-9][0-9]):(?P<sec>[0-9][0-9])' + r' (?P<zonen>[-+])(?P<zoneh>[0-9][0-9])(?P<zonem>[0-9][0-9])' + r'"') + +It's obviously much easier to retrieve ``m.group('zonem')``, instead of having +to remember to retrieve group 9. + +The syntax for backreferences in an expression such as ``(...)\1`` refers to the +number of the group. There's naturally a variant that uses the group name +instead of the number. This is another Python extension: ``(?P=name)`` indicates +that the contents of the group called *name* should again be matched at the +current point. The regular expression for finding doubled words, +``(\b\w+)\s+\1`` can also be written as ``(?P<word>\b\w+)\s+(?P=word)``:: + + >>> p = re.compile(r'(?P<word>\b\w+)\s+(?P=word)') + >>> p.search('Paris in the the spring').group() + 'the the' + + +Lookahead Assertions +-------------------- + +Another zero-width assertion is the lookahead assertion. Lookahead assertions +are available in both positive and negative form, and look like this: + +``(?=...)`` + Positive lookahead assertion. This succeeds if the contained regular + expression, represented here by ``...``, successfully matches at the current + location, and fails otherwise. But, once the contained expression has been + tried, the matching engine doesn't advance at all; the rest of the pattern is + tried right where the assertion started. + +``(?!...)`` + Negative lookahead assertion. This is the opposite of the positive assertion; + it succeeds if the contained expression *doesn't* match at the current position + in the string. + +To make this concrete, let's look at a case where a lookahead is useful. +Consider a simple pattern to match a filename and split it apart into a base +name and an extension, separated by a ``.``. 
For example, in ``news.rc``, +``news`` is the base name, and ``rc`` is the filename's extension. + +The pattern to match this is quite simple: + +``.*[.].*$`` + +Notice that the ``.`` needs to be treated specially because it's a +metacharacter; I've put it inside a character class. Also notice the trailing +``$``; this is added to ensure that all the rest of the string must be included +in the extension. This regular expression matches ``foo.bar`` and +``autoexec.bat`` and ``sendmail.cf`` and ``printers.conf``. + +Now, consider complicating the problem a bit; what if you want to match +filenames where the extension is not ``bat``? Some incorrect attempts: + +``.*[.][^b].*$`` The first attempt above tries to exclude ``bat`` by requiring +that the first character of the extension is not a ``b``. This is wrong, +because the pattern also doesn't match ``foo.bar``. + +.. % $ + +``.*[.]([^b]..|.[^a].|..[^t])$`` + +.. % Messes up the HTML without the curly braces around \^ + +The expression gets messier when you try to patch up the first solution by +requiring one of the following cases to match: the first character of the +extension isn't ``b``; the second character isn't ``a``; or the third character +isn't ``t``. This accepts ``foo.bar`` and rejects ``autoexec.bat``, but it +requires a three-letter extension and won't accept a filename with a two-letter +extension such as ``sendmail.cf``. We'll complicate the pattern again in an +effort to fix it. + +``.*[.]([^b].?.?|.[^a]?.?|..?[^t]?)$`` + +In the third attempt, the second and third letters are all made optional in +order to allow matching extensions shorter than three characters, such as +``sendmail.cf``. + +The pattern's getting really complicated now, which makes it hard to read and +understand. Worse, if the problem changes and you want to exclude both ``bat`` +and ``exe`` as extensions, the pattern would get even more complicated and +confusing. 
+ +A negative lookahead cuts through all this confusion: + +``.*[.](?!bat$).*$`` The negative lookahead means: if the expression ``bat`` +doesn't match at this point, try the rest of the pattern; if ``bat$`` does +match, the whole pattern will fail. The trailing ``$`` is required to ensure +that something like ``sample.batch``, where the extension only starts with +``bat``, will be allowed. + +.. % $ + +Excluding another filename extension is now easy; simply add it as an +alternative inside the assertion. The following pattern excludes filenames that +end in either ``bat`` or ``exe``: + +``.*[.](?!bat$|exe$).*$`` + +.. % $ + + +Modifying Strings +================= + +Up to this point, we've simply performed searches against a static string. +Regular expressions are also commonly used to modify strings in various ways, +using the following :class:`RegexObject` methods: + ++------------------+-----------------------------------------------+ +| Method/Attribute | Purpose | ++==================+===============================================+ +| ``split()`` | Split the string into a list, splitting it | +| | wherever the RE matches | ++------------------+-----------------------------------------------+ +| ``sub()`` | Find all substrings where the RE matches, and | +| | replace them with a different string | ++------------------+-----------------------------------------------+ +| ``subn()`` | Does the same thing as :meth:`sub`, but | +| | returns the new string and the number of | +| | replacements | ++------------------+-----------------------------------------------+ + + +Splitting Strings +----------------- + +The :meth:`split` method of a :class:`RegexObject` splits a string apart +wherever the RE matches, returning a list of the pieces. It's similar to the +:meth:`split` method of strings but provides much more generality in the +delimiters that you can split by; :meth:`split` only supports splitting by +whitespace or by a fixed string. 
As you'd expect, there's a module-level +:func:`re.split` function, too. + + +.. method:: .split(string [, maxsplit=0]) + :noindex: + + Split *string* by the matches of the regular expression. If capturing + parentheses are used in the RE, then their contents will also be returned as + part of the resulting list. If *maxsplit* is nonzero, at most *maxsplit* splits + are performed. + +You can limit the number of splits made, by passing a value for *maxsplit*. +When *maxsplit* is nonzero, at most *maxsplit* splits will be made, and the +remainder of the string is returned as the final element of the list. In the +following example, the delimiter is any sequence of non-alphanumeric characters. +:: + + >>> p = re.compile(r'\W+') + >>> p.split('This is a test, short and sweet, of split().') + ['This', 'is', 'a', 'test', 'short', 'and', 'sweet', 'of', 'split', ''] + >>> p.split('This is a test, short and sweet, of split().', 3) + ['This', 'is', 'a', 'test, short and sweet, of split().'] + +Sometimes you're not only interested in what the text between delimiters is, but +also need to know what the delimiter was. If capturing parentheses are used in +the RE, then their values are also returned as part of the list. Compare the +following calls:: + + >>> p = re.compile(r'\W+') + >>> p2 = re.compile(r'(\W+)') + >>> p.split('This... is a test.') + ['This', 'is', 'a', 'test', ''] + >>> p2.split('This... is a test.') + ['This', '... ', 'is', ' ', 'a', ' ', 'test', '.', ''] + +The module-level function :func:`re.split` adds the RE to be used as the first +argument, but is otherwise the same. 
:: + + >>> re.split('[\W]+', 'Words, words, words.') + ['Words', 'words', 'words', ''] + >>> re.split('([\W]+)', 'Words, words, words.') + ['Words', ', ', 'words', ', ', 'words', '.', ''] + >>> re.split('[\W]+', 'Words, words, words.', 1) + ['Words', 'words, words.'] + + +Search and Replace +------------------ + +Another common task is to find all the matches for a pattern, and replace them +with a different string. The :meth:`sub` method takes a replacement value, +which can be either a string or a function, and the string to be processed. + + +.. method:: .sub(replacement, string[, count=0]) + :noindex: + + Returns the string obtained by replacing the leftmost non-overlapping + occurrences of the RE in *string* by the replacement *replacement*. If the + pattern isn't found, *string* is returned unchanged. + + The optional argument *count* is the maximum number of pattern occurrences to be + replaced; *count* must be a non-negative integer. The default value of 0 means + to replace all occurrences. + +Here's a simple example of using the :meth:`sub` method. It replaces colour +names with the word ``colour``:: + + >>> p = re.compile( '(blue|white|red)') + >>> p.sub( 'colour', 'blue socks and red shoes') + 'colour socks and colour shoes' + >>> p.sub( 'colour', 'blue socks and red shoes', count=1) + 'colour socks and red shoes' + +The :meth:`subn` method does the same work, but returns a 2-tuple containing the +new string value and the number of replacements that were performed:: + + >>> p = re.compile( '(blue|white|red)') + >>> p.subn( 'colour', 'blue socks and red shoes') + ('colour socks and colour shoes', 2) + >>> p.subn( 'colour', 'no colours at all') + ('no colours at all', 0) + +Empty matches are replaced only when they're not adjacent to a previous match. +:: + + >>> p = re.compile('x*') + >>> p.sub('-', 'abxd') + '-a-b-d-' + +If *replacement* is a string, any backslash escapes in it are processed. 
That +is, ``\n`` is converted to a single newline character, ``\r`` is converted to a +carriage return, and so forth. Unknown escapes such as ``\j`` are left alone. +Backreferences, such as ``\6``, are replaced with the substring matched by the +corresponding group in the RE. This lets you incorporate portions of the +original text in the resulting replacement string. + +This example matches the word ``section`` followed by a string enclosed in +``{``, ``}``, and changes ``section`` to ``subsection``:: + + >>> p = re.compile('section{ ( [^}]* ) }', re.VERBOSE) + >>> p.sub(r'subsection{\1}','section{First} section{second}') + 'subsection{First} subsection{second}' + +There's also a syntax for referring to named groups as defined by the +``(?P<name>...)`` syntax. ``\g<name>`` will use the substring matched by the +group named ``name``, and ``\g<number>`` uses the corresponding group number. +``\g<2>`` is therefore equivalent to ``\2``, but isn't ambiguous in a +replacement string such as ``\g<2>0``. (``\20`` would be interpreted as a +reference to group 20, not a reference to group 2 followed by the literal +character ``'0'``.) The following substitutions are all equivalent, but use all +three variations of the replacement string. :: + + >>> p = re.compile('section{ (?P<name> [^}]* ) }', re.VERBOSE) + >>> p.sub(r'subsection{\1}','section{First}') + 'subsection{First}' + >>> p.sub(r'subsection{\g<1>}','section{First}') + 'subsection{First}' + >>> p.sub(r'subsection{\g<name>}','section{First}') + 'subsection{First}' + +*replacement* can also be a function, which gives you even more control. If +*replacement* is a function, the function is called for every non-overlapping +occurrence of *pattern*. On each call, the function is passed a +:class:`MatchObject` argument for the match and can use this information to +compute the desired replacement string and return it. + +In the following example, the replacement function translates decimals into +hexadecimal:: + + >>> def hexrepl( match ): + ...
"Return the hex string for a decimal number" + ... value = int( match.group() ) + ... return hex(value) + ... + >>> p = re.compile(r'\d+') + >>> p.sub(hexrepl, 'Call 65490 for printing, 49152 for user code.') + 'Call 0xffd2 for printing, 0xc000 for user code.' + +When using the module-level :func:`re.sub` function, the pattern is passed as +the first argument. The pattern may be a string or a :class:`RegexObject`; if +you need to specify regular expression flags, you must either use a +:class:`RegexObject` as the first parameter, or use embedded modifiers in the +pattern, e.g. ``sub("(?i)b+", "x", "bbbb BBBB")`` returns ``'x x'``. + + +Common Problems +=============== + +Regular expressions are a powerful tool for some applications, but in some ways +their behaviour isn't intuitive and at times they don't behave the way you may +expect them to. This section will point out some of the most common pitfalls. + + +Use String Methods +------------------ + +Sometimes using the :mod:`re` module is a mistake. If you're matching a fixed +string, or a single character class, and you're not using any :mod:`re` features +such as the :const:`IGNORECASE` flag, then the full power of regular expressions +may not be required. Strings have several methods for performing operations with +fixed strings and they're usually much faster, because the implementation is a +single small C loop that's been optimized for the purpose, instead of the large, +more generalized regular expression engine. + +One example might be replacing a single fixed string with another one; for +example, you might replace ``word`` with ``deed``. ``re.sub()`` seems like the +function to use for this, but consider the :meth:`replace` method. Note that +:func:`replace` will also replace ``word`` inside words, turning ``swordfish`` +into ``sdeedfish``, but the naive RE ``word`` would have done that, too. 
(To +avoid performing the substitution on parts of words, the pattern would have to +be ``\bword\b``, in order to require that ``word`` have a word boundary on +either side. This takes the job beyond :meth:`replace`'s abilities.) + +Another common task is deleting every occurrence of a single character from a +string or replacing it with another single character. You might do this with +something like ``re.sub('\n', ' ', S)``, but :meth:`translate` is capable of +doing both tasks and will be faster than any regular expression operation can +be. + +In short, before turning to the :mod:`re` module, consider whether your problem +can be solved with a faster and simpler string method. + + +match() versus search() +----------------------- + +The :func:`match` function only checks if the RE matches at the beginning of the +string while :func:`search` will scan forward through the string for a match. +It's important to keep this distinction in mind. Remember, :func:`match` will +only report a successful match which will start at 0; if the match wouldn't +start at zero, :func:`match` will *not* report it. :: + + >>> print re.match('super', 'superstition').span() + (0, 5) + >>> print re.match('super', 'insuperable') + None + +On the other hand, :func:`search` will scan forward through the string, +reporting the first match it finds. :: + + >>> print re.search('super', 'superstition').span() + (0, 5) + >>> print re.search('super', 'insuperable').span() + (2, 7) + +Sometimes you'll be tempted to keep using :func:`re.match`, and just add ``.*`` +to the front of your RE. Resist this temptation and use :func:`re.search` +instead. The regular expression compiler does some analysis of REs in order to +speed up the process of looking for a match. One such analysis figures out what +the first character of a match must be; for example, a pattern starting with +``Crow`` must match starting with a ``'C'``. 
The analysis lets the engine +quickly scan through the string looking for the starting character, only trying +the full match if a ``'C'`` is found. + +Adding ``.*`` defeats this optimization, requiring scanning to the end of the +string and then backtracking to find a match for the rest of the RE. Use +:func:`re.search` instead. + + +Greedy versus Non-Greedy +------------------------ + +When repeating a regular expression, as in ``a*``, the resulting action is to +consume as much of the pattern as possible. This fact often bites you when +you're trying to match a pair of balanced delimiters, such as the angle brackets +surrounding an HTML tag. The naive pattern for matching a single HTML tag +doesn't work because of the greedy nature of ``.*``. :: + + >>> s = '<html><head><title>Title</title>' + >>> len(s) + 32 + >>> print re.match('<.*>', s).span() + (0, 32) + >>> print re.match('<.*>', s).group() + <html><head><title>Title</title> + +The RE matches the ``'<'`` in ``<html>``, and the ``.*`` consumes the rest of +the string. There's still more left in the RE, though, and the ``>`` can't +match at the end of the string, so the regular expression engine has to +backtrack character by character until it finds a match for the ``>``. The +final match extends from the ``'<'`` in ``<html>`` to the ``'>'`` in +``</title>``, which isn't what you want. + +In this case, the solution is to use the non-greedy qualifiers ``*?``, ``+?``, +``??``, or ``{m,n}?``, which match as *little* text as possible. In the above +example, the ``'>'`` is tried immediately after the first ``'<'`` matches, and +when it fails, the engine advances a character at a time, retrying the ``'>'`` +at every step. This produces just the right result:: + + >>> print re.match('<.*?>', s).group() + <html> + +(Note that parsing HTML or XML with regular expressions is painful.
+Quick-and-dirty patterns will handle common cases, but HTML and XML have special +cases that will break the obvious regular expression; by the time you've written +a regular expression that handles all of the possible cases, the patterns will +be *very* complicated. Use an HTML or XML parser module for such tasks.) + + +Not Using re.VERBOSE +-------------------- + +By now you've probably noticed that regular expressions are a very compact +notation, but they're not terribly readable. REs of moderate complexity can +become lengthy collections of backslashes, parentheses, and metacharacters, +making them difficult to read and understand. + +For such REs, specifying the ``re.VERBOSE`` flag when compiling the regular +expression can be helpful, because it allows you to format the regular +expression more clearly. + +The ``re.VERBOSE`` flag has several effects. Whitespace in the regular +expression that *isn't* inside a character class is ignored. This means that an +expression such as ``dog | cat`` is equivalent to the less readable ``dog|cat``, +but ``[a b]`` will still match the characters ``'a'``, ``'b'``, or a space. In +addition, you can also put comments inside a RE; comments extend from a ``#`` +character to the next newline. When used with triple-quoted strings, this +enables REs to be formatted more neatly:: + + pat = re.compile(r""" + \s* # Skip leading whitespace + (?P<header>[^:]+) # Header name + \s* : # Whitespace, and a colon + (?P<value>.*?) # The header's value -- *? used to + # lose the following trailing whitespace + \s*$ # Trailing whitespace to end-of-line + """, re.VERBOSE) + +This is far more readable than: + +.. % $ + +:: + + pat = re.compile(r"\s*(?P<header>[^:]+)\s*:(?P<value>.*?)\s*$") + +.. % $ + + +Feedback +======== + +Regular expressions are a complicated topic. Did this document help you +understand them? Were there parts that were unclear, or problems you +encountered that weren't covered here? If so, please send suggestions for +improvements to the author. + +The most complete book on regular expressions is almost certainly Jeffrey +Friedl's Mastering Regular Expressions, published by O'Reilly. Unfortunately, +it exclusively concentrates on Perl and Java's flavours of regular expressions, +and doesn't contain any Python material at all, so it won't be useful as a +reference for programming in Python. (The first edition covered Python's +now-removed :mod:`regex` module, which won't help you much.) Consider checking +it out from your library. + + +.. rubric:: Footnotes + +.. [#] Introduced in Python 2.2.2. + diff --git a/Doc/howto/sockets.rst b/Doc/howto/sockets.rst new file mode 100644 index 0000000..dc05d32 --- /dev/null +++ b/Doc/howto/sockets.rst @@ -0,0 +1,421 @@ +**************************** + Socket Programming HOWTO +**************************** + +:Author: Gordon McMillan + + +.. topic:: Abstract + + Sockets are used nearly everywhere, but are one of the most severely + misunderstood technologies around. This is a 10,000 foot overview of sockets. + It's not really a tutorial - you'll still have work to do in getting things + operational. It doesn't cover the fine points (and there are a lot of them), but + I hope it will give you enough background to begin using them decently. + + +Sockets +======= + +Sockets are used nearly everywhere, but are one of the most severely +misunderstood technologies around. This is a 10,000 foot overview of sockets. +It's not really a tutorial - you'll still have work to do in getting things +working. It doesn't cover the fine points (and there are a lot of them), but I +hope it will give you enough background to begin using them decently.
+ +I'm only going to talk about INET sockets, but they account for at least 99% of +the sockets in use. And I'll only talk about STREAM sockets - unless you really +know what you're doing (in which case this HOWTO isn't for you!), you'll get +better behavior and performance from a STREAM socket than anything else. I will +try to clear up the mystery of what a socket is, as well as some hints on how to +work with blocking and non-blocking sockets. But I'll start by talking about +blocking sockets. You'll need to know how they work before dealing with +non-blocking sockets. + +Part of the trouble with understanding these things is that "socket" can mean a +number of subtly different things, depending on context. So first, let's make a +distinction between a "client" socket - an endpoint of a conversation, and a +"server" socket, which is more like a switchboard operator. The client +application (your browser, for example) uses "client" sockets exclusively; the +web server it's talking to uses both "server" sockets and "client" sockets. + + +History +------- + +Of the various forms of IPC (*Inter Process Communication*), sockets are by far +the most popular. On any given platform, there are likely to be other forms of +IPC that are faster, but for cross-platform communication, sockets are about the +only game in town. + +They were invented in Berkeley as part of the BSD flavor of Unix. They spread +like wildfire with the Internet. With good reason --- the combination of sockets +with INET makes talking to arbitrary machines around the world unbelievably easy +(at least compared to other schemes). 
+ + +Creating a Socket +================= + +Roughly speaking, when you clicked on the link that brought you to this page, +your browser did something like the following:: + + #create an INET, STREAMing socket + s = socket.socket( + socket.AF_INET, socket.SOCK_STREAM) + #now connect to the web server on port 80 + # - the normal http port + s.connect(("www.mcmillan-inc.com", 80)) + +When the ``connect`` completes, the socket ``s`` can now be used to send in a +request for the text of this page. The same socket will read the reply, and then +be destroyed. That's right - destroyed. Client sockets are normally only used +for one exchange (or a small set of sequential exchanges). + +What happens in the web server is a bit more complex. First, the web server +creates a "server socket". :: + + #create an INET, STREAMing socket + serversocket = socket.socket( + socket.AF_INET, socket.SOCK_STREAM) + #bind the socket to a public host, + # and a well-known port + serversocket.bind((socket.gethostname(), 80)) + #become a server socket + serversocket.listen(5) + +A couple things to notice: we used ``socket.gethostname()`` so that the socket +would be visible to the outside world. If we had used ``s.bind(('', 80))`` or +``s.bind(('localhost', 80))`` or ``s.bind(('127.0.0.1', 80))`` we would still +have a "server" socket, but one that was only visible within the same machine. + +A second thing to note: low number ports are usually reserved for "well known" +services (HTTP, SNMP etc). If you're playing around, use a nice high number (4 +digits). + +Finally, the argument to ``listen`` tells the socket library that we want it to +queue up as many as 5 connect requests (the normal max) before refusing outside +connections. If the rest of the code is written properly, that should be plenty. + +OK, now we have a "server" socket, listening on port 80. 
Now we enter the +mainloop of the web server:: + + while 1: + #accept connections from outside + (clientsocket, address) = serversocket.accept() + #now do something with the clientsocket + #in this case, we'll pretend this is a threaded server + ct = client_thread(clientsocket) + ct.run() + +There are actually 3 general ways in which this loop could work - dispatching a +thread to handle ``clientsocket``, creating a new process to handle +``clientsocket``, or restructuring this app to use non-blocking sockets, and +multiplex between our "server" socket and any active ``clientsocket``\ s using +``select``. More about that later. The important thing to understand now is +this: this is *all* a "server" socket does. It doesn't send any data. It doesn't +receive any data. It just produces "client" sockets. Each ``clientsocket`` is +created in response to some *other* "client" socket doing a ``connect()`` to the +host and port we're bound to. As soon as we've created that ``clientsocket``, we +go back to listening for more connections. The two "clients" are free to chat it +up - they are using some dynamically allocated port which will be recycled when +the conversation ends. + + +IPC +--- + +If you need fast IPC between two processes on one machine, you should look into +whatever form of shared memory the platform offers. A simple protocol based +around shared memory and locks or semaphores is by far the fastest technique. + +If you do decide to use sockets, bind the "server" socket to ``'localhost'``. On +most platforms, this will take a shortcut around a couple of layers of network +code and be quite a bit faster. + + +Using a Socket +============== + +The first thing to note, is that the web browser's "client" socket and the web +server's "client" socket are identical beasts. That is, this is a "peer to peer" +conversation. Or to put it another way, *as the designer, you will have to +decide what the rules of etiquette are for a conversation*.
Normally, the +``connect``\ ing socket starts the conversation, by sending in a request, or +perhaps a signon. But that's a design decision - it's not a rule of sockets. + +Now there are two sets of verbs to use for communication. You can use ``send`` +and ``recv``, or you can transform your client socket into a file-like beast and +use ``read`` and ``write``. The latter is the way Java presents its sockets. +I'm not going to talk about it here, except to warn you that you need to use +``flush`` on sockets. These are buffered "files", and a common mistake is to +``write`` something, and then ``read`` for a reply. Without a ``flush`` in +there, you may wait forever for the reply, because the request may still be in +your output buffer. + +Now we come to the major stumbling block of sockets - ``send`` and ``recv`` operate +on the network buffers. They do not necessarily handle all the bytes you hand +them (or expect from them), because their major focus is handling the network +buffers. In general, they return when the associated network buffers have been +filled (``send``) or emptied (``recv``). They then tell you how many bytes they +handled. It is *your* responsibility to call them again until your message has +been completely dealt with. + +When a ``recv`` returns 0 bytes, it means the other side has closed (or is in +the process of closing) the connection. You will not receive any more data on +this connection. Ever. You may be able to send data successfully; I'll talk +about that some on the next page. + +A protocol like HTTP uses a socket for only one transfer. The client sends a +request, then reads a reply. That's it. The socket is discarded. This means that +a client can detect the end of the reply by receiving 0 bytes.
+ +But if you plan to reuse your socket for further transfers, you need to realize +that *there is no "EOT" (End of Transfer) on a socket.* I repeat: if a socket +``send`` or ``recv`` returns after handling 0 bytes, the connection has been +broken. If the connection has *not* been broken, you may wait on a ``recv`` +forever, because the socket will *not* tell you that there's nothing more to +read (for now). Now if you think about that a bit, you'll come to realize a +fundamental truth of sockets: *messages must either be fixed length* (yuck), *or +be delimited* (shrug), *or indicate how long they are* (much better), *or end by +shutting down the connection*. The choice is entirely yours, (but some ways are +righter than others). + +Assuming you don't want to end the connection, the simplest solution is a fixed +length message:: + + class mysocket: + '''demonstration class only + - coded for clarity, not efficiency + ''' + + def __init__(self, sock=None): + if sock is None: + self.sock = socket.socket( + socket.AF_INET, socket.SOCK_STREAM) + else: + self.sock = sock + + def connect(self, host, port): + self.sock.connect((host, port)) + + def mysend(self, msg): + totalsent = 0 + while totalsent < MSGLEN: + sent = self.sock.send(msg[totalsent:]) + if sent == 0: + raise RuntimeError, \ + "socket connection broken" + totalsent = totalsent + sent + + def myreceive(self): + msg = '' + while len(msg) < MSGLEN: + chunk = self.sock.recv(MSGLEN-len(msg)) + if chunk == '': + raise RuntimeError, \ + "socket connection broken" + msg = msg + chunk + return msg + +The sending code here is usable for almost any messaging scheme - in Python you +send strings, and you can use ``len()`` to determine its length (even if it has +embedded ``\0`` characters). It's mostly the receiving code that gets more +complex. (And in C, it's not much worse, except you can't use ``strlen`` if the +message has embedded ``\0``\ s.) 
+ +The easiest enhancement is to make the first character of the message an +indicator of message type, and have the type determine the length. Now you have +two ``recv``\ s - the first to get (at least) that first character so you can +look up the length, and the second in a loop to get the rest. If you decide to +go the delimited route, you'll be receiving in some arbitrary chunk size, (4096 +or 8192 is frequently a good match for network buffer sizes), and scanning what +you've received for a delimiter. + +One complication to be aware of: if your conversational protocol allows multiple +messages to be sent back to back (without some kind of reply), and you pass +``recv`` an arbitrary chunk size, you may end up reading the start of a +following message. You'll need to put that aside and hold onto it, until it's +needed. + +Prefixing the message with its length (say, as 5 numeric characters) gets more +complex, because (believe it or not), you may not get all 5 characters in one +``recv``. In playing around, you'll get away with it; but in high network loads, +your code will very quickly break unless you use two ``recv`` loops - the first +to determine the length, the second to get the data part of the message. Nasty. +This is also when you'll discover that ``send`` does not always manage to get +rid of everything in one pass. And despite having read this, you will eventually +get bit by it! + +In the interests of space, building your character, (and preserving my +competitive position), these enhancements are left as an exercise for the +reader. Let's move on to cleaning up. + + +Binary Data +----------- + +It is perfectly possible to send binary data over a socket. The major problem is +that not all machines use the same formats for binary data. For example, a +Motorola chip will represent a 16 bit integer with the value 1 as the two hex +bytes 00 01. Intel and DEC, however, are byte-reversed - that same 1 is 01 00.
+Socket libraries have calls for converting 16 and 32 bit integers - ``ntohl, +htonl, ntohs, htons`` where "n" means *network* and "h" means *host*, "s" means +*short* and "l" means *long*. Where network order is host order, these do +nothing, but where the machine is byte-reversed, these swap the bytes around +appropriately. + +In these days of 32 bit machines, the ascii representation of binary data is +frequently smaller than the binary representation. That's because a surprising +amount of the time, all those longs have the value 0, or maybe 1. The string "0" +would be two bytes, while binary is four. Of course, this doesn't fit well with +fixed-length messages. Decisions, decisions. + + +Disconnecting +============= + +Strictly speaking, you're supposed to use ``shutdown`` on a socket before you +``close`` it. The ``shutdown`` is an advisory to the socket at the other end. +Depending on the argument you pass it, it can mean "I'm not going to send +anymore, but I'll still listen", or "I'm not listening, good riddance!". Most +socket libraries, however, are so used to programmers neglecting to use this +piece of etiquette that normally a ``close`` is the same as ``shutdown(); +close()``. So in most situations, an explicit ``shutdown`` is not needed. + +One way to use ``shutdown`` effectively is in an HTTP-like exchange. The client +sends a request and then does a ``shutdown(1)``. This tells the server "This +client is done sending, but can still receive." The server can detect "EOF" by +a receive of 0 bytes. It can assume it has the complete request. The server +sends a reply. If the ``send`` completes successfully then, indeed, the client +was still receiving. + +Python takes the automatic shutdown a step further, and says that when a socket +is garbage collected, it will automatically do a ``close`` if it's needed. But +relying on this is a very bad habit. 
If your socket just disappears without +doing a ``close``, the socket at the other end may hang indefinitely, thinking +you're just being slow. *Please* ``close`` your sockets when you're done. + + +When Sockets Die +---------------- + +Probably the worst thing about using blocking sockets is what happens when the +other side comes down hard (without doing a ``close``). Your socket is likely to +hang. SOCK_STREAM is a reliable protocol, and it will wait a long, long time +before giving up on a connection. If you're using threads, the entire thread is +essentially dead. There's not much you can do about it. As long as you aren't +doing something dumb, like holding a lock while doing a blocking read, the +thread isn't really consuming much in the way of resources. Do *not* try to kill +the thread - part of the reason that threads are more efficient than processes +is that they avoid the overhead associated with the automatic recycling of +resources. In other words, if you do manage to kill the thread, your whole +process is likely to be screwed up. + + +Non-blocking Sockets +==================== + +If you've understood the preceding, you already know most of what you need to +know about the mechanics of using sockets. You'll still use the same calls, in +much the same ways. It's just that, if you do it right, your app will be almost +inside-out. + +In Python, you use ``socket.setblocking(0)`` to make it non-blocking. In C, it's +more complex, (for one thing, you'll need to choose between the BSD flavor +``O_NONBLOCK`` and the almost indistinguishable Posix flavor ``O_NDELAY``, which +is completely different from ``TCP_NODELAY``), but it's the exact same idea. You +do this after creating the socket, but before using it. (Actually, if you're +nuts, you can switch back and forth.) + +The major mechanical difference is that ``send``, ``recv``, ``connect`` and +``accept`` can return without having done anything. You have (of course) a +number of choices.
You can check return code and error codes and generally drive +yourself crazy. If you don't believe me, try it sometime. Your app will grow +large, buggy and suck CPU. So let's skip the brain-dead solutions and do it +right. + +Use ``select``. + +In C, coding ``select`` is fairly complex. In Python, it's a piece of cake, but +it's close enough to the C version that if you understand ``select`` in Python, +you'll have little trouble with it in C. :: + + ready_to_read, ready_to_write, in_error = \ + select.select( + potential_readers, + potential_writers, + potential_errs, + timeout) + +You pass ``select`` three lists: the first contains all sockets that you might +want to try reading; the second all the sockets you might want to try writing +to, and the last (normally left empty) those that you want to check for errors. +You should note that a socket can go into more than one list. The ``select`` +call is blocking, but you can give it a timeout. This is generally a sensible +thing to do - give it a nice long timeout (say a minute) unless you have good +reason to do otherwise. + +In return, you will get three lists. They have the sockets that are actually +readable, writable and in error. Each of these lists is a subset (possibly +empty) of the corresponding list you passed in. And if you put a socket in more +than one input list, it will only be (at most) in one output list. + +If a socket is in the output readable list, you can be +as-close-to-certain-as-we-ever-get-in-this-business that a ``recv`` on that +socket will return *something*. Same idea for the writable list. You'll be able +to send *something*. Maybe not all you want to, but *something* is better than +nothing. (Actually, any reasonably healthy socket will return as writable - it +just means outbound network buffer space is available.) + +If you have a "server" socket, put it in the potential_readers list. If it comes +out in the readable list, your ``accept`` will (almost certainly) work.
If you +have created a new socket to ``connect`` to someone else, put it in the +potential_writers list. If it shows up in the writable list, you have a decent +chance that it has connected. + +One very nasty problem with ``select``: if somewhere in those input lists of +sockets is one which has died a nasty death, the ``select`` will fail. You then +need to loop through every single damn socket in all those lists and do a +``select([sock],[],[],0)`` until you find the bad one. That timeout of 0 means +it won't take long, but it's ugly. + +Actually, ``select`` can be handy even with blocking sockets. It's one way of +determining whether you will block - the socket returns as readable when there's +something in the buffers. However, this still doesn't help with the problem of +determining whether the other end is done, or just busy with something else. + +**Portability alert**: On Unix, ``select`` works both with the sockets and +files. Don't try this on Windows. On Windows, ``select`` works with sockets +only. Also note that in C, many of the more advanced socket options are done +differently on Windows. In fact, on Windows I usually use threads (which work +very, very well) with my sockets. Face it, if you want any kind of performance, +your code will look very different on Windows than on Unix. (I haven't the +foggiest how you do this stuff on a Mac.) + + +Performance +----------- + +There's no question that the fastest sockets code uses non-blocking sockets and +select to multiplex them. You can put together something that will saturate a +LAN connection without putting any strain on the CPU. The trouble is that an app +written this way can't do much of anything else - it needs to be ready to +shuffle bytes around at all times. + +Assuming that your app is actually supposed to do something more than that, +threading is the optimal solution, (and using non-blocking sockets will be +faster than using blocking sockets).
Unfortunately, threading support in Unixes +varies both in API and quality. So the normal Unix solution is to fork a +subprocess to deal with each connection. The overhead for this is significant +(and don't do this on Windows - the overhead of process creation is enormous +there). It also means that unless each subprocess is completely independent, +you'll need to use another form of IPC, say a pipe, or shared memory and +semaphores, to communicate between the parent and child processes. + +Finally, remember that even though blocking sockets are somewhat slower than +non-blocking, in many cases they are the "right" solution. After all, if your +app is driven by the data it receives over a socket, there's not much sense in +complicating the logic just so your app can wait on ``select`` instead of +``recv``. + diff --git a/Doc/howto/unicode.rst b/Doc/howto/unicode.rst new file mode 100644 index 0000000..16bd5a8 --- /dev/null +++ b/Doc/howto/unicode.rst @@ -0,0 +1,732 @@ +***************** + Unicode HOWTO +***************** + +:Release: 1.02 + +This HOWTO discusses Python's support for Unicode, and explains various problems +that people commonly encounter when trying to work with Unicode. + +Introduction to Unicode +======================= + +History of Character Codes +-------------------------- + +In 1968, the American Standard Code for Information Interchange, better known by +its acronym ASCII, was standardized. ASCII defined numeric codes for various +characters, with the numeric values running from 0 to +127. For example, the lowercase letter 'a' is assigned 97 as its code +value. + +ASCII was an American-developed standard, so it only defined unaccented +characters. There was an 'e', but no 'é' or 'Í'. This meant that languages +which required accented characters couldn't be faithfully represented in ASCII. 
+(Actually the missing accents matter for English, too, which contains words such +as 'naïve' and 'café', and some publications have house styles which require +spellings such as 'coöperate'.) + +For a while people just wrote programs that didn't display accents. I remember +looking at Apple ][ BASIC programs, published in French-language publications in +the mid-1980s, that had lines like these:: + + PRINT "FICHER EST COMPLETE." + PRINT "CARACTERE NON ACCEPTE." + +Those messages should contain accents, and they just look wrong to someone who +can read French. + +In the 1980s, almost all personal computers were 8-bit, meaning that bytes could +hold values ranging from 0 to 255. ASCII codes only went up to 127, so some +machines assigned values between 128 and 255 to accented characters. Different +machines had different codes, however, which led to problems exchanging files. +Eventually various commonly used sets of values for the 128-255 range emerged. +Some were true standards, defined by the International Standards Organization, +and some were **de facto** conventions that were invented by one company or +another and managed to catch on. + +255 characters aren't very many. For example, you can't fit both the accented +characters used in Western Europe and the Cyrillic alphabet used for Russian +into the 128-255 range because there are more than 127 such characters. + +You could write files using different codes (all your Russian files in a coding +system called KOI8, all your French files in a different coding system called +Latin1), but what if you wanted to write a French document that quotes some +Russian text? In the 1980s people began to want to solve this problem, and the +Unicode standardization effort began. + +Unicode started out using 16-bit characters instead of 8-bit characters. 
16 +bits means you have 2^16 = 65,536 distinct values available, making it possible +to represent many different characters from many different alphabets; an initial +goal was to have Unicode contain the alphabets for every single human language. +It turns out that even 16 bits isn't enough to meet that goal, and the modern +Unicode specification uses a wider range of codes, 0-1,114,111 (0x10ffff in +base-16). + +There's a related ISO standard, ISO 10646. Unicode and ISO 10646 were +originally separate efforts, but the specifications were merged with the 1.1 +revision of Unicode. + +(This discussion of Unicode's history is highly simplified. I don't think the +average Python programmer needs to worry about the historical details; consult +the Unicode consortium site listed in the References for more information.) + + +Definitions +----------- + +A **character** is the smallest possible component of a text. 'A', 'B', 'C', +etc., are all different characters. So are 'È' and 'Í'. Characters are +abstractions, and vary depending on the language or context you're talking +about. For example, the symbol for ohms (Ω) is usually drawn much like the +capital letter omega (Ω) in the Greek alphabet (they may even be the same in +some fonts), but these are two different characters that have different +meanings. + +The Unicode standard describes how characters are represented by **code +points**. A code point is an integer value, usually denoted in base 16. In the +standard, a code point is written using the notation U+12ca to mean the +character with value 0x12ca (4810 decimal). The Unicode standard contains a lot +of tables listing characters and their corresponding code points:: + + 0061 'a'; LATIN SMALL LETTER A + 0062 'b'; LATIN SMALL LETTER B + 0063 'c'; LATIN SMALL LETTER C + ... + 007B '{'; LEFT CURLY BRACKET + +Strictly, these definitions imply that it's meaningless to say 'this is +character U+12ca'. 
U+12ca is a code point, which represents some particular +character; in this case, it represents the character 'ETHIOPIC SYLLABLE WI'. In +informal contexts, this distinction between code points and characters will +sometimes be forgotten. + +A character is represented on a screen or on paper by a set of graphical +elements that's called a **glyph**. The glyph for an uppercase A, for example, +is two diagonal strokes and a horizontal stroke, though the exact details will +depend on the font being used. Most Python code doesn't need to worry about +glyphs; figuring out the correct glyph to display is generally the job of a GUI +toolkit or a terminal's font renderer. + + +Encodings +--------- + +To summarize the previous section: a Unicode string is a sequence of code +points, which are numbers from 0 to 0x10ffff. This sequence needs to be +represented as a set of bytes (meaning, values from 0-255) in memory. The rules +for translating a Unicode string into a sequence of bytes are called an +**encoding**. + +The first encoding you might think of is an array of 32-bit integers. In this +representation, the string "Python" would look like this:: + + P y t h o n + 0x50 00 00 00 79 00 00 00 74 00 00 00 68 00 00 00 6f 00 00 00 6e 00 00 00 + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 + +This representation is straightforward but using it presents a number of +problems. + +1. It's not portable; different processors order the bytes differently. + +2. It's very wasteful of space. In most texts, the majority of the code points + are less than 127, or less than 255, so a lot of space is occupied by zero + bytes. The above string takes 24 bytes compared to the 6 bytes needed for an + ASCII representation. Increased RAM usage doesn't matter too much (desktop + computers have megabytes of RAM, and strings aren't usually that large), but + expanding our usage of disk and network bandwidth by a factor of 4 is + intolerable. + +3. 
It's not compatible with existing C functions such as ``strlen()``, so a new + family of wide string functions would need to be used. + +4. Many Internet standards are defined in terms of textual data, and can't + handle content with embedded zero bytes. + +Generally people don't use this encoding, instead choosing other encodings that +are more efficient and convenient. + +Encodings don't have to handle every possible Unicode character, and most +encodings don't. For example, Python's default encoding is the 'ascii' +encoding. The rules for converting a Unicode string into the ASCII encoding are +simple; for each code point: + +1. If the code point is < 128, each byte is the same as the value of the code + point. + +2. If the code point is 128 or greater, the Unicode string can't be represented + in this encoding. (Python raises a :exc:`UnicodeEncodeError` exception in this + case.) + +Latin-1, also known as ISO-8859-1, is a similar encoding. Unicode code points +0-255 are identical to the Latin-1 values, so converting to this encoding simply +requires converting code points to byte values; if a code point larger than 255 +is encountered, the string can't be encoded into Latin-1. + +Encodings don't have to be simple one-to-one mappings like Latin-1. Consider +IBM's EBCDIC, which was used on IBM mainframes. Letter values weren't in one +block: 'a' through 'i' had values from 129 to 137, but 'j' through 'r' were 145 +through 153. If you wanted to use EBCDIC as an encoding, you'd probably use +some sort of lookup table to perform the conversion, but this is largely an +internal detail. + +UTF-8 is one of the most commonly used encodings. UTF stands for "Unicode +Transformation Format", and the '8' means that 8-bit numbers are used in the +encoding. (There's also a UTF-16 encoding, but it's less frequently used than +UTF-8.) UTF-8 uses the following rules: + +1. If the code point is <128, it's represented by the corresponding byte value. +2. 
If the code point is between 128 and 0x7ff, it's turned into two byte values + between 128 and 255. +3. Code points >0x7ff are turned into three- or four-byte sequences, where each + byte of the sequence is between 128 and 255. + +UTF-8 has several convenient properties: + +1. It can handle any Unicode code point. +2. A Unicode string is turned into a string of bytes containing no embedded zero + bytes. This avoids byte-ordering issues, and means UTF-8 strings can be + processed by C functions such as ``strcpy()`` and sent through protocols that + can't handle zero bytes. +3. A string of ASCII text is also valid UTF-8 text. +4. UTF-8 is fairly compact; the majority of code points are turned into two + bytes, and values less than 128 occupy only a single byte. +5. If bytes are corrupted or lost, it's possible to determine the start of the + next UTF-8-encoded code point and resynchronize. It's also unlikely that + random 8-bit data will look like valid UTF-8. + + + +References +---------- + +The Unicode Consortium site at <http://www.unicode.org> has character charts, a +glossary, and PDF versions of the Unicode specification. Be prepared for some +difficult reading. <http://www.unicode.org/history/> is a chronology of the +origin and development of Unicode. + +To help understand the standard, Jukka Korpela has written an introductory guide +to reading the Unicode character tables, available at +<http://www.cs.tut.fi/~jkorpela/unicode/guide.html>. + +Roman Czyborra wrote another explanation of Unicode's basic principles; it's at +<http://czyborra.com/unicode/characters.html>. Czyborra has written a number of +other Unicode-related documents, available from <http://czyborra.com>. + +Two other good introductory articles were written by Joel Spolsky +<http://www.joelonsoftware.com/articles/Unicode.html> and Jason Orendorff +<http://www.jorendorff.com/articles/unicode/>. If this introduction didn't make +things clear to you, you should try reading one of these alternate articles +before continuing. + +Wikipedia entries are often helpful; see the entries for "character encoding" +<http://en.wikipedia.org/wiki/Character_encoding> and UTF-8 +<http://en.wikipedia.org/wiki/UTF-8>, for example.
+ + +Python's Unicode Support +======================== + +Now that you've learned the rudiments of Unicode, we can look at Python's +Unicode features. + + +The Unicode Type +---------------- + +Unicode strings are expressed as instances of the :class:`unicode` type, one of +Python's repertoire of built-in types. It derives from an abstract type called +:class:`basestring`, which is also an ancestor of the :class:`str` type; you can +therefore check if a value is a string type with ``isinstance(value, +basestring)``. Under the hood, Python represents Unicode strings as either 16- +or 32-bit integers, depending on how the Python interpreter was compiled. + +The :func:`unicode` constructor has the signature ``unicode(string[, encoding, +errors])``. All of its arguments should be 8-bit strings. The first argument +is converted to Unicode using the specified encoding; if you leave off the +``encoding`` argument, the ASCII encoding is used for the conversion, so +characters greater than 127 will be treated as errors:: + + >>> unicode('abcdef') + u'abcdef' + >>> s = unicode('abcdef') + >>> type(s) + <type 'unicode'> + >>> unicode('abcdef' + chr(255)) + Traceback (most recent call last): + File "<stdin>", line 1, in ? + UnicodeDecodeError: 'ascii' codec can't decode byte 0xff in position 6: + ordinal not in range(128) + +The ``errors`` argument specifies the response when the input string can't be +converted according to the encoding's rules. Legal values for this argument are +'strict' (raise a ``UnicodeDecodeError`` exception), 'replace' (add U+FFFD, +'REPLACEMENT CHARACTER'), or 'ignore' (just leave the character out of the +Unicode result). The following examples show the differences:: + + >>> unicode('\x80abc', errors='strict') + Traceback (most recent call last): + File "<stdin>", line 1, in ?
+ UnicodeDecodeError: 'ascii' codec can't decode byte 0x80 in position 0: + ordinal not in range(128) + >>> unicode('\x80abc', errors='replace') + u'\ufffdabc' + >>> unicode('\x80abc', errors='ignore') + u'abc' + +Encodings are specified as strings containing the encoding's name. Python 2.4 +comes with roughly 100 different encodings; see the Python Library Reference at +<http://docs.python.org/lib/standard-encodings.html> for a list. Some encodings +have multiple names; for example, 'latin-1', 'iso_8859_1' and '8859' are all +synonyms for the same encoding. + +One-character Unicode strings can also be created with the :func:`unichr` +built-in function, which takes integers and returns a Unicode string of length 1 +that contains the corresponding code point. The reverse operation is the +built-in :func:`ord` function that takes a one-character Unicode string and +returns the code point value:: + + >>> unichr(40960) + u'\ua000' + >>> ord(u'\ua000') + 40960 + +Instances of the :class:`unicode` type have many of the same methods as the +8-bit string type for operations such as searching and formatting:: + + >>> s = u'Was ever feather so lightly blown to and fro as this multitude?' + >>> s.count('e') + 5 + >>> s.find('feather') + 9 + >>> s.find('bird') + -1 + >>> s.replace('feather', 'sand') + u'Was ever sand so lightly blown to and fro as this multitude?' + >>> s.upper() + u'WAS EVER FEATHER SO LIGHTLY BLOWN TO AND FRO AS THIS MULTITUDE?' + +Note that the arguments to these methods can be Unicode strings or 8-bit +strings. 8-bit strings will be converted to Unicode before carrying out the +operation; Python's default ASCII encoding will be used, so characters greater +than 127 will cause an exception:: + + >>> s.find('Was\x9f') + Traceback (most recent call last): + File "<stdin>", line 1, in ?
+ UnicodeDecodeError: 'ascii' codec can't decode byte 0x9f in position 3: ordinal not in range(128) + >>> s.find(u'Was\x9f') + -1 + +Much Python code that operates on strings will therefore work with Unicode +strings without requiring any changes to the code. (Input and output code needs +more updating for Unicode; more on this later.) + +Another important method is ``.encode([encoding], [errors='strict'])``, which +returns an 8-bit string version of the Unicode string, encoded in the requested +encoding. The ``errors`` parameter is the same as the parameter of the +``unicode()`` constructor, with one additional possibility; as well as 'strict', +'ignore', and 'replace', you can also pass 'xmlcharrefreplace' which uses XML's +character references. The following example shows the different results:: + + >>> u = unichr(40960) + u'abcd' + unichr(1972) + >>> u.encode('utf-8') + '\xea\x80\x80abcd\xde\xb4' + >>> u.encode('ascii') + Traceback (most recent call last): + File "<stdin>", line 1, in ? + UnicodeEncodeError: 'ascii' codec can't encode character '\ua000' in position 0: ordinal not in range(128) + >>> u.encode('ascii', 'ignore') + 'abcd' + >>> u.encode('ascii', 'replace') + '?abcd?' + >>> u.encode('ascii', 'xmlcharrefreplace') + '&#40960;abcd&#1972;' + +Python's 8-bit strings have a ``.decode([encoding], [errors])`` method that +interprets the string using the given encoding:: + + >>> u = unichr(40960) + u'abcd' + unichr(1972) # Assemble a string + >>> utf8_version = u.encode('utf-8') # Encode as UTF-8 + >>> type(utf8_version), utf8_version + (<type 'str'>, '\xea\x80\x80abcd\xde\xb4') + >>> u2 = utf8_version.decode('utf-8') # Decode using UTF-8 + >>> u == u2 # The two strings match + True + +The low-level routines for registering and accessing the available encodings are +found in the :mod:`codecs` module. However, the encoding and decoding functions +returned by this module are usually more low-level than is comfortable, so I'm +not going to describe the :mod:`codecs` module here.
If you need to implement a +completely new encoding, you'll need to learn about the :mod:`codecs` module +interfaces, but implementing encodings is a specialized task that also won't be +covered here. Consult the Python documentation to learn more about this module. + +The most commonly used part of the :mod:`codecs` module is the +:func:`codecs.open` function which will be discussed in the section on input and +output. + + +Unicode Literals in Python Source Code +-------------------------------------- + +In Python source code, Unicode literals are written as strings prefixed with the +'u' or 'U' character: ``u'abcdefghijk'``. Specific code points can be written +using the ``\u`` escape sequence, which is followed by four hex digits giving +the code point. The ``\U`` escape sequence is similar, but expects 8 hex +digits, not 4. + +Unicode literals can also use the same escape sequences as 8-bit strings, +including ``\x``, but ``\x`` only takes two hex digits so it can't express an +arbitrary code point. Octal escapes can go up to U+01ff, which is octal 777. + +:: + + >>> s = u"a\xac\u1234\u20ac\U00008000" + ^^^^ two-digit hex escape + ^^^^^^ four-digit Unicode escape + ^^^^^^^^^^ eight-digit Unicode escape + >>> for c in s: print ord(c), + ... + 97 172 4660 8364 32768 + +Using escape sequences for code points greater than 127 is fine in small doses, +but becomes an annoyance if you're using many accented characters, as you would +in a program with messages in French or some other accent-using language. You +can also assemble strings using the :func:`unichr` built-in function, but this is +even more tedious. + +Ideally, you'd want to be able to write literals in your language's natural +encoding. You could then edit Python source code with your favorite editor +which would display the accented characters naturally, and have the right +characters used at runtime. 
+ +Python supports writing Unicode literals in any encoding, but you have to +declare the encoding being used. This is done by including a special comment as +either the first or second line of the source file:: + + #!/usr/bin/env python + # -*- coding: latin-1 -*- + + u = u'abcdé' + print ord(u[-1]) + +The syntax is inspired by Emacs's notation for specifying variables local to a +file. Emacs supports many different variables, but Python only supports +'coding'. The ``-*-`` symbols indicate that the comment is special; within +them, you must supply the name ``coding`` and the name of your chosen encoding, +separated by ``':'``. + +If you don't include such a comment, the default encoding used will be ASCII. +Versions of Python before 2.4 were Euro-centric and assumed Latin-1 as a default +encoding for string literals; in Python 2.4, characters greater than 127 still +work but result in a warning. For example, the following program has no +encoding declaration:: + + #!/usr/bin/env python + u = u'abcdé' + print ord(u[-1]) + +When you run it with Python 2.4, it will output the following warning:: + + amk:~$ python p263.py + sys:1: DeprecationWarning: Non-ASCII character '\xe9' + in file p263.py on line 2, but no encoding declared; + see http://www.python.org/peps/pep-0263.html for details + + +Unicode Properties +------------------ + +The Unicode specification includes a database of information about code points. +For each code point that's defined, the information includes the character's +name, its category, the numeric value if applicable (Unicode has characters +representing the Roman numerals and fractions such as one-third and +four-fifths). There are also properties related to the code point's use in +bidirectional text and other display-related properties. 
+ +The following program displays some information about several characters, and +prints the numeric value of one particular character:: + + import unicodedata + + u = unichr(233) + unichr(0x0bf2) + unichr(3972) + unichr(6000) + unichr(13231) + + for i, c in enumerate(u): + print i, '%04x' % ord(c), unicodedata.category(c), + print unicodedata.name(c) + + # Get numeric value of second character + print unicodedata.numeric(u[1]) + +When run, this prints:: + + 0 00e9 Ll LATIN SMALL LETTER E WITH ACUTE + 1 0bf2 No TAMIL NUMBER ONE THOUSAND + 2 0f84 Mn TIBETAN MARK HALANTA + 3 1770 Lo TAGBANWA LETTER SA + 4 33af So SQUARE RAD OVER S SQUARED + 1000.0 + +The category codes are abbreviations describing the nature of the character. +These are grouped into categories such as "Letter", "Number", "Punctuation", or +"Symbol", which in turn are broken up into subcategories. To take the codes +from the above output, ``'Ll'`` means 'Letter, lowercase', ``'No'`` means +"Number, other", ``'Mn'`` is "Mark, nonspacing", and ``'So'`` is "Symbol, +other". See + for a +list of category codes. + +References +---------- + +The Unicode and 8-bit string types are described in the Python library reference +at :ref:`typesseq`. + +The documentation for the :mod:`unicodedata` module. + +The documentation for the :mod:`codecs` module. + +Marc-André Lemburg gave a presentation at EuroPython 2002 titled "Python and +Unicode". A PDF version of his slides is available at +, and is an +excellent overview of the design of Python's Unicode features. + + +Reading and Writing Unicode Data +================================ + +Once you've written some code that works with Unicode data, the next problem is +input/output. How do you get Unicode strings into your program, and how do you +convert Unicode into a form suitable for storage or transmission? 
+ +It's possible that you may not need to do anything depending on your input +sources and output destinations; you should check whether the libraries used in +your application support Unicode natively. XML parsers often return Unicode +data, for example. Many relational databases also support Unicode-valued +columns and can return Unicode values from an SQL query. + +Unicode data is usually converted to a particular encoding before it gets +written to disk or sent over a socket. It's possible to do all the work +yourself: open a file, read an 8-bit string from it, and convert the string with +``unicode(str, encoding)``. However, the manual approach is not recommended. + +One problem is the multi-byte nature of encodings; one Unicode character can be +represented by several bytes. If you want to read the file in arbitrary-sized +chunks (say, 1K or 4K), you need to write error-handling code to catch the case +where only part of the bytes encoding a single Unicode character are read at the +end of a chunk. One solution would be to read the entire file into memory and +then perform the decoding, but that prevents you from working with files that +are extremely large; if you need to read a 2Gb file, you need 2Gb of RAM. +(More, really, since for at least a moment you'd need to have both the encoded +string and its Unicode version in memory.) + +The solution would be to use the low-level decoding interface to catch the case +of partial coding sequences. The work of implementing this has already been +done for you: the :mod:`codecs` module includes a version of the :func:`open` +function that returns a file-like object that assumes the file's contents are in +a specified encoding and accepts Unicode parameters for methods such as +``.read()`` and ``.write()``. + +The function's parameters are ``open(filename, mode='rb', encoding=None, +errors='strict', buffering=1)``. 
``mode`` can be ``'r'``, ``'w'``, or ``'a'``, +just like the corresponding parameter to the regular built-in ``open()`` +function; add a ``'+'`` to update the file. ``buffering`` is similarly parallel +to the standard function's parameter. ``encoding`` is a string giving the +encoding to use; if it's left as ``None``, a regular Python file object that +accepts 8-bit strings is returned. Otherwise, a wrapper object is returned, and +data written to or read from the wrapper object will be converted as needed. +``errors`` specifies the action for encoding errors and can be one of the usual +values of 'strict', 'ignore', and 'replace'. + +Reading Unicode from a file is therefore simple:: + + import codecs + f = codecs.open('unicode.rst', encoding='utf-8') + for line in f: + print repr(line) + +It's also possible to open files in update mode, allowing both reading and +writing:: + + f = codecs.open('test', encoding='utf-8', mode='w+') + f.write(u'\u4500 blah blah blah\n') + f.seek(0) + print repr(f.readline()[:1]) + f.close() + +Unicode character U+FEFF is used as a byte-order mark (BOM), and is often +written as the first character of a file in order to assist with autodetection +of the file's byte ordering. Some encodings, such as UTF-16, expect a BOM to be +present at the start of a file; when such an encoding is used, the BOM will be +automatically written as the first character and will be silently dropped when +the file is read. There are variants of these encodings, such as 'utf-16-le' +and 'utf-16-be' for little-endian and big-endian encodings, that specify one +particular byte ordering and don't skip the BOM. + + +Unicode filenames +----------------- + +Most of the operating systems in common use today support filenames that contain +arbitrary Unicode characters. Usually this is implemented by converting the +Unicode string into some encoding that varies depending on the system. 
For +example, MacOS X uses UTF-8 while Windows uses a configurable encoding; on +Windows, Python uses the name "mbcs" to refer to whatever the currently +configured encoding is. On Unix systems, there will only be a filesystem +encoding if you've set the ``LANG`` or ``LC_CTYPE`` environment variables; if +you haven't, the default encoding is ASCII. + +The :func:`sys.getfilesystemencoding` function returns the encoding to use on +your current system, in case you want to do the encoding manually, but there's +not much reason to bother. When opening a file for reading or writing, you can +usually just provide the Unicode string as the filename, and it will be +automatically converted to the right encoding for you:: + + filename = u'filename\u4500abc' + f = open(filename, 'w') + f.write('blah\n') + f.close() + +Functions in the :mod:`os` module such as :func:`os.stat` will also accept Unicode +filenames. + +:func:`os.listdir`, which returns filenames, raises an issue: should it return +the Unicode version of filenames, or should it return 8-bit strings containing +the encoded versions? :func:`os.listdir` will do both, depending on whether you +provided the directory path as an 8-bit string or a Unicode string. If you pass +a Unicode string as the path, filenames will be decoded using the filesystem's +encoding and a list of Unicode strings will be returned, while passing an 8-bit +path will return the 8-bit versions of the filenames. For example, assuming the +default filesystem encoding is UTF-8, running the following program:: + + fn = u'filename\u4500abc' + f = open(fn, 'w') + f.close() + + import os + print os.listdir('.') + print os.listdir(u'.') + +will produce the following output:: + + amk:~$ python t.py + ['.svn', 'filename\xe4\x94\x80abc', ...] + [u'.svn', u'filename\u4500abc', ...] + +The first list contains UTF-8-encoded filenames, and the second list contains +the Unicode versions. 
+ + + +Tips for Writing Unicode-aware Programs +--------------------------------------- + +This section provides some suggestions on writing software that deals with +Unicode. + +The most important tip is: + + Software should only work with Unicode strings internally, converting to a + particular encoding on output. + +If you attempt to write processing functions that accept both Unicode and 8-bit +strings, you will find your program vulnerable to bugs wherever you combine the +two different kinds of strings. Python's default encoding is ASCII, so whenever +a character with an ASCII value > 127 is in the input data, you'll get a +:exc:`UnicodeDecodeError` because that character can't be handled by the ASCII +encoding. + +It's easy to miss such problems if you only test your software with data that +doesn't contain any accents; everything will seem to work, but there's actually +a bug in your program waiting for the first user who attempts to use characters +> 127. A second tip, therefore, is: + + Include characters > 127 and, even better, characters > 255 in your test + data. + +When using data coming from a web browser or some other untrusted source, a +common technique is to check for illegal characters in a string before using the +string in a generated command line or storing it in a database. If you're doing +this, be careful to check the string once it's in the form that will be used or +stored; it's possible for encodings to be used to disguise characters. This is +especially true if the input data also specifies the encoding; many encodings +leave the commonly checked-for characters alone, but Python includes some +encodings such as ``'base64'`` that modify every single character. + +For example, let's say you have a content management system that takes a Unicode +filename, and you want to disallow paths with a '/' character. 
You might write +this code:: + + def read_file (filename, encoding): + if '/' in filename: + raise ValueError("'/' not allowed in filenames") + unicode_name = filename.decode(encoding) + f = open(unicode_name, 'r') + # ... return contents of file ... + +However, if an attacker could specify the ``'base64'`` encoding, they could pass +``'L2V0Yy9wYXNzd2Q='``, which is the base-64 encoded form of the string +``'/etc/passwd'``, to read a system file. The above code looks for ``'/'`` +characters in the encoded form and misses the dangerous character in the +resulting decoded form. + +References +---------- + +The PDF slides for Marc-André Lemburg's presentation "Writing Unicode-aware +Applications in Python" are available at + +and discuss questions of character encodings as well as how to internationalize +and localize an application. + + +Revision History and Acknowledgements +===================================== + +Thanks to the following people who have noted errors or offered suggestions on +this article: Nicholas Bastin, Marius Gedminas, Kent Johnson, Ken Krugler, +Marc-André Lemburg, Martin von Löwis, Chad Whitacre. + +Version 1.0: posted August 5 2005. + +Version 1.01: posted August 7 2005. Corrects factual and markup errors; adds +several links. + +Version 1.02: posted August 16 2005. Corrects factual errors. + + +.. comment Additional topic: building Python w/ UCS2 or UCS4 support +.. comment Describe obscure -U switch somewhere? +.. comment Describe use of codecs.StreamRecoder and StreamReaderWriter + +.. 
comment + Original outline: + + - [ ] Unicode introduction + - [ ] ASCII + - [ ] Terms + - [ ] Character + - [ ] Code point + - [ ] Encodings + - [ ] Common encodings: ASCII, Latin-1, UTF-8 + - [ ] Unicode Python type + - [ ] Writing unicode literals + - [ ] Obscurity: -U switch + - [ ] Built-ins + - [ ] unichr() + - [ ] ord() + - [ ] unicode() constructor + - [ ] Unicode type + - [ ] encode(), decode() methods + - [ ] Unicodedata module for character properties + - [ ] I/O + - [ ] Reading/writing Unicode data into files + - [ ] Byte-order marks + - [ ] Unicode filenames + - [ ] Writing Unicode programs + - [ ] Do everything in Unicode + - [ ] Declaring source code encodings (PEP 263) + - [ ] Other issues + - [ ] Building Python (UCS2, UCS4) diff --git a/Doc/howto/urllib2.rst b/Doc/howto/urllib2.rst new file mode 100644 index 0000000..dc20b02 --- /dev/null +++ b/Doc/howto/urllib2.rst @@ -0,0 +1,578 @@ +************************************************ + HOWTO Fetch Internet Resources Using urllib2 +************************************************ + +:Author: `Michael Foord `_ + +.. note:: + + There is a French translation of an earlier revision of this + HOWTO, available at `urllib2 - Le Manuel manquant + `_. + + + +Introduction +============ + +.. sidebar:: Related Articles + + You may also find useful the following article on fetching web resources + with Python : + + * `Basic Authentication `_ + + A tutorial on *Basic Authentication*, with examples in Python. + +**urllib2** is a `Python `_ module for fetching URLs +(Uniform Resource Locators). It offers a very simple interface, in the form of +the *urlopen* function. This is capable of fetching URLs using a variety of +different protocols. It also offers a slightly more complex interface for +handling common situations - like basic authentication, cookies, proxies and so +on. These are provided by objects called handlers and openers.
+ +urllib2 supports fetching URLs for many "URL schemes" (identified by the string +before the ":" in URL - for example "ftp" is the URL scheme of +"ftp://python.org/") using their associated network protocols (e.g. FTP, HTTP). +This tutorial focuses on the most common case, HTTP. + +For straightforward situations *urlopen* is very easy to use. But as soon as you +encounter errors or non-trivial cases when opening HTTP URLs, you will need some +understanding of the HyperText Transfer Protocol. The most comprehensive and +authoritative reference to HTTP is :rfc:`2616`. This is a technical document and +not intended to be easy to read. This HOWTO aims to illustrate using *urllib2*, +with enough detail about HTTP to help you through. It is not intended to replace +the :mod:`urllib2` docs, but is supplementary to them. + + +Fetching URLs +============= + +The simplest way to use urllib2 is as follows:: + + import urllib2 + response = urllib2.urlopen('http://python.org/') + html = response.read() + +Many uses of urllib2 will be that simple (note that instead of an 'http:' URL we +could have used an URL starting with 'ftp:', 'file:', etc.). However, it's the +purpose of this tutorial to explain the more complicated cases, concentrating on +HTTP. + +HTTP is based on requests and responses - the client makes requests and servers +send responses. urllib2 mirrors this with a ``Request`` object which represents +the HTTP request you are making. In its simplest form you create a Request +object that specifies the URL you want to fetch. Calling ``urlopen`` with this +Request object returns a response object for the URL requested. This response is +a file-like object, which means you can for example call ``.read()`` on the +response:: + + import urllib2 + + req = urllib2.Request('http://www.voidspace.org.uk') + response = urllib2.urlopen(req) + the_page = response.read() + +Note that urllib2 makes use of the same Request interface to handle all URL +schemes. 
For example, you can make an FTP request like so:: + + req = urllib2.Request('ftp://example.com/') + +In the case of HTTP, there are two extra things that Request objects allow you +to do: First, you can pass data to be sent to the server. Second, you can pass +extra information ("metadata") *about* the data or about the request itself, to +the server - this information is sent as HTTP "headers". Let's look at each of +these in turn. + +Data +---- + +Sometimes you want to send data to a URL (often the URL will refer to a CGI +(Common Gateway Interface) script [#]_ or other web application). With HTTP, +this is often done using what's known as a **POST** request. This is often what +your browser does when you submit a HTML form that you filled in on the web. Not +all POSTs have to come from forms: you can use a POST to transmit arbitrary data +to your own application. In the common case of HTML forms, the data needs to be +encoded in a standard way, and then passed to the Request object as the ``data`` +argument. The encoding is done using a function from the ``urllib`` library +*not* from ``urllib2``. :: + + import urllib + import urllib2 + + url = 'http://www.someserver.com/cgi-bin/register.cgi' + values = {'name' : 'Michael Foord', + 'location' : 'Northampton', + 'language' : 'Python' } + + data = urllib.urlencode(values) + req = urllib2.Request(url, data) + response = urllib2.urlopen(req) + the_page = response.read() + +Note that other encodings are sometimes required (e.g. for file upload from HTML +forms - see `HTML Specification, Form Submission +`_ for more +details). + +If you do not pass the ``data`` argument, urllib2 uses a **GET** request. One +way in which GET and POST requests differ is that POST requests often have +"side-effects": they change the state of the system in some way (for example by +placing an order with the website for a hundredweight of tinned spam to be +delivered to your door). 
Though the HTTP standard makes it clear that POSTs are +intended to *always* cause side-effects, and GET requests *never* to cause +side-effects, nothing prevents a GET request from having side-effects, nor a +POST request from having no side-effects. Data can also be passed in an HTTP +GET request by encoding it in the URL itself. + +This is done as follows:: + + >>> import urllib2 + >>> import urllib + >>> data = {} + >>> data['name'] = 'Somebody Here' + >>> data['location'] = 'Northampton' + >>> data['language'] = 'Python' + >>> url_values = urllib.urlencode(data) + >>> print url_values + name=Somebody+Here&language=Python&location=Northampton + >>> url = 'http://www.example.com/example.cgi' + >>> full_url = url + '?' + url_values + >>> data = urllib2.urlopen(full_url) + +Notice that the full URL is created by adding a ``?`` to the URL, followed by +the encoded values. + +Headers +------- + +We'll discuss here one particular HTTP header, to illustrate how to add headers +to your HTTP request. + +Some websites [#]_ dislike being browsed by programs, or send different versions +to different browsers [#]_ . By default urllib2 identifies itself as +``Python-urllib/x.y`` (where ``x`` and ``y`` are the major and minor version +numbers of the Python release, +e.g. ``Python-urllib/2.5``), which may confuse the site, or just plain +not work. The way a browser identifies itself is through the +``User-Agent`` header [#]_. When you create a Request object you can +pass a dictionary of headers in. The following example makes the same +request as above, but identifies itself as a version of Internet +Explorer [#]_. 
:: + + import urllib + import urllib2 + + url = 'http://www.someserver.com/cgi-bin/register.cgi' + user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' + values = {'name' : 'Michael Foord', + 'location' : 'Northampton', + 'language' : 'Python' } + headers = { 'User-Agent' : user_agent } + + data = urllib.urlencode(values) + req = urllib2.Request(url, data, headers) + response = urllib2.urlopen(req) + the_page = response.read() + +The response also has two useful methods. See the section on `info and geturl`_ +which comes after we have a look at what happens when things go wrong. + + +Handling Exceptions +=================== + +*urlopen* raises ``URLError`` when it cannot handle a response (though as usual +with Python APIs, builtin exceptions such as ValueError, TypeError etc. may also +be raised). + +``HTTPError`` is the subclass of ``URLError`` raised in the specific case of +HTTP URLs. + +URLError +-------- + +Often, URLError is raised because there is no network connection (no route to +the specified server), or the specified server doesn't exist. In this case, the +exception raised will have a 'reason' attribute, which is a tuple containing an +error code and a text error message. + +e.g. :: + + >>> req = urllib2.Request('http://www.pretend_server.org') + >>> try: urllib2.urlopen(req) + >>> except URLError, e: + >>> print e.reason + >>> + (4, 'getaddrinfo failed') + + +HTTPError +--------- + +Every HTTP response from the server contains a numeric "status code". Sometimes +the status code indicates that the server is unable to fulfil the request. The +default handlers will handle some of these responses for you (for example, if +the response is a "redirection" that requests the client fetch the document from +a different URL, urllib2 will handle that for you). For those it can't handle, +urlopen will raise an ``HTTPError``. Typical errors include '404' (page not +found), '403' (request forbidden), and '401' (authentication required). 
+ +See section 10 of RFC 2616 for a reference on all the HTTP error codes. + +The ``HTTPError`` instance raised will have an integer 'code' attribute, which +corresponds to the error sent by the server. + +Error Codes +~~~~~~~~~~~ + +Because the default handlers handle redirects (codes in the 300 range), and +codes in the 100-299 range indicate success, you will usually only see error +codes in the 400-599 range. + +``BaseHTTPServer.BaseHTTPRequestHandler.responses`` is a useful dictionary of +response codes that shows all the response codes used by RFC 2616. The +dictionary is reproduced here for convenience :: + + # Table mapping response codes to messages; entries have the + # form {code: (shortmessage, longmessage)}. + responses = { + 100: ('Continue', 'Request received, please continue'), + 101: ('Switching Protocols', + 'Switching to new protocol; obey Upgrade header'), + + 200: ('OK', 'Request fulfilled, document follows'), + 201: ('Created', 'Document created, URL follows'), + 202: ('Accepted', + 'Request accepted, processing continues off-line'), + 203: ('Non-Authoritative Information', 'Request fulfilled from cache'), + 204: ('No Content', 'Request fulfilled, nothing follows'), + 205: ('Reset Content', 'Clear input form for further input.'), + 206: ('Partial Content', 'Partial content follows.'), + + 300: ('Multiple Choices', + 'Object has several resources -- see URI list'), + 301: ('Moved Permanently', 'Object moved permanently -- see URI list'), + 302: ('Found', 'Object moved temporarily -- see URI list'), + 303: ('See Other', 'Object moved -- see Method and URL list'), + 304: ('Not Modified', + 'Document has not changed since given time'), + 305: ('Use Proxy', + 'You must use proxy specified in Location to access this ' + 'resource.'), + 307: ('Temporary Redirect', + 'Object moved temporarily -- see URI list'), + + 400: ('Bad Request', + 'Bad request syntax or unsupported method'), + 401: ('Unauthorized', + 'No permission -- see authorization 
schemes'), + 402: ('Payment Required', + 'No payment -- see charging schemes'), + 403: ('Forbidden', + 'Request forbidden -- authorization will not help'), + 404: ('Not Found', 'Nothing matches the given URI'), + 405: ('Method Not Allowed', + 'Specified method is invalid for this server.'), + 406: ('Not Acceptable', 'URI not available in preferred format.'), + 407: ('Proxy Authentication Required', 'You must authenticate with ' + 'this proxy before proceeding.'), + 408: ('Request Timeout', 'Request timed out; try again later.'), + 409: ('Conflict', 'Request conflict.'), + 410: ('Gone', + 'URI no longer exists and has been permanently removed.'), + 411: ('Length Required', 'Client must specify Content-Length.'), + 412: ('Precondition Failed', 'Precondition in headers is false.'), + 413: ('Request Entity Too Large', 'Entity is too large.'), + 414: ('Request-URI Too Long', 'URI is too long.'), + 415: ('Unsupported Media Type', 'Entity body in unsupported format.'), + 416: ('Requested Range Not Satisfiable', + 'Cannot satisfy request range.'), + 417: ('Expectation Failed', + 'Expect condition could not be satisfied.'), + + 500: ('Internal Server Error', 'Server got itself in trouble'), + 501: ('Not Implemented', + 'Server does not support this operation'), + 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'), + 503: ('Service Unavailable', + 'The server cannot process the request due to a high load'), + 504: ('Gateway Timeout', + 'The gateway server did not receive a timely response'), + 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'), + } + +When an error is raised the server responds by returning an HTTP error code +*and* an error page. You can use the ``HTTPError`` instance as a response on the +page returned. This means that as well as the code attribute, it also has read, +geturl, and info, methods. 
:: + + >>> req = urllib2.Request('http://www.python.org/fish.html') + >>> try: + >>> urllib2.urlopen(req) + >>> except URLError, e: + >>> print e.code + >>> print e.read() + >>> + 404 + + + Error 404: File Not Found + ...... etc... + +Wrapping it Up +-------------- + +So if you want to be prepared for ``HTTPError`` *or* ``URLError`` there are two +basic approaches. I prefer the second approach. + +Number 1 +~~~~~~~~ + +:: + + + from urllib2 import Request, urlopen, URLError, HTTPError + req = Request(someurl) + try: + response = urlopen(req) + except HTTPError, e: + print 'The server couldn\'t fulfill the request.' + print 'Error code: ', e.code + except URLError, e: + print 'We failed to reach a server.' + print 'Reason: ', e.reason + else: + # everything is fine + + +.. note:: + + The ``except HTTPError`` *must* come first, otherwise ``except URLError`` + will *also* catch an ``HTTPError``. + +Number 2 +~~~~~~~~ + +:: + + from urllib2 import Request, urlopen, URLError + req = Request(someurl) + try: + response = urlopen(req) + except URLError, e: + if hasattr(e, 'reason'): + print 'We failed to reach a server.' + print 'Reason: ', e.reason + elif hasattr(e, 'code'): + print 'The server couldn\'t fulfill the request.' + print 'Error code: ', e.code + else: + # everything is fine + + +info and geturl +=============== + +The response returned by urlopen (or the ``HTTPError`` instance) has two useful +methods ``info`` and ``geturl``. + +**geturl** - this returns the real URL of the page fetched. This is useful +because ``urlopen`` (or the opener object used) may have followed a +redirect. The URL of the page fetched may not be the same as the URL requested. + +**info** - this returns a dictionary-like object that describes the page +fetched, particularly the headers sent by the server. It is currently an +``httplib.HTTPMessage`` instance. + +Typical headers include 'Content-length', 'Content-type', and so on. 
See the +`Quick Reference to HTTP Headers `_ +for a useful listing of HTTP headers with brief explanations of their meaning +and use. + + +Openers and Handlers +==================== + +When you fetch a URL you use an opener (an instance of the perhaps +confusingly-named :class:`urllib2.OpenerDirector`). Normally we have been using +the default opener - via ``urlopen`` - but you can create custom +openers. Openers use handlers. All the "heavy lifting" is done by the +handlers. Each handler knows how to open URLs for a particular URL scheme (http, +ftp, etc.), or how to handle an aspect of URL opening, for example HTTP +redirections or HTTP cookies. + +You will want to create openers if you want to fetch URLs with specific handlers +installed, for example to get an opener that handles cookies, or to get an +opener that does not handle redirections. + +To create an opener, instantiate an ``OpenerDirector``, and then call +``.add_handler(some_handler_instance)`` repeatedly. + +Alternatively, you can use ``build_opener``, which is a convenience function for +creating opener objects with a single function call. ``build_opener`` adds +several handlers by default, but provides a quick way to add more and/or +override the default handlers. + +Other sorts of handlers you might want to use can handle proxies, +authentication, and other common but slightly specialised situations. + +``install_opener`` can be used to make an ``opener`` object the (global) default +opener. This means that calls to ``urlopen`` will use the opener you have +installed. + +Opener objects have an ``open`` method, which can be called directly to fetch +urls in the same way as the ``urlopen`` function: there's no need to call +``install_opener``, except as a convenience. + + +Basic Authentication +==================== + +To illustrate creating and installing a handler we will use the +``HTTPBasicAuthHandler``. 
For a more detailed discussion of this subject -- +including an explanation of how Basic Authentication works - see the `Basic +Authentication Tutorial +`_. + +When authentication is required, the server sends a header (as well as the 401 +error code) requesting authentication. This specifies the authentication scheme +and a 'realm'. The header looks like : ``Www-authenticate: SCHEME +realm="REALM"``. + +e.g. :: + + Www-authenticate: Basic realm="cPanel Users" + + +The client should then retry the request with the appropriate name and password +for the realm included as a header in the request. This is 'basic +authentication'. In order to simplify this process we can create an instance of +``HTTPBasicAuthHandler`` and an opener to use this handler. + +The ``HTTPBasicAuthHandler`` uses an object called a password manager to handle +the mapping of URLs and realms to passwords and usernames. If you know what the +realm is (from the authentication header sent by the server), then you can use a +``HTTPPasswordMgr``. Frequently one doesn't care what the realm is. In that +case, it is convenient to use ``HTTPPasswordMgrWithDefaultRealm``. This allows +you to specify a default username and password for a URL. This will be supplied +in the absence of you providing an alternative combination for a specific +realm. We indicate this by providing ``None`` as the realm argument to the +``add_password`` method. + +The top-level URL is the first URL that requires authentication. URLs "deeper" +than the URL you pass to .add_password() will also match. :: + + # create a password manager + password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm() + + # Add the username and password. + # If we knew the realm, we could use it instead of ``None``. 
+ top_level_url = "http://example.com/foo/" + password_mgr.add_password(None, top_level_url, username, password) + + handler = urllib2.HTTPBasicAuthHandler(password_mgr) + + # create "opener" (OpenerDirector instance) + opener = urllib2.build_opener(handler) + + # use the opener to fetch a URL + opener.open(a_url) + + # Install the opener. + # Now all calls to urllib2.urlopen use our opener. + urllib2.install_opener(opener) + +.. note:: + + In the above example we only supplied our ``HTTPBasicAuthHandler`` to + ``build_opener``. By default openers have the handlers for normal situations + -- ``ProxyHandler``, ``UnknownHandler``, ``HTTPHandler``, + ``HTTPDefaultErrorHandler``, ``HTTPRedirectHandler``, ``FTPHandler``, + ``FileHandler``, ``HTTPErrorProcessor``. + +``top_level_url`` is in fact *either* a full URL (including the 'http:' scheme +component and the hostname and optionally the port number) +e.g. "http://example.com/" *or* an "authority" (i.e. the hostname, +optionally including the port number) e.g. "example.com" or "example.com:8080" +(the latter example includes a port number). The authority, if present, must +NOT contain the "userinfo" component - for example "joe:password@example.com" is +not correct. + + +Proxies +======= + +**urllib2** will auto-detect your proxy settings and use those. This is through +the ``ProxyHandler`` which is part of the normal handler chain. Normally that's +a good thing, but there are occasions when it may not be helpful [#]_. One way +to do this is to set up our own ``ProxyHandler``, with no proxies defined. This +is done using similar steps to setting up a `Basic Authentication`_ handler : :: + + >>> proxy_support = urllib2.ProxyHandler({}) + >>> opener = urllib2.build_opener(proxy_support) + >>> urllib2.install_opener(opener) + +.. note:: + + Currently ``urllib2`` *does not* support fetching of ``https`` locations + through a proxy. However, this can be enabled by extending urllib2 as + shown in the recipe [#]_. 
+ + +Sockets and Layers +================== + +The Python support for fetching resources from the web is layered. urllib2 uses +the httplib library, which in turn uses the socket library. + +As of Python 2.3 you can specify how long a socket should wait for a response +before timing out. This can be useful in applications which have to fetch web +pages. By default the socket module has *no timeout* and can hang. Currently, +the socket timeout is not exposed at the httplib or urllib2 levels. However, +you can set the default timeout globally for all sockets using :: + + import socket + import urllib2 + + # timeout in seconds + timeout = 10 + socket.setdefaulttimeout(timeout) + + # this call to urllib2.urlopen now uses the default timeout + # we have set in the socket module + req = urllib2.Request('http://www.voidspace.org.uk') + response = urllib2.urlopen(req) + + +------- + + +Footnotes +========= + +This document was reviewed and revised by John Lee. + +.. [#] For an introduction to the CGI protocol see + `Writing Web Applications in Python `_. +.. [#] Like Google for example. The *proper* way to use google from a program + is to use `PyGoogle `_ of course. See + `Voidspace Google `_ + for some examples of using the Google API. +.. [#] Browser sniffing is a very bad practise for website design - building + sites using web standards is much more sensible. Unfortunately a lot of + sites still send different versions to different browsers. +.. [#] The user agent for MSIE 6 is + *'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)'* +.. [#] For details of more HTTP request headers, see + `Quick Reference to HTTP Headers`_. +.. [#] In my case I have to use a proxy to access the internet at work. If you + attempt to fetch *localhost* URLs through this proxy it blocks them. IE + is set to use the proxy, which urllib2 picks up on. In order to test + scripts with a localhost server, I have to prevent urllib2 from using + the proxy. +.. 
[#] urllib2 opener for SSL proxy (CONNECT method): `ASPN Cookbook Recipe + `_. + diff --git a/Doc/includes/email-dir.py b/Doc/includes/email-dir.py new file mode 100644 index 0000000..c04f57d --- /dev/null +++ b/Doc/includes/email-dir.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python + +"""Send the contents of a directory as a MIME message.""" + +import os +import sys +import smtplib +# For guessing MIME type based on file name extension +import mimetypes + +from optparse import OptionParser + +from email import encoders +from email.message import Message +from email.mime.audio import MIMEAudio +from email.mime.base import MIMEBase +from email.mime.image import MIMEImage +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText + +COMMASPACE = ', ' + + +def main(): + parser = OptionParser(usage="""\ +Send the contents of a directory as a MIME message. + +Usage: %prog [options] + +Unless the -o option is given, the email is sent by forwarding to your local +SMTP server, which then does the normal delivery process. Your local machine +must be running an SMTP server. +""") + parser.add_option('-d', '--directory', + type='string', action='store', + help="""Mail the contents of the specified directory, + otherwise use the current directory. 
Only the regular + files in the directory are sent, and we don't recurse to + subdirectories.""") + parser.add_option('-o', '--output', + type='string', action='store', metavar='FILE', + help="""Print the composed message to FILE instead of + sending the message to the SMTP server.""") + parser.add_option('-s', '--sender', + type='string', action='store', metavar='SENDER', + help='The value of the From: header (required)') + parser.add_option('-r', '--recipient', + type='string', action='append', metavar='RECIPIENT', + default=[], dest='recipients', + help='A To: header value (at least one required)') + opts, args = parser.parse_args() + if not opts.sender or not opts.recipients: + parser.print_help() + sys.exit(1) + directory = opts.directory + if not directory: + directory = '.' + # Create the enclosing (outer) message + outer = MIMEMultipart() + outer['Subject'] = 'Contents of directory %s' % os.path.abspath(directory) + outer['To'] = COMMASPACE.join(opts.recipients) + outer['From'] = opts.sender + outer.preamble = 'You will not see this in a MIME-aware mail reader.\n' + + for filename in os.listdir(directory): + path = os.path.join(directory, filename) + if not os.path.isfile(path): + continue + # Guess the content type based on the file's extension. Encoding + # will be ignored, although we should check for simple things like + # gzip'd or compressed files. + ctype, encoding = mimetypes.guess_type(path) + if ctype is None or encoding is not None: + # No guess could be made, or the file is encoded (compressed), so + # use a generic bag-of-bits type. 
+ ctype = 'application/octet-stream' + maintype, subtype = ctype.split('/', 1) + if maintype == 'text': + fp = open(path) + # Note: we should handle calculating the charset + msg = MIMEText(fp.read(), _subtype=subtype) + fp.close() + elif maintype == 'image': + fp = open(path, 'rb') + msg = MIMEImage(fp.read(), _subtype=subtype) + fp.close() + elif maintype == 'audio': + fp = open(path, 'rb') + msg = MIMEAudio(fp.read(), _subtype=subtype) + fp.close() + else: + fp = open(path, 'rb') + msg = MIMEBase(maintype, subtype) + msg.set_payload(fp.read()) + fp.close() + # Encode the payload using Base64 + encoders.encode_base64(msg) + # Set the filename parameter + msg.add_header('Content-Disposition', 'attachment', filename=filename) + outer.attach(msg) + # Now send or store the message + composed = outer.as_string() + if opts.output: + fp = open(opts.output, 'w') + fp.write(composed) + fp.close() + else: + s = smtplib.SMTP() + s.connect() + s.sendmail(opts.sender, opts.recipients, composed) + s.close() + + +if __name__ == '__main__': + main() diff --git a/Doc/includes/email-mime.py b/Doc/includes/email-mime.py new file mode 100644 index 0000000..5097253 --- /dev/null +++ b/Doc/includes/email-mime.py @@ -0,0 +1,32 @@ +# Import smtplib for the actual sending function +import smtplib + +# Here are the email package modules we'll need +from email.mime.image import MIMEImage +from email.mime.multipart import MIMEMultipart + +COMMASPACE = ', ' + +# Create the container (outer) email message. +msg = MIMEMultipart() +msg['Subject'] = 'Our family reunion' +# me == the sender's email address +# family = the list of all recipients' email addresses +msg['From'] = me +msg['To'] = COMMASPACE.join(family) +msg.preamble = 'Our family reunion' + +# Assume we know that the image files are all in PNG format +for file in pngfiles: + # Open the files in binary mode. Let the MIMEImage class automatically + # guess the specific image type. 
+ fp = open(file, 'rb') + img = MIMEImage(fp.read()) + fp.close() + msg.attach(img) + +# Send the email via our own SMTP server. +s = smtplib.SMTP() +s.connect() +s.sendmail(me, family, msg.as_string()) +s.close() diff --git a/Doc/includes/email-simple.py b/Doc/includes/email-simple.py new file mode 100644 index 0000000..44152a4 --- /dev/null +++ b/Doc/includes/email-simple.py @@ -0,0 +1,25 @@ +# Import smtplib for the actual sending function +import smtplib + +# Import the email modules we'll need +from email.mime.text import MIMEText + +# Open a plain text file for reading. For this example, assume that +# the text file contains only ASCII characters. +fp = open(textfile, 'rb') +# Create a text/plain message +msg = MIMEText(fp.read()) +fp.close() + +# me == the sender's email address +# you == the recipient's email address +msg['Subject'] = 'The contents of %s' % textfile +msg['From'] = me +msg['To'] = you + +# Send the message via our own SMTP server, but don't include the +# envelope header. +s = smtplib.SMTP() +s.connect() +s.sendmail(me, [you], msg.as_string()) +s.close() diff --git a/Doc/includes/email-unpack.py b/Doc/includes/email-unpack.py new file mode 100644 index 0000000..e596b98 --- /dev/null +++ b/Doc/includes/email-unpack.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python + +"""Unpack a MIME message into a directory of files.""" + +import os +import sys +import email +import errno +import mimetypes + +from optparse import OptionParser + + +def main(): + parser = OptionParser(usage="""\ +Unpack a MIME message into a directory of files. 
+ +Usage: %prog [options] msgfile +""") + parser.add_option('-d', '--directory', + type='string', action='store', + help="""Unpack the MIME message into the named + directory, which will be created if it doesn't already + exist.""") + opts, args = parser.parse_args() + if not opts.directory: + parser.print_help() + sys.exit(1) + + try: + msgfile = args[0] + except IndexError: + parser.print_help() + sys.exit(1) + + try: + os.mkdir(opts.directory) + except OSError as e: + # Ignore directory exists error + if e.errno != errno.EEXIST: + raise + + fp = open(msgfile) + msg = email.message_from_file(fp) + fp.close() + + counter = 1 + for part in msg.walk(): + # multipart/* are just containers + if part.get_content_maintype() == 'multipart': + continue + # Applications should really sanitize the given filename so that an + # email message can't be used to overwrite important files + filename = part.get_filename() + if not filename: + ext = mimetypes.guess_extension(part.get_type()) + if not ext: + # Use a generic bag-of-bits extension + ext = '.bin' + filename = 'part-%03d%s' % (counter, ext) + counter += 1 + fp = open(os.path.join(opts.directory, filename), 'wb') + fp.write(part.get_payload(decode=True)) + fp.close() + + +if __name__ == '__main__': + main() diff --git a/Doc/includes/minidom-example.py b/Doc/includes/minidom-example.py new file mode 100644 index 0000000..c30c4e0 --- /dev/null +++ b/Doc/includes/minidom-example.py @@ -0,0 +1,64 @@ +import xml.dom.minidom + +document = """\ + +Demo slideshow +Slide title +This is a demo +Of a program for processing slides + + +Another demo slide +It is important +To have more than +one slide + + +""" + +dom = xml.dom.minidom.parseString(document) + +def getText(nodelist): + rc = "" + for node in nodelist: + if node.nodeType == node.TEXT_NODE: + rc = rc + node.data + return rc + +def handleSlideshow(slideshow): + print "" + handleSlideshowTitle(slideshow.getElementsByTagName("title")[0]) + slides = 
slideshow.getElementsByTagName("slide") + handleToc(slides) + handleSlides(slides) + print "" + +def handleSlides(slides): + for slide in slides: + handleSlide(slide) + +def handleSlide(slide): + handleSlideTitle(slide.getElementsByTagName("title")[0]) + handlePoints(slide.getElementsByTagName("point")) + +def handleSlideshowTitle(title): + print "%s" % getText(title.childNodes) + +def handleSlideTitle(title): + print "

%s

" % getText(title.childNodes) + +def handlePoints(points): + print "
    " + for point in points: + handlePoint(point) + print "
" + +def handlePoint(point): + print "
  • %s
  • " % getText(point.childNodes) + +def handleToc(slides): + for slide in slides: + title = slide.getElementsByTagName("title")[0] + print "

    %s

    " % getText(title.childNodes) + +handleSlideshow(dom) diff --git a/Doc/includes/noddy.c b/Doc/includes/noddy.c new file mode 100644 index 0000000..ec2d669 --- /dev/null +++ b/Doc/includes/noddy.c @@ -0,0 +1,54 @@ +#include + +typedef struct { + PyObject_HEAD + /* Type-specific fields go here. */ +} noddy_NoddyObject; + +static PyTypeObject noddy_NoddyType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "noddy.Noddy", /*tp_name*/ + sizeof(noddy_NoddyObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + 0, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT, /*tp_flags*/ + "Noddy objects", /* tp_doc */ +}; + +static PyMethodDef noddy_methods[] = { + {NULL} /* Sentinel */ +}; + +#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */ +#define PyMODINIT_FUNC void +#endif +PyMODINIT_FUNC +initnoddy(void) +{ + PyObject* m; + + noddy_NoddyType.tp_new = PyType_GenericNew; + if (PyType_Ready(&noddy_NoddyType) < 0) + return; + + m = Py_InitModule3("noddy", noddy_methods, + "Example module that creates an extension type."); + + Py_INCREF(&noddy_NoddyType); + PyModule_AddObject(m, "Noddy", (PyObject *)&noddy_NoddyType); +} diff --git a/Doc/includes/noddy2.c b/Doc/includes/noddy2.c new file mode 100644 index 0000000..2caf985 --- /dev/null +++ b/Doc/includes/noddy2.c @@ -0,0 +1,190 @@ +#include +#include "structmember.h" + +typedef struct { + PyObject_HEAD + PyObject *first; /* first name */ + PyObject *last; /* last name */ + int number; +} Noddy; + +static void +Noddy_dealloc(Noddy* self) +{ + Py_XDECREF(self->first); + Py_XDECREF(self->last); + self->ob_type->tp_free((PyObject*)self); +} + +static PyObject * +Noddy_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + Noddy *self; + + self = (Noddy 
*)type->tp_alloc(type, 0); + if (self != NULL) { + self->first = PyString_FromString(""); + if (self->first == NULL) + { + Py_DECREF(self); + return NULL; + } + + self->last = PyString_FromString(""); + if (self->last == NULL) + { + Py_DECREF(self); + return NULL; + } + + self->number = 0; + } + + return (PyObject *)self; +} + +static int +Noddy_init(Noddy *self, PyObject *args, PyObject *kwds) +{ + PyObject *first=NULL, *last=NULL, *tmp; + + static char *kwlist[] = {"first", "last", "number", NULL}; + + if (! PyArg_ParseTupleAndKeywords(args, kwds, "|OOi", kwlist, + &first, &last, + &self->number)) + return -1; + + if (first) { + tmp = self->first; + Py_INCREF(first); + self->first = first; + Py_XDECREF(tmp); + } + + if (last) { + tmp = self->last; + Py_INCREF(last); + self->last = last; + Py_XDECREF(tmp); + } + + return 0; +} + + +static PyMemberDef Noddy_members[] = { + {"first", T_OBJECT_EX, offsetof(Noddy, first), 0, + "first name"}, + {"last", T_OBJECT_EX, offsetof(Noddy, last), 0, + "last name"}, + {"number", T_INT, offsetof(Noddy, number), 0, + "noddy number"}, + {NULL} /* Sentinel */ +}; + +static PyObject * +Noddy_name(Noddy* self) +{ + static PyObject *format = NULL; + PyObject *args, *result; + + if (format == NULL) { + format = PyString_FromString("%s %s"); + if (format == NULL) + return NULL; + } + + if (self->first == NULL) { + PyErr_SetString(PyExc_AttributeError, "first"); + return NULL; + } + + if (self->last == NULL) { + PyErr_SetString(PyExc_AttributeError, "last"); + return NULL; + } + + args = Py_BuildValue("OO", self->first, self->last); + if (args == NULL) + return NULL; + + result = PyString_Format(format, args); + Py_DECREF(args); + + return result; +} + +static PyMethodDef Noddy_methods[] = { + {"name", (PyCFunction)Noddy_name, METH_NOARGS, + "Return the name, combining the first and last name" + }, + {NULL} /* Sentinel */ +}; + +static PyTypeObject NoddyType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "noddy.Noddy", /*tp_name*/ + 
sizeof(Noddy), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)Noddy_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "Noddy objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Noddy_methods, /* tp_methods */ + Noddy_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)Noddy_init, /* tp_init */ + 0, /* tp_alloc */ + Noddy_new, /* tp_new */ +}; + +static PyMethodDef module_methods[] = { + {NULL} /* Sentinel */ +}; + +#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */ +#define PyMODINIT_FUNC void +#endif +PyMODINIT_FUNC +initnoddy2(void) +{ + PyObject* m; + + if (PyType_Ready(&NoddyType) < 0) + return; + + m = Py_InitModule3("noddy2", module_methods, + "Example module that creates an extension type."); + + if (m == NULL) + return; + + Py_INCREF(&NoddyType); + PyModule_AddObject(m, "Noddy", (PyObject *)&NoddyType); +} diff --git a/Doc/includes/noddy3.c b/Doc/includes/noddy3.c new file mode 100644 index 0000000..60260ad --- /dev/null +++ b/Doc/includes/noddy3.c @@ -0,0 +1,243 @@ +#include +#include "structmember.h" + +typedef struct { + PyObject_HEAD + PyObject *first; + PyObject *last; + int number; +} Noddy; + +static void +Noddy_dealloc(Noddy* self) +{ + Py_XDECREF(self->first); + Py_XDECREF(self->last); + self->ob_type->tp_free((PyObject*)self); +} + +static PyObject * +Noddy_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + Noddy *self; + + self = (Noddy *)type->tp_alloc(type, 0); + if (self != NULL) { + 
self->first = PyString_FromString(""); + if (self->first == NULL) + { + Py_DECREF(self); + return NULL; + } + + self->last = PyString_FromString(""); + if (self->last == NULL) + { + Py_DECREF(self); + return NULL; + } + + self->number = 0; + } + + return (PyObject *)self; +} + +static int +Noddy_init(Noddy *self, PyObject *args, PyObject *kwds) +{ + PyObject *first=NULL, *last=NULL, *tmp; + + static char *kwlist[] = {"first", "last", "number", NULL}; + + if (! PyArg_ParseTupleAndKeywords(args, kwds, "|SSi", kwlist, + &first, &last, + &self->number)) + return -1; + + if (first) { + tmp = self->first; + Py_INCREF(first); + self->first = first; + Py_DECREF(tmp); + } + + if (last) { + tmp = self->last; + Py_INCREF(last); + self->last = last; + Py_DECREF(tmp); + } + + return 0; +} + +static PyMemberDef Noddy_members[] = { + {"number", T_INT, offsetof(Noddy, number), 0, + "noddy number"}, + {NULL} /* Sentinel */ +}; + +static PyObject * +Noddy_getfirst(Noddy *self, void *closure) +{ + Py_INCREF(self->first); + return self->first; +} + +static int +Noddy_setfirst(Noddy *self, PyObject *value, void *closure) +{ + if (value == NULL) { + PyErr_SetString(PyExc_TypeError, "Cannot delete the first attribute"); + return -1; + } + + if (! PyString_Check(value)) { + PyErr_SetString(PyExc_TypeError, + "The first attribute value must be a string"); + return -1; + } + + Py_DECREF(self->first); + Py_INCREF(value); + self->first = value; + + return 0; +} + +static PyObject * +Noddy_getlast(Noddy *self, void *closure) +{ + Py_INCREF(self->last); + return self->last; +} + +static int +Noddy_setlast(Noddy *self, PyObject *value, void *closure) +{ + if (value == NULL) { + PyErr_SetString(PyExc_TypeError, "Cannot delete the last attribute"); + return -1; + } + + if (! 
PyString_Check(value)) { + PyErr_SetString(PyExc_TypeError, + "The last attribute value must be a string"); + return -1; + } + + Py_DECREF(self->last); + Py_INCREF(value); + self->last = value; + + return 0; +} + +static PyGetSetDef Noddy_getseters[] = { + {"first", + (getter)Noddy_getfirst, (setter)Noddy_setfirst, + "first name", + NULL}, + {"last", + (getter)Noddy_getlast, (setter)Noddy_setlast, + "last name", + NULL}, + {NULL} /* Sentinel */ +}; + +static PyObject * +Noddy_name(Noddy* self) +{ + static PyObject *format = NULL; + PyObject *args, *result; + + if (format == NULL) { + format = PyString_FromString("%s %s"); + if (format == NULL) + return NULL; + } + + args = Py_BuildValue("OO", self->first, self->last); + if (args == NULL) + return NULL; + + result = PyString_Format(format, args); + Py_DECREF(args); + + return result; +} + +static PyMethodDef Noddy_methods[] = { + {"name", (PyCFunction)Noddy_name, METH_NOARGS, + "Return the name, combining the first and last name" + }, + {NULL} /* Sentinel */ +}; + +static PyTypeObject NoddyType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "noddy.Noddy", /*tp_name*/ + sizeof(Noddy), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)Noddy_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/ + "Noddy objects", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Noddy_methods, /* tp_methods */ + Noddy_members, /* tp_members */ + Noddy_getseters, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)Noddy_init, /* tp_init */ + 
0, /* tp_alloc */ + Noddy_new, /* tp_new */ +}; + +static PyMethodDef module_methods[] = { + {NULL} /* Sentinel */ +}; + +#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */ +#define PyMODINIT_FUNC void +#endif +PyMODINIT_FUNC +initnoddy3(void) +{ + PyObject* m; + + if (PyType_Ready(&NoddyType) < 0) + return; + + m = Py_InitModule3("noddy3", module_methods, + "Example module that creates an extension type."); + + if (m == NULL) + return; + + Py_INCREF(&NoddyType); + PyModule_AddObject(m, "Noddy", (PyObject *)&NoddyType); +} diff --git a/Doc/includes/noddy4.c b/Doc/includes/noddy4.c new file mode 100644 index 0000000..878e086 --- /dev/null +++ b/Doc/includes/noddy4.c @@ -0,0 +1,224 @@ +#include +#include "structmember.h" + +typedef struct { + PyObject_HEAD + PyObject *first; + PyObject *last; + int number; +} Noddy; + +static int +Noddy_traverse(Noddy *self, visitproc visit, void *arg) +{ + int vret; + + if (self->first) { + vret = visit(self->first, arg); + if (vret != 0) + return vret; + } + if (self->last) { + vret = visit(self->last, arg); + if (vret != 0) + return vret; + } + + return 0; +} + +static int +Noddy_clear(Noddy *self) +{ + PyObject *tmp; + + tmp = self->first; + self->first = NULL; + Py_XDECREF(tmp); + + tmp = self->last; + self->last = NULL; + Py_XDECREF(tmp); + + return 0; +} + +static void +Noddy_dealloc(Noddy* self) +{ + Noddy_clear(self); + self->ob_type->tp_free((PyObject*)self); +} + +static PyObject * +Noddy_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + Noddy *self; + + self = (Noddy *)type->tp_alloc(type, 0); + if (self != NULL) { + self->first = PyString_FromString(""); + if (self->first == NULL) + { + Py_DECREF(self); + return NULL; + } + + self->last = PyString_FromString(""); + if (self->last == NULL) + { + Py_DECREF(self); + return NULL; + } + + self->number = 0; + } + + return (PyObject *)self; +} + +static int +Noddy_init(Noddy *self, PyObject *args, PyObject *kwds) +{ + PyObject *first=NULL, *last=NULL, 
*tmp; + + static char *kwlist[] = {"first", "last", "number", NULL}; + + if (! PyArg_ParseTupleAndKeywords(args, kwds, "|OOi", kwlist, + &first, &last, + &self->number)) + return -1; + + if (first) { + tmp = self->first; + Py_INCREF(first); + self->first = first; + Py_XDECREF(tmp); + } + + if (last) { + tmp = self->last; + Py_INCREF(last); + self->last = last; + Py_XDECREF(tmp); + } + + return 0; +} + + +static PyMemberDef Noddy_members[] = { + {"first", T_OBJECT_EX, offsetof(Noddy, first), 0, + "first name"}, + {"last", T_OBJECT_EX, offsetof(Noddy, last), 0, + "last name"}, + {"number", T_INT, offsetof(Noddy, number), 0, + "noddy number"}, + {NULL} /* Sentinel */ +}; + +static PyObject * +Noddy_name(Noddy* self) +{ + static PyObject *format = NULL; + PyObject *args, *result; + + if (format == NULL) { + format = PyString_FromString("%s %s"); + if (format == NULL) + return NULL; + } + + if (self->first == NULL) { + PyErr_SetString(PyExc_AttributeError, "first"); + return NULL; + } + + if (self->last == NULL) { + PyErr_SetString(PyExc_AttributeError, "last"); + return NULL; + } + + args = Py_BuildValue("OO", self->first, self->last); + if (args == NULL) + return NULL; + + result = PyString_Format(format, args); + Py_DECREF(args); + + return result; +} + +static PyMethodDef Noddy_methods[] = { + {"name", (PyCFunction)Noddy_name, METH_NOARGS, + "Return the name, combining the first and last name" + }, + {NULL} /* Sentinel */ +}; + +static PyTypeObject NoddyType = { + PyObject_HEAD_INIT(NULL) + 0, /*ob_size*/ + "noddy.Noddy", /*tp_name*/ + sizeof(Noddy), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + (destructor)Noddy_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call*/ + 0, /*tp_str*/ + 0, /*tp_getattro*/ + 0, /*tp_setattro*/ + 0, /*tp_as_buffer*/ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | 
Py_TPFLAGS_HAVE_GC, /*tp_flags*/ + "Noddy objects", /* tp_doc */ + (traverseproc)Noddy_traverse, /* tp_traverse */ + (inquiry)Noddy_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Noddy_methods, /* tp_methods */ + Noddy_members, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)Noddy_init, /* tp_init */ + 0, /* tp_alloc */ + Noddy_new, /* tp_new */ +}; + +static PyMethodDef module_methods[] = { + {NULL} /* Sentinel */ +}; + +#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */ +#define PyMODINIT_FUNC void +#endif +PyMODINIT_FUNC +initnoddy4(void) +{ + PyObject* m; + + if (PyType_Ready(&NoddyType) < 0) + return; + + m = Py_InitModule3("noddy4", module_methods, + "Example module that creates an extension type."); + + if (m == NULL) + return; + + Py_INCREF(&NoddyType); + PyModule_AddObject(m, "Noddy", (PyObject *)&NoddyType); +} diff --git a/Doc/includes/run-func.c b/Doc/includes/run-func.c new file mode 100644 index 0000000..5a7df0d --- /dev/null +++ b/Doc/includes/run-func.c @@ -0,0 +1,68 @@ +#include + +int +main(int argc, char *argv[]) +{ + PyObject *pName, *pModule, *pDict, *pFunc; + PyObject *pArgs, *pValue; + int i; + + if (argc < 3) { + fprintf(stderr,"Usage: call pythonfile funcname [args]\n"); + return 1; + } + + Py_Initialize(); + pName = PyString_FromString(argv[1]); + /* Error checking of pName left out */ + + pModule = PyImport_Import(pName); + Py_DECREF(pName); + + if (pModule != NULL) { + pFunc = PyObject_GetAttrString(pModule, argv[2]); + /* pFunc is a new reference */ + + if (pFunc && PyCallable_Check(pFunc)) { + pArgs = PyTuple_New(argc - 3); + for (i = 0; i < argc - 3; ++i) { + pValue = PyInt_FromLong(atoi(argv[i + 3])); + if (!pValue) { + Py_DECREF(pArgs); + Py_DECREF(pModule); + fprintf(stderr, "Cannot convert argument\n"); + return 1; + } + /* pValue 
reference stolen here: */ + PyTuple_SetItem(pArgs, i, pValue); + } + pValue = PyObject_CallObject(pFunc, pArgs); + Py_DECREF(pArgs); + if (pValue != NULL) { + printf("Result of call: %ld\n", PyInt_AsLong(pValue)); + Py_DECREF(pValue); + } + else { + Py_DECREF(pFunc); + Py_DECREF(pModule); + PyErr_Print(); + fprintf(stderr,"Call failed\n"); + return 1; + } + } + else { + if (PyErr_Occurred()) + PyErr_Print(); + fprintf(stderr, "Cannot find function \"%s\"\n", argv[2]); + } + Py_XDECREF(pFunc); + Py_DECREF(pModule); + } + else { + PyErr_Print(); + fprintf(stderr, "Failed to load \"%s\"\n", argv[1]); + return 1; + } + Py_Finalize(); + return 0; +} diff --git a/Doc/includes/setup.py b/Doc/includes/setup.py new file mode 100644 index 0000000..b853d23 --- /dev/null +++ b/Doc/includes/setup.py @@ -0,0 +1,8 @@ +from distutils.core import setup, Extension +setup(name="noddy", version="1.0", + ext_modules=[ + Extension("noddy", ["noddy.c"]), + Extension("noddy2", ["noddy2.c"]), + Extension("noddy3", ["noddy3.c"]), + Extension("noddy4", ["noddy4.c"]), + ]) diff --git a/Doc/includes/shoddy.c b/Doc/includes/shoddy.c new file mode 100644 index 0000000..07a4177 --- /dev/null +++ b/Doc/includes/shoddy.c @@ -0,0 +1,91 @@ +#include + +typedef struct { + PyListObject list; + int state; +} Shoddy; + + +static PyObject * +Shoddy_increment(Shoddy *self, PyObject *unused) +{ + self->state++; + return PyInt_FromLong(self->state); +} + + +static PyMethodDef Shoddy_methods[] = { + {"increment", (PyCFunction)Shoddy_increment, METH_NOARGS, + PyDoc_STR("increment state counter")}, + {NULL, NULL}, +}; + +static int +Shoddy_init(Shoddy *self, PyObject *args, PyObject *kwds) +{ + if (PyList_Type.tp_init((PyObject *)self, args, kwds) < 0) + return -1; + self->state = 0; + return 0; +} + + +static PyTypeObject ShoddyType = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "shoddy.Shoddy", /* tp_name */ + sizeof(Shoddy), /* tp_basicsize */ + 0, /* tp_itemsize */ + 0, /* tp_dealloc */ + 0, /* 
tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | + Py_TPFLAGS_BASETYPE, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + Shoddy_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)Shoddy_init, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ +}; + +PyMODINIT_FUNC +initshoddy(void) +{ + PyObject *m; + + ShoddyType.tp_base = &PyList_Type; + if (PyType_Ready(&ShoddyType) < 0) + return; + + m = Py_InitModule3("shoddy", NULL, "Shoddy module"); + if (m == NULL) + return; + + Py_INCREF(&ShoddyType); + PyModule_AddObject(m, "Shoddy", (PyObject *) &ShoddyType); +} diff --git a/Doc/includes/sqlite3/adapter_datetime.py b/Doc/includes/sqlite3/adapter_datetime.py new file mode 100644 index 0000000..5869e22 --- /dev/null +++ b/Doc/includes/sqlite3/adapter_datetime.py @@ -0,0 +1,14 @@ +import sqlite3 +import datetime, time + +def adapt_datetime(ts): + return time.mktime(ts.timetuple()) + +sqlite3.register_adapter(datetime.datetime, adapt_datetime) + +con = sqlite3.connect(":memory:") +cur = con.cursor() + +now = datetime.datetime.now() +cur.execute("select ?", (now,)) +print(cur.fetchone()[0]) diff --git a/Doc/includes/sqlite3/adapter_point_1.py b/Doc/includes/sqlite3/adapter_point_1.py new file mode 100644 index 0000000..1343acd --- /dev/null +++ b/Doc/includes/sqlite3/adapter_point_1.py @@ -0,0 +1,16 @@ +import sqlite3 + +class Point(object): + def __init__(self, x, y): + self.x, self.y = x, y + + def __conform__(self, protocol): + if protocol is 
sqlite3.PrepareProtocol: + return "%f;%f" % (self.x, self.y) + +con = sqlite3.connect(":memory:") +cur = con.cursor() + +p = Point(4.0, -3.2) +cur.execute("select ?", (p,)) +print(cur.fetchone()[0]) diff --git a/Doc/includes/sqlite3/adapter_point_2.py b/Doc/includes/sqlite3/adapter_point_2.py new file mode 100644 index 0000000..1e1719a --- /dev/null +++ b/Doc/includes/sqlite3/adapter_point_2.py @@ -0,0 +1,17 @@ +import sqlite3 + +class Point(object): + def __init__(self, x, y): + self.x, self.y = x, y + +def adapt_point(point): + return "%f;%f" % (point.x, point.y) + +sqlite3.register_adapter(Point, adapt_point) + +con = sqlite3.connect(":memory:") +cur = con.cursor() + +p = Point(4.0, -3.2) +cur.execute("select ?", (p,)) +print(cur.fetchone()[0]) diff --git a/Doc/includes/sqlite3/collation_reverse.py b/Doc/includes/sqlite3/collation_reverse.py new file mode 100644 index 0000000..bfd7f5b --- /dev/null +++ b/Doc/includes/sqlite3/collation_reverse.py @@ -0,0 +1,15 @@ +import sqlite3 + +def collate_reverse(string1, string2): + return -cmp(string1, string2) + +con = sqlite3.connect(":memory:") +con.create_collation("reverse", collate_reverse) + +cur = con.cursor() +cur.execute("create table test(x)") +cur.executemany("insert into test(x) values (?)", [("a",), ("b",)]) +cur.execute("select x from test order by x collate reverse") +for row in cur: + print(row) +con.close() diff --git a/Doc/includes/sqlite3/complete_statement.py b/Doc/includes/sqlite3/complete_statement.py new file mode 100644 index 0000000..cd38d73 --- /dev/null +++ b/Doc/includes/sqlite3/complete_statement.py @@ -0,0 +1,30 @@ +# A minimal SQLite shell for experiments + +import sqlite3 + +con = sqlite3.connect(":memory:") +con.isolation_level = None +cur = con.cursor() + +buffer = "" + +print("Enter your SQL commands to execute in sqlite3.") +print("Enter a blank line to exit.") + +while True: + line = input() + if line == "": + break + buffer += line + if sqlite3.complete_statement(buffer): + try: + 
buffer = buffer.strip() + cur.execute(buffer) + + if buffer.lstrip().upper().startswith("SELECT"): + print(cur.fetchall()) + except sqlite3.Error as e: + print("An error occurred:", e.args[0]) + buffer = "" + +con.close() diff --git a/Doc/includes/sqlite3/connect_db_1.py b/Doc/includes/sqlite3/connect_db_1.py new file mode 100644 index 0000000..1b97523 --- /dev/null +++ b/Doc/includes/sqlite3/connect_db_1.py @@ -0,0 +1,3 @@ +import sqlite3 + +con = sqlite3.connect("mydb") diff --git a/Doc/includes/sqlite3/connect_db_2.py b/Doc/includes/sqlite3/connect_db_2.py new file mode 100644 index 0000000..f9728b36 --- /dev/null +++ b/Doc/includes/sqlite3/connect_db_2.py @@ -0,0 +1,3 @@ +import sqlite3 + +con = sqlite3.connect(":memory:") diff --git a/Doc/includes/sqlite3/converter_point.py b/Doc/includes/sqlite3/converter_point.py new file mode 100644 index 0000000..d0707ab --- /dev/null +++ b/Doc/includes/sqlite3/converter_point.py @@ -0,0 +1,47 @@ +import sqlite3 + +class Point(object): + def __init__(self, x, y): + self.x, self.y = x, y + + def __repr__(self): + return "(%f;%f)" % (self.x, self.y) + +def adapt_point(point): + return "%f;%f" % (point.x, point.y) + +def convert_point(s): + x, y = list(map(float, s.split(";"))) + return Point(x, y) + +# Register the adapter +sqlite3.register_adapter(Point, adapt_point) + +# Register the converter +sqlite3.register_converter("point", convert_point) + +p = Point(4.0, -3.2) + +######################### +# 1) Using declared types +con = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES) +cur = con.cursor() +cur.execute("create table test(p point)") + +cur.execute("insert into test(p) values (?)", (p,)) +cur.execute("select p from test") +print("with declared types:", cur.fetchone()[0]) +cur.close() +con.close() + +####################### +# 2) Using column names +con = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_COLNAMES) +cur = con.cursor() +cur.execute("create table test(p)") + +cur.execute("insert
into test(p) values (?)", (p,)) +cur.execute('select p as "p [point]" from test') +print("with column names:", cur.fetchone()[0]) +cur.close() +con.close() diff --git a/Doc/includes/sqlite3/countcursors.py b/Doc/includes/sqlite3/countcursors.py new file mode 100644 index 0000000..ef3e70a --- /dev/null +++ b/Doc/includes/sqlite3/countcursors.py @@ -0,0 +1,15 @@ +import sqlite3 + +class CountCursorsConnection(sqlite3.Connection): + def __init__(self, *args, **kwargs): + sqlite3.Connection.__init__(self, *args, **kwargs) + self.numcursors = 0 + + def cursor(self, *args, **kwargs): + self.numcursors += 1 + return sqlite3.Connection.cursor(self, *args, **kwargs) + +con = sqlite3.connect(":memory:", factory=CountCursorsConnection) +cur1 = con.cursor() +cur2 = con.cursor() +print(con.numcursors) diff --git a/Doc/includes/sqlite3/createdb.py b/Doc/includes/sqlite3/createdb.py new file mode 100644 index 0000000..ee2950b --- /dev/null +++ b/Doc/includes/sqlite3/createdb.py @@ -0,0 +1,28 @@ +# Not referenced from the documentation, but builds the database file the other +# code snippets expect. + +import sqlite3 +import os + +DB_FILE = "mydb" + +if os.path.exists(DB_FILE): + os.remove(DB_FILE) + +con = sqlite3.connect(DB_FILE) +cur = con.cursor() +cur.execute(""" + create table people + ( + name_last varchar(20), + age integer + ) + """) + +cur.execute("insert into people (name_last, age) values ('Yeltsin', 72)") +cur.execute("insert into people (name_last, age) values ('Putin', 51)") + +con.commit() + +cur.close() +con.close() diff --git a/Doc/includes/sqlite3/execsql_fetchonerow.py b/Doc/includes/sqlite3/execsql_fetchonerow.py new file mode 100644 index 0000000..078873b --- /dev/null +++ b/Doc/includes/sqlite3/execsql_fetchonerow.py @@ -0,0 +1,17 @@ +import sqlite3 + +con = sqlite3.connect("mydb") + +cur = con.cursor() +SELECT = "select name_last, age from people order by age, name_last" + +# 1. 
Iterate over the rows available from the cursor, unpacking the +# resulting sequences to yield their elements (name_last, age): +cur.execute(SELECT) +for (name_last, age) in cur: + print('%s is %d years old.' % (name_last, age)) + +# 2. Equivalently: +cur.execute(SELECT) +for row in cur: + print('%s is %d years old.' % (row[0], row[1])) diff --git a/Doc/includes/sqlite3/execsql_printall_1.py b/Doc/includes/sqlite3/execsql_printall_1.py new file mode 100644 index 0000000..a4ce5c5 --- /dev/null +++ b/Doc/includes/sqlite3/execsql_printall_1.py @@ -0,0 +1,13 @@ +import sqlite3 + +# Create a connection to the database file "mydb": +con = sqlite3.connect("mydb") + +# Get a Cursor object that operates in the context of Connection con: +cur = con.cursor() + +# Execute the SELECT statement: +cur.execute("select * from people order by age") + +# Retrieve all rows as a sequence and print that sequence: +print(cur.fetchall()) diff --git a/Doc/includes/sqlite3/execute_1.py b/Doc/includes/sqlite3/execute_1.py new file mode 100644 index 0000000..3d08840 --- /dev/null +++ b/Doc/includes/sqlite3/execute_1.py @@ -0,0 +1,11 @@ +import sqlite3 + +con = sqlite3.connect("mydb") + +cur = con.cursor() + +who = "Yeltsin" +age = 72 + +cur.execute("select name_last, age from people where name_last=? 
and age=?", (who, age)) +print(cur.fetchone()) diff --git a/Doc/includes/sqlite3/execute_2.py b/Doc/includes/sqlite3/execute_2.py new file mode 100644 index 0000000..84734f9 --- /dev/null +++ b/Doc/includes/sqlite3/execute_2.py @@ -0,0 +1,12 @@ +import sqlite3 + +con = sqlite3.connect("mydb") + +cur = con.cursor() + +who = "Yeltsin" +age = 72 + +cur.execute("select name_last, age from people where name_last=:who and age=:age", + {"who": who, "age": age}) +print(cur.fetchone()) diff --git a/Doc/includes/sqlite3/execute_3.py b/Doc/includes/sqlite3/execute_3.py new file mode 100644 index 0000000..0353683 --- /dev/null +++ b/Doc/includes/sqlite3/execute_3.py @@ -0,0 +1,12 @@ +import sqlite3 + +con = sqlite3.connect("mydb") + +cur = con.cursor() + +who = "Yeltsin" +age = 72 + +cur.execute("select name_last, age from people where name_last=:who and age=:age", + locals()) +print(cur.fetchone()) diff --git a/Doc/includes/sqlite3/executemany_1.py b/Doc/includes/sqlite3/executemany_1.py new file mode 100644 index 0000000..efae106 --- /dev/null +++ b/Doc/includes/sqlite3/executemany_1.py @@ -0,0 +1,24 @@ +import sqlite3 + +class IterChars: + def __init__(self): + self.count = ord('a') + + def __iter__(self): + return self + + def __next__(self): + if self.count > ord('z'): + raise StopIteration + self.count += 1 + return (chr(self.count - 1),) # this is a 1-tuple + +con = sqlite3.connect(":memory:") +cur = con.cursor() +cur.execute("create table characters(c)") + +theIter = IterChars() +cur.executemany("insert into characters(c) values (?)", theIter) + +cur.execute("select c from characters") +print(cur.fetchall()) diff --git a/Doc/includes/sqlite3/executemany_2.py b/Doc/includes/sqlite3/executemany_2.py new file mode 100644 index 0000000..518cd94 --- /dev/null +++ b/Doc/includes/sqlite3/executemany_2.py @@ -0,0 +1,15 @@ +import sqlite3 + +def char_generator(): + import string + for c in string.letters[:26]: + yield (c,) + +con = sqlite3.connect(":memory:") +cur = 
con.cursor() +cur.execute("create table characters(c)") + +cur.executemany("insert into characters(c) values (?)", char_generator()) + +cur.execute("select c from characters") +print(cur.fetchall()) diff --git a/Doc/includes/sqlite3/executescript.py b/Doc/includes/sqlite3/executescript.py new file mode 100644 index 0000000..7e53581 --- /dev/null +++ b/Doc/includes/sqlite3/executescript.py @@ -0,0 +1,24 @@ +import sqlite3 + +con = sqlite3.connect(":memory:") +cur = con.cursor() +cur.executescript(""" + create table person( + firstname, + lastname, + age + ); + + create table book( + title, + author, + published + ); + + insert into book(title, author, published) + values ( + 'Dirk Gently''s Holistic Detective Agency', + 'Douglas Adams', + 1987 + ); + """) diff --git a/Doc/includes/sqlite3/insert_more_people.py b/Doc/includes/sqlite3/insert_more_people.py new file mode 100644 index 0000000..edbc79e --- /dev/null +++ b/Doc/includes/sqlite3/insert_more_people.py @@ -0,0 +1,16 @@ +import sqlite3 + +con = sqlite3.connect("mydb") + +cur = con.cursor() + +newPeople = ( + ('Lebed' , 53), + ('Zhirinovsky' , 57), + ) + +for person in newPeople: + cur.execute("insert into people (name_last, age) values (?, ?)", person) + +# The changes will not be saved unless the transaction is committed explicitly: +con.commit() diff --git a/Doc/includes/sqlite3/md5func.py b/Doc/includes/sqlite3/md5func.py new file mode 100644 index 0000000..b7bc05b --- /dev/null +++ b/Doc/includes/sqlite3/md5func.py @@ -0,0 +1,11 @@ +import sqlite3 +import hashlib + +def md5sum(t): + return hashlib.md5(t).hexdigest() + +con = sqlite3.connect(":memory:") +con.create_function("md5", 1, md5sum) +cur = con.cursor() +cur.execute("select md5(?)", ("foo",)) +print(cur.fetchone()[0]) diff --git a/Doc/includes/sqlite3/mysumaggr.py b/Doc/includes/sqlite3/mysumaggr.py new file mode 100644 index 0000000..d2dfd2c --- /dev/null +++ b/Doc/includes/sqlite3/mysumaggr.py @@ -0,0 +1,20 @@ +import sqlite3 + +class MySum: + def 
__init__(self): + self.count = 0 + + def step(self, value): + self.count += value + + def finalize(self): + return self.count + +con = sqlite3.connect(":memory:") +con.create_aggregate("mysum", 1, MySum) +cur = con.cursor() +cur.execute("create table test(i)") +cur.execute("insert into test(i) values (1)") +cur.execute("insert into test(i) values (2)") +cur.execute("select mysum(i) from test") +print(cur.fetchone()[0]) diff --git a/Doc/includes/sqlite3/parse_colnames.py b/Doc/includes/sqlite3/parse_colnames.py new file mode 100644 index 0000000..cc68c76 --- /dev/null +++ b/Doc/includes/sqlite3/parse_colnames.py @@ -0,0 +1,8 @@ +import sqlite3 +import datetime + +con = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_COLNAMES) +cur = con.cursor() +cur.execute('select ? as "x [timestamp]"', (datetime.datetime.now(),)) +dt = cur.fetchone()[0] +print(dt, type(dt)) diff --git a/Doc/includes/sqlite3/pysqlite_datetime.py b/Doc/includes/sqlite3/pysqlite_datetime.py new file mode 100644 index 0000000..68d4935 --- /dev/null +++ b/Doc/includes/sqlite3/pysqlite_datetime.py @@ -0,0 +1,20 @@ +import sqlite3 +import datetime + +con = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES|sqlite3.PARSE_COLNAMES) +cur = con.cursor() +cur.execute("create table test(d date, ts timestamp)") + +today = datetime.date.today() +now = datetime.datetime.now() + +cur.execute("insert into test(d, ts) values (?, ?)", (today, now)) +cur.execute("select d, ts from test") +row = cur.fetchone() +print(today, "=>", row[0], type(row[0])) +print(now, "=>", row[1], type(row[1])) + +cur.execute('select current_date as "d [date]", current_timestamp as "ts [timestamp]"') +row = cur.fetchone() +print("current_date", row[0], type(row[0])) +print("current_timestamp", row[1], type(row[1])) diff --git a/Doc/includes/sqlite3/row_factory.py b/Doc/includes/sqlite3/row_factory.py new file mode 100644 index 0000000..e436ffc --- /dev/null +++ b/Doc/includes/sqlite3/row_factory.py @@ -0,0 +1,13 @@ 
+import sqlite3 + +def dict_factory(cursor, row): + d = {} + for idx, col in enumerate(cursor.description): + d[col[0]] = row[idx] + return d + +con = sqlite3.connect(":memory:") +con.row_factory = dict_factory +cur = con.cursor() +cur.execute("select 1 as a") +print(cur.fetchone()["a"]) diff --git a/Doc/includes/sqlite3/rowclass.py b/Doc/includes/sqlite3/rowclass.py new file mode 100644 index 0000000..3fa0b87 --- /dev/null +++ b/Doc/includes/sqlite3/rowclass.py @@ -0,0 +1,12 @@ +import sqlite3 + +con = sqlite3.connect("mydb") +con.row_factory = sqlite3.Row + +cur = con.cursor() +cur.execute("select name_last, age from people") +for row in cur: + assert row[0] == row["name_last"] + assert row["name_last"] == row["nAmE_lAsT"] + assert row[1] == row["age"] + assert row[1] == row["AgE"] diff --git a/Doc/includes/sqlite3/shared_cache.py b/Doc/includes/sqlite3/shared_cache.py new file mode 100644 index 0000000..bf1d7b4 --- /dev/null +++ b/Doc/includes/sqlite3/shared_cache.py @@ -0,0 +1,6 @@ +import sqlite3 + +# The shared cache is only available in SQLite versions 3.3.3 or later +# See the SQLite documentation for details. + +sqlite3.enable_shared_cache(True) diff --git a/Doc/includes/sqlite3/shortcut_methods.py b/Doc/includes/sqlite3/shortcut_methods.py new file mode 100644 index 0000000..596d87c --- /dev/null +++ b/Doc/includes/sqlite3/shortcut_methods.py @@ -0,0 +1,21 @@ +import sqlite3 + +persons = [ + ("Hugo", "Boss"), + ("Calvin", "Klein") + ] + +con = sqlite3.connect(":memory:") + +# Create the table +con.execute("create table person(firstname, lastname)") + +# Fill the table +con.executemany("insert into person(firstname, lastname) values (?, ?)", persons) + +# Print the table contents +for row in con.execute("select firstname, lastname from person"): + print(row) + +# Using a dummy WHERE clause to not let SQLite take the shortcut table deletes.
+print("I just deleted", con.execute("delete from person where 1=1").rowcount, "rows") diff --git a/Doc/includes/sqlite3/simple_tableprinter.py b/Doc/includes/sqlite3/simple_tableprinter.py new file mode 100644 index 0000000..231d872 --- /dev/null +++ b/Doc/includes/sqlite3/simple_tableprinter.py @@ -0,0 +1,26 @@ +import sqlite3 + +FIELD_MAX_WIDTH = 20 +TABLE_NAME = 'people' +SELECT = 'select * from %s order by age, name_last' % TABLE_NAME + +con = sqlite3.connect("mydb") + +cur = con.cursor() +cur.execute(SELECT) + +# Print a header. +for fieldDesc in cur.description: + print(fieldDesc[0].ljust(FIELD_MAX_WIDTH), end=' ') +print() # Finish the header with a newline. +print('-' * 78) + +# For each row, print the value of each field left-justified within +# the maximum possible width of that field. +fieldIndices = range(len(cur.description)) +for row in cur: + for fieldIndex in fieldIndices: + fieldValue = str(row[fieldIndex]) + print(fieldValue.ljust(FIELD_MAX_WIDTH), end=' ') + + print() # Finish the row with a newline. diff --git a/Doc/includes/sqlite3/text_factory.py b/Doc/includes/sqlite3/text_factory.py new file mode 100644 index 0000000..2dab8e4 --- /dev/null +++ b/Doc/includes/sqlite3/text_factory.py @@ -0,0 +1,42 @@ +import sqlite3 + +con = sqlite3.connect(":memory:") +cur = con.cursor() + +# Create the table +con.execute("create table person(lastname, firstname)") + +AUSTRIA = "\xd6sterreich" + +# by default, rows are returned as Unicode +cur.execute("select ?", (AUSTRIA,)) +row = cur.fetchone() +assert row[0] == AUSTRIA + +# but we can make pysqlite always return bytestrings ... +con.text_factory = str +cur.execute("select ?", (AUSTRIA,)) +row = cur.fetchone() +assert type(row[0]) == str +# the bytestrings will be encoded in UTF-8, unless you stored garbage in the +# database ... +assert row[0] == AUSTRIA.encode("utf-8") + +# we can also implement a custom text_factory ... 
+# here we implement one that will ignore Unicode characters that cannot be +# decoded from UTF-8 +con.text_factory = lambda x: str(x, "utf-8", "ignore") +cur.execute("select ?", ("this is latin1 and would normally create errors" + "\xe4\xf6\xfc".encode("latin1"),)) +row = cur.fetchone() +assert type(row[0]) == str + +# pysqlite offers a builtin optimized text_factory that will return bytestring +# objects, if the data is in ASCII only, and otherwise return unicode objects +con.text_factory = sqlite3.OptimizedUnicode +cur.execute("select ?", (AUSTRIA,)) +row = cur.fetchone() +assert type(row[0]) == str + +cur.execute("select ?", ("Germany",)) +row = cur.fetchone() +assert type(row[0]) == str diff --git a/Doc/includes/test.py b/Doc/includes/test.py new file mode 100644 index 0000000..7ebf46a --- /dev/null +++ b/Doc/includes/test.py @@ -0,0 +1,213 @@ +"""Test module for the noddy examples + +Noddy 1: + +>>> import noddy +>>> n1 = noddy.Noddy() +>>> n2 = noddy.Noddy() +>>> del n1 +>>> del n2 + + +Noddy 2 + +>>> import noddy2 +>>> n1 = noddy2.Noddy('jim', 'fulton', 42) +>>> n1.first +'jim' +>>> n1.last +'fulton' +>>> n1.number +42 +>>> n1.name() +'jim fulton' +>>> n1.first = 'will' +>>> n1.name() +'will fulton' +>>> n1.last = 'tell' +>>> n1.name() +'will tell' +>>> del n1.first +>>> n1.name() +Traceback (most recent call last): +... +AttributeError: first +>>> n1.first +Traceback (most recent call last): +... +AttributeError: first +>>> n1.first = 'drew' +>>> n1.first +'drew' +>>> del n1.number +Traceback (most recent call last): +... +TypeError: can't delete numeric/char attribute +>>> n1.number=2 +>>> n1.number +2 +>>> n1.first = 42 +>>> n1.name() +'42 tell' +>>> n2 = noddy2.Noddy() +>>> n2.name() +' ' +>>> n2.first +'' +>>> n2.last +'' +>>> del n2.first +>>> n2.first +Traceback (most recent call last): +... +AttributeError: first +>>> n2.first +Traceback (most recent call last): +... 
+AttributeError: first +>>> n2.name() +Traceback (most recent call last): + File "", line 1, in ? +AttributeError: first +>>> n2.number +0 +>>> n3 = noddy2.Noddy('jim', 'fulton', 'waaa') +Traceback (most recent call last): + File "", line 1, in ? +TypeError: an integer is required +>>> del n1 +>>> del n2 + + +Noddy 3 + +>>> import noddy3 +>>> n1 = noddy3.Noddy('jim', 'fulton', 42) +>>> n1 = noddy3.Noddy('jim', 'fulton', 42) +>>> n1.name() +'jim fulton' +>>> del n1.first +Traceback (most recent call last): + File "", line 1, in ? +TypeError: Cannot delete the first attribute +>>> n1.first = 42 +Traceback (most recent call last): + File "", line 1, in ? +TypeError: The first attribute value must be a string +>>> n1.first = 'will' +>>> n1.name() +'will fulton' +>>> n2 = noddy3.Noddy() +>>> n2 = noddy3.Noddy() +>>> n2 = noddy3.Noddy() +>>> n3 = noddy3.Noddy('jim', 'fulton', 'waaa') +Traceback (most recent call last): + File "", line 1, in ? +TypeError: an integer is required +>>> del n1 +>>> del n2 + +Noddy 4 + +>>> import noddy4 +>>> n1 = noddy4.Noddy('jim', 'fulton', 42) +>>> n1.first +'jim' +>>> n1.last +'fulton' +>>> n1.number +42 +>>> n1.name() +'jim fulton' +>>> n1.first = 'will' +>>> n1.name() +'will fulton' +>>> n1.last = 'tell' +>>> n1.name() +'will tell' +>>> del n1.first +>>> n1.name() +Traceback (most recent call last): +... +AttributeError: first +>>> n1.first +Traceback (most recent call last): +... +AttributeError: first +>>> n1.first = 'drew' +>>> n1.first +'drew' +>>> del n1.number +Traceback (most recent call last): +... +TypeError: can't delete numeric/char attribute +>>> n1.number=2 +>>> n1.number +2 +>>> n1.first = 42 +>>> n1.name() +'42 tell' +>>> n2 = noddy4.Noddy() +>>> n2 = noddy4.Noddy() +>>> n2 = noddy4.Noddy() +>>> n2 = noddy4.Noddy() +>>> n2.name() +' ' +>>> n2.first +'' +>>> n2.last +'' +>>> del n2.first +>>> n2.first +Traceback (most recent call last): +... +AttributeError: first +>>> n2.first +Traceback (most recent call last): +... 
+AttributeError: first +>>> n2.name() +Traceback (most recent call last): + File "", line 1, in ? +AttributeError: first +>>> n2.number +0 +>>> n3 = noddy4.Noddy('jim', 'fulton', 'waaa') +Traceback (most recent call last): + File "", line 1, in ? +TypeError: an integer is required + + +Test cyclic gc(?) + +>>> import gc +>>> gc.disable() + +>>> x = [] +>>> l = [x] +>>> n2.first = l +>>> n2.first +[[]] +>>> l.append(n2) +>>> del l +>>> del n1 +>>> del n2 +>>> sys.getrefcount(x) +3 +>>> ignore = gc.collect() +>>> sys.getrefcount(x) +2 + +>>> gc.enable() +""" + +import os +import sys +from distutils.util import get_platform +PLAT_SPEC = "%s-%s" % (get_platform(), sys.version[0:3]) +src = os.path.join("build", "lib.%s" % PLAT_SPEC) +sys.path.append(src) + +if __name__ == "__main__": + import doctest, __main__ + doctest.testmod(__main__) diff --git a/Doc/includes/typestruct.h b/Doc/includes/typestruct.h new file mode 100644 index 0000000..0afe375 --- /dev/null +++ b/Doc/includes/typestruct.h @@ -0,0 +1,76 @@ +typedef struct _typeobject { + PyObject_VAR_HEAD + char *tp_name; /* For printing, in format "." 
*/ + int tp_basicsize, tp_itemsize; /* For allocation */ + + /* Methods to implement standard operations */ + + destructor tp_dealloc; + printfunc tp_print; + getattrfunc tp_getattr; + setattrfunc tp_setattr; + cmpfunc tp_compare; + reprfunc tp_repr; + + /* Method suites for standard classes */ + + PyNumberMethods *tp_as_number; + PySequenceMethods *tp_as_sequence; + PyMappingMethods *tp_as_mapping; + + /* More standard operations (here for binary compatibility) */ + + hashfunc tp_hash; + ternaryfunc tp_call; + reprfunc tp_str; + getattrofunc tp_getattro; + setattrofunc tp_setattro; + + /* Functions to access object as input/output buffer */ + PyBufferProcs *tp_as_buffer; + + /* Flags to define presence of optional/expanded features */ + long tp_flags; + + char *tp_doc; /* Documentation string */ + + /* Assigned meaning in release 2.0 */ + /* call function for all accessible objects */ + traverseproc tp_traverse; + + /* delete references to contained objects */ + inquiry tp_clear; + + /* Assigned meaning in release 2.1 */ + /* rich comparisons */ + richcmpfunc tp_richcompare; + + /* weak reference enabler */ + long tp_weaklistoffset; + + /* Added in release 2.2 */ + /* Iterators */ + getiterfunc tp_iter; + iternextfunc tp_iternext; + + /* Attribute descriptor and subclassing stuff */ + struct PyMethodDef *tp_methods; + struct PyMemberDef *tp_members; + struct PyGetSetDef *tp_getset; + struct _typeobject *tp_base; + PyObject *tp_dict; + descrgetfunc tp_descr_get; + descrsetfunc tp_descr_set; + long tp_dictoffset; + initproc tp_init; + allocfunc tp_alloc; + newfunc tp_new; + freefunc tp_free; /* Low-level free-memory routine */ + inquiry tp_is_gc; /* For PyObject_IS_GC */ + PyObject *tp_bases; + PyObject *tp_mro; /* method resolution order */ + PyObject *tp_cache; + PyObject *tp_subclasses; + PyObject *tp_weaklist; + +} PyTypeObject; diff --git a/Doc/includes/tzinfo-examples.py b/Doc/includes/tzinfo-examples.py new file mode 100644 index 0000000..5a2b8ad --- 
/dev/null +++ b/Doc/includes/tzinfo-examples.py @@ -0,0 +1,139 @@ +from datetime import tzinfo, timedelta, datetime + +ZERO = timedelta(0) +HOUR = timedelta(hours=1) + +# A UTC class. + +class UTC(tzinfo): + """UTC""" + + def utcoffset(self, dt): + return ZERO + + def tzname(self, dt): + return "UTC" + + def dst(self, dt): + return ZERO + +utc = UTC() + +# A class building tzinfo objects for fixed-offset time zones. +# Note that FixedOffset(0, "UTC") is a different way to build a +# UTC tzinfo object. + +class FixedOffset(tzinfo): + """Fixed offset in minutes east from UTC.""" + + def __init__(self, offset, name): + self.__offset = timedelta(minutes = offset) + self.__name = name + + def utcoffset(self, dt): + return self.__offset + + def tzname(self, dt): + return self.__name + + def dst(self, dt): + return ZERO + +# A class capturing the platform's idea of local time. + +import time as _time + +STDOFFSET = timedelta(seconds = -_time.timezone) +if _time.daylight: + DSTOFFSET = timedelta(seconds = -_time.altzone) +else: + DSTOFFSET = STDOFFSET + +DSTDIFF = DSTOFFSET - STDOFFSET + +class LocalTimezone(tzinfo): + + def utcoffset(self, dt): + if self._isdst(dt): + return DSTOFFSET + else: + return STDOFFSET + + def dst(self, dt): + if self._isdst(dt): + return DSTDIFF + else: + return ZERO + + def tzname(self, dt): + return _time.tzname[self._isdst(dt)] + + def _isdst(self, dt): + tt = (dt.year, dt.month, dt.day, + dt.hour, dt.minute, dt.second, + dt.weekday(), 0, -1) + stamp = _time.mktime(tt) + tt = _time.localtime(stamp) + return tt.tm_isdst > 0 + +Local = LocalTimezone() + + +# A complete implementation of current DST rules for major US time zones. + +def first_sunday_on_or_after(dt): + days_to_go = 6 - dt.weekday() + if days_to_go: + dt += timedelta(days_to_go) + return dt + +# In the US, DST starts at 2am (standard time) on the first Sunday in April. +DSTSTART = datetime(1, 4, 1, 2) +# and ends at 2am (DST time; 1am standard time) on the last Sunday of Oct. 
+# which is the first Sunday on or after Oct 25. +DSTEND = datetime(1, 10, 25, 1) + +class USTimeZone(tzinfo): + + def __init__(self, hours, reprname, stdname, dstname): + self.stdoffset = timedelta(hours=hours) + self.reprname = reprname + self.stdname = stdname + self.dstname = dstname + + def __repr__(self): + return self.reprname + + def tzname(self, dt): + if self.dst(dt): + return self.dstname + else: + return self.stdname + + def utcoffset(self, dt): + return self.stdoffset + self.dst(dt) + + def dst(self, dt): + if dt is None or dt.tzinfo is None: + # An exception may be sensible here, in one or both cases. + # It depends on how you want to treat them. The default + # fromutc() implementation (called by the default astimezone() + # implementation) passes a datetime with dt.tzinfo is self. + return ZERO + assert dt.tzinfo is self + + # Find first Sunday in April & the last in October. + start = first_sunday_on_or_after(DSTSTART.replace(year=dt.year)) + end = first_sunday_on_or_after(DSTEND.replace(year=dt.year)) + + # Can't compare naive to aware objects, so strip the timezone from + # dt first. + if start <= dt.replace(tzinfo=None) < end: + return HOUR + else: + return ZERO + +Eastern = USTimeZone(-5, "Eastern", "EST", "EDT") +Central = USTimeZone(-6, "Central", "CST", "CDT") +Mountain = USTimeZone(-7, "Mountain", "MST", "MDT") +Pacific = USTimeZone(-8, "Pacific", "PST", "PDT") diff --git a/Doc/install/index.rst b/Doc/install/index.rst new file mode 100644 index 0000000..01f17f8 --- /dev/null +++ b/Doc/install/index.rst @@ -0,0 +1,1011 @@ +.. highlightlang:: none + +.. _install-index: + +***************************** + Installing Python Modules +***************************** + +:Author: Greg Ward +:Release: |version| +:Date: |today| + +.. % TODO: +.. % Fill in XXX comments + +.. % The audience for this document includes people who don't know anything +.. % about Python and aren't about to learn the language just in order to +.. 
% install and maintain it for their users, i.e. system administrators. +.. % Thus, I have to be sure to explain the basics at some point: +.. % sys.path and PYTHONPATH at least. Should probably give pointers to +.. % other docs on "import site", PYTHONSTARTUP, PYTHONHOME, etc. +.. % +.. % Finally, it might be useful to include all the material from my "Care +.. % and Feeding of a Python Installation" talk in here somewhere. Yow! + +.. topic:: Abstract + + This document describes the Python Distribution Utilities ("Distutils") from the + end-user's point-of-view, describing how to extend the capabilities of a + standard Python installation by building and installing third-party Python + modules and extensions. + + +.. _inst-intro: + +Introduction +============ + +Although Python's extensive standard library covers many programming needs, +there often comes a time when you need to add some new functionality to your +Python installation in the form of third-party modules. This might be necessary +to support your own programming, or to support an application that you want to +use and that happens to be written in Python. + +In the past, there has been little support for adding third-party modules to an +existing Python installation. With the introduction of the Python Distribution +Utilities (Distutils for short) in Python 2.0, this changed. + +This document is aimed primarily at the people who need to install third-party +Python modules: end-users and system administrators who just need to get some +Python application running, and existing Python programmers who want to add some +new goodies to their toolbox. You don't need to know Python to read this +document; there will be some brief forays into using Python's interactive mode +to explore your installation, but that's it. If you're looking for information +on how to distribute your own Python modules so that others may use them, see +the :ref:`distutils-index` manual. + + +.. 
_inst-trivial-install: + +Best case: trivial installation +------------------------------- + +In the best case, someone will have prepared a special version of the module +distribution you want to install that is targeted specifically at your platform +and is installed just like any other software on your platform. For example, +the module developer might make an executable installer available for Windows +users, an RPM package for users of RPM-based Linux systems (Red Hat, SuSE, +Mandrake, and many others), a Debian package for users of Debian-based Linux +systems, and so forth. + +In that case, you would download the installer appropriate to your platform and +do the obvious thing with it: run it if it's an executable installer, ``rpm +--install`` it if it's an RPM, etc. You don't need to run Python or a setup +script, you don't need to compile anything---you might not even need to read any +instructions (although it's always a good idea to do so anyways). + +Of course, things will not always be that easy. You might be interested in a +module distribution that doesn't have an easy-to-use installer for your +platform. In that case, you'll have to start with the source distribution +released by the module's author/maintainer. Installing from a source +distribution is not too hard, as long as the modules are packaged in the +standard way. The bulk of this document is about building and installing +modules from standard source distributions. + + +.. _inst-new-standard: + +The new standard: Distutils +--------------------------- + +If you download a module source distribution, you can tell pretty quickly if it +was packaged and distributed in the standard way, i.e. using the Distutils. +First, the distribution's name and version number will be featured prominently +in the name of the downloaded archive, e.g. :file:`foo-1.0.tar.gz` or +:file:`widget-0.9.7.zip`. Next, the archive will unpack into a similarly-named +directory: :file:`foo-1.0` or :file:`widget-0.9.7`. 
Additionally, the +distribution will contain a setup script :file:`setup.py`, and a file named +:file:`README.txt` or possibly just :file:`README`, which should explain that +building and installing the module distribution is a simple matter of running :: + + python setup.py install + +If all these things are true, then you already know how to build and install the +modules you've just downloaded: Run the command above. Unless you need to +install things in a non-standard way or customize the build process, you don't +really need this manual. Or rather, the above command is everything you need to +get out of this manual. + + +.. _inst-standard-install: + +Standard Build and Install +========================== + +As described in section :ref:`inst-new-standard`, building and installing a module +distribution using the Distutils is usually one simple command:: + + python setup.py install + +On Unix, you'd run this command from a shell prompt; on Windows, you have to +open a command prompt window ("DOS box") and do it there; on Mac OS X, you open +a :command:`Terminal` window to get a shell prompt. + + +.. _inst-platform-variations: + +Platform variations +------------------- + +You should always run the setup command from the distribution root directory, +i.e. the top-level subdirectory that the module source distribution unpacks +into. For example, if you've just downloaded a module source distribution +:file:`foo-1.0.tar.gz` onto a Unix system, the normal thing to do is:: + + gunzip -c foo-1.0.tar.gz | tar xf - # unpacks into directory foo-1.0 + cd foo-1.0 + python setup.py install + +On Windows, you'd probably download :file:`foo-1.0.zip`. If you downloaded the +archive file to :file:`C:\\Temp`, then it would unpack into +:file:`C:\\Temp\\foo-1.0`; you can use either an archive manipulator with a +graphical user interface (such as WinZip) or a command-line tool (such as +:program:`unzip` or :program:`pkunzip`) to unpack the archive.
Then, open a +command prompt window ("DOS box"), and run:: + + cd c:\Temp\foo-1.0 + python setup.py install + + +.. _inst-splitting-up: + +Splitting the job up +-------------------- + +Running ``setup.py install`` builds and installs all modules in one run. If you +prefer to work incrementally---especially useful if you want to customize the +build process, or if things are going wrong---you can use the setup script to do +one thing at a time. This is particularly helpful when the build and install +will be done by different users---for example, you might want to build a module +distribution and hand it off to a system administrator for installation (or do +it yourself, with super-user privileges). + +For example, you can build everything in one step, and then install everything +in a second step, by invoking the setup script twice:: + + python setup.py build + python setup.py install + +If you do this, you will notice that running the :command:`install` command +first runs the :command:`build` command, which---in this case---quickly notices +that it has nothing to do, since everything in the :file:`build` directory is +up-to-date. + +You may not need this ability to break things down often if all you do is +install modules downloaded off the 'net, but it's very handy for more advanced +tasks. If you get into distributing your own Python modules and extensions, +you'll run lots of individual Distutils commands on their own. + + +.. _inst-how-build-works: + +How building works +------------------ + +As implied above, the :command:`build` command is responsible for putting the +files to install into a *build directory*. By default, this is :file:`build` +under the distribution root; if you're excessively concerned with speed, or want +to keep the source tree pristine, you can change the build directory with the +:option:`--build-base` option. 
For example:: + + python setup.py build --build-base=/tmp/pybuild/foo-1.0 + +(Or you could do this permanently with a directive in your system or personal +Distutils configuration file; see section :ref:`inst-config-files`.) Normally, this +isn't necessary. + +The default layout for the build tree is as follows:: + + --- build/ --- lib/ + or + --- build/ --- lib.<plat>/ + temp.<plat>/ + +where ``<plat>`` expands to a brief description of the current OS/hardware +platform and Python version. The first form, with just a :file:`lib` directory, +is used for "pure module distributions"---that is, module distributions that +include only pure Python modules. If a module distribution contains any +extensions (modules written in C/C++), then the second form, with two ``<plat>`` +directories, is used. In that case, the :file:`temp.{plat}` directory holds +temporary files generated by the compile/link process that don't actually get +installed. In either case, the :file:`lib` (or :file:`lib.{plat}`) directory +contains all Python modules (pure Python and extensions) that will be installed. + +In the future, more directories will be added to handle Python scripts, +documentation, binary executables, and whatever else is needed to handle the job +of installing Python modules and applications. + + +.. _inst-how-install-works: + +How installation works +---------------------- + +After the :command:`build` command runs (whether you run it explicitly, or the +:command:`install` command does it for you), the work of the :command:`install` +command is relatively simple: all it has to do is copy everything under +:file:`build/lib` (or :file:`build/lib.{plat}`) to your chosen installation +directory. + +If you don't choose an installation directory---i.e., if you just run ``setup.py +install``\ ---then the :command:`install` command installs to the standard +location for third-party Python modules. This location varies by platform and +by how you built/installed Python itself.
On Unix (and Mac OS X, which is also +Unix-based), it also depends on whether the module distribution being installed +is pure Python or contains extensions ("non-pure"): + ++-----------------+-----------------------------------------------------+--------------------------------------------------+-------+ +| Platform | Standard installation location | Default value | Notes | ++=================+=====================================================+==================================================+=======+ +| Unix (pure) | :file:`{prefix}/lib/python{X.Y}/site-packages` | :file:`/usr/local/lib/python{X.Y}/site-packages` | \(1) | ++-----------------+-----------------------------------------------------+--------------------------------------------------+-------+ +| Unix (non-pure) | :file:`{exec-prefix}/lib/python{X.Y}/site-packages` | :file:`/usr/local/lib/python{X.Y}/site-packages` | \(1) | ++-----------------+-----------------------------------------------------+--------------------------------------------------+-------+ +| Windows | :file:`{prefix}` | :file:`C:\\Python` | \(2) | ++-----------------+-----------------------------------------------------+--------------------------------------------------+-------+ + +Notes: + +(1) + Most Linux distributions include Python as a standard part of the system, so + :file:`{prefix}` and :file:`{exec-prefix}` are usually both :file:`/usr` on + Linux. If you build Python yourself on Linux (or any Unix-like system), the + default :file:`{prefix}` and :file:`{exec-prefix}` are :file:`/usr/local`. + +(2) + The default installation directory on Windows was :file:`C:\\Program + Files\\Python` under Python 1.6a1, 1.5.2, and earlier. + +:file:`{prefix}` and :file:`{exec-prefix}` stand for the directories that Python +is installed to, and where it finds its libraries at run-time. They are always +the same under Windows, and very often the same under Unix and Mac OS X. 
You +can find out what your Python installation uses for :file:`{prefix}` and +:file:`{exec-prefix}` by running Python in interactive mode and typing a few +simple commands. Under Unix, just type ``python`` at the shell prompt. Under +Windows, choose :menuselection:`Start --> Programs --> Python X.Y --> +Python (command line)`. Once the interpreter is started, you type Python code +at the prompt. For example, on my Linux system, I type the three Python +statements shown below, and get the output as shown, to find out my +:file:`{prefix}` and :file:`{exec-prefix}`:: + + Python 2.4 (#26, Aug 7 2004, 17:19:02) + Type "help", "copyright", "credits" or "license" for more information. + >>> import sys + >>> sys.prefix + '/usr' + >>> sys.exec_prefix + '/usr' + +If you don't want to install modules to the standard location, or if you don't +have permission to write there, then you need to read about alternate +installations in section :ref:`inst-alt-install`. If you want to customize your +installation directories more heavily, see section :ref:`inst-custom-install` on +custom installations. + + +.. _inst-alt-install: + +Alternate Installation +====================== + +Often, it is necessary or desirable to install modules to a location other than +the standard location for third-party Python modules. For example, on a Unix +system you might not have permission to write to the standard third-party module +directory. Or you might wish to try out a module before making it a standard +part of your local Python installation. This is especially true when upgrading +a distribution already present: you want to make sure your existing base of +scripts still works with the new version before actually upgrading. + +The Distutils :command:`install` command is designed to make installing module +distributions to an alternate location simple and painless. 
The basic idea is +that you supply a base directory for the installation, and the +:command:`install` command picks a set of directories (called an *installation +scheme*) under this base directory in which to install files. The details +differ across platforms, so read whichever of the following sections applies to +you. + + +.. _inst-alt-install-prefix: + +Alternate installation: the home scheme +--------------------------------------- + +The idea behind the "home scheme" is that you build and maintain a personal +stash of Python modules. This scheme's name is derived from the idea of a +"home" directory on Unix, since it's not unusual for a Unix user to make their +home directory have a layout similar to :file:`/usr/` or :file:`/usr/local/`. +This scheme can be used by anyone, regardless of the operating system they're +installing for. + +Installing a new module distribution is as simple as :: + + python setup.py install --home=<dir> + +where you can supply any directory you like for the :option:`--home` option. On +Unix, lazy typists can just type a tilde (``~``); the :command:`install` command +will expand this to your home directory:: + + python setup.py install --home=~ + +The :option:`--home` option defines the installation base directory.
Files are +installed to the following directories under the installation base as follows: + ++------------------------------+---------------------------+-----------------------------+ +| Type of file | Installation Directory | Override option | ++==============================+===========================+=============================+ +| pure module distribution | :file:`{home}/lib/python` | :option:`--install-purelib` | ++------------------------------+---------------------------+-----------------------------+ +| non-pure module distribution | :file:`{home}/lib/python` | :option:`--install-platlib` | ++------------------------------+---------------------------+-----------------------------+ +| scripts | :file:`{home}/bin` | :option:`--install-scripts` | ++------------------------------+---------------------------+-----------------------------+ +| data | :file:`{home}/share` | :option:`--install-data` | ++------------------------------+---------------------------+-----------------------------+ + +.. versionchanged:: 2.4 + The :option:`--home` option used to be supported only on Unix. + + +.. _inst-alt-install-home: + +Alternate installation: Unix (the prefix scheme) +------------------------------------------------ + +The "prefix scheme" is useful when you wish to use one Python installation to +perform the build/install (i.e., to run the setup script), but install modules +into the third-party module directory of a different Python installation (or +something that looks like a different Python installation). If this sounds a +trifle unusual, it is---that's why the "home scheme" comes first. However, +there are at least two known cases where the prefix scheme will be useful. + +First, consider that many Linux distributions put Python in :file:`/usr`, rather +than the more traditional :file:`/usr/local`. This is entirely appropriate, +since in those cases Python is part of "the system" rather than a local add-on. 
+However, if you are installing Python modules from source, you probably want +them to go in :file:`/usr/local/lib/python2.{X}` rather than +:file:`/usr/lib/python2.{X}`. This can be done with :: + + /usr/bin/python setup.py install --prefix=/usr/local + +Another possibility is a network filesystem where the name used to write to a +remote directory is different from the name used to read it: for example, the +Python interpreter accessed as :file:`/usr/local/bin/python` might search for +modules in :file:`/usr/local/lib/python2.{X}`, but those modules would have to +be installed to, say, :file:`/mnt/{@server}/export/lib/python2.{X}`. This could +be done with :: + + /usr/local/bin/python setup.py install --prefix=/mnt/@server/export + +In either case, the :option:`--prefix` option defines the installation base, and +the :option:`--exec-prefix` option defines the platform-specific installation +base, which is used for platform-specific files. (Currently, this just means +non-pure module distributions, but could be expanded to C libraries, binary +executables, etc.) If :option:`--exec-prefix` is not supplied, it defaults to +:option:`--prefix`. 
Files are installed as follows: + ++------------------------------+-----------------------------------------------------+-----------------------------+ +| Type of file | Installation Directory | Override option | ++==============================+=====================================================+=============================+ +| pure module distribution | :file:`{prefix}/lib/python{X.Y}/site-packages` | :option:`--install-purelib` | ++------------------------------+-----------------------------------------------------+-----------------------------+ +| non-pure module distribution | :file:`{exec-prefix}/lib/python{X.Y}/site-packages` | :option:`--install-platlib` | ++------------------------------+-----------------------------------------------------+-----------------------------+ +| scripts | :file:`{prefix}/bin` | :option:`--install-scripts` | ++------------------------------+-----------------------------------------------------+-----------------------------+ +| data | :file:`{prefix}/share` | :option:`--install-data` | ++------------------------------+-----------------------------------------------------+-----------------------------+ + +There is no requirement that :option:`--prefix` or :option:`--exec-prefix` +actually point to an alternate Python installation; if the directories listed +above do not already exist, they are created at installation time. + +Incidentally, the real reason the prefix scheme is important is simply that a +standard Unix installation uses the prefix scheme, but with :option:`--prefix` +and :option:`--exec-prefix` supplied by Python itself as ``sys.prefix`` and +``sys.exec_prefix``. Thus, you might think you'll never use the prefix scheme, +but every time you run ``python setup.py install`` without any other options, +you're using it. 
+ +Note that installing extensions to an alternate Python installation has no +effect on how those extensions are built: in particular, the Python header files +(:file:`Python.h` and friends) installed with the Python interpreter used to run +the setup script will be used in compiling extensions. It is your +responsibility to ensure that the interpreter used to run extensions installed +in this way is compatible with the interpreter used to build them. The best way +to do this is to ensure that the two interpreters are the same version of Python +(possibly different builds, or possibly copies of the same build). (Of course, +if your :option:`--prefix` and :option:`--exec-prefix` don't even point to an +alternate Python installation, this is immaterial.) + + +.. _inst-alt-install-windows: + +Alternate installation: Windows (the prefix scheme) +--------------------------------------------------- + +Windows has no concept of a user's home directory, and since the standard Python +installation under Windows is simpler than under Unix, the :option:`--prefix` +option has traditionally been used to install additional packages in separate +locations on Windows. For example, run :: + + python setup.py install --prefix="\Temp\Python" + +to install modules to the :file:`\\Temp\\Python` directory on the current drive. + +The installation base is defined by the :option:`--prefix` option; the +:option:`--exec-prefix` option is not supported under Windows. 
Files are +installed as follows: + ++------------------------------+---------------------------+-----------------------------+ +| Type of file | Installation Directory | Override option | ++==============================+===========================+=============================+ +| pure module distribution | :file:`{prefix}` | :option:`--install-purelib` | ++------------------------------+---------------------------+-----------------------------+ +| non-pure module distribution | :file:`{prefix}` | :option:`--install-platlib` | ++------------------------------+---------------------------+-----------------------------+ +| scripts | :file:`{prefix}\\Scripts` | :option:`--install-scripts` | ++------------------------------+---------------------------+-----------------------------+ +| data | :file:`{prefix}\\Data` | :option:`--install-data` | ++------------------------------+---------------------------+-----------------------------+ + + +.. _inst-custom-install: + +Custom Installation +=================== + +Sometimes, the alternate installation schemes described in section +:ref:`inst-alt-install` just don't do what you want. You might want to tweak just +one or two directories while keeping everything under the same base directory, +or you might want to completely redefine the installation scheme. In either +case, you're creating a *custom installation scheme*. + +You probably noticed the column of "override options" in the tables describing +the alternate installation schemes above. Those options are how you define a +custom installation scheme. These override options can be relative, absolute, +or explicitly defined in terms of one of the installation base directories. +(There are two installation base directories, and they are normally the same--- +they only differ when you use the Unix "prefix scheme" and supply different +:option:`--prefix` and :option:`--exec-prefix` options.) 
+ +For example, say you're installing a module distribution to your home directory +under Unix---but you want scripts to go in :file:`~/scripts` rather than +:file:`~/bin`. As you might expect, you can override this directory with the +:option:`--install-scripts` option; in this case, it makes most sense to supply +a relative path, which will be interpreted relative to the installation base +directory (your home directory, in this case):: + + python setup.py install --home=~ --install-scripts=scripts + +Another Unix example: suppose your Python installation was built and installed +with a prefix of :file:`/usr/local/python`, so under a standard installation +scripts will wind up in :file:`/usr/local/python/bin`. If you want them in +:file:`/usr/local/bin` instead, you would supply this absolute directory for the +:option:`--install-scripts` option:: + + python setup.py install --install-scripts=/usr/local/bin + +(This performs an installation using the "prefix scheme," where the prefix is +whatever your Python interpreter was installed with--- :file:`/usr/local/python` +in this case.) + +If you maintain Python on Windows, you might want third-party modules to live in +a subdirectory of :file:`{prefix}`, rather than right in :file:`{prefix}` +itself. This is almost as easy as customizing the script installation directory +---you just have to remember that there are two types of modules to worry about, +pure modules and non-pure modules (i.e., modules from a non-pure distribution). +For example:: + + python setup.py install --install-purelib=Site --install-platlib=Site + +The specified installation directories are relative to :file:`{prefix}`. Of +course, you also have to ensure that these directories are in Python's module +search path, such as by putting a :file:`.pth` file in :file:`{prefix}`. See +section :ref:`inst-search-path` to find out how to modify Python's search path. 
+ +If you want to define an entire installation scheme, you just have to supply all +of the installation directory options. The recommended way to do this is to +supply relative paths; for example, if you want to maintain all Python +module-related files under :file:`python` in your home directory, and you want a +separate directory for each platform that you use your home directory from, you +might define the following installation scheme:: + + python setup.py install --home=~ \ + --install-purelib=python/lib \ + --install-platlib='python/lib.$PLAT' \ + --install-scripts=python/scripts \ + --install-data=python/data + +or, equivalently, + +.. % $ % -- bow to font-lock + +:: + + python setup.py install --home=~/python \ + --install-purelib=lib \ + --install-platlib='lib.$PLAT' \ + --install-scripts=scripts \ + --install-data=data + +``$PLAT`` is not (necessarily) an environment variable---it will be expanded by +the Distutils as it parses your command line options, just as it does when +parsing your configuration file(s). + +.. % $ % -- bow to font-lock + +Obviously, specifying the entire installation scheme every time you install a +new module distribution would be very tedious. Thus, you can put these options +into your Distutils config file (see section :ref:`inst-config-files`):: + + [install] + install-base=$HOME + install-purelib=python/lib + install-platlib=python/lib.$PLAT + install-scripts=python/scripts + install-data=python/data + +or, equivalently, :: + + [install] + install-base=$HOME/python + install-purelib=lib + install-platlib=lib.$PLAT + install-scripts=scripts + install-data=data + +Note that these two are *not* equivalent if you supply a different installation +base directory when you run the setup script. For example, :: + + python setup.py install --install-base=/tmp + +would install pure modules to :file:`/tmp/python/lib` in the first case, and +to :file:`/tmp/lib` in the second case. 
(For the second case, you probably +want to supply an installation base of :file:`/tmp/python`.) + +You probably noticed the use of ``$HOME`` and ``$PLAT`` in the sample +configuration file input. These are Distutils configuration variables, which +bear a strong resemblance to environment variables. In fact, you can use +environment variables in config files on platforms that have such a notion, but +the Distutils additionally define a few extra variables that may not be in your +environment, such as ``$PLAT``. (And of course, on systems that don't have +environment variables, such as Mac OS 9, the configuration variables supplied by +the Distutils are the only ones you can use.) See section :ref:`inst-config-files` +for details. + +.. % XXX need some Windows examples---when would custom +.. % installation schemes be needed on those platforms? + +.. % XXX I'm not sure where this section should go. + + +.. _inst-search-path: + +Modifying Python's Search Path +------------------------------ + +When the Python interpreter executes an :keyword:`import` statement, it searches +for both Python code and extension modules along a search path. A default value +for the path is configured into the Python binary when the interpreter is built. +You can determine the path by importing the :mod:`sys` module and printing the +value of ``sys.path``. :: + + $ python + Python 2.2 (#11, Oct 3 2002, 13:31:27) + [GCC 2.96 20000731 (Red Hat Linux 7.3 2.96-112)] on linux2 + Type "help", "copyright", "credits" or "license" for more information. + >>> import sys + >>> sys.path + ['', '/usr/local/lib/python2.3', '/usr/local/lib/python2.3/plat-linux2', + '/usr/local/lib/python2.3/lib-tk', '/usr/local/lib/python2.3/lib-dynload', + '/usr/local/lib/python2.3/site-packages'] + >>> + +The null string in ``sys.path`` represents the current working directory. + +.. 
% $ <-- bow to font-lock + +The expected convention for locally installed packages is to put them in the +:file:`{...}/site-packages/` directory, but you may want to install Python +modules into some arbitrary directory. For example, your site may have a +convention of keeping all software related to the web server under :file:`/www`. +Add-on Python modules might then belong in :file:`/www/python`, and in order to +import them, this directory must be added to ``sys.path``. There are several +different ways to add the directory. + +The most convenient way is to add a path configuration file to a directory +that's already on Python's path, usually to the :file:`.../site-packages/` +directory. Path configuration files have an extension of :file:`.pth`, and each +line must contain a single path that will be appended to ``sys.path``. (Because +the new paths are appended to ``sys.path``, modules in the added directories +will not override standard modules. This means you can't use this mechanism for +installing fixed versions of standard modules.) + +Paths can be absolute or relative, in which case they're relative to the +directory containing the :file:`.pth` file. Any directories added to the search +path will be scanned in turn for :file:`.pth` files. See the documentation of +the :mod:`site` module for +more information. + +A slightly less convenient way is to edit the :file:`site.py` file in Python's +standard library, and modify ``sys.path``. :file:`site.py` is automatically +imported when the Python interpreter is executed, unless the :option:`-S` switch +is supplied to suppress this behaviour. So you could simply edit +:file:`site.py` and add two lines to it:: + + import sys + sys.path.append('/www/python/') + +However, if you reinstall the same major version of Python (perhaps when +upgrading from 2.2 to 2.2.2, for example) :file:`site.py` will be overwritten by +the stock version. You'd have to remember that it was modified and save a copy +before doing the installation. 
+ +There are two environment variables that can modify ``sys.path``. +:envvar:`PYTHONHOME` sets an alternate value for the prefix of the Python +installation. For example, if :envvar:`PYTHONHOME` is set to ``/www/python``, +the search path will be set to ``['', '/www/python/lib/pythonX.Y/', +'/www/python/lib/pythonX.Y/plat-linux2', ...]``. + +The :envvar:`PYTHONPATH` variable can be set to a list of paths that will be +added to the beginning of ``sys.path``. For example, if :envvar:`PYTHONPATH` is +set to ``/www/python:/opt/py``, the search path will begin with +``['/www/python', '/opt/py']``. (Note that directories must exist in order to +be added to ``sys.path``; the :mod:`site` module removes paths that don't +exist.) + +Finally, ``sys.path`` is just a regular Python list, so any Python application +can modify it by adding or removing entries. + + +.. _inst-config-files: + +Distutils Configuration Files +============================= + +As mentioned above, you can use Distutils configuration files to record personal +or site preferences for any Distutils options. That is, any option to any +command can be stored in one of two or three (depending on your platform) +configuration files, which will be consulted before the command-line is parsed. +This means that configuration files will override default values, and the +command-line will in turn override configuration files. Furthermore, if +multiple configuration files apply, values from "earlier" files are overridden +by "later" files. + + +.. _inst-config-filenames: + +Location and names of config files +---------------------------------- + +The names and locations of the configuration files vary slightly across +platforms. 
On Unix and Mac OS X, the three configuration files (in the order +they are processed) are: + ++--------------+----------------------------------------------------------+-------+ +| Type of file | Location and filename | Notes | ++==============+==========================================================+=======+ +| system | :file:`{prefix}/lib/python{ver}/distutils/distutils.cfg` | \(1) | ++--------------+----------------------------------------------------------+-------+ +| personal | :file:`$HOME/.pydistutils.cfg` | \(2) | ++--------------+----------------------------------------------------------+-------+ +| local | :file:`setup.cfg` | \(3) | ++--------------+----------------------------------------------------------+-------+ + +And on Windows, the configuration files are: + ++--------------+-------------------------------------------------+-------+ +| Type of file | Location and filename | Notes | ++==============+=================================================+=======+ +| system | :file:`{prefix}\\Lib\\distutils\\distutils.cfg` | \(4) | ++--------------+-------------------------------------------------+-------+ +| personal | :file:`%HOME%\\pydistutils.cfg` | \(5) | ++--------------+-------------------------------------------------+-------+ +| local | :file:`setup.cfg` | \(3) | ++--------------+-------------------------------------------------+-------+ + +Notes: + +(1) + Strictly speaking, the system-wide configuration file lives in the directory + where the Distutils are installed; under Python 1.6 and later on Unix, this is + as shown. For Python 1.5.2, the Distutils will normally be installed to + :file:`{prefix}/lib/python1.5/site-packages/distutils`, so the system + configuration file should be put there under Python 1.5.2. + +(2) + On Unix, if the :envvar:`HOME` environment variable is not defined, the user's + home directory will be determined with the :func:`getpwuid` function from the + standard :mod:`pwd` module. 
+ +(3) + I.e., in the current directory (usually the location of the setup script). + +(4) + (See also note (1).) Under Python 1.6 and later, Python's default "installation + prefix" is :file:`C:\\Python`, so the system configuration file is normally + :file:`C:\\Python\\Lib\\distutils\\distutils.cfg`. Under Python 1.5.2, the + default prefix was :file:`C:\\Program Files\\Python`, and the Distutils were not + part of the standard library---so the system configuration file would be + :file:`C:\\Program Files\\Python\\distutils\\distutils.cfg` in a standard Python + 1.5.2 installation under Windows. + +(5) + On Windows, if the :envvar:`HOME` environment variable is not defined, no + personal configuration file will be found or used. (In other words, the + Distutils make no attempt to guess your home directory on Windows.) + + +.. _inst-config-syntax: + +Syntax of config files +---------------------- + +The Distutils configuration files all have the same syntax. The config files +are grouped into sections. There is one section for each Distutils command, +plus a ``global`` section for global options that affect every command. Each +section consists of one option per line, specified as ``option=value``. + +For example, the following is a complete config file that just forces all +commands to run quietly by default:: + + [global] + verbose=0 + +If this is installed as the system config file, it will affect all processing of +any Python module distribution by any user on the current system. If it is +installed as your personal config file (on systems that support them), it will +affect only module distributions processed by you. And if it is used as the +:file:`setup.cfg` for a particular module distribution, it affects only that +distribution. 
+ +You could override the default "build base" directory and make the +:command:`build\*` commands always forcibly rebuild all files with the +following:: + + [build] + build-base=blib + force=1 + +which corresponds to the command-line arguments :: + + python setup.py build --build-base=blib --force + +except that including the :command:`build` command on the command-line means +that command will be run. Including a particular command in config files has no +such implication; it only means that if the command is run, the options in the +config file will apply. (Or if other commands that derive values from it are +run, they will use the values in the config file.) + +You can find out the complete list of options for any command using the +:option:`--help` option, e.g.:: + + python setup.py build --help + +and you can find out the complete list of global options by using +:option:`--help` without a command:: + + python setup.py --help + +See also the "Reference" section of the "Distributing Python Modules" manual. + + +.. _inst-building-ext: + +Building Extensions: Tips and Tricks +==================================== + +Whenever possible, the Distutils try to use the configuration information made +available by the Python interpreter used to run the :file:`setup.py` script. +For example, the same compiler and linker flags used to compile Python will also +be used for compiling extensions. Usually this will work well, but in +complicated situations this might be inappropriate. This section discusses how +to override the usual Distutils behaviour. + + +.. _inst-tweak-flags: + +Tweaking compiler/linker flags +------------------------------ + +Compiling a Python extension written in C or C++ will sometimes require +specifying custom flags for the compiler and linker in order to use a particular +library or produce a special kind of object code. This is especially true if the +extension hasn't been tested on your platform, or if you're trying to +cross-compile Python. 
+ +In the most general case, the extension author might have foreseen that +compiling the extensions would be complicated, and provided a :file:`Setup` file +for you to edit. This will likely only be done if the module distribution +contains many separate extension modules, or if they often require elaborate +sets of compiler flags in order to work. + +A :file:`Setup` file, if present, is parsed in order to get a list of extensions +to build. Each line in a :file:`Setup` file describes a single module. Lines have +the following structure:: + + module ... [sourcefile ...] [cpparg ...] [library ...] + + +Let's examine each of the fields in turn. + +* *module* is the name of the extension module to be built, and should be a + valid Python identifier. You can't just change this in order to rename a module + (edits to the source code would also be needed), so this should be left alone. + +* *sourcefile* is anything that's likely to be a source code file, at least + judging by the filename. Filenames ending in :file:`.c` are assumed to be + written in C, filenames ending in :file:`.C`, :file:`.cc`, and :file:`.c++` are + assumed to be C++, and filenames ending in :file:`.m` or :file:`.mm` are assumed + to be in Objective C. + +* *cpparg* is an argument for the C preprocessor, and is anything starting with + :option:`-I`, :option:`-D`, :option:`-U` or :option:`-C`. + +* *library* is anything ending in :file:`.a` or beginning with :option:`-l` or + :option:`-L`. + +If an extension requires a special library on your platform, you can +add it by editing the :file:`Setup` file and running ``python setup.py build``. 
+For example, if the module defined by the line :: + + foo foomodule.c + +must be linked with the math library :file:`libm.a` on your platform, simply add +:option:`-lm` to the line:: + + foo foomodule.c -lm + +Arbitrary switches intended for the compiler or the linker can be supplied with +the :option:`-Xcompiler` *arg* and :option:`-Xlinker` *arg* options:: + + foo foomodule.c -Xcompiler -o32 -Xlinker -shared -lm + +The next option after :option:`-Xcompiler` and :option:`-Xlinker` will be +appended to the proper command line, so in the above example the compiler will +be passed the :option:`-o32` option, and the linker will be passed +:option:`-shared`. If a compiler option requires an argument, you'll have to +supply multiple :option:`-Xcompiler` options; for example, to pass ``-x c++`` +the :file:`Setup` file would have to contain ``-Xcompiler -x -Xcompiler c++``. + +Compiler flags can also be supplied through setting the :envvar:`CFLAGS` +environment variable. If set, the contents of :envvar:`CFLAGS` will be added to +the compiler flags specified in the :file:`Setup` file. + + +.. _inst-non-ms-compilers: + +Using non-Microsoft compilers on Windows +---------------------------------------- + +.. sectionauthor:: Rene Liebscher + + + +Borland C++ +^^^^^^^^^^^ + +This subsection describes the necessary steps to use Distutils with the Borland +C++ compiler version 5.5. First you have to know that Borland's object file +format (OMF) is different from the format used by the Python version you can +download from the Python or ActiveState Web site. (Python is built with +Microsoft Visual C++, which uses COFF as the object file format.) For this +reason you have to convert Python's library :file:`python25.lib` into the +Borland format. You can do this as follows: + +.. % Should we mention that users have to create cfg-files for the compiler? +.. 
% see also http://community.borland.com/article/0,1410,21205,00.html + +:: + + coff2omf python25.lib python25_bcpp.lib + +The :file:`coff2omf` program comes with the Borland compiler. The file +:file:`python25.lib` is in the :file:`Libs` directory of your Python +installation. If your extension uses other libraries (zlib, ...) you have to +convert them too. + +The converted files have to reside in the same directories as the normal +libraries. + +How does Distutils manage to use these libraries with their changed names? If +the extension needs a library (e.g. :file:`foo`) Distutils checks first if it +finds a library with suffix :file:`_bcpp` (e.g. :file:`foo_bcpp.lib`) and then +uses this library. In the case it doesn't find such a special library it uses +the default name (:file:`foo.lib`.) [#]_ + +To let Distutils compile your extension with Borland C++ you now have to type:: + + python setup.py build --compiler=bcpp + +If you want to use the Borland C++ compiler as the default, you could specify +this in your personal or system-wide configuration file for Distutils (see +section :ref:`inst-config-files`.) + + +.. seealso:: + + C++Builder Compiler + Information about the free C++ compiler from Borland, including links to the + download pages. + + Creating Python Extensions Using Borland's Free Compiler + Document describing how to use Borland's free command-line C++ compiler to build + Python. + + +GNU C / Cygwin / MinGW +^^^^^^^^^^^^^^^^^^^^^^ + +These instructions only apply if you're using a version of Python prior to +2.4.1 with a MinGW prior to 3.0.0 (with binutils-2.13.90-20030111-1). + +This section describes the necessary steps to use Distutils with the GNU C/C++ +compilers in their Cygwin and MinGW distributions. [#]_ For a Python interpreter +that was built with Cygwin, everything should work without any of these +following steps. + +These compilers require some special libraries. 
This task is more complex than +for Borland's C++, because there is no program to convert the library. First +you have to create a list of symbols which the Python DLL exports. (You can find +a good program for this task at +http://starship.python.net/crew/kernr/mingw32/Notes.html, see PExports 0.42h +there.) + +.. % I don't understand what the next line means. --amk +.. % (inclusive the references on data structures.) + +:: + + pexports python25.dll >python25.def + +The location of an installed :file:`python25.dll` will depend on the +installation options and the version and language of Windows. In a "just for +me" installation, it will appear in the root of the installation directory. In +a shared installation, it will be located in the system directory. + +Then you can use this information to create an import library for gcc. :: + + /cygwin/bin/dlltool --dllname python25.dll --def python25.def --output-lib libpython25.a + +The resulting library has to be placed in the same directory as +:file:`python25.lib`. (Should be the :file:`libs` directory under your Python +installation directory.) + +If your extension uses other libraries (zlib,...) you might have to convert +them too. The converted files have to reside in the same directories as the +normal libraries do. + +To let Distutils compile your extension with Cygwin you now have to type :: + + python setup.py build --compiler=cygwin + +and for Cygwin in no-cygwin mode [#]_ or for MinGW type:: + + python setup.py build --compiler=mingw32 + +If you want to use any of these options/compilers as default, you should +consider writing it in your personal or system-wide configuration file for +Distutils (see section :ref:`inst-config-files`.) + + +.. seealso:: + + Building Python modules on MS Windows platform with MinGW + Information about building the required libraries for the MinGW environment. 
+ + http://pyopengl.sourceforge.net/ftp/win32-stuff/ + Converted import libraries in Cygwin/MinGW and Borland format, and a script to + create the registry entries needed for Distutils to locate the built Python. + +.. rubric:: Footnotes + +.. [#] This also means you could replace all existing COFF-libraries with OMF-libraries + of the same name. + +.. [#] Check http://sources.redhat.com/cygwin/ and http://www.mingw.org/ for more + information + +.. [#] Then you have no POSIX emulation available, but you also don't need + :file:`cygwin1.dll`. diff --git a/Doc/library/__builtin__.rst b/Doc/library/__builtin__.rst new file mode 100644 index 0000000..b3e1e11 --- /dev/null +++ b/Doc/library/__builtin__.rst @@ -0,0 +1,41 @@ + +:mod:`__builtin__` --- Built-in objects +======================================= + +.. module:: __builtin__ + :synopsis: The module that provides the built-in namespace. + + +This module provides direct access to all 'built-in' identifiers of Python; for +example, ``__builtin__.open`` is the full name for the built-in function +:func:`open`. See chapter :ref:`builtin`. + +This module is not normally accessed explicitly by most applications, but can be +useful in modules that provide objects with the same name as a built-in value, +but in which the built-in of that name is also needed. For example, in a module +that wants to implement an :func:`open` function that wraps the built-in +:func:`open`, this module can be used directly:: + + import __builtin__ + + def open(path): + f = __builtin__.open(path, 'r') + return UpperCaser(f) + + class UpperCaser: + '''Wrapper around a file that converts output to upper-case.''' + + def __init__(self, f): + self._f = f + + def read(self, count=-1): + return self._f.read(count).upper() + + # ... + +As an implementation detail, most modules have the name ``__builtins__`` (note +the ``'s'``) made available as part of their globals. 
The value of +``__builtins__`` is normally either this module or the value of this module's +:attr:`__dict__` attribute. Since this is an implementation detail, it may not +be used by alternate implementations of Python. + diff --git a/Doc/library/__future__.rst b/Doc/library/__future__.rst new file mode 100644 index 0000000..6bf2830 --- /dev/null +++ b/Doc/library/__future__.rst @@ -0,0 +1,61 @@ + +:mod:`__future__` --- Future statement definitions +================================================== + +.. module:: __future__ + :synopsis: Future statement definitions + + +:mod:`__future__` is a real module, and serves three purposes: + +* To avoid confusing existing tools that analyze import statements and expect to + find the modules they're importing. + +* To ensure that future_statements run under releases prior to 2.1 at least + yield runtime exceptions (the import of :mod:`__future__` will fail, because + there was no module of that name prior to 2.1). + +* To document when incompatible changes were introduced, and when they will be + --- or were --- made mandatory. This is a form of executable documentation, and + can be inspected programmatically via importing :mod:`__future__` and examining + its contents. + +Each statement in :file:`__future__.py` is of the form:: + + FeatureName = "_Feature(" OptionalRelease "," MandatoryRelease "," + CompilerFlag ")" + + +where, normally, *OptionalRelease* is less than *MandatoryRelease*, and both are +5-tuples of the same form as ``sys.version_info``:: + + (PY_MAJOR_VERSION, # the 2 in 2.1.0a3; an int + PY_MINOR_VERSION, # the 1; an int + PY_MICRO_VERSION, # the 0; an int + PY_RELEASE_LEVEL, # "alpha", "beta", "candidate" or "final"; string + PY_RELEASE_SERIAL # the 3; an int + ) + +*OptionalRelease* records the first release in which the feature was accepted. + +In the case of a *MandatoryRelease* that has not yet occurred, +*MandatoryRelease* predicts the release in which the feature will become part of +the language. 
+ +Else *MandatoryRelease* records when the feature became part of the language; in +releases at or after that, modules no longer need a future statement to use the +feature in question, but may continue to use such imports. + +*MandatoryRelease* may also be ``None``, meaning that a planned feature got +dropped. + +Instances of class :class:`_Feature` have two corresponding methods, +:meth:`getOptionalRelease` and :meth:`getMandatoryRelease`. + +*CompilerFlag* is the (bitfield) flag that should be passed in the fourth +argument to the builtin function :func:`compile` to enable the feature in +dynamically compiled code. This flag is stored in the :attr:`compiler_flag` +attribute on :class:`_Feature` instances. + +No feature description will ever be deleted from :mod:`__future__`. + diff --git a/Doc/library/__main__.rst b/Doc/library/__main__.rst new file mode 100644 index 0000000..a1d3c24 --- /dev/null +++ b/Doc/library/__main__.rst @@ -0,0 +1,17 @@ + +:mod:`__main__` --- Top-level script environment +================================================ + +.. module:: __main__ + :synopsis: The environment where the top-level script is run. + + +This module represents the (otherwise anonymous) scope in which the +interpreter's main program executes --- commands read either from standard +input, from a script file, or from an interactive prompt. It is this +environment in which the idiomatic "conditional script" stanza causes a script +to run:: + + if __name__ == "__main__": + main() + diff --git a/Doc/library/_ast.rst b/Doc/library/_ast.rst new file mode 100644 index 0000000..9b195be --- /dev/null +++ b/Doc/library/_ast.rst @@ -0,0 +1,59 @@ +.. _ast: + +Abstract Syntax Trees +===================== + +.. module:: _ast + :synopsis: Abstract Syntax Tree classes. + +.. sectionauthor:: Martin v. Löwis + + +.. versionadded:: 2.5 + +The ``_ast`` module helps Python applications to process trees of the Python +abstract syntax grammar. 
The Python compiler currently provides read-only access +to such trees, meaning that applications can only create a tree for a given +piece of Python source code; generating byte code from a (potentially modified) +tree is not supported. The abstract syntax itself might change with each Python +release; this module helps to find out programmatically what the current grammar +looks like. + +An abstract syntax tree can be generated by passing ``_ast.PyCF_ONLY_AST`` as a +flag to the :func:`compile` builtin function. The result will be a tree of +objects whose classes all inherit from ``_ast.AST``. + +The actual classes are derived from the ``Parser/Python.asdl`` file, which is +reproduced below. There is one class defined for each left-hand side symbol in +the abstract grammar (for example, ``_ast.stmt`` or ``_ast.expr``). In addition, +there is one class defined for each constructor on the right-hand side; these +classes inherit from the classes for the left-hand side trees. For example, +``_ast.BinOp`` inherits from ``_ast.expr``. For production rules with +alternatives (aka "sums"), the left-hand side class is abstract: only instances +of specific constructor nodes are ever created. + +Each concrete class has an attribute ``_fields`` which gives the names of all +child nodes. + +Each instance of a concrete class has one attribute for each child node, of the +type as defined in the grammar. For example, ``_ast.BinOp`` instances have an +attribute ``left`` of type ``_ast.expr``. Instances of ``_ast.expr`` and +``_ast.stmt`` subclasses also have lineno and col_offset attributes. The lineno +is the line number of source text (1 indexed so the first line is line 1) and +the col_offset is the utf8 byte offset of the first token that generated the +node. The utf8 offset is recorded because the parser uses utf8 internally. + +If these attributes are marked as optional in the grammar (using a question +mark), the value might be ``None``. 
If the attributes can have zero-or-more +values (marked with an asterisk), the values are represented as Python lists. + + +Abstract Grammar +---------------- + +The module defines a string constant ``__version__`` which is the decimal +subversion revision number of the file shown below. + +The abstract grammar is currently defined as follows: + +.. literalinclude:: ../../Parser/Python.asdl diff --git a/Doc/library/_winreg.rst b/Doc/library/_winreg.rst new file mode 100644 index 0000000..fddbfd1 --- /dev/null +++ b/Doc/library/_winreg.rst @@ -0,0 +1,420 @@ + +:mod:`_winreg` -- Windows registry access +========================================= + +.. module:: _winreg + :platform: Windows + :synopsis: Routines and objects for manipulating the Windows registry. +.. sectionauthor:: Mark Hammond + + +.. versionadded:: 2.0 + +These functions expose the Windows registry API to Python. Instead of using an +integer as the registry handle, a handle object is used to ensure that the +handles are closed correctly, even if the programmer neglects to explicitly +close them. + +This module exposes a very low-level interface to the Windows registry; it is +expected that in the future a new ``winreg`` module will be created offering a +higher-level interface to the registry API. + +This module offers the following functions: + + +.. function:: CloseKey(hkey) + + Closes a previously opened registry key. The hkey argument specifies a + previously opened key. + + Note that if *hkey* is not closed using this method (or via + :meth:`handle.Close`), it is closed when the *hkey* object is destroyed by + Python. + + +.. function:: ConnectRegistry(computer_name, key) + + Establishes a connection to a predefined registry handle on another computer, + and returns a :dfn:`handle object` + + *computer_name* is the name of the remote computer, of the form + ``r"\\computername"``. If ``None``, the local computer is used. + + *key* is the predefined handle to connect to. 
+ + The return value is the handle of the opened key. If the function fails, an + :exc:`EnvironmentError` exception is raised. + + +.. function:: CreateKey(key, sub_key) + + Creates or opens the specified key, returning a :dfn:`handle object` + + *key* is an already open key, or one of the predefined :const:`HKEY_\*` + constants. + + *sub_key* is a string that names the key this method opens or creates. + + If *key* is one of the predefined keys, *sub_key* may be ``None``. In that + case, the handle returned is the same key handle passed in to the function. + + If the key already exists, this function opens the existing key. + + The return value is the handle of the opened key. If the function fails, an + :exc:`EnvironmentError` exception is raised. + + +.. function:: DeleteKey(key, sub_key) + + Deletes the specified key. + + *key* is an already open key, or any one of the predefined :const:`HKEY_\*` + constants. + + *sub_key* is a string that must be a subkey of the key identified by the *key* + parameter. This value must not be ``None``, and the key may not have subkeys. + + *This method can not delete keys with subkeys.* + + If the method succeeds, the entire key, including all of its values, is removed. + If the method fails, an :exc:`EnvironmentError` exception is raised. + + +.. function:: DeleteValue(key, value) + + Removes a named value from a registry key. + + *key* is an already open key, or one of the predefined :const:`HKEY_\*` + constants. + + *value* is a string that identifies the value to remove. + + +.. function:: EnumKey(key, index) + + Enumerates subkeys of an open registry key, returning a string. + + *key* is an already open key, or any one of the predefined :const:`HKEY_\*` + constants. + + *index* is an integer that identifies the index of the key to retrieve. + + The function retrieves the name of one subkey each time it is called. 
It is + typically called repeatedly until an :exc:`EnvironmentError` exception is + raised, indicating that no more subkeys are available. + + +.. function:: EnumValue(key, index) + + Enumerates values of an open registry key, returning a tuple. + + *key* is an already open key, or any one of the predefined :const:`HKEY_\*` + constants. + + *index* is an integer that identifies the index of the value to retrieve. + + The function retrieves the name of one value each time it is called. It is + typically called repeatedly, until an :exc:`EnvironmentError` exception is + raised, indicating no more values. + + The result is a tuple of 3 items: + + +-------+--------------------------------------------+ + | Index | Meaning | + +=======+============================================+ + | ``0`` | A string that identifies the value name | + +-------+--------------------------------------------+ + | ``1`` | An object that holds the value data, and | + | | whose type depends on the underlying | + | | registry type | + +-------+--------------------------------------------+ + | ``2`` | An integer that identifies the type of the | + | | value data | + +-------+--------------------------------------------+ + + +.. function:: FlushKey(key) + + Writes all the attributes of a key to the registry. + + *key* is an already open key, or one of the predefined :const:`HKEY_\*` + constants. + + It is not necessary to call :func:`FlushKey` to change a key. Registry changes are + flushed to disk by the registry using its lazy flusher. Registry changes are + also flushed to disk at system shutdown. Unlike :func:`CloseKey`, the + :func:`FlushKey` method returns only when all the data has been written to the + registry. An application should only call :func:`FlushKey` if it requires + absolute certainty that registry changes are on disk. + + .. note:: + + If you don't know whether a :func:`FlushKey` call is required, it probably + isn't. + + +..
function:: LoadKey(key, sub_key, file_name) + + Creates a subkey under the specified key and stores registration information + from a specified file into that subkey. + + *key* is an already open key, or any of the predefined :const:`HKEY_\*` + constants. + + *sub_key* is a string that identifies the sub_key to load. + + *file_name* is the name of the file to load registry data from. This file must + have been created with the :func:`SaveKey` function. Under the file allocation + table (FAT) file system, the filename may not have an extension. + + A call to :func:`LoadKey` fails if the calling process does not have the + :const:`SE_RESTORE_PRIVILEGE` privilege. Note that privileges are different from + permissions - see the Win32 documentation for more details. + + If *key* is a handle returned by :func:`ConnectRegistry`, then the path + specified in *file_name* is relative to the remote computer. + + The Win32 documentation implies *key* must be in the :const:`HKEY_USERS` or + :const:`HKEY_LOCAL_MACHINE` tree. This may or may not be true. + + +.. function:: OpenKey(key, sub_key[, res=0][, sam=KEY_READ]) + + Opens the specified key, returning a :dfn:`handle object`. + + *key* is an already open key, or any one of the predefined :const:`HKEY_\*` + constants. + + *sub_key* is a string that identifies the sub_key to open. + + *res* is a reserved integer, and must be zero. The default is zero. + + *sam* is an integer that specifies an access mask that describes the desired + security access for the key. Default is :const:`KEY_READ`. + + The result is a new handle to the specified key. + + If the function fails, :exc:`EnvironmentError` is raised. + + +.. function:: OpenKeyEx() + + The functionality of :func:`OpenKeyEx` is provided via :func:`OpenKey`, by the + use of default arguments. + + +.. function:: QueryInfoKey(key) + + Returns information about a key, as a tuple. + + *key* is an already open key, or one of the predefined :const:`HKEY_\*` + constants.
+ + The result is a tuple of 3 items: + + +-------+---------------------------------------------+ + | Index | Meaning | + +=======+=============================================+ + | ``0`` | An integer giving the number of sub keys | + | | this key has. | + +-------+---------------------------------------------+ + | ``1`` | An integer giving the number of values this | + | | key has. | + +-------+---------------------------------------------+ + | ``2`` | A long integer giving when the key was last | + | | modified (if available) as 100's of | + | | nanoseconds since Jan 1, 1601. | + +-------+---------------------------------------------+ + + +.. function:: QueryValue(key, sub_key) + + Retrieves the unnamed value for a key, as a string. + + *key* is an already open key, or one of the predefined :const:`HKEY_\*` + constants. + + *sub_key* is a string that holds the name of the subkey with which the value is + associated. If this parameter is ``None`` or empty, the function retrieves the + value set by the :func:`SetValue` method for the key identified by *key*. + + Values in the registry have name, type, and data components. This method + retrieves the data for a key's first value that has a NULL name. But the + underlying API call doesn't return the type, so always use + :func:`QueryValueEx` if possible. + + +.. function:: QueryValueEx(key, value_name) + + Retrieves the type and data for a specified value name associated with an open + registry key. + + *key* is an already open key, or one of the predefined :const:`HKEY_\*` + constants. + + *value_name* is a string indicating the value to query. + + The result is a tuple of 2 items: + + +-------+-----------------------------------------+ + | Index | Meaning | + +=======+=========================================+ + | ``0`` | The value of the registry item. | + +-------+-----------------------------------------+ + | ``1`` | An integer giving the registry type for | + | | this value.
| + +-------+-----------------------------------------+ + + +.. function:: SaveKey(key, file_name) + + Saves the specified key, and all its subkeys to the specified file. + + *key* is an already open key, or one of the predefined :const:`HKEY_\*` + constants. + + *file_name* is the name of the file to save registry data to. This file cannot + already exist. If this filename includes an extension, it cannot be used on file + allocation table (FAT) file systems by the :meth:`LoadKey`, :meth:`ReplaceKey` + or :meth:`RestoreKey` methods. + + If *key* represents a key on a remote computer, the path described by + *file_name* is relative to the remote computer. The caller of this method must + possess the :const:`SeBackupPrivilege` security privilege. Note that + privileges are different than permissions - see the Win32 documentation for + more details. + + This function passes NULL for *security_attributes* to the API. + + +.. function:: SetValue(key, sub_key, type, value) + + Associates a value with a specified key. + + *key* is an already open key, or one of the predefined :const:`HKEY_\*` + constants. + + *sub_key* is a string that names the subkey with which the value is associated. + + *type* is an integer that specifies the type of the data. Currently this must be + :const:`REG_SZ`, meaning only strings are supported. Use the :func:`SetValueEx` + function for support for other data types. + + *value* is a string that specifies the new value. + + If the key specified by the *sub_key* parameter does not exist, the SetValue + function creates it. + + Value lengths are limited by available memory. Long values (more than 2048 + bytes) should be stored as files with the filenames stored in the configuration + registry. This helps the registry perform efficiently. + + The key identified by the *key* parameter must have been opened with + :const:`KEY_SET_VALUE` access. + + +.. 
function:: SetValueEx(key, value_name, reserved, type, value) + + Stores data in the value field of an open registry key. + + *key* is an already open key, or one of the predefined :const:`HKEY_\*` + constants. + + *value_name* is a string that names the subkey with which the value is + associated. + + *type* is an integer that specifies the type of the data. This should be one + of the following constants defined in this module: + + +----------------------------------+---------------------------------------------+ + | Constant | Meaning | + +==================================+=============================================+ + | :const:`REG_BINARY` | Binary data in any form. | + +----------------------------------+---------------------------------------------+ + | :const:`REG_DWORD` | A 32-bit number. | + +----------------------------------+---------------------------------------------+ + | :const:`REG_DWORD_LITTLE_ENDIAN` | A 32-bit number in little-endian format. | + +----------------------------------+---------------------------------------------+ + | :const:`REG_DWORD_BIG_ENDIAN` | A 32-bit number in big-endian format. | + +----------------------------------+---------------------------------------------+ + | :const:`REG_EXPAND_SZ` | Null-terminated string containing | + | | references to environment variables | + | | (``%PATH%``). | + +----------------------------------+---------------------------------------------+ + | :const:`REG_LINK` | A Unicode symbolic link. | + +----------------------------------+---------------------------------------------+ + | :const:`REG_MULTI_SZ` | A sequence of null-terminated strings, | + | | terminated by two null characters. (Python | + | | handles this termination automatically.) | + +----------------------------------+---------------------------------------------+ + | :const:`REG_NONE` | No defined value type. 
| + +----------------------------------+---------------------------------------------+ + | :const:`REG_RESOURCE_LIST` | A device-driver resource list. | + +----------------------------------+---------------------------------------------+ + | :const:`REG_SZ` | A null-terminated string. | + +----------------------------------+---------------------------------------------+ + + *reserved* can be anything - zero is always passed to the API. + + *value* is a string that specifies the new value. + + This method can also set additional value and type information for the specified + key. The key identified by the *key* parameter must have been opened with + :const:`KEY_SET_VALUE` access. + + To open the key, use the :func:`CreateKey` or :func:`OpenKey` methods. + + Value lengths are limited by available memory. Long values (more than 2048 + bytes) should be stored as files with the filenames stored in the configuration + registry. This helps the registry perform efficiently. + + +.. _handle-object: + +Registry Handle Objects +----------------------- + +This object wraps a Windows HKEY object, automatically closing it when the +object is destroyed. To guarantee cleanup, you can call either the +:meth:`Close` method on the object, or the :func:`CloseKey` function. + +All registry functions in this module return one of these objects. + +All registry functions in this module which accept a handle object also accept +an integer; however, use of the handle object is encouraged. + +Handle objects provide semantics for :meth:`__bool__` - thus :: + + if handle: + print("Yes") + +will print ``Yes`` if the handle is currently valid (has not been closed or +detached). + +The object also supports comparison semantics, so handle objects will compare +true if they both reference the same underlying Windows handle value. + +Handle objects can be converted to an integer (e.g., using the builtin +:func:`int` function), in which case the underlying Windows handle value is +returned.
You can also use the :meth:`Detach` method to return the integer +handle, and also disconnect the Windows handle from the handle object. + + +.. method:: PyHKEY.Close() + + Closes the underlying Windows handle. + + If the handle is already closed, no error is raised. + + +.. method:: PyHKEY.Detach() + + Detaches the Windows handle from the handle object. + + The result is an integer (or long on 64 bit Windows) that holds the value of the + handle before it is detached. If the handle is already detached or closed, this + will return zero. + + After calling this function, the handle is effectively invalidated, but the + handle is not closed. You would call this function when you need the + underlying Win32 handle to exist beyond the lifetime of the handle object. + diff --git a/Doc/library/aepack.rst b/Doc/library/aepack.rst new file mode 100644 index 0000000..7eaffd8 --- /dev/null +++ b/Doc/library/aepack.rst @@ -0,0 +1,92 @@ + +:mod:`aepack` --- Conversion between Python variables and AppleEvent data containers +==================================================================================== + +.. module:: aepack + :platform: Mac + :synopsis: Conversion between Python variables and AppleEvent data containers. +.. sectionauthor:: Vincent Marchetti + + +.. % \moduleauthor{Jack Jansen?}{email} + +The :mod:`aepack` module defines functions for converting (packing) Python +variables to AppleEvent descriptors and back (unpacking). Within Python the +AppleEvent descriptor is handled by Python objects of built-in type +:class:`AEDesc`, defined in module :mod:`Carbon.AE`. + +The :mod:`aepack` module defines the following functions: + + +.. function:: pack(x[, forcetype]) + + Returns an :class:`AEDesc` object containing a conversion of Python value x. If + *forcetype* is provided it specifies the descriptor type of the result. 
+ Otherwise, a default mapping of Python types to Apple Event descriptor types is + used, as follows: + + +-----------------+-----------------------------------+ + | Python type | descriptor type | + +=================+===================================+ + | :class:`FSSpec` | typeFSS | + +-----------------+-----------------------------------+ + | :class:`FSRef` | typeFSRef | + +-----------------+-----------------------------------+ + | :class:`Alias` | typeAlias | + +-----------------+-----------------------------------+ + | integer | typeLong (32 bit integer) | + +-----------------+-----------------------------------+ + | float | typeFloat (64 bit floating point) | + +-----------------+-----------------------------------+ + | string | typeText | + +-----------------+-----------------------------------+ + | unicode | typeUnicodeText | + +-----------------+-----------------------------------+ + | list | typeAEList | + +-----------------+-----------------------------------+ + | dictionary | typeAERecord | + +-----------------+-----------------------------------+ + | instance | *see below* | + +-----------------+-----------------------------------+ + + If *x* is a Python instance then this function attempts to call an + :meth:`__aepack__` method. This method should return an :class:`AEDesc` object. + + If the conversion *x* is not defined above, this function returns the Python + string representation of a value (the repr() function) encoded as a text + descriptor. + + +.. function:: unpack(x[, formodulename]) + + *x* must be an object of type :class:`AEDesc`. This function returns a Python + object representation of the data in the Apple Event descriptor *x*. Simple + AppleEvent data types (integer, text, float) are returned as their obvious + Python counterparts. Apple Event lists are returned as Python lists, and the + list elements are recursively unpacked. Object references (ex. 
``line 3 of + document 1``) are returned as instances of :class:`aetypes.ObjectSpecifier`, + unless ``formodulename`` is specified. AppleEvent descriptors with descriptor + type typeFSS are returned as :class:`FSSpec` objects. AppleEvent record + descriptors are returned as Python dictionaries, with 4-character string keys + and elements recursively unpacked. + + The optional ``formodulename`` argument is used by the stub packages generated + by :mod:`gensuitemodule`, and ensures that the OSA classes for object specifiers + are looked up in the correct module. This ensures that if, say, the Finder + returns an object specifier for a window you get an instance of + ``Finder.Window`` and not a generic ``aetypes.Window``. The former knows about + all the properties and elements a window has in the Finder, while the latter + knows no such things. + + +.. seealso:: + + Module :mod:`Carbon.AE` + Built-in access to Apple Event Manager routines. + + Module :mod:`aetypes` + Python definitions of codes for Apple Event descriptor types. + + *Inside Macintosh: Interapplication Communication* + Information about inter-process communications on the Macintosh. + diff --git a/Doc/library/aetools.rst b/Doc/library/aetools.rst new file mode 100644 index 0000000..b5fd4ad --- /dev/null +++ b/Doc/library/aetools.rst @@ -0,0 +1,86 @@ + +:mod:`aetools` --- OSA client support +===================================== + +.. module:: aetools + :platform: Mac + :synopsis: Basic support for sending Apple Events +.. sectionauthor:: Jack Jansen + + +.. % \moduleauthor{Jack Jansen?}{email} + +The :mod:`aetools` module contains the basic functionality on which Python +AppleScript client support is built. It also imports and re-exports the core +functionality of the :mod:`aetypes` and :mod:`aepack` modules. The stub packages +generated by :mod:`gensuitemodule` import the relevant portions of +:mod:`aetools`, so usually you do not need to import it yourself.
The exception +to this is when you cannot use a generated suite package and need lower-level +access to scripting. + +The :mod:`aetools` module itself uses the AppleEvent support provided by the +:mod:`Carbon.AE` module. This has one drawback: you need access to the window +manager, see section :ref:`osx-gui-scripts` for details. This restriction may be +lifted in future releases. + +The :mod:`aetools` module defines the following functions: + + +.. function:: packevent(ae, parameters, attributes) + + Stores parameters and attributes in a pre-created ``Carbon.AE.AEDesc`` object. + ``parameters`` and ``attributes`` are dictionaries mapping 4-character OSA + parameter keys to Python objects. The objects are packed using + ``aepack.pack()``. + + +.. function:: unpackevent(ae[, formodulename]) + + Recursively unpacks a ``Carbon.AE.AEDesc`` event to Python objects. The function + returns the parameter dictionary and the attribute dictionary. The + ``formodulename`` argument is used by generated stub packages to control where + AppleScript classes are looked up. + + +.. function:: keysubst(arguments, keydict) + + Converts a Python keyword argument dictionary ``arguments`` to the format + required by ``packevent`` by replacing the keys, which are Python identifiers, + by the four-character OSA keys according to the mapping specified in + ``keydict``. Used by the generated suite packages. + + +.. function:: enumsubst(arguments, key, edict) + + If the ``arguments`` dictionary contains an entry for ``key`` convert the value + for that entry according to dictionary ``edict``. This converts human-readable + Python enumeration names to the OSA 4-character codes. Used by the generated + suite packages. + +The :mod:`aetools` module defines the following class: + + +.. class:: TalkTo([signature=None, start=0, timeout=0]) + + Base class for the proxy used to talk to an application. 
``signature`` overrides + the class attribute ``_signature`` (which is usually set by subclasses) and is + the 4-char creator code defining the application to talk to. ``start`` can be + set to true to enable running the application on class instantiation. + ``timeout`` can be specified to change the default timeout used while waiting + for an AppleEvent reply. + + +.. method:: TalkTo._start() + + Test whether the application is running, and attempt to start it if not. + + +.. method:: TalkTo.send(code, subcode[, parameters, attributes]) + + Create the AppleEvent ``Carbon.AE.AEDesc`` for the verb with the OSA designation + ``code, subcode`` (which are the usual 4-character strings), pack the + ``parameters`` and ``attributes`` into it, send it to the target application, + wait for the reply, unpack the reply with ``unpackevent`` and return the reply + AppleEvent, the unpacked return values as a dictionary and the return + attributes. + diff --git a/Doc/library/aetypes.rst b/Doc/library/aetypes.rst new file mode 100644 index 0000000..0dd0a88 --- /dev/null +++ b/Doc/library/aetypes.rst @@ -0,0 +1,150 @@ + +:mod:`aetypes` --- AppleEvent objects +===================================== + +.. module:: aetypes + :platform: Mac + :synopsis: Python representation of the Apple Event Object Model. +.. sectionauthor:: Vincent Marchetti + + +.. % \moduleauthor{Jack Jansen?}{email} + +The :mod:`aetypes` module defines classes used to represent Apple Event data +descriptors and Apple Event object specifiers. + +Apple Event data is contained in descriptors, and these descriptors are typed. +For many descriptors the Python representation is simply the corresponding +Python type: ``typeText`` in OSA is a Python string, ``typeFloat`` is a float, +etc. For OSA types that have no direct Python counterpart this module declares +classes. Packing and unpacking instances of these classes is handled +automatically by :mod:`aepack`.
+ +An object specifier is essentially an address of an object implemented in an +Apple Event server. An Apple Event specifier is used as the direct object for an +Apple Event or as the argument of an optional parameter. The :mod:`aetypes` +module contains the base classes for OSA classes and properties, which are used +by the packages generated by :mod:`gensuitemodule` to populate the classes and +properties in a given suite. + +For reasons of backward compatibility, and for cases where you need to script an +application for which you have not generated the stub package, this module also +contains object specifiers for a number of common OSA classes such as +``Document``, ``Window``, ``Character``, etc. + +The :mod:`aetypes` module defines the following classes to represent Apple +Event descriptor data: + + +.. class:: Unknown(type, data) + + The representation of OSA descriptor data for which the :mod:`aepack` and + :mod:`aetypes` modules have no support, i.e. anything that is not represented by + the other classes here and that is not equivalent to a simple Python value. + + +.. class:: Enum(enum) + + An enumeration value with the given 4-character string value. + + +.. class:: InsertionLoc(of, pos) + + Position ``pos`` in object ``of``. + + +.. class:: Boolean(bool) + + A boolean. + + +.. class:: StyledText(style, text) + + Text with style information (font, face, etc.) included. + + +.. class:: AEText(script, style, text) + + Text with script system and style information included. + + +.. class:: IntlText(script, language, text) + + Text with script system and language information included. + + +.. class:: IntlWritingCode(script, language) + + Script system and language information. + + +.. class:: QDPoint(v, h) + + A QuickDraw point. + + +.. class:: QDRectangle(v0, h0, v1, h1) + + A QuickDraw rectangle. + + +.. class:: RGBColor(r, g, b) + + A color. + + +.. class:: Type(type) + + An OSA type value with the given 4-character name. + + +..
class:: Keyword(name) + + An OSA keyword with the given 4-character name. + + +.. class:: Range(start, stop) + + A range. + + +.. class:: Ordinal(abso) + + Non-numeric absolute positions, such as ``"firs"``, first, or ``"midd"``, + middle. + + +.. class:: Logical(logc, term) + + The logical expression of applying operator ``logc`` to ``term``. + + +.. class:: Comparison(obj1, relo, obj2) + + The comparison ``relo`` of ``obj1`` to ``obj2``. + +The following classes are used as base classes by the generated stub packages to +represent AppleScript classes and properties in Python: + + +.. class:: ComponentItem(which[, fr]) + + Abstract baseclass for an OSA class. The subclass should set the class attribute + ``want`` to the 4-character OSA class code. Instances of subclasses of this + class are equivalent to AppleScript Object Specifiers. Upon instantiation you + should pass a selector in ``which``, and optionally a parent object in ``fr``. + + +.. class:: NProperty(fr) + + Abstract baseclass for an OSA property. The subclass should set the class + attributes ``want`` and ``which`` to designate which property we are talking + about. Instances of subclasses of this class are Object Specifiers. + + +.. class:: ObjectSpecifier(want, form, seld[, fr]) + + Base class of ``ComponentItem`` and ``NProperty``, a general OSA Object + Specifier. See the Apple Open Scripting Architecture documentation for the + parameters. Note that this class is not abstract. + diff --git a/Doc/library/aifc.rst b/Doc/library/aifc.rst new file mode 100644 index 0000000..0cfcb52 --- /dev/null +++ b/Doc/library/aifc.rst @@ -0,0 +1,225 @@ + +:mod:`aifc` --- Read and write AIFF and AIFC files +================================================== + +.. module:: aifc + :synopsis: Read and write audio files in AIFF or AIFC format. + + +.. index:: + single: Audio Interchange File Format + single: AIFF + single: AIFF-C + +This module provides support for reading and writing AIFF and AIFF-C files. 
+AIFF is Audio Interchange File Format, a format for storing digital audio +samples in a file. AIFF-C is a newer version of the format that includes the +ability to compress the audio data. + +**Caveat:** Some operations may only work under IRIX; these will raise +:exc:`ImportError` when attempting to import the :mod:`cl` module, which is only +available on IRIX. + +Audio files have a number of parameters that describe the audio data. The +sampling rate or frame rate is the number of times per second the sound is +sampled. The number of channels indicates whether the audio is mono, stereo, or +quadro. Each frame consists of one sample per channel. The sample size is the +size in bytes of each sample. Thus a frame consists of +*nchannels*\**samplesize* bytes, and a second's worth of audio consists of +*nchannels*\**samplesize*\**framerate* bytes. + +For example, CD quality audio has a sample size of two bytes (16 bits), uses two +channels (stereo) and has a frame rate of 44,100 frames/second. This gives a +frame size of 4 bytes (2\*2), and a second's worth occupies 2\*2\*44100 bytes +(176,400 bytes). + +Module :mod:`aifc` defines the following function: + + +.. function:: open(file[, mode]) + + Open an AIFF or AIFF-C file and return an object instance with methods that are + described below. The argument *file* is either a string naming a file or a file + object. *mode* must be ``'r'`` or ``'rb'`` when the file must be opened for + reading, or ``'w'`` or ``'wb'`` when the file must be opened for writing. If + omitted, ``file.mode`` is used if it exists, otherwise ``'rb'`` is used. When + used for writing, the file object should be seekable, unless you know ahead of + time how many samples you are going to write in total and use + :meth:`writeframesraw` and :meth:`setnframes`. + +Objects returned by :func:`open` when a file is opened for reading have the +following methods: + + +..
method:: aifc.getnchannels() + + Return the number of audio channels (1 for mono, 2 for stereo). + + +.. method:: aifc.getsampwidth() + + Return the size in bytes of individual samples. + + +.. method:: aifc.getframerate() + + Return the sampling rate (number of audio frames per second). + + +.. method:: aifc.getnframes() + + Return the number of audio frames in the file. + + +.. method:: aifc.getcomptype() + + Return a four-character string describing the type of compression used in the + audio file. For AIFF files, the returned value is ``'NONE'``. + + +.. method:: aifc.getcompname() + + Return a human-readable description of the type of compression used in the audio + file. For AIFF files, the returned value is ``'not compressed'``. + + +.. method:: aifc.getparams() + + Return a tuple consisting of all of the above values in the above order. + + +.. method:: aifc.getmarkers() + + Return a list of markers in the audio file. A marker consists of a tuple of + three elements. The first is the mark ID (an integer), the second is the mark + position in frames from the beginning of the data (an integer), the third is the + name of the mark (a string). + + +.. method:: aifc.getmark(id) + + Return the tuple as described in :meth:`getmarkers` for the mark with the given + *id*. + + +.. method:: aifc.readframes(nframes) + + Read and return the next *nframes* frames from the audio file. The returned + data is a string containing for each frame the uncompressed samples of all + channels. + + +.. method:: aifc.rewind() + + Rewind the read pointer. The next :meth:`readframes` will start from the + beginning. + + +.. method:: aifc.setpos(pos) + + Seek to the specified frame number. + + +.. method:: aifc.tell() + + Return the current frame number. + + +.. method:: aifc.close() + + Close the AIFF file. After calling this method, the object can no longer be + used. 
+ +Objects returned by :func:`open` when a file is opened for writing have all the +above methods, except for :meth:`readframes` and :meth:`setpos`. In addition +the following methods exist. The :meth:`get\*` methods can only be called after +the corresponding :meth:`set\*` methods have been called. Before the first +:meth:`writeframes` or :meth:`writeframesraw`, all parameters except for the +number of frames must be filled in. + + +.. method:: aifc.aiff() + + Create an AIFF file. The default is that an AIFF-C file is created, unless the + name of the file ends in ``'.aiff'`` in which case the default is an AIFF file. + + +.. method:: aifc.aifc() + + Create an AIFF-C file. The default is that an AIFF-C file is created, unless + the name of the file ends in ``'.aiff'`` in which case the default is an AIFF + file. + + +.. method:: aifc.setnchannels(nchannels) + + Specify the number of channels in the audio file. + + +.. method:: aifc.setsampwidth(width) + + Specify the size in bytes of audio samples. + + +.. method:: aifc.setframerate(rate) + + Specify the sampling frequency in frames per second. + + +.. method:: aifc.setnframes(nframes) + + Specify the number of frames that are to be written to the audio file. If this + parameter is not set, or not set correctly, the file needs to support seeking. + + +.. method:: aifc.setcomptype(type, name) + + .. index:: + single: u-LAW + single: A-LAW + single: G.722 + + Specify the compression type. If not specified, the audio data will not be + compressed. In AIFF files, compression is not possible. The name parameter + should be a human-readable description of the compression type, the type + parameter should be a four-character string. Currently the following + compression types are supported: NONE, ULAW, ALAW, G722. + + +.. method:: aifc.setparams(nchannels, sampwidth, framerate, comptype, compname) + + Set all the above parameters at once. The argument is a tuple consisting of the + various parameters. 
This means that it is possible to use the result of a + :meth:`getparams` call as argument to :meth:`setparams`. + + +.. method:: aifc.setmark(id, pos, name) + + Add a mark with the given id (larger than 0), and the given name at the given + position. This method can be called at any time before :meth:`close`. + + +.. method:: aifc.tell() + + Return the current write position in the output file. Useful in combination + with :meth:`setmark`. + + +.. method:: aifc.writeframes(data) + + Write data to the output file. This method can only be called after the audio + file parameters have been set. + + +.. method:: aifc.writeframesraw(data) + + Like :meth:`writeframes`, except that the header of the audio file is not + updated. + + +.. method:: aifc.close() + + Close the AIFF file. The header of the file is updated to reflect the actual + size of the audio data. After calling this method, the object can no longer be + used. + diff --git a/Doc/library/allos.rst b/Doc/library/allos.rst new file mode 100644 index 0000000..900d6d3 --- /dev/null +++ b/Doc/library/allos.rst @@ -0,0 +1,27 @@ + +.. _allos: + +********************************* +Generic Operating System Services +********************************* + +The modules described in this chapter provide interfaces to operating system +features that are available on (almost) all operating systems, such as files and +a clock. The interfaces are generally modeled after the Unix or C interfaces, +but they are available on most other systems as well. Here's an overview: + + +.. 
toctree:: + + os.rst + time.rst + optparse.rst + getopt.rst + logging.rst + getpass.rst + curses.rst + curses.ascii.rst + curses.panel.rst + platform.rst + errno.rst + ctypes.rst diff --git a/Doc/library/anydbm.rst b/Doc/library/anydbm.rst new file mode 100644 index 0000000..413b7de --- /dev/null +++ b/Doc/library/anydbm.rst @@ -0,0 +1,96 @@ + +:mod:`anydbm` --- Generic access to DBM-style databases +======================================================= + +.. module:: anydbm + :synopsis: Generic interface to DBM-style database modules. + + +.. index:: + module: dbhash + module: bsddb + module: gdbm + module: dbm + module: dumbdbm + +:mod:`anydbm` is a generic interface to variants of the DBM database --- +:mod:`dbhash` (requires :mod:`bsddb`), :mod:`gdbm`, or :mod:`dbm`. If none of +these modules is installed, the slow-but-simple implementation in module +:mod:`dumbdbm` will be used. + + +.. function:: open(filename[, flag[, mode]]) + + Open the database file *filename* and return a corresponding object. + + If the database file already exists, the :mod:`whichdb` module is used to + determine its type and the appropriate module is used; if it does not exist, the + first module listed above that can be imported is used. + + The optional *flag* argument can be ``'r'`` to open an existing database for + reading only, ``'w'`` to open an existing database for reading and writing, + ``'c'`` to create the database if it doesn't exist, or ``'n'``, which will + always create a new empty database. If not specified, the default value is + ``'r'``. + + The optional *mode* argument is the Unix mode of the file, used only when the + database has to be created. It defaults to octal ``0666`` (and will be modified + by the prevailing umask). + + +.. 
exception:: error + + A tuple containing the exceptions that can be raised by each of the supported + modules, with a unique exception also named :exc:`anydbm.error` as the first + item --- the latter is used when :exc:`anydbm.error` is raised. + +The object returned by :func:`open` supports most of the same functionality as +dictionaries; keys and their corresponding values can be stored, retrieved, and +deleted, and the :meth:`has_key` and :meth:`keys` methods are available. Keys +and values must always be strings. + +The following example records some hostnames and a corresponding title, and +then prints out the contents of the database:: + + import anydbm + + # Open database, creating it if necessary. + db = anydbm.open('cache', 'c') + + # Record some values + db['www.python.org'] = 'Python Website' + db['www.cnn.com'] = 'Cable News Network' + + # Loop through contents. Other dictionary methods + # such as .keys(), .values() also work. + for k, v in db.iteritems(): + print k, '\t', v + + # Storing a non-string key or value will raise an exception (most + # likely a TypeError). + db['www.yahoo.com'] = 4 + + # Close when done. + db.close() + + +.. seealso:: + + Module :mod:`dbhash` + BSD ``db`` database interface. + + Module :mod:`dbm` + Standard Unix database interface. + + Module :mod:`dumbdbm` + Portable implementation of the ``dbm`` interface. + + Module :mod:`gdbm` + GNU database interface, based on the ``dbm`` interface. + + Module :mod:`shelve` + General object persistence built on top of the Python ``dbm`` interface. + + Module :mod:`whichdb` + Utility module used to determine the type of an existing database. + diff --git a/Doc/library/archiving.rst b/Doc/library/archiving.rst new file mode 100644 index 0000000..7d0df5f --- /dev/null +++ b/Doc/library/archiving.rst @@ -0,0 +1,18 @@ + +.. 
_archiving: + +****************************** +Data Compression and Archiving +****************************** + +The modules described in this chapter support data compression with the zlib, +gzip, and bzip2 algorithms, and the creation of ZIP- and tar-format archives. + + +.. toctree:: + + zlib.rst + gzip.rst + bz2.rst + zipfile.rst + tarfile.rst diff --git a/Doc/library/array.rst b/Doc/library/array.rst new file mode 100644 index 0000000..5194edc --- /dev/null +++ b/Doc/library/array.rst @@ -0,0 +1,272 @@ + +:mod:`array` --- Efficient arrays of numeric values +=================================================== + +.. module:: array + :synopsis: Efficient arrays of uniformly typed numeric values. + + +.. index:: single: arrays + +This module defines an object type which can efficiently represent an array of +basic values: characters, integers, floating point numbers. Arrays are sequence +types and behave very much like lists, except that the type of objects stored in +them is constrained. The type is specified at object creation time by using a +:dfn:`type code`, which is a single character. 
The following type codes are +defined: + ++-----------+----------------+-------------------+-----------------------+ +| Type code | C Type | Python Type | Minimum size in bytes | ++===========+================+===================+=======================+ +| ``'c'`` | char | character | 1 | ++-----------+----------------+-------------------+-----------------------+ +| ``'b'`` | signed char | int | 1 | ++-----------+----------------+-------------------+-----------------------+ +| ``'B'`` | unsigned char | int | 1 | ++-----------+----------------+-------------------+-----------------------+ +| ``'u'`` | Py_UNICODE | Unicode character | 2 | ++-----------+----------------+-------------------+-----------------------+ +| ``'h'`` | signed short | int | 2 | ++-----------+----------------+-------------------+-----------------------+ +| ``'H'`` | unsigned short | int | 2 | ++-----------+----------------+-------------------+-----------------------+ +| ``'i'`` | signed int | int | 2 | ++-----------+----------------+-------------------+-----------------------+ +| ``'I'`` | unsigned int | long | 2 | ++-----------+----------------+-------------------+-----------------------+ +| ``'l'`` | signed long | int | 4 | ++-----------+----------------+-------------------+-----------------------+ +| ``'L'`` | unsigned long | long | 4 | ++-----------+----------------+-------------------+-----------------------+ +| ``'f'`` | float | float | 4 | ++-----------+----------------+-------------------+-----------------------+ +| ``'d'`` | double | float | 8 | ++-----------+----------------+-------------------+-----------------------+ + +The actual representation of values is determined by the machine architecture +(strictly speaking, by the C implementation). The actual size can be accessed +through the :attr:`itemsize` attribute. 
The values stored for ``'L'`` and +``'I'`` items will be represented as Python long integers when retrieved, +because Python's plain integer type cannot represent the full range of C's +unsigned (long) integers. + +The module defines the following type: + + +.. function:: array(typecode[, initializer]) + + Return a new array whose items are restricted by *typecode*, and initialized + from the optional *initializer* value, which must be a list, string, or iterable + over elements of the appropriate type. + + .. versionchanged:: 2.4 + Formerly, only lists or strings were accepted. + + If given a list or string, the initializer is passed to the new array's + :meth:`fromlist`, :meth:`fromstring`, or :meth:`fromunicode` method (see below) + to add initial items to the array. Otherwise, the iterable initializer is + passed to the :meth:`extend` method. + + +.. data:: ArrayType + + Obsolete alias for :func:`array`. + +Array objects support the ordinary sequence operations of indexing, slicing, +concatenation, and multiplication. When using slice assignment, the assigned +value must be an array object with the same type code; in all other cases, +:exc:`TypeError` is raised. Array objects also implement the buffer interface, +and may be used wherever buffer objects are supported. + +The following data items and methods are also supported: + + +.. attribute:: array.typecode + + The typecode character used to create the array. + + +.. attribute:: array.itemsize + + The length in bytes of one array item in the internal representation. + + +.. method:: array.append(x) + + Append a new item with value *x* to the end of the array. + + +.. method:: array.buffer_info() + + Return a tuple ``(address, length)`` giving the current memory address and the + length in elements of the buffer used to hold array's contents. The size of the + memory buffer in bytes can be computed as ``array.buffer_info()[1] * + array.itemsize``. 
This is occasionally useful when working with low-level (and + inherently unsafe) I/O interfaces that require memory addresses, such as certain + :cfunc:`ioctl` operations. The returned numbers are valid as long as the array + exists and no length-changing operations are applied to it. + + .. note:: + + When using array objects from code written in C or C++ (the only way to + effectively make use of this information), it makes more sense to use the buffer + interface supported by array objects. This method is maintained for backward + compatibility and should be avoided in new code. The buffer interface is + documented in :ref:`bufferobjects`. + + +.. method:: array.byteswap() + + "Byteswap" all items of the array. This is only supported for values which are + 1, 2, 4, or 8 bytes in size; for other types of values, :exc:`RuntimeError` is + raised. It is useful when reading data from a file written on a machine with a + different byte order. + + +.. method:: array.count(x) + + Return the number of occurrences of *x* in the array. + + +.. method:: array.extend(iterable) + + Append items from *iterable* to the end of the array. If *iterable* is another + array, it must have *exactly* the same type code; if not, :exc:`TypeError` will + be raised. If *iterable* is not an array, it must be iterable and its elements + must be the right type to be appended to the array. + + .. versionchanged:: 2.4 + Formerly, the argument could only be another array. + + +.. method:: array.fromfile(f, n) + + Read *n* items (as machine values) from the file object *f* and append them to + the end of the array. If less than *n* items are available, :exc:`EOFError` is + raised, but the items that were available are still inserted into the array. + *f* must be a real built-in file object; something else with a :meth:`read` + method won't do. + + +.. method:: array.fromlist(list) + + Append items from the list. 
This is equivalent to ``for x in list: + a.append(x)`` except that if there is a type error, the array is unchanged. + + +.. method:: array.fromstring(s) + + Appends items from the string, interpreting the string as an array of machine + values (as if it had been read from a file using the :meth:`fromfile` method). + + +.. method:: array.fromunicode(s) + + Extends this array with data from the given unicode string. The array must + be a type ``'u'`` array; otherwise a :exc:`ValueError` is raised. Use + ``array.fromstring(unicodestring.encode(enc))`` to append Unicode data to an + array of some other type. + + +.. method:: array.index(x) + + Return the smallest *i* such that *i* is the index of the first occurrence of + *x* in the array. + + +.. method:: array.insert(i, x) + + Insert a new item with value *x* in the array before position *i*. Negative + values are treated as being relative to the end of the array. + + +.. method:: array.pop([i]) + + Removes the item with the index *i* from the array and returns it. The optional + argument defaults to ``-1``, so that by default the last item is removed and + returned. + + +.. method:: array.read(f, n) + + .. deprecated:: 1.5.1 + Use the :meth:`fromfile` method. + + Read *n* items (as machine values) from the file object *f* and append them to + the end of the array. If less than *n* items are available, :exc:`EOFError` is + raised, but the items that were available are still inserted into the array. + *f* must be a real built-in file object; something else with a :meth:`read` + method won't do. + + +.. method:: array.remove(x) + + Remove the first occurrence of *x* from the array. + + +.. method:: array.reverse() + + Reverse the order of the items in the array. + + +.. method:: array.tofile(f) + + Write all items (as machine values) to the file object *f*. + + +.. method:: array.tolist() + + Convert the array to an ordinary list with the same items. + + +.. 
method:: array.tostring() + + Convert the array to an array of machine values and return the string + representation (the same sequence of bytes that would be written to a file by + the :meth:`tofile` method). + + +.. method:: array.tounicode() + + Convert the array to a unicode string. The array must be a type ``'u'`` array; + otherwise a :exc:`ValueError` is raised. Use ``array.tostring().decode(enc)`` to + obtain a unicode string from an array of some other type. + + +.. method:: array.write(f) + + .. deprecated:: 1.5.1 + Use the :meth:`tofile` method. + + Write all items (as machine values) to the file object *f*. + +When an array object is printed or converted to a string, it is represented as +``array(typecode, initializer)``. The *initializer* is omitted if the array is +empty, otherwise it is a string if the *typecode* is ``'c'``, otherwise it is a +list of numbers. The string is guaranteed to be able to be converted back to an +array with the same type and value using :func:`eval`, so long as the +:func:`array` function has been imported using ``from array import array``. +Examples:: + + array('l') + array('c', 'hello world') + array('u', u'hello \u2641') + array('l', [1, 2, 3, 4, 5]) + array('d', [1.0, 2.0, 3.14]) + + +.. seealso:: + + Module :mod:`struct` + Packing and unpacking of heterogeneous binary data. + + Module :mod:`xdrlib` + Packing and unpacking of External Data Representation (XDR) data as used in some + remote procedure call systems. + + `The Numerical Python Manual <http://numpy.sourceforge.net/numdoc/HTML/numdoc.htm>`_ + The Numeric Python extension (NumPy) defines another array type; see + http://numpy.sourceforge.net/ for further information about Numerical Python. + (A PDF version of the NumPy manual is available at + http://numpy.sourceforge.net/numdoc/numdoc.pdf). 
+ diff --git a/Doc/library/asynchat.rst b/Doc/library/asynchat.rst new file mode 100644 index 0000000..b651c40 --- /dev/null +++ b/Doc/library/asynchat.rst @@ -0,0 +1,284 @@ + +:mod:`asynchat` --- Asynchronous socket command/response handler +================================================================ + +.. module:: asynchat + :synopsis: Support for asynchronous command/response protocols. +.. moduleauthor:: Sam Rushing +.. sectionauthor:: Steve Holden + + +This module builds on the :mod:`asyncore` infrastructure, simplifying +asynchronous clients and servers and making it easier to handle protocols whose +elements are terminated by arbitrary strings, or are of variable length. +:mod:`asynchat` defines the abstract class :class:`async_chat` that you +subclass, providing implementations of the :meth:`collect_incoming_data` and +:meth:`found_terminator` methods. It uses the same asynchronous loop as +:mod:`asyncore`, and the two types of channel, :class:`asyncore.dispatcher` and +:class:`asynchat.async_chat`, can freely be mixed in the channel map. Typically +an :class:`asyncore.dispatcher` server channel generates new +:class:`asynchat.async_chat` channel objects as it receives incoming connection +requests. + + +.. class:: async_chat() + + This class is an abstract subclass of :class:`asyncore.dispatcher`. To make + practical use of the code you must subclass :class:`async_chat`, providing + meaningful :meth:`collect_incoming_data` and :meth:`found_terminator` methods. + The :class:`asyncore.dispatcher` methods can be used, although not all make + sense in a message/response context. + + Like :class:`asyncore.dispatcher`, :class:`async_chat` defines a set of events + that are generated by an analysis of socket conditions after a :cfunc:`select` + call. Once the polling loop has been started the :class:`async_chat` object's + methods are called by the event-processing framework with no action on the part + of the programmer. 
+ + Unlike :class:`asyncore.dispatcher`, :class:`async_chat` allows you to define a + first-in-first-out queue (fifo) of *producers*. A producer need have only one + method, :meth:`more`, which should return data to be transmitted on the channel. + The producer indicates exhaustion (*i.e.* that it contains no more data) by + having its :meth:`more` method return the empty string. At this point the + :class:`async_chat` object removes the producer from the fifo and starts using + the next producer, if any. When the producer fifo is empty the + :meth:`handle_write` method does nothing. You use the channel object's + :meth:`set_terminator` method to describe how to recognize the end of, or an + important breakpoint in, an incoming transmission from the remote endpoint. + + To build a functioning :class:`async_chat` subclass your input methods + :meth:`collect_incoming_data` and :meth:`found_terminator` must handle the data + that the channel receives asynchronously. The methods are described below. + + +.. method:: async_chat.close_when_done() + + Pushes a ``None`` on to the producer fifo. When this producer is popped off the + fifo it causes the channel to be closed. + + +.. method:: async_chat.collect_incoming_data(data) + + Called with *data* holding an arbitrary amount of received data. The default + method, which must be overridden, raises a :exc:`NotImplementedError` exception. + + +.. method:: async_chat.discard_buffers() + + In emergencies this method will discard any data held in the input and/or output + buffers and the producer fifo. + + +.. method:: async_chat.found_terminator() + + Called when the incoming data stream matches the termination condition set by + :meth:`set_terminator`. The default method, which must be overridden, raises a + :exc:`NotImplementedError` exception. The buffered input data should be + available via an instance attribute. + + +.. method:: async_chat.get_terminator() + + Returns the current terminator for the channel. + + +.. 
method:: async_chat.handle_close() + + Called when the channel is closed. The default method silently closes the + channel's socket. + + +.. method:: async_chat.handle_read() + + Called when a read event fires on the channel's socket in the asynchronous loop. + The default method checks for the termination condition established by + :meth:`set_terminator`, which can be either the appearance of a particular + string in the input stream or the receipt of a particular number of characters. + When the terminator is found, :meth:`handle_read` calls the + :meth:`found_terminator` method after calling :meth:`collect_incoming_data` with + any data preceding the terminating condition. + + +.. method:: async_chat.handle_write() + + Called when the application may write data to the channel. The default method + calls the :meth:`initiate_send` method, which in turn will call + :meth:`refill_buffer` to collect data from the producer fifo associated with the + channel. + + +.. method:: async_chat.push(data) + + Creates a :class:`simple_producer` object (*see below*) containing the data and + pushes it on to the channel's ``producer_fifo`` to ensure its transmission. This + is all you need to do to have the channel write the data out to the network, + although it is possible to use your own producers in more complex schemes to + implement encryption and chunking, for example. + + +.. method:: async_chat.push_with_producer(producer) + + Takes a producer object and adds it to the producer fifo associated with the + channel. When all currently-pushed producers have been exhausted the channel + will consume this producer's data by calling its :meth:`more` method and send + the data to the remote endpoint. + + +.. method:: async_chat.readable() + + Should return ``True`` for the channel to be included in the set of channels + tested by the :cfunc:`select` loop for readability. + + +.. 
method:: async_chat.refill_buffer() + + Refills the output buffer by calling the :meth:`more` method of the producer at + the head of the fifo. If it is exhausted then the producer is popped off the + fifo and the next producer is activated. If the current producer is, or becomes, + ``None`` then the channel is closed. + + +.. method:: async_chat.set_terminator(term) + + Sets the terminating condition to be recognised on the channel. ``term`` may be + any of three types of value, corresponding to three different ways to handle + incoming protocol data. + + +-----------+---------------------------------------------+ + | term | Description | + +===========+=============================================+ + | *string* | Will call :meth:`found_terminator` when the | + | | string is found in the input stream | + +-----------+---------------------------------------------+ + | *integer* | Will call :meth:`found_terminator` when the | + | | indicated number of characters have been | + | | received | + +-----------+---------------------------------------------+ + | ``None`` | The channel continues to collect data | + | | forever | + +-----------+---------------------------------------------+ + + Note that any data following the terminator will be available for reading by the + channel after :meth:`found_terminator` is called. + + +.. method:: async_chat.writable() + + Should return ``True`` as long as items remain on the producer fifo, or the + channel is connected and the channel's output buffer is non-empty. + + +asynchat - Auxiliary Classes and Functions +------------------------------------------ + + +.. class:: simple_producer(data[, buffer_size=512]) + + A :class:`simple_producer` takes a chunk of data and an optional buffer size. + Repeated calls to its :meth:`more` method yield successive chunks of the data no + larger than *buffer_size*. + + +.. 
method:: simple_producer.more() + + Produces the next chunk of information from the producer, or returns the empty + string. + + +.. class:: fifo([list=None]) + + Each channel maintains a :class:`fifo` holding data which has been pushed by the + application but not yet popped for writing to the channel. A :class:`fifo` is a + list used to hold data and/or producers until they are required. If the *list* + argument is provided then it should contain producers or data items to be + written to the channel. + + +.. method:: fifo.is_empty() + + Returns ``True`` iff the fifo is empty. + + +.. method:: fifo.first() + + Returns the least-recently :meth:`push`\ ed item from the fifo. + + +.. method:: fifo.push(data) + + Adds the given data (which may be a string or a producer object) to the producer + fifo. + + +.. method:: fifo.pop() + + If the fifo is not empty, returns ``True, first()``, deleting the popped item. + Returns ``False, None`` for an empty fifo. + +The :mod:`asynchat` module also defines one utility function, which may be of +use in network and textual analysis operations. + + +.. function:: find_prefix_at_end(haystack, needle) + + Returns ``True`` if string *haystack* ends with any non-empty prefix of string + *needle*. + + +.. _asynchat-example: + +asynchat Example +---------------- + +The following partial example shows how HTTP requests can be read with +:class:`async_chat`. A web server might create an :class:`http_request_handler` +object for each incoming client connection. Notice that initially the channel +terminator is set to match the blank line at the end of the HTTP headers, and a +flag indicates that the headers are being read. + +Once the headers have been read, if the request is of type POST (indicating that +further data are present in the input stream) then the ``Content-Length:`` +header is used to set a numeric terminator to read the right amount of data from +the channel. 
+ +The :meth:`handle_request` method is called once all relevant input has been +marshalled, after setting the channel terminator to ``None`` to ensure that any +extraneous data sent by the web client are ignored. :: + + class http_request_handler(asynchat.async_chat): + + def __init__(self, conn, addr, sessions, log): + asynchat.async_chat.__init__(self, conn=conn) + self.addr = addr + self.sessions = sessions + self.ibuffer = [] + self.obuffer = "" + self.set_terminator("\r\n\r\n") + self.reading_headers = True + self.handling = False + self.cgi_data = None + self.log = log + + def collect_incoming_data(self, data): + """Buffer the data""" + self.ibuffer.append(data) + + def found_terminator(self): + if self.reading_headers: + self.reading_headers = False + self.parse_headers("".join(self.ibuffer)) + self.ibuffer = [] + if self.op.upper() == "POST": + clen = self.headers.getheader("content-length") + self.set_terminator(int(clen)) + else: + self.handling = True + self.set_terminator(None) + self.handle_request() + elif not self.handling: + self.set_terminator(None) # browsers sometimes over-send + self.cgi_data = parse(self.headers, "".join(self.ibuffer)) + self.handling = True + self.ibuffer = [] + self.handle_request() + diff --git a/Doc/library/asyncore.rst b/Doc/library/asyncore.rst new file mode 100644 index 0000000..7f80dd3 --- /dev/null +++ b/Doc/library/asyncore.rst @@ -0,0 +1,269 @@ + +:mod:`asyncore` --- Asynchronous socket handler +=============================================== + +.. module:: asyncore + :synopsis: A base class for developing asynchronous socket handling services. +.. moduleauthor:: Sam Rushing +.. sectionauthor:: Christopher Petrilli +.. sectionauthor:: Steve Holden + + +This module provides the basic infrastructure for writing asynchronous socket +service clients and servers. + +.. % Heavily adapted from original documentation by Sam Rushing. 
+ +There are only two ways to have a program on a single processor do "more than +one thing at a time." Multi-threaded programming is the simplest and most +popular way to do it, but there is another very different technique, that lets +you have nearly all the advantages of multi-threading, without actually using +multiple threads. It's really only practical if your program is largely I/O +bound. If your program is processor bound, then pre-emptive scheduled threads +are probably what you really need. Network servers are rarely processor bound, +however. + +If your operating system supports the :cfunc:`select` system call in its I/O +library (and nearly all do), then you can use it to juggle multiple +communication channels at once; doing other work while your I/O is taking place +in the "background." Although this strategy can seem strange and complex, +especially at first, it is in many ways easier to understand and control than +multi-threaded programming. The :mod:`asyncore` module solves many of the +difficult problems for you, making the task of building sophisticated +high-performance network servers and clients a snap. For "conversational" +applications and protocols the companion :mod:`asynchat` module is invaluable. + +The basic idea behind both modules is to create one or more network *channels*, +instances of class :class:`asyncore.dispatcher` and +:class:`asynchat.async_chat`. Creating the channels adds them to a global map, +used by the :func:`loop` function if you do not provide it with your own *map*. + +Once the initial channel(s) is(are) created, calling the :func:`loop` function +activates channel service, which continues until the last channel (including any +that have been added to the map during asynchronous service) is closed. + + +.. function:: loop([timeout[, use_poll[, map[, count]]]]) + + Enter a polling loop that terminates after *count* passes or all open channels + have been closed. All arguments are optional. 
The *count* parameter defaults + to ``None``, resulting in the loop terminating only when all channels have been + closed. The *timeout* argument sets the timeout parameter for the appropriate + :func:`select` or :func:`poll` call, measured in seconds; the default is 30 + seconds. The *use_poll* parameter, if true, indicates that :func:`poll` should + be used in preference to :func:`select` (the default is ``False``). + + The *map* parameter is a dictionary whose items are the channels to watch. As + channels are closed they are deleted from their map. If *map* is omitted, a + global map is used. Channels (instances of :class:`asyncore.dispatcher`, + :class:`asynchat.async_chat` and subclasses thereof) can freely be mixed in the + map. + + +.. class:: dispatcher() + + The :class:`dispatcher` class is a thin wrapper around a low-level socket + object. To make it more useful, it has a few methods for event-handling which + are called from the asynchronous loop. Otherwise, it can be treated as a + normal non-blocking socket object. + + Two class attributes can be modified, to improve performance, or possibly even + to conserve memory. + + + .. data:: ac_in_buffer_size + + The asynchronous input buffer size (default ``4096``). + + + .. data:: ac_out_buffer_size + + The asynchronous output buffer size (default ``4096``). + + The firing of low-level events at certain times or in certain connection states + tells the asynchronous loop that certain higher-level events have taken place. + For example, if we have asked for a socket to connect to another host, we know + that the connection has been made when the socket becomes writable for the first + time (at this point you know that you may write to it with the expectation of + success). 
The implied higher-level events are: + + +----------------------+----------------------------------------+ + | Event | Description | + +======================+========================================+ + | ``handle_connect()`` | Implied by the first write event | + +----------------------+----------------------------------------+ + | ``handle_close()`` | Implied by a read event with no data | + | | available | + +----------------------+----------------------------------------+ + | ``handle_accept()`` | Implied by a read event on a listening | + | | socket | + +----------------------+----------------------------------------+ + + During asynchronous processing, each mapped channel's :meth:`readable` and + :meth:`writable` methods are used to determine whether the channel's socket + should be added to the list of channels :cfunc:`select`\ ed or :cfunc:`poll`\ ed + for read and write events. + +Thus, the set of channel events is larger than the basic socket events. The full +set of methods that can be overridden in your subclass follows: + + +.. method:: dispatcher.handle_read() + + Called when the asynchronous loop detects that a :meth:`read` call on the + channel's socket will succeed. + + +.. method:: dispatcher.handle_write() + + Called when the asynchronous loop detects that a writable socket can be written. + Often this method will implement the necessary buffering for performance. For + example:: + + def handle_write(self): + sent = self.send(self.buffer) + self.buffer = self.buffer[sent:] + + +.. method:: dispatcher.handle_expt() + + Called when there is out of band (OOB) data for a socket connection. This will + almost never happen, as OOB is tenuously supported and rarely used. + + +.. method:: dispatcher.handle_connect() + + Called when the active opener's socket actually makes a connection. Might send a + "welcome" banner, or initiate a protocol negotiation with the remote endpoint, + for example. + + +.. 
method:: dispatcher.handle_close() + + Called when the socket is closed. + + +.. method:: dispatcher.handle_error() + + Called when an exception is raised and not otherwise handled. The default + version prints a condensed traceback. + + +.. method:: dispatcher.handle_accept() + + Called on listening channels (passive openers) when a connection can be + established with a new remote endpoint that has issued a :meth:`connect` call + for the local endpoint. + + +.. method:: dispatcher.readable() + + Called each time around the asynchronous loop to determine whether a channel's + socket should be added to the list on which read events can occur. The default + method simply returns ``True``, indicating that by default, all channels will + be interested in read events. + + +.. method:: dispatcher.writable() + + Called each time around the asynchronous loop to determine whether a channel's + socket should be added to the list on which write events can occur. The default + method simply returns ``True``, indicating that by default, all channels will + be interested in write events. + +In addition, each channel delegates or extends many of the socket methods. Most +of these are nearly identical to their socket partners. + + +.. method:: dispatcher.create_socket(family, type) + + This is identical to the creation of a normal socket, and will use the same + options for creation. Refer to the :mod:`socket` documentation for information + on creating sockets. + + +.. method:: dispatcher.connect(address) + + As with the normal socket object, *address* is a tuple with the first element + the host to connect to, and the second the port number. + + +.. method:: dispatcher.send(data) + + Send *data* to the remote end-point of the socket. + + +.. method:: dispatcher.recv(buffer_size) + + Read at most *buffer_size* bytes from the socket's remote end-point. An empty + string implies that the channel has been closed from the other end. + + +.. 
method:: dispatcher.listen(backlog) + + Listen for connections made to the socket. The *backlog* argument specifies the + maximum number of queued connections and should be at least 1; the maximum value + is system-dependent (usually 5). + + +.. method:: dispatcher.bind(address) + + Bind the socket to *address*. The socket must not already be bound. (The + format of *address* depends on the address family --- see above.) To mark the + socket as re-usable (setting the :const:`SO_REUSEADDR` option), call the + :class:`dispatcher` object's :meth:`set_reuse_addr` method. + + +.. method:: dispatcher.accept() + + Accept a connection. The socket must be bound to an address and listening for + connections. The return value is a pair ``(conn, address)`` where *conn* is a + *new* socket object usable to send and receive data on the connection, and + *address* is the address bound to the socket on the other end of the connection. + + +.. method:: dispatcher.close() + + Close the socket. All future operations on the socket object will fail. The + remote end-point will receive no more data (after queued data is flushed). + Sockets are automatically closed when they are garbage-collected. + + +.. 
_asyncore-example: + +asyncore Example basic HTTP client +---------------------------------- + +Here is a very basic HTTP client that uses the :class:`dispatcher` class to +implement its socket handling:: + + import asyncore, socket + + class http_client(asyncore.dispatcher): + + def __init__(self, host, path): + asyncore.dispatcher.__init__(self) + self.create_socket(socket.AF_INET, socket.SOCK_STREAM) + self.connect( (host, 80) ) + self.buffer = 'GET %s HTTP/1.0\r\n\r\n' % path + + def handle_connect(self): + pass + + def handle_close(self): + self.close() + + def handle_read(self): + print self.recv(8192) + + def writable(self): + return (len(self.buffer) > 0) + + def handle_write(self): + sent = self.send(self.buffer) + self.buffer = self.buffer[sent:] + + c = http_client('www.python.org', '/') + + asyncore.loop() + diff --git a/Doc/library/atexit.rst b/Doc/library/atexit.rst new file mode 100644 index 0000000..94d750b --- /dev/null +++ b/Doc/library/atexit.rst @@ -0,0 +1,105 @@ + +:mod:`atexit` --- Exit handlers +=============================== + +.. module:: atexit + :synopsis: Register and execute cleanup functions. +.. moduleauthor:: Skip Montanaro +.. sectionauthor:: Skip Montanaro + + +.. versionadded:: 2.0 + +The :mod:`atexit` module defines functions to register and unregister cleanup +functions. Functions thus registered are automatically executed upon normal +interpreter termination. + +Note: the functions registered via this module are not called when the program +is killed by a signal, when a Python fatal internal error is detected, or when +:func:`os._exit` is called. + + +.. function:: register(func[, *args[, **kargs]]) + + Register *func* as a function to be executed at termination. Any optional + arguments that are to be passed to *func* must be passed as arguments to + :func:`register`. 
+ + At normal program termination (for instance, if :func:`sys.exit` is called or + the main module's execution completes), all functions registered are called in + last in, first out order. The assumption is that lower level modules will + normally be imported before higher level modules and thus must be cleaned up + later. + + If an exception is raised during execution of the exit handlers, a traceback is + printed (unless :exc:`SystemExit` is raised) and the exception information is + saved. After all exit handlers have had a chance to run the last exception to + be raised is re-raised. + + .. versionchanged:: 2.6 + This function now returns *func* which makes it possible to use it as a + decorator without binding the original name to ``None``. + + +.. function:: unregister(func) + + Remove a function *func* from the list of functions to be run at interpreter- + shutdown. After calling :func:`unregister`, *func* is guaranteed not to be + called when the interpreter shuts down. + + .. versionadded:: 3.0 + + +.. seealso:: + + Module :mod:`readline` + Useful example of :mod:`atexit` to read and write :mod:`readline` history files. + + +.. _atexit-example: + +:mod:`atexit` Example +--------------------- + +The following simple example demonstrates how a module can initialize a counter +from a file when it is imported and save the counter's updated value +automatically when the program terminates without relying on the application +making an explicit call into this module at termination. 
:: + + try: + _count = int(open("/tmp/counter").read()) + except IOError: + _count = 0 + + def incrcounter(n): + global _count + _count = _count + n + + def savecounter(): + open("/tmp/counter", "w").write("%d" % _count) + + import atexit + atexit.register(savecounter) + +Positional and keyword arguments may also be passed to :func:`register` to be +passed along to the registered function when it is called:: + + def goodbye(name, adjective): + print 'Goodbye, %s, it was %s to meet you.' % (name, adjective) + + import atexit + atexit.register(goodbye, 'Donny', 'nice') + + # or: + atexit.register(goodbye, adjective='nice', name='Donny') + +Usage as a decorator:: + + import atexit + + @atexit.register + def goodbye(): + print "You are now leaving the Python sector." + +This obviously only works with functions that don't take arguments. + diff --git a/Doc/library/audioop.rst b/Doc/library/audioop.rst new file mode 100644 index 0000000..84a2690 --- /dev/null +++ b/Doc/library/audioop.rst @@ -0,0 +1,261 @@ + +:mod:`audioop` --- Manipulate raw audio data +============================================ + +.. module:: audioop + :synopsis: Manipulate raw audio data. + + +The :mod:`audioop` module contains some useful operations on sound fragments. +It operates on sound fragments consisting of signed integer samples 8, 16 or 32 +bits wide, stored in Python strings. All scalar items are integers, unless +specified otherwise. + +.. index:: + single: Intel/DVI ADPCM + single: ADPCM, Intel/DVI + single: a-LAW + single: u-LAW + +This module provides support for a-LAW, u-LAW and Intel/DVI ADPCM encodings. + +.. % This para is mostly here to provide an excuse for the index entries... + +A few of the more complicated operations only take 16-bit samples, otherwise the +sample size (in bytes) is always a parameter of the operation. + +The module defines the following variables and functions: + + +.. 
exception:: error + + This exception is raised on all errors, such as unknown number of bytes per + sample, etc. + + +.. function:: add(fragment1, fragment2, width) + + Return a fragment which is the addition of the two samples passed as parameters. + *width* is the sample width in bytes, either ``1``, ``2`` or ``4``. Both + fragments should have the same length. + + +.. function:: adpcm2lin(adpcmfragment, width, state) + + Decode an Intel/DVI ADPCM coded fragment to a linear fragment. See the + description of :func:`lin2adpcm` for details on ADPCM coding. Return a tuple + ``(sample, newstate)`` where the sample has the width specified in *width*. + + +.. function:: alaw2lin(fragment, width) + + Convert sound fragments in a-LAW encoding to linearly encoded sound fragments. + a-LAW encoding always uses 8 bits samples, so *width* refers only to the sample + width of the output fragment here. + + .. versionadded:: 2.5 + + +.. function:: avg(fragment, width) + + Return the average over all samples in the fragment. + + +.. function:: avgpp(fragment, width) + + Return the average peak-peak value over all samples in the fragment. No + filtering is done, so the usefulness of this routine is questionable. + + +.. function:: bias(fragment, width, bias) + + Return a fragment that is the original fragment with a bias added to each + sample. + + +.. function:: cross(fragment, width) + + Return the number of zero crossings in the fragment passed as an argument. + + +.. function:: findfactor(fragment, reference) + + Return a factor *F* such that ``rms(add(fragment, mul(reference, -F)))`` is + minimal, i.e., return the factor with which you should multiply *reference* to + make it match as well as possible to *fragment*. The fragments should both + contain 2-byte samples. + + The time taken by this routine is proportional to ``len(fragment)``. + + +.. 
function:: findfit(fragment, reference) + + Try to match *reference* as well as possible to a portion of *fragment* (which + should be the longer fragment). This is (conceptually) done by taking slices + out of *fragment*, using :func:`findfactor` to compute the best match, and + minimizing the result. The fragments should both contain 2-byte samples. + Return a tuple ``(offset, factor)`` where *offset* is the (integer) offset into + *fragment* where the optimal match started and *factor* is the (floating-point) + factor as per :func:`findfactor`. + + +.. function:: findmax(fragment, length) + + Search *fragment* for a slice of length *length* samples (not bytes!) with + maximum energy, i.e., return *i* for which ``rms(fragment[i*2:(i+length)*2])`` + is maximal. The fragment should contain 2-byte samples. + + The routine takes time proportional to ``len(fragment)``. + + +.. function:: getsample(fragment, width, index) + + Return the value of sample *index* from the fragment. + + +.. function:: lin2adpcm(fragment, width, state) + + Convert samples to 4 bit Intel/DVI ADPCM encoding. ADPCM coding is an adaptive + coding scheme, whereby each 4 bit number is the difference between one sample + and the next, divided by a (varying) step. The Intel/DVI ADPCM algorithm has + been selected for use by the IMA, so it may well become a standard. + + *state* is a tuple containing the state of the coder. The coder returns a tuple + ``(adpcmfrag, newstate)``, and the *newstate* should be passed to the next call + of :func:`lin2adpcm`. In the initial call, ``None`` can be passed as the state. + *adpcmfrag* is the ADPCM coded fragment packed 2 4-bit values per byte. + + +.. function:: lin2alaw(fragment, width) + + Convert samples in the audio fragment to a-LAW encoding and return this as a + Python string. a-LAW is an audio encoding format whereby you get a dynamic + range of about 13 bits using only 8 bit samples. It is used by the Sun audio + hardware, among others. + + ..
versionadded:: 2.5 + + +.. function:: lin2lin(fragment, width, newwidth) + + Convert samples between 1-, 2- and 4-byte formats. + + +.. function:: lin2ulaw(fragment, width) + + Convert samples in the audio fragment to u-LAW encoding and return this as a + Python string. u-LAW is an audio encoding format whereby you get a dynamic + range of about 14 bits using only 8 bit samples. It is used by the Sun audio + hardware, among others. + + +.. function:: minmax(fragment, width) + + Return a tuple consisting of the minimum and maximum values of all samples in + the sound fragment. + + +.. function:: max(fragment, width) + + Return the maximum of the *absolute value* of all samples in a fragment. + + +.. function:: maxpp(fragment, width) + + Return the maximum peak-peak value in the sound fragment. + + +.. function:: mul(fragment, width, factor) + + Return a fragment that has all samples in the original fragment multiplied by + the floating-point value *factor*. Overflow is silently ignored. + + +.. function:: ratecv(fragment, width, nchannels, inrate, outrate, state[, weightA[, weightB]]) + + Convert the frame rate of the input fragment. + + *state* is a tuple containing the state of the converter. The converter returns + a tuple ``(newfragment, newstate)``, and *newstate* should be passed to the next + call of :func:`ratecv`. The initial call should pass ``None`` as the state. + + The *weightA* and *weightB* arguments are parameters for a simple digital filter + and default to ``1`` and ``0`` respectively. + + +.. function:: reverse(fragment, width) + + Reverse the samples in a fragment and return the modified fragment. + + +.. function:: rms(fragment, width) + + Return the root-mean-square of the fragment, i.e. ``sqrt(sum(S_i^2)/n)``. + + This is a measure of the power in an audio signal. + + +.. function:: tomono(fragment, width, lfactor, rfactor) + + Convert a stereo fragment to a mono fragment.
The left channel is multiplied by + *lfactor* and the right channel by *rfactor* before adding the two channels to + give a mono signal. + + +.. function:: tostereo(fragment, width, lfactor, rfactor) + + Generate a stereo fragment from a mono fragment. Each pair of samples in the + stereo fragment is computed from the mono sample, whereby left channel samples + are multiplied by *lfactor* and right channel samples by *rfactor*. + + +.. function:: ulaw2lin(fragment, width) + + Convert sound fragments in u-LAW encoding to linearly encoded sound fragments. + u-LAW encoding always uses 8 bits samples, so *width* refers only to the sample + width of the output fragment here. + +Note that operations such as :func:`mul` or :func:`max` make no distinction +between mono and stereo fragments, i.e. all samples are treated equal. If this +is a problem the stereo fragment should be split into two mono fragments first +and recombined later. Here is an example of how to do that:: + + def mul_stereo(sample, width, lfactor, rfactor): + lsample = audioop.tomono(sample, width, 1, 0) + rsample = audioop.tomono(sample, width, 0, 1) + lsample = audioop.mul(lsample, width, lfactor) + rsample = audioop.mul(rsample, width, rfactor) + lsample = audioop.tostereo(lsample, width, 1, 0) + rsample = audioop.tostereo(rsample, width, 0, 1) + return audioop.add(lsample, rsample, width) + +If you use the ADPCM coder to build network packets and you want your protocol +to be stateless (i.e. to be able to tolerate packet loss) you should not only +transmit the data but also the state. Note that you should send the *initial* +state (the one you passed to :func:`lin2adpcm`) along to the decoder, not the +final state (as returned by the coder). If you want to use +:class:`struct.Struct` to store the state in binary you can code the first +element (the predicted value) in 16 bits and the second (the delta index) in 8.
+ +The ADPCM coders have never been tried against other ADPCM coders, only against +themselves. It could well be that I misinterpreted the standards in which case +they will not be interoperable with the respective standards. + +The :func:`find\*` routines might look a bit funny at first sight. They are +primarily meant to do echo cancellation. A reasonably fast way to do this is to +pick the most energetic piece of the output sample, locate that in the input +sample and subtract the whole output sample from the input sample:: + + def echocancel(outputdata, inputdata): + pos = audioop.findmax(outputdata, 800) # one tenth second + out_test = outputdata[pos*2:] + in_test = inputdata[pos*2:] + ipos, factor = audioop.findfit(in_test, out_test) + # Optional (for better cancellation): + # factor = audioop.findfactor(in_test[ipos*2:ipos*2+len(out_test)], + # out_test) + prefill = '\0'*(pos+ipos)*2 + postfill = '\0'*(len(inputdata)-len(prefill)-len(outputdata)) + outputdata = prefill + audioop.mul(outputdata,2,-factor) + postfill + return audioop.add(inputdata, outputdata, 2) + diff --git a/Doc/library/autogil.rst b/Doc/library/autogil.rst new file mode 100644 index 0000000..93f0d04 --- /dev/null +++ b/Doc/library/autogil.rst @@ -0,0 +1,30 @@ + +:mod:`autoGIL` --- Global Interpreter Lock handling in event loops +================================================================== + +.. module:: autoGIL + :platform: Mac + :synopsis: Global Interpreter Lock handling in event loops. +.. moduleauthor:: Just van Rossum + + +The :mod:`autoGIL` module provides a function :func:`installAutoGIL` that +automatically locks and unlocks Python's Global Interpreter Lock when running an +event loop. + + +.. exception:: AutoGILError + + Raised if the observer callback cannot be installed, for example because the + current thread does not have a run loop. + + +.. 
function:: installAutoGIL() + + Install an observer callback in the event loop (CFRunLoop) for the current + thread, that will lock and unlock the Global Interpreter Lock (GIL) at + appropriate times, allowing other Python threads to run while the event loop is + idle. + + Availability: OSX 10.1 or later. + diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst new file mode 100644 index 0000000..daa8fd5 --- /dev/null +++ b/Doc/library/base64.rst @@ -0,0 +1,172 @@ + +:mod:`base64` --- RFC 3548: Base16, Base32, Base64 Data Encodings +================================================================= + +.. module:: base64 + :synopsis: RFC 3548: Base16, Base32, Base64 Data Encodings + + +.. index:: + pair: base64; encoding + single: MIME; base64 encoding + +This module provides data encoding and decoding as specified in :rfc:`3548`. +This standard defines the Base16, Base32, and Base64 algorithms for encoding and +decoding arbitrary binary strings into text strings that can be safely sent by +email, used as parts of URLs, or included as part of an HTTP POST request. The +encoding algorithm is not the same as the :program:`uuencode` program. + +There are two interfaces provided by this module. The modern interface supports +encoding and decoding string objects using all three alphabets. The legacy +interface provides for encoding and decoding to and from file-like objects as +well as strings, but only using the Base64 standard alphabet. + +The modern interface, which was introduced in Python 2.4, provides: + + +.. function:: b64encode(s[, altchars]) + + Encode a string using Base64. + + *s* is the string to encode. Optional *altchars* must be a string of at least + length 2 (additional characters are ignored) which specifies an alternative + alphabet for the ``+`` and ``/`` characters. This allows an application to e.g. + generate URL or filesystem safe Base64 strings. The default is ``None``, for + which the standard Base64 alphabet is used.
+ + The encoded string is returned. + + +.. function:: b64decode(s[, altchars]) + + Decode a Base64 encoded string. + + *s* is the string to decode. Optional *altchars* must be a string of at least + length 2 (additional characters are ignored) which specifies the alternative + alphabet used instead of the ``+`` and ``/`` characters. + + The decoded string is returned. A :exc:`TypeError` is raised if *s* is + incorrectly padded or if there are non-alphabet characters present in the + string. + + +.. function:: standard_b64encode(s) + + Encode string *s* using the standard Base64 alphabet. + + +.. function:: standard_b64decode(s) + + Decode string *s* using the standard Base64 alphabet. + + +.. function:: urlsafe_b64encode(s) + + Encode string *s* using a URL-safe alphabet, which substitutes ``-`` instead of + ``+`` and ``_`` instead of ``/`` in the standard Base64 alphabet. + + +.. function:: urlsafe_b64decode(s) + + Decode string *s* using a URL-safe alphabet, which substitutes ``-`` instead of + ``+`` and ``_`` instead of ``/`` in the standard Base64 alphabet. + + +.. function:: b32encode(s) + + Encode a string using Base32. *s* is the string to encode. The encoded string + is returned. + + +.. function:: b32decode(s[, casefold[, map01]]) + + Decode a Base32 encoded string. + + *s* is the string to decode. Optional *casefold* is a flag specifying whether a + lowercase alphabet is acceptable as input. For security purposes, the default + is ``False``. + + :rfc:`3548` allows for optional mapping of the digit 0 (zero) to the letter O + (oh), and for optional mapping of the digit 1 (one) to either the letter I (eye) + or letter L (el). The optional argument *map01* when not ``None``, specifies + which letter the digit 1 should be mapped to (when *map01* is not ``None``, the + digit 0 is always mapped to the letter O). For security purposes the default is + ``None``, so that 0 and 1 are not allowed in the input. + + The decoded string is returned.
A :exc:`TypeError` is raised if *s* is + incorrectly padded or if there are non-alphabet characters present in the + string. + + +.. function:: b16encode(s) + + Encode a string using Base16. + + *s* is the string to encode. The encoded string is returned. + + +.. function:: b16decode(s[, casefold]) + + Decode a Base16 encoded string. + + *s* is the string to decode. Optional *casefold* is a flag specifying whether a + lowercase alphabet is acceptable as input. For security purposes, the default + is ``False``. + + The decoded string is returned. A :exc:`TypeError` is raised if *s* is + incorrectly padded or if there are non-alphabet characters present in the + string. + +The legacy interface: + + +.. function:: decode(input, output) + + Decode the contents of the *input* file and write the resulting binary data to + the *output* file. *input* and *output* must either be file objects or objects + that mimic the file object interface. *input* will be read until + ``input.read()`` returns an empty string. + + +.. function:: decodestring(s) + + Decode the string *s*, which must contain one or more lines of base64 encoded + data, and return a string containing the resulting binary data. + + +.. function:: encode(input, output) + + Encode the contents of the *input* file and write the resulting base64 encoded + data to the *output* file. *input* and *output* must either be file objects or + objects that mimic the file object interface. *input* will be read until + ``input.read()`` returns an empty string. :func:`encode` returns the encoded + data plus a trailing newline character (``'\n'``). + + +.. function:: encodestring(s) + + Encode the string *s*, which can contain arbitrary binary data, and return a + string containing one or more lines of base64-encoded data. + :func:`encodestring` returns a string containing one or more lines of + base64-encoded data always including an extra trailing newline (``'\n'``).
+ +An example usage of the module:: + + >>> import base64 + >>> encoded = base64.b64encode('data to be encoded') + >>> encoded + 'ZGF0YSB0byBiZSBlbmNvZGVk' + >>> data = base64.b64decode(encoded) + >>> data + 'data to be encoded' + + +.. seealso:: + + Module :mod:`binascii` + Support module containing ASCII-to-binary and binary-to-ASCII conversions. + + :rfc:`1521` - MIME (Multipurpose Internet Mail Extensions) Part One: Mechanisms for Specifying and Describing the Format of Internet Message Bodies + Section 5.2, "Base64 Content-Transfer-Encoding," provides the definition of the + base64 encoding. + diff --git a/Doc/library/basehttpserver.rst b/Doc/library/basehttpserver.rst new file mode 100644 index 0000000..2e8d6a3 --- /dev/null +++ b/Doc/library/basehttpserver.rst @@ -0,0 +1,254 @@ + +:mod:`BaseHTTPServer` --- Basic HTTP server +=========================================== + +.. module:: BaseHTTPServer + :synopsis: Basic HTTP server (base class for SimpleHTTPServer and CGIHTTPServer). + + +.. index:: + pair: WWW; server + pair: HTTP; protocol + single: URL + single: httpd + +.. index:: + module: SimpleHTTPServer + module: CGIHTTPServer + +This module defines two classes for implementing HTTP servers (Web servers). +Usually, this module isn't used directly, but is used as a basis for building +functioning Web servers. See the :mod:`SimpleHTTPServer` and +:mod:`CGIHTTPServer` modules. + +The first class, :class:`HTTPServer`, is a :class:`SocketServer.TCPServer` +subclass. It creates and listens at the HTTP socket, dispatching the requests +to a handler. Code to create and run the server looks like this:: + + def run(server_class=BaseHTTPServer.HTTPServer, + handler_class=BaseHTTPServer.BaseHTTPRequestHandler): + server_address = ('', 8000) + httpd = server_class(server_address, handler_class) + httpd.serve_forever() + + +.. 
class:: HTTPServer(server_address, RequestHandlerClass) + + This class builds on the :class:`TCPServer` class by storing the server address + as instance variables named :attr:`server_name` and :attr:`server_port`. The + server is accessible by the handler, typically through the handler's + :attr:`server` instance variable. + + +.. class:: BaseHTTPRequestHandler(request, client_address, server) + + This class is used to handle the HTTP requests that arrive at the server. By + itself, it cannot respond to any actual HTTP requests; it must be subclassed to + handle each request method (e.g. GET or POST). :class:`BaseHTTPRequestHandler` + provides a number of class and instance variables, and methods for use by + subclasses. + + The handler will parse the request and the headers, then call a method specific + to the request type. The method name is constructed from the request. For + example, for the request method ``SPAM``, the :meth:`do_SPAM` method will be + called with no arguments. All of the relevant information is stored in instance + variables of the handler. Subclasses should not need to override or extend the + :meth:`__init__` method. + +:class:`BaseHTTPRequestHandler` has the following instance variables: + + +.. attribute:: BaseHTTPRequestHandler.client_address + + Contains a tuple of the form ``(host, port)`` referring to the client's address. + + +.. attribute:: BaseHTTPRequestHandler.command + + Contains the command (request type). For example, ``'GET'``. + + +.. attribute:: BaseHTTPRequestHandler.path + + Contains the request path. + + +.. attribute:: BaseHTTPRequestHandler.request_version + + Contains the version string from the request. For example, ``'HTTP/1.0'``. + + +.. attribute:: BaseHTTPRequestHandler.headers + + Holds an instance of the class specified by the :attr:`MessageClass` class + variable. This instance parses and manages the headers in the HTTP request. + + +.. 
attribute:: BaseHTTPRequestHandler.rfile + + Contains an input stream, positioned at the start of the optional input data. + + +.. attribute:: BaseHTTPRequestHandler.wfile + + Contains the output stream for writing a response back to the client. Proper + adherence to the HTTP protocol must be used when writing to this stream. + +:class:`BaseHTTPRequestHandler` has the following class variables: + + +.. attribute:: BaseHTTPRequestHandler.server_version + + Specifies the server software version. You may want to override this. The + format is multiple whitespace-separated strings, where each string is of the + form name[/version]. For example, ``'BaseHTTP/0.2'``. + + +.. attribute:: BaseHTTPRequestHandler.sys_version + + Contains the Python system version, in a form usable by the + :meth:`version_string` method and the :attr:`server_version` class variable. For + example, ``'Python/1.4'``. + + +.. attribute:: BaseHTTPRequestHandler.error_message_format + + Specifies a format string for building an error response to the client. It uses + parenthesized, keyed format specifiers, so the format operand must be a + dictionary. The *code* key should be an integer, specifying the numeric HTTP + error code value. *message* should be a string containing a (detailed) error + message of what occurred, and *explain* should be an explanation of the error + code number. Default *message* and *explain* values can be found in the *responses* + class variable. + + +.. attribute:: BaseHTTPRequestHandler.protocol_version + + This specifies the HTTP protocol version used in responses. If set to + ``'HTTP/1.1'``, the server will permit HTTP persistent connections; however, + your server *must* then include an accurate ``Content-Length`` header (using + :meth:`send_header`) in all of its responses to clients. For backwards + compatibility, the setting defaults to ``'HTTP/1.0'``. + + +.. attribute:: BaseHTTPRequestHandler.MessageClass + + ..
index:: single: Message (in module mimetools) + + Specifies a :class:`rfc822.Message`\ -like class to parse HTTP headers. + Typically, this is not overridden, and it defaults to + :class:`mimetools.Message`. + + +.. attribute:: BaseHTTPRequestHandler.responses + + This variable contains a mapping of error code integers to two-element tuples + containing a short and long message. For example, ``{code: (shortmessage, + longmessage)}``. The *shortmessage* is usually used as the *message* key in an + error response, and *longmessage* as the *explain* key (see the + :attr:`error_message_format` class variable). + +A :class:`BaseHTTPRequestHandler` instance has the following methods: + + +.. method:: BaseHTTPRequestHandler.handle() + + Calls :meth:`handle_one_request` once (or, if persistent connections are + enabled, multiple times) to handle incoming HTTP requests. You should never need + to override it; instead, implement appropriate :meth:`do_\*` methods. + + +.. method:: BaseHTTPRequestHandler.handle_one_request() + + This method will parse and dispatch the request to the appropriate :meth:`do_\*` + method. You should never need to override it. + + +.. method:: BaseHTTPRequestHandler.send_error(code[, message]) + + Sends and logs a complete error reply to the client. The numeric *code* + specifies the HTTP error code, with *message* as optional, more specific text. A + complete set of headers is sent, followed by text composed using the + :attr:`error_message_format` class variable. + + +.. method:: BaseHTTPRequestHandler.send_response(code[, message]) + + Sends a response header and logs the accepted request. The HTTP response line is + sent, followed by *Server* and *Date* headers. The values for these two headers + are picked up from the :meth:`version_string` and :meth:`date_time_string` + methods, respectively. + + +.. method:: BaseHTTPRequestHandler.send_header(keyword, value) + + Writes a specific HTTP header to the output stream. 
*keyword* should specify the + header keyword, with *value* specifying its value. + + +.. method:: BaseHTTPRequestHandler.end_headers() + + Sends a blank line, indicating the end of the HTTP headers in the response. + + +.. method:: BaseHTTPRequestHandler.log_request([code[, size]]) + + Logs an accepted (successful) request. *code* should specify the numeric HTTP + code associated with the response. If a size of the response is available, then + it should be passed as the *size* parameter. + + +.. method:: BaseHTTPRequestHandler.log_error(...) + + Logs an error when a request cannot be fulfilled. By default, it passes the + message to :meth:`log_message`, so it takes the same arguments (*format* and + additional values). + + +.. method:: BaseHTTPRequestHandler.log_message(format, ...) + + Logs an arbitrary message to ``sys.stderr``. This is typically overridden to + create custom error logging mechanisms. The *format* argument is a standard + printf-style format string, where the additional arguments to + :meth:`log_message` are applied as inputs to the formatting. The client address + and current date and time are prefixed to every message logged. + + +.. method:: BaseHTTPRequestHandler.version_string() + + Returns the server software's version string. This is a combination of the + :attr:`server_version` and :attr:`sys_version` class variables. + + +.. method:: BaseHTTPRequestHandler.date_time_string([timestamp]) + + Returns the date and time given by *timestamp* (which must be in the format + returned by :func:`time.time`), formatted for a message header. If *timestamp* + is omitted, it uses the current date and time. + + The result looks like ``'Sun, 06 Nov 1994 08:49:37 GMT'``. + + .. versionadded:: 2.5 + The *timestamp* parameter. + + +.. method:: BaseHTTPRequestHandler.log_date_time_string() + + Returns the current date and time, formatted for logging. + + +.. 
method:: BaseHTTPRequestHandler.address_string() + + Returns the client address, formatted for logging. A name lookup is performed on + the client's IP address. + + +.. seealso:: + + Module :mod:`CGIHTTPServer` + Extended request handler that supports CGI scripts. + + Module :mod:`SimpleHTTPServer` + Basic request handler that limits response to files actually under the document + root. + diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst new file mode 100644 index 0000000..ffea232 --- /dev/null +++ b/Doc/library/binascii.rst @@ -0,0 +1,161 @@ + +:mod:`binascii` --- Convert between binary and ASCII +==================================================== + +.. module:: binascii + :synopsis: Tools for converting between binary and various ASCII-encoded binary + representations. + + +.. index:: + module: uu + module: base64 + module: binhex + +The :mod:`binascii` module contains a number of methods to convert between +binary and various ASCII-encoded binary representations. Normally, you will not +use these functions directly but use wrapper modules like :mod:`uu`, +:mod:`base64`, or :mod:`binhex` instead. The :mod:`binascii` module contains +low-level functions written in C for greater speed that are used by the +higher-level modules. + +The :mod:`binascii` module defines the following functions: + + +.. function:: a2b_uu(string) + + Convert a single line of uuencoded data back to binary and return the binary + data. Lines normally contain 45 (binary) bytes, except for the last line. Line + data may be followed by whitespace. + + +.. function:: b2a_uu(data) + + Convert binary data to a line of ASCII characters, the return value is the + converted line, including a newline char. The length of *data* should be at most + 45. + + +.. function:: a2b_base64(string) + + Convert a block of base64 data back to binary and return the binary data. More + than one line may be passed at a time. + + +.. 
function:: b2a_base64(data) + + Convert binary data to a line of ASCII characters in base64 coding. The return + value is the converted line, including a newline char. The length of *data* + should be at most 57 to adhere to the base64 standard. + + +.. function:: a2b_qp(string[, header]) + + Convert a block of quoted-printable data back to binary and return the binary + data. More than one line may be passed at a time. If the optional argument + *header* is present and true, underscores will be decoded as spaces. + + +.. function:: b2a_qp(data[, quotetabs, istext, header]) + + Convert binary data to a line(s) of ASCII characters in quoted-printable + encoding. The return value is the converted line(s). If the optional argument + *quotetabs* is present and true, all tabs and spaces will be encoded. If the + optional argument *istext* is present and true, newlines are not encoded but + trailing whitespace will be encoded. If the optional argument *header* is + present and true, spaces will be encoded as underscores per RFC1522. If the + optional argument *header* is present and false, newline characters will be + encoded as well; otherwise linefeed conversion might corrupt the binary data + stream. + + +.. function:: a2b_hqx(string) + + Convert binhex4 formatted ASCII data to binary, without doing RLE-decompression. + The string should contain a complete number of binary bytes, or (in case of the + last portion of the binhex4 data) have the remaining bits zero. + + +.. function:: rledecode_hqx(data) + + Perform RLE-decompression on the data, as per the binhex4 standard. The + algorithm uses ``0x90`` after a byte as a repeat indicator, followed by a count. + A count of ``0`` specifies a byte value of ``0x90``. The routine returns the + decompressed data, unless the input data ends in an orphaned repeat indicator, + in which case the :exc:`Incomplete` exception is raised. + + +.. 
function:: rlecode_hqx(data) + + Perform binhex4 style RLE-compression on *data* and return the result. + + +.. function:: b2a_hqx(data) + + Perform hexbin4 binary-to-ASCII translation and return the resulting string. The + argument should already be RLE-coded, and have a length divisible by 3 (except + possibly the last fragment). + + +.. function:: crc_hqx(data, crc) + + Compute the binhex4 crc value of *data*, starting with an initial *crc* and + returning the result. + + +.. function:: crc32(data[, crc]) + + Compute CRC-32, the 32-bit checksum of data, starting with an initial crc. This + is consistent with the ZIP file checksum. Since the algorithm is designed for + use as a checksum algorithm, it is not suitable for use as a general hash + algorithm. Use as follows:: + + print binascii.crc32("hello world") + # Or, in two pieces: + crc = binascii.crc32("hello") + crc = binascii.crc32(" world", crc) + print crc + + +.. function:: b2a_hex(data) + hexlify(data) + + Return the hexadecimal representation of the binary *data*. Every byte of + *data* is converted into the corresponding 2-digit hex representation. The + resulting string is therefore twice as long as the length of *data*. + + +.. function:: a2b_hex(hexstr) + unhexlify(hexstr) + + Return the binary data represented by the hexadecimal string *hexstr*. This + function is the inverse of :func:`b2a_hex`. *hexstr* must contain an even number + of hexadecimal digits (which can be upper or lower case), otherwise a + :exc:`TypeError` is raised. + + +.. exception:: Error + + Exception raised on errors. These are usually programming errors. + + +.. exception:: Incomplete + + Exception raised on incomplete data. These are usually not programming errors, + but may be handled by reading a little more data and trying again. + + +.. seealso:: + + Module :mod:`base64` + Support for base64 encoding used in MIME email messages. + + Module :mod:`binhex` + Support for the binhex format used on the Macintosh. 
+ + Module :mod:`uu` + Support for UU encoding used on Unix. + + Module :mod:`quopri` + Support for quoted-printable encoding used in MIME email messages. + diff --git a/Doc/library/binhex.rst b/Doc/library/binhex.rst new file mode 100644 index 0000000..3b0485c --- /dev/null +++ b/Doc/library/binhex.rst @@ -0,0 +1,59 @@ + +:mod:`binhex` --- Encode and decode binhex4 files +================================================= + +.. module:: binhex + :synopsis: Encode and decode files in binhex4 format. + + +This module encodes and decodes files in binhex4 format, a format allowing +representation of Macintosh files in ASCII. On the Macintosh, both forks of a +file and the finder information are encoded (or decoded), on other platforms +only the data fork is handled. + +The :mod:`binhex` module defines the following functions: + + +.. function:: binhex(input, output) + + Convert a binary file with filename *input* to binhex file *output*. The + *output* parameter can either be a filename or a file-like object (any object + supporting a :meth:`write` and :meth:`close` method). + + +.. function:: hexbin(input[, output]) + + Decode a binhex file *input*. *input* may be a filename or a file-like object + supporting :meth:`read` and :meth:`close` methods. The resulting file is written + to a file named *output*, unless the argument is omitted in which case the + output filename is read from the binhex file. + +The following exception is also defined: + + +.. exception:: Error + + Exception raised when something can't be encoded using the binhex format (for + example, a filename is too long to fit in the filename field), or when input is + not properly encoded binhex data. + + +.. seealso:: + + Module :mod:`binascii` + Support module containing ASCII-to-binary and binary-to-ASCII conversions. + + +.. _binhex-notes: + +Notes +----- + +There is an alternative, more powerful interface to the coder and decoder, see +the source for details. 
+ +If you code or decode textfiles on non-Macintosh platforms they will still use +the Macintosh newline convention (carriage-return as end of line). + +As of this writing, :func:`hexbin` appears to not work in all cases. + diff --git a/Doc/library/bisect.rst b/Doc/library/bisect.rst new file mode 100644 index 0000000..b8eb348 --- /dev/null +++ b/Doc/library/bisect.rst @@ -0,0 +1,92 @@ + +:mod:`bisect` --- Array bisection algorithm +=========================================== + +.. module:: bisect + :synopsis: Array bisection algorithms for binary searching. +.. sectionauthor:: Fred L. Drake, Jr. + + +.. % LaTeX produced by Fred L. Drake, Jr. , with an +.. % example based on the PyModules FAQ entry by Aaron Watters +.. % . + +This module provides support for maintaining a list in sorted order without +having to sort the list after each insertion. For long lists of items with +expensive comparison operations, this can be an improvement over the more common +approach. The module is called :mod:`bisect` because it uses a basic bisection +algorithm to do its work. The source code may be most useful as a working +example of the algorithm (the boundary conditions are already right!). + +The following functions are provided: + + +.. function:: bisect_left(list, item[, lo[, hi]]) + + Locate the proper insertion point for *item* in *list* to maintain sorted order. + The parameters *lo* and *hi* may be used to specify a subset of the list which + should be considered; by default the entire list is used. If *item* is already + present in *list*, the insertion point will be before (to the left of) any + existing entries. The return value is suitable for use as the first parameter + to ``list.insert()``. This assumes that *list* is already sorted. + + .. versionadded:: 2.1 + + +.. function:: bisect_right(list, item[, lo[, hi]]) + + Similar to :func:`bisect_left`, but returns an insertion point which comes after + (to the right of) any existing entries of *item* in *list*. 
+ + .. versionadded:: 2.1 + + +.. function:: bisect(...) + + Alias for :func:`bisect_right`. + + +.. function:: insort_left(list, item[, lo[, hi]]) + + Insert *item* in *list* in sorted order. This is equivalent to + ``list.insert(bisect.bisect_left(list, item, lo, hi), item)``. This assumes + that *list* is already sorted. + + .. versionadded:: 2.1 + + +.. function:: insort_right(list, item[, lo[, hi]]) + + Similar to :func:`insort_left`, but inserting *item* in *list* after any + existing entries of *item*. + + .. versionadded:: 2.1 + + +.. function:: insort(...) + + Alias for :func:`insort_right`. + + +Examples +-------- + +.. _bisect-example: + +The :func:`bisect` function is generally useful for categorizing numeric data. +This example uses :func:`bisect` to look up a letter grade for an exam total +(say) based on a set of ordered numeric breakpoints: 85 and up is an 'A', 75..84 +is a 'B', etc. :: + + >>> grades = "FEDCBA" + >>> breakpoints = [30, 44, 66, 75, 85] + >>> from bisect import bisect + >>> def grade(total): + ... return grades[bisect(breakpoints, total)] + ... + >>> grade(66) + 'C' + >>> map(grade, [33, 99, 77, 44, 12, 88]) + ['E', 'A', 'B', 'D', 'F', 'A'] + + diff --git a/Doc/library/bsddb.rst b/Doc/library/bsddb.rst new file mode 100644 index 0000000..55b7c7d --- /dev/null +++ b/Doc/library/bsddb.rst @@ -0,0 +1,211 @@ + +:mod:`bsddb` --- Interface to Berkeley DB library +================================================= + +.. module:: bsddb + :synopsis: Interface to Berkeley DB database library +.. sectionauthor:: Skip Montanaro + + +The :mod:`bsddb` module provides an interface to the Berkeley DB library. Users +can create hash, btree or record based library files using the appropriate open +call. Bsddb objects behave generally like dictionaries. 
Keys and values must be +strings, however, so to use other objects as keys or to store other kinds of +objects the user must serialize them somehow, typically using +:func:`marshal.dumps` or :func:`pickle.dumps`. + +The :mod:`bsddb` module requires a Berkeley DB library version from 3.3 thru +4.5. + + +.. seealso:: + + http://pybsddb.sourceforge.net/ + The website with documentation for the :mod:`bsddb.db` Python Berkeley DB + interface that closely mirrors the object oriented interface provided in + Berkeley DB 3 and 4. + + http://www.oracle.com/database/berkeley-db/ + The Berkeley DB library. + +A more modern DB, DBEnv and DBSequence object interface is available in the +:mod:`bsddb.db` module which closely matches the Berkeley DB C API documented at +the above URLs. Additional features provided by the :mod:`bsddb.db` API include +fine tuning, transactions, logging, and multiprocess concurrent database access. + +The following is a description of the legacy :mod:`bsddb` interface compatible +with the old Python bsddb module. Starting in Python 2.5 this interface should +be safe for multithreaded access. The :mod:`bsddb.db` API is recommended for +threading users as it provides better control. + +The :mod:`bsddb` module defines the following functions that create objects that +access the appropriate type of Berkeley DB file. The first two arguments of +each function are the same. For ease of portability, only the first two +arguments should be used in most instances. + + +.. function:: hashopen(filename[, flag[, mode[, pgsize[, ffactor[, nelem[, cachesize[, lorder[, hflags]]]]]]]]) + + Open the hash format file named *filename*. Files never intended to be + preserved on disk may be created by passing ``None`` as the *filename*. The + optional *flag* identifies the mode used to open the file. It may be ``'r'`` + (read only), ``'w'`` (read-write) , ``'c'`` (read-write - create if necessary; + the default) or ``'n'`` (read-write - truncate to zero length). 
The other + arguments are rarely used and are just passed to the low-level :cfunc:`dbopen` + function. Consult the Berkeley DB documentation for their use and + interpretation. + + +.. function:: btopen(filename[, flag[, mode[, btflags[, cachesize[, maxkeypage[, minkeypage[, pgsize[, lorder]]]]]]]]) + + Open the btree format file named *filename*. Files never intended to be + preserved on disk may be created by passing ``None`` as the *filename*. The + optional *flag* identifies the mode used to open the file. It may be ``'r'`` + (read only), ``'w'`` (read-write), ``'c'`` (read-write - create if necessary; + the default) or ``'n'`` (read-write - truncate to zero length). The other + arguments are rarely used and are just passed to the low-level dbopen function. + Consult the Berkeley DB documentation for their use and interpretation. + + +.. function:: rnopen(filename[, flag[, mode[, rnflags[, cachesize[, pgsize[, lorder[, rlen[, delim[, source[, pad]]]]]]]]]]) + + Open a DB record format file named *filename*. Files never intended to be + preserved on disk may be created by passing ``None`` as the *filename*. The + optional *flag* identifies the mode used to open the file. It may be ``'r'`` + (read only), ``'w'`` (read-write), ``'c'`` (read-write - create if necessary; + the default) or ``'n'`` (read-write - truncate to zero length). The other + arguments are rarely used and are just passed to the low-level dbopen function. + Consult the Berkeley DB documentation for their use and interpretation. + + +.. class:: StringKeys(db) + + Wrapper class around a DB object that supports string keys (rather than bytes). + All keys are encoded as UTF-8, then passed to the underlying object. + + .. versionadded:: 3.0 + + +.. class:: StringValues(db) + + Wrapper class around a DB object that supports string values (rather than bytes). + All values are encoded as UTF-8, then passed to the underlying object. + + .. versionadded:: 3.0 + + +.. 
seealso:: + + Module :mod:`dbhash` + DBM-style interface to the :mod:`bsddb` + + +.. _bsddb-objects: + +Hash, BTree and Record Objects +------------------------------ + +Once instantiated, hash, btree and record objects support the same methods as +dictionaries. In addition, they support the methods listed below. + +.. versionchanged:: 2.3.1 + Added dictionary methods. + + +.. method:: bsddbobject.close() + + Close the underlying file. The object can no longer be accessed. Since there + is no :meth:`open` method for these objects, to open the file again a new + :mod:`bsddb` module open function must be called. + + +.. method:: bsddbobject.keys() + + Return the list of keys contained in the DB file. The order of the list is + unspecified and should not be relied on. In particular, the order of the list + returned is different for different file formats. + + +.. method:: bsddbobject.has_key(key) + + Return ``1`` if the DB file contains the argument as a key. + + +.. method:: bsddbobject.set_location(key) + + Set the cursor to the item indicated by *key* and return a tuple containing the + key and its value. For binary tree databases (opened using :func:`btopen`), if + *key* does not actually exist in the database, the cursor will point to the next + item in sorted order and return that key and value. For other databases, + :exc:`KeyError` will be raised if *key* is not found in the database. + + +.. method:: bsddbobject.first() + + Set the cursor to the first item in the DB file and return it. The order of + keys in the file is unspecified, except in the case of B-Tree databases. This + method raises :exc:`bsddb.error` if the database is empty. + + +.. method:: bsddbobject.next() + + Set the cursor to the next item in the DB file and return it. The order of + keys in the file is unspecified, except in the case of B-Tree databases. + + +.. method:: bsddbobject.previous() + + Set the cursor to the previous item in the DB file and return it. 
The order of + keys in the file is unspecified, except in the case of B-Tree databases. This + is not supported on hashtable databases (those opened with :func:`hashopen`). + + +.. method:: bsddbobject.last() + + Set the cursor to the last item in the DB file and return it. The order of keys + in the file is unspecified. This is not supported on hashtable databases (those + opened with :func:`hashopen`). This method raises :exc:`bsddb.error` if the + database is empty. + + +.. method:: bsddbobject.sync() + + Synchronize the database on disk. + +Example:: + + >>> import bsddb + >>> db = bsddb.btopen('/tmp/spam.db', 'c') + >>> for i in range(10): db['%d'%i] = '%d'% (i*i) + ... + >>> db['3'] + '9' + >>> db.keys() + ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] + >>> db.first() + ('0', '0') + >>> db.next() + ('1', '1') + >>> db.last() + ('9', '81') + >>> db.set_location('2') + ('2', '4') + >>> db.previous() + ('1', '1') + >>> for k, v in db.iteritems(): + ... print k, v + 0 0 + 1 1 + 2 4 + 3 9 + 4 16 + 5 25 + 6 36 + 7 49 + 8 64 + 9 81 + >>> '8' in db + True + >>> db.sync() + 0 + diff --git a/Doc/library/bz2.rst b/Doc/library/bz2.rst new file mode 100644 index 0000000..a8c0911 --- /dev/null +++ b/Doc/library/bz2.rst @@ -0,0 +1,181 @@ + +:mod:`bz2` --- Compression compatible with :program:`bzip2` +=========================================================== + +.. module:: bz2 + :synopsis: Interface to compression and decompression routines compatible with bzip2. +.. moduleauthor:: Gustavo Niemeyer +.. sectionauthor:: Gustavo Niemeyer + + +.. versionadded:: 2.3 + +This module provides a comprehensive interface for the bz2 compression library. +It implements a complete file interface, one-shot (de)compression functions, and +types for sequential (de)compression. 
+ +Here is a summary of the features offered by the bz2 module: + +* :class:`BZ2File` class implements a complete file interface, including + :meth:`readline`, :meth:`readlines`, :meth:`writelines`, :meth:`seek`, etc; + +* :class:`BZ2File` class implements emulated :meth:`seek` support; + +* :class:`BZ2File` class implements universal newline support; + +* :class:`BZ2File` class offers an optimized line iteration using the readahead + algorithm borrowed from file objects; + +* Sequential (de)compression supported by :class:`BZ2Compressor` and + :class:`BZ2Decompressor` classes; + +* One-shot (de)compression supported by :func:`compress` and :func:`decompress` + functions; + +* Thread safety uses individual locking mechanism; + +* Complete inline documentation; + + +(De)compression of files +------------------------ + +Handling of compressed files is offered by the :class:`BZ2File` class. + + +.. class:: BZ2File(filename[, mode[, buffering[, compresslevel]]]) + + Open a bz2 file. Mode can be either ``'r'`` or ``'w'``, for reading (default) + or writing. When opened for writing, the file will be created if it doesn't + exist, and truncated otherwise. If *buffering* is given, ``0`` means unbuffered, + and larger numbers specify the buffer size; the default is ``0``. If + *compresslevel* is given, it must be a number between ``1`` and ``9``; the + default is ``9``. Add a ``'U'`` to mode to open the file for input with + universal newline support. Any line ending in the input file will be seen as a + ``'\n'`` in Python. Also, a file so opened gains the attribute + :attr:`newlines`; the value for this attribute is one of ``None`` (no newline + read yet), ``'\r'``, ``'\n'``, ``'\r\n'`` or a tuple containing all the newline + types seen. Universal newlines are available only when reading. Instances + support iteration in the same way as normal :class:`file` instances. + + +.. method:: BZ2File.close() + + Close the file. Sets data attribute :attr:`closed` to true. 
A closed file cannot + be used for further I/O operations. :meth:`close` may be called more than once + without error. + + +.. method:: BZ2File.read([size]) + + Read at most *size* uncompressed bytes, returned as a string. If the *size* + argument is negative or omitted, read until EOF is reached. + + +.. method:: BZ2File.readline([size]) + + Return the next line from the file, as a string, retaining newline. A + non-negative *size* argument limits the maximum number of bytes to return (an + incomplete line may be returned then). Return an empty string at EOF. + + +.. method:: BZ2File.readlines([size]) + + Return a list of lines read. The optional *size* argument, if given, is an + approximate bound on the total number of bytes in the lines returned. + + +.. method:: BZ2File.seek(offset[, whence]) + + Move to new file position. Argument *offset* is a byte count. Optional argument + *whence* defaults to ``os.SEEK_SET`` or ``0`` (offset from start of file; offset + should be ``>= 0``); other values are ``os.SEEK_CUR`` or ``1`` (move relative to + current position; offset can be positive or negative), and ``os.SEEK_END`` or + ``2`` (move relative to end of file; offset is usually negative, although many + platforms allow seeking beyond the end of a file). + + Note that seeking of bz2 files is emulated, and depending on the parameters the + operation may be extremely slow. + + +.. method:: BZ2File.tell() + + Return the current file position, an integer (may be a long integer). + + +.. method:: BZ2File.write(data) + + Write string *data* to file. Note that due to buffering, :meth:`close` may be + needed before the file on disk reflects the data written. + + +.. method:: BZ2File.writelines(sequence_of_strings) + + Write the sequence of strings to the file. Note that newlines are not added. The + sequence can be any iterable object producing strings. This is equivalent to + calling write() for each string. 
+ + +Sequential (de)compression +-------------------------- + +Sequential compression and decompression is done using the classes +:class:`BZ2Compressor` and :class:`BZ2Decompressor`. + + +.. class:: BZ2Compressor([compresslevel]) + + Create a new compressor object. This object may be used to compress data + sequentially. If you want to compress data in one shot, use the :func:`compress` + function instead. The *compresslevel* parameter, if given, must be a number + between ``1`` and ``9``; the default is ``9``. + + +.. method:: BZ2Compressor.compress(data) + + Provide more data to the compressor object. It will return chunks of compressed + data whenever possible. When you've finished providing data to compress, call + the :meth:`flush` method to finish the compression process, and return what is + left in internal buffers. + + +.. method:: BZ2Compressor.flush() + + Finish the compression process and return what is left in internal buffers. You + must not use the compressor object after calling this method. + + +.. class:: BZ2Decompressor() + + Create a new decompressor object. This object may be used to decompress data + sequentially. If you want to decompress data in one shot, use the + :func:`decompress` function instead. + + +.. method:: BZ2Decompressor.decompress(data) + + Provide more data to the decompressor object. It will return chunks of + decompressed data whenever possible. If you try to decompress data after the end + of stream is found, :exc:`EOFError` will be raised. If any data was found after + the end of stream, it'll be ignored and saved in :attr:`unused_data` attribute. + + +One-shot (de)compression +------------------------ + +One-shot compression and decompression is provided through the :func:`compress` +and :func:`decompress` functions. + + +.. function:: compress(data[, compresslevel]) + + Compress *data* in one shot. If you want to compress data sequentially, use an + instance of :class:`BZ2Compressor` instead. 
The *compresslevel* parameter, if + given, must be a number between ``1`` and ``9``; the default is ``9``. + + +.. function:: decompress(data) + + Decompress *data* in one shot. If you want to decompress data sequentially, use + an instance of :class:`BZ2Decompressor` instead. + diff --git a/Doc/library/calendar.rst b/Doc/library/calendar.rst new file mode 100644 index 0000000..68cbeb6 --- /dev/null +++ b/Doc/library/calendar.rst @@ -0,0 +1,326 @@ + +:mod:`calendar` --- General calendar-related functions +====================================================== + +.. module:: calendar + :synopsis: Functions for working with calendars, including some emulation of the Unix cal + program. +.. sectionauthor:: Drew Csillag + + +This module allows you to output calendars like the Unix :program:`cal` program, +and provides additional useful functions related to the calendar. By default, +these calendars have Monday as the first day of the week, and Sunday as the last +(the European convention). Use :func:`setfirstweekday` to set the first day of +the week to Sunday (6) or to any other weekday. Parameters that specify dates +are given as integers. For related +functionality, see also the :mod:`datetime` and :mod:`time` modules. + +Most of these functions and classes rely on the :mod:`datetime` module which +uses an idealized calendar, the current Gregorian calendar indefinitely extended +in both directions. This matches the definition of the "proleptic Gregorian" +calendar in Dershowitz and Reingold's book "Calendrical Calculations", where +it's the base calendar for all computations. + + +.. class:: Calendar([firstweekday]) + + Creates a :class:`Calendar` object. *firstweekday* is an integer specifying the + first day of the week. ``0`` is Monday (the default), ``6`` is Sunday. + + A :class:`Calendar` object provides several methods that can be used for + preparing the calendar data for formatting. This class doesn't do any formatting + itself. 
This is the job of subclasses. + + .. versionadded:: 2.5 + +:class:`Calendar` instances have the following methods: + + +.. method:: Calendar.iterweekdays(weekday) + + Return an iterator for the week day numbers that will be used for one week. The + first number from the iterator will be the same as the number returned by + :meth:`firstweekday`. + + +.. method:: Calendar.itermonthdates(year, month) + + Return an iterator for the month *month* (1-12) in the year *year*. This + iterator will return all days (as :class:`datetime.date` objects) for the month + and all days before the start of the month or after the end of the month that + are required to get a complete week. + + +.. method:: Calendar.itermonthdays2(year, month) + + Return an iterator for the month *month* in the year *year* similar to + :meth:`itermonthdates`. Days returned will be tuples consisting of a day number + and a week day number. + + +.. method:: Calendar.itermonthdays(year, month) + + Return an iterator for the month *month* in the year *year* similar to + :meth:`itermonthdates`. Days returned will simply be day numbers. + + +.. method:: Calendar.monthdatescalendar(year, month) + + Return a list of the weeks in the month *month* of the *year* as full weeks. + Weeks are lists of seven :class:`datetime.date` objects. + + +.. method:: Calendar.monthdays2calendar(year, month) + + Return a list of the weeks in the month *month* of the *year* as full weeks. + Weeks are lists of seven tuples of day numbers and weekday numbers. + + +.. method:: Calendar.monthdayscalendar(year, month) + + Return a list of the weeks in the month *month* of the *year* as full weeks. + Weeks are lists of seven day numbers. + + +.. method:: Calendar.yeardatescalendar(year, month[, width]) + + Return the data for the specified year ready for formatting. The return value is + a list of month rows. Each month row contains up to *width* months (defaulting + to 3). 
Each month contains between 4 and 6 weeks and each week contains 1--7 + days. Days are :class:`datetime.date` objects. + + +.. method:: Calendar.yeardays2calendar(year, month[, width]) + + Return the data for the specified year ready for formatting (similar to + :meth:`yeardatescalendar`). Entries in the week lists are tuples of day numbers + and weekday numbers. Day numbers outside this month are zero. + + +.. method:: Calendar.yeardayscalendar(year, month[, width]) + + Return the data for the specified year ready for formatting (similar to + :meth:`yeardatescalendar`). Entries in the week lists are day numbers. Day + numbers outside this month are zero. + + +.. class:: TextCalendar([firstweekday]) + + This class can be used to generate plain text calendars. + + .. versionadded:: 2.5 + +:class:`TextCalendar` instances have the following methods: + + +.. method:: TextCalendar.formatmonth(theyear, themonth[, w[, l]]) + + Return a month's calendar in a multi-line string. If *w* is provided, it + specifies the width of the date columns, which are centered. If *l* is given, it + specifies the number of lines that each week will use. Depends on the first + weekday as set by :func:`setfirstweekday`. + + +.. method:: TextCalendar.prmonth(theyear, themonth[, w[, l]]) + + Print a month's calendar as returned by :meth:`formatmonth`. + + +.. method:: TextCalendar.formatyear(theyear, themonth[, w[, l[, c[, m]]]]) + + Return a *m*-column calendar for an entire year as a multi-line string. Optional + parameters *w*, *l*, and *c* are for date column width, lines per week, and + number of spaces between month columns, respectively. Depends on the first + weekday as set by :meth:`setfirstweekday`. The earliest year for which a + calendar can be generated is platform-dependent. + + +.. method:: TextCalendar.pryear(theyear[, w[, l[, c[, m]]]]) + + Print the calendar for an entire year as returned by :meth:`formatyear`. + + +.. 
class:: HTMLCalendar([firstweekday]) + + This class can be used to generate HTML calendars. + + .. versionadded:: 2.5 + +:class:`HTMLCalendar` instances have the following methods: + + +.. method:: HTMLCalendar.formatmonth(theyear, themonth[, withyear]) + + Return a month's calendar as an HTML table. If *withyear* is true the year will + be included in the header, otherwise just the month name will be used. + + +.. method:: HTMLCalendar.formatyear(theyear[, width]) + + Return a year's calendar as an HTML table. *width* (defaulting to 3) specifies + the number of months per row. + + +.. method:: HTMLCalendar.formatyearpage(theyear[, width[, css[, encoding]]]) + + Return a year's calendar as a complete HTML page. *width* (defaulting to 3) + specifies the number of months per row. *css* is the name for the cascading + style sheet to be used. :const:`None` can be passed if no style sheet should be + used. *encoding* specifies the encoding to be used for the output (defaulting to + the system default encoding). + + +.. class:: LocaleTextCalendar([firstweekday[, locale]]) + + This subclass of :class:`TextCalendar` can be passed a locale name in the + constructor and will return month and weekday names in the specified locale. If + this locale includes an encoding all strings containing month and weekday names + will be returned as unicode. + + .. versionadded:: 2.5 + + +.. class:: LocaleHTMLCalendar([firstweekday[, locale]]) + + This subclass of :class:`HTMLCalendar` can be passed a locale name in the + constructor and will return month and weekday names in the specified locale. If + this locale includes an encoding all strings containing month and weekday names + will be returned as unicode. + + .. versionadded:: 2.5 + +For simple text calendars this module provides the following functions. + + +.. function:: setfirstweekday(weekday) + + Sets the weekday (``0`` is Monday, ``6`` is Sunday) to start each week.
The + values :const:`MONDAY`, :const:`TUESDAY`, :const:`WEDNESDAY`, :const:`THURSDAY`, + :const:`FRIDAY`, :const:`SATURDAY`, and :const:`SUNDAY` are provided for + convenience. For example, to set the first weekday to Sunday:: + + import calendar + calendar.setfirstweekday(calendar.SUNDAY) + + .. versionadded:: 2.0 + + +.. function:: firstweekday() + + Returns the current setting for the weekday to start each week. + + .. versionadded:: 2.0 + + +.. function:: isleap(year) + + Returns :const:`True` if *year* is a leap year, otherwise :const:`False`. + + +.. function:: leapdays(y1, y2) + + Returns the number of leap years in the range from *y1* to *y2* (exclusive), + where *y1* and *y2* are years. + + .. versionchanged:: 2.0 + This function didn't work for ranges spanning a century change in Python + 1.5.2. + + +.. function:: weekday(year, month, day) + + Returns the day of the week (``0`` is Monday) for *year* (``1970``--...), + *month* (``1``--``12``), *day* (``1``--``31``). + + +.. function:: weekheader(n) + + Return a header containing abbreviated weekday names. *n* specifies the width in + characters for one weekday. + + +.. function:: monthrange(year, month) + + Returns weekday of first day of the month and number of days in month, for the + specified *year* and *month*. + + +.. function:: monthcalendar(year, month) + + Returns a matrix representing a month's calendar. Each row represents a week; + days outside of the month are represented by zeros. Each week begins with Monday + unless set by :func:`setfirstweekday`. + + +.. function:: prmonth(theyear, themonth[, w[, l]]) + + Prints a month's calendar as returned by :func:`month`. + + +.. function:: month(theyear, themonth[, w[, l]]) + + Returns a month's calendar in a multi-line string using the :meth:`formatmonth` + of the :class:`TextCalendar` class. + + .. versionadded:: 2.0 + + +.. function:: prcal(year[, w[, l[, c]]]) + + Prints the calendar for an entire year as returned by :func:`calendar`. + + +.. 
function:: calendar(year[, w[, l[, c]]]) + + Returns a 3-column calendar for an entire year as a multi-line string using the + :meth:`formatyear` of the :class:`TextCalendar` class. + + .. versionadded:: 2.0 + + +.. function:: timegm(tuple) + + An unrelated but handy function that takes a time tuple such as returned by the + :func:`gmtime` function in the :mod:`time` module, and returns the corresponding + Unix timestamp value, assuming an epoch of 1970, and the POSIX encoding. In + fact, :func:`time.gmtime` and :func:`timegm` are each other's inverse. + + .. versionadded:: 2.0 + +The :mod:`calendar` module exports the following data attributes: + + +.. data:: day_name + + An array that represents the days of the week in the current locale. + + +.. data:: day_abbr + + An array that represents the abbreviated days of the week in the current locale. + + +.. data:: month_name + + An array that represents the months of the year in the current locale. This + follows normal convention of January being month number 1, so it has a length of + 13 and ``month_name[0]`` is the empty string. + + +.. data:: month_abbr + + An array that represents the abbreviated months of the year in the current + locale. This follows normal convention of January being month number 1, so it + has a length of 13 and ``month_abbr[0]`` is the empty string. + + +.. seealso:: + + Module :mod:`datetime` + Object-oriented interface to dates and times with similar functionality to the + :mod:`time` module. + + Module :mod:`time` + Low-level time related functions. + diff --git a/Doc/library/carbon.rst b/Doc/library/carbon.rst new file mode 100644 index 0000000..ecaf3bb --- /dev/null +++ b/Doc/library/carbon.rst @@ -0,0 +1,288 @@ + +.. _toolbox: + +********************* +MacOS Toolbox Modules +********************* + +There is a set of modules that provide interfaces to various MacOS toolboxes.
+If applicable the module will define a number of Python objects for the various +structures declared by the toolbox, and operations will be implemented as +methods of the object. Other operations will be implemented as functions in the +module. Not all operations possible in C will also be possible in Python +(callbacks are often a problem), and parameters will occasionally be different +in Python (input and output buffers, especially). All methods and functions +have a :attr:`__doc__` string describing their arguments and return values, and +for additional description you are referred to `Inside Macintosh +`_ or similar works. + +These modules all live in a package called :mod:`Carbon`. Despite that name they +are not all part of the Carbon framework: CF is really in the CoreFoundation +framework and Qt is in the QuickTime framework. The normal use pattern is :: + + from Carbon import AE + +**Warning!** These modules are not yet documented. If you wish to contribute +documentation of any of these modules, please get in touch with docs@python.org. + + +:mod:`Carbon.AE` --- Apple Events +================================= + +.. module:: Carbon.AE + :platform: Mac + :synopsis: Interface to the Apple Events toolbox. + + + +:mod:`Carbon.AH` --- Apple Help +=============================== + +.. module:: Carbon.AH + :platform: Mac + :synopsis: Interface to the Apple Help manager. + + + +:mod:`Carbon.App` --- Appearance Manager +======================================== + +.. module:: Carbon.App + :platform: Mac + :synopsis: Interface to the Appearance Manager. + + + +:mod:`Carbon.CF` --- Core Foundation +==================================== + +.. module:: Carbon.CF + :platform: Mac + :synopsis: Interface to the Core Foundation. + + +The ``CFBase``, ``CFArray``, ``CFData``, ``CFDictionary``, ``CFString`` and +``CFURL`` objects are supported, some only partially. + + +:mod:`Carbon.CG` --- Core Graphics +================================== + +.. 
module:: Carbon.CG + :platform: Mac + :synopsis: Interface to Core Graphics. + + + +:mod:`Carbon.CarbonEvt` --- Carbon Event Manager +================================================ + +.. module:: Carbon.CarbonEvt + :platform: Mac + :synopsis: Interface to the Carbon Event Manager. + + + +:mod:`Carbon.Cm` --- Component Manager +====================================== + +.. module:: Carbon.Cm + :platform: Mac + :synopsis: Interface to the Component Manager. + + + +:mod:`Carbon.Ctl` --- Control Manager +===================================== + +.. module:: Carbon.Ctl + :platform: Mac + :synopsis: Interface to the Control Manager. + + + +:mod:`Carbon.Dlg` --- Dialog Manager +==================================== + +.. module:: Carbon.Dlg + :platform: Mac + :synopsis: Interface to the Dialog Manager. + + + +:mod:`Carbon.Evt` --- Event Manager +=================================== + +.. module:: Carbon.Evt + :platform: Mac + :synopsis: Interface to the classic Event Manager. + + + +:mod:`Carbon.Fm` --- Font Manager +================================= + +.. module:: Carbon.Fm + :platform: Mac + :synopsis: Interface to the Font Manager. + + + +:mod:`Carbon.Folder` --- Folder Manager +======================================= + +.. module:: Carbon.Folder + :platform: Mac + :synopsis: Interface to the Folder Manager. + + + +:mod:`Carbon.Help` --- Help Manager +=================================== + +.. module:: Carbon.Help + :platform: Mac + :synopsis: Interface to the Carbon Help Manager. + + + +:mod:`Carbon.List` --- List Manager +=================================== + +.. module:: Carbon.List + :platform: Mac + :synopsis: Interface to the List Manager. + + + +:mod:`Carbon.Menu` --- Menu Manager +=================================== + +.. module:: Carbon.Menu + :platform: Mac + :synopsis: Interface to the Menu Manager. + + + +:mod:`Carbon.Mlte` --- MultiLingual Text Editor +=============================================== + +.. 
module:: Carbon.Mlte + :platform: Mac + :synopsis: Interface to the MultiLingual Text Editor. + + + +:mod:`Carbon.Qd` --- QuickDraw +============================== + +.. module:: Carbon.Qd + :platform: Mac + :synopsis: Interface to the QuickDraw toolbox. + + + +:mod:`Carbon.Qdoffs` --- QuickDraw Offscreen +============================================ + +.. module:: Carbon.Qdoffs + :platform: Mac + :synopsis: Interface to the QuickDraw Offscreen APIs. + + + +:mod:`Carbon.Qt` --- QuickTime +============================== + +.. module:: Carbon.Qt + :platform: Mac + :synopsis: Interface to the QuickTime toolbox. + + + +:mod:`Carbon.Res` --- Resource Manager and Handles +================================================== + +.. module:: Carbon.Res + :platform: Mac + :synopsis: Interface to the Resource Manager and Handles. + + + +:mod:`Carbon.Scrap` --- Scrap Manager +===================================== + +.. module:: Carbon.Scrap + :platform: Mac + :synopsis: The Scrap Manager provides basic services for implementing cut & paste and + clipboard operations. + + +This module is only fully available on MacOS9 and earlier under classic PPC +MacPython. Very limited functionality is available under Carbon MacPython. + +.. index:: single: Scrap Manager + +The Scrap Manager supports the simplest form of cut & paste operations on the +Macintosh. It can be used for both inter- and intra-application clipboard +operations. + +The :mod:`Scrap` module provides low-level access to the functions of the Scrap +Manager. It contains the following functions: + + +.. function:: InfoScrap() + + Return current information about the scrap. The information is encoded as a + tuple containing the fields ``(size, handle, count, state, path)``. + + +----------+---------------------------------------------+ + | Field | Meaning | + +==========+=============================================+ + | *size* | Size of the scrap in bytes.
| + +----------+---------------------------------------------+ + | *handle* | Resource object representing the scrap. | + +----------+---------------------------------------------+ + | *count* | Serial number of the scrap contents. | + +----------+---------------------------------------------+ + | *state* | Integer; positive if in memory, ``0`` if on | + | | disk, negative if uninitialized. | + +----------+---------------------------------------------+ + | *path* | Filename of the scrap when stored on disk. | + +----------+---------------------------------------------+ + + +.. seealso:: + + `Scrap Manager `_ + Apple's documentation for the Scrap Manager gives a lot of useful information + about using the Scrap Manager in applications. + + + +:mod:`Carbon.Snd` --- Sound Manager +=================================== + +.. module:: Carbon.Snd + :platform: Mac + :synopsis: Interface to the Sound Manager. + + + +:mod:`Carbon.TE` --- TextEdit +============================= + +.. module:: Carbon.TE + :platform: Mac + :synopsis: Interface to TextEdit. + + + +:mod:`Carbon.Win` --- Window Manager +==================================== + +.. module:: Carbon.Win + :platform: Mac + :synopsis: Interface to the Window Manager. + + diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst new file mode 100644 index 0000000..29ed545 --- /dev/null +++ b/Doc/library/cgi.rst @@ -0,0 +1,558 @@ + +:mod:`cgi` --- Common Gateway Interface support. +================================================ + +.. module:: cgi + :synopsis: Helpers for running Python scripts via the Common Gateway Interface. + + +.. index:: + pair: WWW; server + pair: CGI; protocol + pair: HTTP; protocol + pair: MIME; headers + single: URL + single: Common Gateway Interface + +Support module for Common Gateway Interface (CGI) scripts. + +This module defines a number of utilities for use by CGI scripts written in +Python. + + +Introduction +------------ + +.. 
_cgi-intro: + +A CGI script is invoked by an HTTP server, usually to process user input +submitted through an HTML ``<FORM>`` or ``<ISINDEX>`` element. + +Most often, CGI scripts live in the server's special :file:`cgi-bin` directory. +The HTTP server places all sorts of information about the request (such as the +client's hostname, the requested URL, the query string, and lots of other +goodies) in the script's shell environment, executes the script, and sends the +script's output back to the client. + +The script's input is connected to the client too, and sometimes the form data +is read this way; at other times the form data is passed via the "query string" +part of the URL. This module is intended to take care of the different cases +and provide a simpler interface to the Python script. It also provides a number +of utilities that help in debugging scripts, and the latest addition is support +for file uploads from a form (if your browser supports it). + +The output of a CGI script should consist of two sections, separated by a blank +line. The first section contains a number of headers, telling the client what +kind of data is following. Python code to generate a minimal header section +looks like this:: + + print "Content-Type: text/html" # HTML is following + print # blank line, end of headers + +The second section is usually HTML, which allows the client software to display +nicely formatted text with header, in-line images, etc. Here's Python code that +prints a simple piece of HTML:: + + print "<TITLE>CGI script output</TITLE>" + print "<H1>This is my first CGI script</H1>" + print "Hello, world!" + + +.. _using-the-cgi-module: + +Using the cgi module +-------------------- + +Begin by writing ``import cgi``. Do not use ``from cgi import *`` --- the +module defines all sorts of names for its own use or for backward compatibility +that you don't want in your namespace. + +When you write a new script, consider adding the line:: + + import cgitb; cgitb.enable() + +This activates a special exception handler that will display detailed reports in +the Web browser if any errors occur. If you'd rather not show the guts of your +program to users of your script, you can have the reports saved to files +instead, with a line like this:: + + import cgitb; cgitb.enable(display=0, logdir="/tmp") + +It's very helpful to use this feature during script development. The reports +produced by :mod:`cgitb` provide information that can save you a lot of time in +tracking down bugs. You can always remove the ``cgitb`` line later when you +have tested your script and are confident that it works correctly. + +To get at submitted form data, it's best to use the :class:`FieldStorage` class. +The other classes defined in this module are provided mostly for backward +compatibility. Instantiate it exactly once, without arguments. This reads the +form contents from standard input or the environment (depending on the value of +various environment variables set according to the CGI standard). Since it may +consume standard input, it should be instantiated only once. + +The :class:`FieldStorage` instance can be indexed like a Python dictionary, and +also supports the standard dictionary methods :meth:`has_key` and :meth:`keys`. +The built-in :func:`len` is also supported. Form fields containing empty +strings are ignored and do not appear in the dictionary; to keep such values, +provide a true value for the optional *keep_blank_values* keyword parameter when +creating the :class:`FieldStorage` instance.
+ +For instance, the following code (which assumes that the +:mailheader:`Content-Type` header and blank line have already been printed) +checks that the fields ``name`` and ``addr`` are both set to a non-empty +string:: + + form = cgi.FieldStorage() + if not (form.has_key("name") and form.has_key("addr")): + print "<H1>Error</H1>" + print "Please fill in the name and addr fields." + return + print "<p>name:", form["name"].value + print "<p>addr:", form["addr"].value + ...further form processing here... + +Here the fields, accessed through ``form[key]``, are themselves instances of +:class:`FieldStorage` (or :class:`MiniFieldStorage`, depending on the form +encoding). The :attr:`value` attribute of the instance yields the string value +of the field. The :meth:`getvalue` method returns this string value directly; +it also accepts an optional second argument as a default to return if the +requested key is not present. + +If the submitted form data contains more than one field with the same name, the +object retrieved by ``form[key]`` is not a :class:`FieldStorage` or +:class:`MiniFieldStorage` instance but a list of such instances. Similarly, in +this situation, ``form.getvalue(key)`` would return a list of strings. If you +expect this possibility (when your HTML form contains multiple fields with the +same name), use the :func:`getlist` function, which always returns a list of +values (so that you do not need to special-case the single item case). For +example, this code concatenates any number of username fields, separated by +commas:: + + value = form.getlist("username") + usernames = ",".join(value) + +If a field represents an uploaded file, accessing the value via the +:attr:`value` attribute or the :func:`getvalue` method reads the entire file in +memory as a string. This may not be what you want. You can test for an uploaded +file by testing either the :attr:`filename` attribute or the :attr:`file` +attribute. You can then read the data at leisure from the :attr:`file` +attribute:: + + fileitem = form["userfile"] + if fileitem.file: + # It's an uploaded file; count lines + linecount = 0 + while 1: + line = fileitem.file.readline() + if not line: break + linecount = linecount + 1 + +The file upload draft standard entertains the possibility of uploading multiple +files from one field (using a recursive :mimetype:`multipart/\*` encoding).
+When this occurs, the item will be a dictionary-like :class:`FieldStorage` item. +This can be determined by testing its :attr:`type` attribute, which should be +:mimetype:`multipart/form-data` (or perhaps another MIME type matching +:mimetype:`multipart/\*`). In this case, it can be iterated over recursively +just like the top-level form object. + +When a form is submitted in the "old" format (as the query string or as a single +data part of type :mimetype:`application/x-www-form-urlencoded`), the items will +actually be instances of the class :class:`MiniFieldStorage`. In this case, the +:attr:`list`, :attr:`file`, and :attr:`filename` attributes are always ``None``. + + +Higher Level Interface +---------------------- + +.. versionadded:: 2.2 + +The previous section explains how to read CGI form data using the +:class:`FieldStorage` class. This section describes a higher level interface +which was added to this class to allow one to do it in a more readable and +intuitive way. The interface doesn't make the techniques described in previous +sections obsolete --- they are still useful to process file uploads efficiently, +for example. + +.. % XXX: Is this true ? + +The interface consists of two simple methods. Using the methods you can process +form data in a generic way, without the need to worry whether only one or more +values were posted under one name. + +In the previous section, you learned to write the following code anytime you +expected a user to post more than one value under one name:: + + item = form.getvalue("item") + if isinstance(item, list): + # The user is requesting more than one item. + else: + # The user is requesting only one item. + +This situation is common for example when a form contains a group of multiple +checkboxes with the same name:: + + <input type="checkbox" name="item" value="1" /> + <input type="checkbox" name="item" value="2" /> + +In most situations, however, there's only one form control with a particular +name in a form and then you expect and need only one value associated with this +name.
So you write a script containing for example this code:: + + user = form.getvalue("user").upper() + +The problem with the code is that you should never expect that a client will +provide valid input to your scripts. For example, if a curious user appends +another ``user=foo`` pair to the query string, then the script would crash, +because in this situation the ``getvalue("user")`` method call returns a list +instead of a string. Calling the :meth:`upper` method on a list is not valid +(since lists do not have a method of this name) and results in an +:exc:`AttributeError` exception. + +Therefore, the appropriate way to read form data values was to always use the +code which checks whether the obtained value is a single value or a list of +values. That's annoying and leads to less readable scripts. + +A more convenient approach is to use the methods :meth:`getfirst` and +:meth:`getlist` provided by this higher level interface. + + +.. method:: FieldStorage.getfirst(name[, default]) + + This method always returns only one value associated with form field *name*. + The method returns only the first value in case that more values were posted + under such name. Please note that the order in which the values are received + may vary from browser to browser and should not be counted on. [#]_ If no such + form field or value exists then the method returns the value specified by the + optional parameter *default*. This parameter defaults to ``None`` if not + specified. + + +.. method:: FieldStorage.getlist(name) + + This method always returns a list of values associated with form field *name*. + The method returns an empty list if no such form field or value exists for + *name*. It returns a list consisting of one item if only one such value exists. + +Using these methods you can write nice compact code:: + + import cgi + form = cgi.FieldStorage() + user = form.getfirst("user", "").upper() # This way it's safe.
+ for item in form.getlist("item"): + do_something(item) + + +Old classes +----------- + +These classes, present in earlier versions of the :mod:`cgi` module, are still +supported for backward compatibility. New applications should use the +:class:`FieldStorage` class. + +:class:`SvFormContentDict` stores single value form content as dictionary; it +assumes each field name occurs in the form only once. + +:class:`FormContentDict` stores multiple value form content as a dictionary (the +form items are lists of values). Useful if your form contains multiple fields +with the same name. + +Other classes (:class:`FormContent`, :class:`InterpFormContentDict`) are present +for backwards compatibility with really old applications only. If you still use +these and would be inconvenienced when they disappeared from a next version of +this module, drop me a note. + + +.. _functions-in-cgi-module: + +Functions +--------- + +These are useful if you want more control, or if you want to employ some of the +algorithms implemented in this module in other circumstances. + + +.. function:: parse(fp[, keep_blank_values[, strict_parsing]]) + + Parse a query in the environment or from a file (the file defaults to + ``sys.stdin``). The *keep_blank_values* and *strict_parsing* parameters are + passed to :func:`parse_qs` unchanged. + + +.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing]]) + + Parse a query string given as a string argument (data of type + :mimetype:`application/x-www-form-urlencoded`). Data are returned as a + dictionary. The dictionary keys are the unique query variable names and the + values are lists of values for each name. + + The optional argument *keep_blank_values* is a flag indicating whether blank + values in URL encoded queries should be treated as blank strings. A true value + indicates that blanks should be retained as blank strings. The default false + value indicates that blank values are to be ignored and treated as if they were + not included. 
+ + The optional argument *strict_parsing* is a flag indicating what to do with + parsing errors. If false (the default), errors are silently ignored. If true, + errors raise a :exc:`ValueError` exception. + + Use the :func:`urllib.urlencode` function to convert such dictionaries into + query strings. + + +.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing]]) + + Parse a query string given as a string argument (data of type + :mimetype:`application/x-www-form-urlencoded`). Data are returned as a list of + name, value pairs. + + The optional argument *keep_blank_values* is a flag indicating whether blank + values in URL encoded queries should be treated as blank strings. A true value + indicates that blanks should be retained as blank strings. The default false + value indicates that blank values are to be ignored and treated as if they were + not included. + + The optional argument *strict_parsing* is a flag indicating what to do with + parsing errors. If false (the default), errors are silently ignored. If true, + errors raise a :exc:`ValueError` exception. + + Use the :func:`urllib.urlencode` function to convert such lists of pairs into + query strings. + + +.. function:: parse_multipart(fp, pdict) + + Parse input of type :mimetype:`multipart/form-data` (for file uploads). + Arguments are *fp* for the input file and *pdict* for a dictionary containing + other parameters in the :mailheader:`Content-Type` header. + + Returns a dictionary just like :func:`parse_qs`: keys are the field names, each + value is a list of values for that field. This is easy to use but not much good + if you are expecting megabytes to be uploaded --- in that case, use the + :class:`FieldStorage` class instead which is much more flexible. + + Note that this does not parse nested multipart parts --- use + :class:`FieldStorage` for that. + + +.. 
function:: parse_header(string) + + Parse a MIME header (such as :mailheader:`Content-Type`) into a main value and a + dictionary of parameters. + + +.. function:: test() + + Robust test CGI script, usable as main program. Writes minimal HTTP headers and + formats all information provided to the script in HTML form. + + +.. function:: print_environ() + + Format the shell environment in HTML. + + +.. function:: print_form(form) + + Format a form in HTML. + + +.. function:: print_directory() + + Format the current directory in HTML. + + +.. function:: print_environ_usage() + + Print a list of useful (used by CGI) environment variables in HTML. + + +.. function:: escape(s[, quote]) + + Convert the characters ``'&'``, ``'<'`` and ``'>'`` in string *s* to HTML-safe + sequences. Use this if you need to display text that might contain such + characters in HTML. If the optional flag *quote* is true, the quotation mark + character (``'"'``) is also translated; this helps for inclusion in an HTML + attribute value, as in ``<A HREF="...">``. If the value to be quoted might + include single- or double-quote characters, or both, consider using the + :func:`quoteattr` function in the :mod:`xml.sax.saxutils` module instead. + + +.. _cgi-security: + +Caring about security +--------------------- + +.. index:: pair: CGI; security + +There's one important rule: if you invoke an external program (via the +:func:`os.system` or :func:`os.popen` functions, or others with similar +functionality), make very sure you don't pass arbitrary strings received from +the client to the shell. This is a well-known security hole whereby clever +hackers anywhere on the Web can exploit a gullible CGI script to invoke +arbitrary shell commands. Even parts of the URL or field names cannot be +trusted, since the request doesn't have to come from your form!
+ +To be on the safe side, if you must pass a string gotten from a form to a shell +command, you should make sure the string contains only alphanumeric characters, +dashes, underscores, and periods. + + +Installing your CGI script on a Unix system +------------------------------------------- + +Read the documentation for your HTTP server and check with your local system +administrator to find the directory where CGI scripts should be installed; +usually this is in a directory :file:`cgi-bin` in the server tree. + +Make sure that your script is readable and executable by "others"; the Unix file +mode should be ``0755`` octal (use ``chmod 0755 filename``). Make sure that the +first line of the script contains ``#!`` starting in column 1 followed by the +pathname of the Python interpreter, for instance:: + + #!/usr/local/bin/python + +Make sure the Python interpreter exists and is executable by "others". + +Make sure that any files your script needs to read or write are readable or +writable, respectively, by "others" --- their mode should be ``0644`` for +readable and ``0666`` for writable. This is because, for security reasons, the +HTTP server executes your script as user "nobody", without any special +privileges. It can only read (write, execute) files that everybody can read +(write, execute). The current directory at execution time is also different (it +is usually the server's cgi-bin directory) and the set of environment variables +is also different from what you get when you log in. In particular, don't count +on the shell's search path for executables (:envvar:`PATH`) or the Python module +search path (:envvar:`PYTHONPATH`) to be set to anything interesting. + +If you need to load modules from a directory which is not on Python's default +module search path, you can change the path in your script, before importing +other modules. 
For example:: + + import sys + sys.path.insert(0, "/usr/home/joe/lib/python") + sys.path.insert(0, "/usr/local/lib/python") + +(This way, the directory inserted last will be searched first!) + +Instructions for non-Unix systems will vary; check your HTTP server's +documentation (it will usually have a section on CGI scripts). + + +Testing your CGI script +----------------------- + +Unfortunately, a CGI script will generally not run when you try it from the +command line, and a script that works perfectly from the command line may fail +mysteriously when run from the server. There's one reason why you should still +test your script from the command line: if it contains a syntax error, the +Python interpreter won't execute it at all, and the HTTP server will most likely +send a cryptic error to the client. + +Assuming your script has no syntax errors, yet it does not work, you have no +choice but to read the next section. + + +Debugging CGI scripts +--------------------- + +.. index:: pair: CGI; debugging + +First of all, check for trivial installation errors --- reading the section +above on installing your CGI script carefully can save you a lot of time. If +you wonder whether you have understood the installation procedure correctly, try +installing a copy of this module file (:file:`cgi.py`) as a CGI script. When +invoked as a script, the file will dump its environment and the contents of the +form in HTML form. Give it the right mode etc, and send it a request. If it's +installed in the standard :file:`cgi-bin` directory, it should be possible to +send it a request by entering a URL into your browser of the form:: + + http://yourhostname/cgi-bin/cgi.py?name=Joe+Blow&addr=At+Home + +If this gives an error of type 404, the server cannot find the script -- perhaps +you need to install it in a different directory. If it gives another error, +there's an installation problem that you should fix before trying to go any +further. 
 If you get a nicely formatted listing of the environment and form +content (in this example, the fields should be listed as "addr" with value "At +Home" and "name" with value "Joe Blow"), the :file:`cgi.py` script has been +installed correctly. If you follow the same procedure for your own script, you +should now be able to debug it. + +The next step could be to call the :mod:`cgi` module's :func:`test` function +from your script: replace its main code with the single statement :: + + cgi.test() + +This should produce the same results as those gotten from installing the +:file:`cgi.py` file itself. + +When an ordinary Python script raises an unhandled exception (for whatever +reason: a typo in a module name, a file that can't be opened, etc.), the +Python interpreter prints a nice traceback and exits. While the Python +interpreter will still do this when your CGI script raises an exception, most +likely the traceback will end up in one of the HTTP server's log files, or be +discarded altogether. + +Fortunately, once you have managed to get your script to execute *some* code, +you can easily send tracebacks to the Web browser using the :mod:`cgitb` module. +If you haven't done so already, just add the line:: + + import cgitb; cgitb.enable() + +to the top of your script. Then try running it again; when a problem occurs, +you should see a detailed report that will likely make apparent the cause of the +crash. + +If you suspect that there may be a problem in importing the :mod:`cgitb` module, +you can use an even more robust approach (which only uses built-in modules):: + + import sys + sys.stderr = sys.stdout + print "Content-Type: text/plain" + print + ...your code here... + +This relies on the Python interpreter to print the traceback. The content type +of the output is set to plain text, which disables all HTML processing. If your +script works, the raw HTML will be displayed by your client.
If it raises an +exception, most likely after the first two lines have been printed, a traceback +will be displayed. Because no HTML interpretation is going on, the traceback +will be readable. + + +Common problems and solutions +----------------------------- + +* Most HTTP servers buffer the output from CGI scripts until the script is + completed. This means that it is not possible to display a progress report on + the client's display while the script is running. + +* Check the installation instructions above. + +* Check the HTTP server's log files. (``tail -f logfile`` in a separate window + may be useful!) + +* Always check a script for syntax errors first, by doing something like + ``python script.py``. + +* If your script does not have any syntax errors, try adding ``import cgitb; + cgitb.enable()`` to the top of the script. + +* When invoking external programs, make sure they can be found. Usually, this + means using absolute path names --- :envvar:`PATH` is usually not set to a very + useful value in a CGI script. + +* When reading or writing external files, make sure they can be read or written + by the userid under which your CGI script will be running: this is typically the + userid under which the web server is running, or some explicitly specified + userid for a web server's ``suexec`` feature. + +* Don't try to give a CGI script a set-uid mode. This doesn't work on most + systems, and is a security liability as well. + +.. rubric:: Footnotes + +.. [#] Note that some recent versions of the HTML specification do state what order the + field values should be supplied in, but knowing whether a request was + received from a conforming browser, or even from a browser at all, is tedious + and error-prone. 
+ diff --git a/Doc/library/cgihttpserver.rst b/Doc/library/cgihttpserver.rst new file mode 100644 index 0000000..4f27627 --- /dev/null +++ b/Doc/library/cgihttpserver.rst @@ -0,0 +1,73 @@ + +:mod:`CGIHTTPServer` --- CGI-capable HTTP request handler +========================================================= + +.. module:: CGIHTTPServer + :synopsis: This module provides a request handler for HTTP servers which can run CGI + scripts. +.. sectionauthor:: Moshe Zadka + + +The :mod:`CGIHTTPServer` module defines a request-handler class, interface +compatible with :class:`BaseHTTPServer.BaseHTTPRequestHandler` and inherits +behavior from :class:`SimpleHTTPServer.SimpleHTTPRequestHandler` but can also +run CGI scripts. + +.. note:: + + This module can run CGI scripts on Unix and Windows systems; on Mac OS it will + only be able to run Python scripts within the same process as itself. + +.. note:: + + CGI scripts run by the :class:`CGIHTTPRequestHandler` class cannot execute + redirects (HTTP code 302), because code 200 (script output follows) is sent + prior to execution of the CGI script. This pre-empts the status code. + +The :mod:`CGIHTTPServer` module defines the following class: + + +.. class:: CGIHTTPRequestHandler(request, client_address, server) + + This class is used to serve either files or output of CGI scripts from the + current directory and below. Note that mapping HTTP hierarchic structure to + local directory structure is exactly as in + :class:`SimpleHTTPServer.SimpleHTTPRequestHandler`. + + The class will however, run the CGI script, instead of serving it as a file, if + it guesses it to be a CGI script. Only directory-based CGI are used --- the + other common server configuration is to treat special extensions as denoting CGI + scripts. + + The :func:`do_GET` and :func:`do_HEAD` functions are modified to run CGI scripts + and serve the output, instead of serving files, if the request leads to + somewhere below the ``cgi_directories`` path. 
+ +The :class:`CGIHTTPRequestHandler` defines the following data member: + + +.. attribute:: CGIHTTPRequestHandler.cgi_directories + + This defaults to ``['/cgi-bin', '/htbin']`` and describes directories to treat + as containing CGI scripts. + +The :class:`CGIHTTPRequestHandler` defines the following methods: + + +.. method:: CGIHTTPRequestHandler.do_POST() + + This method serves the ``'POST'`` request type, only allowed for CGI scripts. + Error 501, "Can only POST to CGI scripts", is output when trying to POST to a + non-CGI url. + +Note that CGI scripts will be run with UID of user nobody, for security reasons. +Problems with the CGI script will be translated to error 403. + +For example usage, see the implementation of the :func:`test` function. + + +.. seealso:: + + Module :mod:`BaseHTTPServer` + Base class implementation for Web server and request handler. + diff --git a/Doc/library/cgitb.rst b/Doc/library/cgitb.rst new file mode 100644 index 0000000..327cd17 --- /dev/null +++ b/Doc/library/cgitb.rst @@ -0,0 +1,64 @@ + +:mod:`cgitb` --- Traceback manager for CGI scripts +================================================== + +.. module:: cgitb + :synopsis: Configurable traceback handler for CGI scripts. +.. moduleauthor:: Ka-Ping Yee +.. sectionauthor:: Fred L. Drake, Jr. + + +.. versionadded:: 2.2 + +.. index:: + single: CGI; exceptions + single: CGI; tracebacks + single: exceptions; in CGI scripts + single: tracebacks; in CGI scripts + +The :mod:`cgitb` module provides a special exception handler for Python scripts. +(Its name is a bit misleading. It was originally designed to display extensive +traceback information in HTML for CGI scripts. It was later generalized to also +display this information in plain text.) After this module is activated, if an +uncaught exception occurs, a detailed, formatted report will be displayed. 
The +report includes a traceback showing excerpts of the source code for each level, +as well as the values of the arguments and local variables to currently running +functions, to help you debug the problem. Optionally, you can save this +information to a file instead of sending it to the browser. + +To enable this feature, simply add one line to the top of your CGI script:: + + import cgitb; cgitb.enable() + +The options to the :func:`enable` function control whether the report is +displayed in the browser and whether the report is logged to a file for later +analysis. + + +.. function:: enable([display[, logdir[, context[, format]]]]) + + .. index:: single: excepthook() (in module sys) + + This function causes the :mod:`cgitb` module to take over the interpreter's + default handling for exceptions by setting the value of :attr:`sys.excepthook`. + + The optional argument *display* defaults to ``1`` and can be set to ``0`` to + suppress sending the traceback to the browser. If the argument *logdir* is + present, the traceback reports are written to files. The value of *logdir* + should be a directory where these files will be placed. The optional argument + *context* is the number of lines of context to display around the current line + of source code in the traceback; this defaults to ``5``. If the optional + argument *format* is ``"html"``, the output is formatted as HTML. Any other + value forces plain text output. The default value is ``"html"``. + + +.. function:: handler([info]) + + This function handles an exception using the default settings (that is, show a + report in the browser, but don't log to a file). This can be used when you've + caught an exception and want to report it using :mod:`cgitb`. The optional + *info* argument should be a 3-tuple containing an exception type, exception + value, and traceback object, exactly like the tuple returned by + :func:`sys.exc_info`. 
If the *info* argument is not supplied, the current + exception is obtained from :func:`sys.exc_info`. + diff --git a/Doc/library/chunk.rst b/Doc/library/chunk.rst new file mode 100644 index 0000000..2e1798d --- /dev/null +++ b/Doc/library/chunk.rst @@ -0,0 +1,130 @@ + +:mod:`chunk` --- Read IFF chunked data +====================================== + +.. module:: chunk + :synopsis: Module to read IFF chunks. +.. moduleauthor:: Sjoerd Mullender +.. sectionauthor:: Sjoerd Mullender + + +.. index:: + single: Audio Interchange File Format + single: AIFF + single: AIFF-C + single: Real Media File Format + single: RMFF + +This module provides an interface for reading files that use EA IFF 85 chunks. +[#]_ This format is used in at least the Audio Interchange File Format +(AIFF/AIFF-C) and the Real Media File Format (RMFF). The WAVE audio file format +is closely related and can also be read using this module. + +A chunk has the following structure: + ++---------+--------+-------------------------------+ +| Offset | Length | Contents | ++=========+========+===============================+ +| 0 | 4 | Chunk ID | ++---------+--------+-------------------------------+ +| 4 | 4 | Size of chunk in big-endian | +| | | byte order, not including the | +| | | header | ++---------+--------+-------------------------------+ +| 8 | *n* | Data bytes, where *n* is the | +| | | size given in the preceding | +| | | field | ++---------+--------+-------------------------------+ +| 8 + *n* | 0 or 1 | Pad byte needed if *n* is odd | +| | | and chunk alignment is used | ++---------+--------+-------------------------------+ + +The ID is a 4-byte string which identifies the type of chunk. + +The size field (a 32-bit value, encoded using big-endian byte order) gives the +size of the chunk data, not including the 8-byte header. + +Usually an IFF-type file consists of one or more chunks. 
The proposed usage of +the :class:`Chunk` class defined here is to instantiate an instance at the start +of each chunk and read from the instance until it reaches the end, after which a +new instance can be instantiated. At the end of the file, creating a new +instance will fail with a :exc:`EOFError` exception. + + +.. class:: Chunk(file[, align, bigendian, inclheader]) + + Class which represents a chunk. The *file* argument is expected to be a + file-like object. An instance of this class is specifically allowed. The + only method that is needed is :meth:`read`. If the methods :meth:`seek` and + :meth:`tell` are present and don't raise an exception, they are also used. + If these methods are present and raise an exception, they are expected to not + have altered the object. If the optional argument *align* is true, chunks + are assumed to be aligned on 2-byte boundaries. If *align* is false, no + alignment is assumed. The default value is true. If the optional argument + *bigendian* is false, the chunk size is assumed to be in little-endian order. + This is needed for WAVE audio files. The default value is true. If the + optional argument *inclheader* is true, the size given in the chunk header + includes the size of the header. The default value is false. + +A :class:`Chunk` object supports the following methods: + + +.. method:: Chunk.getname() + + Returns the name (ID) of the chunk. This is the first 4 bytes of the chunk. + + +.. method:: Chunk.getsize() + + Returns the size of the chunk. + + +.. method:: Chunk.close() + + Close and skip to the end of the chunk. This does not close the underlying + file. + +The remaining methods will raise :exc:`IOError` if called after the +:meth:`close` method has been called. + + +.. method:: Chunk.isatty() + + Returns ``False``. + + +.. method:: Chunk.seek(pos[, whence]) + + Set the chunk's current position. 
The *whence* argument is optional and + defaults to ``0`` (absolute file positioning); other values are ``1`` (seek + relative to the current position) and ``2`` (seek relative to the file's end). + There is no return value. If the underlying file does not allow seek, only + forward seeks are allowed. + + +.. method:: Chunk.tell() + + Return the current position into the chunk. + + +.. method:: Chunk.read([size]) + + Read at most *size* bytes from the chunk (less if the read hits the end of the + chunk before obtaining *size* bytes). If the *size* argument is negative or + omitted, read all data until the end of the chunk. The bytes are returned as a + string object. An empty string is returned when the end of the chunk is + encountered immediately. + + +.. method:: Chunk.skip() + + Skip to the end of the chunk. All further calls to :meth:`read` for the chunk + will return ``''``. If you are not interested in the contents of the chunk, + this method should be called so that the file points to the start of the next + chunk. + +.. rubric:: Footnotes + +.. [#] "EA IFF 85" Standard for Interchange Format Files, Jerry Morrison, Electronic + Arts, January 1985. + diff --git a/Doc/library/cmath.rst b/Doc/library/cmath.rst new file mode 100644 index 0000000..2bc162c --- /dev/null +++ b/Doc/library/cmath.rst @@ -0,0 +1,156 @@ + +:mod:`cmath` --- Mathematical functions for complex numbers +=========================================================== + +.. module:: cmath + :synopsis: Mathematical functions for complex numbers. + + +This module is always available. It provides access to mathematical functions +for complex numbers. The functions in this module accept integers, +floating-point numbers or complex numbers as arguments. 
They will also accept +any Python object that has either a :meth:`__complex__` or a :meth:`__float__` +method: these methods are used to convert the object to a complex or +floating-point number, respectively, and the function is then applied to the +result of the conversion. + +The functions are: + + +.. function:: acos(x) + + Return the arc cosine of *x*. There are two branch cuts: One extends right from + 1 along the real axis to ∞, continuous from below. The other extends left from + -1 along the real axis to -∞, continuous from above. + + +.. function:: acosh(x) + + Return the hyperbolic arc cosine of *x*. There is one branch cut, extending left + from 1 along the real axis to -∞, continuous from above. + + +.. function:: asin(x) + + Return the arc sine of *x*. This has the same branch cuts as :func:`acos`. + + +.. function:: asinh(x) + + Return the hyperbolic arc sine of *x*. There are two branch cuts, extending + left from ``±1j`` to ``±∞j``, both continuous from above. These branch cuts + should be considered a bug to be corrected in a future release. The correct + branch cuts should extend along the imaginary axis, one from ``1j`` up to + ``∞j`` and continuous from the right, and one from ``-1j`` down to ``-∞j`` + and continuous from the left. + + +.. function:: atan(x) + + Return the arc tangent of *x*. There are two branch cuts: One extends from + ``1j`` along the imaginary axis to ``∞j``, continuous from the left. The + other extends from ``-1j`` along the imaginary axis to ``-∞j``, continuous + from the left. (This should probably be changed so the upper cut becomes + continuous from the other side.) + + +.. function:: atanh(x) + + Return the hyperbolic arc tangent of *x*. There are two branch cuts: One + extends from ``1`` along the real axis to ``∞``, continuous from above. The + other extends from ``-1`` along the real axis to ``-∞``, continuous from + above. 
(This should probably be changed so the right cut becomes continuous + from the other side.) + + +.. function:: cos(x) + + Return the cosine of *x*. + + +.. function:: cosh(x) + + Return the hyperbolic cosine of *x*. + + +.. function:: exp(x) + + Return the exponential value ``e**x``. + + +.. function:: log(x[, base]) + + Returns the logarithm of *x* to the given *base*. If the *base* is not + specified, returns the natural logarithm of *x*. There is one branch cut, from 0 + along the negative real axis to -∞, continuous from above. + + .. versionchanged:: 2.4 + *base* argument added. + + +.. function:: log10(x) + + Return the base-10 logarithm of *x*. This has the same branch cut as + :func:`log`. + + +.. function:: sin(x) + + Return the sine of *x*. + + +.. function:: sinh(x) + + Return the hyperbolic sine of *x*. + + +.. function:: sqrt(x) + + Return the square root of *x*. This has the same branch cut as :func:`log`. + + +.. function:: tan(x) + + Return the tangent of *x*. + + +.. function:: tanh(x) + + Return the hyperbolic tangent of *x*. + +The module also defines two mathematical constants: + + +.. data:: pi + + The mathematical constant *pi*, as a float. + + +.. data:: e + + The mathematical constant *e*, as a float. + +.. index:: module: math + +Note that the selection of functions is similar, but not identical, to that in +module :mod:`math`. The reason for having two modules is that some users aren't +interested in complex numbers, and perhaps don't even know what they are. They +would rather have ``math.sqrt(-1)`` raise an exception than return a complex +number. Also note that the functions defined in :mod:`cmath` always return a +complex number, even if the answer can be expressed as a real number (in which +case the complex number has an imaginary part of zero). + +A note on branch cuts: They are curves along which the given function fails to +be continuous. They are a necessary feature of many complex functions. 
 It is +assumed that if you need to compute with complex functions, you will understand +about branch cuts. Consult almost any (not too elementary) book on complex +variables for enlightenment. For information on the proper choice of branch +cuts for numerical purposes, a good reference should be the following: + + +.. seealso:: + + Kahan, W: Branch cuts for complex elementary functions; or, Much ado about + nothing's sign bit. In Iserles, A., and Powell, M. (eds.), The state of the art + in numerical analysis. Clarendon Press (1987) pp165-211. + diff --git a/Doc/library/cmd.rst b/Doc/library/cmd.rst new file mode 100644 index 0000000..9af08e2 --- /dev/null +++ b/Doc/library/cmd.rst @@ -0,0 +1,202 @@ + +:mod:`cmd` --- Support for line-oriented command interpreters +============================================================= + +.. module:: cmd + :synopsis: Build line-oriented command interpreters. +.. sectionauthor:: Eric S. Raymond + + +The :class:`Cmd` class provides a simple framework for writing line-oriented +command interpreters. These are often useful for test harnesses, administrative +tools, and prototypes that will later be wrapped in a more sophisticated +interface. + + +.. class:: Cmd([completekey[, stdin[, stdout]]]) + + A :class:`Cmd` instance or subclass instance is a line-oriented interpreter + framework. There is no good reason to instantiate :class:`Cmd` itself; rather, + it's useful as a superclass of an interpreter class you define yourself in order + to inherit :class:`Cmd`'s methods and encapsulate action methods. + + The optional argument *completekey* is the :mod:`readline` name of a completion + key; it defaults to :kbd:`Tab`. If *completekey* is not :const:`None` and + :mod:`readline` is available, command completion is done automatically. + + The optional arguments *stdin* and *stdout* specify the input and output file + objects that the Cmd instance or subclass instance will use for input and + output.
If not specified, they will default to *sys.stdin* and *sys.stdout*. + + .. versionchanged:: 2.3 + The *stdin* and *stdout* parameters were added. + + +.. _cmd-objects: + +Cmd Objects +----------- + +A :class:`Cmd` instance has the following methods: + + +.. method:: Cmd.cmdloop([intro]) + + Repeatedly issue a prompt, accept input, parse an initial prefix off the + received input, and dispatch to action methods, passing them the remainder of + the line as argument. + + The optional argument is a banner or intro string to be issued before the first + prompt (this overrides the :attr:`intro` class member). + + If the :mod:`readline` module is loaded, input will automatically inherit + :program:`bash`\ -like history-list editing (e.g. :kbd:`Control-P` scrolls back + to the last command, :kbd:`Control-N` forward to the next one, :kbd:`Control-F` + moves the cursor to the right non-destructively, :kbd:`Control-B` moves the + cursor to the left non-destructively, etc.). + + An end-of-file on input is passed back as the string ``'EOF'``. + + An interpreter instance will recognize a command name ``foo`` if and only if it + has a method :meth:`do_foo`. As a special case, a line beginning with the + character ``'?'`` is dispatched to the method :meth:`do_help`. As another + special case, a line beginning with the character ``'!'`` is dispatched to the + method :meth:`do_shell` (if such a method is defined). + + This method will return when the :meth:`postcmd` method returns a true value. + The *stop* argument to :meth:`postcmd` is the return value from the command's + corresponding :meth:`do_\*` method. + + If completion is enabled, completing commands will be done automatically, and + completing of commands args is done by calling :meth:`complete_foo` with + arguments *text*, *line*, *begidx*, and *endidx*. *text* is the string prefix + we are attempting to match: all returned matches must begin with it. 
*line* is + the current input line with leading whitespace removed, *begidx* and *endidx* + are the beginning and ending indexes of the prefix text, which could be used to + provide different completion depending upon which position the argument is in. + + All subclasses of :class:`Cmd` inherit a predefined :meth:`do_help`. This + method, called with an argument ``'bar'``, invokes the corresponding method + :meth:`help_bar`. With no argument, :meth:`do_help` lists all available help + topics (that is, all commands with corresponding :meth:`help_\*` methods), and + also lists any undocumented commands. + + +.. method:: Cmd.onecmd(str) + + Interpret the argument as though it had been typed in response to the prompt. + This may be overridden, but should not normally need to be; see the + :meth:`precmd` and :meth:`postcmd` methods for useful execution hooks. The + return value is a flag indicating whether interpretation of commands by the + interpreter should stop. If there is a :meth:`do_\*` method for the command + *str*, the return value of that method is returned, otherwise the return value + from the :meth:`default` method is returned. + + +.. method:: Cmd.emptyline() + + Method called when an empty line is entered in response to the prompt. If this + method is not overridden, it repeats the last nonempty command entered. + + +.. method:: Cmd.default(line) + + Method called on an input line when the command prefix is not recognized. If + this method is not overridden, it prints an error message and returns. + + +.. method:: Cmd.completedefault(text, line, begidx, endidx) + + Method called to complete an input line when no command-specific + :meth:`complete_\*` method is available. By default, it returns an empty list. + + +.. method:: Cmd.precmd(line) + + Hook method executed just before the command line *line* is interpreted, but + after the input prompt is generated and issued. This method is a stub in + :class:`Cmd`; it exists to be overridden by subclasses. 
The return value is + used as the command which will be executed by the :meth:`onecmd` method; the + :meth:`precmd` implementation may re-write the command or simply return *line* + unchanged. + + +.. method:: Cmd.postcmd(stop, line) + + Hook method executed just after a command dispatch is finished. This method is + a stub in :class:`Cmd`; it exists to be overridden by subclasses. *line* is the + command line which was executed, and *stop* is a flag which indicates whether + execution will be terminated after the call to :meth:`postcmd`; this will be the + return value of the :meth:`onecmd` method. The return value of this method will + be used as the new value for the internal flag which corresponds to *stop*; + returning false will cause interpretation to continue. + + +.. method:: Cmd.preloop() + + Hook method executed once when :meth:`cmdloop` is called. This method is a stub + in :class:`Cmd`; it exists to be overridden by subclasses. + + +.. method:: Cmd.postloop() + + Hook method executed once when :meth:`cmdloop` is about to return. This method + is a stub in :class:`Cmd`; it exists to be overridden by subclasses. + +Instances of :class:`Cmd` subclasses have some public instance variables: + + +.. attribute:: Cmd.prompt + + The prompt issued to solicit input. + + +.. attribute:: Cmd.identchars + + The string of characters accepted for the command prefix. + + +.. attribute:: Cmd.lastcmd + + The last nonempty command prefix seen. + + +.. attribute:: Cmd.intro + + A string to issue as an intro or banner. May be overridden by giving the + :meth:`cmdloop` method an argument. + + +.. attribute:: Cmd.doc_header + + The header to issue if the help output has a section for documented commands. + + +.. attribute:: Cmd.misc_header + + The header to issue if the help output has a section for miscellaneous help + topics (that is, there are :meth:`help_\*` methods without corresponding + :meth:`do_\*` methods). + + +.. 
attribute:: Cmd.undoc_header + + The header to issue if the help output has a section for undocumented commands + (that is, there are :meth:`do_\*` methods without corresponding :meth:`help_\*` + methods). + + +.. attribute:: Cmd.ruler + + The character used to draw separator lines under the help-message headers. If + empty, no ruler line is drawn. It defaults to ``'='``. + + +.. attribute:: Cmd.use_rawinput + + A flag, defaulting to true. If true, :meth:`cmdloop` uses :func:`input` to + display a prompt and read the next command; if false, :meth:`sys.stdout.write` + and :meth:`sys.stdin.readline` are used. (This means that by importing + :mod:`readline`, on systems that support it, the interpreter will automatically + support :program:`Emacs`\ -like line editing and command-history keystrokes.) + diff --git a/Doc/library/code.rst b/Doc/library/code.rst new file mode 100644 index 0000000..4e00639 --- /dev/null +++ b/Doc/library/code.rst @@ -0,0 +1,167 @@ + +:mod:`code` --- Interpreter base classes +======================================== + +.. module:: code + :synopsis: Facilities to implement read-eval-print loops. + + + +The ``code`` module provides facilities to implement read-eval-print loops in +Python. Two classes and convenience functions are included which can be used to +build applications which provide an interactive interpreter prompt. + + +.. class:: InteractiveInterpreter([locals]) + + This class deals with parsing and interpreter state (the user's namespace); it + does not deal with input buffering or prompting or input file naming (the + filename is always passed in explicitly). The optional *locals* argument + specifies the dictionary in which code will be executed; it defaults to a newly + created dictionary with key ``'__name__'`` set to ``'__console__'`` and key + ``'__doc__'`` set to ``None``. + + +.. class:: InteractiveConsole([locals[, filename]]) + + Closely emulate the behavior of the interactive Python interpreter. 
 This class + builds on :class:`InteractiveInterpreter` and adds prompting using the familiar + ``sys.ps1`` and ``sys.ps2``, and input buffering. + + +.. function:: interact([banner[, readfunc[, local]]]) + + Convenience function to run a read-eval-print loop. This creates a new instance + of :class:`InteractiveConsole` and sets *readfunc* to be used as the + :meth:`raw_input` method, if provided. If *local* is provided, it is passed to + the :class:`InteractiveConsole` constructor for use as the default namespace for + the interpreter loop. The :meth:`interact` method of the instance is then run + with *banner* passed as the banner to use, if provided. The console object is + discarded after use. + + +.. function:: compile_command(source[, filename[, symbol]]) + + This function is useful for programs that want to emulate Python's interpreter + main loop (a.k.a. the read-eval-print loop). The tricky part is to determine + when the user has entered an incomplete command that can be completed by + entering more text (as opposed to a complete command or a syntax error). This + function *almost* always makes the same decision as the real interpreter main + loop. + + *source* is the source string; *filename* is the optional filename from which + source was read, defaulting to ``'<input>'``; and *symbol* is the optional + grammar start symbol, which should be either ``'single'`` (the default) or + ``'eval'``. + + Returns a code object (the same as ``compile(source, filename, symbol)``) if the + command is complete and valid; ``None`` if the command is incomplete; raises + :exc:`SyntaxError` if the command is complete and contains a syntax error, or + raises :exc:`OverflowError` or :exc:`ValueError` if the command contains an + invalid literal. + + +.. _interpreter-objects: + +Interactive Interpreter Objects +------------------------------- + + +.. method:: InteractiveInterpreter.runsource(source[, filename[, symbol]]) + + Compile and run some source in the interpreter.
 Arguments are the same as for + :func:`compile_command`; the default for *filename* is ``'<input>'``, and for + *symbol* is ``'single'``. One of several things can happen: + + * The input is incorrect; :func:`compile_command` raised an exception + (:exc:`SyntaxError` or :exc:`OverflowError`). A syntax traceback will be + printed by calling the :meth:`showsyntaxerror` method. :meth:`runsource` + returns ``False``. + + * The input is incomplete, and more input is required; :func:`compile_command` + returned ``None``. :meth:`runsource` returns ``True``. + + * The input is complete; :func:`compile_command` returned a code object. The + code is executed by calling the :meth:`runcode` (which also handles run-time + exceptions, except for :exc:`SystemExit`). :meth:`runsource` returns ``False``. + + The return value can be used to decide whether to use ``sys.ps1`` or ``sys.ps2`` + to prompt the next line. + + +.. method:: InteractiveInterpreter.runcode(code) + + Execute a code object. When an exception occurs, :meth:`showtraceback` is called + to display a traceback. All exceptions are caught except :exc:`SystemExit`, + which is allowed to propagate. + + A note about :exc:`KeyboardInterrupt`: this exception may occur elsewhere in + this code, and may not always be caught. The caller should be prepared to deal + with it. + + +.. method:: InteractiveInterpreter.showsyntaxerror([filename]) + + Display the syntax error that just occurred. This does not display a stack + trace because there isn't one for syntax errors. If *filename* is given, it is + stuffed into the exception instead of the default filename provided by Python's + parser, because it always uses ``'<string>'`` when reading from a string. The + output is written by the :meth:`write` method. + + +.. method:: InteractiveInterpreter.showtraceback() + + Display the exception that just occurred. We remove the first stack item + because it is within the interpreter object implementation.
The output is + written by the :meth:`write` method. + + +.. method:: InteractiveInterpreter.write(data) + + Write a string to the standard error stream (``sys.stderr``). Derived classes + should override this to provide the appropriate output handling as needed. + + +.. _console-objects: + +Interactive Console Objects +--------------------------- + +The :class:`InteractiveConsole` class is a subclass of +:class:`InteractiveInterpreter`, and so offers all the methods of the +interpreter objects as well as the following additions. + + +.. method:: InteractiveConsole.interact([banner]) + + Closely emulate the interactive Python console. The optional banner argument + specifies the banner to print before the first interaction; by default it prints a + banner similar to the one printed by the standard Python interpreter, followed + by the class name of the console object in parentheses (so as not to confuse + this with the real interpreter -- since it's so close!). + + +.. method:: InteractiveConsole.push(line) + + Push a line of source text to the interpreter. The line should not have a + trailing newline; it may have internal newlines. The line is appended to a + buffer and the interpreter's :meth:`runsource` method is called with the + concatenated contents of the buffer as source. If this indicates that the + command was executed or invalid, the buffer is reset; otherwise, the command is + incomplete, and the buffer is left as it was after the line was appended. The + return value is ``True`` if more input is required, ``False`` if the line was + dealt with in some way (this is the same as :meth:`runsource`). + + +.. method:: InteractiveConsole.resetbuffer() + + Remove any unhandled source text from the input buffer. + + +.. method:: InteractiveConsole.raw_input([prompt]) + + Write a prompt and read a line. The returned line does not include the trailing + newline. When the user enters the EOF key sequence, :exc:`EOFError` is raised.
+ The base implementation reads from ``sys.stdin``; a subclass may replace this + with a different implementation. + diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst new file mode 100644 index 0000000..38264df --- /dev/null +++ b/Doc/library/codecs.rst @@ -0,0 +1,1230 @@ + +:mod:`codecs` --- Codec registry and base classes +================================================= + +.. module:: codecs + :synopsis: Encode and decode data and streams. +.. moduleauthor:: Marc-Andre Lemburg +.. sectionauthor:: Marc-Andre Lemburg +.. sectionauthor:: Martin v. Löwis + + +.. index:: + single: Unicode + single: Codecs + pair: Codecs; encode + pair: Codecs; decode + single: streams + pair: stackable; streams + +This module defines base classes for standard Python codecs (encoders and +decoders) and provides access to the internal Python codec registry which +manages the codec and error handling lookup process. + +It defines the following functions: + + +.. function:: register(search_function) + + Register a codec search function. Search functions are expected to take one + argument, the encoding name in all lower case letters, and return a + :class:`CodecInfo` object having the following attributes: + + * ``name`` The name of the encoding; + + * ``encoder`` The stateless encoding function; + + * ``decoder`` The stateless decoding function; + + * ``incrementalencoder`` An incremental encoder class or factory function; + + * ``incrementaldecoder`` An incremental decoder class or factory function; + + * ``streamwriter`` A stream writer class or factory function; + + * ``streamreader`` A stream reader class or factory function. + + The various functions or classes take the following arguments: + + *encoder* and *decoder*: These must be functions or methods which have the same + interface as the :meth:`encode`/:meth:`decode` methods of Codec instances (see + Codec Interface). The functions/methods are expected to work in a stateless + mode. 
+ + *incrementalencoder* and *incrementaldecoder*: These have to be factory + functions providing the following interface: + + ``factory(errors='strict')`` + + The factory functions must return objects providing the interfaces defined by + the base classes :class:`IncrementalEncoder` and :class:`IncrementalDecoder`, + respectively. Incremental codecs can maintain state. + + *streamreader* and *streamwriter*: These have to be factory functions providing + the following interface: + + ``factory(stream, errors='strict')`` + + The factory functions must return objects providing the interfaces defined by + the base classes :class:`StreamReader` and :class:`StreamWriter`, respectively. + Stream codecs can maintain state. + + Possible values for errors are ``'strict'`` (raise an exception in case of an + encoding error), ``'replace'`` (replace malformed data with a suitable + replacement marker, such as ``'?'``), ``'ignore'`` (ignore malformed data and + continue without further notice), ``'xmlcharrefreplace'`` (replace with the + appropriate XML character reference (for encoding only)) and + ``'backslashreplace'`` (replace with backslashed escape sequences (for encoding + only)) as well as any other error handling name defined via + :func:`register_error`. + + In case a search function cannot find a given encoding, it should return + ``None``. + + +.. function:: lookup(encoding) + + Looks up the codec info in the Python codec registry and returns a + :class:`CodecInfo` object as defined above. + + Encodings are first looked up in the registry's cache. If not found, the list of + registered search functions is scanned. If no :class:`CodecInfo` object is + found, a :exc:`LookupError` is raised. Otherwise, the :class:`CodecInfo` object + is stored in the cache and returned to the caller. + +To simplify access to the various codecs, the module provides these additional +functions which use :func:`lookup` for the codec lookup: + + +..
function:: getencoder(encoding) + + Look up the codec for the given encoding and return its encoder function. + + Raises a :exc:`LookupError` in case the encoding cannot be found. + + +.. function:: getdecoder(encoding) + + Look up the codec for the given encoding and return its decoder function. + + Raises a :exc:`LookupError` in case the encoding cannot be found. + + +.. function:: getincrementalencoder(encoding) + + Look up the codec for the given encoding and return its incremental encoder + class or factory function. + + Raises a :exc:`LookupError` in case the encoding cannot be found or the codec + doesn't support an incremental encoder. + + .. versionadded:: 2.5 + + +.. function:: getincrementaldecoder(encoding) + + Look up the codec for the given encoding and return its incremental decoder + class or factory function. + + Raises a :exc:`LookupError` in case the encoding cannot be found or the codec + doesn't support an incremental decoder. + + .. versionadded:: 2.5 + + +.. function:: getreader(encoding) + + Look up the codec for the given encoding and return its StreamReader class or + factory function. + + Raises a :exc:`LookupError` in case the encoding cannot be found. + + +.. function:: getwriter(encoding) + + Look up the codec for the given encoding and return its StreamWriter class or + factory function. + + Raises a :exc:`LookupError` in case the encoding cannot be found. + + +.. function:: register_error(name, error_handler) + + Register the error handling function *error_handler* under the name *name*. + *error_handler* will be called during encoding and decoding in case of an error, + when *name* is specified as the errors parameter. + + For encoding *error_handler* will be called with a :exc:`UnicodeEncodeError` + instance, which contains information about the location of the error. 
The error + handler must either raise this or a different exception or return a tuple with a + replacement for the unencodable part of the input and a position where encoding + should continue. The encoder will encode the replacement and continue encoding + the original input at the specified position. Negative position values will be + treated as being relative to the end of the input string. If the resulting + position is out of bounds an :exc:`IndexError` will be raised. + + Decoding and translating work similarly, except :exc:`UnicodeDecodeError` or + :exc:`UnicodeTranslateError` will be passed to the handler and that the + replacement from the error handler will be put into the output directly. + + +.. function:: lookup_error(name) + + Return the error handler previously registered under the name *name*. + + Raises a :exc:`LookupError` in case the handler cannot be found. + + +.. function:: strict_errors(exception) + + Implements the ``strict`` error handling. + + +.. function:: replace_errors(exception) + + Implements the ``replace`` error handling. + + +.. function:: ignore_errors(exception) + + Implements the ``ignore`` error handling. + + +.. function:: xmlcharrefreplace_errors(exception) + + Implements the ``xmlcharrefreplace`` error handling. + + +.. function:: backslashreplace_errors(exception) + + Implements the ``backslashreplace`` error handling. + +To simplify working with encoded files or streams, the module also defines these +utility functions: + + +.. function:: open(filename, mode[, encoding[, errors[, buffering]]]) + + Open an encoded file using the given *mode* and return a wrapped version + providing transparent encoding/decoding. + + .. note:: + + The wrapped version will only accept the object format defined by the codecs, + i.e. Unicode objects for most built-in codecs. Output is also codec-dependent + and will usually be Unicode as well. + + *encoding* specifies the encoding which is to be used for the file.
+ + *errors* may be given to define the error handling. It defaults to ``'strict'`` + which causes a :exc:`ValueError` to be raised in case an encoding error occurs. + + *buffering* has the same meaning as for the built-in :func:`open` function. It + defaults to line buffered. + + +.. function:: EncodedFile(file, input[, output[, errors]]) + + Return a wrapped version of file which provides transparent encoding + translation. + + Strings written to the wrapped file are interpreted according to the given + *input* encoding and then written to the original file as strings using the + *output* encoding. The intermediate encoding will usually be Unicode but depends + on the specified codecs. + + If *output* is not given, it defaults to *input*. + + *errors* may be given to define the error handling. It defaults to ``'strict'``, + which causes :exc:`ValueError` to be raised in case an encoding error occurs. + + +.. function:: iterencode(iterable, encoding[, errors]) + + Uses an incremental encoder to iteratively encode the input provided by + *iterable*. This function is a generator. *errors* (as well as any other keyword + argument) is passed through to the incremental encoder. + + .. versionadded:: 2.5 + + +.. function:: iterdecode(iterable, encoding[, errors]) + + Uses an incremental decoder to iteratively decode the input provided by + *iterable*. This function is a generator. *errors* (as well as any other keyword + argument) is passed through to the incremental decoder. + + .. versionadded:: 2.5 + +The module also provides the following constants which are useful for reading +and writing to platform dependent files: + + +.. data:: BOM + BOM_BE + BOM_LE + BOM_UTF8 + BOM_UTF16 + BOM_UTF16_BE + BOM_UTF16_LE + BOM_UTF32 + BOM_UTF32_BE + BOM_UTF32_LE + + These constants define various encodings of the Unicode byte order mark (BOM) + used in UTF-16 and UTF-32 data streams to indicate the byte order used in the + stream or file and in UTF-8 as a Unicode signature. 
:const:`BOM_UTF16` is either + :const:`BOM_UTF16_BE` or :const:`BOM_UTF16_LE` depending on the platform's + native byte order, :const:`BOM` is an alias for :const:`BOM_UTF16`, + :const:`BOM_LE` for :const:`BOM_UTF16_LE` and :const:`BOM_BE` for + :const:`BOM_UTF16_BE`. The others represent the BOM in UTF-8 and UTF-32 + encodings. + + +.. _codec-base-classes: + +Codec Base Classes +------------------ + +The :mod:`codecs` module defines a set of base classes which define the +interface and can also be used to easily write your own codecs for use in Python. + +Each codec has to define four interfaces to make it usable as codec in Python: +stateless encoder, stateless decoder, stream reader and stream writer. The +stream reader and writers typically reuse the stateless encoder/decoder to +implement the file protocols. + +The :class:`Codec` class defines the interface for stateless encoders/decoders. + +To simplify and standardize error handling, the :meth:`encode` and +:meth:`decode` methods may implement different error handling schemes by +providing the *errors* string argument. The following string values are defined +and implemented by all standard Python codecs: + ++-------------------------+-----------------------------------------------+ +| Value | Meaning | ++=========================+===============================================+ +| ``'strict'`` | Raise :exc:`UnicodeError` (or a subclass); | +| | this is the default. | ++-------------------------+-----------------------------------------------+ +| ``'ignore'`` | Ignore the character and continue with the | +| | next. | ++-------------------------+-----------------------------------------------+ +| ``'replace'`` | Replace with a suitable replacement | +| | character; Python will use the official | +| | U+FFFD REPLACEMENT CHARACTER for the built-in | +| | Unicode codecs on decoding and '?' on | +| | encoding.
| ++-------------------------+-----------------------------------------------+ +| ``'xmlcharrefreplace'`` | Replace with the appropriate XML character | +| | reference (only for encoding). | ++-------------------------+-----------------------------------------------+ +| ``'backslashreplace'`` | Replace with backslashed escape sequences | +| | (only for encoding). | ++-------------------------+-----------------------------------------------+ + +The set of allowed values can be extended via :meth:`register_error`. + + +.. _codec-objects: + +Codec Objects +^^^^^^^^^^^^^ + +The :class:`Codec` class defines these methods which also define the function +interfaces of the stateless encoder and decoder: + + +.. method:: Codec.encode(input[, errors]) + + Encodes the object *input* and returns a tuple (output object, length consumed). + While codecs are not restricted to use with Unicode, in a Unicode context, + encoding converts a Unicode object to a plain string using a particular + character set encoding (e.g., ``cp1252`` or ``iso-8859-1``). + + *errors* defines the error handling to apply. It defaults to ``'strict'`` + handling. + + The method may not store state in the :class:`Codec` instance. Use + :class:`StreamCodec` for codecs which have to keep state in order to make + encoding/decoding efficient. + + The encoder must be able to handle zero length input and return an empty object + of the output object type in this situation. + + +.. method:: Codec.decode(input[, errors]) + + Decodes the object *input* and returns a tuple (output object, length consumed). + In a Unicode context, decoding converts a plain string encoded using a + particular character set encoding to a Unicode object. + + *input* must be an object which provides the ``bf_getreadbuf`` buffer slot. + Python strings, buffer objects and memory mapped files are examples of objects + providing this slot. + + *errors* defines the error handling to apply. It defaults to ``'strict'`` + handling. 
+ + The method may not store state in the :class:`Codec` instance. Use + :class:`StreamCodec` for codecs which have to keep state in order to make + encoding/decoding efficient. + + The decoder must be able to handle zero length input and return an empty object + of the output object type in this situation. + +The :class:`IncrementalEncoder` and :class:`IncrementalDecoder` classes provide +the basic interface for incremental encoding and decoding. Encoding/decoding the +input isn't done with one call to the stateless encoder/decoder function, but +with multiple calls to the :meth:`encode`/:meth:`decode` method of the +incremental encoder/decoder. The incremental encoder/decoder keeps track of the +encoding/decoding process during method calls. + +The joined output of calls to the :meth:`encode`/:meth:`decode` method is the +same as if all the single inputs were joined into one, and this input was +encoded/decoded with the stateless encoder/decoder. + + +.. _incremental-encoder-objects: + +IncrementalEncoder Objects +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. versionadded:: 2.5 + +The :class:`IncrementalEncoder` class is used for encoding an input in multiple +steps. It defines the following methods which every incremental encoder must +define in order to be compatible with the Python codec registry. + + +.. class:: IncrementalEncoder([errors]) + + Constructor for an :class:`IncrementalEncoder` instance. + + All incremental encoders must provide this constructor interface. They are free + to add additional keyword arguments, but only the ones defined here are used by + the Python codec registry. + + The :class:`IncrementalEncoder` may implement different error handling schemes + by providing the *errors* keyword argument. These parameters are predefined: + + * ``'strict'`` Raise :exc:`ValueError` (or a subclass); this is the default. + + * ``'ignore'`` Ignore the character and continue with the next. 
+ + * ``'replace'`` Replace with a suitable replacement character + + * ``'xmlcharrefreplace'`` Replace with the appropriate XML character reference + + * ``'backslashreplace'`` Replace with backslashed escape sequences. + + The *errors* argument will be assigned to an attribute of the same name. + Assigning to this attribute makes it possible to switch between different error + handling strategies during the lifetime of the :class:`IncrementalEncoder` + object. + + The set of allowed values for the *errors* argument can be extended with + :func:`register_error`. + + +.. method:: IncrementalEncoder.encode(object[, final]) + + Encodes *object* (taking the current state of the encoder into account) and + returns the resulting encoded object. If this is the last call to :meth:`encode` + *final* must be true (the default is false). + + +.. method:: IncrementalEncoder.reset() + + Reset the encoder to the initial state. + + +.. method:: IncrementalEncoder.getstate() + + Return the current state of the encoder which must be an integer. The + implementation should make sure that ``0`` is the most common state. (States + that are more complicated than integers can be converted into an integer by + marshaling/pickling the state and encoding the bytes of the resulting string + into an integer). + + .. versionadded:: 3.0 + + +.. method:: IncrementalEncoder.setstate(state) + + Set the state of the encoder to *state*. *state* must be an encoder state + returned by :meth:`getstate`. + + .. versionadded:: 3.0 + + +.. _incremental-decoder-objects: + +IncrementalDecoder Objects +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :class:`IncrementalDecoder` class is used for decoding an input in multiple +steps. It defines the following methods which every incremental decoder must +define in order to be compatible with the Python codec registry. + + +.. class:: IncrementalDecoder([errors]) + + Constructor for an :class:`IncrementalDecoder` instance. 
+ + All incremental decoders must provide this constructor interface. They are free + to add additional keyword arguments, but only the ones defined here are used by + the Python codec registry. + + The :class:`IncrementalDecoder` may implement different error handling schemes + by providing the *errors* keyword argument. These parameters are predefined: + + * ``'strict'`` Raise :exc:`ValueError` (or a subclass); this is the default. + + * ``'ignore'`` Ignore the character and continue with the next. + + * ``'replace'`` Replace with a suitable replacement character. + + The *errors* argument will be assigned to an attribute of the same name. + Assigning to this attribute makes it possible to switch between different error + handling strategies during the lifetime of the :class:`IncrementalDecoder` + object. + + The set of allowed values for the *errors* argument can be extended with + :func:`register_error`. + + +.. method:: IncrementalDecoder.decode(object[, final]) + + Decodes *object* (taking the current state of the decoder into account) and + returns the resulting decoded object. If this is the last call to :meth:`decode` + *final* must be true (the default is false). If *final* is true the decoder must + decode the input completely and must flush all buffers. If this isn't possible + (e.g. because of incomplete byte sequences at the end of the input) it must + initiate error handling just like in the stateless case (which might raise an + exception). + + +.. method:: IncrementalDecoder.reset() + + Reset the decoder to the initial state. + + +.. method:: IncrementalDecoder.getstate() + + Return the current state of the decoder. This must be a tuple with two items, + the first must be the buffer containing the still undecoded input. The second + must be an integer and can be additional state info. (The implementation should + make sure that ``0`` is the most common additional state info.)
If this + additional state info is ``0`` it must be possible to set the decoder to the + state which has no input buffered and ``0`` as the additional state info, so + that feeding the previously buffered input to the decoder returns it to the + previous state without producing any output. (Additional state info that is more + complicated than integers can be converted into an integer by + marshaling/pickling the info and encoding the bytes of the resulting string into + an integer.) + + .. versionadded:: 3.0 + + +.. method:: IncrementalDecoder.setstate(state) + + Set the state of the decoder to *state*. *state* must be a decoder state + returned by :meth:`getstate`. + + .. versionadded:: 3.0 + +The :class:`StreamWriter` and :class:`StreamReader` classes provide generic +working interfaces which can be used to implement new encoding submodules very +easily. See :mod:`encodings.utf_8` for an example of how this is done. + + +.. _stream-writer-objects: + +StreamWriter Objects +^^^^^^^^^^^^^^^^^^^^ + +The :class:`StreamWriter` class is a subclass of :class:`Codec` and defines the +following methods which every stream writer must define in order to be +compatible with the Python codec registry. + + +.. class:: StreamWriter(stream[, errors]) + + Constructor for a :class:`StreamWriter` instance. + + All stream writers must provide this constructor interface. They are free to add + additional keyword arguments, but only the ones defined here are used by the + Python codec registry. + + *stream* must be a file-like object open for writing binary data. + + The :class:`StreamWriter` may implement different error handling schemes by + providing the *errors* keyword argument. These parameters are predefined: + + * ``'strict'`` Raise :exc:`ValueError` (or a subclass); this is the default. + + * ``'ignore'`` Ignore the character and continue with the next.
+ + * ``'replace'`` Replace with a suitable replacement character + + * ``'xmlcharrefreplace'`` Replace with the appropriate XML character reference + + * ``'backslashreplace'`` Replace with backslashed escape sequences. + + The *errors* argument will be assigned to an attribute of the same name. + Assigning to this attribute makes it possible to switch between different error + handling strategies during the lifetime of the :class:`StreamWriter` object. + + The set of allowed values for the *errors* argument can be extended with + :func:`register_error`. + + +.. method:: StreamWriter.write(object) + + Writes the object's contents encoded to the stream. + + +.. method:: StreamWriter.writelines(list) + + Writes the concatenated list of strings to the stream (possibly by reusing the + :meth:`write` method). + + +.. method:: StreamWriter.reset() + + Flushes and resets the codec buffers used for keeping state. + + Calling this method should ensure that the data on the output is put into a + clean state that allows appending of new fresh data without having to rescan the + whole stream to recover state. + +In addition to the above methods, the :class:`StreamWriter` must also inherit +all other methods and attributes from the underlying stream. + + +.. _stream-reader-objects: + +StreamReader Objects +^^^^^^^^^^^^^^^^^^^^ + +The :class:`StreamReader` class is a subclass of :class:`Codec` and defines the +following methods which every stream reader must define in order to be +compatible with the Python codec registry. + + +.. class:: StreamReader(stream[, errors]) + + Constructor for a :class:`StreamReader` instance. + + All stream readers must provide this constructor interface. They are free to add + additional keyword arguments, but only the ones defined here are used by the + Python codec registry. + + *stream* must be a file-like object open for reading (binary) data. 
+ + The :class:`StreamReader` may implement different error handling schemes by + providing the *errors* keyword argument. These parameters are defined: + + * ``'strict'`` Raise :exc:`ValueError` (or a subclass); this is the default. + + * ``'ignore'`` Ignore the character and continue with the next. + + * ``'replace'`` Replace with a suitable replacement character. + + The *errors* argument will be assigned to an attribute of the same name. + Assigning to this attribute makes it possible to switch between different error + handling strategies during the lifetime of the :class:`StreamReader` object. + + The set of allowed values for the *errors* argument can be extended with + :func:`register_error`. + + +.. method:: StreamReader.read([size[, chars[, firstline]]]) + + Decodes data from the stream and returns the resulting object. + + *chars* indicates the number of characters to read from the stream. :func:`read` + will never return more than *chars* characters, but it might return less, if + there are not enough characters available. + + *size* indicates the approximate maximum number of bytes to read from the stream + for decoding purposes. The decoder can modify this setting as appropriate. The + default value -1 indicates to read and decode as much as possible. *size* is + intended to prevent having to decode huge files in one step. + + *firstline* indicates that it would be sufficient to only return the first line, + if there are decoding errors on later lines. + + The method should use a greedy read strategy meaning that it should read as much + data as is allowed within the definition of the encoding and the given size, + e.g. if optional encoding endings or state markers are available on the stream, + these should be read too. + + .. versionchanged:: 2.4 + *chars* argument added. + + .. versionchanged:: 2.4.2 + *firstline* argument added. + + +..
method:: StreamReader.readline([size[, keepends]]) + + Read one line from the input stream and return the decoded data. + + *size*, if given, is passed as size argument to the stream's :meth:`readline` + method. + + If *keepends* is false line-endings will be stripped from the lines returned. + + .. versionchanged:: 2.4 + *keepends* argument added. + + +.. method:: StreamReader.readlines([sizehint[, keepends]]) + + Read all lines available on the input stream and return them as a list of lines. + + Line-endings are implemented using the codec's decoder method and are included + in the list entries if *keepends* is true. + + *sizehint*, if given, is passed as the *size* argument to the stream's + :meth:`read` method. + + +.. method:: StreamReader.reset() + + Resets the codec buffers used for keeping state. + + Note that no stream repositioning should take place. This method is primarily + intended to be able to recover from decoding errors. + +In addition to the above methods, the :class:`StreamReader` must also inherit +all other methods and attributes from the underlying stream. + +The next two base classes are included for convenience. They are not needed by +the codec registry, but may prove useful in practice. + + +.. _stream-reader-writer: + +StreamReaderWriter Objects +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :class:`StreamReaderWriter` allows wrapping streams which work in both read +and write modes. + +The design is such that one can use the factory functions returned by the +:func:`lookup` function to construct the instance. + + +.. class:: StreamReaderWriter(stream, Reader, Writer, errors) + + Creates a :class:`StreamReaderWriter` instance. *stream* must be a file-like + object. *Reader* and *Writer* must be factory functions or classes providing the + :class:`StreamReader` and :class:`StreamWriter` interface resp. Error handling + is done in the same way as defined for the stream readers and writers.
+ +:class:`StreamReaderWriter` instances define the combined interfaces of +:class:`StreamReader` and :class:`StreamWriter` classes. They inherit all other +methods and attributes from the underlying stream. + + +.. _stream-recoder-objects: + +StreamRecoder Objects +^^^^^^^^^^^^^^^^^^^^^ + +The :class:`StreamRecoder` provides a frontend-backend view of encoding data +which is sometimes useful when dealing with different encoding environments. + +The design is such that one can use the factory functions returned by the +:func:`lookup` function to construct the instance. + + +.. class:: StreamRecoder(stream, encode, decode, Reader, Writer, errors) + + Creates a :class:`StreamRecoder` instance which implements a two-way conversion: + *encode* and *decode* work on the frontend (the input to :meth:`read` and output + of :meth:`write`) while *Reader* and *Writer* work on the backend (reading and + writing to the stream). + + You can use these objects to do transparent direct recodings from e.g. Latin-1 + to UTF-8 and back. + + *stream* must be a file-like object. + + *encode*, *decode* must adhere to the :class:`Codec` interface. *Reader*, + *Writer* must be factory functions or classes providing objects of the + :class:`StreamReader` and :class:`StreamWriter` interface respectively. + + *encode* and *decode* are needed for the frontend translation, *Reader* and + *Writer* for the backend translation. The intermediate format used is + determined by the two sets of codecs, e.g. the Unicode codecs will use Unicode + as the intermediate encoding. + + Error handling is done in the same way as defined for the stream readers and + writers. + +:class:`StreamRecoder` instances define the combined interfaces of +:class:`StreamReader` and :class:`StreamWriter` classes. They inherit all other +methods and attributes from the underlying stream. + + +..
_encodings-overview: + +Encodings and Unicode +--------------------- + +Unicode strings are stored internally as sequences of codepoints (to be precise +as :ctype:`Py_UNICODE` arrays). Depending on the way Python is compiled (either +via :option:`--enable-unicode=ucs2` or :option:`--enable-unicode=ucs4`, with the +former being the default) :ctype:`Py_UNICODE` is either a 16-bit or 32-bit data +type. Once a Unicode object is used outside of CPU and memory, CPU endianness +and how these arrays are stored as bytes become an issue. Transforming a +unicode object into a sequence of bytes is called encoding and recreating the +unicode object from the sequence of bytes is known as decoding. There are many +different methods for how this transformation can be done (these methods are +also called encodings). The simplest method is to map the codepoints 0-255 to +the bytes ``0x0``-``0xff``. This means that a unicode object that contains +codepoints above ``U+00FF`` can't be encoded with this method (which is called +``'latin-1'`` or ``'iso-8859-1'``). :func:`unicode.encode` will raise a +:exc:`UnicodeEncodeError` that looks like this: ``UnicodeEncodeError: 'latin-1' +codec can't encode character u'\u1234' in position 3: ordinal not in +range(256)``. + +There's another group of encodings (the so called charmap encodings) that choose +a different subset of all unicode code points and how these codepoints are +mapped to the bytes ``0x0``-``0xff``. To see how this is done simply open +e.g. :file:`encodings/cp1252.py` (which is an encoding that is used primarily on +Windows). There's a string constant with 256 characters that shows you which +character is mapped to which byte value. + +All of these encodings can only encode 256 of the 65536 (or 1114111) codepoints +defined in unicode. A simple and straightforward way that can store each Unicode +code point, is to store each codepoint as two consecutive bytes. 
There are two +possibilities: Store the bytes in big endian or in little endian order. These +two encodings are called UTF-16-BE and UTF-16-LE respectively. Their +disadvantage is that if e.g. you use UTF-16-BE on a little endian machine you +will always have to swap bytes on encoding and decoding. UTF-16 avoids this +problem: Bytes will always be in natural endianness. When these bytes are read +by a CPU with a different endianness, then bytes have to be swapped though. To +be able to detect the endianness of a UTF-16 byte sequence, there's the so +called BOM (the "Byte Order Mark"). This is the Unicode character ``U+FEFF``. +This character will be prepended to every UTF-16 byte sequence. The byte swapped +version of this character (``0xFFFE``) is an illegal character that may not +appear in a Unicode text. So when the first character in a UTF-16 byte sequence +appears to be a ``U+FFFE`` the bytes have to be swapped on decoding. +Unfortunately up to Unicode 4.0 the character ``U+FEFF`` had a second purpose as +a ``ZERO WIDTH NO-BREAK SPACE``: A character that has no width and doesn't allow +a word to be split. It can e.g. be used to give hints to a ligature algorithm. +With Unicode 4.0 using ``U+FEFF`` as a ``ZERO WIDTH NO-BREAK SPACE`` has been +deprecated (with ``U+2060`` (``WORD JOINER``) assuming this role). Nevertheless +Unicode software still must be able to handle ``U+FEFF`` in both roles: As a BOM +it's a device to determine the storage layout of the encoded bytes, and vanishes +once the byte sequence has been decoded into a Unicode string; as a ``ZERO WIDTH +NO-BREAK SPACE`` it's a normal character that will be decoded like any other. + +There's another encoding that is able to encode the full range of Unicode +characters: UTF-8. UTF-8 is an 8-bit encoding, which means there are no issues +with byte order in UTF-8. Each byte in a UTF-8 byte sequence consists of two +parts: Marker bits (the most significant bits) and payload bits. 
The marker bits +are a sequence of zero to six 1 bits followed by a 0 bit. Unicode characters are +encoded like this (with x being payload bits, which when concatenated give the +Unicode character): + ++-----------------------------------+----------------------------------------------+ +| Range | Encoding | ++===================================+==============================================+ +| ``U-00000000`` ... ``U-0000007F`` | 0xxxxxxx | ++-----------------------------------+----------------------------------------------+ +| ``U-00000080`` ... ``U-000007FF`` | 110xxxxx 10xxxxxx | ++-----------------------------------+----------------------------------------------+ +| ``U-00000800`` ... ``U-0000FFFF`` | 1110xxxx 10xxxxxx 10xxxxxx | ++-----------------------------------+----------------------------------------------+ +| ``U-00010000`` ... ``U-001FFFFF`` | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx | ++-----------------------------------+----------------------------------------------+ +| ``U-00200000`` ... ``U-03FFFFFF`` | 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx | ++-----------------------------------+----------------------------------------------+ +| ``U-04000000`` ... ``U-7FFFFFFF`` | 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx | +| | 10xxxxxx | ++-----------------------------------+----------------------------------------------+ + +The least significant bit of the Unicode character is the rightmost x bit. + +As UTF-8 is an 8-bit encoding no BOM is required and any ``U+FEFF`` character in +the decoded Unicode string (even if it's the first character) is treated as a +``ZERO WIDTH NO-BREAK SPACE``. + +Without external information it's impossible to reliably determine which +encoding was used for encoding a Unicode string. Each charmap encoding can +decode any random byte sequence. However that's not possible with UTF-8, as +UTF-8 byte sequences have a structure that doesn't allow arbitrary byte +sequence. 
To increase the reliability with which a UTF-8 encoding can be +detected, Microsoft invented a variant of UTF-8 (that Python 2.5 calls +``"utf-8-sig"``) for its Notepad program: Before any of the Unicode characters +is written to the file, a UTF-8 encoded BOM (which looks like this as a byte +sequence: ``0xef``, ``0xbb``, ``0xbf``) is written. As it's rather improbable +that any charmap encoded file starts with these byte values (which would e.g. +map to + + | LATIN SMALL LETTER I WITH DIAERESIS + | RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + | INVERTED QUESTION MARK + +in iso-8859-1), this increases the probability that a utf-8-sig encoding can be +correctly guessed from the byte sequence. So here the BOM is not used to be able +to determine the byte order used for generating the byte sequence, but as a +signature that helps in guessing the encoding. On encoding the utf-8-sig codec +will write ``0xef``, ``0xbb``, ``0xbf`` as the first three bytes to the file. On +decoding utf-8-sig will skip those three bytes if they appear as the first three +bytes in the file. + + +.. _standard-encodings: + +Standard Encodings +------------------ + +Python comes with a number of codecs built-in, either implemented as C functions +or with dictionaries as mapping tables. The following table lists the codecs by +name, together with a few common aliases, and the languages for which the +encoding is likely used. Neither the list of aliases nor the list of languages +is meant to be exhaustive. Notice that spelling alternatives that only differ in +case or use a hyphen instead of an underscore are also valid aliases. + +Many of the character sets support the same languages. They vary in individual +characters (e.g. whether the EURO SIGN is supported or not), and in the +assignment of characters to code positions. 
For the European languages in +particular, the following variants typically exist: + +* an ISO 8859 codeset + +* a Microsoft Windows code page, which is typically derived from a 8859 codeset, + but replaces control characters with additional graphic characters + +* an IBM EBCDIC code page + +* an IBM PC code page, which is ASCII compatible + ++-----------------+--------------------------------+--------------------------------+ +| Codec | Aliases | Languages | ++=================+================================+================================+ +| ascii | 646, us-ascii | English | ++-----------------+--------------------------------+--------------------------------+ +| big5 | big5-tw, csbig5 | Traditional Chinese | ++-----------------+--------------------------------+--------------------------------+ +| big5hkscs | big5-hkscs, hkscs | Traditional Chinese | ++-----------------+--------------------------------+--------------------------------+ +| cp037 | IBM037, IBM039 | English | ++-----------------+--------------------------------+--------------------------------+ +| cp424 | EBCDIC-CP-HE, IBM424 | Hebrew | ++-----------------+--------------------------------+--------------------------------+ +| cp437 | 437, IBM437 | English | ++-----------------+--------------------------------+--------------------------------+ +| cp500 | EBCDIC-CP-BE, EBCDIC-CP-CH, | Western Europe | +| | IBM500 | | ++-----------------+--------------------------------+--------------------------------+ +| cp737 | | Greek | ++-----------------+--------------------------------+--------------------------------+ +| cp775 | IBM775 | Baltic languages | ++-----------------+--------------------------------+--------------------------------+ +| cp850 | 850, IBM850 | Western Europe | ++-----------------+--------------------------------+--------------------------------+ +| cp852 | 852, IBM852 | Central and Eastern Europe | ++-----------------+--------------------------------+--------------------------------+ 
+| cp855 | 855, IBM855 | Bulgarian, Byelorussian, | +| | | Macedonian, Russian, Serbian | ++-----------------+--------------------------------+--------------------------------+ +| cp856 | | Hebrew | ++-----------------+--------------------------------+--------------------------------+ +| cp857 | 857, IBM857 | Turkish | ++-----------------+--------------------------------+--------------------------------+ +| cp860 | 860, IBM860 | Portuguese | ++-----------------+--------------------------------+--------------------------------+ +| cp861 | 861, CP-IS, IBM861 | Icelandic | ++-----------------+--------------------------------+--------------------------------+ +| cp862 | 862, IBM862 | Hebrew | ++-----------------+--------------------------------+--------------------------------+ +| cp863 | 863, IBM863 | Canadian | ++-----------------+--------------------------------+--------------------------------+ +| cp864 | IBM864 | Arabic | ++-----------------+--------------------------------+--------------------------------+ +| cp865 | 865, IBM865 | Danish, Norwegian | ++-----------------+--------------------------------+--------------------------------+ +| cp866 | 866, IBM866 | Russian | ++-----------------+--------------------------------+--------------------------------+ +| cp869 | 869, CP-GR, IBM869 | Greek | ++-----------------+--------------------------------+--------------------------------+ +| cp874 | | Thai | ++-----------------+--------------------------------+--------------------------------+ +| cp875 | | Greek | ++-----------------+--------------------------------+--------------------------------+ +| cp932 | 932, ms932, mskanji, ms-kanji | Japanese | ++-----------------+--------------------------------+--------------------------------+ +| cp949 | 949, ms949, uhc | Korean | ++-----------------+--------------------------------+--------------------------------+ +| cp950 | 950, ms950 | Traditional Chinese | 
++-----------------+--------------------------------+--------------------------------+ +| cp1006 | | Urdu | ++-----------------+--------------------------------+--------------------------------+ +| cp1026 | ibm1026 | Turkish | ++-----------------+--------------------------------+--------------------------------+ +| cp1140 | ibm1140 | Western Europe | ++-----------------+--------------------------------+--------------------------------+ +| cp1250 | windows-1250 | Central and Eastern Europe | ++-----------------+--------------------------------+--------------------------------+ +| cp1251 | windows-1251 | Bulgarian, Byelorussian, | +| | | Macedonian, Russian, Serbian | ++-----------------+--------------------------------+--------------------------------+ +| cp1252 | windows-1252 | Western Europe | ++-----------------+--------------------------------+--------------------------------+ +| cp1253 | windows-1253 | Greek | ++-----------------+--------------------------------+--------------------------------+ +| cp1254 | windows-1254 | Turkish | ++-----------------+--------------------------------+--------------------------------+ +| cp1255 | windows-1255 | Hebrew | ++-----------------+--------------------------------+--------------------------------+ +| cp1256 | windows-1256 | Arabic | ++-----------------+--------------------------------+--------------------------------+ +| cp1257 | windows-1257 | Baltic languages | ++-----------------+--------------------------------+--------------------------------+ +| cp1258 | windows-1258 | Vietnamese | ++-----------------+--------------------------------+--------------------------------+ +| euc_jp | eucjp, ujis, u-jis | Japanese | ++-----------------+--------------------------------+--------------------------------+ +| euc_jis_2004 | jisx0213, eucjis2004 | Japanese | ++-----------------+--------------------------------+--------------------------------+ +| euc_jisx0213 | eucjisx0213 | Japanese | 
++-----------------+--------------------------------+--------------------------------+ +| euc_kr | euckr, korean, ksc5601, | Korean | +| | ks_c-5601, ks_c-5601-1987, | | +| | ksx1001, ks_x-1001 | | ++-----------------+--------------------------------+--------------------------------+ +| gb2312 | chinese, csiso58gb231280, euc- | Simplified Chinese | +| | cn, euccn, eucgb2312-cn, | | +| | gb2312-1980, gb2312-80, iso- | | +| | ir-58 | | ++-----------------+--------------------------------+--------------------------------+ +| gbk | 936, cp936, ms936 | Unified Chinese | ++-----------------+--------------------------------+--------------------------------+ +| gb18030 | gb18030-2000 | Unified Chinese | ++-----------------+--------------------------------+--------------------------------+ +| hz | hzgb, hz-gb, hz-gb-2312 | Simplified Chinese | ++-----------------+--------------------------------+--------------------------------+ +| iso2022_jp | csiso2022jp, iso2022jp, | Japanese | +| | iso-2022-jp | | ++-----------------+--------------------------------+--------------------------------+ +| iso2022_jp_1 | iso2022jp-1, iso-2022-jp-1 | Japanese | ++-----------------+--------------------------------+--------------------------------+ +| iso2022_jp_2 | iso2022jp-2, iso-2022-jp-2 | Japanese, Korean, Simplified | +| | | Chinese, Western Europe, Greek | ++-----------------+--------------------------------+--------------------------------+ +| iso2022_jp_2004 | iso2022jp-2004, | Japanese | +| | iso-2022-jp-2004 | | ++-----------------+--------------------------------+--------------------------------+ +| iso2022_jp_3 | iso2022jp-3, iso-2022-jp-3 | Japanese | ++-----------------+--------------------------------+--------------------------------+ +| iso2022_jp_ext | iso2022jp-ext, iso-2022-jp-ext | Japanese | ++-----------------+--------------------------------+--------------------------------+ +| iso2022_kr | csiso2022kr, iso2022kr, | Korean | +| | iso-2022-kr | | 
++-----------------+--------------------------------+--------------------------------+ +| latin_1 | iso-8859-1, iso8859-1, 8859, | Western Europe | +| | cp819, latin, latin1, L1 | | ++-----------------+--------------------------------+--------------------------------+ +| iso8859_2 | iso-8859-2, latin2, L2 | Central and Eastern Europe | ++-----------------+--------------------------------+--------------------------------+ +| iso8859_3 | iso-8859-3, latin3, L3 | Esperanto, Maltese | ++-----------------+--------------------------------+--------------------------------+ +| iso8859_4 | iso-8859-4, latin4, L4 | Baltic languages | ++-----------------+--------------------------------+--------------------------------+ +| iso8859_5 | iso-8859-5, cyrillic | Bulgarian, Byelorussian, | +| | | Macedonian, Russian, Serbian | ++-----------------+--------------------------------+--------------------------------+ +| iso8859_6 | iso-8859-6, arabic | Arabic | ++-----------------+--------------------------------+--------------------------------+ +| iso8859_7 | iso-8859-7, greek, greek8 | Greek | ++-----------------+--------------------------------+--------------------------------+ +| iso8859_8 | iso-8859-8, hebrew | Hebrew | ++-----------------+--------------------------------+--------------------------------+ +| iso8859_9 | iso-8859-9, latin5, L5 | Turkish | ++-----------------+--------------------------------+--------------------------------+ +| iso8859_10 | iso-8859-10, latin6, L6 | Nordic languages | ++-----------------+--------------------------------+--------------------------------+ +| iso8859_13 | iso-8859-13 | Baltic languages | ++-----------------+--------------------------------+--------------------------------+ +| iso8859_14 | iso-8859-14, latin8, L8 | Celtic languages | ++-----------------+--------------------------------+--------------------------------+ +| iso8859_15 | iso-8859-15 | Western Europe | 
++-----------------+--------------------------------+--------------------------------+ +| johab | cp1361, ms1361 | Korean | ++-----------------+--------------------------------+--------------------------------+ +| koi8_r | | Russian | ++-----------------+--------------------------------+--------------------------------+ +| koi8_u | | Ukrainian | ++-----------------+--------------------------------+--------------------------------+ +| mac_cyrillic | maccyrillic | Bulgarian, Byelorussian, | +| | | Macedonian, Russian, Serbian | ++-----------------+--------------------------------+--------------------------------+ +| mac_greek | macgreek | Greek | ++-----------------+--------------------------------+--------------------------------+ +| mac_iceland | maciceland | Icelandic | ++-----------------+--------------------------------+--------------------------------+ +| mac_latin2 | maclatin2, maccentraleurope | Central and Eastern Europe | ++-----------------+--------------------------------+--------------------------------+ +| mac_roman | macroman | Western Europe | ++-----------------+--------------------------------+--------------------------------+ +| mac_turkish | macturkish | Turkish | ++-----------------+--------------------------------+--------------------------------+ +| ptcp154 | csptcp154, pt154, cp154, | Kazakh | +| | cyrillic-asian | | ++-----------------+--------------------------------+--------------------------------+ +| shift_jis | csshiftjis, shiftjis, sjis, | Japanese | +| | s_jis | | ++-----------------+--------------------------------+--------------------------------+ +| shift_jis_2004 | shiftjis2004, sjis_2004, | Japanese | +| | sjis2004 | | ++-----------------+--------------------------------+--------------------------------+ +| shift_jisx0213 | shiftjisx0213, sjisx0213, | Japanese | +| | s_jisx0213 | | ++-----------------+--------------------------------+--------------------------------+ +| utf_16 | U16, utf16 | all languages | 
++-----------------+--------------------------------+--------------------------------+ +| utf_16_be | UTF-16BE | all languages (BMP only) | ++-----------------+--------------------------------+--------------------------------+ +| utf_16_le | UTF-16LE | all languages (BMP only) | ++-----------------+--------------------------------+--------------------------------+ +| utf_7 | U7, unicode-1-1-utf-7 | all languages | ++-----------------+--------------------------------+--------------------------------+ +| utf_8 | U8, UTF, utf8 | all languages | ++-----------------+--------------------------------+--------------------------------+ +| utf_8_sig | | all languages | ++-----------------+--------------------------------+--------------------------------+ + +A number of codecs are specific to Python, so their codec names have no meaning +outside Python. Some of them don't convert from Unicode strings to byte strings, +but instead use the property of the Python codecs machinery that any bijective +function with one argument can be considered as an encoding. + +For the codecs listed below, the result in the "encoding" direction is always a +byte string. The result of the "decoding" direction is listed as operand type in +the table. 
+ ++--------------------+---------+----------------+---------------------------+ +| Codec | Aliases | Operand type | Purpose | ++====================+=========+================+===========================+ +| idna | | Unicode string | Implements :rfc:`3490`, | +| | | | see also | +| | | | :mod:`encodings.idna` | ++--------------------+---------+----------------+---------------------------+ +| mbcs | dbcs | Unicode string | Windows only: Encode | +| | | | operand according to the | +| | | | ANSI codepage (CP_ACP) | ++--------------------+---------+----------------+---------------------------+ +| palmos | | Unicode string | Encoding of PalmOS 3.5 | ++--------------------+---------+----------------+---------------------------+ +| punycode | | Unicode string | Implements :rfc:`3492` | ++--------------------+---------+----------------+---------------------------+ +| raw_unicode_escape | | Unicode string | Produce a string that is | +| | | | suitable as raw Unicode | +| | | | literal in Python source | +| | | | code | ++--------------------+---------+----------------+---------------------------+ +| undefined | | any | Raise an exception for | +| | | | all conversions. Can be | +| | | | used as the system | +| | | | encoding if no automatic | +| | | | coercion between byte and | +| | | | Unicode strings is | +| | | | desired. | ++--------------------+---------+----------------+---------------------------+ +| unicode_escape | | Unicode string | Produce a string that is | +| | | | suitable as Unicode | +| | | | literal in Python source | +| | | | code | ++--------------------+---------+----------------+---------------------------+ +| unicode_internal | | Unicode string | Return the internal | +| | | | representation of the | +| | | | operand | ++--------------------+---------+----------------+---------------------------+ + +.. versionadded:: 2.3 + The ``idna`` and ``punycode`` encodings. 
+ + +:mod:`encodings.idna` --- Internationalized Domain Names in Applications +------------------------------------------------------------------------ + +.. module:: encodings.idna + :synopsis: Internationalized Domain Names implementation +.. moduleauthor:: Martin v. Löwis + +.. versionadded:: 2.3 + +This module implements :rfc:`3490` (Internationalized Domain Names in +Applications) and :rfc:`3491` (Nameprep: A Stringprep Profile for +Internationalized Domain Names (IDN)). It builds upon the ``punycode`` encoding +and :mod:`stringprep`. + +These RFCs together define a protocol to support non-ASCII characters in domain +names. A domain name containing non-ASCII characters (such as +``www.Alliancefrançaise.nu``) is converted into an ASCII-compatible encoding +(ACE, such as ``www.xn--alliancefranaise-npb.nu``). The ACE form of the domain +name is then used in all places where arbitrary characters are not allowed by +the protocol, such as DNS queries, HTTP :mailheader:`Host` fields, and so +on. This conversion is carried out in the application; if possible invisible to +the user: The application should transparently convert Unicode domain labels to +IDNA on the wire, and convert back ACE labels to Unicode before presenting them +to the user. + +Python supports this conversion in several ways: The ``idna`` codec converts +between Unicode and the ACE. Furthermore, the :mod:`socket` module +transparently converts Unicode host names to ACE, so that applications need not +be concerned about converting host names themselves when they pass them to the +socket module. On top of that, modules that have host names as function +parameters, such as :mod:`httplib` and :mod:`ftplib`, accept Unicode host names +(:mod:`httplib` then also transparently sends an IDNA hostname in the +:mailheader:`Host` field if it sends that field at all). 
+ +When receiving host names from the wire (such as in reverse name lookup), no +automatic conversion to Unicode is performed: Applications wishing to present +such host names to the user should decode them to Unicode. + +The module :mod:`encodings.idna` also implements the nameprep procedure, which +performs certain normalizations on host names, to achieve case-insensitivity of +international domain names, and to unify similar characters. The nameprep +functions can be used directly if desired. + + +.. function:: nameprep(label) + + Return the nameprepped version of *label*. The implementation currently assumes + query strings, so ``AllowUnassigned`` is true. + + +.. function:: ToASCII(label) + + Convert a label to ASCII, as specified in :rfc:`3490`. ``UseSTD3ASCIIRules`` is + assumed to be false. + + +.. function:: ToUnicode(label) + + Convert a label to Unicode, as specified in :rfc:`3490`. + + +:mod:`encodings.utf_8_sig` --- UTF-8 codec with BOM signature +------------------------------------------------------------- + +.. module:: encodings.utf_8_sig + :synopsis: UTF-8 codec with BOM signature +.. moduleauthor:: Walter Dörwald + +.. versionadded:: 2.5 + +This module implements a variant of the UTF-8 codec: On encoding a UTF-8 encoded +BOM will be prepended to the UTF-8 encoded bytes. For the stateful encoder this +is only done once (on the first write to the byte stream). For decoding an +optional UTF-8 encoded BOM at the start of the data will be skipped. + diff --git a/Doc/library/codeop.rst b/Doc/library/codeop.rst new file mode 100644 index 0000000..8a730ec --- /dev/null +++ b/Doc/library/codeop.rst @@ -0,0 +1,95 @@ + +:mod:`codeop` --- Compile Python code +===================================== + +.. module:: codeop + :synopsis: Compile (possibly incomplete) Python code. +.. sectionauthor:: Moshe Zadka +.. sectionauthor:: Michael Hudson + + +.. % LaTeXed from excellent doc-string. 
+ +The :mod:`codeop` module provides utilities upon which the Python +read-eval-print loop can be emulated, as is done in the :mod:`code` module. As +a result, you probably don't want to use the module directly; if you want to +include such a loop in your program you probably want to use the :mod:`code` +module instead. + +There are two parts to this job: + +#. Being able to tell if a line of input completes a Python statement: in + short, telling whether to print '``>>>``' or '``...``' next. + +#. Remembering which future statements the user has entered, so subsequent + input can be compiled with these in effect. + +The :mod:`codeop` module provides a way of doing each of these things, and a way +of doing them both. + +To do just the former: + + +.. function:: compile_command(source[, filename[, symbol]]) + + Tries to compile *source*, which should be a string of Python code and return a + code object if *source* is valid Python code. In that case, the filename + attribute of the code object will be *filename*, which defaults to + ``'<input>'``. Returns ``None`` if *source* is *not* valid Python code, but is a + prefix of valid Python code. + + If there is a problem with *source*, an exception will be raised. + :exc:`SyntaxError` is raised if there is invalid Python syntax, and + :exc:`OverflowError` or :exc:`ValueError` if there is an invalid literal. + + The *symbol* argument determines whether *source* is compiled as a statement + (``'single'``, the default) or as an expression (``'eval'``). Any other value + will cause :exc:`ValueError` to be raised. + + **Caveat:** It is possible (but not likely) that the parser stops parsing with a + successful outcome before reaching the end of the source; in this case, trailing + symbols may be ignored instead of causing an error. For example, a backslash + followed by two newlines may be followed by arbitrary garbage. This will be + fixed once the API for the parser is better. + + +.. 
class:: Compile() + + Instances of this class have :meth:`__call__` methods identical in signature to + the built-in function :func:`compile`, but with the difference that if the + instance compiles program text containing a :mod:`__future__` statement, the + instance 'remembers' and compiles all subsequent program texts with the + statement in force. + + +.. class:: CommandCompiler() + + Instances of this class have :meth:`__call__` methods identical in signature to + :func:`compile_command`; the difference is that if the instance compiles program + text containing a ``__future__`` statement, the instance 'remembers' and + compiles all subsequent program texts with the statement in force. + +A note on version compatibility: the :class:`Compile` and +:class:`CommandCompiler` are new in Python 2.2. If you want to enable the +future-tracking features of 2.2 but also retain compatibility with 2.1 and +earlier versions of Python you can either write :: + + try: + from codeop import CommandCompiler + compile_command = CommandCompiler() + del CommandCompiler + except ImportError: + from codeop import compile_command + +which is a low-impact change, but introduces possibly unwanted global state into +your program, or you can write:: + + try: + from codeop import CommandCompiler + except ImportError: + def CommandCompiler(): + from codeop import compile_command + return compile_command + +and then call ``CommandCompiler`` every time you need a fresh compiler object. + diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst new file mode 100644 index 0000000..c2c9262 --- /dev/null +++ b/Doc/library/collections.rst @@ -0,0 +1,414 @@ + +:mod:`collections` --- High-performance container datatypes +=========================================================== + +.. module:: collections + :synopsis: High-performance datatypes +.. moduleauthor:: Raymond Hettinger +.. sectionauthor:: Raymond Hettinger + + +.. 
versionadded:: 2.4 + +This module implements high-performance container datatypes. Currently, +there are two datatypes, :class:`deque` and :class:`defaultdict`, and +one datatype factory function, :func:`NamedTuple`. Python already +includes built-in containers, :class:`dict`, :class:`list`, +:class:`set`, and :class:`tuple`. In addition, the optional :mod:`bsddb` +module has a :meth:`bsddb.btopen` method that can be used to create in-memory +or file based ordered dictionaries with string keys. + +Future editions of the standard library may include balanced trees and +ordered dictionaries. + +.. versionchanged:: 2.5 + Added :class:`defaultdict`. + +.. versionchanged:: 2.6 + Added :class:`NamedTuple`. + + +.. _deque-objects: + +:class:`deque` objects +---------------------- + + +.. class:: deque([iterable]) + + Returns a new deque object initialized left-to-right (using :meth:`append`) with + data from *iterable*. If *iterable* is not specified, the new deque is empty. + + Deques are a generalization of stacks and queues (the name is pronounced "deck" + and is short for "double-ended queue"). Deques support thread-safe, memory + efficient appends and pops from either side of the deque with approximately the + same O(1) performance in either direction. + + Though :class:`list` objects support similar operations, they are optimized for + fast fixed-length operations and incur O(n) memory movement costs for + ``pop(0)`` and ``insert(0, v)`` operations which change both the size and + position of the underlying data representation. + + .. versionadded:: 2.4 + +Deque objects support the following methods: + + +.. method:: deque.append(x) + + Add *x* to the right side of the deque. + + +.. method:: deque.appendleft(x) + + Add *x* to the left side of the deque. + + +.. method:: deque.clear() + + Remove all elements from the deque leaving it with length 0. + + +.. 
method:: deque.extend(iterable) + + Extend the right side of the deque by appending elements from the iterable + argument. + + +.. method:: deque.extendleft(iterable) + + Extend the left side of the deque by appending elements from *iterable*. Note, + the series of left appends results in reversing the order of elements in the + iterable argument. + + +.. method:: deque.pop() + + Remove and return an element from the right side of the deque. If no elements + are present, raises an :exc:`IndexError`. + + +.. method:: deque.popleft() + + Remove and return an element from the left side of the deque. If no elements are + present, raises an :exc:`IndexError`. + + +.. method:: deque.remove(value) + + Remove the first occurrence of *value*. If not found, raises a + :exc:`ValueError`. + + .. versionadded:: 2.5 + + +.. method:: deque.rotate(n) + + Rotate the deque *n* steps to the right. If *n* is negative, rotate to the + left. Rotating one step to the right is equivalent to: + ``d.appendleft(d.pop())``. + +In addition to the above, deques support iteration, pickling, ``len(d)``, +``reversed(d)``, ``copy.copy(d)``, ``copy.deepcopy(d)``, membership testing with +the :keyword:`in` operator, and subscript references such as ``d[-1]``. + +Example:: + + >>> from collections import deque + >>> d = deque('ghi') # make a new deque with three items + >>> for elem in d: # iterate over the deque's elements + ... 
print elem.upper() + G + H + I + + >>> d.append('j') # add a new entry to the right side + >>> d.appendleft('f') # add a new entry to the left side + >>> d # show the representation of the deque + deque(['f', 'g', 'h', 'i', 'j']) + + >>> d.pop() # return and remove the rightmost item + 'j' + >>> d.popleft() # return and remove the leftmost item + 'f' + >>> list(d) # list the contents of the deque + ['g', 'h', 'i'] + >>> d[0] # peek at leftmost item + 'g' + >>> d[-1] # peek at rightmost item + 'i' + + >>> list(reversed(d)) # list the contents of a deque in reverse + ['i', 'h', 'g'] + >>> 'h' in d # search the deque + True + >>> d.extend('jkl') # add multiple elements at once + >>> d + deque(['g', 'h', 'i', 'j', 'k', 'l']) + >>> d.rotate(1) # right rotation + >>> d + deque(['l', 'g', 'h', 'i', 'j', 'k']) + >>> d.rotate(-1) # left rotation + >>> d + deque(['g', 'h', 'i', 'j', 'k', 'l']) + + >>> deque(reversed(d)) # make a new deque in reverse order + deque(['l', 'k', 'j', 'i', 'h', 'g']) + >>> d.clear() # empty the deque + >>> d.pop() # cannot pop from an empty deque + Traceback (most recent call last): + File "<pyshell#6>", line 1, in -toplevel- + d.pop() + IndexError: pop from an empty deque + + >>> d.extendleft('abc') # extendleft() reverses the input order + >>> d + deque(['c', 'b', 'a']) + + +.. _deque-recipes: + +Recipes +^^^^^^^ + +This section shows various approaches to working with deques. + +The :meth:`rotate` method provides a way to implement :class:`deque` slicing and +deletion. For example, a pure python implementation of ``del d[n]`` relies on +the :meth:`rotate` method to position elements to be popped:: + + def delete_nth(d, n): + d.rotate(-n) + d.popleft() + d.rotate(n) + +To implement :class:`deque` slicing, use a similar approach applying +:meth:`rotate` to bring a target element to the left side of the deque. Remove +old entries with :meth:`popleft`, add new entries with :meth:`extend`, and then +reverse the rotation. 
+ +With minor variations on that approach, it is easy to implement Forth style +stack manipulations such as ``dup``, ``drop``, ``swap``, ``over``, ``pick``, +``rot``, and ``roll``. + +A roundrobin task server can be built from a :class:`deque` using +:meth:`popleft` to select the current task and :meth:`append` to add it back to +the tasklist if the input stream is not exhausted:: + + >>> def roundrobin(*iterables): + ... pending = deque(iter(i) for i in iterables) + ... while pending: + ... task = pending.popleft() + ... try: + ... yield next(task) + ... except StopIteration: + ... continue + ... pending.append(task) + ... + >>> for value in roundrobin('abc', 'd', 'efgh'): + ... print value + + a + d + e + b + f + c + g + h + + +Multi-pass data reduction algorithms can be succinctly expressed and efficiently +coded by extracting elements with multiple calls to :meth:`popleft`, applying +the reduction function, and calling :meth:`append` to add the result back to the +queue. + +For example, building a balanced binary tree of nested lists entails reducing +two adjacent nodes into one by grouping them in a list:: + + >>> def maketree(iterable): + ... d = deque(iterable) + ... while len(d) > 1: + ... pair = [d.popleft(), d.popleft()] + ... d.append(pair) + ... return list(d) + ... + >>> print maketree('abcdefgh') + [[[['a', 'b'], ['c', 'd']], [['e', 'f'], ['g', 'h']]]] + + + +.. _defaultdict-objects: + +:class:`defaultdict` objects +---------------------------- + + +.. class:: defaultdict([default_factory[, ...]]) + + Returns a new dictionary-like object. :class:`defaultdict` is a subclass of the + builtin :class:`dict` class. It overrides one method and adds one writable + instance variable. The remaining functionality is the same as for the + :class:`dict` class and is not documented here. + + The first argument provides the initial value for the :attr:`default_factory` + attribute; it defaults to ``None``. 
All remaining arguments are treated the same + as if they were passed to the :class:`dict` constructor, including keyword + arguments. + + .. versionadded:: 2.5 + +:class:`defaultdict` objects support the following method in addition to the +standard :class:`dict` operations: + + +.. method:: defaultdict.__missing__(key) + + If the :attr:`default_factory` attribute is ``None``, this raises a + :exc:`KeyError` exception with the *key* as argument. + + If :attr:`default_factory` is not ``None``, it is called without arguments to + provide a default value for the given *key*, this value is inserted in the + dictionary for the *key*, and returned. + + If calling :attr:`default_factory` raises an exception, this exception is + propagated unchanged. + + This method is called by the :meth:`__getitem__` method of the :class:`dict` + class when the requested key is not found; whatever it returns or raises is then + returned or raised by :meth:`__getitem__`. + +:class:`defaultdict` objects support the following instance variable: + + +.. attribute:: defaultdict.default_factory + + This attribute is used by the :meth:`__missing__` method; it is initialized from + the first argument to the constructor, if present, or to ``None``, if absent. + + +.. _defaultdict-examples: + +:class:`defaultdict` Examples +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Using :class:`list` as the :attr:`default_factory`, it is easy to group a +sequence of key-value pairs into a dictionary of lists:: + + >>> s = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)] + >>> d = defaultdict(list) + >>> for k, v in s: + ... d[k].append(v) + ... + >>> d.items() + [('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])] + +When each key is encountered for the first time, it is not already in the +mapping; so an entry is automatically created using the :attr:`default_factory` +function which returns an empty :class:`list`. The :meth:`list.append` +operation then attaches the value to the new list.
When keys are encountered +again, the look-up proceeds normally (returning the list for that key) and the +:meth:`list.append` operation adds another value to the list. This technique is +simpler and faster than an equivalent technique using :meth:`dict.setdefault`:: + + >>> d = {} + >>> for k, v in s: + ... d.setdefault(k, []).append(v) + ... + >>> d.items() + [('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])] + +Setting the :attr:`default_factory` to :class:`int` makes the +:class:`defaultdict` useful for counting (like a bag or multiset in other +languages):: + + >>> s = 'mississippi' + >>> d = defaultdict(int) + >>> for k in s: + ... d[k] += 1 + ... + >>> d.items() + [('i', 4), ('p', 2), ('s', 4), ('m', 1)] + +When a letter is first encountered, it is missing from the mapping, so the +:attr:`default_factory` function calls :func:`int` to supply a default count of +zero. The increment operation then builds up the count for each letter. + +The function :func:`int` which always returns zero is just a special case of +constant functions. A faster and more flexible way to create constant functions +is to use a lambda function which can supply any constant value (not just +zero):: + + >>> def constant_factory(value): + ... return lambda: value + >>> d = defaultdict(constant_factory('')) + >>> d.update(name='John', action='ran') + >>> '%(name)s %(action)s to %(object)s' % d + 'John ran to ' + +Setting the :attr:`default_factory` to :class:`set` makes the +:class:`defaultdict` useful for building a dictionary of sets:: + + >>> s = [('red', 1), ('blue', 2), ('red', 3), ('blue', 4), ('red', 1), ('blue', 4)] + >>> d = defaultdict(set) + >>> for k, v in s: + ... d[k].add(v) + ... + >>> d.items() + [('blue', set([2, 4])), ('red', set([1, 3]))] + + +.. _named-tuple-factory: + +:func:`NamedTuple` datatype factory function +-------------------------------------------- + + +.. function:: NamedTuple(typename, fieldnames) + + Returns a new tuple subclass named *typename*. 
The new subclass is used to + create tuple-like objects that have fields accessible by attribute lookup as + well as being indexable and iterable. Instances of the subclass also have a + helpful docstring (with typename and fieldnames) and a helpful :meth:`__repr__` + method which lists the tuple contents in a ``name=value`` format. + + .. versionadded:: 2.6 + + The *fieldnames* are specified in a single string and are separated by spaces. + Any valid Python identifier may be used for a field name. + + Example:: + + >>> Point = NamedTuple('Point', 'x y') + >>> Point.__doc__ # docstring for the new datatype + 'Point(x, y)' + >>> p = Point(11, y=22) # instantiate with positional or keyword arguments + >>> p[0] + p[1] # works just like the tuple (11, 22) + 33 + >>> x, y = p # unpacks just like a tuple + >>> x, y + (11, 22) + >>> p.x + p.y # fields also accessible by name + 33 + >>> p # readable __repr__ with name=value style + Point(x=11, y=22) + + The use cases are the same as those for tuples. The named factories assign + meaning to each tuple position and allow for more readable, self-documenting + code. Named tuples can also be used to assign field names to tuples returned + by the :mod:`csv` or :mod:`sqlite3` modules.
For example:: + + from itertools import starmap + import csv + EmployeeRecord = NamedTuple('EmployeeRecord', 'name age title department paygrade') + for record in starmap(EmployeeRecord, csv.reader(open("employees.csv", "rb"))): + print record + + To cast an individual record stored as :class:`list`, :class:`tuple`, or some + other iterable type, use the star-operator to unpack the values:: + + >>> Color = NamedTuple('Color', 'name code') + >>> m = dict(red=1, green=2, blue=3) + >>> print Color(*m.popitem()) + Color(name='blue', code=3) + diff --git a/Doc/library/colorpicker.rst b/Doc/library/colorpicker.rst new file mode 100644 index 0000000..4244104 --- /dev/null +++ b/Doc/library/colorpicker.rst @@ -0,0 +1,23 @@ + +:mod:`ColorPicker` --- Color selection dialog +============================================= + +.. module:: ColorPicker + :platform: Mac + :synopsis: Interface to the standard color selection dialog. +.. moduleauthor:: Just van Rossum +.. sectionauthor:: Fred L. Drake, Jr. + + +The :mod:`ColorPicker` module provides access to the standard color picker +dialog. + + +.. function:: GetColor(prompt, rgb) + + Show a standard color selection dialog and allow the user to select a color. + The user is given instruction by the *prompt* string, and the default color is + set to *rgb*. *rgb* must be a tuple giving the red, green, and blue components + of the color. :func:`GetColor` returns a tuple giving the user's selected color + and a flag indicating whether they accepted the selection or cancelled. + diff --git a/Doc/library/colorsys.rst b/Doc/library/colorsys.rst new file mode 100644 index 0000000..2e7f3b7 --- /dev/null +++ b/Doc/library/colorsys.rst @@ -0,0 +1,60 @@ + +:mod:`colorsys` --- Conversions between color systems +===================================================== + +.. module:: colorsys + :synopsis: Conversion functions between RGB and other color systems. +..
sectionauthor:: David Ascher + + +The :mod:`colorsys` module defines bidirectional conversions of color values +between colors expressed in the RGB (Red Green Blue) color space used in +computer monitors and three other coordinate systems: YIQ, HLS (Hue Lightness +Saturation) and HSV (Hue Saturation Value). Coordinates in all of these color +spaces are floating point values. In the YIQ space, the Y coordinate is between +0 and 1, but the I and Q coordinates can be positive or negative. In all other +spaces, the coordinates are all between 0 and 1. + +More information about color spaces can be found at +http://www.poynton.com/ColorFAQ.html. + +The :mod:`colorsys` module defines the following functions: + + +.. function:: rgb_to_yiq(r, g, b) + + Convert the color from RGB coordinates to YIQ coordinates. + + +.. function:: yiq_to_rgb(y, i, q) + + Convert the color from YIQ coordinates to RGB coordinates. + + +.. function:: rgb_to_hls(r, g, b) + + Convert the color from RGB coordinates to HLS coordinates. + + +.. function:: hls_to_rgb(h, l, s) + + Convert the color from HLS coordinates to RGB coordinates. + + +.. function:: rgb_to_hsv(r, g, b) + + Convert the color from RGB coordinates to HSV coordinates. + + +.. function:: hsv_to_rgb(h, s, v) + + Convert the color from HSV coordinates to RGB coordinates. + +Example:: + + >>> import colorsys + >>> colorsys.rgb_to_hsv(.3, .4, .2) + (0.25, 0.5, 0.4) + >>> colorsys.hsv_to_rgb(0.25, 0.5, 0.4) + (0.3, 0.4, 0.2) + diff --git a/Doc/library/commands.rst b/Doc/library/commands.rst new file mode 100644 index 0000000..79e3d73 --- /dev/null +++ b/Doc/library/commands.rst @@ -0,0 +1,53 @@ + +:mod:`commands` --- Utilities for running commands +================================================== + +.. module:: commands + :platform: Unix + :synopsis: Utility functions for running external commands. +.. 
sectionauthor:: Sue Williams + + +The :mod:`commands` module contains wrapper functions for :func:`os.popen` which +take a system command as a string and return any output generated by the command +and, optionally, the exit status. + +The :mod:`subprocess` module provides more powerful facilities for spawning new +processes and retrieving their results. Using the :mod:`subprocess` module is +preferable to using the :mod:`commands` module. + +The :mod:`commands` module defines the following functions: + + +.. function:: getstatusoutput(cmd) + + Execute the string *cmd* in a shell with :func:`os.popen` and return a 2-tuple + ``(status, output)``. *cmd* is actually run as ``{ cmd ; } 2>&1``, so that the + returned output will contain output or error messages. A trailing newline is + stripped from the output. The exit status for the command can be interpreted + according to the rules for the C function :cfunc:`wait`. + + +.. function:: getoutput(cmd) + + Like :func:`getstatusoutput`, except the exit status is ignored and the return + value is a string containing the command's output. + +Example:: + + >>> import commands + >>> commands.getstatusoutput('ls /bin/ls') + (0, '/bin/ls') + >>> commands.getstatusoutput('cat /bin/junk') + (256, 'cat: /bin/junk: No such file or directory') + >>> commands.getstatusoutput('/bin/junk') + (256, 'sh: /bin/junk: not found') + >>> commands.getoutput('ls /bin/ls') + '/bin/ls' + + +.. seealso:: + + Module :mod:`subprocess` + Module for spawning and managing subprocesses. + diff --git a/Doc/library/compileall.rst b/Doc/library/compileall.rst new file mode 100644 index 0000000..d62b785 --- /dev/null +++ b/Doc/library/compileall.rst @@ -0,0 +1,57 @@ + +:mod:`compileall` --- Byte-compile Python libraries +=================================================== + +.. module:: compileall + :synopsis: Tools for byte-compiling all Python source files in a directory tree. 
+ + +This module provides some utility functions to support installing Python +libraries. These functions compile Python source files in a directory tree, +allowing users without permission to write to the libraries to take advantage of +cached byte-code files. + +The source file for this module may also be used as a script to compile Python +sources in directories named on the command line or in ``sys.path``. + + +.. function:: compile_dir(dir[, maxlevels[, ddir[, force[, rx[, quiet]]]]]) + + Recursively descend the directory tree named by *dir*, compiling all :file:`.py` + files along the way. The *maxlevels* parameter is used to limit the depth of + the recursion; it defaults to ``10``. If *ddir* is given, it is used as the + base path from which the filenames used in error messages will be generated. + If *force* is true, modules are re-compiled even if the timestamps are up to + date. + + If *rx* is given, it specifies a regular expression of file names to exclude + from the search; that expression is searched for in the full path. + + If *quiet* is true, nothing is printed to the standard output in normal + operation. + + +.. function:: compile_path([skip_curdir[, maxlevels[, force]]]) + + Byte-compile all the :file:`.py` files found along ``sys.path``. If + *skip_curdir* is true (the default), the current directory is not included in + the search. The *maxlevels* and *force* parameters default to ``0`` and are + passed to the :func:`compile_dir` function. + +To force a recompile of all the :file:`.py` files in the :file:`Lib/` +subdirectory and all its subdirectories:: + + import compileall + + compileall.compile_dir('Lib/', force=True) + + # Perform same compilation, excluding files in .svn directories. + import re + compileall.compile_dir('Lib/', rx=re.compile('/[.]svn'), force=True) + + +.. seealso:: + + Module :mod:`py_compile` + Byte-compile a single source file. 
+ diff --git a/Doc/library/configparser.rst b/Doc/library/configparser.rst new file mode 100644 index 0000000..dd91d59 --- /dev/null +++ b/Doc/library/configparser.rst @@ -0,0 +1,361 @@ + +:mod:`ConfigParser` --- Configuration file parser +================================================= + +.. module:: ConfigParser + :synopsis: Configuration file parser. +.. moduleauthor:: Ken Manheimer +.. moduleauthor:: Barry Warsaw +.. moduleauthor:: Eric S. Raymond +.. sectionauthor:: Christopher G. Petrilli + + +.. index:: + pair: .ini; file + pair: configuration; file + single: ini file + single: Windows ini file + +This module defines the class :class:`ConfigParser`. The :class:`ConfigParser` +class implements a basic configuration file parser language which provides a +structure similar to what you would find on Microsoft Windows INI files. You +can use this to write Python programs which can be customized by end users +easily. + +.. warning:: + + This library does *not* interpret or write the value-type prefixes used in the + Windows Registry extended version of INI syntax. + +The configuration file consists of sections, led by a ``[section]`` header and +followed by ``name: value`` entries, with continuations in the style of +:rfc:`822`; ``name=value`` is also accepted. Note that leading whitespace is +removed from values. The optional values can contain format strings which refer +to other values in the same section, or values in a special ``DEFAULT`` section. +Additional defaults can be provided on initialization and retrieval. Lines +beginning with ``'#'`` or ``';'`` are ignored and may be used to provide +comments. + +For example:: + + [My Section] + foodir: %(dir)s/whatever + dir=frob + +would resolve the ``%(dir)s`` to the value of ``dir`` (``frob`` in this case). +All reference expansions are done on demand. + +Default values can be specified by passing them into the :class:`ConfigParser` +constructor as a dictionary. 
Additional defaults may be passed into the +:meth:`get` method which will override all others. + +Sections are normally stored in a builtin dictionary. An alternative dictionary +type can be passed to the :class:`ConfigParser` constructor. For example, if a +dictionary type is passed that sorts its keys, the sections will be sorted on +write-back, as will be the keys within each section. + + +.. class:: RawConfigParser([defaults[, dict_type]]) + + The basic configuration object. When *defaults* is given, it is initialized + into the dictionary of intrinsic defaults. When *dict_type* is given, it will + be used to create the dictionary objects for the list of sections, for the + options within a section, and for the default values. This class does not + support the magical interpolation behavior. + + .. versionadded:: 2.3 + + .. versionchanged:: 2.6 + *dict_type* was added. + + +.. class:: ConfigParser([defaults]) + + Derived class of :class:`RawConfigParser` that implements the magical + interpolation feature and adds optional arguments to the :meth:`get` and + :meth:`items` methods. The values in *defaults* must be appropriate for the + ``%()s`` string interpolation. Note that *__name__* is an intrinsic default; + its value is the section name, and will override any value provided in + *defaults*. + + All option names used in interpolation will be passed through the + :meth:`optionxform` method just like any other option name reference. For + example, using the default implementation of :meth:`optionxform` (which converts + option names to lower case), the values ``foo %(bar)s`` and ``foo %(BAR)s`` are + equivalent. + + +.. class:: SafeConfigParser([defaults]) + + Derived class of :class:`ConfigParser` that implements a more-sane variant of + the magical interpolation feature. This implementation is more predictable as + well. New applications should prefer this version if they don't need to be + compatible with older versions of Python. + + .. 
% XXX Need to explain what's safer/more predictable about it. + + .. versionadded:: 2.3 + + +.. exception:: NoSectionError + + Exception raised when a specified section is not found. + + +.. exception:: DuplicateSectionError + + Exception raised if :meth:`add_section` is called with the name of a section + that is already present. + + +.. exception:: NoOptionError + + Exception raised when a specified option is not found in the specified section. + + +.. exception:: InterpolationError + + Base class for exceptions raised when problems occur performing string + interpolation. + + +.. exception:: InterpolationDepthError + + Exception raised when string interpolation cannot be completed because the + number of iterations exceeds :const:`MAX_INTERPOLATION_DEPTH`. Subclass of + :exc:`InterpolationError`. + + +.. exception:: InterpolationMissingOptionError + + Exception raised when an option referenced from a value does not exist. Subclass + of :exc:`InterpolationError`. + + .. versionadded:: 2.3 + + +.. exception:: InterpolationSyntaxError + + Exception raised when the source text into which substitutions are made does not + conform to the required syntax. Subclass of :exc:`InterpolationError`. + + .. versionadded:: 2.3 + + +.. exception:: MissingSectionHeaderError + + Exception raised when attempting to parse a file which has no section headers. + + +.. exception:: ParsingError + + Exception raised when errors occur attempting to parse a file. + + +.. data:: MAX_INTERPOLATION_DEPTH + + The maximum depth for recursive interpolation for :meth:`get` when the *raw* + parameter is false. This is relevant only for the :class:`ConfigParser` class. + + +.. seealso:: + + Module :mod:`shlex` + Support for creating Unix shell-like mini-languages which can be used as an + alternate format for application configuration files. + + +..
_rawconfigparser-objects: + +RawConfigParser Objects +----------------------- + +:class:`RawConfigParser` instances have the following methods: + + +.. method:: RawConfigParser.defaults() + + Return a dictionary containing the instance-wide defaults. + + +.. method:: RawConfigParser.sections() + + Return a list of the sections available; ``DEFAULT`` is not included in the + list. + + +.. method:: RawConfigParser.add_section(section) + + Add a section named *section* to the instance. If a section by the given name + already exists, :exc:`DuplicateSectionError` is raised. + + +.. method:: RawConfigParser.has_section(section) + + Indicates whether the named section is present in the configuration. The + ``DEFAULT`` section is not acknowledged. + + +.. method:: RawConfigParser.options(section) + + Returns a list of options available in the specified *section*. + + +.. method:: RawConfigParser.has_option(section, option) + + If the given section exists, and contains the given option, return + :const:`True`; otherwise return :const:`False`. + + .. versionadded:: 1.6 + + +.. method:: RawConfigParser.read(filenames) + + Attempt to read and parse a list of filenames, returning a list of filenames + which were successfully parsed. If *filenames* is a string or Unicode string, + it is treated as a single filename. If a file named in *filenames* cannot be + opened, that file will be ignored. This is designed so that you can specify a + list of potential configuration file locations (for example, the current + directory, the user's home directory, and some system-wide directory), and all + existing configuration files in the list will be read. If none of the named + files exist, the :class:`ConfigParser` instance will contain an empty dataset. 
+ An application which requires initial values to be loaded from a file should + load the required file or files using :meth:`readfp` before calling :meth:`read` + for any optional files:: + + import ConfigParser, os + + config = ConfigParser.ConfigParser() + config.readfp(open('defaults.cfg')) + config.read(['site.cfg', os.path.expanduser('~/.myapp.cfg')]) + + .. versionchanged:: 2.4 + Returns list of successfully parsed filenames. + + +.. method:: RawConfigParser.readfp(fp[, filename]) + + Read and parse configuration data from the file or file-like object in *fp* + (only the :meth:`readline` method is used). If *filename* is omitted and *fp* + has a :attr:`name` attribute, that is used for *filename*; the default is + ``<???>``. + + +.. method:: RawConfigParser.get(section, option) + + Get an *option* value for the named *section*. + + +.. method:: RawConfigParser.getint(section, option) + + A convenience method which coerces the *option* in the specified *section* to an + integer. + + +.. method:: RawConfigParser.getfloat(section, option) + + A convenience method which coerces the *option* in the specified *section* to a + floating point number. + + +.. method:: RawConfigParser.getboolean(section, option) + + A convenience method which coerces the *option* in the specified *section* to a + Boolean value. Note that the accepted values for the option are ``"1"``, + ``"yes"``, ``"true"``, and ``"on"``, which cause this method to return ``True``, + and ``"0"``, ``"no"``, ``"false"``, and ``"off"``, which cause it to return + ``False``. These string values are checked in a case-insensitive manner. Any + other value will cause it to raise :exc:`ValueError`. + + +.. method:: RawConfigParser.items(section) + + Return a list of ``(name, value)`` pairs for each option in the given *section*. + + +.. method:: RawConfigParser.set(section, option, value) + + If the given section exists, set the given option to the specified value; + otherwise raise :exc:`NoSectionError`.
While it is possible to use + :class:`RawConfigParser` (or :class:`ConfigParser` with *raw* parameters set to + true) for *internal* storage of non-string values, full functionality (including + interpolation and output to files) can only be achieved using string values. + + .. versionadded:: 1.6 + + +.. method:: RawConfigParser.write(fileobject) + + Write a representation of the configuration to the specified file object. This + representation can be parsed by a future :meth:`read` call. + + .. versionadded:: 1.6 + + +.. method:: RawConfigParser.remove_option(section, option) + + Remove the specified *option* from the specified *section*. If the section does + not exist, raise :exc:`NoSectionError`. If the option existed to be removed, + return :const:`True`; otherwise return :const:`False`. + + .. versionadded:: 1.6 + + +.. method:: RawConfigParser.remove_section(section) + + Remove the specified *section* from the configuration. If the section in fact + existed, return ``True``. Otherwise return ``False``. + + +.. method:: RawConfigParser.optionxform(option) + + Transforms the option name *option* as found in an input file or as passed in by + client code to the form that should be used in the internal structures. The + default implementation returns a lower-case version of *option*; subclasses may + override this or client code can set an attribute of this name on instances to + affect this behavior. Setting this to :func:`str`, for example, would make + option names case sensitive. + + +.. _configparser-objects: + +ConfigParser Objects +-------------------- + +The :class:`ConfigParser` class extends some methods of the +:class:`RawConfigParser` interface, adding some optional arguments. + + +.. method:: ConfigParser.get(section, option[, raw[, vars]]) + + Get an *option* value for the named *section*. 
All the ``'%'`` interpolations + are expanded in the return values, based on the defaults passed into the + constructor, as well as the options *vars* provided, unless the *raw* argument + is true. + + +.. method:: ConfigParser.items(section[, raw[, vars]]) + + Return a list of ``(name, value)`` pairs for each option in the given *section*. + Optional arguments have the same meaning as for the :meth:`get` method. + + .. versionadded:: 2.3 + + +.. _safeconfigparser-objects: + +SafeConfigParser Objects +------------------------ + +The :class:`SafeConfigParser` class implements the same extended interface as +:class:`ConfigParser`, with the following addition: + + +.. method:: SafeConfigParser.set(section, option, value) + + If the given section exists, set the given option to the specified value; + otherwise raise :exc:`NoSectionError`. *value* must be a string (:class:`str` + or :class:`unicode`); if not, :exc:`TypeError` is raised. + + .. versionadded:: 2.4 + diff --git a/Doc/library/constants.rst b/Doc/library/constants.rst new file mode 100644 index 0000000..fecd836 --- /dev/null +++ b/Doc/library/constants.rst @@ -0,0 +1,42 @@ + +Built-in Constants +================== + +A small number of constants live in the built-in namespace. They are: + + +.. data:: False + + The false value of the :class:`bool` type. + + .. versionadded:: 2.3 + + +.. data:: True + + The true value of the :class:`bool` type. + + .. versionadded:: 2.3 + + +.. data:: None + + The sole value of :attr:`types.NoneType`. ``None`` is frequently used to + represent the absence of a value, as when default arguments are not passed to a + function. + + +.. data:: NotImplemented + + Special value which can be returned by the "rich comparison" special methods + (:meth:`__eq__`, :meth:`__lt__`, and friends), to indicate that the comparison + is not implemented with respect to the other type. + + +.. data:: Ellipsis + + The same as ``...``. 
Special value used mostly in conjunction with extended + slicing syntax for user-defined container data types. + + .. % XXX Someone who understands extended slicing should fill in here. + diff --git a/Doc/library/contextlib.rst b/Doc/library/contextlib.rst new file mode 100644 index 0000000..fffb99c --- /dev/null +++ b/Doc/library/contextlib.rst @@ -0,0 +1,120 @@ + +:mod:`contextlib` --- Utilities for :keyword:`with`\ -statement contexts. +========================================================================= + +.. module:: contextlib + :synopsis: Utilities for with-statement contexts. + + +.. versionadded:: 2.5 + +This module provides utilities for common tasks involving the :keyword:`with` +statement. For more information see also :ref:`typecontextmanager` and +:ref:`context-managers`. + +Functions provided: + + +.. function:: contextmanager(func) + + This function is a decorator that can be used to define a factory function for + :keyword:`with` statement context managers, without needing to create a class or + separate :meth:`__enter__` and :meth:`__exit__` methods. + + A simple example (this is not recommended as a real way of generating HTML!):: + + from __future__ import with_statement + from contextlib import contextmanager + + @contextmanager + def tag(name): + print "<%s>" % name + yield + print "</%s>" % name + + >>> with tag("h1"): + ... print "foo" + ... +

    <h1> + foo + </h1>

    + + The function being decorated must return a generator-iterator when called. This + iterator must yield exactly one value, which will be bound to the targets in the + :keyword:`with` statement's :keyword:`as` clause, if any. + + At the point where the generator yields, the block nested in the :keyword:`with` + statement is executed. The generator is then resumed after the block is exited. + If an unhandled exception occurs in the block, it is reraised inside the + generator at the point where the yield occurred. Thus, you can use a + :keyword:`try`...\ :keyword:`except`...\ :keyword:`finally` statement to trap + the error (if any), or ensure that some cleanup takes place. If an exception is + trapped merely in order to log it or to perform some action (rather than to + suppress it entirely), the generator must reraise that exception. Otherwise the + generator context manager will indicate to the :keyword:`with` statement that + the exception has been handled, and execution will resume with the statement + immediately following the :keyword:`with` statement. + + +.. function:: nested(mgr1[, mgr2[, ...]]) + + Combine multiple context managers into a single nested context manager. + + Code like this:: + + from contextlib import nested + + with nested(A, B, C) as (X, Y, Z): + do_something() + + is equivalent to this:: + + with A as X: + with B as Y: + with C as Z: + do_something() + + Note that if the :meth:`__exit__` method of one of the nested context managers + indicates an exception should be suppressed, no exception information will be + passed to any remaining outer context managers. Similarly, if the + :meth:`__exit__` method of one of the nested managers raises an exception, any + previous exception state will be lost; the new exception will be passed to the + :meth:`__exit__` methods of any remaining outer context managers. 
In general, + :meth:`__exit__` methods should avoid raising exceptions, and in particular they + should not re-raise a passed-in exception. + + +.. function:: closing(thing) + + Return a context manager that closes *thing* upon completion of the block. This + is basically equivalent to:: + + from contextlib import contextmanager + + @contextmanager + def closing(thing): + try: + yield thing + finally: + thing.close() + + And lets you write code like this:: + + from __future__ import with_statement + from contextlib import closing + import urllib + + with closing(urllib.urlopen('http://www.python.org')) as page: + for line in page: + print line + + without needing to explicitly close ``page``. Even if an error occurs, + ``page.close()`` will be called when the :keyword:`with` block is exited. + + +.. seealso:: + + :pep:`0343` - The "with" statement + The specification, background, and examples for the Python :keyword:`with` + statement. + diff --git a/Doc/library/cookie.rst b/Doc/library/cookie.rst new file mode 100644 index 0000000..5a5808f --- /dev/null +++ b/Doc/library/cookie.rst @@ -0,0 +1,282 @@ + +:mod:`Cookie` --- HTTP state management +======================================= + +.. module:: Cookie + :synopsis: Support for HTTP state management (cookies). +.. moduleauthor:: Timothy O'Malley +.. sectionauthor:: Moshe Zadka + + +The :mod:`Cookie` module defines classes for abstracting the concept of +cookies, an HTTP state management mechanism. It supports both simple string-only +cookies, and provides an abstraction for having any serializable data-type as +cookie value. + +The module formerly strictly applied the parsing rules described in the +:rfc:`2109` and :rfc:`2068` specifications. It has since been discovered that +MSIE 3.0x doesn't follow the character rules outlined in those specs. As a +result, the parsing rules used are a bit less strict. + + +.. 
exception:: CookieError + + Exception raised because of :rfc:`2109` invalidity: incorrect attributes, + incorrect :mailheader:`Set-Cookie` header, etc. + + +.. class:: BaseCookie([input]) + + This class is a dictionary-like object whose keys are strings and whose values + are :class:`Morsel` instances. Note that upon setting a key to a value, the + value is first converted to a :class:`Morsel` containing the key and the value. + + If *input* is given, it is passed to the :meth:`load` method. + + +.. class:: SimpleCookie([input]) + + This class derives from :class:`BaseCookie` and overrides :meth:`value_decode` + and :meth:`value_encode` to be the identity and :func:`str` respectively. + + +.. class:: SerialCookie([input]) + + This class derives from :class:`BaseCookie` and overrides :meth:`value_decode` + and :meth:`value_encode` to be :func:`pickle.loads` and + :func:`pickle.dumps` respectively. + + .. deprecated:: 2.3 + Reading pickled values from untrusted cookie data is a huge security hole, as + pickle strings can be crafted to cause arbitrary code to execute on your server. + It is supported for backwards compatibility only, and may eventually go away. + + +.. class:: SmartCookie([input]) + + This class derives from :class:`BaseCookie`. It overrides :meth:`value_decode` + to be :func:`pickle.loads` if it is a valid pickle, and otherwise the value + itself. It overrides :meth:`value_encode` to be :func:`pickle.dumps` unless it + is a string, in which case it returns the value itself. + + .. deprecated:: 2.3 + The same security warning from :class:`SerialCookie` applies here. + +A further security note is warranted. For backwards compatibility, the +:mod:`Cookie` module exports a class named :class:`Cookie` which is just an +alias for :class:`SmartCookie`. This is probably a mistake and will likely be +removed in a future version.
You should not use the :class:`Cookie` class in +your applications, for the same reason why you should not use the +:class:`SerialCookie` class. + + +.. seealso:: + + Module :mod:`cookielib` + HTTP cookie handling for web *clients*. The :mod:`cookielib` and :mod:`Cookie` + modules do not depend on each other. + + :rfc:`2109` - HTTP State Management Mechanism + This is the state management specification implemented by this module. + + +.. _cookie-objects: + +Cookie Objects +-------------- + + +.. method:: BaseCookie.value_decode(val) + + Return a decoded value from a string representation. Return value can be any + type. This method does nothing in :class:`BaseCookie` --- it exists so it can be + overridden. + + +.. method:: BaseCookie.value_encode(val) + + Return an encoded value. *val* can be any type, but return value must be a + string. This method does nothing in :class:`BaseCookie` --- it exists so it can + be overridden. + + In general, it should be the case that :meth:`value_encode` and + :meth:`value_decode` are inverses on the range of *value_decode*. + + +.. method:: BaseCookie.output([attrs[, header[, sep]]]) + + Return a string representation suitable to be sent as HTTP headers. *attrs* and + *header* are sent to each :class:`Morsel`'s :meth:`output` method. *sep* is used + to join the headers together, and is by default the combination ``'\r\n'`` + (CRLF). + + .. versionchanged:: 2.5 + The default separator has been changed from ``'\n'`` to match the cookie + specification. + + +.. method:: BaseCookie.js_output([attrs]) + + Return an embeddable JavaScript snippet, which, if run on a browser which + supports JavaScript, will act the same as if the HTTP headers were sent. + + The meaning for *attrs* is the same as in :meth:`output`. + + +.. method:: BaseCookie.load(rawdata) + + If *rawdata* is a string, parse it as an ``HTTP_COOKIE`` and add the values + found there as :class:`Morsel`\ s.
If it is a dictionary, it is equivalent to:: + + for k, v in rawdata.items(): + cookie[k] = v + + +.. _morsel-objects: + +Morsel Objects +-------------- + + +.. class:: Morsel() + + Abstract a key/value pair, which has some :rfc:`2109` attributes. + + Morsels are dictionary-like objects, whose set of keys is constant --- the valid + :rfc:`2109` attributes, which are + + * ``expires`` + * ``path`` + * ``comment`` + * ``domain`` + * ``max-age`` + * ``secure`` + * ``version`` + + The keys are case-insensitive. + + +.. attribute:: Morsel.value + + The value of the cookie. + + +.. attribute:: Morsel.coded_value + + The encoded value of the cookie --- this is what should be sent. + + +.. attribute:: Morsel.key + + The name of the cookie. + + +.. method:: Morsel.set(key, value, coded_value) + + Set the *key*, *value* and *coded_value* members. + + +.. method:: Morsel.isReservedKey(K) + + Whether *K* is a member of the set of keys of a :class:`Morsel`. + + +.. method:: Morsel.output([attrs[, header]]) + + Return a string representation of the Morsel, suitable to be sent as an HTTP + header. By default, all the attributes are included, unless *attrs* is given, in + which case it should be a list of attributes to use. *header* is by default + ``"Set-Cookie:"``. + + +.. method:: Morsel.js_output([attrs]) + + Return an embeddable JavaScript snippet, which, if run on a browser which + supports JavaScript, will act the same as if the HTTP header was sent. + + The meaning for *attrs* is the same as in :meth:`output`. + + +.. method:: Morsel.OutputString([attrs]) + + Return a string representing the Morsel, without any surrounding HTTP or + JavaScript. + + The meaning for *attrs* is the same as in :meth:`output`. + + +.. _cookie-example: + +Example +------- + +The following example demonstrates how to use the :mod:`Cookie` module. 
:: + + >>> import Cookie + >>> C = Cookie.SimpleCookie() + >>> C = Cookie.SerialCookie() + >>> C = Cookie.SmartCookie() + >>> C["fig"] = "newton" + >>> C["sugar"] = "wafer" + >>> print C # generate HTTP headers + Set-Cookie: sugar=wafer + Set-Cookie: fig=newton + >>> print C.output() # same thing + Set-Cookie: sugar=wafer + Set-Cookie: fig=newton + >>> C = Cookie.SmartCookie() + >>> C["rocky"] = "road" + >>> C["rocky"]["path"] = "/cookie" + >>> print C.output(header="Cookie:") + Cookie: rocky=road; Path=/cookie + >>> print C.output(attrs=[], header="Cookie:") + Cookie: rocky=road + >>> C = Cookie.SmartCookie() + >>> C.load("chips=ahoy; vienna=finger") # load from a string (HTTP header) + >>> print C + Set-Cookie: vienna=finger + Set-Cookie: chips=ahoy + >>> C = Cookie.SmartCookie() + >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') + >>> print C + Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" + >>> C = Cookie.SmartCookie() + >>> C["oreo"] = "doublestuff" + >>> C["oreo"]["path"] = "/" + >>> print C + Set-Cookie: oreo=doublestuff; Path=/ + >>> C = Cookie.SmartCookie() + >>> C["twix"] = "none for you" + >>> C["twix"].value + 'none for you' + >>> C = Cookie.SimpleCookie() + >>> C["number"] = 7 # equivalent to C["number"] = str(7) + >>> C["string"] = "seven" + >>> C["number"].value + '7' + >>> C["string"].value + 'seven' + >>> print C + Set-Cookie: number=7 + Set-Cookie: string=seven + >>> C = Cookie.SerialCookie() + >>> C["number"] = 7 + >>> C["string"] = "seven" + >>> C["number"].value + 7 + >>> C["string"].value + 'seven' + >>> print C + Set-Cookie: number="I7\012." + Set-Cookie: string="S'seven'\012p1\012." + >>> C = Cookie.SmartCookie() + >>> C["number"] = 7 + >>> C["string"] = "seven" + >>> C["number"].value + 7 + >>> C["string"].value + 'seven' + >>> print C + Set-Cookie: number="I7\012." 
+ Set-Cookie: string=seven + diff --git a/Doc/library/cookielib.rst b/Doc/library/cookielib.rst new file mode 100644 index 0000000..44045d3 --- /dev/null +++ b/Doc/library/cookielib.rst @@ -0,0 +1,768 @@ + +:mod:`cookielib` --- Cookie handling for HTTP clients +===================================================== + +.. module:: cookielib + :synopsis: Classes for automatic handling of HTTP cookies. +.. moduleauthor:: John J. Lee +.. sectionauthor:: John J. Lee + + +.. versionadded:: 2.4 + + + +The :mod:`cookielib` module defines classes for automatic handling of HTTP +cookies. It is useful for accessing web sites that require small pieces of data +-- :dfn:`cookies` -- to be set on the client machine by an HTTP response from a +web server, and then returned to the server in later HTTP requests. + +Both the regular Netscape cookie protocol and the protocol defined by +:rfc:`2965` are handled. RFC 2965 handling is switched off by default. +:rfc:`2109` cookies are parsed as Netscape cookies and subsequently treated +either as Netscape or RFC 2965 cookies according to the 'policy' in effect. +Note that the great majority of cookies on the Internet are Netscape cookies. +:mod:`cookielib` attempts to follow the de-facto Netscape cookie protocol (which +differs substantially from that set out in the original Netscape specification), +including taking note of the ``max-age`` and ``port`` cookie-attributes +introduced with RFC 2965. + +.. note:: + + The various named parameters found in :mailheader:`Set-Cookie` and + :mailheader:`Set-Cookie2` headers (eg. ``domain`` and ``expires``) are + conventionally referred to as :dfn:`attributes`. To distinguish them from + Python attributes, the documentation for this module uses the term + :dfn:`cookie-attribute` instead. + + +The module defines the following exception: + + +.. exception:: LoadError + + Instances of :class:`FileCookieJar` raise this exception on failure to load + cookies from a file. + + .. 
note:: + + For backwards-compatibility with Python 2.4 (which raised an :exc:`IOError`), + :exc:`LoadError` is a subclass of :exc:`IOError`. + + +The following classes are provided: + + +.. class:: CookieJar(policy=None) + + *policy* is an object implementing the :class:`CookiePolicy` interface. + + The :class:`CookieJar` class stores HTTP cookies. It extracts cookies from HTTP + responses, and returns them in HTTP requests. :class:`CookieJar` instances + automatically expire contained cookies when necessary. Subclasses are also + responsible for storing and retrieving cookies from a file or database. + + +.. class:: FileCookieJar(filename, delayload=None, policy=None) + + *policy* is an object implementing the :class:`CookiePolicy` interface. For the + other arguments, see the documentation for the corresponding attributes. + + A :class:`CookieJar` which can load cookies from, and perhaps save cookies to, a + file on disk. Cookies are **NOT** loaded from the named file until either the + :meth:`load` or :meth:`revert` method is called. Subclasses of this class are + documented in section :ref:`file-cookie-jar-classes`. + + +.. class:: CookiePolicy() + + This class is responsible for deciding whether each cookie should be accepted + from / returned to the server. + + +.. class:: DefaultCookiePolicy( blocked_domains=None, allowed_domains=None, netscape=True, rfc2965=False, rfc2109_as_netscape=None, hide_cookie2=False, strict_domain=False, strict_rfc2965_unverifiable=True, strict_ns_unverifiable=False, strict_ns_domain=DefaultCookiePolicy.DomainLiberal, strict_ns_set_initial_dollar=False, strict_ns_set_path=False ) + + Constructor arguments should be passed as keyword arguments only. + *blocked_domains* is a sequence of domain names that we never accept cookies + from, nor return cookies to. *allowed_domains*, if not :const:`None`, is a + sequence of the only domains for which we accept and return cookies.
For all + other arguments, see the documentation for :class:`CookiePolicy` and + :class:`DefaultCookiePolicy` objects. + + :class:`DefaultCookiePolicy` implements the standard accept / reject rules for + Netscape and RFC 2965 cookies. By default, RFC 2109 cookies (ie. cookies + received in a :mailheader:`Set-Cookie` header with a version cookie-attribute of + 1) are treated according to the RFC 2965 rules. However, if RFC 2965 handling + is turned off or :attr:`rfc2109_as_netscape` is True, RFC 2109 cookies are + 'downgraded' by the :class:`CookieJar` instance to Netscape cookies, by + setting the :attr:`version` attribute of the :class:`Cookie` instance to 0. + :class:`DefaultCookiePolicy` also provides some parameters to allow some + fine-tuning of policy. + + +.. class:: Cookie() + + This class represents Netscape, RFC 2109 and RFC 2965 cookies. It is not + expected that users of :mod:`cookielib` construct their own :class:`Cookie` + instances. Instead, if necessary, call :meth:`make_cookies` on a + :class:`CookieJar` instance. + + +.. seealso:: + + Module :mod:`urllib2` + URL opening with automatic cookie handling. + + Module :mod:`Cookie` + HTTP cookie classes, principally useful for server-side code. The + :mod:`cookielib` and :mod:`Cookie` modules do not depend on each other. + + http://wwwsearch.sf.net/ClientCookie/ + Extensions to this module, including a class for reading Microsoft Internet + Explorer cookies on Windows. + + http://www.netscape.com/newsref/std/cookie_spec.html + The specification of the original Netscape cookie protocol. Though this is + still the dominant protocol, the 'Netscape cookie protocol' implemented by all + the major browsers (and :mod:`cookielib`) only bears a passing resemblance to + the one sketched out in ``cookie_spec.html``. + + :rfc:`2109` - HTTP State Management Mechanism + Obsoleted by RFC 2965. Uses :mailheader:`Set-Cookie` with version=1. 
+ + :rfc:`2965` - HTTP State Management Mechanism + The Netscape protocol with the bugs fixed. Uses :mailheader:`Set-Cookie2` in + place of :mailheader:`Set-Cookie`. Not widely used. + + http://kristol.org/cookie/errata.html + Unfinished errata to RFC 2965. + + :rfc:`2964` - Use of HTTP State Management + +.. _cookie-jar-objects: + +CookieJar and FileCookieJar Objects +----------------------------------- + +:class:`CookieJar` objects support the iterator protocol for iterating over +contained :class:`Cookie` objects. + +:class:`CookieJar` has the following methods: + + +.. method:: CookieJar.add_cookie_header(request) + + Add correct :mailheader:`Cookie` header to *request*. + + If policy allows (ie. the :attr:`rfc2965` and :attr:`hide_cookie2` attributes of + the :class:`CookieJar`'s :class:`CookiePolicy` instance are true and false + respectively), the :mailheader:`Cookie2` header is also added when appropriate. + + The *request* object (usually a :class:`urllib2.Request` instance) must support + the methods :meth:`get_full_url`, :meth:`get_host`, :meth:`get_type`, + :meth:`unverifiable`, :meth:`get_origin_req_host`, :meth:`has_header`, + :meth:`get_header`, :meth:`header_items`, and :meth:`add_unredirected_header`, as + documented by :mod:`urllib2`. + + +.. method:: CookieJar.extract_cookies(response, request) + + Extract cookies from HTTP *response* and store them in the :class:`CookieJar`, + where allowed by policy. + + The :class:`CookieJar` will look for allowable :mailheader:`Set-Cookie` and + :mailheader:`Set-Cookie2` headers in the *response* argument, and store cookies + as appropriate (subject to the :meth:`CookiePolicy.set_ok` method's approval). + + The *response* object (usually the result of a call to :meth:`urllib2.urlopen`, + or similar) should support an :meth:`info` method, which returns an object with + a :meth:`getallmatchingheaders` method (usually a :class:`mimetools.Message` + instance).
+ + The *request* object (usually a :class:`urllib2.Request` instance) must support + the methods :meth:`get_full_url`, :meth:`get_host`, :meth:`unverifiable`, and + :meth:`get_origin_req_host`, as documented by :mod:`urllib2`. The request is + used to set default values for cookie-attributes as well as for checking that + the cookie is allowed to be set. + + +.. method:: CookieJar.set_policy(policy) + + Set the :class:`CookiePolicy` instance to be used. + + +.. method:: CookieJar.make_cookies(response, request) + + Return sequence of :class:`Cookie` objects extracted from *response* object. + + See the documentation for :meth:`extract_cookies` for the interfaces required of + the *response* and *request* arguments. + + +.. method:: CookieJar.set_cookie_if_ok(cookie, request) + + Set a :class:`Cookie` if policy says it's OK to do so. + + +.. method:: CookieJar.set_cookie(cookie) + + Set a :class:`Cookie`, without checking with policy to see whether or not it + should be set. + + +.. method:: CookieJar.clear([domain[, path[, name]]]) + + Clear some cookies. + + If invoked without arguments, clear all cookies. If given a single argument, + only cookies belonging to that *domain* will be removed. If given two arguments, + cookies belonging to the specified *domain* and URL *path* are removed. If + given three arguments, then the cookie with the specified *domain*, *path* and + *name* is removed. + + Raises :exc:`KeyError` if no matching cookie exists. + + +.. method:: CookieJar.clear_session_cookies() + + Discard all session cookies. + + Discards all contained cookies that have a true :attr:`discard` attribute + (usually because they had either no ``max-age`` or ``expires`` cookie-attribute, + or an explicit ``discard`` cookie-attribute). For interactive browsers, the end + of a session usually corresponds to closing the browser window. 
+ + Note that the :meth:`save` method won't save session cookies anyway, unless you + ask otherwise by passing a true *ignore_discard* argument. + +:class:`FileCookieJar` implements the following additional methods: + + +.. method:: FileCookieJar.save(filename=None, ignore_discard=False, ignore_expires=False) + + Save cookies to a file. + + This base class raises :exc:`NotImplementedError`. Subclasses may leave this + method unimplemented. + + *filename* is the name of the file in which to save cookies. If *filename* is not + specified, :attr:`self.filename` is used (whose default is the value passed to + the constructor, if any); if :attr:`self.filename` is :const:`None`, + :exc:`ValueError` is raised. + + *ignore_discard*: save even cookies set to be discarded. *ignore_expires*: save + even cookies that have expired. + + The file is overwritten if it already exists, thus wiping all the cookies it + contains. Saved cookies can be restored later using the :meth:`load` or + :meth:`revert` methods. + + +.. method:: FileCookieJar.load(filename=None, ignore_discard=False, ignore_expires=False) + + Load cookies from a file. + + Old cookies are kept unless overwritten by newly loaded ones. + + Arguments are as for :meth:`save`. + + The named file must be in the format understood by the class, or + :exc:`LoadError` will be raised. Also, :exc:`IOError` may be raised, for + example if the file does not exist. + + .. note:: + + For backwards-compatibility with Python 2.4 (which raised an :exc:`IOError`), + :exc:`LoadError` is a subclass of :exc:`IOError`. + + +.. method:: FileCookieJar.revert(filename=None, ignore_discard=False, ignore_expires=False) + + Clear all cookies and reload cookies from a saved file. + + :meth:`revert` can raise the same exceptions as :meth:`load`. If there is a + failure, the object's state will not be altered. + +:class:`FileCookieJar` instances have the following public attributes: + + +..
attribute:: FileCookieJar.filename + + Filename of default file in which to keep cookies. This attribute may be + assigned to. + + +.. attribute:: FileCookieJar.delayload + + If true, load cookies lazily from disk. This attribute should not be assigned + to. This is only a hint, since this only affects performance, not behaviour + (unless the cookies on disk are changing). A :class:`CookieJar` object may + ignore it. None of the :class:`FileCookieJar` classes included in the standard + library lazily loads cookies. + + +.. _file-cookie-jar-classes: + +FileCookieJar subclasses and co-operation with web browsers +----------------------------------------------------------- + +The following :class:`CookieJar` subclasses are provided for reading and writing. +Further :class:`CookieJar` subclasses, including one that reads Microsoft +Internet Explorer cookies, are available at +http://wwwsearch.sf.net/ClientCookie/. + + +.. class:: MozillaCookieJar(filename, delayload=None, policy=None) + + A :class:`FileCookieJar` that can load from and save cookies to disk in the + Mozilla ``cookies.txt`` file format (which is also used by the Lynx and Netscape + browsers). + + .. note:: + + This loses information about RFC 2965 cookies, and also about newer or + non-standard cookie-attributes such as ``port``. + + .. warning:: + + Back up your cookies before saving if you have cookies whose loss / corruption + would be inconvenient (there are some subtleties which may lead to slight + changes in the file over a load / save round-trip). + + Also note that cookies saved while Mozilla is running will get clobbered by + Mozilla. + + +.. class:: LWPCookieJar(filename, delayload=None, policy=None) + + A :class:`FileCookieJar` that can load from and save cookies to disk in a format + compatible with the libwww-perl library's ``Set-Cookie3`` file format. This is + convenient if you want to store cookies in a human-readable file. + + +..
_cookie-policy-objects: + +CookiePolicy Objects +-------------------- + +Objects implementing the :class:`CookiePolicy` interface have the following +methods: + + +.. method:: CookiePolicy.set_ok(cookie, request) + + Return boolean value indicating whether cookie should be accepted from server. + + *cookie* is a :class:`cookielib.Cookie` instance. *request* is an object + implementing the interface defined by the documentation for + :meth:`CookieJar.extract_cookies`. + + +.. method:: CookiePolicy.return_ok(cookie, request) + + Return boolean value indicating whether cookie should be returned to server. + + *cookie* is a :class:`cookielib.Cookie` instance. *request* is an object + implementing the interface defined by the documentation for + :meth:`CookieJar.add_cookie_header`. + + +.. method:: CookiePolicy.domain_return_ok(domain, request) + + Return false if cookies should not be returned, given cookie domain. + + This method is an optimization. It removes the need for checking every cookie + with a particular domain (which might involve reading many files). Returning + true from :meth:`domain_return_ok` and :meth:`path_return_ok` leaves all the + work to :meth:`return_ok`. + + If :meth:`domain_return_ok` returns true for the cookie domain, + :meth:`path_return_ok` is called for the cookie path. Otherwise, + :meth:`path_return_ok` and :meth:`return_ok` are never called for that cookie + domain. If :meth:`path_return_ok` returns true, :meth:`return_ok` is called + with the :class:`Cookie` object itself for a full check. Otherwise, + :meth:`return_ok` is never called for that cookie path. + + Note that :meth:`domain_return_ok` is called for every *cookie* domain, not just + for the *request* domain. For example, the function might be called with both + ``".example.com"`` and ``"www.example.com"`` if the request domain is + ``"www.example.com"``. The same goes for :meth:`path_return_ok`. + + The *request* argument is as documented for :meth:`return_ok`. + + +.. 
method:: CookiePolicy.path_return_ok(path, request) + + Return false if cookies should not be returned, given cookie path. + + See the documentation for :meth:`domain_return_ok`. + +In addition to implementing the methods above, implementations of the +:class:`CookiePolicy` interface must also supply the following attributes, +indicating which protocols should be used, and how. All of these attributes may +be assigned to. + + +.. attribute:: CookiePolicy.netscape + + Implement Netscape protocol. + + +.. attribute:: CookiePolicy.rfc2965 + + Implement RFC 2965 protocol. + + +.. attribute:: CookiePolicy.hide_cookie2 + + Don't add :mailheader:`Cookie2` header to requests (the presence of this header + indicates to the server that we understand RFC 2965 cookies). + +The most useful way to define a :class:`CookiePolicy` class is by subclassing +from :class:`DefaultCookiePolicy` and overriding some or all of the methods +above. :class:`CookiePolicy` itself may be used as a 'null policy' to allow +setting and receiving any and all cookies (this is unlikely to be useful). + + +.. _default-cookie-policy-objects: + +DefaultCookiePolicy Objects +--------------------------- + +Implements the standard rules for accepting and returning cookies. + +Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is switched +off by default. + +The easiest way to provide your own policy is to override this class and call +its methods in your overridden implementations before adding your own additional +checks:: + + import cookielib + class MyCookiePolicy(cookielib.DefaultCookiePolicy): + def set_ok(self, cookie, request): + if not cookielib.DefaultCookiePolicy.set_ok(self, cookie, request): + return False + if i_dont_want_to_store_this_cookie(cookie): + return False + return True + +In addition to the features required to implement the :class:`CookiePolicy` +interface, this class allows you to block and allow domains from setting and +receiving cookies. 
There are also some strictness switches that allow you to +tighten up the rather loose Netscape protocol rules a little bit (at the cost of +blocking some benign cookies). + +A domain blacklist and whitelist is provided (both off by default). Only domains +not in the blacklist and present in the whitelist (if the whitelist is active) +participate in cookie setting and returning. Use the *blocked_domains* +constructor argument, and :meth:`blocked_domains` and +:meth:`set_blocked_domains` methods (and the corresponding argument and methods +for *allowed_domains*). If you set a whitelist, you can turn it off again by +setting it to :const:`None`. + +Domains in block or allow lists that do not start with a dot must equal the +cookie domain to be matched. For example, ``"example.com"`` matches a blacklist +entry of ``"example.com"``, but ``"www.example.com"`` does not. Domains that do +start with a dot are matched by more specific domains too. For example, both +``"www.example.com"`` and ``"www.coyote.example.com"`` match ``".example.com"`` +(but ``"example.com"`` itself does not). IP addresses are an exception, and +must match exactly. For example, if blocked_domains contains ``"192.168.1.2"`` +and ``".168.1.2"``, 192.168.1.2 is blocked, but 193.168.1.2 is not. + +:class:`DefaultCookiePolicy` implements the following additional methods: + + +.. method:: DefaultCookiePolicy.blocked_domains() + + Return the sequence of blocked domains (as a tuple). + + +.. method:: DefaultCookiePolicy.set_blocked_domains(blocked_domains) + + Set the sequence of blocked domains. + + +.. method:: DefaultCookiePolicy.is_blocked(domain) + + Return whether *domain* is on the blacklist for setting or receiving cookies. + + +.. method:: DefaultCookiePolicy.allowed_domains() + + Return :const:`None`, or the sequence of allowed domains (as a tuple). + + +.. method:: DefaultCookiePolicy.set_allowed_domains(allowed_domains) + + Set the sequence of allowed domains, or :const:`None`. + + +.. 
method:: DefaultCookiePolicy.is_not_allowed(domain) + + Return whether *domain* is not on the whitelist for setting or receiving + cookies. + +:class:`DefaultCookiePolicy` instances have the following attributes, which are +all initialised from the constructor arguments of the same name, and which may +all be assigned to. + + +.. attribute:: DefaultCookiePolicy.rfc2109_as_netscape + + If true, request that the :class:`CookieJar` instance downgrade RFC 2109 cookies + (ie. cookies received in a :mailheader:`Set-Cookie` header with a version + cookie-attribute of 1) to Netscape cookies by setting the version attribute of + the :class:`Cookie` instance to 0. The default value is :const:`None`, in which + case RFC 2109 cookies are downgraded if and only if RFC 2965 handling is turned + off. Therefore, RFC 2109 cookies are downgraded by default. + + .. versionadded:: 2.5 + +General strictness switches: + + +.. attribute:: DefaultCookiePolicy.strict_domain + + Don't allow sites to set two-component domains with country-code top-level + domains like ``.co.uk``, ``.gov.uk``, ``.co.nz``, etc. This is far from perfect + and isn't guaranteed to work! + +RFC 2965 protocol strictness switches: + + +.. attribute:: DefaultCookiePolicy.strict_rfc2965_unverifiable + + Follow RFC 2965 rules on unverifiable transactions (usually, an unverifiable + transaction is one resulting from a redirect or a request for an image hosted on + another site). If this is false, cookies are *never* blocked on the basis of + verifiability. + +Netscape protocol strictness switches: + + +.. attribute:: DefaultCookiePolicy.strict_ns_unverifiable + + Apply RFC 2965 rules on unverifiable transactions even to Netscape cookies. + + +.. attribute:: DefaultCookiePolicy.strict_ns_domain + + Flags indicating how strict to be with domain-matching rules for Netscape + cookies. See below for acceptable values. + + +..
attribute:: DefaultCookiePolicy.strict_ns_set_initial_dollar + + Ignore cookies in Set-Cookie: headers that have names starting with ``'$'``. + + +.. attribute:: DefaultCookiePolicy.strict_ns_set_path + + Don't allow setting cookies whose path doesn't path-match request URI. + +:attr:`strict_ns_domain` is a collection of flags. Its value is constructed by +or-ing together (for example, ``DomainStrictNoDots|DomainStrictNonDomain`` means +both flags are set). + + +.. attribute:: DefaultCookiePolicy.DomainStrictNoDots + + When setting cookies, the 'host prefix' must not contain a dot (eg. + ``www.foo.bar.com`` can't set a cookie for ``.bar.com``, because ``www.foo`` + contains a dot). + + +.. attribute:: DefaultCookiePolicy.DomainStrictNonDomain + + Cookies that did not explicitly specify a ``domain`` cookie-attribute can only + be returned to a domain equal to the domain that set the cookie (eg. + ``spam.example.com`` won't be returned cookies from ``example.com`` that had no + ``domain`` cookie-attribute). + + +.. attribute:: DefaultCookiePolicy.DomainRFC2965Match + + When setting cookies, require a full RFC 2965 domain-match. + +The following attributes are provided for convenience, and are the most useful +combinations of the above flags: + + +.. attribute:: DefaultCookiePolicy.DomainLiberal + + Equivalent to 0 (ie. all of the above Netscape domain strictness flags switched + off). + + +.. attribute:: DefaultCookiePolicy.DomainStrict + + Equivalent to ``DomainStrictNoDots|DomainStrictNonDomain``. + + +.. _cookielib-cookie-objects: + +Cookie Objects +-------------- + +:class:`Cookie` instances have Python attributes roughly corresponding to the +standard cookie-attributes specified in the various cookie standards. 
The +correspondence is not one-to-one, because there are complicated rules for +assigning default values, because the ``max-age`` and ``expires`` +cookie-attributes contain equivalent information, and because RFC 2109 cookies +may be 'downgraded' by :mod:`cookielib` from version 1 to version 0 (Netscape) +cookies. + +Assignment to these attributes should not be necessary other than in rare +circumstances in a :class:`CookiePolicy` method. The class does not enforce +internal consistency, so you should know what you're doing if you do that. + + +.. attribute:: Cookie.version + + Integer or :const:`None`. Netscape cookies have :attr:`version` 0. RFC 2965 and + RFC 2109 cookies have a ``version`` cookie-attribute of 1. However, note that + :mod:`cookielib` may 'downgrade' RFC 2109 cookies to Netscape cookies, in which + case :attr:`version` is 0. + + +.. attribute:: Cookie.name + + Cookie name (a string). + + +.. attribute:: Cookie.value + + Cookie value (a string), or :const:`None`. + + +.. attribute:: Cookie.port + + String representing a port or a set of ports (eg. '80', or '80,8080'), or + :const:`None`. + + +.. attribute:: Cookie.path + + Cookie path (a string, eg. ``'/acme/rocket_launchers'``). + + +.. attribute:: Cookie.secure + + True if cookie should only be returned over a secure connection. + + +.. attribute:: Cookie.expires + + Integer expiry date in seconds since epoch, or :const:`None`. See also the + :meth:`is_expired` method. + + +.. attribute:: Cookie.discard + + True if this is a session cookie. + + +.. attribute:: Cookie.comment + + String comment from the server explaining the function of this cookie, or + :const:`None`. + + +.. attribute:: Cookie.comment_url + + URL linking to a comment from the server explaining the function of this cookie, + or :const:`None`. + + +.. attribute:: Cookie.rfc2109 + + True if this cookie was received as an RFC 2109 cookie (ie. 
the cookie + arrived in a :mailheader:`Set-Cookie` header, and the value of the Version + cookie-attribute in that header was 1). This attribute is provided because + :mod:`cookielib` may 'downgrade' RFC 2109 cookies to Netscape cookies, in + which case :attr:`version` is 0. + + .. versionadded:: 2.5 + + +.. attribute:: Cookie.port_specified + + True if a port or set of ports was explicitly specified by the server (in the + :mailheader:`Set-Cookie` / :mailheader:`Set-Cookie2` header). + + +.. attribute:: Cookie.domain_specified + + True if a domain was explicitly specified by the server. + + +.. attribute:: Cookie.domain_initial_dot + + True if the domain explicitly specified by the server began with a dot + (``'.'``). + +Cookies may have additional non-standard cookie-attributes. These may be +accessed using the following methods: + + +.. method:: Cookie.has_nonstandard_attr(name) + + Return true if cookie has the named cookie-attribute. + + +.. method:: Cookie.get_nonstandard_attr(name, default=None) + + If cookie has the named cookie-attribute, return its value. Otherwise, return + *default*. + + +.. method:: Cookie.set_nonstandard_attr(name, value) + + Set the value of the named cookie-attribute. + +The :class:`Cookie` class also defines the following method: + + +.. method:: Cookie.is_expired([now=None]) + + True if cookie has passed the time at which the server requested it should + expire. If *now* is given (in seconds since the epoch), return whether the + cookie has expired at the specified time. + + +..
_cookielib-examples: + +Examples +-------- + +The first example shows the most common usage of :mod:`cookielib`:: + + import cookielib, urllib2 + cj = cookielib.CookieJar() + opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) + r = opener.open("http://example.com/") + +This example illustrates how to open a URL using your Netscape, Mozilla, or Lynx +cookies (assumes Unix/Netscape convention for location of the cookies file):: + + import os, cookielib, urllib2 + cj = cookielib.MozillaCookieJar() + cj.load(os.path.join(os.environ["HOME"], ".netscape/cookies.txt")) + opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) + r = opener.open("http://example.com/") + +The next example illustrates the use of :class:`DefaultCookiePolicy`. Turn on +RFC 2965 cookies, be more strict about domains when setting and returning +Netscape cookies, and block some domains from setting cookies or having them +returned:: + + import urllib2 + from cookielib import CookieJar, DefaultCookiePolicy + policy = DefaultCookiePolicy( + rfc2965=True, strict_ns_domain=DefaultCookiePolicy.DomainStrict, + blocked_domains=["ads.net", ".ads.net"]) + cj = CookieJar(policy) + opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) + r = opener.open("http://example.com/") + diff --git a/Doc/library/copy.rst b/Doc/library/copy.rst new file mode 100644 index 0000000..6fb3100 --- /dev/null +++ b/Doc/library/copy.rst @@ -0,0 +1,85 @@ + +:mod:`copy` --- Shallow and deep copy operations +================================================ + +.. module:: copy + :synopsis: Shallow and deep copy operations. + + +.. index:: + single: copy() (in copy) + single: deepcopy() (in copy) + +This module provides generic (shallow and deep) copying operations. + +Interface summary:: + + import copy + + x = copy.copy(y) # make a shallow copy of y + x = copy.deepcopy(y) # make a deep copy of y + +For module specific errors, :exc:`copy.error` is raised. + +..
% + +The difference between shallow and deep copying is only relevant for compound +objects (objects that contain other objects, like lists or class instances): + +* A *shallow copy* constructs a new compound object and then (to the extent + possible) inserts *references* into it to the objects found in the original. + +* A *deep copy* constructs a new compound object and then, recursively, inserts + *copies* into it of the objects found in the original. + +Two problems often exist with deep copy operations that don't exist with shallow +copy operations: + +* Recursive objects (compound objects that, directly or indirectly, contain a + reference to themselves) may cause a recursive loop. + +* Because deep copy copies *everything* it may copy too much, e.g., + administrative data structures that should be shared even between copies. + +The :func:`deepcopy` function avoids these problems by: + +* keeping a "memo" dictionary of objects already copied during the current + copying pass; and + +* letting user-defined classes override the copying operation or the set of + components copied. + +This module does not copy types like module, method, stack trace, stack frame, +file, socket, window, array, or any similar types. It does "copy" functions and +classes (shallow and deeply), by returning the original object unchanged; this +is compatible with the way these are treated by the :mod:`pickle` module. + +.. versionchanged:: 2.5 + Added copying functions. + +.. index:: module: pickle + +Classes can use the same interfaces to control copying that they use to control +pickling. See the description of module :mod:`pickle` for information on these +methods. The :mod:`copy` module does not use the :mod:`copy_reg` registration +module. + +.. index:: + single: __copy__() (copy protocol) + single: __deepcopy__() (copy protocol) + +In order for a class to define its own copy implementation, it can define +special methods :meth:`__copy__` and :meth:`__deepcopy__`. 
The former is called +to implement the shallow copy operation; no additional arguments are passed. +The latter is called to implement the deep copy operation; it is passed one +argument, the memo dictionary. If the :meth:`__deepcopy__` implementation needs +to make a deep copy of a component, it should call the :func:`deepcopy` function +with the component as first argument and the memo dictionary as second argument. + + +.. seealso:: + + Module :mod:`pickle` + Discussion of the special methods used to support object state retrieval and + restoration. + diff --git a/Doc/library/copy_reg.rst b/Doc/library/copy_reg.rst new file mode 100644 index 0000000..9b82a31 --- /dev/null +++ b/Doc/library/copy_reg.rst @@ -0,0 +1,42 @@ + +:mod:`copy_reg` --- Register :mod:`pickle` support functions +============================================================ + +.. module:: copy_reg + :synopsis: Register pickle support functions. + + +.. index:: + module: pickle + module: cPickle + module: copy + +The :mod:`copy_reg` module provides support for the :mod:`pickle` and +:mod:`cPickle` modules. The :mod:`copy` module is likely to use this in the +future as well. It provides configuration information about object constructors +which are not classes. Such constructors may be factory functions or class +instances. + + +.. function:: constructor(object) + + Declares *object* to be a valid constructor. If *object* is not callable (and + hence not valid as a constructor), raises :exc:`TypeError`. + + +.. function:: pickle(type, function[, constructor]) + + Declares that *function* should be used as a "reduction" function for objects of + type *type*; *type* must not be a "classic" class object. (Classic classes are + handled differently; see the documentation for the :mod:`pickle` module for + details.) *function* should return either a string or a tuple containing two or + three elements. 
+ + The optional *constructor* parameter, if provided, is a callable object which + can be used to reconstruct the object when called with the tuple of arguments + returned by *function* at pickling time. :exc:`TypeError` will be raised if + *type* is a "classic" class or *constructor* is not callable. + + See the :mod:`pickle` module for more details on the interface expected of + *function* and *constructor*. + diff --git a/Doc/library/crypt.rst b/Doc/library/crypt.rst new file mode 100644 index 0000000..8840fc7 --- /dev/null +++ b/Doc/library/crypt.rst @@ -0,0 +1,66 @@ + +:mod:`crypt` --- Function to check Unix passwords +================================================= + +.. module:: crypt + :platform: Unix + :synopsis: The crypt() function used to check Unix passwords. +.. moduleauthor:: Steven D. Majewski +.. sectionauthor:: Steven D. Majewski +.. sectionauthor:: Peter Funk + + +.. index:: + single: crypt(3) + pair: cipher; DES + +This module implements an interface to the :manpage:`crypt(3)` routine, which is +a one-way hash function based upon a modified DES algorithm; see the Unix man +page for further details. Possible uses include allowing Python scripts to +accept typed passwords from the user, or attempting to crack Unix passwords with +a dictionary. + +.. index:: single: crypt(3) + +Notice that the behavior of this module depends on the actual implementation of +the :manpage:`crypt(3)` routine in the running system. Therefore, any +extensions available on the current implementation will also be available on +this module. + + +.. function:: crypt(word, salt) + + *word* will usually be a user's password as typed at a prompt or in a graphical + interface. *salt* is usually a random two-character string which will be used + to perturb the DES algorithm in one of 4096 ways. The characters in *salt* must + be in the set ``[./a-zA-Z0-9]``.
Returns the hashed password as a string, which + will be composed of characters from the same alphabet as the salt (the first two + characters represent the salt itself). + + .. index:: single: crypt(3) + + Since a few :manpage:`crypt(3)` extensions allow different values, with + different sizes in the *salt*, it is recommended to use the full crypted + password as salt when checking for a password. + +A simple example illustrating typical use:: + + import crypt, getpass, pwd + + def raw_input(prompt): + import sys + sys.stdout.write(prompt) + sys.stdout.flush() + return sys.stdin.readline().rstrip('\n') + + def login(): + username = raw_input('Python login:') + cryptedpasswd = pwd.getpwnam(username)[1] + if cryptedpasswd: + if cryptedpasswd == 'x' or cryptedpasswd == '*': + raise NotImplementedError("Sorry, currently no support for shadow passwords") + cleartext = getpass.getpass() + return crypt.crypt(cleartext, cryptedpasswd) == cryptedpasswd + else: + return 1 + diff --git a/Doc/library/crypto.rst b/Doc/library/crypto.rst new file mode 100644 index 0000000..dce5a01 --- /dev/null +++ b/Doc/library/crypto.rst @@ -0,0 +1,30 @@ + +.. _crypto: + +********************** +Cryptographic Services +********************** + +.. index:: single: cryptography + +The modules described in this chapter implement various algorithms of a +cryptographic nature. They are available at the discretion of the installation. +Here's an overview: + + +.. toctree:: + + hashlib.rst + hmac.rst + +.. index:: + pair: AES; algorithm + single: cryptography + single: Kuchling, Andrew + +Hardcore cypherpunks will probably find the cryptographic modules written by +A.M. Kuchling of further interest; the package contains modules for various +encryption algorithms, most notably AES. These modules are not distributed with +Python but available separately. See the URL +http://www.amk.ca/python/code/crypto.html for more information.
+ diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst new file mode 100644 index 0000000..19123c6 --- /dev/null +++ b/Doc/library/csv.rst @@ -0,0 +1,530 @@ + +:mod:`csv` --- CSV File Reading and Writing +=========================================== + +.. module:: csv + :synopsis: Write and read tabular data to and from delimited files. +.. sectionauthor:: Skip Montanaro + + +.. versionadded:: 2.3 + +.. index:: + single: csv + pair: data; tabular + +The so-called CSV (Comma Separated Values) format is the most common import and +export format for spreadsheets and databases. There is no "CSV standard", so +the format is operationally defined by the many applications which read and +write it. The lack of a standard means that subtle differences often exist in +the data produced and consumed by different applications. These differences can +make it annoying to process CSV files from multiple sources. Still, while the +delimiters and quoting characters vary, the overall format is similar enough +that it is possible to write a single module which can efficiently manipulate +such data, hiding the details of reading and writing the data from the +programmer. + +The :mod:`csv` module implements classes to read and write tabular data in CSV +format. It allows programmers to say, "write this data in the format preferred +by Excel," or "read data from this file which was generated by Excel," without +knowing the precise details of the CSV format used by Excel. Programmers can +also describe the CSV formats understood by other applications or define their +own special-purpose CSV formats. + +The :mod:`csv` module's :class:`reader` and :class:`writer` objects read and +write sequences. Programmers can also read and write data in dictionary form +using the :class:`DictReader` and :class:`DictWriter` classes. + +.. note:: + + This version of the :mod:`csv` module doesn't support Unicode input. Also, + there are currently some issues regarding ASCII NUL characters. 
Accordingly, + all input should be UTF-8 or printable ASCII to be safe; see the examples in + section :ref:`csv-examples`. These restrictions will be removed in the future. + + +.. seealso:: + + .. % \seemodule{array}{Arrays of uniformly types numeric values.} + + :pep:`305` - CSV File API + The Python Enhancement Proposal which proposed this addition to Python. + + +.. _csv-contents: + +Module Contents +--------------- + +The :mod:`csv` module defines the following functions: + + +.. function:: reader(csvfile[, dialect='excel'][, fmtparam]) + + Return a reader object which will iterate over lines in the given *csvfile*. + *csvfile* can be any object which supports the iterator protocol and returns a + string each time its :meth:`next` method is called --- file objects and list + objects are both suitable. If *csvfile* is a file object, it must be opened + with the 'b' flag on platforms where that makes a difference. An optional + *dialect* parameter can be given which is used to define a set of parameters + specific to a particular CSV dialect. It may be an instance of a subclass of + the :class:`Dialect` class or one of the strings returned by the + :func:`list_dialects` function. The other optional *fmtparam* keyword arguments + can be given to override individual formatting parameters in the current + dialect. For full details about the dialect and formatting parameters, see + section :ref:`csv-fmt-params`. + + All data read are returned as strings. No automatic data type conversion is + performed. + + .. versionchanged:: 2.5 + The parser is now stricter with respect to multi-line quoted fields. Previously, + if a line ended within a quoted field without a terminating newline character, a + newline would be inserted into the returned field. This behavior caused problems + when reading files which contained carriage return characters within fields. + The behavior was changed to return the field without inserting newlines. 
As a + consequence, if newlines embedded within fields are important, the input should + be split into lines in a manner which preserves the newline characters. + + +.. function:: writer(csvfile[, dialect='excel'][, fmtparam]) + + Return a writer object responsible for converting the user's data into delimited + strings on the given file-like object. *csvfile* can be any object with a + :func:`write` method. If *csvfile* is a file object, it must be opened with the + 'b' flag on platforms where that makes a difference. An optional *dialect* + parameter can be given which is used to define a set of parameters specific to a + particular CSV dialect. It may be an instance of a subclass of the + :class:`Dialect` class or one of the strings returned by the + :func:`list_dialects` function. The other optional *fmtparam* keyword arguments + can be given to override individual formatting parameters in the current + dialect. For full details about the dialect and formatting parameters, see + section :ref:`csv-fmt-params`. To make it + as easy as possible to interface with modules which implement the DB API, the + value :const:`None` is written as the empty string. While this isn't a + reversible transformation, it makes it easier to dump SQL NULL data values to + CSV files without preprocessing the data returned from a ``cursor.fetch*`` call. + All other non-string data are stringified with :func:`str` before being written. + + +.. function:: register_dialect(name[, dialect][, fmtparam]) + + Associate *dialect* with *name*. *name* must be a string or Unicode object. The + dialect can be specified either by passing a sub-class of :class:`Dialect`, or + by *fmtparam* keyword arguments, or both, with keyword arguments overriding + parameters of the dialect. For full details about the dialect and formatting + parameters, see section :ref:`csv-fmt-params`. + + +.. function:: unregister_dialect(name) + + Delete the dialect associated with *name* from the dialect registry. 
An + :exc:`Error` is raised if *name* is not a registered dialect name. + + +.. function:: get_dialect(name) + + Return the dialect associated with *name*. An :exc:`Error` is raised if *name* + is not a registered dialect name. + + +.. function:: list_dialects() + + Return the names of all registered dialects. + + +.. function:: field_size_limit([new_limit]) + + Returns the current maximum field size allowed by the parser. If *new_limit* is + given, this becomes the new limit. + + .. versionadded:: 2.5 + +The :mod:`csv` module defines the following classes: + + +.. class:: DictReader(csvfile[, fieldnames=None[, restkey=None[, restval=None[, dialect='excel'[, *args, **kwds]]]]]) + + Create an object which operates like a regular reader but maps the information + read into a dict whose keys are given by the optional *fieldnames* parameter. + If the *fieldnames* parameter is omitted, the values in the first row of the + *csvfile* will be used as the fieldnames. If the row read has fewer fields than + the fieldnames sequence, the value of *restval* will be used as the default + value. If the row read has more fields than the fieldnames sequence, the + remaining data is added as a sequence keyed by the value of *restkey*. If the + row read has fewer fields than the fieldnames sequence, the remaining keys take + the value of the optional *restval* parameter. Any other optional or keyword + arguments are passed to the underlying :class:`reader` instance. + + +.. class:: DictWriter(csvfile, fieldnames[, restval=''[, extrasaction='raise'[, dialect='excel'[, *args, **kwds]]]]) + + Create an object which operates like a regular writer but maps dictionaries onto + output rows. The *fieldnames* parameter identifies the order in which values in + the dictionary passed to the :meth:`writerow` method are written to the + *csvfile*. The optional *restval* parameter specifies the value to be written + if the dictionary is missing a key in *fieldnames*.
If the dictionary passed to + the :meth:`writerow` method contains a key not found in *fieldnames*, the + optional *extrasaction* parameter indicates what action to take. If it is set + to ``'raise'`` a :exc:`ValueError` is raised. If it is set to ``'ignore'``, + extra values in the dictionary are ignored. Any other optional or keyword + arguments are passed to the underlying :class:`writer` instance. + + Note that unlike the :class:`DictReader` class, the *fieldnames* parameter of + the :class:`DictWriter` is not optional. Since Python's :class:`dict` objects + are not ordered, there is not enough information available to deduce the order + in which the row should be written to the *csvfile*. + + +.. class:: Dialect + + The :class:`Dialect` class is a container class relied on primarily for its + attributes, which are used to define the parameters for a specific + :class:`reader` or :class:`writer` instance. + + +.. class:: excel() + + The :class:`excel` class defines the usual properties of an Excel-generated CSV + file. It is registered with the dialect name ``'excel'``. + + +.. class:: excel_tab() + + The :class:`excel_tab` class defines the usual properties of an Excel-generated + TAB-delimited file. It is registered with the dialect name ``'excel-tab'``. + + +.. class:: Sniffer() + + The :class:`Sniffer` class is used to deduce the format of a CSV file. + +The :class:`Sniffer` class provides two methods: + + +.. method:: Sniffer.sniff(sample[, delimiters=None]) + + Analyze the given *sample* and return a :class:`Dialect` subclass reflecting the + parameters found. If the optional *delimiters* parameter is given, it is + interpreted as a string containing possible valid delimiter characters. + + +.. method:: Sniffer.has_header(sample) + + Analyze the sample text (presumed to be in CSV format) and return :const:`True` + if the first row appears to be a series of column headers. + +The :mod:`csv` module defines the following constants: + + +.. 
data:: QUOTE_ALL + + Instructs :class:`writer` objects to quote all fields. + + +.. data:: QUOTE_MINIMAL + + Instructs :class:`writer` objects to only quote those fields which contain + special characters such as *delimiter*, *quotechar* or any of the characters in + *lineterminator*. + + +.. data:: QUOTE_NONNUMERIC + + Instructs :class:`writer` objects to quote all non-numeric fields. + + Instructs the reader to convert all non-quoted fields to type *float*. + + +.. data:: QUOTE_NONE + + Instructs :class:`writer` objects to never quote fields. When the current + *delimiter* occurs in output data it is preceded by the current *escapechar* + character. If *escapechar* is not set, the writer will raise :exc:`Error` if + any characters that require escaping are encountered. + + Instructs :class:`reader` to perform no special processing of quote characters. + +The :mod:`csv` module defines the following exception: + + +.. exception:: Error + + Raised by any of the functions when an error is detected. + + +.. _csv-fmt-params: + +Dialects and Formatting Parameters +---------------------------------- + +To make it easier to specify the format of input and output records, specific +formatting parameters are grouped together into dialects. A dialect is a +subclass of the :class:`Dialect` class having a set of specific methods and a +single :meth:`validate` method. When creating :class:`reader` or +:class:`writer` objects, the programmer can specify a string or a subclass of +the :class:`Dialect` class as the dialect parameter. In addition to, or instead +of, the *dialect* parameter, the programmer can also specify individual +formatting parameters, which have the same names as the attributes defined below +for the :class:`Dialect` class. + +Dialects support the following attributes: + + +.. attribute:: Dialect.delimiter + + A one-character string used to separate fields. It defaults to ``','``. + + +.. 
attribute:: Dialect.doublequote + + Controls how instances of *quotechar* appearing inside a field should + themselves be quoted. When :const:`True`, the character is doubled. When + :const:`False`, the *escapechar* is used as a prefix to the *quotechar*. It + defaults to :const:`True`. + + On output, if *doublequote* is :const:`False` and no *escapechar* is set, + :exc:`Error` is raised if a *quotechar* is found in a field. + + +.. attribute:: Dialect.escapechar + + A one-character string used by the writer to escape the *delimiter* if *quoting* + is set to :const:`QUOTE_NONE` and the *quotechar* if *doublequote* is + :const:`False`. On reading, the *escapechar* removes any special meaning from + the following character. It defaults to :const:`None`, which disables escaping. + + +.. attribute:: Dialect.lineterminator + + The string used to terminate lines produced by the :class:`writer`. It defaults + to ``'\r\n'``. + + .. note:: + + The :class:`reader` is hard-coded to recognise either ``'\r'`` or ``'\n'`` as + end-of-line, and ignores *lineterminator*. This behavior may change in the + future. + + +.. attribute:: Dialect.quotechar + + A one-character string used to quote fields containing special characters, such + as the *delimiter* or *quotechar*, or which contain new-line characters. It + defaults to ``'"'``. + + +.. attribute:: Dialect.quoting + + Controls when quotes should be generated by the writer and recognised by the + reader. It can take on any of the :const:`QUOTE_\*` constants (see section + :ref:`csv-contents`) and defaults to :const:`QUOTE_MINIMAL`. + + +.. attribute:: Dialect.skipinitialspace + + When :const:`True`, whitespace immediately following the *delimiter* is ignored. + The default is :const:`False`. + + +Reader Objects +-------------- + +Reader objects (:class:`DictReader` instances and objects returned by the +:func:`reader` function) have the following public methods: + + +..
method:: csvreader.next() + + Return the next row of the reader's iterable object as a list, parsed according + to the current dialect. + +Reader objects have the following public attributes: + + +.. attribute:: csvreader.dialect + + A read-only description of the dialect in use by the parser. + + +.. attribute:: csvreader.line_num + + The number of lines read from the source iterator. This is not the same as the + number of records returned, as records can span multiple lines. + + .. versionadded:: 2.5 + + +Writer Objects +-------------- + +:class:`Writer` objects (:class:`DictWriter` instances and objects returned by +the :func:`writer` function) have the following public methods. A *row* must be +a sequence of strings or numbers for :class:`Writer` objects and a dictionary +mapping fieldnames to strings or numbers (by passing them through :func:`str` +first) for :class:`DictWriter` objects. Note that complex numbers are written +out surrounded by parens. This may cause some problems for other programs which +read CSV files (assuming they support complex numbers at all). + + +.. method:: csvwriter.writerow(row) + + Write the *row* parameter to the writer's file object, formatted according to + the current dialect. + + +.. method:: csvwriter.writerows(rows) + + Write all the *rows* parameters (a list of *row* objects as described above) to + the writer's file object, formatted according to the current dialect. + +Writer objects have the following public attribute: + + +.. attribute:: csvwriter.dialect + + A read-only description of the dialect in use by the writer. + + +.. 
_csv-examples: + +Examples +-------- + +The simplest example of reading a CSV file:: + + import csv + reader = csv.reader(open("some.csv", "rb")) + for row in reader: + print row + +Reading a file with an alternate format:: + + import csv + reader = csv.reader(open("passwd", "rb"), delimiter=':', quoting=csv.QUOTE_NONE) + for row in reader: + print row + +The corresponding simplest possible writing example is:: + + import csv + writer = csv.writer(open("some.csv", "wb")) + writer.writerows(someiterable) + +Registering a new dialect:: + + import csv + + csv.register_dialect('unixpwd', delimiter=':', quoting=csv.QUOTE_NONE) + + reader = csv.reader(open("passwd", "rb"), 'unixpwd') + +A slightly more advanced use of the reader --- catching and reporting errors:: + + import csv, sys + filename = "some.csv" + reader = csv.reader(open(filename, "rb")) + try: + for row in reader: + print row + except csv.Error as e: + sys.exit('file %s, line %d: %s' % (filename, reader.line_num, e)) + +And while the module doesn't directly support parsing strings, it can easily be +done:: + + import csv + for row in csv.reader(['one,two,three']): + print row + +The :mod:`csv` module doesn't directly support reading and writing Unicode, but +it is 8-bit-clean save for some problems with ASCII NUL characters. So you can +write functions or classes that handle the encoding and decoding for you as long +as you avoid encodings like UTF-16 that use NULs. UTF-8 is recommended. + +:func:`unicode_csv_reader` below is a generator that wraps :class:`csv.reader` +to handle Unicode CSV data (a list of Unicode strings). :func:`utf_8_encoder` +is a generator that encodes the Unicode strings as UTF-8, one string (or row) at +a time. 
The encoded strings are parsed by the CSV reader, and +:func:`unicode_csv_reader` decodes the UTF-8-encoded cells back into Unicode:: + + import csv + + def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs): + # csv.py doesn't do Unicode; encode temporarily as UTF-8: + csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), + dialect=dialect, **kwargs) + for row in csv_reader: + # decode UTF-8 back to Unicode, cell by cell: + yield [unicode(cell, 'utf-8') for cell in row] + + def utf_8_encoder(unicode_csv_data): + for line in unicode_csv_data: + yield line.encode('utf-8') + +For all other encodings the following :class:`UnicodeReader` and +:class:`UnicodeWriter` classes can be used. They take an additional *encoding* +parameter in their constructor and make sure that the data passes the real +reader or writer encoded as UTF-8:: + + import csv, codecs, cStringIO + + class UTF8Recoder: + """ + Iterator that reads an encoded stream and reencodes the input to UTF-8 + """ + def __init__(self, f, encoding): + self.reader = codecs.getreader(encoding)(f) + + def __iter__(self): + return self + + def __next__(self): + return next(self.reader).encode("utf-8") + + class UnicodeReader: + """ + A CSV reader which will iterate over lines in the CSV file "f", + which is encoded in the given encoding. + """ + + def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): + f = UTF8Recoder(f, encoding) + self.reader = csv.reader(f, dialect=dialect, **kwds) + + def __next__(self): + row = next(self.reader) + return [unicode(s, "utf-8") for s in row] + + def __iter__(self): + return self + + class UnicodeWriter: + """ + A CSV writer which will write rows to CSV file "f", + which is encoded in the given encoding. 
+ """ + + def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): + # Redirect output to a queue + self.queue = cStringIO.StringIO() + self.writer = csv.writer(self.queue, dialect=dialect, **kwds) + self.stream = f + self.encoder = codecs.getincrementalencoder(encoding)() + + def writerow(self, row): + self.writer.writerow([s.encode("utf-8") for s in row]) + # Fetch UTF-8 output from the queue ... + data = self.queue.getvalue() + data = data.decode("utf-8") + # ... and reencode it into the target encoding + data = self.encoder.encode(data) + # write to the target stream + self.stream.write(data) + # empty queue + self.queue.truncate(0) + + def writerows(self, rows): + for row in rows: + self.writerow(row) + diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst new file mode 100644 index 0000000..dc37565 --- /dev/null +++ b/Doc/library/ctypes.rst @@ -0,0 +1,2364 @@ + +:mod:`ctypes` --- A foreign function library for Python. +======================================================== + +.. module:: ctypes + :synopsis: A foreign function library for Python. +.. moduleauthor:: Thomas Heller + + +.. versionadded:: 2.5 + +``ctypes`` is a foreign function library for Python. It provides C compatible +data types, and allows calling functions in dlls/shared libraries. It can be +used to wrap these libraries in pure Python. + + +.. _ctypes-ctypes-tutorial: + +ctypes tutorial +--------------- + +Note: The code samples in this tutorial use ``doctest`` to make sure that they +actually work. Since some code samples behave differently under Linux, Windows, +or Mac OS X, they contain doctest directives in comments. + +Note: Some code sample references the ctypes :class:`c_int` type. This type is +an alias to the :class:`c_long` type on 32-bit systems. So, you should not be +confused if :class:`c_long` is printed if you would expect :class:`c_int` --- +they are actually the same type. + + +.. 
_ctypes-loading-dynamic-link-libraries: + +Loading dynamic link libraries +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``ctypes`` exports the *cdll*, and on Windows also *windll* and *oledll* objects +to load dynamic link libraries. + +You load libraries by accessing them as attributes of these objects. *cdll* +loads libraries which export functions using the standard ``cdecl`` calling +convention, while *windll* libraries call functions using the ``stdcall`` +calling convention. *oledll* also uses the ``stdcall`` calling convention, and +assumes the functions return a Windows :class:`HRESULT` error code. The error +code is used to automatically raise :class:`WindowsError` Python exceptions when +the function call fails. + +Here are some examples for Windows. Note that ``msvcrt`` is the MS standard C +library containing most standard C functions, and uses the cdecl calling +convention:: + + >>> from ctypes import * + >>> print windll.kernel32 # doctest: +WINDOWS + + >>> print cdll.msvcrt # doctest: +WINDOWS + + >>> libc = cdll.msvcrt # doctest: +WINDOWS + >>> + +Windows appends the usual '.dll' file suffix automatically. + +On Linux, it is required to specify the filename *including* the extension to +load a library, so attribute access does not work. Either the +:meth:`LoadLibrary` method of the dll loaders should be used, or you should load +the library by creating an instance of CDLL by calling the constructor:: + + >>> cdll.LoadLibrary("libc.so.6") # doctest: +LINUX + + >>> libc = CDLL("libc.so.6") # doctest: +LINUX + >>> libc # doctest: +LINUX + + >>> + +.. % XXX Add section for Mac OS X. + + +.. 
_ctypes-accessing-functions-from-loaded-dlls: + +Accessing functions from loaded dlls +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Functions are accessed as attributes of dll objects:: + + >>> from ctypes import * + >>> libc.printf + <_FuncPtr object at 0x...> + >>> print windll.kernel32.GetModuleHandleA # doctest: +WINDOWS + <_FuncPtr object at 0x...> + >>> print windll.kernel32.MyOwnFunction # doctest: +WINDOWS + Traceback (most recent call last): + File "<stdin>", line 1, in ? + File "ctypes.py", line 239, in __getattr__ + func = _StdcallFuncPtr(name, self) + AttributeError: function 'MyOwnFunction' not found + >>> + +Note that win32 system dlls like ``kernel32`` and ``user32`` often export ANSI +as well as UNICODE versions of a function. The UNICODE version is exported with +a ``W`` appended to the name, while the ANSI version is exported with an ``A`` +appended to the name. The win32 ``GetModuleHandle`` function, which returns a +*module handle* for a given module name, has the following C prototype, and a +macro is used to expose one of them as ``GetModuleHandle`` depending on whether +UNICODE is defined or not:: + + /* ANSI version */ + HMODULE GetModuleHandleA(LPCSTR lpModuleName); + /* UNICODE version */ + HMODULE GetModuleHandleW(LPCWSTR lpModuleName); + +*windll* does not try to select one of them by magic, you must access the +version you need by specifying ``GetModuleHandleA`` or ``GetModuleHandleW`` +explicitly, and then call it with normal strings or unicode strings +respectively. + +Sometimes, dlls export functions with names which aren't valid Python +identifiers, like ``"??2@YAPAXI@Z"``. In this case you have to use ``getattr`` +to retrieve the function:: + + >>> getattr(cdll.msvcrt, "??2@YAPAXI@Z") # doctest: +WINDOWS + <_FuncPtr object at 0x...> + >>> + +On Windows, some dlls export functions not by name but by ordinal.
These +functions can be accessed by indexing the dll object with the ordinal number:: + + >>> cdll.kernel32[1] # doctest: +WINDOWS + <_FuncPtr object at 0x...> + >>> cdll.kernel32[0] # doctest: +WINDOWS + Traceback (most recent call last): + File "", line 1, in ? + File "ctypes.py", line 310, in __getitem__ + func = _StdcallFuncPtr(name, self) + AttributeError: function ordinal 0 not found + >>> + + +.. _ctypes-calling-functions: + +Calling functions +^^^^^^^^^^^^^^^^^ + +You can call these functions like any other Python callable. This example uses +the ``time()`` function, which returns system time in seconds since the Unix +epoch, and the ``GetModuleHandleA()`` function, which returns a win32 module +handle. + +This example calls both functions with a NULL pointer (``None`` should be used +as the NULL pointer):: + + >>> print libc.time(None) # doctest: +SKIP + 1150640792 + >>> print hex(windll.kernel32.GetModuleHandleA(None)) # doctest: +WINDOWS + 0x1d000000 + >>> + +``ctypes`` tries to protect you from calling functions with the wrong number of +arguments or the wrong calling convention. Unfortunately this only works on +Windows. It does this by examining the stack after the function returns, so +although an error is raised the function *has* been called:: + + >>> windll.kernel32.GetModuleHandleA() # doctest: +WINDOWS + Traceback (most recent call last): + File "", line 1, in ? + ValueError: Procedure probably called with not enough arguments (4 bytes missing) + >>> windll.kernel32.GetModuleHandleA(0, 0) # doctest: +WINDOWS + Traceback (most recent call last): + File "", line 1, in ? + ValueError: Procedure probably called with too many arguments (4 bytes in excess) + >>> + +The same exception is raised when you call an ``stdcall`` function with the +``cdecl`` calling convention, or vice versa:: + + >>> cdll.kernel32.GetModuleHandleA(None) # doctest: +WINDOWS + Traceback (most recent call last): + File "", line 1, in ? 
+ ValueError: Procedure probably called with not enough arguments (4 bytes missing) + >>> + + >>> windll.msvcrt.printf("spam") # doctest: +WINDOWS + Traceback (most recent call last): + File "<stdin>", line 1, in ? + ValueError: Procedure probably called with too many arguments (4 bytes in excess) + >>> + +To find out the correct calling convention you have to look into the C header +file or the documentation for the function you want to call. + +On Windows, ``ctypes`` uses win32 structured exception handling to prevent +crashes from general protection faults when functions are called with invalid +argument values:: + + >>> windll.kernel32.GetModuleHandleA(32) # doctest: +WINDOWS + Traceback (most recent call last): + File "<stdin>", line 1, in ? + WindowsError: exception: access violation reading 0x00000020 + >>> + +There are, however, enough ways to crash Python with ``ctypes``, so you should +be careful anyway. + +``None``, integers, longs, byte strings and unicode strings are the only native +Python objects that can directly be used as parameters in these function calls. +``None`` is passed as a C ``NULL`` pointer, byte strings and unicode strings are +passed as pointer to the memory block that contains their data (``char *`` or +``wchar_t *``). Python integers and Python longs are passed as the platform's +default C ``int`` type, their value is masked to fit into the C type. + +Before we move on to calling functions with other parameter types, we have to learn +more about ``ctypes`` data types. + + +..
_ctypes-fundamental-data-types: + +Fundamental data types +^^^^^^^^^^^^^^^^^^^^^^ + +``ctypes`` defines a number of primitive C compatible data types : + + +----------------------+--------------------------------+----------------------------+ + | ctypes type | C type | Python type | + +======================+================================+============================+ + | :class:`c_char` | ``char`` | 1-character string | + +----------------------+--------------------------------+----------------------------+ + | :class:`c_wchar` | ``wchar_t`` | 1-character unicode string | + +----------------------+--------------------------------+----------------------------+ + | :class:`c_byte` | ``char`` | int/long | + +----------------------+--------------------------------+----------------------------+ + | :class:`c_ubyte` | ``unsigned char`` | int/long | + +----------------------+--------------------------------+----------------------------+ + | :class:`c_short` | ``short`` | int/long | + +----------------------+--------------------------------+----------------------------+ + | :class:`c_ushort` | ``unsigned short`` | int/long | + +----------------------+--------------------------------+----------------------------+ + | :class:`c_int` | ``int`` | int/long | + +----------------------+--------------------------------+----------------------------+ + | :class:`c_uint` | ``unsigned int`` | int/long | + +----------------------+--------------------------------+----------------------------+ + | :class:`c_long` | ``long`` | int/long | + +----------------------+--------------------------------+----------------------------+ + | :class:`c_ulong` | ``unsigned long`` | int/long | + +----------------------+--------------------------------+----------------------------+ + | :class:`c_longlong` | ``__int64`` or ``long long`` | int/long | + +----------------------+--------------------------------+----------------------------+ + | :class:`c_ulonglong` | ``unsigned __int64`` or | int/long | + | 
| ``unsigned long long`` | | + +----------------------+--------------------------------+----------------------------+ + | :class:`c_float` | ``float`` | float | + +----------------------+--------------------------------+----------------------------+ + | :class:`c_double` | ``double`` | float | + +----------------------+--------------------------------+----------------------------+ + | :class:`c_char_p` | ``char *`` (NUL terminated) | string or ``None`` | + +----------------------+--------------------------------+----------------------------+ + | :class:`c_wchar_p` | ``wchar_t *`` (NUL terminated) | unicode or ``None`` | + +----------------------+--------------------------------+----------------------------+ + | :class:`c_void_p` | ``void *`` | int/long or ``None`` | + +----------------------+--------------------------------+----------------------------+ + + +All these types can be created by calling them with an optional initializer of +the correct type and value:: + + >>> c_int() + c_long(0) + >>> c_char_p("Hello, World") + c_char_p('Hello, World') + >>> c_ushort(-3) + c_ushort(65533) + >>> + +Since these types are mutable, their value can also be changed afterwards:: + + >>> i = c_int(42) + >>> print i + c_long(42) + >>> print i.value + 42 + >>> i.value = -99 + >>> print i.value + -99 + >>> + +Assigning a new value to instances of the pointer types :class:`c_char_p`, +:class:`c_wchar_p`, and :class:`c_void_p` changes the *memory location* they +point to, *not the contents* of the memory block (of course not, because Python +strings are immutable):: + + >>> s = "Hello, World" + >>> c_s = c_char_p(s) + >>> print c_s + c_char_p('Hello, World') + >>> c_s.value = "Hi, there" + >>> print c_s + c_char_p('Hi, there') + >>> print s # first string is unchanged + Hello, World + >>> + +You should be careful, however, not to pass them to functions expecting pointers +to mutable memory. 
If you need mutable memory blocks, ctypes has a +``create_string_buffer`` function which creates these in various ways. The +current memory block contents can be accessed (or changed) with the ``raw`` +property; if you want to access it as NUL terminated string, use the ``value`` +property:: + + >>> from ctypes import * + >>> p = create_string_buffer(3) # create a 3 byte buffer, initialized to NUL bytes + >>> print sizeof(p), repr(p.raw) + 3 '\x00\x00\x00' + >>> p = create_string_buffer("Hello") # create a buffer containing a NUL terminated string + >>> print sizeof(p), repr(p.raw) + 6 'Hello\x00' + >>> print repr(p.value) + 'Hello' + >>> p = create_string_buffer("Hello", 10) # create a 10 byte buffer + >>> print sizeof(p), repr(p.raw) + 10 'Hello\x00\x00\x00\x00\x00' + >>> p.value = "Hi" + >>> print sizeof(p), repr(p.raw) + 10 'Hi\x00lo\x00\x00\x00\x00\x00' + >>> + +The ``create_string_buffer`` function replaces the ``c_buffer`` function (which +is still available as an alias), as well as the ``c_string`` function from +earlier ctypes releases. To create a mutable memory block containing unicode +characters of the C type ``wchar_t`` use the ``create_unicode_buffer`` function. + + +.. _ctypes-calling-functions-continued: + +Calling functions, continued +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Note that printf prints to the real standard output channel, *not* to +``sys.stdout``, so these examples will only work at the console prompt, not from +within *IDLE* or *PythonWin*:: + + >>> printf = libc.printf + >>> printf("Hello, %s\n", "World!") + Hello, World! + 14 + >>> printf("Hello, %S", u"World!") + Hello, World! + 13 + >>> printf("%d bottles of beer\n", 42) + 42 bottles of beer + 19 + >>> printf("%f bottles of beer\n", 42.5) + Traceback (most recent call last): + File "", line 1, in ? 
+ ArgumentError: argument 2: exceptions.TypeError: Don't know how to convert parameter 2 + >>> + +As has been mentioned before, all Python types except integers, strings, and +unicode strings have to be wrapped in their corresponding ``ctypes`` type, so +that they can be converted to the required C data type:: + + >>> printf("An int %d, a double %f\n", 1234, c_double(3.14)) + An int 1234, a double 3.140000 + 31 + >>> + + +.. _ctypes-calling-functions-with-own-custom-data-types: + +Calling functions with your own custom data types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can also customize ``ctypes`` argument conversion to allow instances of your +own classes to be used as function arguments. ``ctypes`` looks for an +:attr:`_as_parameter_` attribute and uses this as the function argument. Of +course, it must be one of integer, string, or unicode:: + + >>> class Bottles(object): + ... def __init__(self, number): + ... self._as_parameter_ = number + ... + >>> bottles = Bottles(42) + >>> printf("%d bottles of beer\n", bottles) + 42 bottles of beer + 19 + >>> + +If you don't want to store the instance's data in the :attr:`_as_parameter_` +instance variable, you could define a ``property`` which makes the data +available. + + +.. _ctypes-specifying-required-argument-types: + +Specifying the required argument types (function prototypes) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +It is possible to specify the required argument types of functions exported from +DLLs by setting the :attr:`argtypes` attribute.
+ +:attr:`argtypes` must be a sequence of C data types (the ``printf`` function is +probably not a good example here, because it takes a variable number and +different types of parameters depending on the format string, on the other hand +this is quite handy to experiment with this feature):: + + >>> printf.argtypes = [c_char_p, c_char_p, c_int, c_double] + >>> printf("String '%s', Int %d, Double %f\n", "Hi", 10, 2.2) + String 'Hi', Int 10, Double 2.200000 + 37 + >>> + +Specifying a format protects against incompatible argument types (just as a +prototype for a C function), and tries to convert the arguments to valid types:: + + >>> printf("%d %d %d", 1, 2, 3) + Traceback (most recent call last): + File "<stdin>", line 1, in ? + ArgumentError: argument 2: exceptions.TypeError: wrong type + >>> printf("%s %d %f", "X", 2, 3) + X 2 3.00000012 + 12 + >>> + +If you have defined your own classes which you pass to function calls, you have +to implement a :meth:`from_param` class method for them to be able to use them +in the :attr:`argtypes` sequence. The :meth:`from_param` class method receives +the Python object passed to the function call, it should do a typecheck or +whatever is needed to make sure this object is acceptable, and then return the +object itself, its :attr:`_as_parameter_` attribute, or whatever you want to +pass as the C function argument in this case. Again, the result should be an +integer, string, unicode, a ``ctypes`` instance, or something having the +:attr:`_as_parameter_` attribute. + + +.. _ctypes-return-types: + +Return types +^^^^^^^^^^^^ + +By default functions are assumed to return the C ``int`` type. Other return +types can be specified by setting the :attr:`restype` attribute of the function +object.
+ +Here is a more advanced example, it uses the ``strchr`` function, which expects +a string pointer and a char, and returns a pointer to a string:: + + >>> strchr = libc.strchr + >>> strchr("abcdef", ord("d")) # doctest: +SKIP + 8059983 + >>> strchr.restype = c_char_p # c_char_p is a pointer to a string + >>> strchr("abcdef", ord("d")) + 'def' + >>> print strchr("abcdef", ord("x")) + None + >>> + +If you want to avoid the ``ord("x")`` calls above, you can set the +:attr:`argtypes` attribute, and the second argument will be converted from a +single character Python string into a C char:: + + >>> strchr.restype = c_char_p + >>> strchr.argtypes = [c_char_p, c_char] + >>> strchr("abcdef", "d") + 'def' + >>> strchr("abcdef", "def") + Traceback (most recent call last): + File "", line 1, in ? + ArgumentError: argument 2: exceptions.TypeError: one character string expected + >>> print strchr("abcdef", "x") + None + >>> strchr("abcdef", "d") + 'def' + >>> + +You can also use a callable Python object (a function or a class for example) as +the :attr:`restype` attribute, if the foreign function returns an integer. The +callable will be called with the ``integer`` the C function returns, and the +result of this call will be used as the result of your function call. This is +useful to check for error return values and automatically raise an exception:: + + >>> GetModuleHandle = windll.kernel32.GetModuleHandleA # doctest: +WINDOWS + >>> def ValidHandle(value): + ... if value == 0: + ... raise WinError() + ... return value + ... + >>> + >>> GetModuleHandle.restype = ValidHandle # doctest: +WINDOWS + >>> GetModuleHandle(None) # doctest: +WINDOWS + 486539264 + >>> GetModuleHandle("something silly") # doctest: +WINDOWS + Traceback (most recent call last): + File "", line 1, in ? + File "", line 3, in ValidHandle + WindowsError: [Errno 126] The specified module could not be found. 
+ >>> + +``WinError`` is a function which will call Windows ``FormatMessage()`` api to +get the string representation of an error code, and *returns* an exception. +``WinError`` takes an optional error code parameter, if no one is used, it calls +:func:`GetLastError` to retrieve it. + +Please note that a much more powerful error checking mechanism is available +through the :attr:`errcheck` attribute; see the reference manual for details. + + +.. _ctypes-passing-pointers: + +Passing pointers (or: passing parameters by reference) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sometimes a C api function expects a *pointer* to a data type as parameter, +probably to write into the corresponding location, or if the data is too large +to be passed by value. This is also known as *passing parameters by reference*. + +``ctypes`` exports the :func:`byref` function which is used to pass parameters +by reference. The same effect can be achieved with the ``pointer`` function, +although ``pointer`` does a lot more work since it constructs a real pointer +object, so it is faster to use :func:`byref` if you don't need the pointer +object in Python itself:: + + >>> i = c_int() + >>> f = c_float() + >>> s = create_string_buffer('\000' * 32) + >>> print i.value, f.value, repr(s.value) + 0 0.0 '' + >>> libc.sscanf("1 3.14 Hello", "%d %f %s", + ... byref(i), byref(f), s) + 3 + >>> print i.value, f.value, repr(s.value) + 1 3.1400001049 'Hello' + >>> + + +.. _ctypes-structures-unions: + +Structures and unions +^^^^^^^^^^^^^^^^^^^^^ + +Structures and unions must derive from the :class:`Structure` and :class:`Union` +base classes which are defined in the ``ctypes`` module. Each subclass must +define a :attr:`_fields_` attribute. :attr:`_fields_` must be a list of +*2-tuples*, containing a *field name* and a *field type*. + +The field type must be a ``ctypes`` type like :class:`c_int`, or any other +derived ``ctypes`` type: structure, union, array, pointer. 
+ +Here is a simple example of a POINT structure, which contains two integers named +``x`` and ``y``, and also shows how to initialize a structure in the +constructor:: + + >>> from ctypes import * + >>> class POINT(Structure): + ... _fields_ = [("x", c_int), + ... ("y", c_int)] + ... + >>> point = POINT(10, 20) + >>> print point.x, point.y + 10 20 + >>> point = POINT(y=5) + >>> print point.x, point.y + 0 5 + >>> POINT(1, 2, 3) + Traceback (most recent call last): + File "<stdin>", line 1, in ? + ValueError: too many initializers + >>> + +You can, however, build much more complicated structures. Structures can themselves +contain other structures by using a structure as a field type. + +Here is a RECT structure which contains two POINTs named ``upperleft`` and +``lowerright`` :: + + >>> class RECT(Structure): + ... _fields_ = [("upperleft", POINT), + ... ("lowerright", POINT)] + ... + >>> rc = RECT(point) + >>> print rc.upperleft.x, rc.upperleft.y + 0 5 + >>> print rc.lowerright.x, rc.lowerright.y + 0 0 + >>> + +Nested structures can also be initialized in the constructor in several ways:: + + >>> r = RECT(POINT(1, 2), POINT(3, 4)) + >>> r = RECT((1, 2), (3, 4)) + +Field descriptors can be retrieved from the *class*, they are useful for +debugging because they can provide useful information:: + + >>> print POINT.x + <Field type=c_long, ofs=0, size=4> + >>> print POINT.y + <Field type=c_long, ofs=4, size=4> + >>> + + +.. _ctypes-structureunion-alignment-byte-order: + +Structure/union alignment and byte order +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default, Structure and Union fields are aligned in the same way the C +compiler does it. It is possible to override this behaviour by specifying a +:attr:`_pack_` class attribute in the subclass definition. This must be set to a +positive integer and specifies the maximum alignment for the fields. This is +what ``#pragma pack(n)`` also does in MSVC. + +``ctypes`` uses the native byte order for Structures and Unions.
To build +structures with non-native byte order, you can use one of the +BigEndianStructure, LittleEndianStructure, BigEndianUnion, and LittleEndianUnion +base classes. These classes cannot contain pointer fields. + + +.. _ctypes-bit-fields-in-structures-unions: + +Bit fields in structures and unions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +It is possible to create structures and unions containing bit fields. Bit fields +are only possible for integer fields, the bit width is specified as the third +item in the :attr:`_fields_` tuples:: + + >>> class Int(Structure): + ... _fields_ = [("first_16", c_int, 16), + ... ("second_16", c_int, 16)] + ... + >>> print Int.first_16 + <Field type=c_long, ofs=0:0, bits=16> + >>> print Int.second_16 + <Field type=c_long, ofs=0:16, bits=16> + >>> + + +.. _ctypes-arrays: + +Arrays +^^^^^^ + +Arrays are sequences, containing a fixed number of instances of the same type. + +The recommended way to create array types is by multiplying a data type with a +positive integer:: + + TenPointsArrayType = POINT * 10 + +Here is an example of a somewhat artificial data type, a structure containing 4 +POINTs among other stuff:: + + >>> from ctypes import * + >>> class POINT(Structure): + ... _fields_ = ("x", c_int), ("y", c_int) + ... + >>> class MyStruct(Structure): + ... _fields_ = [("a", c_int), + ... ("b", c_float), + ... ("point_array", POINT * 4)] + >>> + >>> print len(MyStruct().point_array) + 4 + >>> + +Instances are created in the usual way, by calling the class:: + + arr = TenPointsArrayType() + for pt in arr: + print pt.x, pt.y + +The above code prints a series of ``0 0`` lines, because the array contents are +initialized to zeros. + +Initializers of the correct type can also be specified:: + + >>> from ctypes import * + >>> TenIntegers = c_int * 10 + >>> ii = TenIntegers(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) + >>> print ii + <c_long_Array_10 object at 0x...> + >>> for i in ii: print i, + ... + 1 2 3 4 5 6 7 8 9 10 + >>> + + +..
_ctypes-pointers: + +Pointers +^^^^^^^^ + +Pointer instances are created by calling the ``pointer`` function on a +``ctypes`` type:: + + >>> from ctypes import * + >>> i = c_int(42) + >>> pi = pointer(i) + >>> + +Pointer instances have a ``contents`` attribute which returns the object to +which the pointer points, the ``i`` object above:: + + >>> pi.contents + c_long(42) + >>> + +Note that ``ctypes`` does not have OOR (original object return), it constructs a +new, equivalent object each time you retrieve an attribute:: + + >>> pi.contents is i + False + >>> pi.contents is pi.contents + False + >>> + +Assigning another :class:`c_int` instance to the pointer's contents attribute +would cause the pointer to point to the memory location where this is stored:: + + >>> i = c_int(99) + >>> pi.contents = i + >>> pi.contents + c_long(99) + >>> + +Pointer instances can also be indexed with integers:: + + >>> pi[0] + 99 + >>> + +Assigning to an integer index changes the pointed to value:: + + >>> print i + c_long(99) + >>> pi[0] = 22 + >>> print i + c_long(22) + >>> + +It is also possible to use indexes different from 0, but you must know what +you're doing, just as in C: You can access or change arbitrary memory locations. +Generally you only use this feature if you receive a pointer from a C function, +and you *know* that the pointer actually points to an array instead of a single +item. + +Behind the scenes, the ``pointer`` function does more than simply create pointer +instances, it has to create pointer *types* first. This is done with the +``POINTER`` function, which accepts any ``ctypes`` type, and returns a new +type:: + + >>> PI = POINTER(c_int) + >>> PI + + >>> PI(42) + Traceback (most recent call last): + File "", line 1, in ? + TypeError: expected c_long instead of int + >>> PI(c_int(42)) + + >>> + +Calling the pointer type without an argument creates a ``NULL`` pointer. 
+``NULL`` pointers have a ``False`` boolean value:: + + >>> null_ptr = POINTER(c_int)() + >>> print bool(null_ptr) + False + >>> + +``ctypes`` checks for ``NULL`` when dereferencing pointers (but dereferencing +invalid non-\ ``NULL`` pointers would crash Python):: + + >>> null_ptr[0] + Traceback (most recent call last): + .... + ValueError: NULL pointer access + >>> + + >>> null_ptr[0] = 1234 + Traceback (most recent call last): + .... + ValueError: NULL pointer access + >>> + + +.. _ctypes-type-conversions: + +Type conversions +^^^^^^^^^^^^^^^^ + +Usually, ctypes does strict type checking. This means, if you have +``POINTER(c_int)`` in the :attr:`argtypes` list of a function or as the type of +a member field in a structure definition, only instances of exactly the same +type are accepted. There are some exceptions to this rule, where ctypes accepts +other objects. For example, you can pass compatible array instances instead of +pointer types. So, for ``POINTER(c_int)``, ctypes accepts an array of c_int:: + + >>> class Bar(Structure): + ... _fields_ = [("count", c_int), ("values", POINTER(c_int))] + ... + >>> bar = Bar() + >>> bar.values = (c_int * 3)(1, 2, 3) + >>> bar.count = 3 + >>> for i in range(bar.count): + ... print bar.values[i] + ... + 1 + 2 + 3 + >>> + +To set a POINTER type field to ``NULL``, you can assign ``None``:: + + >>> bar.values = None + >>> + +XXX list other conversions... + +Sometimes you have instances of incompatible types. In ``C``, you can cast one +type into another type. ``ctypes`` provides a ``cast`` function which can be +used in the same way. The ``Bar`` structure defined above accepts +``POINTER(c_int)`` pointers or :class:`c_int` arrays for its ``values`` field, +but not instances of other types:: + + >>> bar.values = (c_byte * 4)() + Traceback (most recent call last): + File "<stdin>", line 1, in ? + TypeError: incompatible types, c_byte_Array_4 instance instead of LP_c_long instance + >>> + +For these cases, the ``cast`` function is handy.
+ +The ``cast`` function can be used to cast a ctypes instance into a pointer to a +different ctypes data type. ``cast`` takes two parameters, a ctypes object that +is or can be converted to a pointer of some kind, and a ctypes pointer type. It +returns an instance of the second argument, which references the same memory +block as the first argument:: + + >>> a = (c_byte * 4)() + >>> cast(a, POINTER(c_int)) + <ctypes.LP_c_long object at ...> + >>> + +So, ``cast`` can be used to assign to the ``values`` field of the ``Bar`` +structure:: + + >>> bar = Bar() + >>> bar.values = cast((c_byte * 4)(), POINTER(c_int)) + >>> print bar.values[0] + 0 + >>> + + +.. _ctypes-incomplete-types: + +Incomplete Types +^^^^^^^^^^^^^^^^ + +*Incomplete Types* are structures, unions or arrays whose members are not yet +specified. In C, they are specified by forward declarations, which are defined +later:: + + struct cell; /* forward declaration */ + + struct cell { + char *name; + struct cell *next; + }; + +The straightforward translation into ctypes code would be this, but it does not +work:: + + >>> class cell(Structure): + ... _fields_ = [("name", c_char_p), + ... ("next", POINTER(cell))] + ... + Traceback (most recent call last): + File "<stdin>", line 1, in ? + File "<stdin>", line 2, in cell + NameError: name 'cell' is not defined + >>> + +because the new ``class cell`` is not available in the class statement itself. +In ``ctypes``, we can define the ``cell`` class and set the :attr:`_fields_` +attribute later, after the class statement:: + + >>> from ctypes import * + >>> class cell(Structure): + ... pass + ... + >>> cell._fields_ = [("name", c_char_p), + ... ("next", POINTER(cell))] + >>> + +Let's try it. We create two instances of ``cell``, and let them point to each +other, and finally follow the pointer chain a few times:: + + >>> c1 = cell() + >>> c1.name = "foo" + >>> c2 = cell() + >>> c2.name = "bar" + >>> c1.next = pointer(c2) + >>> c2.next = pointer(c1) + >>> p = c1 + >>> for i in range(8): + ... print p.name, + ...
p = p.next[0] + ... + foo bar foo bar foo bar foo bar + >>> + + +.. _ctypes-callback-functions: + +Callback functions +^^^^^^^^^^^^^^^^^^ + +``ctypes`` allows creating C callable function pointers from Python callables. +These are sometimes called *callback functions*. + +First, you must create a class for the callback function, the class knows the +calling convention, the return type, and the number and types of arguments this +function will receive. + +The CFUNCTYPE factory function creates types for callback functions using the +normal cdecl calling convention, and, on Windows, the WINFUNCTYPE factory +function creates types for callback functions using the stdcall calling +convention. + +Both of these factory functions are called with the result type as first +argument, and the callback function's expected argument types as the remaining +arguments. + +I will present an example here which uses the standard C library's :func:`qsort` +function, this is used to sort items with the help of a callback function. +:func:`qsort` will be used to sort an array of integers:: + + >>> IntArray5 = c_int * 5 + >>> ia = IntArray5(5, 1, 7, 33, 99) + >>> qsort = libc.qsort + >>> qsort.restype = None + >>> + +:func:`qsort` must be called with a pointer to the data to sort, the number of +items in the data array, the size of one item, and a pointer to the comparison +function, the callback. The callback will then be called with two pointers to +items, and it must return a negative integer if the first item is smaller than +the second, a zero if they are equal, and a positive integer otherwise. + +So our callback function receives pointers to integers, and must return an +integer. First we create the ``type`` for the callback function:: + + >>> CMPFUNC = CFUNCTYPE(c_int, POINTER(c_int), POINTER(c_int)) + >>> + +For the first implementation of the callback function, we simply print the +arguments we get, and return 0 (incremental development ;-):: + + >>> def py_cmp_func(a, b): + ...
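print "py_cmp_func", a, b + ... return 0 + ... + >>> + +Create the C callable callback:: + + >>> cmp_func = CMPFUNC(py_cmp_func) + >>> + +And we're ready to go:: + + >>> qsort(ia, len(ia), sizeof(c_int), cmp_func) # doctest: +WINDOWS + py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> + py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> + py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> + py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> + py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> + py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> + py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> + py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> + py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> + py_cmp_func <ctypes.LP_c_long object at 0x00...> <ctypes.LP_c_long object at 0x00...> + >>> + +We know how to access the contents of a pointer, so let's redefine our callback:: + + >>> def py_cmp_func(a, b): + ... print "py_cmp_func", a[0], b[0] + ... return 0 + ... + >>> cmp_func = CMPFUNC(py_cmp_func) + >>> + +Here is what we get on Windows:: + + >>> qsort(ia, len(ia), sizeof(c_int), cmp_func) # doctest: +WINDOWS + py_cmp_func 7 1 + py_cmp_func 33 1 + py_cmp_func 99 1 + py_cmp_func 5 1 + py_cmp_func 7 5 + py_cmp_func 33 5 + py_cmp_func 99 5 + py_cmp_func 7 99 + py_cmp_func 33 99 + py_cmp_func 7 33 + >>> + +It is funny to see that on Linux the sort function seems to work much more +efficiently; it is doing fewer comparisons:: + + >>> qsort(ia, len(ia), sizeof(c_int), cmp_func) # doctest: +LINUX + py_cmp_func 5 1 + py_cmp_func 33 99 + py_cmp_func 7 33 + py_cmp_func 5 7 + py_cmp_func 1 7 + >>> + +Ah, we're nearly done! The last step is to actually compare the two items and +return a useful result:: + + >>> def py_cmp_func(a, b): + ... print "py_cmp_func", a[0], b[0] + ... return a[0] - b[0] + ...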
+ >>> + +Final run on Windows:: + + >>> qsort(ia, len(ia), sizeof(c_int), CMPFUNC(py_cmp_func)) # doctest: +WINDOWS + py_cmp_func 33 7 + py_cmp_func 99 33 + py_cmp_func 5 99 + py_cmp_func 1 99 + py_cmp_func 33 7 + py_cmp_func 1 33 + py_cmp_func 5 33 + py_cmp_func 5 7 + py_cmp_func 1 7 + py_cmp_func 5 1 + >>> + +and on Linux:: + + >>> qsort(ia, len(ia), sizeof(c_int), CMPFUNC(py_cmp_func)) # doctest: +LINUX + py_cmp_func 5 1 + py_cmp_func 33 99 + py_cmp_func 7 33 + py_cmp_func 1 7 + py_cmp_func 5 7 + >>> + +It is quite interesting to see that the Windows :func:`qsort` function needs +more comparisons than the linux version! + +As we can easily check, our array is sorted now:: + + >>> for i in ia: print i, + ... + 1 5 7 33 99 + >>> + +**Important note for callback functions:** + +Make sure you keep references to CFUNCTYPE objects as long as they are used from +C code. ``ctypes`` doesn't, and if you don't, they may be garbage collected, +crashing your program when a callback is made. + + +.. _ctypes-accessing-values-exported-from-dlls: + +Accessing values exported from dlls +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sometimes, a dll not only exports functions, it also exports variables. An +example in the Python library itself is the ``Py_OptimizeFlag``, an integer set +to 0, 1, or 2, depending on the :option:`-O` or :option:`-OO` flag given on +startup. + +``ctypes`` can access values like this with the :meth:`in_dll` class methods of +the type. *pythonapi* is a predefined symbol giving access to the Python C +api:: + + >>> opt_flag = c_int.in_dll(pythonapi, "Py_OptimizeFlag") + >>> print opt_flag + c_long(0) + >>> + +If the interpreter would have been started with :option:`-O`, the sample would +have printed ``c_long(1)``, or ``c_long(2)`` if :option:`-OO` would have been +specified. + +An extended example which also demonstrates the use of pointers accesses the +``PyImport_FrozenModules`` pointer exported by Python. 
+ +Quoting the Python docs: *This pointer is initialized to point to an array of +"struct _frozen" records, terminated by one whose members are all NULL or zero. +When a frozen module is imported, it is searched in this table. Third-party code +could play tricks with this to provide a dynamically created collection of +frozen modules.* + +So manipulating this pointer could even prove useful. To restrict the example +size, we show only how this table can be read with ``ctypes``:: + + >>> from ctypes import * + >>> + >>> class struct_frozen(Structure): + ... _fields_ = [("name", c_char_p), + ... ("code", POINTER(c_ubyte)), + ... ("size", c_int)] + ... + >>> + +We have defined the ``struct _frozen`` data type, so we can get the pointer to +the table:: + + >>> FrozenTable = POINTER(struct_frozen) + >>> table = FrozenTable.in_dll(pythonapi, "PyImport_FrozenModules") + >>> + +Since ``table`` is a ``pointer`` to the array of ``struct_frozen`` records, we +can iterate over it, but we just have to make sure that our loop terminates, +because pointers have no size. Sooner or later it would probably crash with an +access violation or whatever, so it's better to break out of the loop when we +hit the NULL entry:: + + >>> for item in table: + ... print item.name, item.size + ... if item.name is None: + ... break + ... + __hello__ 104 + __phello__ -104 + __phello__.spam 104 + None 0 + >>> + +The fact that standard Python has a frozen module and a frozen package +(indicated by the negative size member) is not well known; it is only used for +testing. Try it out with ``import __hello__`` for example. + + +.. _ctypes-surprises: + +Surprises +^^^^^^^^^ + +There are some edges in ``ctypes`` where you may expect something other than +what actually happens. + +Consider the following example:: + + >>> from ctypes import * + >>> class POINT(Structure): + ... _fields_ = ("x", c_int), ("y", c_int) + ... + >>> class RECT(Structure): + ... _fields_ = ("a", POINT), ("b", POINT) + ...
+ >>> p1 = POINT(1, 2) + >>> p2 = POINT(3, 4) + >>> rc = RECT(p1, p2) + >>> print rc.a.x, rc.a.y, rc.b.x, rc.b.y + 1 2 3 4 + >>> # now swap the two points + >>> rc.a, rc.b = rc.b, rc.a + >>> print rc.a.x, rc.a.y, rc.b.x, rc.b.y + 3 4 3 4 + >>> + +Hm. We certainly expected the last statement to print ``3 4 1 2``. What +happened? Here are the steps of the ``rc.a, rc.b = rc.b, rc.a`` line above:: + + >>> temp0, temp1 = rc.b, rc.a + >>> rc.a = temp0 + >>> rc.b = temp1 + >>> + +Note that ``temp0`` and ``temp1`` are objects still using the internal buffer of +the ``rc`` object above. So executing ``rc.a = temp0`` copies the buffer +contents of ``temp0`` into ``rc`` 's buffer. This, in turn, changes the +contents of ``temp1``. So, the last assignment ``rc.b = temp1`` doesn't have +the expected effect. + +Keep in mind that retrieving subobjects from Structures, Unions, and Arrays +doesn't *copy* the subobject, instead it retrieves a wrapper object accessing +the root-object's underlying buffer. + +Another example that may behave differently from what one would expect is this:: + + >>> s = c_char_p() + >>> s.value = "abc def ghi" + >>> s.value + 'abc def ghi' + >>> s.value is s.value + False + >>> + +Why is it printing ``False``? ctypes instances are objects containing a memory +block plus some descriptors accessing the contents of the memory. Storing a +Python object in the memory block does not store the object itself, instead the +``contents`` of the object is stored. Accessing the contents again constructs a +new Python object each time! + + +.. _ctypes-variable-sized-data-types: + +Variable-sized data types +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``ctypes`` provides some support for variable-sized arrays and structures (this +was added in version 0.9.9.7). + +The ``resize`` function can be used to resize the memory buffer of an existing +ctypes object. The function takes the object as first argument, and the +requested size in bytes as the second argument.
The memory block cannot be made +smaller than the natural memory block specified by the objects type, a +``ValueError`` is raised if this is tried:: + + >>> short_array = (c_short * 4)() + >>> print sizeof(short_array) + 8 + >>> resize(short_array, 4) + Traceback (most recent call last): + ... + ValueError: minimum size is 8 + >>> resize(short_array, 32) + >>> sizeof(short_array) + 32 + >>> sizeof(type(short_array)) + 8 + >>> + +This is nice and fine, but how would one access the additional elements +contained in this array? Since the type still only knows about 4 elements, we +get errors accessing other elements:: + + >>> short_array[:] + [0, 0, 0, 0] + >>> short_array[7] + Traceback (most recent call last): + ... + IndexError: invalid index + >>> + +Another way to use variable-sized data types with ``ctypes`` is to use the +dynamic nature of Python, and (re-)define the data type after the required size +is already known, on a case by case basis. + + +.. _ctypes-bugs-todo-non-implemented-things: + +Bugs, ToDo and non-implemented things +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Enumeration types are not implemented. You can do it easily yourself, using +:class:`c_int` as the base class. + +``long double`` is not implemented. + +.. % Local Variables: +.. % compile-command: "make.bat" +.. % End: + + +.. _ctypes-ctypes-reference: + +ctypes reference +---------------- + + +.. _ctypes-finding-shared-libraries: + +Finding shared libraries +^^^^^^^^^^^^^^^^^^^^^^^^ + +When programming in a compiled language, shared libraries are accessed when +compiling/linking a program, and when the program is run. + +The purpose of the ``find_library`` function is to locate a library in a way +similar to what the compiler does (on platforms with several versions of a +shared library the most recent should be loaded), while the ctypes library +loaders act like when a program is run, and call the runtime loader directly. 
+ +The ``ctypes.util`` module provides a function which can help to determine the +library to load. + + +.. data:: find_library(name) + :noindex: + + Try to find a library and return a pathname. *name* is the library name without + any prefix like *lib*, suffix like ``.so``, ``.dylib`` or version number (this + is the form used for the posix linker option :option:`-l`). If no library can + be found, returns ``None``. + +The exact functionality is system dependent. + +On Linux, ``find_library`` tries to run external programs (/sbin/ldconfig, gcc, +and objdump) to find the library file. It returns the filename of the library +file. Here are some examples:: + + >>> from ctypes.util import find_library + >>> find_library("m") + 'libm.so.6' + >>> find_library("c") + 'libc.so.6' + >>> find_library("bz2") + 'libbz2.so.1.0' + >>> + +On OS X, ``find_library`` tries several predefined naming schemes and paths to +locate the library, and returns a full pathname if successful:: + + >>> from ctypes.util import find_library + >>> find_library("c") + '/usr/lib/libc.dylib' + >>> find_library("m") + '/usr/lib/libm.dylib' + >>> find_library("bz2") + '/usr/lib/libbz2.dylib' + >>> find_library("AGL") + '/System/Library/Frameworks/AGL.framework/AGL' + >>> + +On Windows, ``find_library`` searches along the system search path, and returns +the full pathname, but since there is no predefined naming scheme a call like +``find_library("c")`` will fail and return ``None``. + +If wrapping a shared library with ``ctypes``, it *may* be better to determine +the shared library name at development time, and hardcode that into the wrapper +module instead of using ``find_library`` to locate the library at runtime. + + +.. _ctypes-loading-shared-libraries: + +Loading shared libraries +^^^^^^^^^^^^^^^^^^^^^^^^ + +There are several ways to load shared libraries into the Python process. One +way is to instantiate one of the following classes: + + +..
class:: CDLL(name, mode=DEFAULT_MODE, handle=None) + + Instances of this class represent loaded shared libraries. Functions in these + libraries use the standard C calling convention, and are assumed to return + ``int``. + + +.. class:: OleDLL(name, mode=DEFAULT_MODE, handle=None) + + Windows only: Instances of this class represent loaded shared libraries, + functions in these libraries use the ``stdcall`` calling convention, and are + assumed to return the Windows-specific :class:`HRESULT` code. :class:`HRESULT` + values contain information specifying whether the function call failed or + succeeded, together with an additional error code. If the return value signals a + failure, a :class:`WindowsError` is automatically raised. + + +.. class:: WinDLL(name, mode=DEFAULT_MODE, handle=None) + + Windows only: Instances of this class represent loaded shared libraries, + functions in these libraries use the ``stdcall`` calling convention, and are + assumed to return ``int`` by default. + + On Windows CE only the standard calling convention is used, for convenience the + :class:`WinDLL` and :class:`OleDLL` use the standard calling convention on this + platform. + +The Python GIL is released before calling any function exported by these +libraries, and reacquired afterwards. + + +.. class:: PyDLL(name, mode=DEFAULT_MODE, handle=None) + + Instances of this class behave like :class:`CDLL` instances, except that the + Python GIL is *not* released during the function call, and after the function + execution the Python error flag is checked. If the error flag is set, a Python + exception is raised. + + Thus, this is only useful to call Python C api functions directly. + +All these classes can be instantiated by calling them with at least one +argument, the pathname of the shared library.
If you have an existing handle to +an already loaded shared library, it can be passed as the ``handle`` named +parameter, otherwise the underlying platform's ``dlopen`` or :meth:`LoadLibrary` +function is used to load the library into the process, and to get a handle to +it. + +The *mode* parameter can be used to specify how the library is loaded. For +details, consult the ``dlopen(3)`` manpage; on Windows, *mode* is ignored. + + +.. data:: RTLD_GLOBAL + :noindex: + + Flag to use as *mode* parameter. On platforms where this flag is not available, + it is defined as the integer zero. + + +.. data:: RTLD_LOCAL + :noindex: + + Flag to use as *mode* parameter. On platforms where this is not available, it + is the same as *RTLD_GLOBAL*. + + +.. data:: DEFAULT_MODE + :noindex: + + The default mode which is used to load shared libraries. On OSX 10.3, this is + *RTLD_GLOBAL*, otherwise it is the same as *RTLD_LOCAL*. + +Instances of these classes have no public methods, however :meth:`__getattr__` +and :meth:`__getitem__` have special behaviour: functions exported by the shared +library can be accessed as attributes or by index. Please note that both +:meth:`__getattr__` and :meth:`__getitem__` cache their result, so calling them +repeatedly returns the same object each time. + +The following public attributes are available, their name starts with an +underscore to not clash with exported function names: + + +.. attribute:: PyDLL._handle + + The system handle used to access the library. + + +.. attribute:: PyDLL._name + + The name of the library passed in the constructor. + +Shared libraries can also be loaded by using one of the prefabricated objects, +which are instances of the :class:`LibraryLoader` class, either by calling the +:meth:`LoadLibrary` method, or by retrieving the library as attribute of the +loader instance. + + +.. class:: LibraryLoader(dlltype) + + Class which loads shared libraries.
``dlltype`` should be one of the + :class:`CDLL`, :class:`PyDLL`, :class:`WinDLL`, or :class:`OleDLL` types. + + :meth:`__getattr__` has special behaviour: It allows to load a shared library by + accessing it as attribute of a library loader instance. The result is cached, + so repeated attribute accesses return the same library each time. + + +.. method:: LibraryLoader.LoadLibrary(name) + + Load a shared library into the process and return it. This method always + returns a new instance of the library. + +These prefabricated library loaders are available: + + +.. data:: cdll + :noindex: + + Creates :class:`CDLL` instances. + + +.. data:: windll + :noindex: + + Windows only: Creates :class:`WinDLL` instances. + + +.. data:: oledll + :noindex: + + Windows only: Creates :class:`OleDLL` instances. + + +.. data:: pydll + :noindex: + + Creates :class:`PyDLL` instances. + +For accessing the C Python api directly, a ready-to-use Python shared library +object is available: + + +.. data:: pythonapi + :noindex: + + An instance of :class:`PyDLL` that exposes Python C api functions as attributes. + Note that all these functions are assumed to return C ``int``, which is of + course not always the truth, so you have to assign the correct :attr:`restype` + attribute to use these functions. + + +.. _ctypes-foreign-functions: + +Foreign functions +^^^^^^^^^^^^^^^^^ + +As explained in the previous section, foreign functions can be accessed as +attributes of loaded shared libraries. The function objects created in this way +by default accept any number of arguments, accept any ctypes data instances as +arguments, and return the default result type specified by the library loader. +They are instances of a private class: + + +.. class:: _FuncPtr + + Base class for C callable foreign functions. + +Instances of foreign functions are also C compatible data types; they represent +C function pointers. 
+ +This behaviour can be customized by assigning to special attributes of the +foreign function object. + + +.. attribute:: _FuncPtr.restype + + Assign a ctypes type to specify the result type of the foreign function. Use + ``None`` for ``void``, a function not returning anything. + + It is possible to assign a callable Python object that is not a ctypes type, in + this case the function is assumed to return a C ``int``, and the callable will + be called with this integer, allowing to do further processing or error + checking. Using this is deprecated, for more flexible postprocessing or error + checking use a ctypes data type as :attr:`restype` and assign a callable to the + :attr:`errcheck` attribute. + + +.. attribute:: _FuncPtr.argtypes + + Assign a tuple of ctypes types to specify the argument types that the function + accepts. Functions using the ``stdcall`` calling convention can only be called + with the same number of arguments as the length of this tuple; functions using + the C calling convention accept additional, unspecified arguments as well. + + When a foreign function is called, each actual argument is passed to the + :meth:`from_param` class method of the items in the :attr:`argtypes` tuple, this + method allows to adapt the actual argument to an object that the foreign + function accepts. For example, a :class:`c_char_p` item in the :attr:`argtypes` + tuple will convert a unicode string passed as argument into a byte string using + ctypes conversion rules. + + New: It is now possible to put items in argtypes which are not ctypes types, but + each item must have a :meth:`from_param` method which returns a value usable as + argument (integer, string, ctypes instance). This allows to define adapters + that can adapt custom objects as function parameters. + + +.. attribute:: _FuncPtr.errcheck + + Assign a Python function or another callable to this attribute. The callable + will be called with three or more arguments: + + +..
function:: callable(result, func, arguments) + :noindex: + + ``result`` is what the foreign function returns, as specified by the + :attr:`restype` attribute. + + ``func`` is the foreign function object itself, this allows to reuse the same + callable object to check or postprocess the results of several functions. + + ``arguments`` is a tuple containing the parameters originally passed to the + function call, this allows to specialize the behaviour on the arguments used. + + The object that this function returns will be returned from the foreign function + call, but it can also check the result value and raise an exception if the + foreign function call failed. + + +.. exception:: ArgumentError() + + This exception is raised when a foreign function call cannot convert one of the + passed arguments. + + +.. _ctypes-function-prototypes: + +Function prototypes +^^^^^^^^^^^^^^^^^^^ + +Foreign functions can also be created by instantiating function prototypes. +Function prototypes are similar to function prototypes in C; they describe a +function (return type, argument types, calling convention) without defining an +implementation. The factory functions must be called with the desired result +type and the argument types of the function. + + +.. function:: CFUNCTYPE(restype, *argtypes) + + The returned function prototype creates functions that use the standard C + calling convention. The function will release the GIL during the call. + + +.. function:: WINFUNCTYPE(restype, *argtypes) + + Windows only: The returned function prototype creates functions that use the + ``stdcall`` calling convention, except on Windows CE where :func:`WINFUNCTYPE` + is the same as :func:`CFUNCTYPE`. The function will release the GIL during the + call. + + +.. function:: PYFUNCTYPE(restype, *argtypes) + + The returned function prototype creates functions that use the Python calling + convention. The function will *not* release the GIL during the call. 
+ +Function prototypes created by the factory functions can be instantiated in +different ways, depending on the type and number of the parameters in the call. + + +.. function:: prototype(address) + :noindex: + + Returns a foreign function at the specified address. + + +.. function:: prototype(callable) + :noindex: + + Create a C callable function (a callback function) from a Python ``callable``. + + +.. function:: prototype(func_spec[, paramflags]) + :noindex: + + Returns a foreign function exported by a shared library. ``func_spec`` must be a + 2-tuple ``(name_or_ordinal, library)``. The first item is the name of the + exported function as string, or the ordinal of the exported function as small + integer. The second item is the shared library instance. + + +.. function:: prototype(vtbl_index, name[, paramflags[, iid]]) + :noindex: + + Returns a foreign function that will call a COM method. ``vtbl_index`` is the + index into the virtual function table, a small nonnegative integer. *name* is + name of the COM method. *iid* is an optional pointer to the interface identifier + which is used in extended error reporting. + + COM methods use a special calling convention: They require a pointer to the COM + interface as first argument, in addition to those parameters that are specified + in the :attr:`argtypes` tuple. + +The optional *paramflags* parameter creates foreign function wrappers with much +more functionality than the features described above. + +*paramflags* must be a tuple of the same length as :attr:`argtypes`. + +Each item in this tuple contains further information about a parameter, it must +be a tuple containing 1, 2, or 3 items. + +The first item is an integer containing flags for the parameter: + + +.. data:: 1 + :noindex: + + Specifies an input parameter to the function. + + +.. data:: 2 + :noindex: + + Output parameter. The foreign function fills in a value. + + +.. data:: 4 + :noindex: + + Input parameter which defaults to the integer zero. 
+ +The optional second item is the parameter name as string. If this is specified, +the foreign function can be called with named parameters. + +The optional third item is the default value for this parameter. + +This example demonstrates how to wrap the Windows ``MessageBoxA`` function so +that it supports default parameters and named arguments. The C declaration from +the windows header file is this:: + + WINUSERAPI int WINAPI + MessageBoxA( + HWND hWnd , + LPCSTR lpText, + LPCSTR lpCaption, + UINT uType); + +Here is the wrapping with ``ctypes``: + + :: + + >>> from ctypes import c_int, WINFUNCTYPE, windll + >>> from ctypes.wintypes import HWND, LPCSTR, UINT + >>> prototype = WINFUNCTYPE(c_int, HWND, LPCSTR, LPCSTR, UINT) + >>> paramflags = (1, "hwnd", 0), (1, "text", "Hi"), (1, "caption", None), (1, "flags", 0) + >>> MessageBox = prototype(("MessageBoxA", windll.user32), paramflags) + >>> + +The MessageBox foreign function can now be called in these ways:: + + >>> MessageBox() + >>> MessageBox(text="Spam, spam, spam") + >>> MessageBox(flags=2, text="foo bar") + >>> + +A second example demonstrates output parameters. The win32 ``GetWindowRect`` +function retrieves the dimensions of a specified window by copying them into +``RECT`` structure that the caller has to supply. 
Here is the C declaration:: + + WINUSERAPI BOOL WINAPI + GetWindowRect( + HWND hWnd, + LPRECT lpRect); + +Here is the wrapping with ``ctypes``: + + :: + + >>> from ctypes import POINTER, WINFUNCTYPE, windll, WinError + >>> from ctypes.wintypes import BOOL, HWND, RECT + >>> prototype = WINFUNCTYPE(BOOL, HWND, POINTER(RECT)) + >>> paramflags = (1, "hwnd"), (2, "lprect") + >>> GetWindowRect = prototype(("GetWindowRect", windll.user32), paramflags) + >>> + +Functions with output parameters will automatically return the output parameter +value if there is a single one, or a tuple containing the output parameter +values when there are more than one, so the GetWindowRect function now returns a +RECT instance, when called. + +Output parameters can be combined with the :attr:`errcheck` protocol to do +further output processing and error checking. The win32 ``GetWindowRect`` api +function returns a ``BOOL`` to signal success or failure, so this function could +do the error checking, and raises an exception when the api call failed:: + + >>> def errcheck(result, func, args): + ... if not result: + ... raise WinError() + ... return args + >>> GetWindowRect.errcheck = errcheck + >>> + +If the :attr:`errcheck` function returns the argument tuple it receives +unchanged, ``ctypes`` continues the normal processing it does on the output +parameters. If you want to return a tuple of window coordinates instead of a +``RECT`` instance, you can retrieve the fields in the function and return them +instead, the normal processing will no longer take place:: + + >>> def errcheck(result, func, args): + ... if not result: + ... raise WinError() + ... rc = args[1] + ... return rc.left, rc.top, rc.bottom, rc.right + >>> + >>> GetWindowRect.errcheck = errcheck + >>> + + +.. _ctypes-utility-functions: + +Utility functions +^^^^^^^^^^^^^^^^^ + + +.. function:: addressof(obj) + + Returns the address of the memory buffer as integer. ``obj`` must be an + instance of a ctypes type. + + +.. 
function:: alignment(obj_or_type) + + Returns the alignment requirements of a ctypes type. ``obj_or_type`` must be a + ctypes type or instance. + + +.. function:: byref(obj) + + Returns a light-weight pointer to ``obj``, which must be an instance of a ctypes + type. The returned object can only be used as a foreign function call parameter. + It behaves similar to ``pointer(obj)``, but the construction is a lot faster. + + +.. function:: cast(obj, type) + + This function is similar to the cast operator in C. It returns a new instance of + ``type`` which points to the same memory block as ``obj``. ``type`` must be a + pointer type, and ``obj`` must be an object that can be interpreted as a + pointer. + + +.. function:: create_string_buffer(init_or_size[, size]) + + This function creates a mutable character buffer. The returned object is a + ctypes array of :class:`c_char`. + + ``init_or_size`` must be an integer which specifies the size of the array, or a + string which will be used to initialize the array items. + + If a string is specified as first argument, the buffer is made one item larger + than the length of the string so that the last element in the array is a NUL + termination character. An integer can be passed as second argument which allows + to specify the size of the array if the length of the string should not be used. + + If the first parameter is a unicode string, it is converted into an 8-bit string + according to ctypes conversion rules. + + +.. function:: create_unicode_buffer(init_or_size[, size]) + + This function creates a mutable unicode character buffer. The returned object is + a ctypes array of :class:`c_wchar`. + + ``init_or_size`` must be an integer which specifies the size of the array, or a + unicode string which will be used to initialize the array items. 
+ + If a unicode string is specified as first argument, the buffer is made one item + larger than the length of the string so that the last element in the array is a + NUL termination character. An integer can be passed as second argument which + allows to specify the size of the array if the length of the string should not + be used. + + If the first parameter is an 8-bit string, it is converted into a unicode string + according to ctypes conversion rules. + + +.. function:: DllCanUnloadNow() + + Windows only: This function is a hook which allows to implement inprocess COM + servers with ctypes. It is called from the DllCanUnloadNow function that the + ``_ctypes`` extension dll exports. + + +.. function:: DllGetClassObject() + + Windows only: This function is a hook which allows to implement inprocess COM + servers with ctypes. It is called from the DllGetClassObject function that the + ``_ctypes`` extension dll exports. + + +.. function:: FormatError([code]) + + Windows only: Returns a textual description of the error code. If no error code + is specified, the last error code is used by calling the Windows api function + GetLastError. + + +.. function:: GetLastError() + + Windows only: Returns the last error code set by Windows in the calling thread. + + +.. function:: memmove(dst, src, count) + + Same as the standard C memmove library function: copies *count* bytes from + ``src`` to *dst*. *dst* and ``src`` must be integers or ctypes instances that + can be converted to pointers. + + +.. function:: memset(dst, c, count) + + Same as the standard C memset library function: fills the memory block at + address *dst* with *count* bytes of value *c*. *dst* must be an integer + specifying an address, or a ctypes instance. + + +.. function:: POINTER(type) + + This factory function creates and returns a new ctypes pointer type. Pointer + types are cached and reused internally, so calling this function repeatedly is + cheap. ``type`` must be a ctypes type. + + +..
function:: pointer(obj) + + This function creates a new pointer instance, pointing to ``obj``. The returned + object is of the type POINTER(type(obj)). + + Note: If you just want to pass a pointer to an object to a foreign function + call, you should use ``byref(obj)`` which is much faster. + + +.. function:: resize(obj, size) + + This function resizes the internal memory buffer of obj, which must be an + instance of a ctypes type. It is not possible to make the buffer smaller than + the native size of the object's type, as given by sizeof(type(obj)), but it is + possible to enlarge the buffer. + + +.. function:: set_conversion_mode(encoding, errors) + + This function sets the rules that ctypes objects use when converting between + 8-bit strings and unicode strings. encoding must be a string specifying an + encoding, like ``'utf-8'`` or ``'mbcs'``, errors must be a string specifying the + error handling on encoding/decoding errors. Examples of possible values are + ``"strict"``, ``"replace"``, or ``"ignore"``. + + ``set_conversion_mode`` returns a 2-tuple containing the previous conversion + rules. On Windows, the initial conversion rules are ``('mbcs', 'ignore')``, on + other systems ``('ascii', 'strict')``. + + +.. function:: sizeof(obj_or_type) + + Returns the size in bytes of a ctypes type or instance memory buffer. Does the + same as the C ``sizeof()`` function. + + +.. function:: string_at(address[, size]) + + This function returns the string starting at memory address ``address``. If size + is specified, it is used as size, otherwise the string is assumed to be + zero-terminated. + + +.. function:: WinError(code=None, descr=None) + + Windows only: this function is probably the worst-named thing in ctypes. It + creates an instance of WindowsError. If *code* is not specified, + ``GetLastError`` is called to determine the error code. If ``descr`` is not + specified, :func:`FormatError` is called to get a textual description of the + error. + + +..
function:: wstring_at(address[, size]) + + This function returns the wide character string starting at memory address + ``address`` as a unicode string. If ``size`` is specified, it is used as the + number of characters of the string, otherwise the string is assumed to be + zero-terminated. + + +.. _ctypes-data-types: + +Data types +^^^^^^^^^^ + + +.. class:: _CData + + This non-public class is the common base class of all ctypes data types. Among + other things, all ctypes type instances contain a memory block that holds C + compatible data; the address of the memory block is returned by the + ``addressof()`` helper function. Another instance variable is exposed as + :attr:`_objects`; this contains other Python objects that need to be kept alive + in case the memory block contains pointers. + +Common methods of ctypes data types, these are all class methods (to be exact, +they are methods of the metaclass): + + +.. method:: _CData.from_address(address) + + This method returns a ctypes type instance using the memory specified by address + which must be an integer. + + +.. method:: _CData.from_param(obj) + + This method adapts obj to a ctypes type. It is called with the actual object + used in a foreign function call, when the type is present in the foreign + function's :attr:`argtypes` tuple; it must return an object that can be used as + function call parameter. + + All ctypes data types have a default implementation of this classmethod, + normally it returns ``obj`` if that is an instance of the type. Some types + accept other objects as well. + + +.. method:: _CData.in_dll(library, name) + + This method returns a ctypes type instance exported by a shared library. *name* + is the name of the symbol that exports the data, *library* is the loaded shared + library. + +Common instance variables of ctypes data types: + + +..
attribute:: _CData._b_base_ + + Sometimes ctypes data instances do not own the memory block they contain, + instead they share part of the memory block of a base object. The + :attr:`_b_base_` readonly member is the root ctypes object that owns the memory + block. + + +.. attribute:: _CData._b_needsfree_ + + This readonly variable is true when the ctypes data instance has allocated the + memory block itself, false otherwise. + + +.. attribute:: _CData._objects + + This member is either ``None`` or a dictionary containing Python objects that + need to be kept alive so that the memory block contents is kept valid. This + object is only exposed for debugging; never modify the contents of this + dictionary. + + +.. _ctypes-fundamental-data-types-2: + +Fundamental data types +^^^^^^^^^^^^^^^^^^^^^^ + + +.. class:: _SimpleCData + + This non-public class is the base class of all fundamental ctypes data types. It + is mentioned here because it contains the common attributes of the fundamental + ctypes data types. ``_SimpleCData`` is a subclass of ``_CData``, so it inherits + their methods and attributes. + +Instances have a single attribute: + + +.. attribute:: _SimpleCData.value + + This attribute contains the actual value of the instance. For integer and + pointer types, it is an integer, for character types, it is a single character + string, for character pointer types it is a Python string or unicode string. + + When the ``value`` attribute is retrieved from a ctypes instance, usually a new + object is returned each time. ``ctypes`` does *not* implement original object + return, always a new object is constructed. The same is true for all other + ctypes object instances. + +Fundamental data types, when returned as foreign function call results, or, for +example, by retrieving structure field members or array items, are transparently +converted to native Python types. 
In other words, if a foreign function has a +:attr:`restype` of :class:`c_char_p`, you will always receive a Python string, +*not* a :class:`c_char_p` instance. + +Subclasses of fundamental data types do *not* inherit this behaviour. So, if a +foreign function's :attr:`restype` is a subclass of :class:`c_void_p`, you will +receive an instance of this subclass from the function call. Of course, you can +get the value of the pointer by accessing the ``value`` attribute. + +These are the fundamental ctypes data types: + + +.. class:: c_byte + + Represents the C signed char datatype, and interprets the value as small + integer. The constructor accepts an optional integer initializer; no overflow + checking is done. + + +.. class:: c_char + + Represents the C char datatype, and interprets the value as a single character. + The constructor accepts an optional string initializer, the length of the string + must be exactly one character. + + +.. class:: c_char_p + + Represents the C char \* datatype, which must be a pointer to a zero-terminated + string. The constructor accepts an integer address, or a string. + + +.. class:: c_double + + Represents the C double datatype. The constructor accepts an optional float + initializer. + + +.. class:: c_float + + Represents the C float datatype. The constructor accepts an optional float + initializer. + + +.. class:: c_int + + Represents the C signed int datatype. The constructor accepts an optional + integer initializer; no overflow checking is done. On platforms where + ``sizeof(int) == sizeof(long)`` it is an alias for :class:`c_long`. + + +.. class:: c_int8 + + Represents the C 8-bit ``signed int`` datatype. Usually an alias for + :class:`c_byte`. + + +.. class:: c_int16 + + Represents the C 16-bit signed int datatype. Usually an alias for + :class:`c_short`. + + +.. class:: c_int32 + + Represents the C 32-bit signed int datatype. Usually an alias for + :class:`c_int`. + + +..
class:: c_int64 + + Represents the C 64-bit ``signed int`` datatype. Usually an alias for + :class:`c_longlong`. + + +.. class:: c_long + + Represents the C ``signed long`` datatype. The constructor accepts an optional + integer initializer; no overflow checking is done. + + +.. class:: c_longlong + + Represents the C ``signed long long`` datatype. The constructor accepts an + optional integer initializer; no overflow checking is done. + + +.. class:: c_short + + Represents the C ``signed short`` datatype. The constructor accepts an optional + integer initializer; no overflow checking is done. + + +.. class:: c_size_t + + Represents the C ``size_t`` datatype. + + +.. class:: c_ubyte + + Represents the C ``unsigned char`` datatype, it interprets the value as small + integer. The constructor accepts an optional integer initializer; no overflow + checking is done. + + +.. class:: c_uint + + Represents the C ``unsigned int`` datatype. The constructor accepts an optional + integer initializer; no overflow checking is done. On platforms where + ``sizeof(int) == sizeof(long)`` it is an alias for :class:`c_ulong`. + + +.. class:: c_uint8 + + Represents the C 8-bit unsigned int datatype. Usually an alias for + :class:`c_ubyte`. + + +.. class:: c_uint16 + + Represents the C 16-bit unsigned int datatype. Usually an alias for + :class:`c_ushort`. + + +.. class:: c_uint32 + + Represents the C 32-bit unsigned int datatype. Usually an alias for + :class:`c_uint`. + + +.. class:: c_uint64 + + Represents the C 64-bit unsigned int datatype. Usually an alias for + :class:`c_ulonglong`. + + +.. class:: c_ulong + + Represents the C ``unsigned long`` datatype. The constructor accepts an optional + integer initializer; no overflow checking is done. + + +.. class:: c_ulonglong + + Represents the C ``unsigned long long`` datatype. The constructor accepts an + optional integer initializer; no overflow checking is done. + + +.. 
class:: c_ushort + + Represents the C ``unsigned short`` datatype. The constructor accepts an + optional integer initializer; no overflow checking is done. + + +.. class:: c_void_p + + Represents the C ``void *`` type. The value is represented as integer. The + constructor accepts an optional integer initializer. + + +.. class:: c_wchar + + Represents the C ``wchar_t`` datatype, and interprets the value as a single + character unicode string. The constructor accepts an optional string + initializer, the length of the string must be exactly one character. + + +.. class:: c_wchar_p + + Represents the C ``wchar_t *`` datatype, which must be a pointer to a + zero-terminated wide character string. The constructor accepts an integer + address, or a string. + + +.. class:: c_bool + + Represents the C ``bool`` datatype (more accurately, ``_Bool`` from C99). Its value + can be True or False, and the constructor accepts any object that has a truth + value. + + .. versionadded:: 2.6 + + +.. class:: HRESULT + + Windows only: Represents a :class:`HRESULT` value, which contains success or + error information for a function or method call. + + +.. class:: py_object + + Represents the C ``PyObject *`` datatype. Calling this without an argument + creates a ``NULL`` ``PyObject *`` pointer. + +The ``ctypes.wintypes`` module provides a number of other Windows-specific data +types, for example ``HWND``, ``WPARAM``, or ``DWORD``. Some useful structures +like ``MSG`` or ``RECT`` are also defined. + + +.. _ctypes-structured-data-types: + +Structured data types +^^^^^^^^^^^^^^^^^^^^^ + + +.. class:: Union(*args, **kw) + + Abstract base class for unions in native byte order. + + +.. class:: BigEndianStructure(*args, **kw) + + Abstract base class for structures in *big endian* byte order. + + +.. class:: LittleEndianStructure(*args, **kw) + + Abstract base class for structures in *little endian* byte order.
+ +Structures with non-native byte order cannot contain pointer type fields, or any +other data types containing pointer type fields. + + +.. class:: Structure(*args, **kw) + + Abstract base class for structures in *native* byte order. + +Concrete structure and union types must be created by subclassing one of these +types, and at least define a :attr:`_fields_` class variable. ``ctypes`` will +create descriptors which allow reading and writing the fields by direct +attribute accesses. These are the + + +.. attribute:: Structure._fields_ + + A sequence defining the structure fields. The items must be 2-tuples or + 3-tuples. The first item is the name of the field, the second item specifies + the type of the field; it can be any ctypes data type. + + For integer type fields like :class:`c_int`, a third optional item can be given. + It must be a small positive integer defining the bit width of the field. + + Field names must be unique within one structure or union. This is not checked, + only one field can be accessed when names are repeated. + + It is possible to define the :attr:`_fields_` class variable *after* the class + statement that defines the Structure subclass, this allows to create data types + that directly or indirectly reference themselves:: + + class List(Structure): + pass + List._fields_ = [("pnext", POINTER(List)), + ... + ] + + The :attr:`_fields_` class variable must, however, be defined before the type is + first used (an instance is created, ``sizeof()`` is called on it, and so on). + Later assignments to the :attr:`_fields_` class variable will raise an + AttributeError. + + Structure and union subclass constructors accept both positional and named + arguments. Positional arguments are used to initialize the fields in the same + order as they appear in the :attr:`_fields_` definition, named arguments are + used to initialize the fields with the corresponding name. 
+ + It is possible to define sub-subclasses of structure types, they inherit the + fields of the base class plus the :attr:`_fields_` defined in the sub-subclass, + if any. + + +.. attribute:: Structure._pack_ + + An optional small integer that allows overriding the alignment of structure + fields in the instance. :attr:`_pack_` must already be defined when + :attr:`_fields_` is assigned, otherwise it will have no effect. + + +.. attribute:: Structure._anonymous_ + + An optional sequence that lists the names of unnamed (anonymous) fields. + ``_anonymous_`` must be already defined when :attr:`_fields_` is assigned, + otherwise it will have no effect. + + The fields listed in this variable must be structure or union type fields. + ``ctypes`` will create descriptors in the structure type that allow accessing + the nested fields directly, without the need to create the structure or union + field. + + Here is an example type (Windows):: + + class _U(Union): + _fields_ = [("lptdesc", POINTER(TYPEDESC)), + ("lpadesc", POINTER(ARRAYDESC)), + ("hreftype", HREFTYPE)] + + class TYPEDESC(Structure): + _fields_ = [("u", _U), + ("vt", VARTYPE)] + + _anonymous_ = ("u",) + + The ``TYPEDESC`` structure describes a COM data type, the ``vt`` field specifies + which one of the union fields is valid. Since the ``u`` field is defined as + anonymous field, it is now possible to access the members directly off the + TYPEDESC instance. ``td.lptdesc`` and ``td.u.lptdesc`` are equivalent, but the + former is faster since it does not need to create a temporary union instance:: + + td = TYPEDESC() + td.vt = VT_PTR + td.lptdesc = POINTER(some_type) + td.u.lptdesc = POINTER(some_type) + +It is possible to define sub-subclasses of structures, they inherit the fields +of the base class. If the subclass definition has a separate :attr:`_fields_` +variable, the fields specified in this are appended to the fields of the base +class.
+ +Structure and union constructors accept both positional and keyword arguments. +Positional arguments are used to initialize member fields in the same order as +they appear in :attr:`_fields_`. Keyword arguments in the constructor are +interpreted as attribute assignments, so they will initialize :attr:`_fields_` +with the same name, or create new attributes for names not present in +:attr:`_fields_`. + + +.. _ctypes-arrays-pointers: + +Arrays and pointers +^^^^^^^^^^^^^^^^^^^ + +Not yet written - please see the sections :ref:`ctypes-pointers` and +:ref:`ctypes-arrays` in the tutorial. + diff --git a/Doc/library/curses.ascii.rst b/Doc/library/curses.ascii.rst new file mode 100644 index 0000000..0a45c2a --- /dev/null +++ b/Doc/library/curses.ascii.rst @@ -0,0 +1,228 @@ + +:mod:`curses.ascii` --- Utilities for ASCII characters +====================================================== + +.. module:: curses.ascii + :synopsis: Constants and set-membership functions for ASCII characters. +.. moduleauthor:: Eric S. Raymond +.. sectionauthor:: Eric S. Raymond + + +.. versionadded:: 1.6 + +The :mod:`curses.ascii` module supplies name constants for ASCII characters and +functions to test membership in various ASCII character classes.
The constants +supplied are names for control characters as follows: + ++--------------+----------------------------------------------+ +| Name | Meaning | ++==============+==============================================+ +| :const:`NUL` | | ++--------------+----------------------------------------------+ +| :const:`SOH` | Start of heading, console interrupt | ++--------------+----------------------------------------------+ +| :const:`STX` | Start of text | ++--------------+----------------------------------------------+ +| :const:`ETX` | End of text | ++--------------+----------------------------------------------+ +| :const:`EOT` | End of transmission | ++--------------+----------------------------------------------+ +| :const:`ENQ` | Enquiry, goes with :const:`ACK` flow control | ++--------------+----------------------------------------------+ +| :const:`ACK` | Acknowledgement | ++--------------+----------------------------------------------+ +| :const:`BEL` | Bell | ++--------------+----------------------------------------------+ +| :const:`BS` | Backspace | ++--------------+----------------------------------------------+ +| :const:`TAB` | Tab | ++--------------+----------------------------------------------+ +| :const:`HT` | Alias for :const:`TAB`: "Horizontal tab" | ++--------------+----------------------------------------------+ +| :const:`LF` | Line feed | ++--------------+----------------------------------------------+ +| :const:`NL` | Alias for :const:`LF`: "New line" | ++--------------+----------------------------------------------+ +| :const:`VT` | Vertical tab | ++--------------+----------------------------------------------+ +| :const:`FF` | Form feed | ++--------------+----------------------------------------------+ +| :const:`CR` | Carriage return | ++--------------+----------------------------------------------+ +| :const:`SO` | Shift-out, begin alternate character set | ++--------------+----------------------------------------------+ +| :const:`SI` 
| Shift-in, resume default character set | ++--------------+----------------------------------------------+ +| :const:`DLE` | Data-link escape | ++--------------+----------------------------------------------+ +| :const:`DC1` | XON, for flow control | ++--------------+----------------------------------------------+ +| :const:`DC2` | Device control 2, block-mode flow control | ++--------------+----------------------------------------------+ +| :const:`DC3` | XOFF, for flow control | ++--------------+----------------------------------------------+ +| :const:`DC4` | Device control 4 | ++--------------+----------------------------------------------+ +| :const:`NAK` | Negative acknowledgement | ++--------------+----------------------------------------------+ +| :const:`SYN` | Synchronous idle | ++--------------+----------------------------------------------+ +| :const:`ETB` | End transmission block | ++--------------+----------------------------------------------+ +| :const:`CAN` | Cancel | ++--------------+----------------------------------------------+ +| :const:`EM` | End of medium | ++--------------+----------------------------------------------+ +| :const:`SUB` | Substitute | ++--------------+----------------------------------------------+ +| :const:`ESC` | Escape | ++--------------+----------------------------------------------+ +| :const:`FS` | File separator | ++--------------+----------------------------------------------+ +| :const:`GS` | Group separator | ++--------------+----------------------------------------------+ +| :const:`RS` | Record separator, block-mode terminator | ++--------------+----------------------------------------------+ +| :const:`US` | Unit separator | ++--------------+----------------------------------------------+ +| :const:`SP` | Space | ++--------------+----------------------------------------------+ +| :const:`DEL` | Delete | ++--------------+----------------------------------------------+ + +Note that many of these have little 
practical significance in modern usage. The +mnemonics derive from teleprinter conventions that predate digital computers. + +The module supplies the following functions, patterned on those in the standard +C library: + + +.. function:: isalnum(c) + + Checks for an ASCII alphanumeric character; it is equivalent to ``isalpha(c) or + isdigit(c)``. + + +.. function:: isalpha(c) + + Checks for an ASCII alphabetic character; it is equivalent to ``isupper(c) or + islower(c)``. + + +.. function:: isascii(c) + + Checks for a character value that fits in the 7-bit ASCII set. + + +.. function:: isblank(c) + + Checks for an ASCII whitespace character. + + +.. function:: iscntrl(c) + + Checks for an ASCII control character (in the range 0x00 to 0x1f). + + +.. function:: isdigit(c) + + Checks for an ASCII decimal digit, ``'0'`` through ``'9'``. This is equivalent + to ``c in string.digits``. + + +.. function:: isgraph(c) + + Checks for any ASCII printable character except space. + + +.. function:: islower(c) + + Checks for an ASCII lower-case character. + + +.. function:: isprint(c) + + Checks for any ASCII printable character including space. + + +.. function:: ispunct(c) + + Checks for any printable ASCII character which is not a space or an alphanumeric + character. + + +.. function:: isspace(c) + + Checks for ASCII white-space characters: space, line feed, carriage return, form + feed, horizontal tab, vertical tab. + + +.. function:: isupper(c) + + Checks for an ASCII uppercase letter. + + +.. function:: isxdigit(c) + + Checks for an ASCII hexadecimal digit. This is equivalent to ``c in + string.hexdigits``. + + +.. function:: isctrl(c) + + Checks for an ASCII control character (ordinal values 0 to 31). + + +.. function:: ismeta(c) + + Checks for a non-ASCII character (ordinal values 0x80 and above). + +These functions accept either integers or strings; when the argument is a +string, it is first converted using the built-in function :func:`ord`.
+ +Note that all these functions check ordinal bit values derived from the first +character of the string you pass in; they do not actually know anything about +the host machine's character encoding. For functions that know about the +character encoding (and handle internationalization properly) see the +:mod:`string` module. + +The following two functions take either a single-character string or integer +byte value; they return a value of the same type. + + +.. function:: ascii(c) + + Return the ASCII value corresponding to the low 7 bits of *c*. + + +.. function:: ctrl(c) + + Return the control character corresponding to the given character (the character + bit value is bitwise-anded with 0x1f). + + +.. function:: alt(c) + + Return the 8-bit character corresponding to the given ASCII character (the + character bit value is bitwise-ored with 0x80). + +The following function takes either a single-character string or integer value; +it returns a string. + + +.. function:: unctrl(c) + + Return a string representation of the ASCII character *c*. If *c* is printable, + this string is the character itself. If the character is a control character + (0x00-0x1f) the string consists of a caret (``'^'``) followed by the + corresponding uppercase letter. If the character is an ASCII delete (0x7f) the + string is ``'^?'``. If the character has its meta bit (0x80) set, the meta bit + is stripped, the preceding rules applied, and ``'!'`` prepended to the result. + + +.. data:: controlnames + + A 33-element string array that contains the ASCII mnemonics for the thirty-two + ASCII control characters from 0 (NUL) to 0x1f (US), in order, plus the mnemonic + ``SP`` for the space character. + diff --git a/Doc/library/curses.panel.rst b/Doc/library/curses.panel.rst new file mode 100644 index 0000000..59e5b86 --- /dev/null +++ b/Doc/library/curses.panel.rst @@ -0,0 +1,119 @@ + +:mod:`curses.panel` --- A panel stack extension for curses. 
+=========================================================== + +.. module:: curses.panel + :synopsis: A panel stack extension that adds depth to curses windows. +.. sectionauthor:: A.M. Kuchling + + +Panels are windows with the added feature of depth, so they can be stacked on +top of each other, and only the visible portions of each window will be +displayed. Panels can be added, moved up or down in the stack, and removed. + + +.. _cursespanel-functions: + +Functions +--------- + +The module :mod:`curses.panel` defines the following functions: + + +.. function:: bottom_panel() + + Returns the bottom panel in the panel stack. + + +.. function:: new_panel(win) + + Returns a panel object, associating it with the given window *win*. Be aware + that you need to keep the returned panel object referenced explicitly. If you + don't, the panel object is garbage collected and removed from the panel stack. + + +.. function:: top_panel() + + Returns the top panel in the panel stack. + + +.. function:: update_panels() + + Updates the virtual screen after changes in the panel stack. This does not call + :func:`curses.doupdate`, so you'll have to do this yourself. + + +.. _curses-panel-objects: + +Panel Objects +------------- + +Panel objects, as returned by :func:`new_panel` above, are windows with a +stacking order. There's always a window associated with a panel which determines +the content, while the panel methods are responsible for the window's depth in +the panel stack. + +Panel objects have the following methods: + + +.. method:: Panel.above() + + Returns the panel above the current panel. + + +.. method:: Panel.below() + + Returns the panel below the current panel. + + +.. method:: Panel.bottom() + + Push the panel to the bottom of the stack. + + +.. method:: Panel.hidden() + + Returns true if the panel is hidden (not visible), false otherwise. + + +.. method:: Panel.hide() + + Hide the panel. 
This does not delete the object, it just makes the window on + screen invisible. + + +.. method:: Panel.move(y, x) + + Move the panel to the screen coordinates ``(y, x)``. + + +.. method:: Panel.replace(win) + + Change the window associated with the panel to the window *win*. + + +.. method:: Panel.set_userptr(obj) + + Set the panel's user pointer to *obj*. This is used to associate an arbitrary + piece of data with the panel, and can be any Python object. + + +.. method:: Panel.show() + + Display the panel (which might have been hidden). + + +.. method:: Panel.top() + + Push panel to the top of the stack. + + +.. method:: Panel.userptr() + + Returns the user pointer for the panel. This might be any Python object. + + +.. method:: Panel.window() + + Returns the window object associated with the panel. + diff --git a/Doc/library/curses.rst b/Doc/library/curses.rst new file mode 100644 index 0000000..91af757 --- /dev/null +++ b/Doc/library/curses.rst @@ -0,0 +1,1679 @@ + +:mod:`curses` --- Terminal handling for character-cell displays +=============================================================== + +.. module:: curses + :synopsis: An interface to the curses library, providing portable terminal handling. +.. sectionauthor:: Moshe Zadka +.. sectionauthor:: Eric Raymond + + +.. versionchanged:: 1.6 + Added support for the ``ncurses`` library and converted to a package. + +The :mod:`curses` module provides an interface to the curses library, the +de-facto standard for portable advanced terminal handling. + +While curses is most widely used in the Unix environment, versions are available +for DOS, OS/2, and possibly other systems as well. This extension module is +designed to match the API of ncurses, an open-source curses library hosted on +Linux and the BSD variants of Unix. + + +.. seealso:: + + Module :mod:`curses.ascii` + Utilities for working with ASCII characters, regardless of your locale settings. 
+ + Module :mod:`curses.panel` + A panel stack extension that adds depth to curses windows. + + Module :mod:`curses.textpad` + Editable text widget for curses supporting :program:`Emacs`\ -like bindings. + + Module :mod:`curses.wrapper` + Convenience function to ensure proper terminal setup and resetting on + application entry and exit. + + `Curses Programming with Python `_ + Tutorial material on using curses with Python, by Andrew Kuchling and Eric + Raymond, is available on the Python Web site. + + The :file:`Demo/curses/` directory in the Python source distribution contains + some example programs using the curses bindings provided by this module. + + +.. _curses-functions: + +Functions +--------- + +The module :mod:`curses` defines the following exception: + + +.. exception:: error + + Exception raised when a curses library function returns an error. + +.. note:: + + Whenever *x* or *y* arguments to a function or a method are optional, they + default to the current cursor location. Whenever *attr* is optional, it defaults + to :const:`A_NORMAL`. + +The module :mod:`curses` defines the following functions: + + +.. function:: baudrate() + + Returns the output speed of the terminal in bits per second. On software + terminal emulators it will have a fixed high value. Included for historical + reasons; in former times, it was used to write output loops for time delays and + occasionally to change interfaces depending on the line speed. + + +.. function:: beep() + + Emit a short attention sound. + + +.. function:: can_change_color() + + Returns true or false, depending on whether the programmer can change the colors + displayed by the terminal. + + +.. function:: cbreak() + + Enter cbreak mode. In cbreak mode (sometimes called "rare" mode) normal tty + line buffering is turned off and characters are available to be read one by one. 
+ However, unlike raw mode, special characters (interrupt, quit, suspend, and flow + control) retain their effects on the tty driver and calling program. Calling + first :func:`raw` then :func:`cbreak` leaves the terminal in cbreak mode. + + +.. function:: color_content(color_number) + + Returns the intensity of the red, green, and blue (RGB) components in the color + *color_number*, which must be between ``0`` and :const:`COLORS`. A 3-tuple is + returned, containing the R,G,B values for the given color, which will be between + ``0`` (no component) and ``1000`` (maximum amount of component). + + +.. function:: color_pair(color_number) + + Returns the attribute value for displaying text in the specified color. This + attribute value can be combined with :const:`A_STANDOUT`, :const:`A_REVERSE`, + and the other :const:`A_\*` attributes. :func:`pair_number` is the counterpart + to this function. + + +.. function:: curs_set(visibility) + + Sets the cursor state. *visibility* can be set to 0, 1, or 2, for invisible, + normal, or very visible. If the terminal supports the visibility requested, the + previous cursor state is returned; otherwise, an exception is raised. On many + terminals, the "visible" mode is an underline cursor and the "very visible" mode + is a block cursor. + + +.. function:: def_prog_mode() + + Saves the current terminal mode as the "program" mode, the mode when the running + program is using curses. (Its counterpart is the "shell" mode, for when the + program is not in curses.) Subsequent calls to :func:`reset_prog_mode` will + restore this mode. + + +.. function:: def_shell_mode() + + Saves the current terminal mode as the "shell" mode, the mode when the running + program is not using curses. (Its counterpart is the "program" mode, when the + program is using curses capabilities.) Subsequent calls to + :func:`reset_shell_mode` will restore this mode. + + +.. function:: delay_output(ms) + + Inserts an *ms* millisecond pause in output. + + +.. 
function:: doupdate() + + Update the physical screen. The curses library keeps two data structures, one + representing the current physical screen contents and a virtual screen + representing the desired next state. The :func:`doupdate` function updates the + physical screen to match the virtual screen. + + The virtual screen may be updated by a :meth:`noutrefresh` call after write + operations such as :meth:`addstr` have been performed on a window. The normal + :meth:`refresh` call is simply :meth:`noutrefresh` followed by :func:`doupdate`; + if you have to update multiple windows, you can speed performance and perhaps + reduce screen flicker by issuing :meth:`noutrefresh` calls on all windows, + followed by a single :func:`doupdate`. + + +.. function:: echo() + + Enter echo mode. In echo mode, each character input is echoed to the screen as + it is entered. + + +.. function:: endwin() + + De-initialize the library, and return terminal to normal status. + + +.. function:: erasechar() + + Returns the user's current erase character. Under Unix operating systems this + is a property of the controlling tty of the curses program, and is not set by + the curses library itself. + + +.. function:: filter() + + The :func:`filter` routine, if used, must be called before :func:`initscr` is + called. The effect is that, during those calls, LINES is set to 1; the + capabilities clear, cup, cud, cud1, cuu1, cuu, vpa are disabled; and the home + string is set to the value of cr. The effect is that the cursor is confined to + the current line, and so are screen updates. This may be used for enabling + character-at-a-time line editing without touching the rest of the screen. + + +.. function:: flash() + + Flash the screen. That is, change it to reverse-video and then change it back + in a short interval. Some people prefer such a 'visible bell' to the audible + attention signal produced by :func:`beep`. + + +.. function:: flushinp() + + Flush all input buffers.
This throws away any typeahead that has been typed + by the user and has not yet been processed by the program. + + +.. function:: getmouse() + + After :meth:`getch` returns :const:`KEY_MOUSE` to signal a mouse event, this + method should be called to retrieve the queued mouse event, represented as a + 5-tuple ``(id, x, y, z, bstate)``. *id* is an ID value used to distinguish + multiple devices, and *x*, *y*, *z* are the event's coordinates. (*z* is + currently unused.) *bstate* is an integer value whose bits will be set to + indicate the type of event, and will be the bitwise OR of one or more of the + following constants, where *n* is the button number from 1 to 4: + :const:`BUTTONn_PRESSED`, :const:`BUTTONn_RELEASED`, :const:`BUTTONn_CLICKED`, + :const:`BUTTONn_DOUBLE_CLICKED`, :const:`BUTTONn_TRIPLE_CLICKED`, + :const:`BUTTON_SHIFT`, :const:`BUTTON_CTRL`, :const:`BUTTON_ALT`. + + +.. function:: getsyx() + + Returns the current coordinates of the virtual screen cursor in y and x. If + leaveok is currently true, then -1,-1 is returned. + + +.. function:: getwin(file) + + Reads window related data stored in the file by an earlier :func:`putwin` call. + The routine then creates and initializes a new window using that data, returning + the new window object. + + +.. function:: has_colors() + + Returns true if the terminal can display colors; otherwise, it returns false. + + +.. function:: has_ic() + + Returns true if the terminal has insert- and delete-character capabilities. + This function is included for historical reasons only, as all modern software + terminal emulators have such capabilities. + + +.. function:: has_il() + + Returns true if the terminal has insert- and delete-line capabilities, or can + simulate them using scrolling regions. This function is included for + historical reasons only, as all modern software terminal emulators have such + capabilities. + + +..
function:: has_key(ch) + + Takes a key value *ch*, and returns true if the current terminal type recognizes + a key with that value. + + +.. function:: halfdelay(tenths) + + Used for half-delay mode, which is similar to cbreak mode in that characters + typed by the user are immediately available to the program. However, after + blocking for *tenths* tenths of seconds, an exception is raised if nothing has + been typed. The value of *tenths* must be a number between 1 and 255. Use + :func:`nocbreak` to leave half-delay mode. + + +.. function:: init_color(color_number, r, g, b) + + Changes the definition of a color, taking the number of the color to be changed + followed by three RGB values (for the amounts of red, green, and blue + components). The value of *color_number* must be between ``0`` and + :const:`COLORS`. Each of *r*, *g*, *b*, must be a value between ``0`` and + ``1000``. When :func:`init_color` is used, all occurrences of that color on the + screen immediately change to the new definition. This function is a no-op on + most terminals; it is active only if :func:`can_change_color` returns ``1``. + + +.. function:: init_pair(pair_number, fg, bg) + + Changes the definition of a color-pair. It takes three arguments: the number of + the color-pair to be changed, the foreground color number, and the background + color number. The value of *pair_number* must be between ``1`` and + ``COLOR_PAIRS - 1`` (the ``0`` color pair is wired to white on black and cannot + be changed). The value of *fg* and *bg* arguments must be between ``0`` and + :const:`COLORS`. If the color-pair was previously initialized, the screen is + refreshed and all occurrences of that color-pair are changed to the new + definition. + + +.. function:: initscr() + + Initialize the library. Returns a :class:`WindowObject` which represents the + whole screen. + + .. note:: + + If there is an error opening the terminal, the underlying curses library may + cause the interpreter to exit. + + +.. 
function:: isendwin() + + Returns true if :func:`endwin` has been called (that is, the curses library has + been deinitialized). + + +.. function:: keyname(k) + + Return the name of the key numbered *k*. The name of a key generating printable + ASCII character is the key's character. The name of a control-key combination + is a two-character string consisting of a caret followed by the corresponding + printable ASCII character. The name of an alt-key combination (128-255) is a + string consisting of the prefix 'M-' followed by the name of the corresponding + ASCII character. + + +.. function:: killchar() + + Returns the user's current line kill character. Under Unix operating systems + this is a property of the controlling tty of the curses program, and is not set + by the curses library itself. + + +.. function:: longname() + + Returns a string containing the terminfo long name field describing the current + terminal. The maximum length of a verbose description is 128 characters. It is + defined only after the call to :func:`initscr`. + + +.. function:: meta(yes) + + If *yes* is 1, allow 8-bit characters to be input. If *yes* is 0, allow only + 7-bit chars. + + +.. function:: mouseinterval(interval) + + Sets the maximum time in milliseconds that can elapse between press and release + events in order for them to be recognized as a click, and returns the previous + interval value. The default value is 200 msec, or one fifth of a second. + + +.. function:: mousemask(mousemask) + + Sets the mouse events to be reported, and returns a tuple ``(availmask, + oldmask)``. *availmask* indicates which of the specified mouse events can be + reported; on complete failure it returns 0. *oldmask* is the previous value of + the given window's mouse event mask. If this function is never called, no mouse + events are ever reported. + + +.. function:: napms(ms) + + Sleep for *ms* milliseconds. + + +.. 
function:: newpad(nlines, ncols) + + Creates and returns a pointer to a new pad data structure with the given number + of lines and columns. A pad is returned as a window object. + + A pad is like a window, except that it is not restricted by the screen size, and + is not necessarily associated with a particular part of the screen. Pads can be + used when a large window is needed, and only a part of the window will be on the + screen at one time. Automatic refreshes of pads (such as from scrolling or + echoing of input) do not occur. The :meth:`refresh` and :meth:`noutrefresh` + methods of a pad require 6 arguments to specify the part of the pad to be + displayed and the location on the screen to be used for the display. The + arguments are pminrow, pmincol, sminrow, smincol, smaxrow, smaxcol; the p + arguments refer to the upper left corner of the pad region to be displayed and + the s arguments define a clipping box on the screen within which the pad region + is to be displayed. + + +.. function:: newwin([nlines, ncols,] begin_y, begin_x) + + Return a new window, whose left-upper corner is at ``(begin_y, begin_x)``, and + whose height/width is *nlines*/*ncols*. + + By default, the window will extend from the specified position to the lower + right corner of the screen. + + +.. function:: nl() + + Enter newline mode. This mode translates the return key into newline on input, + and translates newline into return and line-feed on output. Newline mode is + initially on. + + +.. function:: nocbreak() + + Leave cbreak mode. Return to normal "cooked" mode with line buffering. + + +.. function:: noecho() + + Leave echo mode. Echoing of input characters is turned off. + + +.. function:: nonl() + + Leave newline mode. 
Disable translation of return into newline on input, and + disable low-level translation of newline into newline/return on output (but this + does not change the behavior of ``addch('\n')``, which always does the + equivalent of return and line feed on the virtual screen). With translation + off, curses can sometimes speed up vertical motion a little; also, it will be + able to detect the return key on input. + + +.. function:: noqiflush() + + When the noqiflush routine is used, normal flush of input and output queues + associated with the INTR, QUIT and SUSP characters will not be done. You may + want to call :func:`noqiflush` in a signal handler if you want output to + continue as though the interrupt had not occurred, after the handler exits. + + +.. function:: noraw() + + Leave raw mode. Return to normal "cooked" mode with line buffering. + + +.. function:: pair_content(pair_number) + + Returns a tuple ``(fg, bg)`` containing the colors for the requested color pair. + The value of *pair_number* must be between ``1`` and ``COLOR_PAIRS - 1``. + + +.. function:: pair_number(attr) + + Returns the number of the color-pair set by the attribute value *attr*. + :func:`color_pair` is the counterpart to this function. + + +.. function:: putp(string) + + Equivalent to ``tputs(str, 1, putchar)``; emits the value of a specified + terminfo capability for the current terminal. Note that the output of putp + always goes to standard output. + + +.. function:: qiflush( [flag] ) + + If *flag* is false, the effect is the same as calling :func:`noqiflush`. If + *flag* is true, or no argument is provided, the queues will be flushed when + these control characters are read. + + +.. function:: raw() + + Enter raw mode. In raw mode, normal line buffering and processing of + interrupt, quit, suspend, and flow control keys are turned off; characters are + presented to curses input functions one by one. + + +.. 
function:: reset_prog_mode() + + Restores the terminal to "program" mode, as previously saved by + :func:`def_prog_mode`. + + +.. function:: reset_shell_mode() + + Restores the terminal to "shell" mode, as previously saved by + :func:`def_shell_mode`. + + +.. function:: setsyx(y, x) + + Sets the virtual screen cursor to *y*, *x*. If *y* and *x* are both -1, then + leaveok is set. + + +.. function:: setupterm([termstr, fd]) + + Initializes the terminal. *termstr* is a string giving the terminal name; if + omitted, the value of the TERM environment variable will be used. *fd* is the + file descriptor to which any initialization sequences will be sent; if not + supplied, the file descriptor for ``sys.stdout`` will be used. + + +.. function:: start_color() + + Must be called if the programmer wants to use colors, and before any other color + manipulation routine is called. It is good practice to call this routine right + after :func:`initscr`. + + :func:`start_color` initializes eight basic colors (black, red, green, yellow, + blue, magenta, cyan, and white), and two global variables in the :mod:`curses` + module, :const:`COLORS` and :const:`COLOR_PAIRS`, containing the maximum number + of colors and color-pairs the terminal can support. It also restores the colors + on the terminal to the values they had when the terminal was just turned on. + + +.. function:: termattrs() + + Returns a logical OR of all video attributes supported by the terminal. This + information is useful when a curses program needs complete control over the + appearance of the screen. + + +.. function:: termname() + + Returns the value of the environment variable TERM, truncated to 14 characters. + + +.. function:: tigetflag(capname) + + Returns the value of the Boolean capability corresponding to the terminfo + capability name *capname*. The value ``-1`` is returned if *capname* is not a + Boolean capability, or ``0`` if it is canceled or absent from the terminal + description. + + +.. 
function:: tigetnum(capname) + + Returns the value of the numeric capability corresponding to the terminfo + capability name *capname*. The value ``-2`` is returned if *capname* is not a + numeric capability, or ``-1`` if it is canceled or absent from the terminal + description. + + +.. function:: tigetstr(capname) + + Returns the value of the string capability corresponding to the terminfo + capability name *capname*. ``None`` is returned if *capname* is not a string + capability, or is canceled or absent from the terminal description. + + +.. function:: tparm(str[,...]) + + Instantiates the string *str* with the supplied parameters, where *str* should + be a parameterized string obtained from the terminfo database. E.g. + ``tparm(tigetstr("cup"), 5, 3)`` could result in ``'\033[6;4H'``, the exact + result depending on terminal type. + + +.. function:: typeahead(fd) + + Specifies that the file descriptor *fd* be used for typeahead checking. If *fd* + is ``-1``, then no typeahead checking is done. + + The curses library does "line-breakout optimization" by looking for typeahead + periodically while updating the screen. If input is found, and it is coming + from a tty, the current update is postponed until refresh or doupdate is called + again, allowing faster response to commands typed in advance. This function + allows specifying a different file descriptor for typeahead checking. + + +.. function:: unctrl(ch) + + Returns a string which is a printable representation of the character *ch*. + Control characters are displayed as a caret followed by the character, for + example as ``^C``. Printing characters are left as they are. + + +.. function:: ungetch(ch) + + Push *ch* so the next :meth:`getch` will return it. + + .. note:: + + Only one *ch* can be pushed before :meth:`getch` is called. + + +.. function:: ungetmouse(id, x, y, z, bstate) + + Push a :const:`KEY_MOUSE` event onto the input queue, associating the given + state data with it. + + +.. 
function:: use_env(flag) + + If used, this function should be called before :func:`initscr` or newterm are + called. When *flag* is false, the values of lines and columns specified in the + terminfo database will be used, even if environment variables :envvar:`LINES` + and :envvar:`COLUMNS` (used by default) are set, or if curses is running in a + window (in which case default behavior would be to use the window size if + :envvar:`LINES` and :envvar:`COLUMNS` are not set). + + +.. function:: use_default_colors() + + Allow use of default values for colors on terminals supporting this feature. Use + this to support transparency in your application. The default color is assigned + to the color number -1. After calling this function, ``init_pair(x, + curses.COLOR_RED, -1)`` initializes, for instance, color pair *x* to a red + foreground color on the default background. + + +.. _curses-window-objects: + +Window Objects +-------------- + +Window objects, as returned by :func:`initscr` and :func:`newwin` above, have +the following methods: + + +.. method:: window.addch([y, x,] ch[, attr]) + + .. note:: + + A *character* means a C character (an ASCII code), rather then a Python + character (a string of length 1). (This note is true whenever the documentation + mentions a character.) The builtin :func:`ord` is handy for conveying strings to + codes. + + Paint character *ch* at ``(y, x)`` with attributes *attr*, overwriting any + character previously painter at that location. By default, the character + position and attributes are the current settings for the window object. + + +.. method:: window.addnstr([y, x,] str, n[, attr]) + + Paint at most *n* characters of the string *str* at ``(y, x)`` with attributes + *attr*, overwriting anything previously on the display. + + +.. method:: window.addstr([y, x,] str[, attr]) + + Paint the string *str* at ``(y, x)`` with attributes *attr*, overwriting + anything previously on the display. + + +.. 
method:: window.attroff(attr) + + Remove attribute *attr* from the "background" set applied to all writes to the + current window. + + +.. method:: window.attron(attr) + + Add attribute *attr* from the "background" set applied to all writes to the + current window. + + +.. method:: window.attrset(attr) + + Set the "background" set of attributes to *attr*. This set is initially 0 (no + attributes). + + +.. method:: window.bkgd(ch[, attr]) + + Sets the background property of the window to the character *ch*, with + attributes *attr*. The change is then applied to every character position in + that window: + + * The attribute of every character in the window is changed to the new + background attribute. + + * Wherever the former background character appears, it is changed to the new + background character. + + +.. method:: window.bkgdset(ch[, attr]) + + Sets the window's background. A window's background consists of a character and + any combination of attributes. The attribute part of the background is combined + (OR'ed) with all non-blank characters that are written into the window. Both + the character and attribute parts of the background are combined with the blank + characters. The background becomes a property of the character and moves with + the character through any scrolling and insert/delete line/character operations. + + +.. method:: window.border([ls[, rs[, ts[, bs[, tl[, tr[, bl[, br]]]]]]]]) + + Draw a border around the edges of the window. Each parameter specifies the + character to use for a specific part of the border; see the table below for more + details. The characters can be specified as integers or as one-character + strings. + + .. note:: + + A ``0`` value for any parameter will cause the default character to be used for + that parameter. Keyword parameters can *not* be used. 
The defaults are listed + in this table: + + +-----------+---------------------+-----------------------+ + | Parameter | Description | Default value | + +===========+=====================+=======================+ + | *ls* | Left side | :const:`ACS_VLINE` | + +-----------+---------------------+-----------------------+ + | *rs* | Right side | :const:`ACS_VLINE` | + +-----------+---------------------+-----------------------+ + | *ts* | Top | :const:`ACS_HLINE` | + +-----------+---------------------+-----------------------+ + | *bs* | Bottom | :const:`ACS_HLINE` | + +-----------+---------------------+-----------------------+ + | *tl* | Upper-left corner | :const:`ACS_ULCORNER` | + +-----------+---------------------+-----------------------+ + | *tr* | Upper-right corner | :const:`ACS_URCORNER` | + +-----------+---------------------+-----------------------+ + | *bl* | Bottom-left corner | :const:`ACS_LLCORNER` | + +-----------+---------------------+-----------------------+ + | *br* | Bottom-right corner | :const:`ACS_LRCORNER` | + +-----------+---------------------+-----------------------+ + + +.. method:: window.box([vertch, horch]) + + Similar to :meth:`border`, but both *ls* and *rs* are *vertch* and both *ts* and + bs are *horch*. The default corner characters are always used by this function. + + +.. method:: window.chgat([y, x, ] [num,] attr) + + Sets the attributes of *num* characters at the current cursor position, or at + position ``(y, x)`` if supplied. If no value of *num* is given or *num* = -1, + the attribute will be set on all the characters to the end of the line. This + function does not move the cursor. The changed line will be touched using the + :meth:`touchline` method so that the contents will be redisplayed by the next + window refresh. + + +.. method:: window.clear() + + Like :meth:`erase`, but also causes the whole window to be repainted upon next + call to :meth:`refresh`. + + +.. 
method:: window.clearok(yes) + + If *yes* is 1, the next call to :meth:`refresh` will clear the window + completely. + + +.. method:: window.clrtobot() + + Erase from cursor to the end of the window: all lines below the cursor are + deleted, and then the equivalent of :meth:`clrtoeol` is performed. + + +.. method:: window.clrtoeol() + + Erase from cursor to the end of the line. + + +.. method:: window.cursyncup() + + Updates the current cursor position of all the ancestors of the window to + reflect the current cursor position of the window. + + +.. method:: window.delch([y, x]) + + Delete any character at ``(y, x)``. + + +.. method:: window.deleteln() + + Delete the line under the cursor. All following lines are moved up by 1 line. + + +.. method:: window.derwin([nlines, ncols,] begin_y, begin_x) + + An abbreviation for "derive window", :meth:`derwin` is the same as calling + :meth:`subwin`, except that *begin_y* and *begin_x* are relative to the origin + of the window, rather than relative to the entire screen. Returns a window + object for the derived window. + + +.. method:: window.echochar(ch[, attr]) + + Add character *ch* with attribute *attr*, and immediately call :meth:`refresh` + on the window. + + +.. method:: window.enclose(y, x) + + Tests whether the given pair of screen-relative character-cell coordinates are + enclosed by the given window, returning true or false. It is useful for + determining what subset of the screen windows enclose the location of a mouse + event. + + +.. method:: window.erase() + + Clear the window. + + +.. method:: window.getbegyx() + + Return a tuple ``(y, x)`` of co-ordinates of upper-left corner. + + +.. method:: window.getch([y, x]) + + Get a character. Note that the integer returned does *not* have to be in ASCII + range: function keys, keypad keys and so on return numbers higher than 256. In + no-delay mode, -1 is returned if there is no input. + + +.. 
method:: window.getkey([y, x]) + + Get a character, returning a string instead of an integer, as :meth:`getch` + does. Function keys, keypad keys and so on return a multibyte string containing + the key name. In no-delay mode, an exception is raised if there is no input. + + +.. method:: window.getmaxyx() + + Return a tuple ``(y, x)`` of the height and width of the window. + + +.. method:: window.getparyx() + + Returns the beginning coordinates of this window relative to its parent window + into two integer variables y and x. Returns ``-1,-1`` if this window has no + parent. + + +.. method:: window.getstr([y, x]) + + Read a string from the user, with primitive line editing capacity. + + +.. method:: window.getyx() + + Return a tuple ``(y, x)`` of current cursor position relative to the window's + upper-left corner. + + +.. method:: window.hline([y, x,] ch, n) + + Display a horizontal line starting at ``(y, x)`` with length *n* consisting of + the character *ch*. + + +.. method:: window.idcok(flag) + + If *flag* is false, curses no longer considers using the hardware insert/delete + character feature of the terminal; if *flag* is true, use of character insertion + and deletion is enabled. When curses is first initialized, use of character + insert/delete is enabled by default. + + +.. method:: window.idlok(yes) + + If called with *yes* equal to 1, :mod:`curses` will try and use hardware line + editing facilities. Otherwise, line insertion/deletion are disabled. + + +.. method:: window.immedok(flag) + + If *flag* is true, any change in the window image automatically causes the + window to be refreshed; you no longer have to call :meth:`refresh` yourself. + However, it may degrade performance considerably, due to repeated calls to + wrefresh. This option is disabled by default. + + +.. method:: window.inch([y, x]) + + Return the character at the given position in the window. The bottom 8 bits are + the character proper, and upper bits are the attributes. + + +.. 
method:: window.insch([y, x,] ch[, attr]) + + Paint character *ch* at ``(y, x)`` with attributes *attr*, moving the line from + position *x* right by one character. + + +.. method:: window.insdelln(nlines) + + Inserts *nlines* lines into the specified window above the current line. The + *nlines* bottom lines are lost. For negative *nlines*, delete *nlines* lines + starting with the one under the cursor, and move the remaining lines up. The + bottom *nlines* lines are cleared. The current cursor position remains the + same. + + +.. method:: window.insertln() + + Insert a blank line under the cursor. All following lines are moved down by 1 + line. + + +.. method:: window.insnstr([y, x,] str, n [, attr]) + + Insert a character string (as many characters as will fit on the line) before + the character under the cursor, up to *n* characters. If *n* is zero or + negative, the entire string is inserted. All characters to the right of the + cursor are shifted right, with the rightmost characters on the line being lost. + The cursor position does not change (after moving to *y*, *x*, if specified). + + +.. method:: window.insstr([y, x, ] str [, attr]) + + Insert a character string (as many characters as will fit on the line) before + the character under the cursor. All characters to the right of the cursor are + shifted right, with the rightmost characters on the line being lost. The cursor + position does not change (after moving to *y*, *x*, if specified). + + +.. method:: window.instr([y, x] [, n]) + + Returns a string of characters, extracted from the window starting at the + current cursor position, or at *y*, *x* if specified. Attributes are stripped + from the characters. If *n* is specified, :meth:`instr` returns return a string + at most *n* characters long (exclusive of the trailing NUL). + + +.. method:: window.is_linetouched(line) + + Returns true if the specified line was modified since the last call to + :meth:`refresh`; otherwise returns false. 
Raises a :exc:`curses.error` + exception if *line* is not valid for the given window. + + +.. method:: window.is_wintouched() + + Returns true if the specified window was modified since the last call to + :meth:`refresh`; otherwise returns false. + + +.. method:: window.keypad(yes) + + If *yes* is 1, escape sequences generated by some keys (keypad, function keys) + will be interpreted by :mod:`curses`. If *yes* is 0, escape sequences will be + left as is in the input stream. + + +.. method:: window.leaveok(yes) + + If *yes* is 1, cursor is left where it is on update, instead of being at "cursor + position." This reduces cursor movement where possible. If possible the cursor + will be made invisible. + + If *yes* is 0, cursor will always be at "cursor position" after an update. + + +.. method:: window.move(new_y, new_x) + + Move cursor to ``(new_y, new_x)``. + + +.. method:: window.mvderwin(y, x) + + Moves the window inside its parent window. The screen-relative parameters of + the window are not changed. This routine is used to display different parts of + the parent window at the same physical position on the screen. + + +.. method:: window.mvwin(new_y, new_x) + + Move the window so its upper-left corner is at ``(new_y, new_x)``. + + +.. method:: window.nodelay(yes) + + If *yes* is ``1``, :meth:`getch` will be non-blocking. + + +.. method:: window.notimeout(yes) + + If *yes* is ``1``, escape sequences will not be timed out. + + If *yes* is ``0``, after a few milliseconds, an escape sequence will not be + interpreted, and will be left in the input stream as is. + + +.. method:: window.noutrefresh() + + Mark for refresh but wait. This function updates the data structure + representing the desired state of the window, but does not force an update of + the physical screen. To accomplish that, call :func:`doupdate`. + + +.. method:: window.overlay(destwin[, sminrow, smincol, dminrow, dmincol, dmaxrow, dmaxcol]) + + Overlay the window on top of *destwin*. 
The windows need not be the same size, + only the overlapping region is copied. This copy is non-destructive, which means + that the current background character does not overwrite the old contents of + *destwin*. + + To get fine-grained control over the copied region, the second form of + :meth:`overlay` can be used. *sminrow* and *smincol* are the upper-left + coordinates of the source window, and the other variables mark a rectangle in + the destination window. + + +.. method:: window.overwrite(destwin[, sminrow, smincol, dminrow, dmincol, dmaxrow, dmaxcol]) + + Overwrite the window on top of *destwin*. The windows need not be the same size, + in which case only the overlapping region is copied. This copy is destructive, + which means that the current background character overwrites the old contents of + *destwin*. + + To get fine-grained control over the copied region, the second form of + :meth:`overwrite` can be used. *sminrow* and *smincol* are the upper-left + coordinates of the source window, the other variables mark a rectangle in the + destination window. + + +.. method:: window.putwin(file) + + Writes all data associated with the window into the provided file object. This + information can be later retrieved using the :func:`getwin` function. + + +.. method:: window.redrawln(beg, num) + + Indicates that the *num* screen lines, starting at line *beg*, are corrupted and + should be completely redrawn on the next :meth:`refresh` call. + + +.. method:: window.redrawwin() + + Touches the entire window, causing it to be completely redrawn on the next + :meth:`refresh` call. + + +.. method:: window.refresh([pminrow, pmincol, sminrow, smincol, smaxrow, smaxcol]) + + Update the display immediately (sync actual screen with previous + drawing/deleting methods). + + The 6 optional arguments can only be specified when the window is a pad created + with :func:`newpad`. The additional parameters are needed to indicate what part + of the pad and screen are involved. 
*pminrow* and *pmincol* specify the upper + left-hand corner of the rectangle to be displayed in the pad. *sminrow*, + *smincol*, *smaxrow*, and *smaxcol* specify the edges of the rectangle to be + displayed on the screen. The lower right-hand corner of the rectangle to be + displayed in the pad is calculated from the screen coordinates, since the + rectangles must be the same size. Both rectangles must be entirely contained + within their respective structures. Negative values of *pminrow*, *pmincol*, + *sminrow*, or *smincol* are treated as if they were zero. + + +.. method:: window.scroll([lines=1]) + + Scroll the screen or scrolling region upward by *lines* lines. + + +.. method:: window.scrollok(flag) + + Controls what happens when the cursor of a window is moved off the edge of the + window or scrolling region, either as a result of a newline action on the bottom + line, or typing the last character of the last line. If *flag* is false, the + cursor is left on the bottom line. If *flag* is true, the window is scrolled up + one line. Note that in order to get the physical scrolling effect on the + terminal, it is also necessary to call :meth:`idlok`. + + +.. method:: window.setscrreg(top, bottom) + + Set the scrolling region from line *top* to line *bottom*. All scrolling actions + will take place in this region. + + +.. method:: window.standend() + + Turn off the standout attribute. On some terminals this has the side effect of + turning off all attributes. + + +.. method:: window.standout() + + Turn on attribute *A_STANDOUT*. + + +.. method:: window.subpad([nlines, ncols,] begin_y, begin_x) + + Return a sub-window, whose upper-left corner is at ``(begin_y, begin_x)``, and + whose width/height is *ncols*/*nlines*. + + +.. method:: window.subwin([nlines, ncols,] begin_y, begin_x) + + Return a sub-window, whose upper-left corner is at ``(begin_y, begin_x)``, and + whose width/height is *ncols*/*nlines*. 
+ + By default, the sub-window will extend from the specified position to the lower + right corner of the window. + + +.. method:: window.syncdown() + + Touches each location in the window that has been touched in any of its ancestor + windows. This routine is called by :meth:`refresh`, so it should almost never + be necessary to call it manually. + + +.. method:: window.syncok(flag) + + If called with *flag* set to true, then :meth:`syncup` is called automatically + whenever there is a change in the window. + + +.. method:: window.syncup() + + Touches all locations in ancestors of the window that have been changed in the + window. + + +.. method:: window.timeout(delay) + + Sets blocking or non-blocking read behavior for the window. If *delay* is + negative, blocking read is used (which will wait indefinitely for input). If + *delay* is zero, then non-blocking read is used, and -1 will be returned by + :meth:`getch` if no input is waiting. If *delay* is positive, then + :meth:`getch` will block for *delay* milliseconds, and return -1 if there is + still no input at the end of that time. + + +.. method:: window.touchline(start, count[, changed]) + + Pretend *count* lines have been changed, starting with line *start*. If + *changed* is supplied, it specifies whether the affected lines are marked as + having been changed (*changed*\ =1) or unchanged (*changed*\ =0). + + +.. method:: window.touchwin() + + Pretend the whole window has been changed, for purposes of drawing + optimizations. + + +.. method:: window.untouchwin() + + Marks all lines in the window as unchanged since the last call to + :meth:`refresh`. + + +.. method:: window.vline([y, x,] ch, n) + + Display a vertical line starting at ``(y, x)`` with length *n* consisting of the + character *ch*. + + +Constants +--------- + +The :mod:`curses` module defines the following data members: + + +.. data:: ERR + + Some curses routines that return an integer, such as :func:`getch`, return + :const:`ERR` upon failure. 
+ + +.. data:: OK + + Some curses routines that return an integer, such as :func:`napms`, return + :const:`OK` upon success. + + +.. data:: version + + A string representing the current version of the module. Also available as + :const:`__version__`. + +Several constants are available to specify character cell attributes: + ++------------------+-------------------------------+ +| Attribute | Meaning | ++==================+===============================+ +| ``A_ALTCHARSET`` | Alternate character set mode. | ++------------------+-------------------------------+ +| ``A_BLINK`` | Blink mode. | ++------------------+-------------------------------+ +| ``A_BOLD`` | Bold mode. | ++------------------+-------------------------------+ +| ``A_DIM`` | Dim mode. | ++------------------+-------------------------------+ +| ``A_NORMAL`` | Normal attribute. | ++------------------+-------------------------------+ +| ``A_STANDOUT`` | Standout mode. | ++------------------+-------------------------------+ +| ``A_UNDERLINE`` | Underline mode. | ++------------------+-------------------------------+ + +Keys are referred to by integer constants with names starting with ``KEY_``. +The exact keycaps available are system dependent. + +.. % XXX this table is far too large! +.. % XXX should this table be alphabetized? 
+ ++-------------------+--------------------------------------------+ +| Key constant | Key | ++===================+============================================+ +| ``KEY_MIN`` | Minimum key value | ++-------------------+--------------------------------------------+ +| ``KEY_BREAK`` | Break key (unreliable) | ++-------------------+--------------------------------------------+ +| ``KEY_DOWN`` | Down-arrow | ++-------------------+--------------------------------------------+ +| ``KEY_UP`` | Up-arrow | ++-------------------+--------------------------------------------+ +| ``KEY_LEFT`` | Left-arrow | ++-------------------+--------------------------------------------+ +| ``KEY_RIGHT`` | Right-arrow | ++-------------------+--------------------------------------------+ +| ``KEY_HOME`` | Home key (upward+left arrow) | ++-------------------+--------------------------------------------+ +| ``KEY_BACKSPACE`` | Backspace (unreliable) | ++-------------------+--------------------------------------------+ +| ``KEY_F0`` | Function keys. Up to 64 function keys are | +| | supported. 
| ++-------------------+--------------------------------------------+ +| ``KEY_Fn`` | Value of function key *n* | ++-------------------+--------------------------------------------+ +| ``KEY_DL`` | Delete line | ++-------------------+--------------------------------------------+ +| ``KEY_IL`` | Insert line | ++-------------------+--------------------------------------------+ +| ``KEY_DC`` | Delete character | ++-------------------+--------------------------------------------+ +| ``KEY_IC`` | Insert char or enter insert mode | ++-------------------+--------------------------------------------+ +| ``KEY_EIC`` | Exit insert char mode | ++-------------------+--------------------------------------------+ +| ``KEY_CLEAR`` | Clear screen | ++-------------------+--------------------------------------------+ +| ``KEY_EOS`` | Clear to end of screen | ++-------------------+--------------------------------------------+ +| ``KEY_EOL`` | Clear to end of line | ++-------------------+--------------------------------------------+ +| ``KEY_SF`` | Scroll 1 line forward | ++-------------------+--------------------------------------------+ +| ``KEY_SR`` | Scroll 1 line backward (reverse) | ++-------------------+--------------------------------------------+ +| ``KEY_NPAGE`` | Next page | ++-------------------+--------------------------------------------+ +| ``KEY_PPAGE`` | Previous page | ++-------------------+--------------------------------------------+ +| ``KEY_STAB`` | Set tab | ++-------------------+--------------------------------------------+ +| ``KEY_CTAB`` | Clear tab | ++-------------------+--------------------------------------------+ +| ``KEY_CATAB`` | Clear all tabs | ++-------------------+--------------------------------------------+ +| ``KEY_ENTER`` | Enter or send (unreliable) | ++-------------------+--------------------------------------------+ +| ``KEY_SRESET`` | Soft (partial) reset (unreliable) | ++-------------------+--------------------------------------------+ +| 
``KEY_RESET`` | Reset or hard reset (unreliable) | ++-------------------+--------------------------------------------+ +| ``KEY_PRINT`` | Print | ++-------------------+--------------------------------------------+ +| ``KEY_LL`` | Home down or bottom (lower left) | ++-------------------+--------------------------------------------+ +| ``KEY_A1`` | Upper left of keypad | ++-------------------+--------------------------------------------+ +| ``KEY_A3`` | Upper right of keypad | ++-------------------+--------------------------------------------+ +| ``KEY_B2`` | Center of keypad | ++-------------------+--------------------------------------------+ +| ``KEY_C1`` | Lower left of keypad | ++-------------------+--------------------------------------------+ +| ``KEY_C3`` | Lower right of keypad | ++-------------------+--------------------------------------------+ +| ``KEY_BTAB`` | Back tab | ++-------------------+--------------------------------------------+ +| ``KEY_BEG`` | Beg (beginning) | ++-------------------+--------------------------------------------+ +| ``KEY_CANCEL`` | Cancel | ++-------------------+--------------------------------------------+ +| ``KEY_CLOSE`` | Close | ++-------------------+--------------------------------------------+ +| ``KEY_COMMAND`` | Cmd (command) | ++-------------------+--------------------------------------------+ +| ``KEY_COPY`` | Copy | ++-------------------+--------------------------------------------+ +| ``KEY_CREATE`` | Create | ++-------------------+--------------------------------------------+ +| ``KEY_END`` | End | ++-------------------+--------------------------------------------+ +| ``KEY_EXIT`` | Exit | ++-------------------+--------------------------------------------+ +| ``KEY_FIND`` | Find | ++-------------------+--------------------------------------------+ +| ``KEY_HELP`` | Help | ++-------------------+--------------------------------------------+ +| ``KEY_MARK`` | Mark | 
++-------------------+--------------------------------------------+ +| ``KEY_MESSAGE`` | Message | ++-------------------+--------------------------------------------+ +| ``KEY_MOVE`` | Move | ++-------------------+--------------------------------------------+ +| ``KEY_NEXT`` | Next | ++-------------------+--------------------------------------------+ +| ``KEY_OPEN`` | Open | ++-------------------+--------------------------------------------+ +| ``KEY_OPTIONS`` | Options | ++-------------------+--------------------------------------------+ +| ``KEY_PREVIOUS`` | Prev (previous) | ++-------------------+--------------------------------------------+ +| ``KEY_REDO`` | Redo | ++-------------------+--------------------------------------------+ +| ``KEY_REFERENCE`` | Ref (reference) | ++-------------------+--------------------------------------------+ +| ``KEY_REFRESH`` | Refresh | ++-------------------+--------------------------------------------+ +| ``KEY_REPLACE`` | Replace | ++-------------------+--------------------------------------------+ +| ``KEY_RESTART`` | Restart | ++-------------------+--------------------------------------------+ +| ``KEY_RESUME`` | Resume | ++-------------------+--------------------------------------------+ +| ``KEY_SAVE`` | Save | ++-------------------+--------------------------------------------+ +| ``KEY_SBEG`` | Shifted Beg (beginning) | ++-------------------+--------------------------------------------+ +| ``KEY_SCANCEL`` | Shifted Cancel | ++-------------------+--------------------------------------------+ +| ``KEY_SCOMMAND`` | Shifted Command | ++-------------------+--------------------------------------------+ +| ``KEY_SCOPY`` | Shifted Copy | ++-------------------+--------------------------------------------+ +| ``KEY_SCREATE`` | Shifted Create | ++-------------------+--------------------------------------------+ +| ``KEY_SDC`` | Shifted Delete char | ++-------------------+--------------------------------------------+ +| ``KEY_SDL`` | 
Shifted Delete line | ++-------------------+--------------------------------------------+ +| ``KEY_SELECT`` | Select | ++-------------------+--------------------------------------------+ +| ``KEY_SEND`` | Shifted End | ++-------------------+--------------------------------------------+ +| ``KEY_SEOL`` | Shifted Clear line | ++-------------------+--------------------------------------------+ +| ``KEY_SEXIT`` | Shifted Exit | ++-------------------+--------------------------------------------+ +| ``KEY_SFIND`` | Shifted Find | ++-------------------+--------------------------------------------+ +| ``KEY_SHELP`` | Shifted Help | ++-------------------+--------------------------------------------+ +| ``KEY_SHOME`` | Shifted Home | ++-------------------+--------------------------------------------+ +| ``KEY_SIC`` | Shifted Input | ++-------------------+--------------------------------------------+ +| ``KEY_SLEFT`` | Shifted Left arrow | ++-------------------+--------------------------------------------+ +| ``KEY_SMESSAGE`` | Shifted Message | ++-------------------+--------------------------------------------+ +| ``KEY_SMOVE`` | Shifted Move | ++-------------------+--------------------------------------------+ +| ``KEY_SNEXT`` | Shifted Next | ++-------------------+--------------------------------------------+ +| ``KEY_SOPTIONS`` | Shifted Options | ++-------------------+--------------------------------------------+ +| ``KEY_SPREVIOUS`` | Shifted Prev | ++-------------------+--------------------------------------------+ +| ``KEY_SPRINT`` | Shifted Print | ++-------------------+--------------------------------------------+ +| ``KEY_SREDO`` | Shifted Redo | ++-------------------+--------------------------------------------+ +| ``KEY_SREPLACE`` | Shifted Replace | ++-------------------+--------------------------------------------+ +| ``KEY_SRIGHT`` | Shifted Right arrow | ++-------------------+--------------------------------------------+ +| ``KEY_SRSUME`` | Shifted Resume |
++-------------------+--------------------------------------------+ +| ``KEY_SSAVE`` | Shifted Save | ++-------------------+--------------------------------------------+ +| ``KEY_SSUSPEND`` | Shifted Suspend | ++-------------------+--------------------------------------------+ +| ``KEY_SUNDO`` | Shifted Undo | ++-------------------+--------------------------------------------+ +| ``KEY_SUSPEND`` | Suspend | ++-------------------+--------------------------------------------+ +| ``KEY_UNDO`` | Undo | ++-------------------+--------------------------------------------+ +| ``KEY_MOUSE`` | Mouse event has occurred | ++-------------------+--------------------------------------------+ +| ``KEY_RESIZE`` | Terminal resize event | ++-------------------+--------------------------------------------+ +| ``KEY_MAX`` | Maximum key value | ++-------------------+--------------------------------------------+ + +On VT100s and their software emulations, such as X terminal emulators, there are +normally at least four function keys (:const:`KEY_F1`, :const:`KEY_F2`, +:const:`KEY_F3`, :const:`KEY_F4`) available, and the arrow keys mapped to +:const:`KEY_UP`, :const:`KEY_DOWN`, :const:`KEY_LEFT` and :const:`KEY_RIGHT` in +the obvious way. If your machine has a PC keyboard, it is safe to expect arrow +keys and twelve function keys (older PC keyboards may have only ten function +keys); also, the following keypad mappings are standard: + ++------------------+-----------+ +| Keycap | Constant | ++==================+===========+ +| :kbd:`Insert` | KEY_IC | ++------------------+-----------+ +| :kbd:`Delete` | KEY_DC | ++------------------+-----------+ +| :kbd:`Home` | KEY_HOME | ++------------------+-----------+ +| :kbd:`End` | KEY_END | ++------------------+-----------+ +| :kbd:`Page Up` | KEY_PPAGE | ++------------------+-----------+ +| :kbd:`Page Down` | KEY_NPAGE | ++------------------+-----------+ + +The following table lists characters from the alternate character set.
These are +inherited from the VT100 terminal, and will generally be available on software +emulations such as X terminals. When there is no graphic available, curses +falls back on a crude printable ASCII approximation. + +.. note:: + + These are available only after :func:`initscr` has been called. + ++------------------+------------------------------------------+ +| ACS code | Meaning | ++==================+==========================================+ +| ``ACS_BBSS`` | alternate name for upper right corner | ++------------------+------------------------------------------+ +| ``ACS_BLOCK`` | solid square block | ++------------------+------------------------------------------+ +| ``ACS_BOARD`` | board of squares | ++------------------+------------------------------------------+ +| ``ACS_BSBS`` | alternate name for horizontal line | ++------------------+------------------------------------------+ +| ``ACS_BSSB`` | alternate name for upper left corner | ++------------------+------------------------------------------+ +| ``ACS_BSSS`` | alternate name for top tee | ++------------------+------------------------------------------+ +| ``ACS_BTEE`` | bottom tee | ++------------------+------------------------------------------+ +| ``ACS_BULLET`` | bullet | ++------------------+------------------------------------------+ +| ``ACS_CKBOARD`` | checker board (stipple) | ++------------------+------------------------------------------+ +| ``ACS_DARROW`` | arrow pointing down | ++------------------+------------------------------------------+ +| ``ACS_DEGREE`` | degree symbol | ++------------------+------------------------------------------+ +| ``ACS_DIAMOND`` | diamond | ++------------------+------------------------------------------+ +| ``ACS_GEQUAL`` | greater-than-or-equal-to | ++------------------+------------------------------------------+ +| ``ACS_HLINE`` | horizontal line | ++------------------+------------------------------------------+ +| ``ACS_LANTERN`` | lantern symbol | 
++------------------+------------------------------------------+ +| ``ACS_LARROW`` | left arrow | ++------------------+------------------------------------------+ +| ``ACS_LEQUAL`` | less-than-or-equal-to | ++------------------+------------------------------------------+ +| ``ACS_LLCORNER`` | lower left-hand corner | ++------------------+------------------------------------------+ +| ``ACS_LRCORNER`` | lower right-hand corner | ++------------------+------------------------------------------+ +| ``ACS_LTEE`` | left tee | ++------------------+------------------------------------------+ +| ``ACS_NEQUAL`` | not-equal sign | ++------------------+------------------------------------------+ +| ``ACS_PI`` | letter pi | ++------------------+------------------------------------------+ +| ``ACS_PLMINUS`` | plus-or-minus sign | ++------------------+------------------------------------------+ +| ``ACS_PLUS`` | big plus sign | ++------------------+------------------------------------------+ +| ``ACS_RARROW`` | right arrow | ++------------------+------------------------------------------+ +| ``ACS_RTEE`` | right tee | ++------------------+------------------------------------------+ +| ``ACS_S1`` | scan line 1 | ++------------------+------------------------------------------+ +| ``ACS_S3`` | scan line 3 | ++------------------+------------------------------------------+ +| ``ACS_S7`` | scan line 7 | ++------------------+------------------------------------------+ +| ``ACS_S9`` | scan line 9 | ++------------------+------------------------------------------+ +| ``ACS_SBBS`` | alternate name for lower right corner | ++------------------+------------------------------------------+ +| ``ACS_SBSB`` | alternate name for vertical line | ++------------------+------------------------------------------+ +| ``ACS_SBSS`` | alternate name for right tee | ++------------------+------------------------------------------+ +| ``ACS_SSBB`` | alternate name for lower left corner | 
++------------------+------------------------------------------+ +| ``ACS_SSBS`` | alternate name for bottom tee | ++------------------+------------------------------------------+ +| ``ACS_SSSB`` | alternate name for left tee | ++------------------+------------------------------------------+ +| ``ACS_SSSS`` | alternate name for crossover or big plus | ++------------------+------------------------------------------+ +| ``ACS_STERLING`` | pound sterling | ++------------------+------------------------------------------+ +| ``ACS_TTEE`` | top tee | ++------------------+------------------------------------------+ +| ``ACS_UARROW`` | up arrow | ++------------------+------------------------------------------+ +| ``ACS_ULCORNER`` | upper left corner | ++------------------+------------------------------------------+ +| ``ACS_URCORNER`` | upper right corner | ++------------------+------------------------------------------+ +| ``ACS_VLINE`` | vertical line | ++------------------+------------------------------------------+ + +The following table lists the predefined colors: + ++-------------------+----------------------------+ +| Constant | Color | ++===================+============================+ +| ``COLOR_BLACK`` | Black | ++-------------------+----------------------------+ +| ``COLOR_BLUE`` | Blue | ++-------------------+----------------------------+ +| ``COLOR_CYAN`` | Cyan (light greenish blue) | ++-------------------+----------------------------+ +| ``COLOR_GREEN`` | Green | ++-------------------+----------------------------+ +| ``COLOR_MAGENTA`` | Magenta (purplish red) | ++-------------------+----------------------------+ +| ``COLOR_RED`` | Red | ++-------------------+----------------------------+ +| ``COLOR_WHITE`` | White | ++-------------------+----------------------------+ +| ``COLOR_YELLOW`` | Yellow | ++-------------------+----------------------------+ + + +:mod:`curses.textpad` --- Text input widget for curses programs 
+=============================================================== + +.. module:: curses.textpad + :synopsis: Emacs-like input editing in a curses window. +.. moduleauthor:: Eric Raymond +.. sectionauthor:: Eric Raymond + + +.. versionadded:: 1.6 + +The :mod:`curses.textpad` module provides a :class:`Textbox` class that handles +elementary text editing in a curses window, supporting a set of keybindings +resembling those of Emacs (thus, also of Netscape Navigator, BBedit 6.x, +FrameMaker, and many other programs). The module also provides a +rectangle-drawing function useful for framing text boxes or for other purposes. + +The module :mod:`curses.textpad` defines the following function: + + +.. function:: rectangle(win, uly, ulx, lry, lrx) + + Draw a rectangle. The first argument must be a window object; the remaining + arguments are coordinates relative to that window. The second and third + arguments are the y and x coordinates of the upper left hand corner of the + rectangle to be drawn; the fourth and fifth arguments are the y and x + coordinates of the lower right hand corner. The rectangle will be drawn using + VT100/IBM PC forms characters on terminals that make this possible (including + xterm and most other software terminal emulators). Otherwise it will be drawn + with ASCII dashes, vertical bars, and plus signs. + + +.. _curses-textpad-objects: + +Textbox objects +--------------- + +You can instantiate a :class:`Textbox` object as follows: + + +.. class:: Textbox(win) + + Return a textbox widget object. The *win* argument should be a curses + :class:`WindowObject` in which the textbox is to be contained. The edit cursor + of the textbox is initially located at the upper left hand corner of the + containing window, with coordinates ``(0, 0)``. The instance's + :attr:`stripspaces` flag is initially on. + +:class:`Textbox` objects have the following methods: + + +.. method:: Textbox.edit([validator]) + + This is the entry point you will normally use. 
It accepts editing keystrokes + until one of the termination keystrokes is entered. If *validator* is supplied, + it must be a function. It will be called for each keystroke entered with the + keystroke as a parameter; command dispatch is done on the result. This method + returns the window contents as a string; whether blanks in the window are + included is affected by the :attr:`stripspaces` member. + + +.. method:: Textbox.do_command(ch) + + Process a single command keystroke. Here are the supported special keystrokes: + + +------------------+-------------------------------------------+ + | Keystroke | Action | + +==================+===========================================+ + | :kbd:`Control-A` | Go to left edge of window. | + +------------------+-------------------------------------------+ + | :kbd:`Control-B` | Cursor left, wrapping to previous line if | + | | appropriate. | + +------------------+-------------------------------------------+ + | :kbd:`Control-D` | Delete character under cursor. | + +------------------+-------------------------------------------+ + | :kbd:`Control-E` | Go to right edge (stripspaces off) or end | + | | of line (stripspaces on). | + +------------------+-------------------------------------------+ + | :kbd:`Control-F` | Cursor right, wrapping to next line when | + | | appropriate. | + +------------------+-------------------------------------------+ + | :kbd:`Control-G` | Terminate, returning the window contents. | + +------------------+-------------------------------------------+ + | :kbd:`Control-H` | Delete character backward. | + +------------------+-------------------------------------------+ + | :kbd:`Control-J` | Terminate if the window is 1 line, | + | | otherwise insert newline. | + +------------------+-------------------------------------------+ + | :kbd:`Control-K` | If line is blank, delete it, otherwise | + | | clear to end of line. 
| + +------------------+-------------------------------------------+ + | :kbd:`Control-L` | Refresh screen. | + +------------------+-------------------------------------------+ + | :kbd:`Control-N` | Cursor down; move down one line. | + +------------------+-------------------------------------------+ + | :kbd:`Control-O` | Insert a blank line at cursor location. | + +------------------+-------------------------------------------+ + | :kbd:`Control-P` | Cursor up; move up one line. | + +------------------+-------------------------------------------+ + + Move operations do nothing if the cursor is at an edge where the movement is not + possible. The following synonyms are supported where possible: + + +------------------------+------------------+ + | Constant | Keystroke | + +========================+==================+ + | :const:`KEY_LEFT` | :kbd:`Control-B` | + +------------------------+------------------+ + | :const:`KEY_RIGHT` | :kbd:`Control-F` | + +------------------------+------------------+ + | :const:`KEY_UP` | :kbd:`Control-P` | + +------------------------+------------------+ + | :const:`KEY_DOWN` | :kbd:`Control-N` | + +------------------------+------------------+ + | :const:`KEY_BACKSPACE` | :kbd:`Control-H` | + +------------------------+------------------+ + + All other keystrokes are treated as a command to insert the given character and + move right (with line wrapping). + + +.. method:: Textbox.gather() + + This method returns the window contents as a string; whether blanks in the + window are included is affected by the :attr:`stripspaces` member. + + +.. attribute:: Textbox.stripspaces + + This data member is a flag which controls the interpretation of blanks in the + window. When it is on, trailing blanks on each line are ignored; any cursor + motion that would land the cursor on a trailing blank goes to the end of that + line instead, and trailing blanks are stripped when the window contents are + gathered.
+ + +:mod:`curses.wrapper` --- Terminal handler for curses programs +============================================================== + +.. module:: curses.wrapper + :synopsis: Terminal configuration wrapper for curses programs. +.. moduleauthor:: Eric Raymond +.. sectionauthor:: Eric Raymond + + +.. versionadded:: 1.6 + +This module supplies one function, :func:`wrapper`, which runs another function +which should be the rest of your curses-using application. If the application +raises an exception, :func:`wrapper` will restore the terminal to a sane state +before re-raising the exception and generating a traceback. + + +.. function:: wrapper(func, ...) + + Wrapper function that initializes curses and calls another function, *func*, + restoring normal keyboard/screen behavior on error. The callable object *func* + is then passed the main window 'stdscr' as its first argument, followed by any + other arguments passed to :func:`wrapper`. + +Before calling the hook function, :func:`wrapper` turns on cbreak mode, turns +off echo, enables the terminal keypad, and initializes colors if the terminal +has color support. On exit (whether normally or by exception) it restores +cooked mode, turns on echo, and disables the terminal keypad. + diff --git a/Doc/library/custominterp.rst b/Doc/library/custominterp.rst new file mode 100644 index 0000000..2a9f0a4 --- /dev/null +++ b/Doc/library/custominterp.rst @@ -0,0 +1,20 @@ + +.. _custominterp: + +************************** +Custom Python Interpreters +************************** + +The modules described in this chapter allow writing interfaces similar to +Python's interactive interpreter. If you want a Python interpreter that +supports some special feature in addition to the Python language, you should +look at the :mod:`code` module. (The :mod:`codeop` module is lower-level, used +to support compiling a possibly-incomplete chunk of Python code.) + +The full list of modules described in this chapter is: + + +.. 
toctree:: + + code.rst + codeop.rst diff --git a/Doc/library/datatypes.rst b/Doc/library/datatypes.rst new file mode 100644 index 0000000..4cd042d --- /dev/null +++ b/Doc/library/datatypes.rst @@ -0,0 +1,37 @@ + +.. _datatypes: + +********** +Data Types +********** + +The modules described in this chapter provide a variety of specialized data +types such as dates and times, fixed-type arrays, heap queues, synchronized +queues, and sets. + +Python also provides some built-in data types, in particular, +:class:`dict`, :class:`list`, :class:`set` and :class:`frozenset`, and +:class:`tuple`. The :class:`str` class can be used to handle binary data +and 8-bit text, and the :class:`unicode` class to handle Unicode text. + +The following modules are documented in this chapter: + + +.. toctree:: + + datetime.rst + calendar.rst + collections.rst + heapq.rst + bisect.rst + array.rst + sched.rst + mutex.rst + queue.rst + weakref.rst + userdict.rst + types.rst + new.rst + copy.rst + pprint.rst + repr.rst diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst new file mode 100644 index 0000000..24d4f69 --- /dev/null +++ b/Doc/library/datetime.rst @@ -0,0 +1,1348 @@ +.. % XXX what order should the types be discussed in? + + +:mod:`datetime` --- Basic date and time types +============================================= + +.. module:: datetime + :synopsis: Basic date and time types. +.. moduleauthor:: Tim Peters +.. sectionauthor:: Tim Peters +.. sectionauthor:: A.M. Kuchling + + +.. versionadded:: 2.3 + +The :mod:`datetime` module supplies classes for manipulating dates and times in +both simple and complex ways. While date and time arithmetic is supported, the +focus of the implementation is on efficient member extraction for output +formatting and manipulation. For related +functionality, see also the :mod:`time` and :mod:`calendar` modules. + +There are two kinds of date and time objects: "naive" and "aware". 
This +distinction refers to whether the object has any notion of time zone, daylight +saving time, or other kind of algorithmic or political time adjustment. Whether +a naive :class:`datetime` object represents Coordinated Universal Time (UTC), +local time, or time in some other timezone is purely up to the program, just +like it's up to the program whether a particular number represents metres, +miles, or mass. Naive :class:`datetime` objects are easy to understand and to +work with, at the cost of ignoring some aspects of reality. + +For applications requiring more, :class:`datetime` and :class:`time` objects +have an optional time zone information member, :attr:`tzinfo`, that can contain +an instance of a subclass of the abstract :class:`tzinfo` class. These +:class:`tzinfo` objects capture information about the offset from UTC time, the +time zone name, and whether Daylight Saving Time is in effect. Note that no +concrete :class:`tzinfo` classes are supplied by the :mod:`datetime` module. +Supporting timezones at whatever level of detail is required is up to the +application. The rules for time adjustment across the world are more political +than rational, and there is no standard suitable for every application. + +The :mod:`datetime` module exports the following constants: + + +.. data:: MINYEAR + + The smallest year number allowed in a :class:`date` or :class:`datetime` object. + :const:`MINYEAR` is ``1``. + + +.. data:: MAXYEAR + + The largest year number allowed in a :class:`date` or :class:`datetime` object. + :const:`MAXYEAR` is ``9999``. + + +.. seealso:: + + Module :mod:`calendar` + General calendar related functions. + + Module :mod:`time` + Time access and conversions. + + +Available Types +--------------- + + +.. class:: date + + An idealized naive date, assuming the current Gregorian calendar always was, and + always will be, in effect. Attributes: :attr:`year`, :attr:`month`, and + :attr:`day`. + + +.. 
class:: time + + An idealized time, independent of any particular day, assuming that every day + has exactly 24\*60\*60 seconds (there is no notion of "leap seconds" here). + Attributes: :attr:`hour`, :attr:`minute`, :attr:`second`, :attr:`microsecond`, + and :attr:`tzinfo`. + + +.. class:: datetime + + A combination of a date and a time. Attributes: :attr:`year`, :attr:`month`, + :attr:`day`, :attr:`hour`, :attr:`minute`, :attr:`second`, :attr:`microsecond`, + and :attr:`tzinfo`. + + +.. class:: timedelta + + A duration expressing the difference between two :class:`date`, :class:`time`, + or :class:`datetime` instances to microsecond resolution. + + +.. class:: tzinfo + + An abstract base class for time zone information objects. These are used by the + :class:`datetime` and :class:`time` classes to provide a customizable notion of + time adjustment (for example, to account for time zone and/or daylight saving + time). + +Objects of these types are immutable. + +Objects of the :class:`date` type are always naive. + +An object *d* of type :class:`time` or :class:`datetime` may be naive or aware. +*d* is aware if ``d.tzinfo`` is not ``None`` and ``d.tzinfo.utcoffset(d)`` does +not return ``None``. If ``d.tzinfo`` is ``None``, or if ``d.tzinfo`` is not +``None`` but ``d.tzinfo.utcoffset(d)`` returns ``None``, *d* is naive. + +The distinction between naive and aware doesn't apply to :class:`timedelta` +objects. + +Subclass relationships:: + + object + timedelta + tzinfo + time + date + datetime + + +.. _datetime-timedelta: + +:class:`timedelta` Objects +-------------------------- + +A :class:`timedelta` object represents a duration, the difference between two +dates or times. + + +.. class:: timedelta([days[, seconds[, microseconds[, milliseconds[, minutes[, hours[, weeks]]]]]]]) + + All arguments are optional and default to ``0``. Arguments may be ints, longs, + or floats, and may be positive or negative. 
+ + Only *days*, *seconds* and *microseconds* are stored internally. Arguments are + converted to those units: + + * A millisecond is converted to 1000 microseconds. + * A minute is converted to 60 seconds. + * An hour is converted to 3600 seconds. + * A week is converted to 7 days. + + and days, seconds and microseconds are then normalized so that the + representation is unique, with + + * ``0 <= microseconds < 1000000`` + * ``0 <= seconds < 3600*24`` (the number of seconds in one day) + * ``-999999999 <= days <= 999999999`` + + If any argument is a float and there are fractional microseconds, the fractional + microseconds left over from all arguments are combined and their sum is rounded + to the nearest microsecond. If no argument is a float, the conversion and + normalization processes are exact (no information is lost). + + If the normalized value of days lies outside the indicated range, + :exc:`OverflowError` is raised. + + Note that normalization of negative values may be surprising at first. For + example, :: + + >>> d = timedelta(microseconds=-1) + >>> (d.days, d.seconds, d.microseconds) + (-1, 86399, 999999) + +Class attributes are: + + +.. attribute:: timedelta.min + + The most negative :class:`timedelta` object, ``timedelta(-999999999)``. + + +.. attribute:: timedelta.max + + The most positive :class:`timedelta` object, ``timedelta(days=999999999, + hours=23, minutes=59, seconds=59, microseconds=999999)``. + + +.. attribute:: timedelta.resolution + + The smallest possible difference between non-equal :class:`timedelta` objects, + ``timedelta(microseconds=1)``. + +Note that, because of normalization, ``timedelta.max`` > ``-timedelta.min``. +``-timedelta.max`` is not representable as a :class:`timedelta` object. 
+ +Instance attributes (read-only): + ++------------------+--------------------------------------------+ +| Attribute | Value | ++==================+============================================+ +| ``days`` | Between -999999999 and 999999999 inclusive | ++------------------+--------------------------------------------+ +| ``seconds`` | Between 0 and 86399 inclusive | ++------------------+--------------------------------------------+ +| ``microseconds`` | Between 0 and 999999 inclusive | ++------------------+--------------------------------------------+ + +Supported operations: + +.. % XXX this table is too wide! + ++--------------------------------+-----------------------------------------------+ +| Operation | Result | ++================================+===============================================+ +| ``t1 = t2 + t3`` | Sum of *t2* and *t3*. Afterwards *t1*-*t2* == | +| | *t3* and *t1*-*t3* == *t2* are true. (1) | ++--------------------------------+-----------------------------------------------+ +| ``t1 = t2 - t3`` | Difference of *t2* and *t3*. Afterwards *t1* | +| | == *t2* - *t3* and *t2* == *t1* + *t3* are | +| | true. (1) | ++--------------------------------+-----------------------------------------------+ +| ``t1 = t2 * i or t1 = i * t2`` | Delta multiplied by an integer or long. | +| | Afterwards *t1* // i == *t2* is true, | +| | provided ``i != 0``. | ++--------------------------------+-----------------------------------------------+ +| | In general, *t1* \* i == *t1* \* (i-1) + *t1* | +| | is true. (1) | ++--------------------------------+-----------------------------------------------+ +| ``t1 = t2 // i`` | The floor is computed and the remainder (if | +| | any) is thrown away. (3) | ++--------------------------------+-----------------------------------------------+ +| ``+t1`` | Returns a :class:`timedelta` object with the | +| | same value. 
(2) | ++--------------------------------+-----------------------------------------------+ +| ``-t1`` | equivalent to :class:`timedelta`\ | +| | (-*t1.days*, -*t1.seconds*, | +| | -*t1.microseconds*), and to *t1*\* -1. (1)(4) | ++--------------------------------+-----------------------------------------------+ +| ``abs(t)`` | equivalent to +*t* when ``t.days >= 0``, and | +| | to -*t* when ``t.days < 0``. (2) | ++--------------------------------+-----------------------------------------------+ + +Notes: + +(1) + This is exact, but may overflow. + +(2) + This is exact, and cannot overflow. + +(3) + Division by 0 raises :exc:`ZeroDivisionError`. + +(4) + -*timedelta.max* is not representable as a :class:`timedelta` object. + +In addition to the operations listed above :class:`timedelta` objects support +certain additions and subtractions with :class:`date` and :class:`datetime` +objects (see below). + +Comparisons of :class:`timedelta` objects are supported with the +:class:`timedelta` object representing the smaller duration considered to be the +smaller timedelta. In order to stop mixed-type comparisons from falling back to +the default comparison by object address, when a :class:`timedelta` object is +compared to an object of a different type, :exc:`TypeError` is raised unless the +comparison is ``==`` or ``!=``. The latter cases return :const:`False` or +:const:`True`, respectively. + +:class:`timedelta` objects are hashable (usable as dictionary keys), support +efficient pickling, and in Boolean contexts, a :class:`timedelta` object is +considered to be true if and only if it isn't equal to ``timedelta(0)``. + + +.. _datetime-date: + +:class:`date` Objects +--------------------- + +A :class:`date` object represents a date (year, month and day) in an idealized +calendar, the current Gregorian calendar indefinitely extended in both +directions. January 1 of year 1 is called day number 1, January 2 of year 1 is +called day number 2, and so on. 
This matches the definition of the "proleptic +Gregorian" calendar in Dershowitz and Reingold's book Calendrical Calculations, +where it's the base calendar for all computations. See the book for algorithms +for converting between proleptic Gregorian ordinals and many other calendar +systems. + + +.. class:: date(year, month, day) + + All arguments are required. Arguments may be ints or longs, in the following + ranges: + + * ``MINYEAR <= year <= MAXYEAR`` + * ``1 <= month <= 12`` + * ``1 <= day <= number of days in the given month and year`` + + If an argument outside those ranges is given, :exc:`ValueError` is raised. + +Other constructors, all class methods: + + +.. method:: date.today() + + Return the current local date. This is equivalent to + ``date.fromtimestamp(time.time())``. + + +.. method:: date.fromtimestamp(timestamp) + + Return the local date corresponding to the POSIX timestamp, such as is returned + by :func:`time.time`. This may raise :exc:`ValueError`, if the timestamp is out + of the range of values supported by the platform C :cfunc:`localtime` function. + It's common for this to be restricted to years from 1970 through 2038. Note + that on non-POSIX systems that include leap seconds in their notion of a + timestamp, leap seconds are ignored by :meth:`fromtimestamp`. + + +.. method:: date.fromordinal(ordinal) + + Return the date corresponding to the proleptic Gregorian ordinal, where January + 1 of year 1 has ordinal 1. :exc:`ValueError` is raised unless ``1 <= ordinal <= + date.max.toordinal()``. For any date *d*, ``date.fromordinal(d.toordinal()) == + d``. + +Class attributes: + + +.. attribute:: date.min + + The earliest representable date, ``date(MINYEAR, 1, 1)``. + + +.. attribute:: date.max + + The latest representable date, ``date(MAXYEAR, 12, 31)``. + + +.. attribute:: date.resolution + + The smallest possible difference between non-equal date objects, + ``timedelta(days=1)``. + +Instance attributes (read-only): + + +.. 
attribute:: date.year + + Between :const:`MINYEAR` and :const:`MAXYEAR` inclusive. + + +.. attribute:: date.month + + Between 1 and 12 inclusive. + + +.. attribute:: date.day + + Between 1 and the number of days in the given month of the given year. + +Supported operations: + ++-------------------------------+----------------------------------------------+ +| Operation | Result | ++===============================+==============================================+ +| ``date2 = date1 + timedelta`` | *date2* is ``timedelta.days`` days removed | +| | from *date1*. (1) | ++-------------------------------+----------------------------------------------+ +| ``date2 = date1 - timedelta`` | Computes *date2* such that ``date2 + | +| | timedelta == date1``. (2) | ++-------------------------------+----------------------------------------------+ +| ``timedelta = date1 - date2`` | \(3) | ++-------------------------------+----------------------------------------------+ +| ``date1 < date2`` | *date1* is considered less than *date2* when | +| | *date1* precedes *date2* in time. (4) | ++-------------------------------+----------------------------------------------+ + +Notes: + +(1) + *date2* is moved forward in time if ``timedelta.days > 0``, or backward if + ``timedelta.days < 0``. Afterward ``date2 - date1 == timedelta.days``. + ``timedelta.seconds`` and ``timedelta.microseconds`` are ignored. + :exc:`OverflowError` is raised if ``date2.year`` would be smaller than + :const:`MINYEAR` or larger than :const:`MAXYEAR`. + +(2) + This isn't quite equivalent to date1 + (-timedelta), because -timedelta in + isolation can overflow in cases where date1 - timedelta does not. + ``timedelta.seconds`` and ``timedelta.microseconds`` are ignored. + +(3) + This is exact, and cannot overflow. timedelta.seconds and + timedelta.microseconds are 0, and date2 + timedelta == date1 after. + +(4) + In other words, ``date1 < date2`` if and only if ``date1.toordinal() < + date2.toordinal()``. 
In order to stop comparison from falling back to the + default scheme of comparing object addresses, date comparison normally raises + :exc:`TypeError` if the other comparand isn't also a :class:`date` object. + However, ``NotImplemented`` is returned instead if the other comparand has a + :meth:`timetuple` attribute. This hook gives other kinds of date objects a + chance at implementing mixed-type comparison. If not, when a :class:`date` + object is compared to an object of a different type, :exc:`TypeError` is raised + unless the comparison is ``==`` or ``!=``. The latter cases return + :const:`False` or :const:`True`, respectively. + +Dates can be used as dictionary keys. In Boolean contexts, all :class:`date` +objects are considered to be true. + +Instance methods: + + +.. method:: date.replace(year, month, day) + + Return a date with the same value, except for those members given new values by + whichever keyword arguments are specified. For example, if ``d == date(2002, + 12, 31)``, then ``d.replace(day=26) == date(2002, 12, 26)``. + + +.. method:: date.timetuple() + + Return a :class:`time.struct_time` such as returned by :func:`time.localtime`. + The hours, minutes and seconds are 0, and the DST flag is -1. ``d.timetuple()`` + is equivalent to ``time.struct_time((d.year, d.month, d.day, 0, 0, 0, + d.weekday(), d.toordinal() - date(d.year, 1, 1).toordinal() + 1, -1))`` + + +.. method:: date.toordinal() + + Return the proleptic Gregorian ordinal of the date, where January 1 of year 1 + has ordinal 1. For any :class:`date` object *d*, + ``date.fromordinal(d.toordinal()) == d``. + + +.. method:: date.weekday() + + Return the day of the week as an integer, where Monday is 0 and Sunday is 6. + For example, ``date(2002, 12, 4).weekday() == 2``, a Wednesday. See also + :meth:`isoweekday`. + + +.. method:: date.isoweekday() + + Return the day of the week as an integer, where Monday is 1 and Sunday is 7. 
+ For example, ``date(2002, 12, 4).isoweekday() == 3``, a Wednesday. See also + :meth:`weekday`, :meth:`isocalendar`. + + +.. method:: date.isocalendar() + + Return a 3-tuple, (ISO year, ISO week number, ISO weekday). + + The ISO calendar is a widely used variant of the Gregorian calendar. See + http://www.phys.uu.nl/~vgent/calendar/isocalendar.htm for a good explanation. + + The ISO year consists of 52 or 53 full weeks, where a week starts on a + Monday and ends on a Sunday. The first week of an ISO year is the first + (Gregorian) calendar week of a year containing a Thursday. This is called week + number 1, and the ISO year of that Thursday is the same as its Gregorian year. + + For example, 2004 begins on a Thursday, so the first week of ISO year 2004 + begins on Monday, 29 Dec 2003 and ends on Sunday, 4 Jan 2004, so that + ``date(2003, 12, 29).isocalendar() == (2004, 1, 1)`` and ``date(2004, 1, + 4).isocalendar() == (2004, 1, 7)``. + + +.. method:: date.isoformat() + + Return a string representing the date in ISO 8601 format, 'YYYY-MM-DD'. For + example, ``date(2002, 12, 4).isoformat() == '2002-12-04'``. + + +.. method:: date.__str__() + + For a date *d*, ``str(d)`` is equivalent to ``d.isoformat()``. + + +.. method:: date.ctime() + + Return a string representing the date, for example ``date(2002, 12, + 4).ctime() == 'Wed Dec 4 00:00:00 2002'``. ``d.ctime()`` is equivalent to + ``time.ctime(time.mktime(d.timetuple()))`` on platforms where the native C + :cfunc:`ctime` function (which :func:`time.ctime` invokes, but which + :meth:`date.ctime` does not invoke) conforms to the C standard. + + +.. method:: date.strftime(format) + + Return a string representing the date, controlled by an explicit format string. + Format codes referring to hours, minutes or seconds will see 0 values. See + section :ref:`strftime-behavior`. + + +.. 
_datetime-datetime: + +:class:`datetime` Objects +------------------------- + +A :class:`datetime` object is a single object containing all the information +from a :class:`date` object and a :class:`time` object. Like a :class:`date` +object, :class:`datetime` assumes the current Gregorian calendar extended in +both directions; like a time object, :class:`datetime` assumes there are exactly +3600\*24 seconds in every day. + +Constructor: + + +.. class:: datetime(year, month, day[, hour[, minute[, second[, microsecond[, tzinfo]]]]]) + + The year, month and day arguments are required. *tzinfo* may be ``None``, or an + instance of a :class:`tzinfo` subclass. The remaining arguments may be ints or + longs, in the following ranges: + + * ``MINYEAR <= year <= MAXYEAR`` + * ``1 <= month <= 12`` + * ``1 <= day <= number of days in the given month and year`` + * ``0 <= hour < 24`` + * ``0 <= minute < 60`` + * ``0 <= second < 60`` + * ``0 <= microsecond < 1000000`` + + If an argument outside those ranges is given, :exc:`ValueError` is raised. + +Other constructors, all class methods: + + +.. method:: datetime.today() + + Return the current local datetime, with :attr:`tzinfo` ``None``. This is + equivalent to ``datetime.fromtimestamp(time.time())``. See also :meth:`now`, + :meth:`fromtimestamp`. + + +.. method:: datetime.now([tz]) + + Return the current local date and time. If optional argument *tz* is ``None`` + or not specified, this is like :meth:`today`, but, if possible, supplies more + precision than can be gotten from going through a :func:`time.time` timestamp + (for example, this may be possible on platforms supplying the C + :cfunc:`gettimeofday` function). + + Else *tz* must be an instance of a class :class:`tzinfo` subclass, and the + current date and time are converted to *tz*'s time zone. In this case the + result is equivalent to ``tz.fromutc(datetime.utcnow().replace(tzinfo=tz))``. + See also :meth:`today`, :meth:`utcnow`. + + +.. 
method:: datetime.utcnow() + + Return the current UTC date and time, with :attr:`tzinfo` ``None``. This is like + :meth:`now`, but returns the current UTC date and time, as a naive + :class:`datetime` object. See also :meth:`now`. + + +.. method:: datetime.fromtimestamp(timestamp[, tz]) + + Return the local date and time corresponding to the POSIX timestamp, such as is + returned by :func:`time.time`. If optional argument *tz* is ``None`` or not + specified, the timestamp is converted to the platform's local date and time, and + the returned :class:`datetime` object is naive. + + Else *tz* must be an instance of a class :class:`tzinfo` subclass, and the + timestamp is converted to *tz*'s time zone. In this case the result is + equivalent to + ``tz.fromutc(datetime.utcfromtimestamp(timestamp).replace(tzinfo=tz))``. + + :meth:`fromtimestamp` may raise :exc:`ValueError`, if the timestamp is out of + the range of values supported by the platform C :cfunc:`localtime` or + :cfunc:`gmtime` functions. It's common for this to be restricted to years in + 1970 through 2038. Note that on non-POSIX systems that include leap seconds in + their notion of a timestamp, leap seconds are ignored by :meth:`fromtimestamp`, + and then it's possible to have two timestamps differing by a second that yield + identical :class:`datetime` objects. See also :meth:`utcfromtimestamp`. + + +.. method:: datetime.utcfromtimestamp(timestamp) + + Return the UTC :class:`datetime` corresponding to the POSIX timestamp, with + :attr:`tzinfo` ``None``. This may raise :exc:`ValueError`, if the timestamp is + out of the range of values supported by the platform C :cfunc:`gmtime` function. + It's common for this to be restricted to years in 1970 through 2038. See also + :meth:`fromtimestamp`. + + +.. method:: datetime.fromordinal(ordinal) + + Return the :class:`datetime` corresponding to the proleptic Gregorian ordinal, + where January 1 of year 1 has ordinal 1. 
:exc:`ValueError` is raised unless ``1 + <= ordinal <= datetime.max.toordinal()``. The hour, minute, second and + microsecond of the result are all 0, and :attr:`tzinfo` is ``None``. + + +.. method:: datetime.combine(date, time) + + Return a new :class:`datetime` object whose date members are equal to the given + :class:`date` object's, and whose time and :attr:`tzinfo` members are equal to + the given :class:`time` object's. For any :class:`datetime` object *d*, ``d == + datetime.combine(d.date(), d.timetz())``. If date is a :class:`datetime` + object, its time and :attr:`tzinfo` members are ignored. + + +.. method:: datetime.strptime(date_string, format) + + Return a :class:`datetime` corresponding to *date_string*, parsed according to + *format*. This is equivalent to ``datetime(*(time.strptime(date_string, + format)[0:6]))``. :exc:`ValueError` is raised if the date_string and format + can't be parsed by :func:`time.strptime` or if it returns a value which isn't a + time tuple. + + .. versionadded:: 2.5 + +Class attributes: + + +.. attribute:: datetime.min + + The earliest representable :class:`datetime`, ``datetime(MINYEAR, 1, 1, + tzinfo=None)``. + + +.. attribute:: datetime.max + + The latest representable :class:`datetime`, ``datetime(MAXYEAR, 12, 31, 23, 59, + 59, 999999, tzinfo=None)``. + + +.. attribute:: datetime.resolution + + The smallest possible difference between non-equal :class:`datetime` objects, + ``timedelta(microseconds=1)``. + +Instance attributes (read-only): + + +.. attribute:: datetime.year + + Between :const:`MINYEAR` and :const:`MAXYEAR` inclusive. + + +.. attribute:: datetime.month + + Between 1 and 12 inclusive. + + +.. attribute:: datetime.day + + Between 1 and the number of days in the given month of the given year. + + +.. attribute:: datetime.hour + + In ``range(24)``. + + +.. attribute:: datetime.minute + + In ``range(60)``. + + +.. attribute:: datetime.second + + In ``range(60)``. + + +.. 
attribute:: datetime.microsecond + + In ``range(1000000)``. + + +.. attribute:: datetime.tzinfo + + The object passed as the *tzinfo* argument to the :class:`datetime` constructor, + or ``None`` if none was passed. + +Supported operations: + ++---------------------------------------+-------------------------------+ +| Operation | Result | ++=======================================+===============================+ +| ``datetime2 = datetime1 + timedelta`` | \(1) | ++---------------------------------------+-------------------------------+ +| ``datetime2 = datetime1 - timedelta`` | \(2) | ++---------------------------------------+-------------------------------+ +| ``timedelta = datetime1 - datetime2`` | \(3) | ++---------------------------------------+-------------------------------+ +| ``datetime1 < datetime2`` | Compares :class:`datetime` to | +| | :class:`datetime`. (4) | ++---------------------------------------+-------------------------------+ + +(1) + datetime2 is a duration of timedelta removed from datetime1, moving forward in + time if ``timedelta.days`` > 0, or backward if ``timedelta.days`` < 0. The + result has the same :attr:`tzinfo` member as the input datetime, and datetime2 - + datetime1 == timedelta after. :exc:`OverflowError` is raised if datetime2.year + would be smaller than :const:`MINYEAR` or larger than :const:`MAXYEAR`. Note + that no time zone adjustments are done even if the input is an aware object. + +(2) + Computes the datetime2 such that datetime2 + timedelta == datetime1. As for + addition, the result has the same :attr:`tzinfo` member as the input datetime, + and no time zone adjustments are done even if the input is aware. This isn't + quite equivalent to datetime1 + (-timedelta), because -timedelta in isolation + can overflow in cases where datetime1 - timedelta does not. + +(3) + Subtraction of a :class:`datetime` from a :class:`datetime` is defined only if + both operands are naive, or if both are aware. 
If one is aware and the other is + naive, :exc:`TypeError` is raised. + + If both are naive, or both are aware and have the same :attr:`tzinfo` member, + the :attr:`tzinfo` members are ignored, and the result is a :class:`timedelta` + object *t* such that ``datetime2 + t == datetime1``. No time zone adjustments + are done in this case. + + If both are aware and have different :attr:`tzinfo` members, ``a-b`` acts as if + *a* and *b* were first converted to naive UTC datetimes. The result is + ``(a.replace(tzinfo=None) - a.utcoffset()) - (b.replace(tzinfo=None) - + b.utcoffset())`` except that the implementation never overflows. + +(4) + *datetime1* is considered less than *datetime2* when *datetime1* precedes + *datetime2* in time. + + If one comparand is naive and the other is aware, :exc:`TypeError` is raised. + If both comparands are aware, and have the same :attr:`tzinfo` member, the + common :attr:`tzinfo` member is ignored and the base datetimes are compared. If + both comparands are aware and have different :attr:`tzinfo` members, the + comparands are first adjusted by subtracting their UTC offsets (obtained from + ``self.utcoffset()``). + + .. note:: + + In order to stop comparison from falling back to the default scheme of comparing + object addresses, datetime comparison normally raises :exc:`TypeError` if the + other comparand isn't also a :class:`datetime` object. However, + ``NotImplemented`` is returned instead if the other comparand has a + :meth:`timetuple` attribute. This hook gives other kinds of date objects a + chance at implementing mixed-type comparison. If not, when a :class:`datetime` + object is compared to an object of a different type, :exc:`TypeError` is raised + unless the comparison is ``==`` or ``!=``. The latter cases return + :const:`False` or :const:`True`, respectively. + +:class:`datetime` objects can be used as dictionary keys. In Boolean contexts, +all :class:`datetime` objects are considered to be true. 
+ +Instance methods: + + +.. method:: datetime.date() + + Return :class:`date` object with same year, month and day. + + +.. method:: datetime.time() + + Return :class:`time` object with same hour, minute, second and microsecond. + :attr:`tzinfo` is ``None``. See also method :meth:`timetz`. + + +.. method:: datetime.timetz() + + Return :class:`time` object with same hour, minute, second, microsecond, and + tzinfo members. See also method :meth:`time`. + + +.. method:: datetime.replace([year[, month[, day[, hour[, minute[, second[, microsecond[, tzinfo]]]]]]]]) + + Return a datetime with the same members, except for those members given new + values by whichever keyword arguments are specified. Note that ``tzinfo=None`` + can be specified to create a naive datetime from an aware datetime with no + conversion of date and time members. + + +.. method:: datetime.astimezone(tz) + + Return a :class:`datetime` object with new :attr:`tzinfo` member *tz*, adjusting + the date and time members so the result is the same UTC time as *self*, but in + *tz*'s local time. + + *tz* must be an instance of a :class:`tzinfo` subclass, and its + :meth:`utcoffset` and :meth:`dst` methods must not return ``None``. *self* must + be aware (``self.tzinfo`` must not be ``None``, and ``self.utcoffset()`` must + not return ``None``). + + If ``self.tzinfo`` is *tz*, ``self.astimezone(tz)`` is equal to *self*: no + adjustment of date or time members is performed. Else the result is local time + in time zone *tz*, representing the same UTC time as *self*: after ``astz = + dt.astimezone(tz)``, ``astz - astz.utcoffset()`` will usually have the same date + and time members as ``dt - dt.utcoffset()``. The discussion of class + :class:`tzinfo` explains the cases at Daylight Saving Time transition boundaries + where this cannot be achieved (an issue only if *tz* models both standard and + daylight time). 
+ + If you merely want to attach a time zone object *tz* to a datetime *dt* without + adjustment of date and time members, use ``dt.replace(tzinfo=tz)``. If you + merely want to remove the time zone object from an aware datetime *dt* without + conversion of date and time members, use ``dt.replace(tzinfo=None)``. + + Note that the default :meth:`tzinfo.fromutc` method can be overridden in a + :class:`tzinfo` subclass to affect the result returned by :meth:`astimezone`. + Ignoring error cases, :meth:`astimezone` acts like:: + + def astimezone(self, tz): + if self.tzinfo is tz: + return self + # Convert self to UTC, and attach the new time zone object. + utc = (self - self.utcoffset()).replace(tzinfo=tz) + # Convert from UTC to tz's local time. + return tz.fromutc(utc) + + +.. method:: datetime.utcoffset() + + If :attr:`tzinfo` is ``None``, returns ``None``, else returns + ``self.tzinfo.utcoffset(self)``, and raises an exception if the latter doesn't + return ``None``, or a :class:`timedelta` object representing a whole number of + minutes with magnitude less than one day. + + +.. method:: datetime.dst() + + If :attr:`tzinfo` is ``None``, returns ``None``, else returns + ``self.tzinfo.dst(self)``, and raises an exception if the latter doesn't return + ``None``, or a :class:`timedelta` object representing a whole number of minutes + with magnitude less than one day. + + +.. method:: datetime.tzname() + + If :attr:`tzinfo` is ``None``, returns ``None``, else returns + ``self.tzinfo.tzname(self)``, and raises an exception if the latter doesn't return + ``None`` or a string object. + + +.. method:: datetime.timetuple() + + Return a :class:`time.struct_time` such as returned by :func:`time.localtime`. 
+ ``d.timetuple()`` is equivalent to ``time.struct_time((d.year, d.month, d.day, + d.hour, d.minute, d.second, d.weekday(), d.toordinal() - date(d.year, 1, + 1).toordinal() + 1, dst))``. The :attr:`tm_isdst` flag of the result is set + according to the :meth:`dst` method: if :attr:`tzinfo` is ``None`` or :meth:`dst` + returns ``None``, :attr:`tm_isdst` is set to ``-1``; else if :meth:`dst` + returns a non-zero value, :attr:`tm_isdst` is set to ``1``; else ``tm_isdst`` is + set to ``0``. + + +.. method:: datetime.utctimetuple() + + If :class:`datetime` instance *d* is naive, this is the same as + ``d.timetuple()`` except that :attr:`tm_isdst` is forced to 0 regardless of what + ``d.dst()`` returns. DST is never in effect for a UTC time. + + If *d* is aware, *d* is normalized to UTC time, by subtracting + ``d.utcoffset()``, and a :class:`time.struct_time` for the normalized time is + returned. :attr:`tm_isdst` is forced to 0. Note that the result's + :attr:`tm_year` member may be :const:`MINYEAR`\ -1 or :const:`MAXYEAR`\ +1, if + *d*.year was ``MINYEAR`` or ``MAXYEAR`` and UTC adjustment spills over a year + boundary. + + +.. method:: datetime.toordinal() + + Return the proleptic Gregorian ordinal of the date. The same as + ``self.date().toordinal()``. + + +.. method:: datetime.weekday() + + Return the day of the week as an integer, where Monday is 0 and Sunday is 6. + The same as ``self.date().weekday()``. See also :meth:`isoweekday`. + + +.. method:: datetime.isoweekday() + + Return the day of the week as an integer, where Monday is 1 and Sunday is 7. + The same as ``self.date().isoweekday()``. See also :meth:`weekday`, + :meth:`isocalendar`. + + +.. method:: datetime.isocalendar() + + Return a 3-tuple, (ISO year, ISO week number, ISO weekday). The same as + ``self.date().isocalendar()``. + + +.. 
method:: datetime.isoformat([sep]) + + Return a string representing the date and time in ISO 8601 format, + YYYY-MM-DDTHH:MM:SS.mmmmmm or, if :attr:`microsecond` is 0, + YYYY-MM-DDTHH:MM:SS + + If :meth:`utcoffset` does not return ``None``, a 6-character string is + appended, giving the UTC offset in (signed) hours and minutes: + YYYY-MM-DDTHH:MM:SS.mmmmmm+HH:MM or, if :attr:`microsecond` is 0 + YYYY-MM-DDTHH:MM:SS+HH:MM + + The optional argument *sep* (default ``'T'``) is a one-character separator, + placed between the date and time portions of the result. For example, :: + + >>> from datetime import tzinfo, timedelta, datetime + >>> class TZ(tzinfo): + ... def utcoffset(self, dt): return timedelta(minutes=-399) + ... + >>> datetime(2002, 12, 25, tzinfo=TZ()).isoformat(' ') + '2002-12-25 00:00:00-06:39' + + +.. method:: datetime.__str__() + + For a :class:`datetime` instance *d*, ``str(d)`` is equivalent to + ``d.isoformat(' ')``. + + +.. method:: datetime.ctime() + + Return a string representing the date and time, for example ``datetime(2002, 12, + 4, 20, 30, 40).ctime() == 'Wed Dec 4 20:30:40 2002'``. ``d.ctime()`` is + equivalent to ``time.ctime(time.mktime(d.timetuple()))`` on platforms where the + native C :cfunc:`ctime` function (which :func:`time.ctime` invokes, but which + :meth:`datetime.ctime` does not invoke) conforms to the C standard. + + +.. method:: datetime.strftime(format) + + Return a string representing the date and time, controlled by an explicit format + string. See section :ref:`strftime-behavior`. + + +.. _datetime-time: + +:class:`time` Objects +--------------------- + +A time object represents a (local) time of day, independent of any particular +day, and subject to adjustment via a :class:`tzinfo` object. + + +.. class:: time([hour[, minute[, second[, microsecond[, tzinfo]]]]]) + + All arguments are optional. *tzinfo* may be ``None``, or an instance of a + :class:`tzinfo` subclass. 
The remaining arguments may be ints or longs, in the + following ranges: + + * ``0 <= hour < 24`` + * ``0 <= minute < 60`` + * ``0 <= second < 60`` + * ``0 <= microsecond < 1000000``. + + If an argument outside those ranges is given, :exc:`ValueError` is raised. All + default to ``0`` except *tzinfo*, which defaults to :const:`None`. + +Class attributes: + + +.. attribute:: time.min + + The earliest representable :class:`time`, ``time(0, 0, 0, 0)``. + + +.. attribute:: time.max + + The latest representable :class:`time`, ``time(23, 59, 59, 999999)``. + + +.. attribute:: time.resolution + + The smallest possible difference between non-equal :class:`time` objects, + ``timedelta(microseconds=1)``, although note that arithmetic on :class:`time` + objects is not supported. + +Instance attributes (read-only): + + +.. attribute:: time.hour + + In ``range(24)``. + + +.. attribute:: time.minute + + In ``range(60)``. + + +.. attribute:: time.second + + In ``range(60)``. + + +.. attribute:: time.microsecond + + In ``range(1000000)``. + + +.. attribute:: time.tzinfo + + The object passed as the tzinfo argument to the :class:`time` constructor, or + ``None`` if none was passed. + +Supported operations: + +* comparison of :class:`time` to :class:`time`, where *a* is considered less + than *b* when *a* precedes *b* in time. If one comparand is naive and the other + is aware, :exc:`TypeError` is raised. If both comparands are aware, and have + the same :attr:`tzinfo` member, the common :attr:`tzinfo` member is ignored and + the base times are compared. If both comparands are aware and have different + :attr:`tzinfo` members, the comparands are first adjusted by subtracting their + UTC offsets (obtained from ``self.utcoffset()``). In order to stop mixed-type + comparisons from falling back to the default comparison by object address, when + a :class:`time` object is compared to an object of a different type, + :exc:`TypeError` is raised unless the comparison is ``==`` or ``!=``. 
The + latter cases return :const:`False` or :const:`True`, respectively. + +* hash, use as dict key + +* efficient pickling + +* in Boolean contexts, a :class:`time` object is considered to be true if and + only if, after converting it to minutes and subtracting :meth:`utcoffset` (or + ``0`` if that's ``None``), the result is non-zero. + +Instance methods: + + +.. method:: time.replace([hour[, minute[, second[, microsecond[, tzinfo]]]]]) + + Return a :class:`time` with the same value, except for those members given new + values by whichever keyword arguments are specified. Note that ``tzinfo=None`` + can be specified to create a naive :class:`time` from an aware :class:`time`, + without conversion of the time members. + + +.. method:: time.isoformat() + + Return a string representing the time in ISO 8601 format, HH:MM:SS.mmmmmm or, if + self.microsecond is 0, HH:MM:SS If :meth:`utcoffset` does not return ``None``, a + 6-character string is appended, giving the UTC offset in (signed) hours and + minutes: HH:MM:SS.mmmmmm+HH:MM or, if self.microsecond is 0, HH:MM:SS+HH:MM + + +.. method:: time.__str__() + + For a time *t*, ``str(t)`` is equivalent to ``t.isoformat()``. + + +.. method:: time.strftime(format) + + Return a string representing the time, controlled by an explicit format string. + See section :ref:`strftime-behavior`. + + +.. method:: time.utcoffset() + + If :attr:`tzinfo` is ``None``, returns ``None``, else returns + ``self.tzinfo.utcoffset(None)``, and raises an exception if the latter doesn't + return ``None`` or a :class:`timedelta` object representing a whole number of + minutes with magnitude less than one day. + + +.. method:: time.dst() + + If :attr:`tzinfo` is ``None``, returns ``None``, else returns + ``self.tzinfo.dst(None)``, and raises an exception if the latter doesn't return + ``None``, or a :class:`timedelta` object representing a whole number of minutes + with magnitude less than one day. + + +.. 
method:: time.tzname() + + If :attr:`tzinfo` is ``None``, returns ``None``, else returns + ``self.tzinfo.tzname(None)``, or raises an exception if the latter doesn't + return ``None`` or a string object. + + +.. _datetime-tzinfo: + +:class:`tzinfo` Objects +----------------------- + +:class:`tzinfo` is an abstract base class, meaning that this class should not be +instantiated directly. You need to derive a concrete subclass, and (at least) +supply implementations of the standard :class:`tzinfo` methods needed by the +:class:`datetime` methods you use. The :mod:`datetime` module does not supply +any concrete subclasses of :class:`tzinfo`. + +An instance of (a concrete subclass of) :class:`tzinfo` can be passed to the +constructors for :class:`datetime` and :class:`time` objects. The latter objects +view their members as being in local time, and the :class:`tzinfo` object +supports methods revealing offset of local time from UTC, the name of the time +zone, and DST offset, all relative to a date or time object passed to them. + +Special requirement for pickling: A :class:`tzinfo` subclass must have an +:meth:`__init__` method that can be called with no arguments, else it can be +pickled but possibly not unpickled again. This is a technical requirement that +may be relaxed in the future. + +A concrete subclass of :class:`tzinfo` may need to implement the following +methods. Exactly which methods are needed depends on the uses made of aware +:mod:`datetime` objects. If in doubt, simply implement all of them. + + +.. method:: tzinfo.utcoffset(self, dt) + + Return offset of local time from UTC, in minutes east of UTC. If local time is + west of UTC, this should be negative. Note that this is intended to be the + total offset from UTC; for example, if a :class:`tzinfo` object represents both + time zone and DST adjustments, :meth:`utcoffset` should return their sum. If + the UTC offset isn't known, return ``None``. 
Else the value returned must be a + :class:`timedelta` object specifying a whole number of minutes in the range + -1439 to 1439 inclusive (1440 = 24\*60; the magnitude of the offset must be less + than one day). Most implementations of :meth:`utcoffset` will probably look + like one of these two:: + + return CONSTANT # fixed-offset class + return CONSTANT + self.dst(dt) # daylight-aware class + + If :meth:`utcoffset` does not return ``None``, :meth:`dst` should not return + ``None`` either. + + The default implementation of :meth:`utcoffset` raises + :exc:`NotImplementedError`. + + +.. method:: tzinfo.dst(self, dt) + + Return the daylight saving time (DST) adjustment, in minutes east of UTC, or + ``None`` if DST information isn't known. Return ``timedelta(0)`` if DST is not + in effect. If DST is in effect, return the offset as a :class:`timedelta` object + (see :meth:`utcoffset` for details). Note that DST offset, if applicable, has + already been added to the UTC offset returned by :meth:`utcoffset`, so there's + no need to consult :meth:`dst` unless you're interested in obtaining DST info + separately. For example, :meth:`datetime.timetuple` calls its :attr:`tzinfo` + member's :meth:`dst` method to determine how the :attr:`tm_isdst` flag should be + set, and :meth:`tzinfo.fromutc` calls :meth:`dst` to account for DST changes + when crossing time zones. + + An instance *tz* of a :class:`tzinfo` subclass that models both standard and + daylight times must be consistent in this sense: + + ``tz.utcoffset(dt) - tz.dst(dt)`` + + must return the same result for every :class:`datetime` *dt* with ``dt.tzinfo == + tz`` For sane :class:`tzinfo` subclasses, this expression yields the time + zone's "standard offset", which should not depend on the date or the time, but + only on geographic location. The implementation of :meth:`datetime.astimezone` + relies on this, but cannot detect violations; it's the programmer's + responsibility to ensure it. 
If a :class:`tzinfo` subclass cannot guarantee + this, it may be able to override the default implementation of + :meth:`tzinfo.fromutc` to work correctly with :meth:`astimezone` regardless. + + Most implementations of :meth:`dst` will probably look like one of these two:: + + def dst(self, dt): + # a fixed-offset class: doesn't account for DST + return timedelta(0) + + or :: + + def dst(self, dt): + # Code to set dston and dstoff to the time zone's DST + # transition times based on the input dt.year, and expressed + # in standard local time. Then + + if dston <= dt.replace(tzinfo=None) < dstoff: + return timedelta(hours=1) + else: + return timedelta(0) + + The default implementation of :meth:`dst` raises :exc:`NotImplementedError`. + + +.. method:: tzinfo.tzname(self, dt) + + Return the time zone name corresponding to the :class:`datetime` object *dt*, as + a string. Nothing about string names is defined by the :mod:`datetime` module, + and there's no requirement that it mean anything in particular. For example, + "GMT", "UTC", "-500", "-5:00", "EDT", "US/Eastern", "America/New York" are all + valid replies. Return ``None`` if a string name isn't known. Note that this is + a method rather than a fixed string primarily because some :class:`tzinfo` + subclasses will wish to return different names depending on the specific value + of *dt* passed, especially if the :class:`tzinfo` class is accounting for + daylight time. + + The default implementation of :meth:`tzname` raises :exc:`NotImplementedError`. + +These methods are called by a :class:`datetime` or :class:`time` object, in +response to their methods of the same names. A :class:`datetime` object passes +itself as the argument, and a :class:`time` object passes ``None`` as the +argument. A :class:`tzinfo` subclass's methods should therefore be prepared to +accept a *dt* argument of ``None``, or of class :class:`datetime`. + +When ``None`` is passed, it's up to the class designer to decide the best +response. 
For example, returning ``None`` is appropriate if the class wishes to +say that time objects don't participate in the :class:`tzinfo` protocols. It +may be more useful for ``utcoffset(None)`` to return the standard UTC offset, as +there is no other convention for discovering the standard offset. + +When a :class:`datetime` object is passed in response to a :class:`datetime` +method, ``dt.tzinfo`` is the same object as *self*. :class:`tzinfo` methods can +rely on this, unless user code calls :class:`tzinfo` methods directly. The +intent is that the :class:`tzinfo` methods interpret *dt* as being in local +time, and need not worry about objects in other timezones. + +There is one more :class:`tzinfo` method that a subclass may wish to override: + + +.. method:: tzinfo.fromutc(self, dt) + + This is called from the default :meth:`datetime.astimezone` implementation. + When called from that, ``dt.tzinfo`` is *self*, and *dt*'s date and time members + are to be viewed as expressing a UTC time. The purpose of :meth:`fromutc` is to + adjust the date and time members, returning an equivalent datetime in *self*'s + local time. + + Most :class:`tzinfo` subclasses should be able to inherit the default + :meth:`fromutc` implementation without problems. It's strong enough to handle + fixed-offset time zones, and time zones accounting for both standard and + daylight time, and the latter even if the DST transition times differ in + different years. An example of a time zone the default :meth:`fromutc` + implementation may not handle correctly in all cases is one where the standard + offset (from UTC) depends on the specific date and time passed, which can happen + for political reasons. The default implementations of :meth:`astimezone` and + :meth:`fromutc` may not produce the result you want if the result is one of the + hours straddling the moment the standard offset changes. 
+ + Skipping code for error cases, the default :meth:`fromutc` implementation acts + like:: + + def fromutc(self, dt): + # raise ValueError if dt.tzinfo is not self + dtoff = dt.utcoffset() + dtdst = dt.dst() + # raise ValueError if dtoff is None or dtdst is None + delta = dtoff - dtdst # this is self's standard offset + if delta: + dt += delta # convert to standard local time + dtdst = dt.dst() + # raise ValueError if dtdst is None + if dtdst: + return dt + dtdst + else: + return dt + +Example :class:`tzinfo` classes: + +.. literalinclude:: ../includes/tzinfo-examples.py + + +Note that there are unavoidable subtleties twice per year in a :class:`tzinfo` +subclass accounting for both standard and daylight time, at the DST transition +points. For concreteness, consider US Eastern (UTC -0500), where EDT begins the +minute after 1:59 (EST) on the first Sunday in April, and ends the minute after +1:59 (EDT) on the last Sunday in October:: + + UTC 3:MM 4:MM 5:MM 6:MM 7:MM 8:MM + EST 22:MM 23:MM 0:MM 1:MM 2:MM 3:MM + EDT 23:MM 0:MM 1:MM 2:MM 3:MM 4:MM + + start 22:MM 23:MM 0:MM 1:MM 3:MM 4:MM + + end 23:MM 0:MM 1:MM 1:MM 2:MM 3:MM + +When DST starts (the "start" line), the local wall clock leaps from 1:59 to +3:00. A wall time of the form 2:MM doesn't really make sense on that day, so +``astimezone(Eastern)`` won't deliver a result with ``hour == 2`` on the day DST +begins. In order for :meth:`astimezone` to make this guarantee, the +:meth:`tzinfo.dst` method must consider times in the "missing hour" (2:MM for +Eastern) to be in daylight time. + +When DST ends (the "end" line), there's a potentially worse problem: there's an +hour that can't be spelled unambiguously in local wall time: the last hour of +daylight time. In Eastern, that's times of the form 5:MM UTC on the day +daylight time ends. The local wall clock leaps from 1:59 (daylight time) back +to 1:00 (standard time) again. Local times of the form 1:MM are ambiguous. 
+:meth:`astimezone` mimics the local clock's behavior by mapping two adjacent UTC +hours into the same local hour then. In the Eastern example, UTC times of the +form 5:MM and 6:MM both map to 1:MM when converted to Eastern. In order for +:meth:`astimezone` to make this guarantee, the :meth:`tzinfo.dst` method must +consider times in the "repeated hour" to be in standard time. This is easily +arranged, as in the example, by expressing DST switch times in the time zone's +standard local time. + +Applications that can't bear such ambiguities should avoid using hybrid +:class:`tzinfo` subclasses; there are no ambiguities when using UTC, or any +other fixed-offset :class:`tzinfo` subclass (such as a class representing only +EST (fixed offset -5 hours), or only EDT (fixed offset -4 hours)). + + +.. _strftime-behavior: + +:meth:`strftime` Behavior +------------------------- + +:class:`date`, :class:`datetime`, and :class:`time` objects all support a +``strftime(format)`` method, to create a string representing the time under the +control of an explicit format string. Broadly speaking, ``d.strftime(fmt)`` +acts like the :mod:`time` module's ``time.strftime(fmt, d.timetuple())`` +although not all objects support a :meth:`timetuple` method. + +For :class:`time` objects, the format codes for year, month, and day should not +be used, as time objects have no such values. If they're used anyway, ``1900`` +is substituted for the year, and ``0`` for the month and day. + +For :class:`date` objects, the format codes for hours, minutes, and seconds +should not be used, as :class:`date` objects have no such values. If they're +used anyway, ``0`` is substituted for them. + +For a naive object, the ``%z`` and ``%Z`` format codes are replaced by empty +strings. 
+ +For an aware object: + +``%z`` + :meth:`utcoffset` is transformed into a 5-character string of the form +HHMM or + -HHMM, where HH is a 2-digit string giving the number of UTC offset hours, and + MM is a 2-digit string giving the number of UTC offset minutes. For example, if + :meth:`utcoffset` returns ``timedelta(hours=-3, minutes=-30)``, ``%z`` is + replaced with the string ``'-0330'``. + +``%Z`` + If :meth:`tzname` returns ``None``, ``%Z`` is replaced by an empty string. + Otherwise ``%Z`` is replaced by the returned value, which must be a string. + +The full set of format codes supported varies across platforms, because Python +calls the platform C library's :func:`strftime` function, and platform +variations are common. The documentation for Python's :mod:`time` module lists +the format codes that the C standard (1989 version) requires, and those work on +all platforms with a standard C implementation. Note that the 1999 version of +the C standard added additional format codes. + +The exact range of years for which :meth:`strftime` works also varies across +platforms. Regardless of platform, years before 1900 cannot be used. + +.. % %% This example is obsolete, since strptime is now supported by datetime. +.. % +.. % \subsection{Examples} +.. % +.. % \subsubsection{Creating Datetime Objects from Formatted Strings} +.. % +.. % The \class{datetime} class does not directly support parsing formatted time +.. % strings. You can use \function{time.strptime} to do the parsing and create +.. % a \class{datetime} object from the tuple it returns: +.. % +.. % \begin{verbatim} +.. % >>> s = "2005-12-06T12:13:14" +.. % >>> from datetime import datetime +.. % >>> from time import strptime +.. % >>> datetime(*strptime(s, "%Y-%m-%dT%H:%M:%S")[0:6]) +.. % datetime.datetime(2005, 12, 6, 12, 13, 14) +.. % \end{verbatim} +.. 
% + diff --git a/Doc/library/dbhash.rst b/Doc/library/dbhash.rst new file mode 100644 index 0000000..b5c9590 --- /dev/null +++ b/Doc/library/dbhash.rst @@ -0,0 +1,114 @@ + +:mod:`dbhash` --- DBM-style interface to the BSD database library +================================================================= + +.. module:: dbhash + :synopsis: DBM-style interface to the BSD database library. +.. sectionauthor:: Fred L. Drake, Jr. + + +.. index:: module: bsddb + +The :mod:`dbhash` module provides a function to open databases using the BSD +``db`` library. This module mirrors the interface of the other Python database +modules that provide access to DBM-style databases. The :mod:`bsddb` module is +required to use :mod:`dbhash`. + +This module provides an exception and a function: + + +.. exception:: error + + Exception raised on database errors other than :exc:`KeyError`. It is a synonym + for :exc:`bsddb.error`. + + +.. function:: open(path[, flag[, mode]]) + + Open a ``db`` database and return the database object. The *path* argument is + the name of the database file. + + The *flag* argument can be: + + +---------+-------------------------------------------+ + | Value | Meaning | + +=========+===========================================+ + | ``'r'`` | Open existing database for reading only | + | | (default) | + +---------+-------------------------------------------+ + | ``'w'`` | Open existing database for reading and | + | | writing | + +---------+-------------------------------------------+ + | ``'c'`` | Open database for reading and writing, | + | | creating it if it doesn't exist | + +---------+-------------------------------------------+ + | ``'n'`` | Always create a new, empty database, open | + | | for reading and writing | + +---------+-------------------------------------------+ + + For platforms on which the BSD ``db`` library supports locking, an ``'l'`` + can be appended to indicate that locking should be used. 
+ + The optional *mode* parameter is used to indicate the Unix permission bits that + should be set if a new database must be created; this will be masked by the + current umask value for the process. + + +.. seealso:: + + Module :mod:`anydbm` + Generic interface to ``dbm``\ -style databases. + + Module :mod:`bsddb` + Lower-level interface to the BSD ``db`` library. + + Module :mod:`whichdb` + Utility module used to determine the type of an existing database. + + +.. _dbhash-objects: + +Database Objects +---------------- + +The database objects returned by :func:`open` provide the methods common to all +the DBM-style databases and mapping objects. The following methods are +available in addition to the standard methods. + + +.. method:: dbhash.first() + + It's possible to loop over every key/value pair in the database using this + method and the :meth:`next` method. The traversal is ordered by the database's + internal hash values, and won't be sorted by the key values. This method + returns the starting key. + + +.. method:: dbhash.last() + + Return the last key/value pair in a database traversal. This may be used to + begin a reverse-order traversal; see :meth:`previous`. + + +.. method:: dbhash.next() + + Returns the next key/value pair in a database traversal. The following code + prints every key in the database ``db``, without having to create a list in + memory that contains them all:: + + print db.first() + for i in range(1, len(db)): + print db.next() + + +.. method:: dbhash.previous() + + Returns the previous key/value pair in a forward-traversal of the database. In + conjunction with :meth:`last`, this may be used to implement a reverse-order + traversal. + + +.. method:: dbhash.sync() + + This method forces any unwritten data to be written to the disk. 
+ diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst new file mode 100644 index 0000000..52923e8 --- /dev/null +++ b/Doc/library/dbm.rst @@ -0,0 +1,74 @@ + +:mod:`dbm` --- Simple "database" interface +========================================== + +.. module:: dbm + :platform: Unix + :synopsis: The standard "database" interface, based on ndbm. + + +The :mod:`dbm` module provides an interface to the Unix "(n)dbm" library. Dbm +objects behave like mappings (dictionaries), except that keys and values are +always strings. Printing a dbm object doesn't print the keys and values, and the +:meth:`items` and :meth:`values` methods are not supported. + +This module can be used with the "classic" ndbm interface, the BSD DB +compatibility interface, or the GNU GDBM compatibility interface. On Unix, the +:program:`configure` script will attempt to locate the appropriate header file +to simplify building this module. + +The module defines the following: + + +.. exception:: error + + Raised on dbm-specific errors, such as I/O errors. :exc:`KeyError` is raised for + general mapping errors like specifying an incorrect key. + + +.. data:: library + + Name of the ``ndbm`` implementation library used. + + +.. function:: open(filename[, flag[, mode]]) + + Open a dbm database and return a dbm object. The *filename* argument is the + name of the database file (without the :file:`.dir` or :file:`.pag` extensions; + note that the BSD DB implementation of the interface will append the extension + :file:`.db` and only create one file). 
+ + The optional *flag* argument must be one of these values: + + +---------+-------------------------------------------+ + | Value | Meaning | + +=========+===========================================+ + | ``'r'`` | Open existing database for reading only | + | | (default) | + +---------+-------------------------------------------+ + | ``'w'`` | Open existing database for reading and | + | | writing | + +---------+-------------------------------------------+ + | ``'c'`` | Open database for reading and writing, | + | | creating it if it doesn't exist | + +---------+-------------------------------------------+ + | ``'n'`` | Always create a new, empty database, open | + | | for reading and writing | + +---------+-------------------------------------------+ + + The optional *mode* argument is the Unix mode of the file, used only when the + database has to be created. It defaults to octal ``0666`` (and will be + modified by the prevailing umask). + + +.. seealso:: + + Module :mod:`anydbm` + Generic interface to ``dbm``\ -style databases. + + Module :mod:`gdbm` + Similar interface to the GNU GDBM library. + + Module :mod:`whichdb` + Utility module used to determine the type of an existing database. + diff --git a/Doc/library/decimal.rst b/Doc/library/decimal.rst new file mode 100644 index 0000000..1d17109 --- /dev/null +++ b/Doc/library/decimal.rst @@ -0,0 +1,1289 @@ + +:mod:`decimal` --- Decimal floating point arithmetic +==================================================== + +.. module:: decimal + :synopsis: Implementation of the General Decimal Arithmetic Specification. + + +.. moduleauthor:: Eric Price +.. moduleauthor:: Facundo Batista +.. moduleauthor:: Raymond Hettinger +.. moduleauthor:: Aahz +.. moduleauthor:: Tim Peters + + +.. sectionauthor:: Raymond D. Hettinger + + +.. versionadded:: 2.4 + +The :mod:`decimal` module provides support for decimal floating point +arithmetic. 
It offers several advantages over the :class:`float` datatype: + +* Decimal numbers can be represented exactly. In contrast, numbers like + :const:`1.1` do not have an exact representation in binary floating point. End + users typically would not expect :const:`1.1` to display as + :const:`1.1000000000000001` as it does with binary floating point. + +* The exactness carries over into arithmetic. In decimal floating point, ``0.1 + + 0.1 + 0.1 - 0.3`` is exactly equal to zero. In binary floating point, the result + is :const:`5.5511151231257827e-017`. While near to zero, the differences + prevent reliable equality testing and differences can accumulate. For this + reason, decimal would be preferred in accounting applications which have strict + equality invariants. + +* The decimal module incorporates a notion of significant places so that ``1.30 + + 1.20`` is :const:`2.50`. The trailing zero is kept to indicate significance. + This is the customary presentation for monetary applications. For + multiplication, the "schoolbook" approach uses all the figures in the + multiplicands. For instance, ``1.3 * 1.2`` gives :const:`1.56` while ``1.30 * + 1.20`` gives :const:`1.5600`. + +* Unlike hardware based binary floating point, the decimal module has a user + settable precision (defaulting to 28 places) which can be as large as needed for + a given problem:: + + >>> getcontext().prec = 6 + >>> Decimal(1) / Decimal(7) + Decimal("0.142857") + >>> getcontext().prec = 28 + >>> Decimal(1) / Decimal(7) + Decimal("0.1428571428571428571428571429") + +* Both binary and decimal floating point are implemented in terms of published + standards. While the built-in float type exposes only a modest portion of its + capabilities, the decimal module exposes all required parts of the standard. + When needed, the programmer has full control over rounding and signal handling. + +The module design is centered around three concepts: the decimal number, the +context for arithmetic, and signals. 
+ +A decimal number is immutable. It has a sign, coefficient digits, and an +exponent. To preserve significance, the coefficient digits do not truncate +trailing zeroes. Decimals also include special values such as +:const:`Infinity`, :const:`-Infinity`, and :const:`NaN`. The standard also +differentiates :const:`-0` from :const:`+0`. + +The context for arithmetic is an environment specifying precision, rounding +rules, limits on exponents, flags indicating the results of operations, and trap +enablers which determine whether signals are treated as exceptions. Rounding +options include :const:`ROUND_CEILING`, :const:`ROUND_DOWN`, +:const:`ROUND_FLOOR`, :const:`ROUND_HALF_DOWN`, :const:`ROUND_HALF_EVEN`, +:const:`ROUND_HALF_UP`, and :const:`ROUND_UP`. + +Signals are groups of exceptional conditions arising during the course of +computation. Depending on the needs of the application, signals may be ignored, +considered as informational, or treated as exceptions. The signals in the +decimal module are: :const:`Clamped`, :const:`InvalidOperation`, +:const:`DivisionByZero`, :const:`Inexact`, :const:`Rounded`, :const:`Subnormal`, +:const:`Overflow`, and :const:`Underflow`. + +For each signal there is a flag and a trap enabler. When a signal is +encountered, its flag is incremented from zero and, then, if the trap enabler is +set to one, an exception is raised. Flags are sticky, so the user needs to +reset them before monitoring a calculation. + + +.. seealso:: + + IBM's General Decimal Arithmetic Specification, `The General Decimal Arithmetic + Specification `_. + + IEEE standard 854-1987, `Unofficial IEEE 854 Text + `_. + +.. % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +.. 
_decimal-tutorial: + +Quick-start Tutorial +-------------------- + +The usual start to using decimals is importing the module, viewing the current +context with :func:`getcontext` and, if necessary, setting new values for +precision, rounding, or enabled traps:: + + >>> from decimal import * + >>> getcontext() + Context(prec=28, rounding=ROUND_HALF_EVEN, Emin=-999999999, Emax=999999999, + capitals=1, flags=[], traps=[Overflow, InvalidOperation, + DivisionByZero]) + + >>> getcontext().prec = 7 # Set a new precision + +Decimal instances can be constructed from integers, strings, or tuples. To +create a Decimal from a :class:`float`, first convert it to a string. This +serves as an explicit reminder of the details of the conversion (including +representation error). Decimal numbers include special values such as +:const:`NaN` which stands for "Not a number", positive and negative +:const:`Infinity`, and :const:`-0`. :: + + >>> Decimal(10) + Decimal("10") + >>> Decimal("3.14") + Decimal("3.14") + >>> Decimal((0, (3, 1, 4), -2)) + Decimal("3.14") + >>> Decimal(str(2.0 ** 0.5)) + Decimal("1.41421356237") + >>> Decimal("NaN") + Decimal("NaN") + >>> Decimal("-Infinity") + Decimal("-Infinity") + +The significance of a new Decimal is determined solely by the number of digits +input. Context precision and rounding only come into play during arithmetic +operations. :: + + >>> getcontext().prec = 6 + >>> Decimal('3.0') + Decimal("3.0") + >>> Decimal('3.1415926535') + Decimal("3.1415926535") + >>> Decimal('3.1415926535') + Decimal('2.7182818285') + Decimal("5.85987") + >>> getcontext().rounding = ROUND_UP + >>> Decimal('3.1415926535') + Decimal('2.7182818285') + Decimal("5.85988") + +Decimals interact well with much of the rest of Python. 
Here is a small decimal +floating point flying circus:: + + >>> data = map(Decimal, '1.34 1.87 3.45 2.35 1.00 0.03 9.25'.split()) + >>> max(data) + Decimal("9.25") + >>> min(data) + Decimal("0.03") + >>> sorted(data) + [Decimal("0.03"), Decimal("1.00"), Decimal("1.34"), Decimal("1.87"), + Decimal("2.35"), Decimal("3.45"), Decimal("9.25")] + >>> sum(data) + Decimal("19.29") + >>> a,b,c = data[:3] + >>> str(a) + '1.34' + >>> float(a) + 1.3400000000000001 + >>> round(a, 1) # round() first converts to binary floating point + 1.3 + >>> int(a) + 1 + >>> a * 5 + Decimal("6.70") + >>> a * b + Decimal("2.5058") + >>> c % a + Decimal("0.77") + +The :meth:`quantize` method rounds a number to a fixed exponent. This method is +useful for monetary applications that often round results to a fixed number of +places:: + + >>> Decimal('7.325').quantize(Decimal('.01'), rounding=ROUND_DOWN) + Decimal("7.32") + >>> Decimal('7.325').quantize(Decimal('1.'), rounding=ROUND_UP) + Decimal("8") + +As shown above, the :func:`getcontext` function accesses the current context and +allows the settings to be changed. This approach meets the needs of most +applications. + +For more advanced work, it may be useful to create alternate contexts using the +Context() constructor. To make an alternate active, use the :func:`setcontext` +function. + +In accordance with the standard, the :mod:`decimal` module provides two ready to +use standard contexts, :const:`BasicContext` and :const:`ExtendedContext`. 
The +former is especially useful for debugging because many of the traps are +enabled:: + + >>> myothercontext = Context(prec=60, rounding=ROUND_HALF_DOWN) + >>> setcontext(myothercontext) + >>> Decimal(1) / Decimal(7) + Decimal("0.142857142857142857142857142857142857142857142857142857142857") + + >>> ExtendedContext + Context(prec=9, rounding=ROUND_HALF_EVEN, Emin=-999999999, Emax=999999999, + capitals=1, flags=[], traps=[]) + >>> setcontext(ExtendedContext) + >>> Decimal(1) / Decimal(7) + Decimal("0.142857143") + >>> Decimal(42) / Decimal(0) + Decimal("Infinity") + + >>> setcontext(BasicContext) + >>> Decimal(42) / Decimal(0) + Traceback (most recent call last): + File "", line 1, in -toplevel- + Decimal(42) / Decimal(0) + DivisionByZero: x / 0 + +Contexts also have signal flags for monitoring exceptional conditions +encountered during computations. The flags remain set until explicitly cleared, +so it is best to clear the flags before each set of monitored computations by +using the :meth:`clear_flags` method. :: + + >>> setcontext(ExtendedContext) + >>> getcontext().clear_flags() + >>> Decimal(355) / Decimal(113) + Decimal("3.14159292") + >>> getcontext() + Context(prec=9, rounding=ROUND_HALF_EVEN, Emin=-999999999, Emax=999999999, + capitals=1, flags=[Inexact, Rounded], traps=[]) + +The *flags* entry shows that the rational approximation to :const:`Pi` was +rounded (digits beyond the context precision were thrown away) and that the +result is inexact (some of the discarded digits were non-zero). + +Individual traps are set using the dictionary in the :attr:`traps` field of a +context:: + + >>> Decimal(1) / Decimal(0) + Decimal("Infinity") + >>> getcontext().traps[DivisionByZero] = 1 + >>> Decimal(1) / Decimal(0) + Traceback (most recent call last): + File "", line 1, in -toplevel- + Decimal(1) / Decimal(0) + DivisionByZero: x / 0 + +Most programs adjust the current context only once, at the beginning of the +program. 
And, in many applications, data is converted to :class:`Decimal` with +a single cast inside a loop. With context set and decimals created, the bulk of +the program manipulates the data no differently than with other Python numeric +types. + +.. % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +.. _decimal-decimal: + +Decimal objects +--------------- + + +.. class:: Decimal([value [, context]]) + + Constructs a new :class:`Decimal` object based on *value*. + + *value* can be an integer, string, tuple, or another :class:`Decimal` object. If + no *value* is given, returns ``Decimal("0")``. If *value* is a string, it + should conform to the decimal numeric string syntax:: + + sign ::= '+' | '-' + digit ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' + indicator ::= 'e' | 'E' + digits ::= digit [digit]... + decimal-part ::= digits '.' [digits] | ['.'] digits + exponent-part ::= indicator [sign] digits + infinity ::= 'Infinity' | 'Inf' + nan ::= 'NaN' [digits] | 'sNaN' [digits] + numeric-value ::= decimal-part [exponent-part] | infinity + numeric-string ::= [sign] numeric-value | [sign] nan + + If *value* is a :class:`tuple`, it should have three components, a sign + (:const:`0` for positive or :const:`1` for negative), a :class:`tuple` of + digits, and an integer exponent. For example, ``Decimal((0, (1, 4, 1, 4), -3))`` + returns ``Decimal("1.414")``. + + The *context* precision does not affect how many digits are stored. That is + determined exclusively by the number of digits in *value*. For example, + ``Decimal("3.00000")`` records all five zeroes even if the context precision is + only three. + + The purpose of the *context* argument is determining what to do if *value* is a + malformed string. If the context traps :const:`InvalidOperation`, an exception + is raised; otherwise, the constructor returns a new Decimal with the value of + :const:`NaN`. + + Once constructed, :class:`Decimal` objects are immutable. 
+ +Decimal floating point objects share many properties with the other builtin +numeric types such as :class:`float` and :class:`int`. All of the usual math +operations and special methods apply. Likewise, decimal objects can be copied, +pickled, printed, used as dictionary keys, used as set elements, compared, +sorted, and coerced to another type (such as :class:`float` or :class:`long`). + +In addition to the standard numeric properties, decimal floating point objects +also have a number of specialized methods: + + +.. method:: Decimal.adjusted() + + Return the adjusted exponent after shifting out the coefficient's rightmost + digits until only the lead digit remains: ``Decimal("321e+5").adjusted()`` + returns seven. Used for determining the position of the most significant digit + with respect to the decimal point. + + +.. method:: Decimal.as_tuple() + + Returns a tuple representation of the number: ``(sign, digittuple, exponent)``. + + +.. method:: Decimal.compare(other[, context]) + + Compares like :meth:`__cmp__` but returns a decimal instance:: + + a or b is a NaN ==> Decimal("NaN") + a < b ==> Decimal("-1") + a == b ==> Decimal("0") + a > b ==> Decimal("1") + + +.. method:: Decimal.max(other[, context]) + + Like ``max(self, other)`` except that the context rounding rule is applied + before returning and that :const:`NaN` values are either signalled or ignored + (depending on the context and whether they are signaling or quiet). + + +.. method:: Decimal.min(other[, context]) + + Like ``min(self, other)`` except that the context rounding rule is applied + before returning and that :const:`NaN` values are either signalled or ignored + (depending on the context and whether they are signaling or quiet). + + +.. method:: Decimal.normalize([context]) + + Normalize the number by stripping the rightmost trailing zeroes and converting + any result equal to :const:`Decimal("0")` to :const:`Decimal("0e0")`. 
Used for + producing canonical values for members of an equivalence class. For example, + ``Decimal("32.100")`` and ``Decimal("0.321000e+2")`` both normalize to the + equivalent value ``Decimal("32.1")``. + + +.. method:: Decimal.quantize(exp [, rounding[, context[, watchexp]]]) + + Quantize makes the exponent the same as *exp*. Searches for a rounding method + in *rounding*, then in *context*, and then in the current context. + + If *watchexp* is set (default), then an error is returned whenever the resulting + exponent is greater than :attr:`Emax` or less than :attr:`Etiny`. + + +.. method:: Decimal.remainder_near(other[, context]) + + Computes the modulo as either a positive or negative value depending on which is + closest to zero. For instance, ``Decimal(10).remainder_near(6)`` returns + ``Decimal("-2")`` which is closer to zero than ``Decimal("4")``. + + If both are equally close, the one chosen will have the same sign as *self*. + + +.. method:: Decimal.same_quantum(other[, context]) + + Test whether self and other have the same exponent or whether both are + :const:`NaN`. + + +.. method:: Decimal.sqrt([context]) + + Return the square root to full precision. + + +.. method:: Decimal.to_eng_string([context]) + + Convert to an engineering-type string. + + Engineering notation has an exponent which is a multiple of 3, so there are up + to 3 digits left of the decimal place. For example, converts + ``Decimal('123E+1')`` to ``Decimal("1.23E+3")`` + + +.. method:: Decimal.to_integral([rounding[, context]]) + + Rounds to the nearest integer without signaling :const:`Inexact` or + :const:`Rounded`. If given, applies *rounding*; otherwise, uses the rounding + method in either the supplied *context* or the current context. + +.. % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +.. _decimal-context: + +Context objects +--------------- + +Contexts are environments for arithmetic operations. 
They govern precision, set +rules for rounding, determine which signals are treated as exceptions, and limit +the range for exponents. + +Each thread has its own current context which is accessed or changed using the +:func:`getcontext` and :func:`setcontext` functions: + + +.. function:: getcontext() + + Return the current context for the active thread. + + +.. function:: setcontext(c) + + Set the current context for the active thread to *c*. + +Beginning with Python 2.5, you can also use the :keyword:`with` statement and +the :func:`localcontext` function to temporarily change the active context. + + +.. function:: localcontext([c]) + + Return a context manager that will set the current context for the active thread + to a copy of *c* on entry to the with-statement and restore the previous context + when exiting the with-statement. If no context is specified, a copy of the + current context is used. + + .. versionadded:: 2.5 + + For example, the following code sets the current decimal precision to 42 places, + performs a calculation, and then automatically restores the previous context:: + + from __future__ import with_statement + from decimal import localcontext + + with localcontext() as ctx: + ctx.prec = 42 # Perform a high precision calculation + s = calculate_something() + s = +s # Round the final result back to the default precision + +New contexts can also be created using the :class:`Context` constructor +described below. In addition, the module provides three pre-made contexts: + + +.. class:: BasicContext + + This is a standard context defined by the General Decimal Arithmetic + Specification. Precision is set to nine. Rounding is set to + :const:`ROUND_HALF_UP`. All flags are cleared. All traps are enabled (treated + as exceptions) except :const:`Inexact`, :const:`Rounded`, and + :const:`Subnormal`. + + Because many of the traps are enabled, this context is useful for debugging. + + +.. 
class:: ExtendedContext + + This is a standard context defined by the General Decimal Arithmetic + Specification. Precision is set to nine. Rounding is set to + :const:`ROUND_HALF_EVEN`. All flags are cleared. No traps are enabled (so that + exceptions are not raised during computations). + + Because the traps are disabled, this context is useful for applications that + prefer to have a result value of :const:`NaN` or :const:`Infinity` instead of + raising exceptions. This allows an application to complete a run in the + presence of conditions that would otherwise halt the program. + + +.. class:: DefaultContext + + This context is used by the :class:`Context` constructor as a prototype for new + contexts. Changing a field (such as precision) has the effect of changing the + default for new contexts created by the :class:`Context` constructor. + + This context is most useful in multi-threaded environments. Changing one of the + fields before threads are started has the effect of setting system-wide + defaults. Changing the fields after threads have started is not recommended as + it would require thread synchronization to prevent race conditions. + + In single threaded environments, it is preferable to not use this context at + all. Instead, simply create contexts explicitly as described below. + + The default values are precision=28, rounding=ROUND_HALF_EVEN, and enabled traps + for Overflow, InvalidOperation, and DivisionByZero. + +In addition to the three supplied contexts, new contexts can be created with the +:class:`Context` constructor. + + +.. class:: Context(prec=None, rounding=None, traps=None, flags=None, Emin=None, Emax=None, capitals=1) + + Creates a new context. If a field is not specified or is :const:`None`, the + default values are copied from the :const:`DefaultContext`. If the *flags* + field is not specified or is :const:`None`, all flags are cleared. 
+ + The *prec* field is a positive integer that sets the precision for arithmetic + operations in the context. + + The *rounding* option is one of: + + * :const:`ROUND_CEILING` (towards :const:`Infinity`), + * :const:`ROUND_DOWN` (towards zero), + * :const:`ROUND_FLOOR` (towards :const:`-Infinity`), + * :const:`ROUND_HALF_DOWN` (to nearest with ties going towards zero), + * :const:`ROUND_HALF_EVEN` (to nearest with ties going to nearest even integer), + * :const:`ROUND_HALF_UP` (to nearest with ties going away from zero), or + * :const:`ROUND_UP` (away from zero). + + The *traps* and *flags* fields list any signals to be set. Generally, new + contexts should only set traps and leave the flags clear. + + The *Emin* and *Emax* fields are integers specifying the outer limits allowable + for exponents. + + The *capitals* field is either :const:`0` or :const:`1` (the default). If set to + :const:`1`, exponents are printed with a capital :const:`E`; otherwise, a + lowercase :const:`e` is used: :const:`Decimal('6.02e+23')`. + +The :class:`Context` class defines several general purpose methods as well as a +large number of methods for doing arithmetic directly in a given context. + + +.. method:: Context.clear_flags() + + Resets all of the flags to :const:`0`. + + +.. method:: Context.copy() + + Return a duplicate of the context. + + +.. method:: Context.create_decimal(num) + + Creates a new Decimal instance from *num* but using *self* as context. Unlike + the :class:`Decimal` constructor, the context precision, rounding method, flags, + and traps are applied to the conversion. + + This is useful because constants are often given to a greater precision than is + needed by the application. Another benefit is that rounding immediately + eliminates unintended effects from digits beyond the current precision. 
In the + following example, using unrounded inputs means that adding zero to a sum can + change the result:: + + >>> getcontext().prec = 3 + >>> Decimal("3.4445") + Decimal("1.0023") + Decimal("4.45") + >>> Decimal("3.4445") + Decimal(0) + Decimal("1.0023") + Decimal("4.44") + + +.. method:: Context.Etiny() + + Returns a value equal to ``Emin - prec + 1`` which is the minimum exponent value + for subnormal results. When underflow occurs, the exponent is set to + :const:`Etiny`. + + +.. method:: Context.Etop() + + Returns a value equal to ``Emax - prec + 1``. + +The usual approach to working with decimals is to create :class:`Decimal` +instances and then apply arithmetic operations which take place within the +current context for the active thread. An alternate approach is to use context +methods for calculating within a specific context. The methods are similar to +those for the :class:`Decimal` class and are only briefly recounted here. + + +.. method:: Context.abs(x) + + Returns the absolute value of *x*. + + +.. method:: Context.add(x, y) + + Return the sum of *x* and *y*. + + +.. method:: Context.compare(x, y) + + Compares values numerically. + + Like :meth:`__cmp__` but returns a decimal instance:: + + a or b is a NaN ==> Decimal("NaN") + a < b ==> Decimal("-1") + a == b ==> Decimal("0") + a > b ==> Decimal("1") + + +.. method:: Context.divide(x, y) + + Return *x* divided by *y*. + + +.. method:: Context.divmod(x, y) + + Divides two numbers and returns the integer part of the result. + + +.. method:: Context.max(x, y) + + Compare two values numerically and return the maximum. + + If they are numerically equal then the left-hand operand is chosen as the + result. + + +.. method:: Context.min(x, y) + + Compare two values numerically and return the minimum. + + If they are numerically equal then the left-hand operand is chosen as the + result. + + +.. method:: Context.minus(x) + + Minus corresponds to the unary prefix minus operator in Python. + + +.. 
method:: Context.multiply(x, y) + + Return the product of *x* and *y*. + + +.. method:: Context.normalize(x) + + Normalize reduces an operand to its simplest form. + + Essentially a :meth:`plus` operation with all trailing zeros removed from the + result. + + +.. method:: Context.plus(x) + + Plus corresponds to the unary prefix plus operator in Python. This operation + applies the context precision and rounding, so it is *not* an identity + operation. + + +.. method:: Context.power(x, y[, modulo]) + + Return ``x ** y`` to the *modulo* if given. + + The right-hand operand must be a whole number whose integer part (after any + exponent has been applied) has no more than 9 digits and whose fractional part + (if any) is all zeros before any rounding. The operand may be positive, + negative, or zero; if negative, the absolute value of the power is used, and the + left-hand operand is inverted (divided into 1) before use. + + If the increased precision needed for the intermediate calculations exceeds the + capabilities of the implementation then an :const:`InvalidOperation` condition + is signaled. + + If, when raising to a negative power, an underflow occurs during the division + into 1, the operation is not halted at that point but continues. + + +.. method:: Context.quantize(x, y) + + Returns a value equal to *x* after rounding and having the exponent of *y*. + + Unlike other operations, if the length of the coefficient after the quantize + operation would be greater than precision, then an :const:`InvalidOperation` is + signaled. This guarantees that, unless there is an error condition, the + quantized exponent is always equal to that of the right-hand operand. + + Also unlike other operations, quantize never signals Underflow, even if the + result is subnormal and inexact. + + +.. method:: Context.remainder(x, y) + + Returns the remainder from integer division. + + The sign of the result, if non-zero, is the same as that of the original + dividend. + + +.. 
method:: Context.remainder_near(x, y) + + Computes the modulo as either a positive or negative value depending on which is + closest to zero. For instance, ``Decimal(10).remainder_near(6)`` returns + ``Decimal("-2")`` which is closer to zero than ``Decimal("4")``. + + If both are equally close, the one chosen will have the same sign as *x*. + + +.. method:: Context.same_quantum(x, y) + + Test whether *x* and *y* have the same exponent or whether both are + :const:`NaN`. + + +.. method:: Context.sqrt(x) + + Return the square root of *x* to full precision. + + +.. method:: Context.subtract(x, y) + + Return the difference between *x* and *y*. + + +.. method:: Context.to_eng_string(x) + + Convert to engineering-type string. + + Engineering notation has an exponent which is a multiple of 3, so there are up + to 3 digits left of the decimal place. For example, converts + ``Decimal('123E+1')`` to ``Decimal("1.23E+3")``. + + +.. method:: Context.to_integral(x) + + Rounds to the nearest integer without signaling :const:`Inexact` or + :const:`Rounded`. + + +.. method:: Context.to_sci_string(x) + + Converts a number to a string using scientific notation. + +.. % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +.. _decimal-signals: + +Signals +------- + +Signals represent conditions that arise during computation. Each corresponds to +one context flag and one context trap enabler. + +The context flag is incremented whenever the condition is encountered. After the +computation, flags may be checked for informational purposes (for instance, to +determine whether a computation was exact). After checking the flags, be sure to +clear all flags before starting the next computation. + +If the context's trap enabler is set for the signal, then the condition causes a +Python exception to be raised. For example, if the :class:`DivisionByZero` trap +is set, then a :exc:`DivisionByZero` exception is raised upon encountering the +condition. + + +.. 
class:: Clamped + + Altered an exponent to fit representation constraints. + + Typically, clamping occurs when an exponent falls outside the context's + :attr:`Emin` and :attr:`Emax` limits. If possible, the exponent is reduced to + fit by adding zeroes to the coefficient. + + +.. class:: DecimalException + + Base class for other signals and a subclass of :exc:`ArithmeticError`. + + +.. class:: DivisionByZero + + Signals the division of a non-infinite number by zero. + + Can occur with division, modulo division, or when raising a number to a negative + power. If this signal is not trapped, returns :const:`Infinity` or + :const:`-Infinity` with the sign determined by the inputs to the calculation. + + +.. class:: Inexact + + Indicates that rounding occurred and the result is not exact. + + Signals when non-zero digits were discarded during rounding. The rounded result + is returned. The signal flag or trap is used to detect when results are + inexact. + + +.. class:: InvalidOperation + + An invalid operation was performed. + + Indicates that an operation was requested that does not make sense. If not + trapped, returns :const:`NaN`. Possible causes include:: + + Infinity - Infinity + 0 * Infinity + Infinity / Infinity + x % 0 + Infinity % x + x._rescale( non-integer ) + sqrt(-x) and x > 0 + 0 ** 0 + x ** (non-integer) + x ** Infinity + + +.. class:: Overflow + + Numerical overflow. + + Indicates the exponent is larger than :attr:`Emax` after rounding has occurred. + If not trapped, the result depends on the rounding mode, either pulling inward + to the largest representable finite number or rounding outward to + :const:`Infinity`. In either case, :class:`Inexact` and :class:`Rounded` are + also signaled. + + +.. class:: Rounded + + Rounding occurred though possibly no information was lost. + + Signaled whenever rounding discards digits; even if those digits are zero (such + as rounding :const:`5.00` to :const:`5.0`). If not trapped, returns the result + unchanged. 
This signal is used to detect loss of significant digits. + + +.. class:: Subnormal + + Exponent was lower than :attr:`Emin` prior to rounding. + + Occurs when an operation result is subnormal (the exponent is too small). If not + trapped, returns the result unchanged. + + +.. class:: Underflow + + Numerical underflow with result rounded to zero. + + Occurs when a subnormal result is pushed to zero by rounding. :class:`Inexact` + and :class:`Subnormal` are also signaled. + +The following table summarizes the hierarchy of signals:: + + exceptions.ArithmeticError(exceptions.Exception) + DecimalException + Clamped + DivisionByZero(DecimalException, exceptions.ZeroDivisionError) + Inexact + Overflow(Inexact, Rounded) + Underflow(Inexact, Rounded, Subnormal) + InvalidOperation + Rounded + Subnormal + +.. % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +.. _decimal-notes: + +Floating Point Notes +-------------------- + + +Mitigating round-off error with increased precision +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The use of decimal floating point eliminates decimal representation error +(making it possible to represent :const:`0.1` exactly); however, some operations +can still incur round-off error when non-zero digits exceed the fixed precision. + +The effects of round-off error can be amplified by the addition or subtraction +of nearly offsetting quantities resulting in loss of significance. Knuth +provides two instructive examples where rounded floating point arithmetic with +insufficient precision causes the breakdown of the associative and distributive +properties of addition:: + + # Examples from Seminumerical Algorithms, Section 4.2.2. 
+ >>> from decimal import Decimal, getcontext + >>> getcontext().prec = 8 + + >>> u, v, w = Decimal(11111113), Decimal(-11111111), Decimal('7.51111111') + >>> (u + v) + w + Decimal("9.5111111") + >>> u + (v + w) + Decimal("10") + + >>> u, v, w = Decimal(20000), Decimal(-6), Decimal('6.0000003') + >>> (u*v) + (u*w) + Decimal("0.01") + >>> u * (v+w) + Decimal("0.0060000") + +The :mod:`decimal` module makes it possible to restore the identities by +expanding the precision sufficiently to avoid loss of significance:: + + >>> getcontext().prec = 20 + >>> u, v, w = Decimal(11111113), Decimal(-11111111), Decimal('7.51111111') + >>> (u + v) + w + Decimal("9.51111111") + >>> u + (v + w) + Decimal("9.51111111") + >>> + >>> u, v, w = Decimal(20000), Decimal(-6), Decimal('6.0000003') + >>> (u*v) + (u*w) + Decimal("0.0060000") + >>> u * (v+w) + Decimal("0.0060000") + + +Special values +^^^^^^^^^^^^^^ + +The number system for the :mod:`decimal` module provides special values +including :const:`NaN`, :const:`sNaN`, :const:`-Infinity`, :const:`Infinity`, +and two zeroes, :const:`+0` and :const:`-0`. + +Infinities can be constructed directly with: ``Decimal('Infinity')``. Also, +they can arise from dividing by zero when the :exc:`DivisionByZero` signal is +not trapped. Likewise, when the :exc:`Overflow` signal is not trapped, infinity +can result from rounding beyond the limits of the largest representable number. + +The infinities are signed (affine) and can be used in arithmetic operations +where they get treated as very large, indeterminate numbers. For instance, +adding a constant to infinity gives another infinite result. + +Some operations are indeterminate and return :const:`NaN`, or if the +:exc:`InvalidOperation` signal is trapped, raise an exception. For example, +``0/0`` returns :const:`NaN` which means "not a number". This variety of +:const:`NaN` is quiet and, once created, will flow through other computations +always resulting in another :const:`NaN`. 
This behavior can be useful for a +series of computations that occasionally have missing inputs --- it allows the +calculation to proceed while flagging specific results as invalid. + +A variant is :const:`sNaN` which signals rather than remaining quiet after every +operation. This is a useful return value when an invalid result needs to +interrupt a calculation for special handling. + +The signed zeros can result from calculations that underflow. They keep the sign +that would have resulted if the calculation had been carried out to greater +precision. Since their magnitude is zero, both positive and negative zeros are +treated as equal and their sign is informational. + +In addition to the two signed zeros which are distinct yet equal, there are +various representations of zero with differing precisions yet equivalent in +value. This takes a bit of getting used to. For an eye accustomed to +normalized floating point representations, it is not immediately obvious that +the following calculation returns a value equal to zero:: + + >>> 1 / Decimal('Infinity') + Decimal("0E-1000000026") + +.. % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +.. _decimal-threads: + +Working with threads +-------------------- + +The :func:`getcontext` function accesses a different :class:`Context` object for +each thread. Having separate thread contexts means that threads may make +changes (such as ``getcontext().prec=10``) without interfering with other threads. + +Likewise, the :func:`setcontext` function automatically assigns its target to +the current thread. + +If :func:`setcontext` has not been called before :func:`getcontext`, then +:func:`getcontext` will automatically create a new context for use in the +current thread. + +The new context is copied from a prototype context called *DefaultContext*. To +control the defaults so that each thread will use the same values throughout the +application, directly modify the *DefaultContext* object. 
This should be done +*before* any threads are started so that there won't be a race condition between +threads calling :func:`getcontext`. For example:: + + # Set applicationwide defaults for all threads about to be launched + DefaultContext.prec = 12 + DefaultContext.rounding = ROUND_DOWN + DefaultContext.traps = ExtendedContext.traps.copy() + DefaultContext.traps[InvalidOperation] = 1 + setcontext(DefaultContext) + + # Afterwards, the threads can be started + t1.start() + t2.start() + t3.start() + . . . + +.. % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +.. _decimal-recipes: + +Recipes +------- + +Here are a few recipes that serve as utility functions and that demonstrate ways +to work with the :class:`Decimal` class:: + + def moneyfmt(value, places=2, curr='', sep=',', dp='.', + pos='', neg='-', trailneg=''): + """Convert Decimal to a money formatted string. + + places: required number of places after the decimal point + curr: optional currency symbol before the sign (may be blank) + sep: optional grouping separator (comma, period, space, or blank) + dp: decimal point indicator (comma or period) + only specify as blank when places is zero + pos: optional sign for positive numbers: '+', space or blank + neg: optional sign for negative numbers: '-', '(', space or blank + trailneg:optional trailing minus indicator: '-', ')', space or blank + + >>> d = Decimal('-1234567.8901') + >>> moneyfmt(d, curr='$') + '-$1,234,567.89' + >>> moneyfmt(d, places=0, sep='.', dp='', neg='', trailneg='-') + '1.234.568-' + >>> moneyfmt(d, curr='$', neg='(', trailneg=')') + '($1,234,567.89)' + >>> moneyfmt(Decimal(123456789), sep=' ') + '123 456 789.00' + >>> moneyfmt(Decimal('-0.02'), neg='<', trailneg='>') + '<.02>' + + """ + q = Decimal((0, (1,), -places)) # 2 places --> '0.01' + sign, digits, exp = value.quantize(q).as_tuple() + assert exp == -places + result = [] + digits = map(str, digits) + build, next = result.append, digits.pop + if sign: + 
build(trailneg) + for i in range(places): + if digits: + build(next()) + else: + build('0') + build(dp) + i = 0 + while digits: + build(next()) + i += 1 + if i == 3 and digits: + i = 0 + build(sep) + build(curr) + if sign: + build(neg) + else: + build(pos) + result.reverse() + return ''.join(result) + + def pi(): + """Compute Pi to the current precision. + + >>> print pi() + 3.141592653589793238462643383 + + """ + getcontext().prec += 2 # extra digits for intermediate steps + three = Decimal(3) # substitute "three=3.0" for regular floats + lasts, t, s, n, na, d, da = 0, three, 3, 1, 0, 0, 24 + while s != lasts: + lasts = s + n, na = n+na, na+8 + d, da = d+da, da+32 + t = (t * n) / d + s += t + getcontext().prec -= 2 + return +s # unary plus applies the new precision + + def exp(x): + """Return e raised to the power of x. Result type matches input type. + + >>> print exp(Decimal(1)) + 2.718281828459045235360287471 + >>> print exp(Decimal(2)) + 7.389056098930650227230427461 + >>> print exp(2.0) + 7.38905609893 + >>> print exp(2+0j) + (7.38905609893+0j) + + """ + getcontext().prec += 2 + i, lasts, s, fact, num = 0, 0, 1, 1, 1 + while s != lasts: + lasts = s + i += 1 + fact *= i + num *= x + s += num / fact + getcontext().prec -= 2 + return +s + + def cos(x): + """Return the cosine of x as measured in radians. + + >>> print cos(Decimal('0.5')) + 0.8775825618903727161162815826 + >>> print cos(0.5) + 0.87758256189 + >>> print cos(0.5+0j) + (0.87758256189+0j) + + """ + getcontext().prec += 2 + i, lasts, s, fact, num, sign = 0, 0, 1, 1, 1, 1 + while s != lasts: + lasts = s + i += 2 + fact *= i * (i-1) + num *= x * x + sign *= -1 + s += num / fact * sign + getcontext().prec -= 2 + return +s + + def sin(x): + """Return the sine of x as measured in radians. 
+ + >>> print sin(Decimal('0.5')) + 0.4794255386042030002732879352 + >>> print sin(0.5) + 0.479425538604 + >>> print sin(0.5+0j) + (0.479425538604+0j) + + """ + getcontext().prec += 2 + i, lasts, s, fact, num, sign = 1, 0, x, 1, x, 1 + while s != lasts: + lasts = s + i += 2 + fact *= i * (i-1) + num *= x * x + sign *= -1 + s += num / fact * sign + getcontext().prec -= 2 + return +s + + +.. % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +.. _decimal-faq: + +Decimal FAQ +----------- + +Q. It is cumbersome to type ``decimal.Decimal('1234.5')``. Is there a way to +minimize typing when using the interactive interpreter? + +\A. Some users abbreviate the constructor to just a single letter:: + + >>> D = decimal.Decimal + >>> D('1.23') + D('3.45') + Decimal("4.68") + +Q. In a fixed-point application with two decimal places, some inputs have many +places and need to be rounded. Others are not supposed to have excess digits +and need to be validated. What methods should be used? + +A. The :meth:`quantize` method rounds to a fixed number of decimal places. If +the :const:`Inexact` trap is set, it is also useful for validation:: + + >>> TWOPLACES = Decimal(10) ** -2 # same as Decimal('0.01') + + >>> # Round to two places + >>> Decimal("3.214").quantize(TWOPLACES) + Decimal("3.21") + + >>> # Validate that a number does not exceed two places + >>> Decimal("3.21").quantize(TWOPLACES, context=Context(traps=[Inexact])) + Decimal("3.21") + + >>> Decimal("3.214").quantize(TWOPLACES, context=Context(traps=[Inexact])) + Traceback (most recent call last): + ... + Inexact: Changed in rounding + +Q. Once I have valid two place inputs, how do I maintain that invariant +throughout an application? + +A. Some operations like addition and subtraction automatically preserve fixed +point. Others, like multiplication and division, change the number of decimal +places and need to be followed-up with a :meth:`quantize` step. + +Q. There are many ways to express the same value. 
The numbers :const:`200`, +:const:`200.000`, :const:`2E2`, and :const:`.02E+4` all have the same value at +various precisions. Is there a way to transform them to a single recognizable +canonical value? + +A. The :meth:`normalize` method maps all equivalent values to a single +representative:: + + >>> values = map(Decimal, '200 200.000 2E2 .02E+4'.split()) + >>> [v.normalize() for v in values] + [Decimal("2E+2"), Decimal("2E+2"), Decimal("2E+2"), Decimal("2E+2")] + +Q. Some decimal values always print with exponential notation. Is there a way +to get a non-exponential representation? + +A. For some values, exponential notation is the only way to express the number +of significant places in the coefficient. For example, expressing +:const:`5.0E+3` as :const:`5000` keeps the value constant but cannot show the +original's two-place significance. + +Q. Is there a way to convert a regular float to a :class:`Decimal`? + +A. Yes, all binary floating point numbers can be exactly expressed as a +Decimal. An exact conversion may take more precision than intuition would +suggest, so trapping :const:`Inexact` will signal a need for more precision:: + + def floatToDecimal(f): + "Convert a floating point number to a Decimal with no loss of information" + # Transform (exactly) a float to a mantissa (0.5 <= abs(m) < 1.0) and an + # exponent. Double the mantissa until it is an integer. Use the integer + # mantissa and exponent to compute an equivalent Decimal. If this cannot + # be done exactly, then retry with more precision. + + mantissa, exponent = math.frexp(f) + while mantissa != int(mantissa): + mantissa *= 2.0 + exponent -= 1 + mantissa = int(mantissa) + + oldcontext = getcontext() + setcontext(Context(traps=[Inexact])) + try: + while True: + try: + return mantissa * Decimal(2) ** exponent + except Inexact: + getcontext().prec += 1 + finally: + setcontext(oldcontext) + +Q. Why isn't the :func:`floatToDecimal` routine included in the module? + +A. 
There is some question about whether it is advisable to mix binary and +decimal floating point. Also, its use requires some care to avoid the +representation issues associated with binary floating point:: + + >>> floatToDecimal(1.1) + Decimal("1.100000000000000088817841970012523233890533447265625") + +Q. Within a complex calculation, how can I make sure that I haven't gotten a +spurious result because of insufficient precision or rounding anomalies? + +A. The decimal module makes it easy to test results. A best practice is to +re-run calculations using greater precision and with various rounding modes. +Widely differing results indicate insufficient precision, rounding mode issues, +ill-conditioned inputs, or a numerically unstable algorithm. + +Q. I noticed that context precision is applied to the results of operations but +not to the inputs. Is there anything to watch out for when mixing values of +different precisions? + +A. Yes. The principle is that all values are considered to be exact and so is +the arithmetic on those values. Only the results are rounded. The advantage +for inputs is that "what you type is what you get". 
A disadvantage is that the +results can look odd if you forget that the inputs haven't been rounded:: + + >>> getcontext().prec = 3 + >>> Decimal('3.104') + D('2.104') + Decimal("5.21") + >>> Decimal('3.104') + D('0.000') + D('2.104') + Decimal("5.20") + +The solution is either to increase precision or to force rounding of inputs +using the unary plus operation:: + + >>> getcontext().prec = 3 + >>> +Decimal('1.23456789') # unary plus triggers rounding + Decimal("1.23") + +Alternatively, inputs can be rounded upon creation using the +:meth:`Context.create_decimal` method:: + + >>> Context(prec=5, rounding=ROUND_DOWN).create_decimal('1.2345678') + Decimal("1.2345") + diff --git a/Doc/library/development.rst b/Doc/library/development.rst new file mode 100644 index 0000000..be8c33d --- /dev/null +++ b/Doc/library/development.rst @@ -0,0 +1,22 @@ + +.. _development: + +***************** +Development Tools +***************** + +The modules described in this chapter help you write software. For example, the +:mod:`pydoc` module takes a module and generates documentation based on the +module's contents. The :mod:`doctest` and :mod:`unittest` modules contain +frameworks for writing unit tests that automatically exercise code and verify +that the expected output is produced. + +The list of modules described in this chapter is: + + +.. toctree:: + + pydoc.rst + doctest.rst + unittest.rst + test.rst diff --git a/Doc/library/difflib.rst b/Doc/library/difflib.rst new file mode 100644 index 0000000..95b83e6 --- /dev/null +++ b/Doc/library/difflib.rst @@ -0,0 +1,644 @@ + +:mod:`difflib` --- Helpers for computing deltas +=============================================== + +.. module:: difflib + :synopsis: Helpers for computing differences between objects. +.. moduleauthor:: Tim Peters +.. sectionauthor:: Tim Peters + + +.. % LaTeXification by Fred L. Drake, Jr. . + +.. versionadded:: 2.1 + + +.. 
class:: SequenceMatcher + + This is a flexible class for comparing pairs of sequences of any type, so long + as the sequence elements are hashable. The basic algorithm predates, and is a + little fancier than, an algorithm published in the late 1980's by Ratcliff and + Obershelp under the hyperbolic name "gestalt pattern matching." The idea is to + find the longest contiguous matching subsequence that contains no "junk" + elements (the Ratcliff and Obershelp algorithm doesn't address junk). The same + idea is then applied recursively to the pieces of the sequences to the left and + to the right of the matching subsequence. This does not yield minimal edit + sequences, but does tend to yield matches that "look right" to people. + + **Timing:** The basic Ratcliff-Obershelp algorithm is cubic time in the worst + case and quadratic time in the expected case. :class:`SequenceMatcher` is + quadratic time for the worst case and has expected-case behavior dependent in a + complicated way on how many elements the sequences have in common; best case + time is linear. + + +.. class:: Differ + + This is a class for comparing sequences of lines of text, and producing + human-readable differences or deltas. Differ uses :class:`SequenceMatcher` + both to compare sequences of lines, and to compare sequences of characters + within similar (near-matching) lines. + + Each line of a :class:`Differ` delta begins with a two-letter code: + + +----------+-------------------------------------------+ + | Code | Meaning | + +==========+===========================================+ + | ``'- '`` | line unique to sequence 1 | + +----------+-------------------------------------------+ + | ``'+ '`` | line unique to sequence 2 | + +----------+-------------------------------------------+ + | ``' '`` | line common to both sequences | + +----------+-------------------------------------------+ + | ``'? 
'`` | line not present in either input sequence | + +----------+-------------------------------------------+ + + Lines beginning with '``?``' attempt to guide the eye to intraline differences, + and were not present in either input sequence. These lines can be confusing if + the sequences contain tab characters. + + +.. class:: HtmlDiff + + This class can be used to create an HTML table (or a complete HTML file + containing the table) showing a side by side, line by line comparison of text + with inter-line and intra-line change highlights. The table can be generated in + either full or contextual difference mode. + + The constructor for this class is: + + + .. function:: __init__([tabsize][, wrapcolumn][, linejunk][, charjunk]) + + Initializes instance of :class:`HtmlDiff`. + + *tabsize* is an optional keyword argument to specify tab stop spacing and + defaults to ``8``. + + *wrapcolumn* is an optional keyword to specify column number where lines are + broken and wrapped, defaults to ``None`` where lines are not wrapped. + + *linejunk* and *charjunk* are optional keyword arguments passed into ``ndiff()`` + (used by :class:`HtmlDiff` to generate the side by side HTML differences). See + ``ndiff()`` documentation for argument default values and descriptions. + + The following methods are public: + + + .. function:: make_file(fromlines, tolines [, fromdesc][, todesc][, context][, numlines]) + + Compares *fromlines* and *tolines* (lists of strings) and returns a string which + is a complete HTML file containing a table showing line by line differences with + inter-line and intra-line changes highlighted. + + *fromdesc* and *todesc* are optional keyword arguments to specify from/to file + column header strings (both default to an empty string). + + *context* and *numlines* are both optional keyword arguments. Set *context* to + ``True`` when contextual differences are to be shown, else the default is + ``False`` to show the full files. *numlines* defaults to ``5``. 
When *context* + is ``True`` *numlines* controls the number of context lines which surround the + difference highlights. When *context* is ``False`` *numlines* controls the + number of lines which are shown before a difference highlight when using the + "next" hyperlinks (setting to zero would cause the "next" hyperlinks to place + the next difference highlight at the top of the browser without any leading + context). + + + .. function:: make_table(fromlines, tolines [, fromdesc][, todesc][, context][, numlines]) + + Compares *fromlines* and *tolines* (lists of strings) and returns a string which + is a complete HTML table showing line by line differences with inter-line and + intra-line changes highlighted. + + The arguments for this method are the same as those for the :meth:`make_file` + method. + + :file:`Tools/scripts/diff.py` is a command-line front-end to this class and + contains a good example of its use. + + .. versionadded:: 2.4 + + +.. function:: context_diff(a, b[, fromfile][, tofile][, fromfiledate][, tofiledate][, n][, lineterm]) + + Compare *a* and *b* (lists of strings); return a delta (a generator generating + the delta lines) in context diff format. + + Context diffs are a compact way of showing just the lines that have changed plus + a few lines of context. The changes are shown in a before/after style. The + number of context lines is set by *n* which defaults to three. + + By default, the diff control lines (those with ``***`` or ``---``) are created + with a trailing newline. This is helpful so that inputs created from + :func:`file.readlines` result in diffs that are suitable for use with + :func:`file.writelines` since both the inputs and outputs have trailing + newlines. + + For inputs that do not have trailing newlines, set the *lineterm* argument to + ``""`` so that the output will be uniformly newline free. + + The context diff format normally has a header for filenames and modification + times. 
Any or all of these may be specified using strings for *fromfile*, + *tofile*, *fromfiledate*, and *tofiledate*. The modification times are normally + expressed in the format returned by :func:`time.ctime`. If not specified, the + strings default to blanks. + + :file:`Tools/scripts/diff.py` is a command-line front-end for this function. + + .. versionadded:: 2.3 + + +.. function:: get_close_matches(word, possibilities[, n][, cutoff]) + + Return a list of the best "good enough" matches. *word* is a sequence for which + close matches are desired (typically a string), and *possibilities* is a list of + sequences against which to match *word* (typically a list of strings). + + Optional argument *n* (default ``3``) is the maximum number of close matches to + return; *n* must be greater than ``0``. + + Optional argument *cutoff* (default ``0.6``) is a float in the range [0, 1]. + Possibilities that don't score at least that similar to *word* are ignored. + + The best (no more than *n*) matches among the possibilities are returned in a + list, sorted by similarity score, most similar first. :: + + >>> get_close_matches('appel', ['ape', 'apple', 'peach', 'puppy']) + ['apple', 'ape'] + >>> import keyword + >>> get_close_matches('wheel', keyword.kwlist) + ['while'] + >>> get_close_matches('apple', keyword.kwlist) + [] + >>> get_close_matches('accept', keyword.kwlist) + ['except'] + + +.. function:: ndiff(a, b[, linejunk][, charjunk]) + + Compare *a* and *b* (lists of strings); return a :class:`Differ`\ -style delta + (a generator generating the delta lines). + + Optional keyword parameters *linejunk* and *charjunk* are for filter functions + (or ``None``): + + *linejunk*: A function that accepts a single string argument, and returns true + if the string is junk, or false if not. The default is (``None``), starting with + Python 2.3. 
Before then, the default was the module-level function + :func:`IS_LINE_JUNK`, which filters out lines without visible characters, except + for at most one pound character (``'#'``). As of Python 2.3, the underlying + :class:`SequenceMatcher` class does a dynamic analysis of which lines are so + frequent as to constitute noise, and this usually works better than the pre-2.3 + default. + + *charjunk*: A function that accepts a character (a string of length 1), and + returns true if the character is junk, or false if not. The default is module-level + function :func:`IS_CHARACTER_JUNK`, which filters out whitespace characters (a + blank or tab; note: bad idea to include newline in this!). + + :file:`Tools/scripts/ndiff.py` is a command-line front-end to this function. :: + + >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1), + ... 'ore\ntree\nemu\n'.splitlines(1)) + >>> print ''.join(diff), + - one + ? ^ + + ore + ? ^ + - two + - three + ? - + + tree + + emu + + +.. function:: restore(sequence, which) + + Return one of the two sequences that generated a delta. + + Given a *sequence* produced by :meth:`Differ.compare` or :func:`ndiff`, extract + lines originating from file 1 or 2 (parameter *which*), stripping off line + prefixes. + + Example:: + + >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(1), + ... 'ore\ntree\nemu\n'.splitlines(1)) + >>> diff = list(diff) # materialize the generated delta into a list + >>> print ''.join(restore(diff, 1)), + one + two + three + >>> print ''.join(restore(diff, 2)), + ore + tree + emu + + +.. function:: unified_diff(a, b[, fromfile][, tofile][, fromfiledate][, tofiledate][, n][, lineterm]) + + Compare *a* and *b* (lists of strings); return a delta (a generator generating + the delta lines) in unified diff format. + + Unified diffs are a compact way of showing just the lines that have changed plus + a few lines of context. The changes are shown in an inline style (instead of + separate before/after blocks). 
The number of context lines is set by *n* which + defaults to three. + + By default, the diff control lines (those with ``---``, ``+++``, or ``@@``) are + created with a trailing newline. This is helpful so that inputs created from + :func:`file.readlines` result in diffs that are suitable for use with + :func:`file.writelines` since both the inputs and outputs have trailing + newlines. + + For inputs that do not have trailing newlines, set the *lineterm* argument to + ``""`` so that the output will be uniformly newline free. + + The unified diff format normally has a header for filenames and modification + times. Any or all of these may be specified using strings for *fromfile*, + *tofile*, *fromfiledate*, and *tofiledate*. The modification times are normally + expressed in the format returned by :func:`time.ctime`. If not specified, the + strings default to blanks. + + :file:`Tools/scripts/diff.py` is a command-line front-end for this function. + + .. versionadded:: 2.3 + + +.. function:: IS_LINE_JUNK(line) + + Return true for ignorable lines. The line *line* is ignorable if *line* is + blank or contains a single ``'#'``, otherwise it is not ignorable. Used as a + default for parameter *linejunk* in :func:`ndiff` before Python 2.3. + + +.. function:: IS_CHARACTER_JUNK(ch) + + Return true for ignorable characters. The character *ch* is ignorable if *ch* + is a space or tab, otherwise it is not ignorable. Used as a default for + parameter *charjunk* in :func:`ndiff`. + + +.. seealso:: + + `Pattern Matching: The Gestalt Approach `_ + Discussion of a similar algorithm by John W. Ratcliff and D. E. Metzener. This + was published in `Dr. Dobb's Journal `_ in July, 1988. + + +.. _sequence-matcher: + +SequenceMatcher Objects +----------------------- + +The :class:`SequenceMatcher` class has this constructor: + + +.. 
class:: SequenceMatcher([isjunk[, a[, b]]]) + + Optional argument *isjunk* must be ``None`` (the default) or a one-argument + function that takes a sequence element and returns true if and only if the + element is "junk" and should be ignored. Passing ``None`` for *isjunk* is + equivalent to passing ``lambda x: 0``; in other words, no elements are ignored. + For example, pass:: + + lambda x: x in " \t" + + if you're comparing lines as sequences of characters, and don't want to synch up + on blanks or hard tabs. + + The optional arguments *a* and *b* are sequences to be compared; both default to + empty strings. The elements of both sequences must be hashable. + +:class:`SequenceMatcher` objects have the following methods: + + +.. method:: SequenceMatcher.set_seqs(a, b) + + Set the two sequences to be compared. + +:class:`SequenceMatcher` computes and caches detailed information about the +second sequence, so if you want to compare one sequence against many sequences, +use :meth:`set_seq2` to set the commonly used sequence once and call +:meth:`set_seq1` repeatedly, once for each of the other sequences. + + +.. method:: SequenceMatcher.set_seq1(a) + + Set the first sequence to be compared. The second sequence to be compared is + not changed. + + +.. method:: SequenceMatcher.set_seq2(b) + + Set the second sequence to be compared. The first sequence to be compared is + not changed. + + +.. method:: SequenceMatcher.find_longest_match(alo, ahi, blo, bhi) + + Find longest matching block in ``a[alo:ahi]`` and ``b[blo:bhi]``. + + If *isjunk* was omitted or ``None``, :meth:`find_longest_match` returns ``(i, j, + k)`` such that ``a[i:i+k]`` is equal to ``b[j:j+k]``, where ``alo <= i <= i+k <= + ahi`` and ``blo <= j <= j+k <= bhi``. For all ``(i', j', k')`` meeting those + conditions, the additional conditions ``k >= k'``, ``i <= i'``, and if ``i == + i'``, ``j <= j'`` are also met. 
In other words, of all maximal matching blocks, + return one that starts earliest in *a*, and of all those maximal matching blocks + that start earliest in *a*, return the one that starts earliest in *b*. :: + + >>> s = SequenceMatcher(None, " abcd", "abcd abcd") + >>> s.find_longest_match(0, 5, 0, 9) + (0, 4, 5) + + If *isjunk* was provided, first the longest matching block is determined as + above, but with the additional restriction that no junk element appears in the + block. Then that block is extended as far as possible by matching (only) junk + elements on both sides. So the resulting block never matches on junk except as + identical junk happens to be adjacent to an interesting match. + + Here's the same example as before, but considering blanks to be junk. That + prevents ``' abcd'`` from matching the ``' abcd'`` at the tail end of the second + sequence directly. Instead only the ``'abcd'`` can match, and matches the + leftmost ``'abcd'`` in the second sequence:: + + >>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd") + >>> s.find_longest_match(0, 5, 0, 9) + (1, 0, 4) + + If no blocks match, this returns ``(alo, blo, 0)``. + + +.. method:: SequenceMatcher.get_matching_blocks() + + Return list of triples describing matching subsequences. Each triple is of the + form ``(i, j, n)``, and means that ``a[i:i+n] == b[j:j+n]``. The triples are + monotonically increasing in *i* and *j*. + + The last triple is a dummy, and has the value ``(len(a), len(b), 0)``. It is + the only triple with ``n == 0``. If ``(i, j, n)`` and ``(i', j', n')`` are + adjacent triples in the list, and the second is not the last triple in the list, + then ``i+n != i'`` or ``j+n != j'``; in other words, adjacent triples always + describe non-adjacent equal blocks. + + .. % Explain why a dummy is used! + + .. versionchanged:: 2.5 + The guarantee that adjacent triples always describe non-adjacent blocks was + implemented. 
+ + :: + + >>> s = SequenceMatcher(None, "abxcd", "abcd") + >>> s.get_matching_blocks() + [(0, 0, 2), (3, 2, 2), (5, 4, 0)] + + +.. method:: SequenceMatcher.get_opcodes() + + Return list of 5-tuples describing how to turn *a* into *b*. Each tuple is of + the form ``(tag, i1, i2, j1, j2)``. The first tuple has ``i1 == j1 == 0``, and + remaining tuples have *i1* equal to the *i2* from the preceding tuple, and, + likewise, *j1* equal to the previous *j2*. + + The *tag* values are strings, with these meanings: + + +---------------+---------------------------------------------+ + | Value | Meaning | + +===============+=============================================+ + | ``'replace'`` | ``a[i1:i2]`` should be replaced by | + | | ``b[j1:j2]``. | + +---------------+---------------------------------------------+ + | ``'delete'`` | ``a[i1:i2]`` should be deleted. Note that | + | | ``j1 == j2`` in this case. | + +---------------+---------------------------------------------+ + | ``'insert'`` | ``b[j1:j2]`` should be inserted at | + | | ``a[i1:i1]``. Note that ``i1 == i2`` in | + | | this case. | + +---------------+---------------------------------------------+ + | ``'equal'`` | ``a[i1:i2] == b[j1:j2]`` (the sub-sequences | + | | are equal). | + +---------------+---------------------------------------------+ + + For example:: + + >>> a = "qabxcd" + >>> b = "abycdf" + >>> s = SequenceMatcher(None, a, b) + >>> for tag, i1, i2, j1, j2 in s.get_opcodes(): + ... print ("%7s a[%d:%d] (%s) b[%d:%d] (%s)" % + ... (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])) + delete a[0:1] (q) b[0:0] () + equal a[1:3] (ab) b[0:2] (ab) + replace a[3:4] (x) b[2:3] (y) + equal a[4:6] (cd) b[3:5] (cd) + insert a[6:6] () b[5:6] (f) + + +.. method:: SequenceMatcher.get_grouped_opcodes([n]) + + Return a generator of groups with up to *n* lines of context. 
+ + Starting with the groups returned by :meth:`get_opcodes`, this method splits out + smaller change clusters and eliminates intervening ranges which have no changes. + + The groups are returned in the same format as :meth:`get_opcodes`. + + .. versionadded:: 2.3 + + +.. method:: SequenceMatcher.ratio() + + Return a measure of the sequences' similarity as a float in the range [0, 1]. + + Where T is the total number of elements in both sequences, and M is the number + of matches, this is 2.0\*M / T. Note that this is ``1.0`` if the sequences are + identical, and ``0.0`` if they have nothing in common. + + This is expensive to compute if :meth:`get_matching_blocks` or + :meth:`get_opcodes` hasn't already been called, in which case you may want to + try :meth:`quick_ratio` or :meth:`real_quick_ratio` first to get an upper bound. + + +.. method:: SequenceMatcher.quick_ratio() + + Return an upper bound on :meth:`ratio` relatively quickly. + + This isn't defined beyond that it is an upper bound on :meth:`ratio`, and is + faster to compute. + + +.. method:: SequenceMatcher.real_quick_ratio() + + Return an upper bound on :meth:`ratio` very quickly. + + This isn't defined beyond that it is an upper bound on :meth:`ratio`, and is + faster to compute than either :meth:`ratio` or :meth:`quick_ratio`. + +The three methods that return the ratio of matching to total characters can give +different results due to differing levels of approximation, although +:meth:`quick_ratio` and :meth:`real_quick_ratio` are always at least as large as +:meth:`ratio`:: + + >>> s = SequenceMatcher(None, "abcd", "bcde") + >>> s.ratio() + 0.75 + >>> s.quick_ratio() + 0.75 + >>> s.real_quick_ratio() + 1.0 + + +.. _sequencematcher-examples: + +SequenceMatcher Examples +------------------------ + +This example compares two strings, considering blanks to be "junk:" :: + + >>> s = SequenceMatcher(lambda x: x == " ", + ... "private Thread currentThread;", + ... 
"private volatile Thread currentThread;") + +:meth:`ratio` returns a float in [0, 1], measuring the similarity of the +sequences. As a rule of thumb, a :meth:`ratio` value over 0.6 means the +sequences are close matches:: + + >>> print round(s.ratio(), 3) + 0.866 + +If you're only interested in where the sequences match, +:meth:`get_matching_blocks` is handy:: + + >>> for block in s.get_matching_blocks(): + ... print "a[%d] and b[%d] match for %d elements" % block + a[0] and b[0] match for 8 elements + a[8] and b[17] match for 6 elements + a[14] and b[23] match for 15 elements + a[29] and b[38] match for 0 elements + +Note that the last tuple returned by :meth:`get_matching_blocks` is always a +dummy, ``(len(a), len(b), 0)``, and this is the only case in which the last +tuple element (number of elements matched) is ``0``. + +If you want to know how to change the first sequence into the second, use +:meth:`get_opcodes`:: + + >>> for opcode in s.get_opcodes(): + ... print "%6s a[%d:%d] b[%d:%d]" % opcode + equal a[0:8] b[0:8] + insert a[8:8] b[8:17] + equal a[8:14] b[17:23] + equal a[14:29] b[23:38] + +See also the function :func:`get_close_matches` in this module, which shows how +simple code building on :class:`SequenceMatcher` can be used to do useful work. + + +.. _differ-objects: + +Differ Objects +-------------- + +Note that :class:`Differ`\ -generated deltas make no claim to be **minimal** +diffs. To the contrary, minimal diffs are often counter-intuitive, because they +synch up anywhere possible, sometimes accidental matches 100 pages apart. +Restricting synch points to contiguous matches preserves some notion of +locality, at the occasional cost of producing a longer diff. + +The :class:`Differ` class has this constructor: + + +.. 
class:: Differ([linejunk[, charjunk]]) + + Optional keyword parameters *linejunk* and *charjunk* are for filter functions + (or ``None``): + + *linejunk*: A function that accepts a single string argument, and returns true + if the string is junk. The default is ``None``, meaning that no line is + considered junk. + + *charjunk*: A function that accepts a single character argument (a string of + length 1), and returns true if the character is junk. The default is ``None``, + meaning that no character is considered junk. + +:class:`Differ` objects are used (deltas generated) via a single method: + + +.. method:: Differ.compare(a, b) + + Compare two sequences of lines, and generate the delta (a sequence of lines). + + Each sequence must contain individual single-line strings ending with newlines. + Such sequences can be obtained from the :meth:`readlines` method of file-like + objects. The delta generated also consists of newline-terminated strings, ready + to be printed as-is via the :meth:`writelines` method of a file-like object. + + +.. _differ-examples: + +Differ Example +-------------- + +This example compares two texts. First we set up the texts, sequences of +individual single-line strings ending with newlines (such sequences can also be +obtained from the :meth:`readlines` method of file-like objects):: + + >>> text1 = ''' 1. Beautiful is better than ugly. + ... 2. Explicit is better than implicit. + ... 3. Simple is better than complex. + ... 4. Complex is better than complicated. + ... '''.splitlines(1) + >>> len(text1) + 4 + >>> text1[0][-1] + '\n' + >>> text2 = ''' 1. Beautiful is better than ugly. + ... 3. Simple is better than complex. + ... 4. Complicated is better than complex. + ... 5. Flat is better than nested. + ... '''.splitlines(1) + +Next we instantiate a Differ object:: + + >>> d = Differ() + +Note that when instantiating a :class:`Differ` object we may pass functions to +filter out line and character "junk." 
See the :meth:`Differ` constructor for +details. + +Finally, we compare the two:: + + >>> result = list(d.compare(text1, text2)) + +``result`` is a list of strings, so let's pretty-print it:: + + >>> from pprint import pprint + >>> pprint(result) + [' 1. Beautiful is better than ugly.\n', + '- 2. Explicit is better than implicit.\n', + '- 3. Simple is better than complex.\n', + '+ 3. Simple is better than complex.\n', + '? ++ \n', + '- 4. Complex is better than complicated.\n', + '? ^ ---- ^ \n', + '+ 4. Complicated is better than complex.\n', + '? ++++ ^ ^ \n', + '+ 5. Flat is better than nested.\n'] + +As a single multi-line string it looks like this:: + + >>> import sys + >>> sys.stdout.writelines(result) + 1. Beautiful is better than ugly. + - 2. Explicit is better than implicit. + - 3. Simple is better than complex. + + 3. Simple is better than complex. + ? ++ + - 4. Complex is better than complicated. + ? ^ ---- ^ + + 4. Complicated is better than complex. + ? ++++ ^ ^ + + 5. Flat is better than nested. + diff --git a/Doc/library/dircache.rst b/Doc/library/dircache.rst new file mode 100644 index 0000000..28aa667 --- /dev/null +++ b/Doc/library/dircache.rst @@ -0,0 +1,56 @@ + +:mod:`dircache` --- Cached directory listings +============================================= + +.. module:: dircache + :synopsis: Return directory listing, with cache mechanism. +.. sectionauthor:: Moshe Zadka + + +The :mod:`dircache` module defines a function for reading directory listing +using a cache, and cache invalidation using the *mtime* of the directory. +Additionally, it defines a function to annotate directories by appending a +slash. + +The :mod:`dircache` module defines the following functions: + + +.. function:: reset() + + Resets the directory cache. + + +.. function:: listdir(path) + + Return a directory listing of *path*, as gotten from :func:`os.listdir`. Note + that unless *path* changes, further call to :func:`listdir` will not re-read the + directory structure. 
+ + Note that the list returned should be regarded as read-only. (Perhaps a future + version should change it to return a tuple?) + + +.. function:: opendir(path) + + Same as :func:`listdir`. Defined for backwards compatibility. + + +.. function:: annotate(head, list) + + Assume *list* is a list of paths relative to *head*, and append, in place, a + ``'/'`` to each path which points to a directory. + +:: + + >>> import dircache + >>> a = dircache.listdir('/') + >>> a = a[:] # Copy the return value so we can change 'a' + >>> a + ['bin', 'boot', 'cdrom', 'dev', 'etc', 'floppy', 'home', 'initrd', 'lib', 'lost+ + found', 'mnt', 'proc', 'root', 'sbin', 'tmp', 'usr', 'var', 'vmlinuz'] + >>> dircache.annotate('/', a) + >>> a + ['bin/', 'boot/', 'cdrom/', 'dev/', 'etc/', 'floppy/', 'home/', 'initrd/', 'lib/ + ', 'lost+found/', 'mnt/', 'proc/', 'root/', 'sbin/', 'tmp/', 'usr/', 'var/', 'vm + linuz'] + diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst new file mode 100644 index 0000000..5f28473 --- /dev/null +++ b/Doc/library/dis.rst @@ -0,0 +1,775 @@ + +:mod:`dis` --- Disassembler for Python byte code +================================================ + +.. module:: dis + :synopsis: Disassembler for Python byte code. + + +The :mod:`dis` module supports the analysis of Python byte code by disassembling +it. Since there is no Python assembler, this module defines the Python assembly +language. The Python byte code which this module takes as an input is defined +in the file :file:`Include/opcode.h` and used by the compiler and the +interpreter. + +Example: Given the function :func:`myfunc`:: + + def myfunc(alist): + return len(alist) + +the following command can be used to get the disassembly of :func:`myfunc`:: + + >>> dis.dis(myfunc) + 2 0 LOAD_GLOBAL 0 (len) + 3 LOAD_FAST 0 (alist) + 6 CALL_FUNCTION 1 + 9 RETURN_VALUE + +(The "2" is a line number). + +The :mod:`dis` module defines the following functions and constants: + + +.. 
function:: dis([bytesource]) + + Disassemble the *bytesource* object. *bytesource* can denote either a module, a + class, a method, a function, or a code object. For a module, it disassembles + all functions. For a class, it disassembles all methods. For a single code + sequence, it prints one line per byte code instruction. If no object is + provided, it disassembles the last traceback. + + +.. function:: distb([tb]) + + Disassembles the top-of-stack function of a traceback, using the last traceback + if none was passed. The instruction causing the exception is indicated. + + +.. function:: disassemble(code[, lasti]) + + Disassembles a code object, indicating the last instruction if *lasti* was + provided. The output is divided into the following columns: + + #. the line number, for the first instruction of each line + #. the current instruction, indicated as ``-->``, + #. a labelled instruction, indicated with ``>>``, + #. the address of the instruction, + #. the operation code name, + #. operation parameters, and + #. interpretation of the parameters in parentheses. + + The parameter interpretation recognizes local and global variable names, + constant values, branch targets, and compare operators. + + +.. function:: disco(code[, lasti]) + + A synonym for disassemble. It is more convenient to type, and kept for + compatibility with earlier Python releases. + + +.. data:: opname + + Sequence of operation names, indexable using the byte code. + + +.. data:: opmap + + Dictionary mapping operation names to byte codes. + + +.. data:: cmp_op + + Sequence of all compare operation names. + + +.. data:: hasconst + + Sequence of byte codes that have a constant parameter. + + +.. data:: hasfree + + Sequence of byte codes that access a free variable. + + +.. data:: hasname + + Sequence of byte codes that access an attribute by name. + + +.. data:: hasjrel + + Sequence of byte codes that have a relative jump target. + + +.. 
data:: hasjabs + + Sequence of byte codes that have an absolute jump target. + + +.. data:: haslocal + + Sequence of byte codes that access a local variable. + + +.. data:: hascompare + + Sequence of byte codes of Boolean operations. + + +.. _bytecodes: + +Python Byte Code Instructions +----------------------------- + +The Python compiler currently generates the following byte code instructions. + + +.. opcode:: STOP_CODE () + + Indicates end-of-code to the compiler, not used by the interpreter. + + +.. opcode:: NOP () + + Do nothing code. Used as a placeholder by the bytecode optimizer. + + +.. opcode:: POP_TOP () + + Removes the top-of-stack (TOS) item. + + +.. opcode:: ROT_TWO () + + Swaps the two top-most stack items. + + +.. opcode:: ROT_THREE () + + Lifts second and third stack item one position up, moves top down to position + three. + + +.. opcode:: ROT_FOUR () + + Lifts second, third and fourth stack item one position up, moves top down to + position four. + + +.. opcode:: DUP_TOP () + + Duplicates the reference on top of the stack. + +Unary Operations take the top of the stack, apply the operation, and push the +result back on the stack. + + +.. opcode:: UNARY_POSITIVE () + + Implements ``TOS = +TOS``. + + +.. opcode:: UNARY_NEGATIVE () + + Implements ``TOS = -TOS``. + + +.. opcode:: UNARY_NOT () + + Implements ``TOS = not TOS``. + + +.. opcode:: UNARY_INVERT () + + Implements ``TOS = ~TOS``. + + +.. opcode:: GET_ITER () + + Implements ``TOS = iter(TOS)``. + +Binary operations remove the top of the stack (TOS) and the second top-most +stack item (TOS1) from the stack. They perform the operation, and put the +result back on the stack. + + +.. opcode:: BINARY_POWER () + + Implements ``TOS = TOS1 ** TOS``. + + +.. opcode:: BINARY_MULTIPLY () + + Implements ``TOS = TOS1 * TOS``. + + +.. opcode:: BINARY_FLOOR_DIVIDE () + + Implements ``TOS = TOS1 // TOS``. + + +.. 
opcode:: BINARY_TRUE_DIVIDE () + + Implements ``TOS = TOS1 / TOS`` when ``from __future__ import division`` is in + effect. + + +.. opcode:: BINARY_MODULO () + + Implements ``TOS = TOS1 % TOS``. + + +.. opcode:: BINARY_ADD () + + Implements ``TOS = TOS1 + TOS``. + + +.. opcode:: BINARY_SUBTRACT () + + Implements ``TOS = TOS1 - TOS``. + + +.. opcode:: BINARY_SUBSCR () + + Implements ``TOS = TOS1[TOS]``. + + +.. opcode:: BINARY_LSHIFT () + + Implements ``TOS = TOS1 << TOS``. + + +.. opcode:: BINARY_RSHIFT () + + Implements ``TOS = TOS1 >> TOS``. + + +.. opcode:: BINARY_AND () + + Implements ``TOS = TOS1 & TOS``. + + +.. opcode:: BINARY_XOR () + + Implements ``TOS = TOS1 ^ TOS``. + + +.. opcode:: BINARY_OR () + + Implements ``TOS = TOS1 | TOS``. + +In-place operations are like binary operations, in that they remove TOS and +TOS1, and push the result back on the stack, but the operation is done in-place +when TOS1 supports it, and the resulting TOS may be (but does not have to be) +the original TOS1. + + +.. opcode:: INPLACE_POWER () + + Implements in-place ``TOS = TOS1 ** TOS``. + + +.. opcode:: INPLACE_MULTIPLY () + + Implements in-place ``TOS = TOS1 * TOS``. + + +.. opcode:: INPLACE_FLOOR_DIVIDE () + + Implements in-place ``TOS = TOS1 // TOS``. + + +.. opcode:: INPLACE_TRUE_DIVIDE () + + Implements in-place ``TOS = TOS1 / TOS`` when ``from __future__ import + division`` is in effect. + + +.. opcode:: INPLACE_MODULO () + + Implements in-place ``TOS = TOS1 % TOS``. + + +.. opcode:: INPLACE_ADD () + + Implements in-place ``TOS = TOS1 + TOS``. + + +.. opcode:: INPLACE_SUBTRACT () + + Implements in-place ``TOS = TOS1 - TOS``. + + +.. opcode:: INPLACE_LSHIFT () + + Implements in-place ``TOS = TOS1 << TOS``. + + +.. opcode:: INPLACE_RSHIFT () + + Implements in-place ``TOS = TOS1 >> TOS``. + + +.. opcode:: INPLACE_AND () + + Implements in-place ``TOS = TOS1 & TOS``. + + +.. opcode:: INPLACE_XOR () + + Implements in-place ``TOS = TOS1 ^ TOS``. + + +.. 
opcode:: INPLACE_OR () + + Implements in-place ``TOS = TOS1 | TOS``. + +The slice opcodes take up to three parameters. + + +.. opcode:: SLICE+0 () + + Implements ``TOS = TOS[:]``. + + +.. opcode:: SLICE+1 () + + Implements ``TOS = TOS1[TOS:]``. + + +.. opcode:: SLICE+2 () + + Implements ``TOS = TOS1[:TOS]``. + + +.. opcode:: SLICE+3 () + + Implements ``TOS = TOS2[TOS1:TOS]``. + +Slice assignment needs an additional parameter. As with any statement, it puts +nothing on the stack. + + +.. opcode:: STORE_SLICE+0 () + + Implements ``TOS[:] = TOS1``. + + +.. opcode:: STORE_SLICE+1 () + + Implements ``TOS1[TOS:] = TOS2``. + + +.. opcode:: STORE_SLICE+2 () + + Implements ``TOS1[:TOS] = TOS2``. + + +.. opcode:: STORE_SLICE+3 () + + Implements ``TOS2[TOS1:TOS] = TOS3``. + + +.. opcode:: DELETE_SLICE+0 () + + Implements ``del TOS[:]``. + + +.. opcode:: DELETE_SLICE+1 () + + Implements ``del TOS1[TOS:]``. + + +.. opcode:: DELETE_SLICE+2 () + + Implements ``del TOS1[:TOS]``. + + +.. opcode:: DELETE_SLICE+3 () + + Implements ``del TOS2[TOS1:TOS]``. + + +.. opcode:: STORE_SUBSCR () + + Implements ``TOS1[TOS] = TOS2``. + + +.. opcode:: DELETE_SUBSCR () + + Implements ``del TOS1[TOS]``. + +Miscellaneous opcodes. + + +.. opcode:: PRINT_EXPR () + + Implements the expression statement for the interactive mode. TOS is removed + from the stack and printed. In non-interactive mode, an expression statement is + terminated with ``POP_TOP``. + + +.. opcode:: BREAK_LOOP () + + Terminates a loop due to a :keyword:`break` statement. + + +.. opcode:: CONTINUE_LOOP (target) + + Continues a loop due to a :keyword:`continue` statement. *target* is the + address to jump to (which should be a ``FOR_ITER`` instruction). + + +.. opcode:: SET_ADD () + + Calls ``set.add(TOS1, TOS)``. Used to implement set comprehensions. + + +.. opcode:: LIST_APPEND () + + Calls ``list.append(TOS1, TOS)``. Used to implement list comprehensions. + + +.. 
opcode:: LOAD_LOCALS () + + Pushes a reference to the locals of the current scope on the stack. This is used + in the code for a class definition: After the class body is evaluated, the + locals are passed to the class definition. + + +.. opcode:: RETURN_VALUE () + + Returns with TOS to the caller of the function. + + +.. opcode:: YIELD_VALUE () + + Pops ``TOS`` and yields it from a generator. + + +.. opcode:: IMPORT_STAR () + + Loads all symbols not starting with ``'_'`` directly from the module TOS to the + local namespace. The module is popped after loading all names. This opcode + implements ``from module import *``. + + +.. opcode:: POP_BLOCK () + + Removes one block from the block stack. Per frame, there is a stack of blocks, + denoting nested loops, try statements, and such. + + +.. opcode:: END_FINALLY () + + Terminates a :keyword:`finally` clause. The interpreter recalls whether the + exception has to be re-raised, or whether the function returns, and continues + with the outer-next block. + + +.. opcode:: BUILD_CLASS () + + Creates a new class object. TOS is the methods dictionary, TOS1 the tuple of + the names of the base classes, and TOS2 the class name. + +All of the following opcodes expect arguments. An argument is two bytes, with +the more significant byte last. + + +.. opcode:: STORE_NAME (namei) + + Implements ``name = TOS``. *namei* is the index of *name* in the attribute + :attr:`co_names` of the code object. The compiler tries to use ``STORE_FAST`` + or ``STORE_GLOBAL`` if possible. + + +.. opcode:: DELETE_NAME (namei) + + Implements ``del name``, where *namei* is the index into the :attr:`co_names` + attribute of the code object. + + +.. opcode:: UNPACK_SEQUENCE (count) + + Unpacks TOS into *count* individual values, which are put onto the stack + right-to-left. + +.. % \begin{opcodedesc}{UNPACK_LIST}{count} +.. % This opcode is obsolete. +.. % \end{opcodedesc} +.. % \begin{opcodedesc}{UNPACK_ARG}{count} +.. % This opcode is obsolete. +.. 
% \end{opcodedesc} + + +.. opcode:: DUP_TOPX (count) + + Duplicate *count* items, keeping them in the same order. Due to implementation + limits, *count* should be between 1 and 5 inclusive. + + +.. opcode:: STORE_ATTR (namei) + + Implements ``TOS.name = TOS1``, where *namei* is the index of name in + :attr:`co_names`. + + +.. opcode:: DELETE_ATTR (namei) + + Implements ``del TOS.name``, using *namei* as index into :attr:`co_names`. + + +.. opcode:: STORE_GLOBAL (namei) + + Works as ``STORE_NAME``, but stores the name as a global. + + +.. opcode:: DELETE_GLOBAL (namei) + + Works as ``DELETE_NAME``, but deletes a global name. + +.. % \begin{opcodedesc}{UNPACK_VARARG}{argc} +.. % This opcode is obsolete. +.. % \end{opcodedesc} + + +.. opcode:: LOAD_CONST (consti) + + Pushes ``co_consts[consti]`` onto the stack. + + +.. opcode:: LOAD_NAME (namei) + + Pushes the value associated with ``co_names[namei]`` onto the stack. + + +.. opcode:: BUILD_TUPLE (count) + + Creates a tuple consuming *count* items from the stack, and pushes the resulting + tuple onto the stack. + + +.. opcode:: BUILD_LIST (count) + + Works as ``BUILD_TUPLE``, but creates a list. + + +.. opcode:: BUILD_SET (count) + + Works as ``BUILD_TUPLE``, but creates a set. + + +.. opcode:: BUILD_MAP (zero) + + Pushes a new empty dictionary object onto the stack. The argument is ignored + and set to zero by the compiler. + + +.. opcode:: LOAD_ATTR (namei) + + Replaces TOS with ``getattr(TOS, co_names[namei])``. + + +.. opcode:: COMPARE_OP (opname) + + Performs a Boolean operation. The operation name can be found in + ``cmp_op[opname]``. + + +.. opcode:: IMPORT_NAME (namei) + + Imports the module ``co_names[namei]``. The module object is pushed onto the + stack. The current namespace is not affected: for a proper import statement, a + subsequent ``STORE_FAST`` instruction modifies the namespace. + + +.. opcode:: IMPORT_FROM (namei) + + Loads the attribute ``co_names[namei]`` from the module found in TOS. 
The + resulting object is pushed onto the stack, to be subsequently stored by a + ``STORE_FAST`` instruction. + + +.. opcode:: JUMP_FORWARD (delta) + + Increments byte code counter by *delta*. + + +.. opcode:: JUMP_IF_TRUE (delta) + + If TOS is true, increment the byte code counter by *delta*. TOS is left on the + stack. + + +.. opcode:: JUMP_IF_FALSE (delta) + + If TOS is false, increment the byte code counter by *delta*. TOS is not + changed. + + +.. opcode:: JUMP_ABSOLUTE (target) + + Set byte code counter to *target*. + + +.. opcode:: FOR_ITER (delta) + + ``TOS`` is an iterator. Call its :meth:`__next__` method. If this yields a new + value, push it on the stack (leaving the iterator below it). If the iterator + indicates it is exhausted ``TOS`` is popped, and the byte code counter is + incremented by *delta*. + +.. % \begin{opcodedesc}{FOR_LOOP}{delta} +.. % This opcode is obsolete. +.. % \end{opcodedesc} +.. % \begin{opcodedesc}{LOAD_LOCAL}{namei} +.. % This opcode is obsolete. +.. % \end{opcodedesc} + + +.. opcode:: LOAD_GLOBAL (namei) + + Loads the global named ``co_names[namei]`` onto the stack. + +.. % \begin{opcodedesc}{SET_FUNC_ARGS}{argc} +.. % This opcode is obsolete. +.. % \end{opcodedesc} + + +.. opcode:: SETUP_LOOP (delta) + + Pushes a block for a loop onto the block stack. The block spans from the + current instruction with a size of *delta* bytes. + + +.. opcode:: SETUP_EXCEPT (delta) + + Pushes a try block from a try-except clause onto the block stack. *delta* points + to the first except block. + + +.. opcode:: SETUP_FINALLY (delta) + + Pushes a try block from a try-except clause onto the block stack. *delta* points + to the finally block. + + +.. opcode:: LOAD_FAST (var_num) + + Pushes a reference to the local ``co_varnames[var_num]`` onto the stack. + + +.. opcode:: STORE_FAST (var_num) + + Stores TOS into the local ``co_varnames[var_num]``. + + +.. opcode:: DELETE_FAST (var_num) + + Deletes local ``co_varnames[var_num]``. + + +.. 
opcode:: LOAD_CLOSURE (i) + + Pushes a reference to the cell contained in slot *i* of the cell and free + variable storage. The name of the variable is ``co_cellvars[i]`` if *i* is + less than the length of *co_cellvars*. Otherwise it is ``co_freevars[i - + len(co_cellvars)]``. + + +.. opcode:: LOAD_DEREF (i) + + Loads the cell contained in slot *i* of the cell and free variable storage. + Pushes a reference to the object the cell contains on the stack. + + +.. opcode:: STORE_DEREF (i) + + Stores TOS into the cell contained in slot *i* of the cell and free variable + storage. + + +.. opcode:: SET_LINENO (lineno) + + This opcode is obsolete. + + +.. opcode:: RAISE_VARARGS (argc) + + Raises an exception. *argc* indicates the number of parameters to the raise + statement, ranging from 0 to 3. The handler will find the traceback as TOS2, + the parameter as TOS1, and the exception as TOS. + + +.. opcode:: CALL_FUNCTION (argc) + + Calls a function. The low byte of *argc* indicates the number of positional + parameters, the high byte the number of keyword parameters. On the stack, the + opcode finds the keyword parameters first. For each keyword argument, the value + is on top of the key. Below the keyword parameters, the positional parameters + are on the stack, with the right-most parameter on top. Below the parameters, + the function object to call is on the stack. + + +.. opcode:: MAKE_FUNCTION (argc) + + Pushes a new function object on the stack. TOS is the code associated with the + function. The function object is defined to have *argc* default parameters, + which are found below TOS. + + +.. opcode:: MAKE_CLOSURE (argc) + + Creates a new function object, sets its *__closure__* slot, and pushes it on the + stack. TOS is the code associated with the function. If the code object has N + free variables, the next N items on the stack are the cells for these variables. + The function also has *argc* default parameters, where are found before the + cells. + + +.. 
opcode:: BUILD_SLICE (argc) + + .. index:: builtin: slice + + Pushes a slice object on the stack. *argc* must be 2 or 3. If it is 2, + ``slice(TOS1, TOS)`` is pushed; if it is 3, ``slice(TOS2, TOS1, TOS)`` is + pushed. See the ``slice()`` built-in function for more information. + + +.. opcode:: EXTENDED_ARG (ext) + + Prefixes any opcode which has an argument too big to fit into the default two + bytes. *ext* holds two additional bytes which, taken together with the + subsequent opcode's argument, comprise a four-byte argument, *ext* being the two + most-significant bytes. + + +.. opcode:: CALL_FUNCTION_VAR (argc) + + Calls a function. *argc* is interpreted as in ``CALL_FUNCTION``. The top element + on the stack contains the variable argument list, followed by keyword and + positional arguments. + + +.. opcode:: CALL_FUNCTION_KW (argc) + + Calls a function. *argc* is interpreted as in ``CALL_FUNCTION``. The top element + on the stack contains the keyword arguments dictionary, followed by explicit + keyword and positional arguments. + + +.. opcode:: CALL_FUNCTION_VAR_KW (argc) + + Calls a function. *argc* is interpreted as in ``CALL_FUNCTION``. The top + element on the stack contains the keyword arguments dictionary, followed by the + variable-arguments tuple, followed by explicit keyword and positional arguments. + + +.. opcode:: HAVE_ARGUMENT () + + This is not really an opcode. It identifies the dividing line between opcodes + which don't take arguments ``< HAVE_ARGUMENT`` and those which do ``>= + HAVE_ARGUMENT``. + diff --git a/Doc/library/distutils.rst b/Doc/library/distutils.rst new file mode 100644 index 0000000..534faab --- /dev/null +++ b/Doc/library/distutils.rst @@ -0,0 +1,30 @@ + +:mod:`distutils` --- Building and installing Python modules +=========================================================== + +.. module:: distutils + :synopsis: Support for building and installing Python modules into an existing Python + installation. +.. sectionauthor:: Fred L. 
Drake, Jr. + + +The :mod:`distutils` package provides support for building and installing +additional modules into a Python installation. The new modules may be either +100%-pure Python, or may be extension modules written in C, or may be +collections of Python packages which include modules coded in both Python and C. + +This package is discussed in two separate chapters: + + +.. seealso:: + + :ref:`distutils-index` + The manual for developers and packagers of Python modules. This describes how + to prepare :mod:`distutils`\ -based packages so that they may be easily + installed into an existing Python installation. + + :ref:`install-index` + An "administrators" manual which includes information on installing modules into + an existing Python installation. You do not need to be a Python programmer to + read this manual. + diff --git a/Doc/library/dl.rst b/Doc/library/dl.rst new file mode 100644 index 0000000..ff42619 --- /dev/null +++ b/Doc/library/dl.rst @@ -0,0 +1,111 @@ + +:mod:`dl` --- Call C functions in shared objects +================================================ + +.. module:: dl + :platform: Unix + :synopsis: Call C functions in shared objects. +.. sectionauthor:: Moshe Zadka + + +.. % ?????????? Anyone???????????? + +The :mod:`dl` module defines an interface to the :cfunc:`dlopen` function, which +is the most common interface on Unix platforms for handling dynamically linked +libraries. It allows the program to call arbitrary functions in such a library. + +.. warning:: + + The :mod:`dl` module bypasses the Python type system and error handling. If + used incorrectly it may cause segmentation faults, crashes or other incorrect + behaviour. + +.. note:: + + This module will not work unless ``sizeof(int) == sizeof(long) == sizeof(char + *)`` If this is not the case, :exc:`SystemError` will be raised on import. + +The :mod:`dl` module defines the following function: + + +.. 
function:: open(name[, mode=RTLD_LAZY]) + + Open a shared object file, and return a handle. Mode signifies late binding + (:const:`RTLD_LAZY`) or immediate binding (:const:`RTLD_NOW`). Default is + :const:`RTLD_LAZY`. Note that some systems do not support :const:`RTLD_NOW`. + + Return value is a :class:`dlobject`. + +The :mod:`dl` module defines the following constants: + + +.. data:: RTLD_LAZY + + Useful as an argument to :func:`open`. + + +.. data:: RTLD_NOW + + Useful as an argument to :func:`open`. Note that on systems which do not + support immediate binding, this constant will not appear in the module. For + maximum portability, use :func:`hasattr` to determine if the system supports + immediate binding. + +The :mod:`dl` module defines the following exception: + + +.. exception:: error + + Exception raised when an error has occurred inside the dynamic loading and + linking routines. + +Example:: + + >>> import dl, time + >>> a=dl.open('/lib/libc.so.6') + >>> a.call('time'), time.time() + (929723914, 929723914.498) + +This example was tried on a Debian GNU/Linux system, and is a good example of +the fact that using this module is usually a bad alternative. + + +.. _dl-objects: + +Dl Objects +---------- + +Dl objects, as returned by :func:`open` above, have the following methods: + + +.. method:: dl.close() + + Free all resources, except the memory. + + +.. method:: dl.sym(name) + + Return the pointer for the function named *name*, as a number, if it exists in + the referenced shared object, otherwise ``None``. This is useful in code like:: + + >>> if a.sym('time'): + ... a.call('time') + ... else: + ... time.time() + + (Note that this function will return a non-zero number, as zero is the *NULL* + pointer) + + +.. method:: dl.call(name[, arg1[, arg2...]]) + + Call the function named *name* in the referenced shared object. 
The arguments + must be either Python integers, which will be passed as is, Python strings, to + which a pointer will be passed, or ``None``, which will be passed as *NULL*. + Note that strings should only be passed to functions as :ctype:`const char\*`, + as Python will not like its string mutated. + + There must be at most 10 arguments, and arguments not given will be treated as + ``None``. The function's return value must be a C :ctype:`long`, which is a + Python integer. + diff --git a/Doc/library/doctest.rst b/Doc/library/doctest.rst new file mode 100644 index 0000000..23f96e4 --- /dev/null +++ b/Doc/library/doctest.rst @@ -0,0 +1,1869 @@ +:mod:`doctest` --- Test interactive Python examples +=================================================== + +.. module:: doctest + :synopsis: Test pieces of code within docstrings. +.. moduleauthor:: Tim Peters +.. sectionauthor:: Tim Peters +.. sectionauthor:: Moshe Zadka +.. sectionauthor:: Edward Loper + + +The :mod:`doctest` module searches for pieces of text that look like interactive +Python sessions, and then executes those sessions to verify that they work +exactly as shown. There are several common ways to use doctest: + +* To check that a module's docstrings are up-to-date by verifying that all + interactive examples still work as documented. + +* To perform regression testing by verifying that interactive examples from a + test file or a test object work as expected. + +* To write tutorial documentation for a package, liberally illustrated with + input-output examples. Depending on whether the examples or the expository text + are emphasized, this has the flavor of "literate testing" or "executable + documentation". + +Here's a complete but small example module:: + + """ + This is the "example" module. + + The example module supplies one function, factorial(). For example, + + >>> factorial(5) + 120 + """ + + def factorial(n): + """Return the factorial of n, an exact integer >= 0. 
+ + If the result is small enough to fit in an int, return an int. + Else return a long. + + >>> [factorial(n) for n in range(6)] + [1, 1, 2, 6, 24, 120] + >>> [factorial(long(n)) for n in range(6)] + [1, 1, 2, 6, 24, 120] + >>> factorial(30) + 265252859812191058636308480000000L + >>> factorial(30L) + 265252859812191058636308480000000L + >>> factorial(-1) + Traceback (most recent call last): + ... + ValueError: n must be >= 0 + + Factorials of floats are OK, but the float must be an exact integer: + >>> factorial(30.1) + Traceback (most recent call last): + ... + ValueError: n must be exact integer + >>> factorial(30.0) + 265252859812191058636308480000000L + + It must also not be ridiculously large: + >>> factorial(1e100) + Traceback (most recent call last): + ... + OverflowError: n too large + """ + + +.. % allow LaTeX to break here. + +:: + + import math + if not n >= 0: + raise ValueError("n must be >= 0") + if math.floor(n) != n: + raise ValueError("n must be exact integer") + if n+1 == n: # catch a value like 1e300 + raise OverflowError("n too large") + result = 1 + factor = 2 + while factor <= n: + result *= factor + factor += 1 + return result + + def _test(): + import doctest + doctest.testmod() + + if __name__ == "__main__": + _test() + +If you run :file:`example.py` directly from the command line, :mod:`doctest` +works its magic:: + + $ python example.py + $ + +There's no output! That's normal, and it means all the examples worked. Pass +:option:`-v` to the script, and :mod:`doctest` prints a detailed log of what +it's trying, and prints a summary at the end:: + + $ python example.py -v + Trying: + factorial(5) + Expecting: + 120 + ok + Trying: + [factorial(n) for n in range(6)] + Expecting: + [1, 1, 2, 6, 24, 120] + ok + Trying: + [factorial(long(n)) for n in range(6)] + Expecting: + [1, 1, 2, 6, 24, 120] + ok + +And so on, eventually ending with:: + + Trying: + factorial(1e100) + Expecting: + Traceback (most recent call last): + ... 
+ OverflowError: n too large + ok + 1 items had no tests: + __main__._test + 2 items passed all tests: + 1 tests in __main__ + 8 tests in __main__.factorial + 9 tests in 3 items. + 9 passed and 0 failed. + Test passed. + $ + +That's all you need to know to start making productive use of :mod:`doctest`! +Jump in. The following sections provide full details. Note that there are many +examples of doctests in the standard Python test suite and libraries. +Especially useful examples can be found in the standard test file +:file:`Lib/test/test_doctest.py`. + + +.. _doctest-simple-testmod: + +Simple Usage: Checking Examples in Docstrings +--------------------------------------------- + +The simplest way to start using doctest (but not necessarily the way you'll +continue to do it) is to end each module :mod:`M` with:: + + def _test(): + import doctest + doctest.testmod() + + if __name__ == "__main__": + _test() + +:mod:`doctest` then examines docstrings in module :mod:`M`. + +Running the module as a script causes the examples in the docstrings to get +executed and verified:: + + python M.py + +This won't display anything unless an example fails, in which case the failing +example(s) and the cause(s) of the failure(s) are printed to stdout, and the +final line of output is ``***Test Failed*** N failures.``, where *N* is the +number of examples that failed. + +Run it with the :option:`-v` switch instead:: + + python M.py -v + +and a detailed report of all examples tried is printed to standard output, along +with assorted summaries at the end. + +You can force verbose mode by passing ``verbose=True`` to :func:`testmod`, or +prohibit it by passing ``verbose=False``. In either of those cases, +``sys.argv`` is not examined by :func:`testmod` (so passing :option:`-v` or not +has no effect). + +Since Python 2.6, there is also a command line shortcut for running +:func:`testmod`. 
You can instruct the Python interpreter to run the doctest +module directly from the standard library and pass the module name(s) on the +command line:: + + python -m doctest -v example.py + +This will import :file:`example.py` as a standalone module and run +:func:`testmod` on it. Note that this may not work correctly if the file is +part of a package and imports other submodules from that package. + +For more information on :func:`testmod`, see section :ref:`doctest-basic-api`. + + +.. _doctest-simple-testfile: + +Simple Usage: Checking Examples in a Text File +---------------------------------------------- + +Another simple application of doctest is testing interactive examples in a text +file. This can be done with the :func:`testfile` function:: + + import doctest + doctest.testfile("example.txt") + +That short script executes and verifies any interactive Python examples +contained in the file :file:`example.txt`. The file content is treated as if it +were a single giant docstring; the file doesn't need to contain a Python +program! For example, perhaps :file:`example.txt` contains this:: + + The ``example`` module + ====================== + + Using ``factorial`` + ------------------- + + This is an example text file in reStructuredText format. First import + ``factorial`` from the ``example`` module: + + >>> from example import factorial + + Now use it: + + >>> factorial(6) + 120 + +Running ``doctest.testfile("example.txt")`` then finds the error in this +documentation:: + + File "./example.txt", line 14, in example.txt + Failed example: + factorial(6) + Expected: + 120 + Got: + 720 + +As with :func:`testmod`, :func:`testfile` won't display anything unless an +example fails. If an example does fail, then the failing example(s) and the +cause(s) of the failure(s) are printed to stdout, using the same format as +:func:`testmod`. + +By default, :func:`testfile` looks for files in the calling module's directory. 
+See section :ref:`doctest-basic-api` for a description of the optional arguments +that can be used to tell it to look for files in other locations. + +Like :func:`testmod`, :func:`testfile`'s verbosity can be set with the +:option:`-v` command-line switch or with the optional keyword argument +*verbose*. + +Since Python 2.6, there is also a command line shortcut for running +:func:`testfile`. You can instruct the Python interpreter to run the doctest +module directly from the standard library and pass the file name(s) on the +command line:: + + python -m doctest -v example.txt + +Because the file name does not end with :file:`.py`, :mod:`doctest` infers that +it must be run with :func:`testfile`, not :func:`testmod`. + +For more information on :func:`testfile`, see section :ref:`doctest-basic-api`. + + +.. _doctest-how-it-works: + +How It Works +------------ + +This section examines in detail how doctest works: which docstrings it looks at, +how it finds interactive examples, what execution context it uses, how it +handles exceptions, and how option flags can be used to control its behavior. +This is the information that you need to know to write doctest examples; for +information about actually running doctest on these examples, see the following +sections. + + +.. _doctest-which-docstrings: + +Which Docstrings Are Examined? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The module docstring, and all function, class and method docstrings are +searched. Objects imported into the module are not searched. + +In addition, if ``M.__test__`` exists and "is true", it must be a dict, and each +entry maps a (string) name to a function object, class object, or string. +Function and class object docstrings found from ``M.__test__`` are searched, and +strings are treated as if they were docstrings. 
In output, a key ``K`` in +``M.__test__`` appears with name :: + + .__test__.K + +Any classes found are recursively searched similarly, to test docstrings in +their contained methods and nested classes. + +.. versionchanged:: 2.4 + A "private name" concept is deprecated and no longer documented. + + +.. _doctest-finding-examples: + +How are Docstring Examples Recognized? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In most cases a copy-and-paste of an interactive console session works fine, but +doctest isn't trying to do an exact emulation of any specific Python shell. All +hard tab characters are expanded to spaces, using 8-column tab stops. If you +don't believe tabs should mean that, too bad: don't use hard tabs, or write +your own :class:`DocTestParser` class. + +.. versionchanged:: 2.4 + Expanding tabs to spaces is new; previous versions tried to preserve hard tabs, + with confusing results. + +:: + + >>> # comments are ignored + >>> x = 12 + >>> x + 12 + >>> if x == 13: + ... print "yes" + ... else: + ... print "no" + ... print "NO" + ... print "NO!!!" + ... + no + NO + NO!!! + >>> + +Any expected output must immediately follow the final ``'>>> '`` or ``'... '`` +line containing the code, and the expected output (if any) extends to the next +``'>>> '`` or all-whitespace line. + +The fine print: + +* Expected output cannot contain an all-whitespace line, since such a line is + taken to signal the end of expected output. If expected output does contain a + blank line, put ```` in your doctest example each place a blank line + is expected. + + .. versionchanged:: 2.4 + ```` was added; there was no way to use expected output containing + empty lines in previous versions. + +* Output to stdout is captured, but not output to stderr (exception tracebacks + are captured via a different means). 
+ +* If you continue a line via backslashing in an interactive session, or for any + other reason use a backslash, you should use a raw docstring, which will + preserve your backslashes exactly as you type them:: + + >>> def f(x): + ... r'''Backslashes in a raw docstring: m\n''' + >>> print f.__doc__ + Backslashes in a raw docstring: m\n + + Otherwise, the backslash will be interpreted as part of the string. For example, + the "\\" above would be interpreted as a newline character. Alternatively, you + can double each backslash in the doctest version (and not use a raw string):: + + >>> def f(x): + ... '''Backslashes in a raw docstring: m\\n''' + >>> print f.__doc__ + Backslashes in a raw docstring: m\n + +* The starting column doesn't matter:: + + >>> assert "Easy!" + >>> import math + >>> math.floor(1.9) + 1.0 + + and as many leading whitespace characters are stripped from the expected output + as appeared in the initial ``'>>> '`` line that started the example. + + +.. _doctest-execution-context: + +What's the Execution Context? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default, each time :mod:`doctest` finds a docstring to test, it uses a +*shallow copy* of :mod:`M`'s globals, so that running tests doesn't change the +module's real globals, and so that one test in :mod:`M` can't leave behind +crumbs that accidentally allow another test to work. This means examples can +freely use any names defined at top-level in :mod:`M`, and names defined earlier +in the docstring being run. Examples cannot see names defined in other +docstrings. + +You can force use of your own dict as the execution context by passing +``globs=your_dict`` to :func:`testmod` or :func:`testfile` instead. + + +.. _doctest-exceptions: + +What About Exceptions? +^^^^^^^^^^^^^^^^^^^^^^ + +No problem, provided that the traceback is the only output produced by the +example: just paste in the traceback. 
[#]_ Since tracebacks contain details +that are likely to change rapidly (for example, exact file paths and line +numbers), this is one case where doctest works hard to be flexible in what it +accepts. + +Simple example:: + + >>> [1, 2, 3].remove(42) + Traceback (most recent call last): + File "", line 1, in ? + ValueError: list.remove(x): x not in list + +That doctest succeeds if :exc:`ValueError` is raised, with the ``list.remove(x): +x not in list`` detail as shown. + +The expected output for an exception must start with a traceback header, which +may be either of the following two lines, indented the same as the first line of +the example:: + + Traceback (most recent call last): + Traceback (innermost last): + +The traceback header is followed by an optional traceback stack, whose contents +are ignored by doctest. The traceback stack is typically omitted, or copied +verbatim from an interactive session. + +The traceback stack is followed by the most interesting part: the line(s) +containing the exception type and detail. This is usually the last line of a +traceback, but can extend across multiple lines if the exception has a +multi-line detail:: + + >>> raise ValueError('multi\n line\ndetail') + Traceback (most recent call last): + File "", line 1, in ? + ValueError: multi + line + detail + +The last three lines (starting with :exc:`ValueError`) are compared against the +exception's type and detail, and the rest are ignored. + +Best practice is to omit the traceback stack, unless it adds significant +documentation value to the example. So the last example is probably better as:: + + >>> raise ValueError('multi\n line\ndetail') + Traceback (most recent call last): + ... + ValueError: multi + line + detail + +Note that tracebacks are treated very specially. In particular, in the +rewritten example, the use of ``...`` is independent of doctest's +:const:`ELLIPSIS` option. 
The ellipsis in that example could be left out, or +could just as well be three (or three hundred) commas or digits, or an indented +transcript of a Monty Python skit. + +Some details you should read once, but won't need to remember: + +* Doctest can't guess whether your expected output came from an exception + traceback or from ordinary printing. So, e.g., an example that expects + ``ValueError: 42 is prime`` will pass whether :exc:`ValueError` is actually + raised or if the example merely prints that traceback text. In practice, + ordinary output rarely begins with a traceback header line, so this doesn't + create real problems. + +* Each line of the traceback stack (if present) must be indented further than + the first line of the example, *or* start with a non-alphanumeric character. + The first line following the traceback header indented the same and starting + with an alphanumeric is taken to be the start of the exception detail. Of + course this does the right thing for genuine tracebacks. + +* When the :const:`IGNORE_EXCEPTION_DETAIL` doctest option is is specified, + everything following the leftmost colon is ignored. + +* The interactive shell omits the traceback header line for some + :exc:`SyntaxError`\ s. But doctest uses the traceback header line to + distinguish exceptions from non-exceptions. So in the rare case where you need + to test a :exc:`SyntaxError` that omits the traceback header, you will need to + manually add the traceback header line to your test example. + +* For some :exc:`SyntaxError`\ s, Python displays the character position of the + syntax error, using a ``^`` marker:: + + >>> 1 1 + File "", line 1 + 1 1 + ^ + SyntaxError: invalid syntax + + Since the lines showing the position of the error come before the exception type + and detail, they are not checked by doctest. 
For example, the following test + would pass, even though it puts the ``^`` marker in the wrong location:: + + >>> 1 1 + Traceback (most recent call last): + File "", line 1 + 1 1 + ^ + SyntaxError: invalid syntax + +.. versionchanged:: 2.4 + The ability to handle a multi-line exception detail, and the + :const:`IGNORE_EXCEPTION_DETAIL` doctest option, were added. + + +.. _doctest-options: + +Option Flags and Directives +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A number of option flags control various aspects of doctest's behavior. +Symbolic names for the flags are supplied as module constants, which can be +or'ed together and passed to various functions. The names can also be used in +doctest directives (see below). + +The first group of options define test semantics, controlling aspects of how +doctest decides whether actual output matches an example's expected output: + + +.. data:: DONT_ACCEPT_TRUE_FOR_1 + + By default, if an expected output block contains just ``1``, an actual output + block containing just ``1`` or just ``True`` is considered to be a match, and + similarly for ``0`` versus ``False``. When :const:`DONT_ACCEPT_TRUE_FOR_1` is + specified, neither substitution is allowed. The default behavior caters to that + Python changed the return type of many functions from integer to boolean; + doctests expecting "little integer" output still work in these cases. This + option will probably go away, but not for several years. + + +.. data:: DONT_ACCEPT_BLANKLINE + + By default, if an expected output block contains a line containing only the + string ````, then that line will match a blank line in the actual + output. Because a genuinely blank line delimits the expected output, this is + the only way to communicate that a blank line is expected. When + :const:`DONT_ACCEPT_BLANKLINE` is specified, this substitution is not allowed. + + +.. data:: NORMALIZE_WHITESPACE + + When specified, all sequences of whitespace (blanks and newlines) are treated as + equal. 
Any sequence of whitespace within the expected output will match any + sequence of whitespace within the actual output. By default, whitespace must + match exactly. :const:`NORMALIZE_WHITESPACE` is especially useful when a line of + expected output is very long, and you want to wrap it across multiple lines in + your source. + + +.. data:: ELLIPSIS + + When specified, an ellipsis marker (``...``) in the expected output can match + any substring in the actual output. This includes substrings that span line + boundaries, and empty substrings, so it's best to keep usage of this simple. + Complicated uses can lead to the same kinds of "oops, it matched too much!" + surprises that ``.*`` is prone to in regular expressions. + + +.. data:: IGNORE_EXCEPTION_DETAIL + + When specified, an example that expects an exception passes if an exception of + the expected type is raised, even if the exception detail does not match. For + example, an example expecting ``ValueError: 42`` will pass if the actual + exception raised is ``ValueError: 3*14``, but will fail, e.g., if + :exc:`TypeError` is raised. + + Note that a similar effect can be obtained using :const:`ELLIPSIS`, and + :const:`IGNORE_EXCEPTION_DETAIL` may go away when Python releases prior to 2.4 + become uninteresting. Until then, :const:`IGNORE_EXCEPTION_DETAIL` is the only + clear way to write a doctest that doesn't care about the exception detail yet + continues to pass under Python releases prior to 2.4 (doctest directives appear + to be comments to them). For example, :: + + >>> (1, 2)[3] = 'moo' #doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + File "", line 1, in ? + TypeError: object doesn't support item assignment + + passes under Python 2.4 and Python 2.3. The detail changed in 2.4, to say "does + not" instead of "doesn't". + + +.. data:: SKIP + + When specified, do not run the example at all. 
This can be useful in contexts + where doctest examples serve as both documentation and test cases, and an + example should be included for documentation purposes, but should not be + checked. E.g., the example's output might be random; or the example might + depend on resources which would be unavailable to the test driver. + + The SKIP flag can also be used for temporarily "commenting out" examples. + + +.. data:: COMPARISON_FLAGS + + A bitmask or'ing together all the comparison flags above. + +The second group of options controls how test failures are reported: + + +.. data:: REPORT_UDIFF + + When specified, failures that involve multi-line expected and actual outputs are + displayed using a unified diff. + + +.. data:: REPORT_CDIFF + + When specified, failures that involve multi-line expected and actual outputs + will be displayed using a context diff. + + +.. data:: REPORT_NDIFF + + When specified, differences are computed by ``difflib.Differ``, using the same + algorithm as the popular :file:`ndiff.py` utility. This is the only method that + marks differences within lines as well as across lines. For example, if a line + of expected output contains digit ``1`` where actual output contains letter + ``l``, a line is inserted with a caret marking the mismatching column positions. + + +.. data:: REPORT_ONLY_FIRST_FAILURE + + When specified, display the first failing example in each doctest, but suppress + output for all remaining examples. This will prevent doctest from reporting + correct examples that break because of earlier failures; but it might also hide + incorrect examples that fail independently of the first failure. When + :const:`REPORT_ONLY_FIRST_FAILURE` is specified, the remaining examples are + still run, and still count towards the total number of failures reported; only + the output is suppressed. + + +.. data:: REPORTING_FLAGS + + A bitmask or'ing together all the reporting flags above. 
+ +"Doctest directives" may be used to modify the option flags for individual +examples. Doctest directives are expressed as a special Python comment +following an example's source code: + +.. productionlist:: doctest + directive: "#" "doctest:" `directive_options` + directive_options: `directive_option` ("," `directive_option`)\* + directive_option: `on_or_off` `directive_option_name` + on_or_off: "+" \| "-" + directive_option_name: "DONT_ACCEPT_BLANKLINE" \| "NORMALIZE_WHITESPACE" \| ... + +Whitespace is not allowed between the ``+`` or ``-`` and the directive option +name. The directive option name can be any of the option flag names explained +above. + +An example's doctest directives modify doctest's behavior for that single +example. Use ``+`` to enable the named behavior, or ``-`` to disable it. + +For example, this test passes:: + + >>> print range(20) #doctest: +NORMALIZE_WHITESPACE + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] + +Without the directive it would fail, both because the actual output doesn't have +two blanks before the single-digit list elements, and because the actual output +is on a single line. This test also passes, and also requires a directive to do +so:: + + >>> print range(20) # doctest:+ELLIPSIS + [0, 1, ..., 18, 19] + +Multiple directives can be used on a single physical line, separated by commas:: + + >>> print range(20) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + [0, 1, ..., 18, 19] + +If multiple directive comments are used for a single example, then they are +combined:: + + >>> print range(20) # doctest: +ELLIPSIS + ... # doctest: +NORMALIZE_WHITESPACE + [0, 1, ..., 18, 19] + +As the previous example shows, you can add ``...`` lines to your example +containing only directives. This can be useful when an example is too long for +a directive to comfortably fit on the same line:: + + >>> print range(5) + range(10,20) + range(30,40) + range(50,60) + ... 
# doctest: +ELLIPSIS + [0, ..., 4, 10, ..., 19, 30, ..., 39, 50, ..., 59] + +Note that since all options are disabled by default, and directives apply only +to the example they appear in, enabling options (via ``+`` in a directive) is +usually the only meaningful choice. However, option flags can also be passed to +functions that run doctests, establishing different defaults. In such cases, +disabling an option via ``-`` in a directive can be useful. + +.. versionchanged:: 2.4 + Constants :const:`DONT_ACCEPT_BLANKLINE`, :const:`NORMALIZE_WHITESPACE`, + :const:`ELLIPSIS`, :const:`IGNORE_EXCEPTION_DETAIL`, :const:`REPORT_UDIFF`, + :const:`REPORT_CDIFF`, :const:`REPORT_NDIFF`, + :const:`REPORT_ONLY_FIRST_FAILURE`, :const:`COMPARISON_FLAGS` and + :const:`REPORTING_FLAGS` were added; by default ``<BLANKLINE>`` in expected + output matches an empty line in actual output; and doctest directives were + added. + +.. versionchanged:: 2.5 + Constant :const:`SKIP` was added. + +There's also a way to register new option flag names, although this isn't useful +unless you intend to extend :mod:`doctest` internals via subclassing: + + +.. function:: register_optionflag(name) + + Create a new option flag with a given name, and return the new flag's integer + value. :func:`register_optionflag` can be used when subclassing + :class:`OutputChecker` or :class:`DocTestRunner` to create new options that are + supported by your subclasses. :func:`register_optionflag` should always be + called using the following idiom:: + + MY_FLAG = register_optionflag('MY_FLAG') + + .. versionadded:: 2.4 + + +.. _doctest-warnings: + +Warnings +^^^^^^^^ + +:mod:`doctest` is serious about requiring exact matches in expected output. If +even a single character doesn't match, the test fails. This will probably +surprise you a few times, as you learn exactly what Python does and doesn't +guarantee about output.
For example, when printing a dict, Python doesn't +guarantee that the key-value pairs will be printed in any particular order, so a +test like + +.. % Hey! What happened to Monty Python examples? +.. % Tim: ask Guido -- it's his example! + +:: + + >>> foo() + {"Hermione": "hippogryph", "Harry": "broomstick"} + +is vulnerable! One workaround is to do :: + + >>> foo() == {"Hermione": "hippogryph", "Harry": "broomstick"} + True + +instead. Another is to do :: + + >>> d = foo().items() + >>> d.sort() + >>> d + [('Harry', 'broomstick'), ('Hermione', 'hippogryph')] + +There are others, but you get the idea. + +Another bad idea is to print things that embed an object address, like :: + + >>> id(1.0) # certain to fail some of the time + 7948648 + >>> class C: pass + >>> C() # the default repr() for instances embeds an address + <__main__.C instance at 0x00AC18F0> + +The :const:`ELLIPSIS` directive gives a nice approach for the last example:: + + >>> C() #doctest: +ELLIPSIS + <__main__.C instance at 0x...> + +Floating-point numbers are also subject to small output variations across +platforms, because Python defers to the platform C library for float formatting, +and C libraries vary widely in quality here. :: + + >>> 1./7 # risky + 0.14285714285714285 + >>> print 1./7 # safer + 0.142857142857 + >>> print round(1./7, 6) # much safer + 0.142857 + +Numbers of the form ``I/2.**J`` are safe across all platforms, and I often +contrive doctest examples to produce numbers of that form:: + + >>> 3./4 # utterly safe + 0.75 + +Simple fractions are also easier for people to understand, and that makes for +better documentation. + + +.. _doctest-basic-api: + +Basic API +--------- + +The functions :func:`testmod` and :func:`testfile` provide a simple interface to +doctest that should be sufficient for most basic uses. For a less formal +introduction to these two functions, see sections :ref:`doctest-simple-testmod` +and :ref:`doctest-simple-testfile`. + + +.. 
function:: testfile(filename[, module_relative][, name][, package][, globs][, verbose][, report][, optionflags][, extraglobs][, raise_on_error][, parser][, encoding]) + + All arguments except *filename* are optional, and should be specified in keyword + form. + + Test examples in the file named *filename*. Return ``(failure_count, + test_count)``. + + Optional argument *module_relative* specifies how the filename should be + interpreted: + + * If *module_relative* is ``True`` (the default), then *filename* specifies an + OS-independent module-relative path. By default, this path is relative to the + calling module's directory; but if the *package* argument is specified, then it + is relative to that package. To ensure OS-independence, *filename* should use + ``/`` characters to separate path segments, and may not be an absolute path + (i.e., it may not begin with ``/``). + + * If *module_relative* is ``False``, then *filename* specifies an OS-specific + path. The path may be absolute or relative; relative paths are resolved with + respect to the current working directory. + + Optional argument *name* gives the name of the test; by default, or if ``None``, + ``os.path.basename(filename)`` is used. + + Optional argument *package* is a Python package or the name of a Python package + whose directory should be used as the base directory for a module-relative + filename. If no package is specified, then the calling module's directory is + used as the base directory for module-relative filenames. It is an error to + specify *package* if *module_relative* is ``False``. + + Optional argument *globs* gives a dict to be used as the globals when executing + examples. A new shallow copy of this dict is created for the doctest, so its + examples start with a clean slate. By default, or if ``None``, a new empty dict + is used. + + Optional argument *extraglobs* gives a dict merged into the globals used to + execute examples. 
This works like :meth:`dict.update`: if *globs* and + *extraglobs* have a common key, the associated value in *extraglobs* appears in + the combined dict. By default, or if ``None``, no extra globals are used. This + is an advanced feature that allows parameterization of doctests. For example, a + doctest can be written for a base class, using a generic name for the class, + then reused to test any number of subclasses by passing an *extraglobs* dict + mapping the generic name to the subclass to be tested. + + Optional argument *verbose* prints lots of stuff if true, and prints only + failures if false; by default, or if ``None``, it's true if and only if ``'-v'`` + is in ``sys.argv``. + + Optional argument *report* prints a summary at the end when true, else prints + nothing at the end. In verbose mode, the summary is detailed, else the summary + is very brief (in fact, empty if all tests passed). + + Optional argument *optionflags* or's together option flags. See section + :ref:`doctest-options`. + + Optional argument *raise_on_error* defaults to false. If true, an exception is + raised upon the first failure or unexpected exception in an example. This + allows failures to be post-mortem debugged. Default behavior is to continue + running examples. + + Optional argument *parser* specifies a :class:`DocTestParser` (or subclass) that + should be used to extract tests from the files. It defaults to a normal parser + (i.e., ``DocTestParser()``). + + Optional argument *encoding* specifies an encoding that should be used to + convert the file to unicode. + + .. versionadded:: 2.4 + + .. versionchanged:: 2.5 + The parameter *encoding* was added. + + +.. function:: testmod([m][, name][, globs][, verbose][, report][, optionflags][, extraglobs][, raise_on_error][, exclude_empty]) + + All arguments are optional, and all except for *m* should be specified in + keyword form. 
+ + Test examples in docstrings in functions and classes reachable from module *m* + (or module :mod:`__main__` if *m* is not supplied or is ``None``), starting with + ``m.__doc__``. + + Also test examples reachable from dict ``m.__test__``, if it exists and is not + ``None``. ``m.__test__`` maps names (strings) to functions, classes and + strings; function and class docstrings are searched for examples; strings are + searched directly, as if they were docstrings. + + Only docstrings attached to objects belonging to module *m* are searched. + + Return ``(failure_count, test_count)``. + + Optional argument *name* gives the name of the module; by default, or if + ``None``, ``m.__name__`` is used. + + Optional argument *exclude_empty* defaults to false. If true, objects for which + no doctests are found are excluded from consideration. The default is a backward + compatibility hack, so that code still using :meth:`doctest.master.summarize` in + conjunction with :func:`testmod` continues to get output for objects with no + tests. The *exclude_empty* argument to the newer :class:`DocTestFinder` + constructor defaults to true. + + Optional arguments *extraglobs*, *verbose*, *report*, *optionflags*, + *raise_on_error*, and *globs* are the same as for function :func:`testfile` + above, except that *globs* defaults to ``m.__dict__``. + + .. versionchanged:: 2.3 + The parameter *optionflags* was added. + + .. versionchanged:: 2.4 + The parameters *extraglobs*, *raise_on_error* and *exclude_empty* were added. + + .. versionchanged:: 2.5 + The optional argument *isprivate*, deprecated in 2.4, was removed. + +There's also a function to run the doctests associated with a single object. +This function is provided for backward compatibility. There are no plans to +deprecate it, but it's rarely useful: + + +.. 
function:: run_docstring_examples(f, globs[, verbose][, name][, compileflags][, optionflags]) + + Test examples associated with object *f*; for example, *f* may be a module, + function, or class object. + + A shallow copy of dictionary argument *globs* is used for the execution context. + + Optional argument *name* is used in failure messages, and defaults to + ``"NoName"``. + + If optional argument *verbose* is true, output is generated even if there are no + failures. By default, output is generated only in case of an example failure. + + Optional argument *compileflags* gives the set of flags that should be used by + the Python compiler when running the examples. By default, or if ``None``, + flags are deduced corresponding to the set of future features found in *globs*. + + Optional argument *optionflags* works as for function :func:`testfile` above. + + +.. _doctest-unittest-api: + +Unittest API +------------ + +As your collection of doctest'ed modules grows, you'll want a way to run all +their doctests systematically. Prior to Python 2.4, :mod:`doctest` had a barely +documented :class:`Tester` class that supplied a rudimentary way to combine +doctests from multiple modules. :class:`Tester` was feeble, and in practice most +serious Python testing frameworks build on the :mod:`unittest` module, which +supplies many flexible ways to combine tests from multiple sources. So, in +Python 2.4, :mod:`doctest`'s :class:`Tester` class is deprecated, and +:mod:`doctest` provides two functions that can be used to create :mod:`unittest` +test suites from modules and text files containing doctests. 
These test suites +can then be run using :mod:`unittest` test runners:: + + import unittest + import doctest + import my_module_with_doctests, and_another + + suite = unittest.TestSuite() + for mod in my_module_with_doctests, and_another: + suite.addTest(doctest.DocTestSuite(mod)) + runner = unittest.TextTestRunner() + runner.run(suite) + +There are two main functions for creating :class:`unittest.TestSuite` instances +from text files and modules with doctests: + + +.. function:: DocFileSuite([module_relative][, package][, setUp][, tearDown][, globs][, optionflags][, parser][, encoding]) + + Convert doctest tests from one or more text files to a + :class:`unittest.TestSuite`. + + The returned :class:`unittest.TestSuite` is to be run by the unittest framework + and runs the interactive examples in each file. If an example in any file + fails, then the synthesized unit test fails, and a :exc:`failureException` + exception is raised showing the name of the file containing the test and a + (sometimes approximate) line number. + + Pass one or more paths (as strings) to text files to be examined. + + Options may be provided as keyword arguments: + + Optional argument *module_relative* specifies how the filenames in *paths* + should be interpreted: + + * If *module_relative* is ``True`` (the default), then each filename specifies + an OS-independent module-relative path. By default, this path is relative to + the calling module's directory; but if the *package* argument is specified, then + it is relative to that package. To ensure OS-independence, each filename should + use ``/`` characters to separate path segments, and may not be an absolute path + (i.e., it may not begin with ``/``). + + * If *module_relative* is ``False``, then each filename specifies an OS-specific + path. The path may be absolute or relative; relative paths are resolved with + respect to the current working directory. 
+ + Optional argument *package* is a Python package or the name of a Python package + whose directory should be used as the base directory for module-relative + filenames. If no package is specified, then the calling module's directory is + used as the base directory for module-relative filenames. It is an error to + specify *package* if *module_relative* is ``False``. + + Optional argument *setUp* specifies a set-up function for the test suite. This + is called before running the tests in each file. The *setUp* function will be + passed a :class:`DocTest` object. The setUp function can access the test + globals as the *globs* attribute of the test passed. + + Optional argument *tearDown* specifies a tear-down function for the test suite. + This is called after running the tests in each file. The *tearDown* function + will be passed a :class:`DocTest` object. The tearDown function can access the + test globals as the *globs* attribute of the test passed. + + Optional argument *globs* is a dictionary containing the initial global + variables for the tests. A new copy of this dictionary is created for each + test. By default, *globs* is a new empty dictionary. + + Optional argument *optionflags* specifies the default doctest options for the + tests, created by or-ing together individual option flags. See section + :ref:`doctest-options`. See function :func:`set_unittest_reportflags` below for + a better way to set reporting options. + + Optional argument *parser* specifies a :class:`DocTestParser` (or subclass) that + should be used to extract tests from the files. It defaults to a normal parser + (i.e., ``DocTestParser()``). + + Optional argument *encoding* specifies an encoding that should be used to + convert the file to unicode. + + .. versionadded:: 2.4 + + .. versionchanged:: 2.5 + The global ``__file__`` was added to the globals provided to doctests loaded + from a text file using :func:`DocFileSuite`. + + ..
versionchanged:: 2.5 + The parameter *encoding* was added. + + +.. function:: DocTestSuite([module][, globs][, extraglobs][, test_finder][, setUp][, tearDown][, checker]) + + Convert doctest tests for a module to a :class:`unittest.TestSuite`. + + The returned :class:`unittest.TestSuite` is to be run by the unittest framework + and runs each doctest in the module. If any of the doctests fail, then the + synthesized unit test fails, and a :exc:`failureException` exception is raised + showing the name of the file containing the test and a (sometimes approximate) + line number. + + Optional argument *module* provides the module to be tested. It can be a module + object or a (possibly dotted) module name. If not specified, the module calling + this function is used. + + Optional argument *globs* is a dictionary containing the initial global + variables for the tests. A new copy of this dictionary is created for each + test. By default, *globs* is a new empty dictionary. + + Optional argument *extraglobs* specifies an extra set of global variables, which + is merged into *globs*. By default, no extra globals are used. + + Optional argument *test_finder* is the :class:`DocTestFinder` object (or a + drop-in replacement) that is used to extract doctests from the module. + + Optional arguments *setUp*, *tearDown*, and *optionflags* are the same as for + function :func:`DocFileSuite` above. + + .. versionadded:: 2.3 + + .. versionchanged:: 2.4 + The parameters *globs*, *extraglobs*, *test_finder*, *setUp*, *tearDown*, and + *optionflags* were added; this function now uses the same search technique as + :func:`testmod`. + +Under the covers, :func:`DocTestSuite` creates a :class:`unittest.TestSuite` out +of :class:`doctest.DocTestCase` instances, and :class:`DocTestCase` is a +subclass of :class:`unittest.TestCase`. 
:class:`DocTestCase` isn't documented +here (it's an internal detail), but studying its code can answer questions about +the exact details of :mod:`unittest` integration. + +Similarly, :func:`DocFileSuite` creates a :class:`unittest.TestSuite` out of +:class:`doctest.DocFileCase` instances, and :class:`DocFileCase` is a subclass +of :class:`DocTestCase`. + +So both ways of creating a :class:`unittest.TestSuite` run instances of +:class:`DocTestCase`. This is important for a subtle reason: when you run +:mod:`doctest` functions yourself, you can control the :mod:`doctest` options in +use directly, by passing option flags to :mod:`doctest` functions. However, if +you're writing a :mod:`unittest` framework, :mod:`unittest` ultimately controls +when and how tests get run. The framework author typically wants to control +:mod:`doctest` reporting options (perhaps, e.g., specified by command line +options), but there's no way to pass options through :mod:`unittest` to +:mod:`doctest` test runners. + +For this reason, :mod:`doctest` also supports a notion of :mod:`doctest` +reporting flags specific to :mod:`unittest` support, via this function: + + +.. function:: set_unittest_reportflags(flags) + + Set the :mod:`doctest` reporting flags to use. + + Argument *flags* or's together option flags. See section + :ref:`doctest-options`. Only "reporting flags" can be used. + + This is a module-global setting, and affects all future doctests run by module + :mod:`unittest`: the :meth:`runTest` method of :class:`DocTestCase` looks at + the option flags specified for the test case when the :class:`DocTestCase` + instance was constructed. If no reporting flags were specified (which is the + typical and expected case), :mod:`doctest`'s :mod:`unittest` reporting flags are + or'ed into the option flags, and the option flags so augmented are passed to the + :class:`DocTestRunner` instance created to run the doctest. 
If any reporting + flags were specified when the :class:`DocTestCase` instance was constructed, + :mod:`doctest`'s :mod:`unittest` reporting flags are ignored. + + The value of the :mod:`unittest` reporting flags in effect before the function + was called is returned by the function. + + .. versionadded:: 2.4 + + +.. _doctest-advanced-api: + +Advanced API +------------ + +The basic API is a simple wrapper that's intended to make doctest easy to use. +It is fairly flexible, and should meet most users' needs; however, if you +require more fine-grained control over testing, or wish to extend doctest's +capabilities, then you should use the advanced API. + +The advanced API revolves around two container classes, which are used to store +the interactive examples extracted from doctest cases: + +* :class:`Example`: A single Python statement, paired with its expected output. + +* :class:`DocTest`: A collection of :class:`Example`\ s, typically extracted + from a single docstring or text file. + +Additional processing classes are defined to find, parse, run, and check +doctest examples: + +* :class:`DocTestFinder`: Finds all docstrings in a given module, and uses a + :class:`DocTestParser` to create a :class:`DocTest` from every docstring that + contains interactive examples. + +* :class:`DocTestParser`: Creates a :class:`DocTest` object from a string (such + as an object's docstring). + +* :class:`DocTestRunner`: Executes the examples in a :class:`DocTest`, and uses + an :class:`OutputChecker` to verify their output. + +* :class:`OutputChecker`: Compares the actual output from a doctest example with + the expected output, and decides whether they match. + +The relationships among these processing classes are summarized in the following +diagram:: + + list of: + +------+ +---------+ + |module| --DocTestFinder-> | DocTest | --DocTestRunner-> results + +------+ | ^ +---------+ | ^ (printed) + | | | Example | | | + v | | ...
| v | + DocTestParser | Example | OutputChecker + +---------+ + + +.. _doctest-doctest: + +DocTest Objects +^^^^^^^^^^^^^^^ + + +.. class:: DocTest(examples, globs, name, filename, lineno, docstring) + + A collection of doctest examples that should be run in a single namespace. The + constructor arguments are used to initialize the member variables of the same + names. + + .. versionadded:: 2.4 + +:class:`DocTest` defines the following member variables. They are initialized +by the constructor, and should not be modified directly. + + +.. attribute:: DocTest.examples + + A list of :class:`Example` objects encoding the individual interactive Python + examples that should be run by this test. + + +.. attribute:: DocTest.globs + + The namespace (aka globals) that the examples should be run in. This is a + dictionary mapping names to values. Any changes to the namespace made by the + examples (such as binding new variables) will be reflected in :attr:`globs` + after the test is run. + + +.. attribute:: DocTest.name + + A string name identifying the :class:`DocTest`. Typically, this is the name of + the object or file that the test was extracted from. + + +.. attribute:: DocTest.filename + + The name of the file that this :class:`DocTest` was extracted from; or ``None`` + if the filename is unknown, or if the :class:`DocTest` was not extracted from a + file. + + +.. attribute:: DocTest.lineno + + The line number within :attr:`filename` where this :class:`DocTest` begins, or + ``None`` if the line number is unavailable. This line number is zero-based with + respect to the beginning of the file. + + +.. attribute:: DocTest.docstring + + The string that the test was extracted from, or ``None`` if the string is + unavailable, or if the test was not extracted from a string. + + +.. _doctest-example: + +Example Objects +^^^^^^^^^^^^^^^ + + +..
class:: Example(source, want[, exc_msg][, lineno][, indent][, options]) + + A single interactive example, consisting of a Python statement and its expected + output. The constructor arguments are used to initialize the member variables + of the same names. + + .. versionadded:: 2.4 + +:class:`Example` defines the following member variables. They are initialized +by the constructor, and should not be modified directly. + + +.. attribute:: Example.source + + A string containing the example's source code. This source code consists of a + single Python statement, and always ends with a newline; the constructor adds a + newline when necessary. + + +.. attribute:: Example.want + + The expected output from running the example's source code (either from stdout, + or a traceback in case of exception). :attr:`want` ends with a newline unless + no output is expected, in which case it's an empty string. The constructor adds + a newline when necessary. + + +.. attribute:: Example.exc_msg + + The exception message generated by the example, if the example is expected to + generate an exception; or ``None`` if it is not expected to generate an + exception. This exception message is compared against the return value of + :func:`traceback.format_exception_only`. :attr:`exc_msg` ends with a newline + unless it's ``None``. The constructor adds a newline if needed. + + +.. attribute:: Example.lineno + + The line number within the string containing this example where the example + begins. This line number is zero-based with respect to the beginning of the + containing string. + + +.. attribute:: Example.indent + + The example's indentation in the containing string, i.e., the number of space + characters that precede the example's first prompt. + + +.. attribute:: Example.options + + A dictionary mapping from option flags to ``True`` or ``False``, which is used + to override default options for this example. 
Any option flags not contained in + this dictionary are left at their default value (as specified by the + :class:`DocTestRunner`'s :attr:`optionflags`). By default, no options are set. + + +.. _doctest-doctestfinder: + +DocTestFinder objects +^^^^^^^^^^^^^^^^^^^^^ + + +.. class:: DocTestFinder([verbose][, parser][, recurse][, exclude_empty]) + + A processing class used to extract the :class:`DocTest`\ s that are relevant to + a given object, from its docstring and the docstrings of its contained objects. + :class:`DocTest`\ s can currently be extracted from the following object types: + modules, functions, classes, methods, staticmethods, classmethods, and + properties. + + The optional argument *verbose* can be used to display the objects searched by + the finder. It defaults to ``False`` (no output). + + The optional argument *parser* specifies the :class:`DocTestParser` object (or a + drop-in replacement) that is used to extract doctests from docstrings. + + If the optional argument *recurse* is false, then :meth:`DocTestFinder.find` + will only examine the given object, and not any contained objects. + + If the optional argument *exclude_empty* is false, then + :meth:`DocTestFinder.find` will include tests for objects with empty docstrings. + + .. versionadded:: 2.4 + +:class:`DocTestFinder` defines the following method: + + +.. method:: DocTestFinder.find(obj[, name][, module][, globs][, extraglobs]) + + Return a list of the :class:`DocTest`\ s that are defined by *obj*'s docstring, + or by any of its contained objects' docstrings. + + The optional argument *name* specifies the object's name; this name will be used + to construct names for the returned :class:`DocTest`\ s. If *name* is not + specified, then ``obj.__name__`` is used. + + The optional parameter *module* is the module that contains the given object. + If the module is not specified or is None, then the test finder will attempt to + automatically determine the correct module. 
The object's module is used: + + * As a default namespace, if *globs* is not specified. + + * To prevent the DocTestFinder from extracting DocTests from objects that are + imported from other modules. (Contained objects with modules other than + *module* are ignored.) + + * To find the name of the file containing the object. + + * To help find the line number of the object within its file. + + If *module* is ``False``, no attempt to find the module will be made. This is + obscure, of use mostly in testing doctest itself: if *module* is ``False``, or + is ``None`` but cannot be found automatically, then all objects are considered + to belong to the (non-existent) module, so all contained objects will + (recursively) be searched for doctests. + + The globals for each :class:`DocTest` is formed by combining *globs* and + *extraglobs* (bindings in *extraglobs* override bindings in *globs*). A new + shallow copy of the globals dictionary is created for each :class:`DocTest`. If + *globs* is not specified, then it defaults to the module's *__dict__*, if + specified, or ``{}`` otherwise. If *extraglobs* is not specified, then it + defaults to ``{}``. + + +.. _doctest-doctestparser: + +DocTestParser objects +^^^^^^^^^^^^^^^^^^^^^ + + +.. class:: DocTestParser() + + A processing class used to extract interactive examples from a string, and use + them to create a :class:`DocTest` object. + + .. versionadded:: 2.4 + +:class:`DocTestParser` defines the following methods: + + +.. method:: DocTestParser.get_doctest(string, globs, name, filename, lineno) + + Extract all doctest examples from the given string, and collect them into a + :class:`DocTest` object. + + *globs*, *name*, *filename*, and *lineno* are attributes for the new + :class:`DocTest` object. See the documentation for :class:`DocTest` for more + information. + + +.. 
method:: DocTestParser.get_examples(string[, name]) + + Extract all doctest examples from the given string, and return them as a list of + :class:`Example` objects. Line numbers are 0-based. The optional argument + *name* is a name identifying this string, and is only used for error messages. + + +.. method:: DocTestParser.parse(string[, name]) + + Divide the given string into examples and intervening text, and return them as a + list of alternating :class:`Example`\ s and strings. Line numbers for the + :class:`Example`\ s are 0-based. The optional argument *name* is a name + identifying this string, and is only used for error messages. + + +.. _doctest-doctestrunner: + +DocTestRunner objects +^^^^^^^^^^^^^^^^^^^^^ + + +.. class:: DocTestRunner([checker][, verbose][, optionflags]) + + A processing class used to execute and verify the interactive examples in a + :class:`DocTest`. + + The comparison between expected outputs and actual outputs is done by an + :class:`OutputChecker`. This comparison may be customized with a number of + option flags; see section :ref:`doctest-options` for more information. If the + option flags are insufficient, then the comparison may also be customized by + passing a subclass of :class:`OutputChecker` to the constructor. + + The test runner's display output can be controlled in two ways. First, an output + function can be passed to :meth:`DocTestRunner.run`; this function will be called + with strings that should be displayed. It defaults to ``sys.stdout.write``. If + capturing the output is not sufficient, then the display output can also be + customized by subclassing DocTestRunner, and overriding the methods + :meth:`report_start`, :meth:`report_success`, + :meth:`report_unexpected_exception`, and :meth:`report_failure`. + + The optional keyword argument *checker* specifies the :class:`OutputChecker` + object (or drop-in replacement) that should be used to compare the expected + outputs to the actual outputs of doctest examples.
+ + The optional keyword argument *verbose* controls the :class:`DocTestRunner`'s + verbosity. If *verbose* is ``True``, then information is printed about each + example, as it is run. If *verbose* is ``False``, then only failures are + printed. If *verbose* is unspecified, or ``None``, then verbose output is used + iff the command-line switch :option:`-v` is used. + + The optional keyword argument *optionflags* can be used to control how the test + runner compares expected output to actual output, and how it displays failures. + For more information, see section :ref:`doctest-options`. + + .. versionadded:: 2.4 + +:class:`DocTestRunner` defines the following methods: + + +.. method:: DocTestRunner.report_start(out, test, example) + + Report that the test runner is about to process the given example. This method + is provided to allow subclasses of :class:`DocTestRunner` to customize their + output; it should not be called directly. + + *example* is the example about to be processed. *test* is the test containing + *example*. *out* is the output function that was passed to + :meth:`DocTestRunner.run`. + + +.. method:: DocTestRunner.report_success(out, test, example, got) + + Report that the given example ran successfully. This method is provided to + allow subclasses of :class:`DocTestRunner` to customize their output; it should + not be called directly. + + *example* is the example that was processed. *got* is the actual output from + the example. *test* is the test containing *example*. *out* is the output + function that was passed to :meth:`DocTestRunner.run`. + + +.. method:: DocTestRunner.report_failure(out, test, example, got) + + Report that the given example failed. This method is provided to allow + subclasses of :class:`DocTestRunner` to customize their output; it should not be + called directly. + + *example* is the example that was processed. *got* is the actual output from + the example. *test* is the test containing *example*.
*out* is the output + function that was passed to :meth:`DocTestRunner.run`. + + +.. method:: DocTestRunner.report_unexpected_exception(out, test, example, exc_info) + + Report that the given example raised an unexpected exception. This method is + provided to allow subclasses of :class:`DocTestRunner` to customize their + output; it should not be called directly. + + *example* is the example about to be processed. *exc_info* is a tuple containing + information about the unexpected exception (as returned by + :func:`sys.exc_info`). *test* is the test containing *example*. *out* is the + output function that was passed to :meth:`DocTestRunner.run`. + + +.. method:: DocTestRunner.run(test[, compileflags][, out][, clear_globs]) + + Run the examples in *test* (a :class:`DocTest` object), and display the results + using the writer function *out*. + + The examples are run in the namespace ``test.globs``. If *clear_globs* is true + (the default), then this namespace will be cleared after the test runs, to help + with garbage collection. If you would like to examine the namespace after the + test completes, then use *clear_globs=False*. + + *compileflags* gives the set of flags that should be used by the Python compiler + when running the examples. If not specified, then it will default to the set of + future-import flags that apply to *globs*. + + The output of each example is checked using the :class:`DocTestRunner`'s output + checker, and the results are formatted by the :meth:`DocTestRunner.report_\*` + methods. + + +.. method:: DocTestRunner.summarize([verbose]) + + Print a summary of all the test cases that have been run by this DocTestRunner, + and return a tuple ``(failure_count, test_count)``. + + The optional *verbose* argument controls how detailed the summary is. If the + verbosity is not specified, then the :class:`DocTestRunner`'s verbosity is used. + + +.. _doctest-outputchecker: + +OutputChecker objects +^^^^^^^^^^^^^^^^^^^^^ + + +.. 
class:: OutputChecker() + + A class used to check whether the actual output from a doctest example + matches the expected output. :class:`OutputChecker` defines two methods: + :meth:`check_output`, which compares a given pair of outputs, and returns true + if they match; and :meth:`output_difference`, which returns a string describing + the differences between two outputs. + + .. versionadded:: 2.4 + +:class:`OutputChecker` defines the following methods: + + +.. method:: OutputChecker.check_output(want, got, optionflags) + + Return ``True`` iff the actual output from an example (*got*) matches the + expected output (*want*). These strings are always considered to match if they + are identical; but depending on what option flags the test runner is using, + several non-exact match types are also possible. See section + :ref:`doctest-options` for more information about option flags. + + +.. method:: OutputChecker.output_difference(example, got, optionflags) + + Return a string describing the differences between the expected output for a + given example (*example*) and the actual output (*got*). *optionflags* is the + set of option flags used to compare *want* and *got*. + + +.. _doctest-debugging: + +Debugging +--------- + +Doctest provides several mechanisms for debugging doctest examples: + +* Several functions convert doctests to executable Python programs, which can be + run under the Python debugger, :mod:`pdb`. + +* The :class:`DebugRunner` class is a subclass of :class:`DocTestRunner` that + raises an exception for the first failing example, containing information about + that example. This information can be used to perform post-mortem debugging on + the example. + +* The :mod:`unittest` cases generated by :func:`DocTestSuite` support the + :meth:`debug` method defined by :class:`unittest.TestCase`. + +* You can add a call to :func:`pdb.set_trace` in a doctest example, and you'll + drop into the Python debugger when that line is executed.
Then you can inspect + current values of variables, and so on. For example, suppose :file:`a.py` + contains just this module docstring:: + + """ + >>> def f(x): + ... g(x*2) + >>> def g(x): + ... print x+3 + ... import pdb; pdb.set_trace() + >>> f(3) + 9 + """ + + Then an interactive Python session may look like this:: + + >>> import a, doctest + >>> doctest.testmod(a) + --Return-- + > <doctest a[1]>(3)g()->None + -> import pdb; pdb.set_trace() + (Pdb) list + 1 def g(x): + 2 print x+3 + 3 -> import pdb; pdb.set_trace() + [EOF] + (Pdb) print x + 6 + (Pdb) step + --Return-- + > <doctest a[0]>(2)f()->None + -> g(x*2) + (Pdb) list + 1 def f(x): + 2 -> g(x*2) + [EOF] + (Pdb) print x + 3 + (Pdb) step + --Return-- + > <doctest a[2]>(1)?()->None + -> f(3) + (Pdb) cont + (0, 3) + >>> + + .. versionchanged:: 2.4 + The ability to use :func:`pdb.set_trace` usefully inside doctests was added. + +Functions that convert doctests to Python code, and possibly run the synthesized +code under the debugger: + + +.. function:: script_from_examples(s) + + Convert text with examples to a script. + + Argument *s* is a string containing doctest examples. The string is converted + to a Python script, where doctest examples in *s* are converted to regular code, + and everything else is converted to Python comments. The generated script is + returned as a string. For example, :: + + import doctest + print doctest.script_from_examples(r""" + Set x and y to 1 and 2. + >>> x, y = 1, 2 + + Print their sum: + >>> print x+y + 3 + """) + + displays:: + + # Set x and y to 1 and 2. + x, y = 1, 2 + # + # Print their sum: + print x+y + # Expected: + ## 3 + + This function is used internally by other functions (see below), but can also be + useful when you want to transform an interactive Python session into a Python + script. + + .. versionadded:: 2.4 + + +.. function:: testsource(module, name) + + Convert the doctest for an object to a script.
+ + Argument *module* is a module object, or dotted name of a module, containing the + object whose doctests are of interest. Argument *name* is the name (within the + module) of the object with the doctests of interest. The result is a string, + containing the object's docstring converted to a Python script, as described for + :func:`script_from_examples` above. For example, if module :file:`a.py` + contains a top-level function :func:`f`, then :: + + import a, doctest + print doctest.testsource(a, "a.f") + + prints a script version of function :func:`f`'s docstring, with doctests + converted to code, and the rest placed in comments. + + .. versionadded:: 2.3 + + +.. function:: debug(module, name[, pm]) + + Debug the doctests for an object. + + The *module* and *name* arguments are the same as for function + :func:`testsource` above. The synthesized Python script for the named object's + docstring is written to a temporary file, and then that file is run under the + control of the Python debugger, :mod:`pdb`. + + A shallow copy of ``module.__dict__`` is used for both local and global + execution context. + + Optional argument *pm* controls whether post-mortem debugging is used. If *pm* + has a true value, the script file is run directly, and the debugger gets + involved only if the script terminates via raising an unhandled exception. If + it does, then post-mortem debugging is invoked, via :func:`pdb.post_mortem`, + passing the traceback object from the unhandled exception. If *pm* is not + specified, or is false, the script is run under the debugger from the start, via + passing an appropriate :func:`exec` call to :func:`pdb.run`. + + .. versionadded:: 2.3 + + .. versionchanged:: 2.4 + The *pm* argument was added. + + +.. function:: debug_src(src[, pm][, globs]) + + Debug the doctests in a string. + + This is like function :func:`debug` above, except that a string containing + doctest examples is specified directly, via the *src* argument. 
+ + Optional argument *pm* has the same meaning as in function :func:`debug` above. + + Optional argument *globs* gives a dictionary to use as both local and global + execution context. If not specified, or ``None``, an empty dictionary is used. + If specified, a shallow copy of the dictionary is used. + + .. versionadded:: 2.4 + +The :class:`DebugRunner` class, and the special exceptions it may raise, are of +most interest to testing framework authors, and will only be sketched here. See +the source code, and especially :class:`DebugRunner`'s docstring (which is a +doctest!) for more details: + + +.. class:: DebugRunner([checker][, verbose][, optionflags]) + + A subclass of :class:`DocTestRunner` that raises an exception as soon as a + failure is encountered. If an unexpected exception occurs, an + :exc:`UnexpectedException` exception is raised, containing the test, the + example, and the original exception. If the output doesn't match, then a + :exc:`DocTestFailure` exception is raised, containing the test, the example, and + the actual output. + + For information about the constructor parameters and methods, see the + documentation for :class:`DocTestRunner` in section :ref:`doctest-advanced-api`. + +There are two exceptions that may be raised by :class:`DebugRunner` instances: + + +.. exception:: DocTestFailure(test, example, got) + + An exception thrown by :class:`DocTestRunner` to signal that a doctest example's + actual output did not match its expected output. The constructor arguments are + used to initialize the member variables of the same names. + +:exc:`DocTestFailure` defines the following member variables: + + +.. attribute:: DocTestFailure.test + + The :class:`DocTest` object that was being run when the example failed. + + +.. attribute:: DocTestFailure.example + + The :class:`Example` that failed. + + +.. attribute:: DocTestFailure.got + + The example's actual output. + + +.. 
exception:: UnexpectedException(test, example, exc_info) + + An exception thrown by :class:`DocTestRunner` to signal that a doctest example + raised an unexpected exception. The constructor arguments are used to + initialize the member variables of the same names. + +:exc:`UnexpectedException` defines the following member variables: + + +.. attribute:: UnexpectedException.test + + The :class:`DocTest` object that was being run when the example failed. + + +.. attribute:: UnexpectedException.example + + The :class:`Example` that failed. + + +.. attribute:: UnexpectedException.exc_info + + A tuple containing information about the unexpected exception, as returned by + :func:`sys.exc_info`. + + +.. _doctest-soapbox: + +Soapbox +------- + +As mentioned in the introduction, :mod:`doctest` has grown to have three primary +uses: + +#. Checking examples in docstrings. + +#. Regression testing. + +#. Executable documentation / literate testing. + +These uses have different requirements, and it is important to distinguish them. +In particular, filling your docstrings with obscure test cases makes for bad +documentation. + +When writing a docstring, choose docstring examples with care. There's an art to +this that needs to be learned---it may not be natural at first. Examples should +add genuine value to the documentation. A good example can often be worth many +words. If done with care, the examples will be invaluable for your users, and +will pay back the time it takes to collect them many times over as the years go +by and things change. I'm still amazed at how often one of my :mod:`doctest` +examples stops working after a "harmless" change. + +Doctest also makes an excellent tool for regression testing, especially if you +don't skimp on explanatory text. By interleaving prose and examples, it becomes +much easier to keep track of what's actually being tested, and why. 
When a test +fails, good prose can make it much easier to figure out what the problem is, and +how it should be fixed. It's true that you could write extensive comments in +code-based testing, but few programmers do. Many have found that using doctest +approaches instead leads to much clearer tests. Perhaps this is simply because +doctest makes writing prose a little easier than writing code, while writing +comments in code is a little harder. I think it goes deeper than just that: +the natural attitude when writing a doctest-based test is that you want to +explain the fine points of your software, and illustrate them with examples. +This in turn naturally leads to test files that start with the simplest +features, and logically progress to complications and edge cases. A coherent +narrative is the result, instead of a collection of isolated functions that test +isolated bits of functionality seemingly at random. It's a different attitude, +and produces different results, blurring the distinction between testing and +explaining. + +Regression testing is best confined to dedicated objects or files. There are +several options for organizing tests: + +* Write text files containing test cases as interactive examples, and test the + files using :func:`testfile` or :func:`DocFileSuite`. This is recommended, + although it is easiest to do for new projects, designed from the start to use + doctest. + +* Define functions named ``_regrtest_topic`` that consist of single docstrings, + containing test cases for the named topics. These functions can be included in + the same file as the module, or separated out into a separate test file. + +* Define a ``__test__`` dictionary mapping from regression test topics to + docstrings containing test cases. + +.. rubric:: Footnotes + +.. [#] Examples containing both expected output and an exception are not supported. + Trying to guess where one ends and the other begins is too error-prone, and that + also makes for a confusing test.
+ diff --git a/Doc/library/docxmlrpcserver.rst b/Doc/library/docxmlrpcserver.rst new file mode 100644 index 0000000..958ea95 --- /dev/null +++ b/Doc/library/docxmlrpcserver.rst @@ -0,0 +1,97 @@ + +:mod:`DocXMLRPCServer` --- Self-documenting XML-RPC server +========================================================== + +.. module:: DocXMLRPCServer + :synopsis: Self-documenting XML-RPC server implementation. +.. moduleauthor:: Brian Quinlan +.. sectionauthor:: Brian Quinlan + + +.. versionadded:: 2.3 + +The :mod:`DocXMLRPCServer` module extends the classes found in +:mod:`SimpleXMLRPCServer` to serve HTML documentation in response to HTTP GET +requests. Servers can either be free standing, using :class:`DocXMLRPCServer`, +or embedded in a CGI environment, using :class:`DocCGIXMLRPCRequestHandler`. + + +.. class:: DocXMLRPCServer(addr[, requestHandler[, logRequests[, allow_none[, encoding[, bind_and_activate]]]]]) + + Create a new server instance. All parameters have the same meaning as for + :class:`SimpleXMLRPCServer.SimpleXMLRPCServer`; *requestHandler* defaults to + :class:`DocXMLRPCRequestHandler`. + + +.. class:: DocCGIXMLRPCRequestHandler() + + Create a new instance to handle XML-RPC requests in a CGI environment. + + +.. class:: DocXMLRPCRequestHandler() + + Create a new request handler instance. This request handler supports XML-RPC + POST requests, documentation GET requests, and modifies logging so that the + *logRequests* parameter to the :class:`DocXMLRPCServer` constructor is + honored. + + +.. _doc-xmlrpc-servers: + +DocXMLRPCServer Objects +----------------------- + +The :class:`DocXMLRPCServer` class is derived from +:class:`SimpleXMLRPCServer.SimpleXMLRPCServer` and provides a means of creating +self-documenting, stand alone XML-RPC servers. HTTP POST requests are handled as +XML-RPC method calls. HTTP GET requests are handled by generating pydoc-style +HTML documentation. This allows a server to provide its own web-based +documentation.
+ + +.. method:: DocXMLRPCServer.set_server_title(server_title) + + Set the title used in the generated HTML documentation. This title will be used + inside the HTML "title" element. + + +.. method:: DocXMLRPCServer.set_server_name(server_name) + + Set the name used in the generated HTML documentation. This name will appear at + the top of the generated documentation inside a "h1" element. + + +.. method:: DocXMLRPCServer.set_server_documentation(server_documentation) + + Set the description used in the generated HTML documentation. This description + will appear as a paragraph, below the server name, in the documentation. + + +DocCGIXMLRPCRequestHandler +-------------------------- + +The :class:`DocCGIXMLRPCRequestHandler` class is derived from +:class:`SimpleXMLRPCServer.CGIXMLRPCRequestHandler` and provides a means of +creating self-documenting, XML-RPC CGI scripts. HTTP POST requests are handled +as XML-RPC method calls. HTTP GET requests are handled by generating pydoc-style +HTML documentation. This allows a server to provide its own web-based +documentation. + + +.. method:: DocCGIXMLRPCRequestHandler.set_server_title(server_title) + + Set the title used in the generated HTML documentation. This title will be used + inside the HTML "title" element. + + +.. method:: DocCGIXMLRPCRequestHandler.set_server_name(server_name) + + Set the name used in the generated HTML documentation. This name will appear at + the top of the generated documentation inside a "h1" element. + + +.. method:: DocCGIXMLRPCRequestHandler.set_server_documentation(server_documentation) + + Set the description used in the generated HTML documentation. This description + will appear as a paragraph, below the server name, in the documentation. 
+ diff --git a/Doc/library/dumbdbm.rst b/Doc/library/dumbdbm.rst new file mode 100644 index 0000000..3db9fda --- /dev/null +++ b/Doc/library/dumbdbm.rst @@ -0,0 +1,81 @@ + +:mod:`dumbdbm` --- Portable DBM implementation +============================================== + +.. module:: dumbdbm + :synopsis: Portable implementation of the simple DBM interface. + + +.. index:: single: databases + +.. note:: + + The :mod:`dumbdbm` module is intended as a last resort fallback for the + :mod:`anydbm` module when no more robust module is available. The :mod:`dumbdbm` + module is not written for speed and is not nearly as heavily used as the other + database modules. + +The :mod:`dumbdbm` module provides a persistent dictionary-like interface which +is written entirely in Python. Unlike other modules such as :mod:`gdbm` and +:mod:`bsddb`, no external library is required. As with other persistent +mappings, the keys and values must always be strings. + +The module defines the following: + + +.. exception:: error + + Raised on dumbdbm-specific errors, such as I/O errors. :exc:`KeyError` is + raised for general mapping errors like specifying an incorrect key. + + +.. function:: open(filename[, flag[, mode]]) + + Open a dumbdbm database and return a dumbdbm object. The *filename* argument is + the basename of the database file (without any specific extensions). When a + dumbdbm database is created, files with :file:`.dat` and :file:`.dir` extensions + are created. + + The optional *flag* argument is currently ignored; the database is always opened + for update, and will be created if it does not exist. + + The optional *mode* argument is the Unix mode of the file, used only when the + database has to be created. It defaults to octal ``0666`` (and will be modified + by the prevailing umask). + + .. versionchanged:: 2.2 + The *mode* argument was ignored in earlier versions. + + +.. seealso:: + + Module :mod:`anydbm` + Generic interface to ``dbm``\ -style databases. 
+ + Module :mod:`dbm` + Similar interface to the DBM/NDBM library. + + Module :mod:`gdbm` + Similar interface to the GNU GDBM library. + + Module :mod:`shelve` + Persistence module which stores non-string data. + + Module :mod:`whichdb` + Utility module used to determine the type of an existing database. + + +.. _dumbdbm-objects: + +Dumbdbm Objects +--------------- + +In addition to the methods provided by the :class:`UserDict.DictMixin` class, +:class:`dumbdbm` objects provide the following methods. + + +.. method:: dumbdbm.sync() + + Synchronize the on-disk directory and data files. This method is called by the + :meth:`sync` method of :class:`Shelve` objects. + diff --git a/Doc/library/dummy_thread.rst b/Doc/library/dummy_thread.rst new file mode 100644 index 0000000..0b2cb17 --- /dev/null +++ b/Doc/library/dummy_thread.rst @@ -0,0 +1,23 @@ + +:mod:`dummy_thread` --- Drop-in replacement for the :mod:`thread` module +======================================================================== + +.. module:: dummy_thread + :synopsis: Drop-in replacement for the thread module. + + +This module provides a duplicate interface to the :mod:`thread` module. It is +meant to be imported when the :mod:`thread` module is not provided on a +platform. + +Suggested usage is:: + + try: + import thread as _thread + except ImportError: + import dummy_thread as _thread + +Be careful to not use this module where deadlock might occur from a thread +being created that blocks waiting for another thread to be created. This often +occurs with blocking I/O. + diff --git a/Doc/library/dummy_threading.rst b/Doc/library/dummy_threading.rst new file mode 100644 index 0000000..0ffb687 --- /dev/null +++ b/Doc/library/dummy_threading.rst @@ -0,0 +1,23 @@ + +:mod:`dummy_threading` --- Drop-in replacement for the :mod:`threading` module +============================================================================== + +.. 
module:: dummy_threading + :synopsis: Drop-in replacement for the threading module. + + +This module provides a duplicate interface to the :mod:`threading` module. It +is meant to be imported when the :mod:`thread` module is not provided on a +platform. + +Suggested usage is:: + + try: + import threading as _threading + except ImportError: + import dummy_threading as _threading + +Be careful to not use this module where deadlock might occur from a thread +being created that blocks waiting for another thread to be created. This often +occurs with blocking I/O. + diff --git a/Doc/library/easydialogs.rst b/Doc/library/easydialogs.rst new file mode 100644 index 0000000..50b312f --- /dev/null +++ b/Doc/library/easydialogs.rst @@ -0,0 +1,207 @@ + +:mod:`EasyDialogs` --- Basic Macintosh dialogs +============================================== + +.. module:: EasyDialogs + :platform: Mac + :synopsis: Basic Macintosh dialogs. + + +The :mod:`EasyDialogs` module contains some simple dialogs for the Macintosh. +All routines take an optional resource ID parameter *id* with which one can +override the :const:`DLOG` resource used for the dialog, provided that the +dialog items correspond (both type and item number) to those in the default +:const:`DLOG` resource. See source code for details. + +The :mod:`EasyDialogs` module defines the following functions: + + +.. function:: Message(str[, id[, ok]]) + + Displays a modal dialog with the message text *str*, which should be at most 255 + characters long. The button text defaults to "OK", but is set to the string + argument *ok* if the latter is supplied. Control is returned when the user + clicks the "OK" button. + + +.. function:: AskString(prompt[, default[, id[, ok[, cancel]]]]) + + Asks the user to input a string value via a modal dialog. *prompt* is the prompt + message, and the optional *default* supplies the initial value for the string + (otherwise ``""`` is used). 
The text of the "OK" and "Cancel" buttons can be + changed with the *ok* and *cancel* arguments. All strings can be at most 255 + bytes long. :func:`AskString` returns the string entered or :const:`None` in + case the user cancelled. + + +.. function:: AskPassword(prompt[, default[, id[, ok[, cancel]]]]) + + Asks the user to input a string value via a modal dialog. Like + :func:`AskString`, but with the text shown as bullets. The arguments have the + same meaning as for :func:`AskString`. + + +.. function:: AskYesNoCancel(question[, default[, yes[, no[, cancel[, id]]]]]) + + Presents a dialog with prompt *question* and three buttons labelled "Yes", "No", + and "Cancel". Returns ``1`` for "Yes", ``0`` for "No" and ``-1`` for "Cancel". + The value of *default* (or ``0`` if *default* is not supplied) is returned when + the :kbd:`RETURN` key is pressed. The text of the buttons can be changed with + the *yes*, *no*, and *cancel* arguments; to prevent a button from appearing, + supply ``""`` for the corresponding argument. + + +.. function:: ProgressBar([title[, maxval[, label[, id]]]]) + + Displays a modeless progress-bar dialog. This is the constructor for the + :class:`ProgressBar` class described below. *title* is the text string displayed + (default "Working..."), *maxval* is the value at which progress is complete + (default ``0``, indicating that an indeterminate amount of work remains to be + done), and *label* is the text that is displayed above the progress bar itself. + + +.. function:: GetArgv([optionlist[, commandlist[, addoldfile[, addnewfile[, addfolder[, id]]]]]]) + + Displays a dialog which aids the user in constructing a command-line argument + list. Returns the list in ``sys.argv`` format, suitable for passing as an + argument to :func:`getopt.getopt`. *addoldfile*, *addnewfile*, and *addfolder* + are boolean arguments.
When nonzero, they enable the user to insert into the + command line paths to an existing file, a (possibly) not-yet-existent file, and + a folder, respectively. (Note: Option arguments must appear in the command line + before file and folder arguments in order to be recognized by + :func:`getopt.getopt`.) Arguments containing spaces can be specified by + enclosing them within single or double quotes. A :exc:`SystemExit` exception is + raised if the user presses the "Cancel" button. + + *optionlist* is a list that determines a popup menu from which the allowed + options are selected. Its items can take one of two forms: *optstr* or + ``(optstr, descr)``. When present, *descr* is a short descriptive string that + is displayed in the dialog while this option is selected in the popup menu. The + correspondence between *optstr*\s and command-line arguments is: + + +----------------------+------------------------------------------+ + | *optstr* format | Command-line format | + +======================+==========================================+ + | ``x`` | :option:`-x` (short option) | + +----------------------+------------------------------------------+ + | ``x:`` or ``x=`` | :option:`-x` (short option with value) | + +----------------------+------------------------------------------+ + | ``xyz`` | :option:`--xyz` (long option) | + +----------------------+------------------------------------------+ + | ``xyz:`` or ``xyz=`` | :option:`--xyz` (long option with value) | + +----------------------+------------------------------------------+ + + *commandlist* is a list of items of the form *cmdstr* or ``(cmdstr, descr)``, + where *descr* is as above. The *cmdstr*\s will appear in a popup menu. When + chosen, the text of *cmdstr* will be appended to the command line as is, except + that a trailing ``':'`` or ``'='`` (if present) will be trimmed off. + + .. versionadded:: 2.0 + + +..
function:: AskFileForOpen( [message] [, typeList] [, defaultLocation] [, defaultOptionFlags] [, location] [, clientName] [, windowTitle] [, actionButtonLabel] [, cancelButtonLabel] [, preferenceKey] [, popupExtension] [, eventProc] [, previewProc] [, filterProc] [, wanted] ) + + Post a dialog asking the user for a file to open, and return the file selected + or :const:`None` if the user cancelled. *message* is a text message to display, + *typeList* is a list of 4-char filetypes allowable, *defaultLocation* is the + pathname, :class:`FSSpec` or :class:`FSRef` of the folder to show initially, + *location* is the ``(x, y)`` position on the screen where the dialog is shown, + *actionButtonLabel* is a string to show instead of "Open" in the OK button, + *cancelButtonLabel* is a string to show instead of "Cancel" in the cancel + button, *wanted* is the type of value wanted as a return: :class:`str`, + :class:`unicode`, :class:`FSSpec`, :class:`FSRef` and subtypes thereof are + acceptable. + + .. index:: single: Navigation Services + + For a description of the other arguments please see the Apple Navigation + Services documentation and the :mod:`EasyDialogs` source code. + + +.. function:: AskFileForSave( [message] [, savedFileName] [, defaultLocation] [, defaultOptionFlags] [, location] [, clientName] [, windowTitle] [, actionButtonLabel] [, cancelButtonLabel] [, preferenceKey] [, popupExtension] [, fileType] [, fileCreator] [, eventProc] [, wanted] ) + + Post a dialog asking the user for a file to save to, and return the file + selected or :const:`None` if the user cancelled. *savedFileName* is the default + for the file name to save to (the return value). See :func:`AskFileForOpen` for + a description of the other arguments. + + +.. 
function:: AskFolder( [message] [, defaultLocation] [, defaultOptionFlags] [, location] [, clientName] [, windowTitle] [, actionButtonLabel] [, cancelButtonLabel] [, preferenceKey] [, popupExtension] [, eventProc] [, filterProc] [, wanted] ) + + Post a dialog asking the user to select a folder, and return the folder selected + or :const:`None` if the user cancelled. See :func:`AskFileForOpen` for a + description of the arguments. + + +.. seealso:: + + `Navigation Services Reference `_ + Programmer's reference documentation for the Navigation Services, a part of the + Carbon framework. + + +.. _progressbar-objects: + +ProgressBar Objects +------------------- + +:class:`ProgressBar` objects provide support for modeless progress-bar dialogs. +Both determinate (thermometer style) and indeterminate (barber-pole style) +progress bars are supported. The bar will be determinate if its maximum value +is greater than zero; otherwise it will be indeterminate. + +.. versionchanged:: 2.2 + Support for indeterminate-style progress bars was added. + +The dialog is displayed immediately after creation. If the dialog's "Cancel" +button is pressed, or if :kbd:`Cmd-.` or :kbd:`ESC` is typed, the dialog window +is hidden and :exc:`KeyboardInterrupt` is raised (but note that this response +does not occur until the progress bar is next updated, typically via a call to +:meth:`inc` or :meth:`set`). Otherwise, the bar remains visible until the +:class:`ProgressBar` object is discarded. + +:class:`ProgressBar` objects possess the following attributes and methods: + + +.. attribute:: ProgressBar.curval + + The current value (of type integer or long integer) of the progress bar. The + normal access methods coerce :attr:`curval` between ``0`` and :attr:`maxval`. + This attribute should not be altered directly. + + +.. 
attribute:: ProgressBar.maxval + + The maximum value (of type integer or long integer) of the progress bar; the + progress bar (thermometer style) is full when :attr:`curval` equals + :attr:`maxval`. If :attr:`maxval` is ``0``, the bar will be indeterminate + (barber-pole). This attribute should not be altered directly. + + +.. method:: ProgressBar.title([newstr]) + + Sets the text in the title bar of the progress dialog to *newstr*. + + +.. method:: ProgressBar.label([newstr]) + + Sets the text in the progress box of the progress dialog to *newstr*. + + +.. method:: ProgressBar.set(value[, max]) + + Sets the progress bar's :attr:`curval` to *value*, and also :attr:`maxval` to + *max* if the latter is provided. *value* is first coerced between 0 and + :attr:`maxval`. The thermometer bar is updated to reflect the changes, + including a change from indeterminate to determinate or vice versa. + + +.. method:: ProgressBar.inc([n]) + + Increments the progress bar's :attr:`curval` by *n*, or by ``1`` if *n* is not + provided. (Note that *n* may be negative, in which case the effect is a + decrement.) The progress bar is updated to reflect the change. If the bar is + indeterminate, this causes one "spin" of the barber pole. The resulting + :attr:`curval` is coerced between 0 and :attr:`maxval` if incrementing causes it + to fall outside this range. + diff --git a/Doc/library/email-examples.rst b/Doc/library/email-examples.rst new file mode 100644 index 0000000..64a9944 --- /dev/null +++ b/Doc/library/email-examples.rst @@ -0,0 +1,33 @@ +:mod:`email`: Examples +---------------------- + +Here are a few examples of how to use the :mod:`email` package to read, write, +and send simple email messages, as well as more complex MIME messages. + +First, let's see how to create and send a simple text message: + +.. 
literalinclude:: ../includes/email-simple.py + + +Here's an example of how to send a MIME message containing a bunch of family +pictures that may be residing in a directory: + +.. literalinclude:: ../includes/email-mime.py + + +Here's an example of how to send the entire contents of a directory as an email +message: [1]_ + +.. literalinclude:: ../includes/email-dir.py + + +And finally, here's an example of how to unpack a MIME message like the one +above, into a directory of files: + +.. literalinclude:: ../includes/email-unpack.py + + +.. rubric:: Footnotes + +.. [1] Thanks to Matthew Dixon Cowles for the original inspiration and examples. + diff --git a/Doc/library/email.charset.rst b/Doc/library/email.charset.rst new file mode 100644 index 0000000..d16d281 --- /dev/null +++ b/Doc/library/email.charset.rst @@ -0,0 +1,249 @@ +:mod:`email`: Representing character sets +----------------------------------------- + +.. module:: email.charset + :synopsis: Character Sets + + +This module provides a class :class:`Charset` for representing character sets +and character set conversions in email messages, as well as a character set +registry and several convenience methods for manipulating this registry. +Instances of :class:`Charset` are used in several other modules within the +:mod:`email` package. + +Import this class from the :mod:`email.charset` module. + +.. versionadded:: 2.2.2 + + +.. class:: Charset([input_charset]) + + Map character sets to their email properties. + + This class provides information about the requirements imposed on email for a + specific character set. It also provides convenience routines for converting + between character sets, given the availability of the applicable codecs. Given + a character set, it will do its best to provide information on how to use that + character set in an email message in an RFC-compliant way. + + Certain character sets must be encoded with quoted-printable or base64 when used + in email headers or bodies. 
Certain character sets must be converted outright, + and are not allowed in email. + + Optional *input_charset* is as described below; it is always coerced to lower + case. After being alias normalized it is also used as a lookup into the + registry of character sets to find out the header encoding, body encoding, and + output conversion codec to be used for the character set. For example, if + *input_charset* is ``iso-8859-1``, then headers and bodies will be encoded using + quoted-printable and no output conversion codec is necessary. If + *input_charset* is ``euc-jp``, then headers will be encoded with base64, bodies + will not be encoded, but output text will be converted from the ``euc-jp`` + character set to the ``iso-2022-jp`` character set. + +:class:`Charset` instances have the following data attributes: + + +.. data:: input_charset + + The initial character set specified. Common aliases are converted to their + *official* email names (e.g. ``latin_1`` is converted to ``iso-8859-1``). + Defaults to 7-bit ``us-ascii``. + + +.. data:: header_encoding + + If the character set must be encoded before it can be used in an email header, + this attribute will be set to ``Charset.QP`` (for quoted-printable), + ``Charset.BASE64`` (for base64 encoding), or ``Charset.SHORTEST`` for the + shortest of QP or BASE64 encoding. Otherwise, it will be ``None``. + + +.. data:: body_encoding + + Same as *header_encoding*, but describes the encoding for the mail message's + body, which indeed may be different than the header encoding. + ``Charset.SHORTEST`` is not allowed for *body_encoding*. + + +.. data:: output_charset + + Some character sets must be converted before they can be used in email headers + or bodies. If the *input_charset* is one of them, this attribute will contain + the name of the character set output will be converted to. Otherwise, it will + be ``None``. + + +.. 
data:: input_codec + + The name of the Python codec used to convert the *input_charset* to Unicode. If + no conversion codec is necessary, this attribute will be ``None``. + + +.. data:: output_codec + + The name of the Python codec used to convert Unicode to the *output_charset*. + If no conversion codec is necessary, this attribute will have the same value as + the *input_codec*. + +:class:`Charset` instances also have the following methods: + + +.. method:: Charset.get_body_encoding() + + Return the content transfer encoding used for body encoding. + + This is either the string ``quoted-printable`` or ``base64`` depending on the + encoding used, or it is a function, in which case you should call the function + with a single argument, the Message object being encoded. The function should + then set the :mailheader:`Content-Transfer-Encoding` header itself to whatever + is appropriate. + + Returns the string ``quoted-printable`` if *body_encoding* is ``QP``, returns + the string ``base64`` if *body_encoding* is ``BASE64``, and returns the string + ``7bit`` otherwise. + + +.. method:: Charset.convert(s) + + Convert the string *s* from the *input_codec* to the *output_codec*. + + +.. method:: Charset.to_splittable(s) + + Convert a possibly multibyte string to a safely splittable format. *s* is the + string to split. + + Uses the *input_codec* to try and convert the string to Unicode, so it can be + safely split on character boundaries (even for multibyte characters). + + Returns the string as-is if it isn't known how to convert *s* to Unicode with + the *input_charset*. + + Characters that could not be converted to Unicode will be replaced with the + Unicode replacement character ``'U+FFFD'``. + + +.. method:: Charset.from_splittable(ustr[, to_output]) + + Convert a splittable string back into an encoded string. *ustr* is a Unicode + string to "unsplit". + + This method uses the proper codec to try and convert the string from Unicode + back into an encoded format. 
Return the string as-is if it is not Unicode, or + if it could not be converted from Unicode. + + Characters that could not be converted from Unicode will be replaced with an + appropriate character (usually ``'?'``). + + If *to_output* is ``True`` (the default), uses *output_codec* to convert to an + encoded format. If *to_output* is ``False``, it uses *input_codec*. + + +.. method:: Charset.get_output_charset() + + Return the output character set. + + This is the *output_charset* attribute if that is not ``None``, otherwise it is + *input_charset*. + + +.. method:: Charset.encoded_header_len() + + Return the length of the encoded header string, properly calculating for + quoted-printable or base64 encoding. + + +.. method:: Charset.header_encode(s[, convert]) + + Header-encode the string *s*. + + If *convert* is ``True``, the string will be converted from the input charset to + the output charset automatically. This is not useful for multibyte character + sets, which have line length issues (multibyte characters must be split on a + character, not a byte boundary); use the higher-level :class:`Header` class to + deal with these issues (see :mod:`email.header`). *convert* defaults to + ``False``. + + The type of encoding (base64 or quoted-printable) will be based on the + *header_encoding* attribute. + + +.. method:: Charset.body_encode(s[, convert]) + + Body-encode the string *s*. + + If *convert* is ``True`` (the default), the string will be converted from the + input charset to output charset automatically. Unlike :meth:`header_encode`, + there are no issues with byte boundaries and multibyte charsets in email bodies, + so this is usually pretty safe. + + The type of encoding (base64 or quoted-printable) will be based on the + *body_encoding* attribute. + +The :class:`Charset` class also provides a number of methods to support standard +operations and built-in functions. + + +.. 
method:: Charset.__str__() + + Returns *input_charset* as a string coerced to lower case. :meth:`__repr__` is + an alias for :meth:`__str__`. + + +.. method:: Charset.__eq__(other) + + This method allows you to compare two :class:`Charset` instances for equality. + + +.. method:: Charset.__ne__(other) + + This method allows you to compare two :class:`Charset` instances for inequality. + +The :mod:`email.charset` module also provides the following functions for adding +new entries to the global character set, alias, and codec registries: + + +.. function:: add_charset(charset[, header_enc[, body_enc[, output_charset]]]) + + Add character properties to the global registry. + + *charset* is the input character set, and must be the canonical name of a + character set. + + Optional *header_enc* and *body_enc* are each either ``Charset.QP`` for + quoted-printable, ``Charset.BASE64`` for base64 encoding, + ``Charset.SHORTEST`` for the shortest of quoted-printable or base64 encoding, + or ``None`` for no encoding. ``SHORTEST`` is only valid for + *header_enc*. The default is ``None`` for no encoding. + + Optional *output_charset* is the character set that the output should be in. + Conversions will proceed from input charset, to Unicode, to the output charset + when the method :meth:`Charset.convert` is called. The default is to output in + the same character set as the input. + + Both *input_charset* and *output_charset* must have Unicode codec entries in the + module's character set-to-codec mapping; use :func:`add_codec` to add codecs the + module does not know about. See the :mod:`codecs` module's documentation for + more information. + + The global character set registry is kept in the module global dictionary + ``CHARSETS``. + + +.. function:: add_alias(alias, canonical) + + Add a character set alias. *alias* is the alias name, e.g. ``latin-1``. + *canonical* is the character set's canonical name, e.g. ``iso-8859-1``. 
+ + The global charset alias registry is kept in the module global dictionary + ``ALIASES``. + + +.. function:: add_codec(charset, codecname) + + Add a codec that maps characters in the given character set to and from Unicode. + + *charset* is the canonical name of a character set. *codecname* is the name of a + Python codec, as appropriate for the second argument to the :func:`unicode` + built-in, or to the :meth:`encode` method of a Unicode string. + diff --git a/Doc/library/email.encoders.rst b/Doc/library/email.encoders.rst new file mode 100644 index 0000000..28669c4 --- /dev/null +++ b/Doc/library/email.encoders.rst @@ -0,0 +1,57 @@ +:mod:`email`: Encoders +---------------------- + +.. module:: email.encoders + :synopsis: Encoders for email message payloads. + + +When creating :class:`Message` objects from scratch, you often need to encode +the payloads for transport through compliant mail servers. This is especially +true for :mimetype:`image/\*` and :mimetype:`text/\*` type messages containing +binary data. + +The :mod:`email` package provides some convenient encodings in its +:mod:`encoders` module. These encoders are actually used by the +:class:`MIMEAudio` and :class:`MIMEImage` class constructors to provide default +encodings. All encoder functions take exactly one argument, the message object +to encode. They usually extract the payload, encode it, and reset the payload +to this newly encoded value. They should also set the +:mailheader:`Content-Transfer-Encoding` header as appropriate. + +Here are the encoding functions provided: + + +.. function:: encode_quopri(msg) + + Encodes the payload into quoted-printable form and sets the + :mailheader:`Content-Transfer-Encoding` header to ``quoted-printable`` [#]_. + This is a good encoding to use when most of your payload is normal printable + data, but contains a few unprintable characters. + + +.. 
function:: encode_base64(msg) + + Encodes the payload into base64 form and sets the + :mailheader:`Content-Transfer-Encoding` header to ``base64``. This is a good + encoding to use when most of your payload is unprintable data since it is a more + compact form than quoted-printable. The drawback of base64 encoding is that it + renders the text non-human readable. + + +.. function:: encode_7or8bit(msg) + + This doesn't actually modify the message's payload, but it does set the + :mailheader:`Content-Transfer-Encoding` header to either ``7bit`` or ``8bit`` as + appropriate, based on the payload data. + + +.. function:: encode_noop(msg) + + This does nothing; it doesn't even set the + :mailheader:`Content-Transfer-Encoding` header. + +.. rubric:: Footnotes + +.. [#] Note that encoding with :meth:`encode_quopri` also encodes all tabs and space + characters in the data. + diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst new file mode 100644 index 0000000..916d2a5 --- /dev/null +++ b/Doc/library/email.errors.rst @@ -0,0 +1,91 @@ +:mod:`email`: Exception and Defect classes +------------------------------------------ + +.. module:: email.errors + :synopsis: The exception classes used by the email package. + + +The following exception classes are defined in the :mod:`email.errors` module: + + +.. exception:: MessageError() + + This is the base class for all exceptions that the :mod:`email` package can + raise. It is derived from the standard :exc:`Exception` class and defines no + additional methods. + + +.. exception:: MessageParseError() + + This is the base class for exceptions thrown by the :class:`Parser` class. It + is derived from :exc:`MessageError`. + + +.. exception:: HeaderParseError() + + Raised under some error conditions when parsing the :rfc:`2822` headers of a + message, this class is derived from :exc:`MessageParseError`. It can be raised + from the :meth:`Parser.parse` or :meth:`Parser.parsestr` methods. 
+ + Situations where it can be raised include finding an envelope header after the + first :rfc:`2822` header of the message, finding a continuation line before the + first :rfc:`2822` header is found, or finding a line in the headers which is + neither a header nor a continuation line. + + +.. exception:: BoundaryError() + + Raised under some error conditions when parsing the :rfc:`2822` headers of a + message, this class is derived from :exc:`MessageParseError`. It can be raised + from the :meth:`Parser.parse` or :meth:`Parser.parsestr` methods. + + Situations where it can be raised include not being able to find the starting or + terminating boundary in a :mimetype:`multipart/\*` message when strict parsing + is used. + + +.. exception:: MultipartConversionError() + + Raised when a payload is added to a :class:`Message` object using + :meth:`add_payload`, but the payload is already a scalar and the message's + :mailheader:`Content-Type` main type is neither :mimetype:`multipart` nor + missing. :exc:`MultipartConversionError` multiply inherits from + :exc:`MessageError` and the built-in :exc:`TypeError`. + + Since :meth:`Message.add_payload` is deprecated, this exception is rarely raised + in practice. However the exception may also be raised if the :meth:`attach` + method is called on an instance of a class derived from + :class:`MIMENonMultipart` (e.g. :class:`MIMEImage`). + +Here's the list of the defects that the :class:`FeedParser` can find while +parsing messages. Note that the defects are added to the message where the +problem was found, so for example, if a message nested inside a +:mimetype:`multipart/alternative` had a malformed header, that nested message +object would have a defect, but the containing messages would not. + +All defect classes are subclassed from :class:`email.errors.MessageDefect`, but +this class is *not* an exception! + +.. versionadded:: 2.4 + All the defect classes were added. 
+ +* :class:`NoBoundaryInMultipartDefect` -- A message claimed to be a multipart, + but had no :mimetype:`boundary` parameter. + +* :class:`StartBoundaryNotFoundDefect` -- The start boundary claimed in the + :mailheader:`Content-Type` header was never found. + +* :class:`FirstHeaderLineIsContinuationDefect` -- The message had a continuation + line as its first header line. + +* :class:`MisplacedEnvelopeHeaderDefect` -- A "Unix From" header was found in the + middle of a header block. + +* :class:`MalformedHeaderDefect` -- A header was found that was missing a colon, + or was otherwise malformed. + +* :class:`MultipartInvariantViolationDefect` -- A message claimed to be a + :mimetype:`multipart`, but no subparts were found. Note that when a message has + this defect, its :meth:`is_multipart` method may return false even though its + content type claims to be :mimetype:`multipart`. + diff --git a/Doc/library/email.generator.rst b/Doc/library/email.generator.rst new file mode 100644 index 0000000..bb1f57d --- /dev/null +++ b/Doc/library/email.generator.rst @@ -0,0 +1,123 @@ +:mod:`email`: Generating MIME documents +--------------------------------------- + +.. module:: email.generator + :synopsis: Generate flat text email messages from a message structure. + + +One of the most common tasks is to generate the flat text of the email message +represented by a message object structure. You will need to do this if you want +to send your message via the :mod:`smtplib` module or the :mod:`nntplib` module, +or print the message on the console. Taking a message object structure and +producing a flat text document is the job of the :class:`Generator` class. + +Again, as with the :mod:`email.parser` module, you aren't limited to the +functionality of the bundled generator; you could write one from scratch +yourself. 
However the bundled generator knows how to generate most email in a +standards-compliant way, should handle MIME and non-MIME email messages just +fine, and is designed so that the transformation from flat text, to a message +structure via the :class:`Parser` class, and back to flat text, is idempotent +(the input is identical to the output). + +Here are the public methods of the :class:`Generator` class, imported from the +:mod:`email.generator` module: + + +.. class:: Generator(outfp[, mangle_from_[, maxheaderlen]]) + + The constructor for the :class:`Generator` class takes a file-like object called + *outfp* for an argument. *outfp* must support the :meth:`write` method and be + usable as the output file in a Python extended print statement. + + Optional *mangle_from_* is a flag that, when ``True``, puts a ``>`` character in + front of any line in the body that starts exactly as ``From``, i.e. ``From`` + followed by a space at the beginning of the line. This is the only guaranteed + portable way to avoid having such lines be mistaken for a Unix mailbox format + envelope header separator (see `WHY THE CONTENT-LENGTH FORMAT IS BAD + <http://www.jwz.org/doc/content-length.html>`_ for details). *mangle_from_* + defaults to ``True``, but you might want to set this to ``False`` if you are not + writing Unix mailbox format files. + + Optional *maxheaderlen* specifies the longest length for a non-continued header. + When a header line is longer than *maxheaderlen* (in characters, with tabs + expanded to 8 spaces), the header will be split as defined in the + :class:`email.header.Header` class. Set to zero to disable header wrapping. The + default is 78, as recommended (but not required) by :rfc:`2822`. + +The other public :class:`Generator` methods are: + + +.. method:: Generator.flatten(msg[, unixfrom]) + + Print the textual representation of the message object structure rooted at *msg* + to the output file specified when the :class:`Generator` instance was created. 
+ Subparts are visited depth-first and the resulting text will be properly MIME + encoded. + + Optional *unixfrom* is a flag that forces the printing of the envelope header + delimiter before the first :rfc:`2822` header of the root message object. If + the root object has no envelope header, a standard one is crafted. By default, + this is set to ``False`` to inhibit the printing of the envelope delimiter. + + Note that for subparts, no envelope header is ever printed. + + .. versionadded:: 2.2.2 + + +.. method:: Generator.clone(fp) + + Return an independent clone of this :class:`Generator` instance with the exact + same options. + + .. versionadded:: 2.2.2 + + +.. method:: Generator.write(s) + + Write the string *s* to the underlying file object, i.e. *outfp* passed to + :class:`Generator`'s constructor. This provides just enough file-like API for + :class:`Generator` instances to be used in extended print statements. + +As a convenience, see the methods :meth:`Message.as_string` and +``str(aMessage)``, a.k.a. :meth:`Message.__str__`, which simplify the generation +of a formatted string representation of a message object. For more detail, see +:mod:`email.message`. + +The :mod:`email.generator` module also provides a derived class, called +:class:`DecodedGenerator` which is like the :class:`Generator` base class, +except that non-\ :mimetype:`text` parts are substituted with a format string +representing the part. + + +.. class:: DecodedGenerator(outfp[, mangle_from_[, maxheaderlen[, fmt]]]) + + This class, derived from :class:`Generator` walks through all the subparts of a + message. If the subpart is of main type :mimetype:`text`, then it prints the + decoded payload of the subpart. Optional *mangle_from_* and *maxheaderlen* are + as with the :class:`Generator` base class. + + If the subpart is not of main type :mimetype:`text`, optional *fmt* is a format + string that is used instead of the message payload. 
*fmt* is expanded with the + following keywords, ``%(keyword)s`` format: + + * ``type`` -- Full MIME type of the non-\ :mimetype:`text` part + + * ``maintype`` -- Main MIME type of the non-\ :mimetype:`text` part + + * ``subtype`` -- Sub-MIME type of the non-\ :mimetype:`text` part + + * ``filename`` -- Filename of the non-\ :mimetype:`text` part + + * ``description`` -- Description associated with the non-\ :mimetype:`text` part + + * ``encoding`` -- Content transfer encoding of the non-\ :mimetype:`text` part + + The default value for *fmt* is ``None``, meaning :: + + [Non-text (%(type)s) part of message omitted, filename %(filename)s] + + .. versionadded:: 2.2.2 + +.. versionchanged:: 2.5 + The previously deprecated method :meth:`__call__` was removed. + diff --git a/Doc/library/email.header.rst b/Doc/library/email.header.rst new file mode 100644 index 0000000..0ecd35f --- /dev/null +++ b/Doc/library/email.header.rst @@ -0,0 +1,171 @@ +:mod:`email`: Internationalized headers +--------------------------------------- + +.. module:: email.header + :synopsis: Representing non-ASCII headers + + +:rfc:`2822` is the base standard that describes the format of email messages. +It derives from the older :rfc:`822` standard which came into widespread use at +a time when most email was composed of ASCII characters only. :rfc:`2822` is a +specification written assuming email contains only 7-bit ASCII characters. + +Of course, as email has been deployed worldwide, it has become +internationalized, such that language specific character sets can now be used in +email messages. The base standard still requires email messages to be +transferred using only 7-bit ASCII characters, so a slew of RFCs have been +written describing how to encode email containing non-ASCII characters into +:rfc:`2822`\ -compliant format. These RFCs include :rfc:`2045`, :rfc:`2046`, +:rfc:`2047`, and :rfc:`2231`. 
The :mod:`email` package supports these standards +in its :mod:`email.header` and :mod:`email.charset` modules. + +If you want to include non-ASCII characters in your email headers, say in the +:mailheader:`Subject` or :mailheader:`To` fields, you should use the +:class:`Header` class and assign the field in the :class:`Message` object to an +instance of :class:`Header` instead of using a string for the header value. +Import the :class:`Header` class from the :mod:`email.header` module. For +example:: + + >>> from email.message import Message + >>> from email.header import Header + >>> msg = Message() + >>> h = Header('p\xf6stal', 'iso-8859-1') + >>> msg['Subject'] = h + >>> print msg.as_string() + Subject: =?iso-8859-1?q?p=F6stal?= + + + +Notice here how we wanted the :mailheader:`Subject` field to contain a non-ASCII +character? We did this by creating a :class:`Header` instance and passing in +the character set that the byte string was encoded in. When the subsequent +:class:`Message` instance was flattened, the :mailheader:`Subject` field was +properly :rfc:`2047` encoded. MIME-aware mail readers would show this header +using the embedded ISO-8859-1 character. + +.. versionadded:: 2.2.2 + +Here is the :class:`Header` class description: + + +.. class:: Header([s[, charset[, maxlinelen[, header_name[, continuation_ws[, errors]]]]]]) + + Create a MIME-compliant header that can contain strings in different character + sets. + + Optional *s* is the initial header value. If ``None`` (the default), the + initial header value is not set. You can later append to the header with + :meth:`append` method calls. *s* may be a byte string or a Unicode string, but + see the :meth:`append` documentation for semantics. + + Optional *charset* serves two purposes: it has the same meaning as the *charset* + argument to the :meth:`append` method. It also sets the default character set + for all subsequent :meth:`append` calls that omit the *charset* argument. 
If + *charset* is not provided in the constructor (the default), the ``us-ascii`` + character set is used both as *s*'s initial charset and as the default for + subsequent :meth:`append` calls. + + The maximum line length can be specified explicitly via *maxlinelen*. For + splitting the first line to a shorter value (to account for the field header + which isn't included in *s*, e.g. :mailheader:`Subject`) pass in the name of the + field in *header_name*. The default *maxlinelen* is 76, and the default value + for *header_name* is ``None``, meaning it is not taken into account for the + first line of a long, split header. + + Optional *continuation_ws* must be :rfc:`2822`\ -compliant folding whitespace, + and is usually either a space or a hard tab character. This character will be + prepended to continuation lines. + +Optional *errors* is passed straight through to the :meth:`append` method. + + +.. method:: Header.append(s[, charset[, errors]]) + + Append the string *s* to the MIME header. + + Optional *charset*, if given, should be a :class:`Charset` instance (see + :mod:`email.charset`) or the name of a character set, which will be converted to + a :class:`Charset` instance. A value of ``None`` (the default) means that the + *charset* given in the constructor is used. + + *s* may be a byte string or a Unicode string. If it is a byte string (i.e. + ``isinstance(s, str)`` is true), then *charset* is the encoding of that byte + string, and a :exc:`UnicodeError` will be raised if the string cannot be decoded + with that character set. + + If *s* is a Unicode string, then *charset* is a hint specifying the character + set of the characters in the string. In this case, when producing an + :rfc:`2822`\ -compliant header using :rfc:`2047` rules, the Unicode string will + be encoded using the following charsets in order: ``us-ascii``, the *charset* + hint, ``utf-8``. The first character set to not provoke a :exc:`UnicodeError` + is used. 
+ + Optional *errors* is passed through to any :func:`unicode` or + :func:`ustr.encode` call, and defaults to "strict". + + +.. method:: Header.encode([splitchars]) + + Encode a message header into an RFC-compliant format, possibly wrapping long + lines and encapsulating non-ASCII parts in base64 or quoted-printable encodings. + Optional *splitchars* is a string containing characters to split long ASCII + lines on, in rough support of :rfc:`2822`'s *highest level syntactic breaks*. + This doesn't affect :rfc:`2047` encoded lines. + +The :class:`Header` class also provides a number of methods to support standard +operators and built-in functions. + + +.. method:: Header.__str__() + + A synonym for :meth:`Header.encode`. Useful for ``str(aHeader)``. + + +.. method:: Header.__unicode__() + + A helper for the built-in :func:`unicode` function. Returns the header as a + Unicode string. + + +.. method:: Header.__eq__(other) + + This method allows you to compare two :class:`Header` instances for equality. + + +.. method:: Header.__ne__(other) + + This method allows you to compare two :class:`Header` instances for inequality. + +The :mod:`email.header` module also provides the following convenient functions. + + +.. function:: decode_header(header) + + Decode a message header value without converting the character set. The header + value is in *header*. + + This function returns a list of ``(decoded_string, charset)`` pairs containing + each of the decoded parts of the header. *charset* is ``None`` for non-encoded + parts of the header, otherwise a lower case string containing the name of the + character set specified in the encoded string. + + Here's an example:: + + >>> from email.header import decode_header + >>> decode_header('=?iso-8859-1?q?p=F6stal?=') + [('p\xf6stal', 'iso-8859-1')] + + +.. 
function:: make_header(decoded_seq[, maxlinelen[, header_name[, continuation_ws]]]) + + Create a :class:`Header` instance from a sequence of pairs as returned by + :func:`decode_header`. + + :func:`decode_header` takes a header value string and returns a sequence of + pairs of the format ``(decoded_string, charset)`` where *charset* is the name of + the character set. + + This function takes one of those sequence of pairs and returns a :class:`Header` + instance. Optional *maxlinelen*, *header_name*, and *continuation_ws* are as in + the :class:`Header` constructor. + diff --git a/Doc/library/email.iterators.rst b/Doc/library/email.iterators.rst new file mode 100644 index 0000000..aa70141 --- /dev/null +++ b/Doc/library/email.iterators.rst @@ -0,0 +1,65 @@ +:mod:`email`: Iterators +----------------------- + +.. module:: email.iterators + :synopsis: Iterate over a message object tree. + + +Iterating over a message object tree is fairly easy with the +:meth:`Message.walk` method. The :mod:`email.iterators` module provides some +useful higher level iterations over message object trees. + + +.. function:: body_line_iterator(msg[, decode]) + + This iterates over all the payloads in all the subparts of *msg*, returning the + string payloads line-by-line. It skips over all the subpart headers, and it + skips over any subpart with a payload that isn't a Python string. This is + somewhat equivalent to reading the flat text representation of the message from + a file using :meth:`readline`, skipping over all the intervening headers. + + Optional *decode* is passed through to :meth:`Message.get_payload`. + + +.. function:: typed_subpart_iterator(msg[, maintype[, subtype]]) + + This iterates over all the subparts of *msg*, returning only those subparts that + match the MIME type specified by *maintype* and *subtype*. + + Note that *subtype* is optional; if omitted, then subpart MIME type matching is + done only with the main type. 
*maintype* is optional too; it defaults to + :mimetype:`text`. + + Thus, by default :func:`typed_subpart_iterator` returns each subpart that has a + MIME type of :mimetype:`text/\*`. + +The following function has been added as a useful debugging tool. It should +*not* be considered part of the supported public interface for the package. + + +.. function:: _structure(msg[, fp[, level]]) + + Prints an indented representation of the content types of the message object + structure. For example:: + + >>> msg = email.message_from_file(somefile) + >>> _structure(msg) + multipart/mixed + text/plain + text/plain + multipart/digest + message/rfc822 + text/plain + message/rfc822 + text/plain + message/rfc822 + text/plain + message/rfc822 + text/plain + message/rfc822 + text/plain + text/plain + + Optional *fp* is a file-like object to print the output to. It must be suitable + for Python's extended print statement. *level* is used internally. + diff --git a/Doc/library/email.message.rst b/Doc/library/email.message.rst new file mode 100644 index 0000000..e1fb20e --- /dev/null +++ b/Doc/library/email.message.rst @@ -0,0 +1,548 @@ +:mod:`email`: Representing an email message +------------------------------------------- + +.. module:: email.message + :synopsis: The base class representing email messages. + + +The central class in the :mod:`email` package is the :class:`Message` class, +imported from the :mod:`email.message` module. It is the base class for the +:mod:`email` object model. :class:`Message` provides the core functionality for +setting and querying header fields, and for accessing message bodies. + +Conceptually, a :class:`Message` object consists of *headers* and *payloads*. +Headers are :rfc:`2822` style field names and values where the field name and +value are separated by a colon. The colon is not part of either the field name +or the field value. + +Headers are stored and returned in case-preserving form but are matched +case-insensitively. 
There may also be a single envelope header, also known as +the *Unix-From* header or the ``From_`` header. The payload is either a string +in the case of simple message objects or a list of :class:`Message` objects for +MIME container documents (e.g. :mimetype:`multipart/\*` and +:mimetype:`message/rfc822`). + +:class:`Message` objects provide a mapping style interface for accessing the +message headers, and an explicit interface for accessing both the headers and +the payload. It provides convenience methods for generating a flat text +representation of the message object tree, for accessing commonly used header +parameters, and for recursively walking over the object tree. + +Here are the methods of the :class:`Message` class: + + +.. class:: Message() + + The constructor takes no arguments. + + +.. method:: Message.as_string([unixfrom]) + + Return the entire message flattened as a string. When optional *unixfrom* is + ``True``, the envelope header is included in the returned string. *unixfrom* + defaults to ``False``. + + Note that this method is provided as a convenience and may not always format the + message the way you want. For example, by default it mangles lines that begin + with ``From``. For more flexibility, instantiate a :class:`Generator` instance + and use its :meth:`flatten` method directly. For example:: + + from cStringIO import StringIO + from email.generator import Generator + fp = StringIO() + g = Generator(fp, mangle_from_=False, maxheaderlen=60) + g.flatten(msg) + text = fp.getvalue() + + +.. method:: Message.__str__() + + Equivalent to ``as_string(unixfrom=True)``. + + +.. method:: Message.is_multipart() + + Return ``True`` if the message's payload is a list of sub-\ :class:`Message` + objects, otherwise return ``False``. When :meth:`is_multipart` returns False, + the payload should be a string object. + + +.. method:: Message.set_unixfrom(unixfrom) + + Set the message's envelope header to *unixfrom*, which should be a string. + + +..
method:: Message.get_unixfrom() + + Return the message's envelope header. Defaults to ``None`` if the envelope + header was never set. + + +.. method:: Message.attach(payload) + + Add the given *payload* to the current payload, which must be ``None`` or a list + of :class:`Message` objects before the call. After the call, the payload will + always be a list of :class:`Message` objects. If you want to set the payload to + a scalar object (e.g. a string), use :meth:`set_payload` instead. + + +.. method:: Message.get_payload([i[, decode]]) + + Return a reference to the current payload, which will be a list of :class:`Message` + objects when :meth:`is_multipart` is ``True``, or a string when + :meth:`is_multipart` is ``False``. If the payload is a list and you mutate the + list object, you modify the message's payload in place. + + With optional argument *i*, :meth:`get_payload` will return the *i*-th element + of the payload, counting from zero, if :meth:`is_multipart` is ``True``. An + :exc:`IndexError` will be raised if *i* is less than 0 or greater than or equal + to the number of items in the payload. If the payload is a string (i.e. + :meth:`is_multipart` is ``False``) and *i* is given, a :exc:`TypeError` is + raised. + + Optional *decode* is a flag indicating whether the payload should be decoded or + not, according to the :mailheader:`Content-Transfer-Encoding` header. When + ``True`` and the message is not a multipart, the payload will be decoded if this + header's value is ``quoted-printable`` or ``base64``. If some other encoding is + used, or :mailheader:`Content-Transfer-Encoding` header is missing, or if the + payload has bogus base64 data, the payload is returned as-is (undecoded). If + the message is a multipart and the *decode* flag is ``True``, then ``None`` is + returned. The default for *decode* is ``False``. + + +.. method:: Message.set_payload(payload[, charset]) + + Set the entire message object's payload to *payload*.
It is the client's + responsibility to ensure the payload invariants. Optional *charset* sets the + message's default character set; see :meth:`set_charset` for details. + + .. versionchanged:: 2.2.2 + *charset* argument added. + + +.. method:: Message.set_charset(charset) + + Set the character set of the payload to *charset*, which can either be a + :class:`Charset` instance (see :mod:`email.charset`), a string naming a + character set, or ``None``. If it is a string, it will be converted to a + :class:`Charset` instance. If *charset* is ``None``, the ``charset`` parameter + will be removed from the :mailheader:`Content-Type` header. Anything else will + generate a :exc:`TypeError`. + + The message will be assumed to be of type :mimetype:`text/\*` encoded with + *charset.input_charset*. It will be converted to *charset.output_charset* and + encoded properly, if needed, when generating the plain text representation of + the message. MIME headers (:mailheader:`MIME-Version`, + :mailheader:`Content-Type`, :mailheader:`Content-Transfer-Encoding`) will be + added as needed. + + .. versionadded:: 2.2.2 + + +.. method:: Message.get_charset() + + Return the :class:`Charset` instance associated with the message's payload. + + .. versionadded:: 2.2.2 + +The following methods implement a mapping-like interface for accessing the +message's :rfc:`2822` headers. Note that there are some semantic differences +between these methods and a normal mapping (i.e. dictionary) interface. For +example, in a dictionary there are no duplicate keys, but here there may be +duplicate message headers. Also, in dictionaries there is no guaranteed order +to the keys returned by :meth:`keys`, but in a :class:`Message` object, headers +are always returned in the order they appeared in the original message, or were +added to the message later. Any header deleted and then re-added is always +appended to the end of the header list.
+ +These semantic differences are intentional and are biased toward maximal +convenience. + +Note that in all cases, any envelope header present in the message is not +included in the mapping interface. + + +.. method:: Message.__len__() + + Return the total number of headers, including duplicates. + + +.. method:: Message.__contains__(name) + + Return true if the message object has a field named *name*. Matching is done + case-insensitively and *name* should not include the trailing colon. Used for + the ``in`` operator, e.g.:: + + if 'message-id' in myMessage: + print 'Message-ID:', myMessage['message-id'] + + +.. method:: Message.__getitem__(name) + + Return the value of the named header field. *name* should not include the colon + field separator. If the header is missing, ``None`` is returned; a + :exc:`KeyError` is never raised. + + Note that if the named field appears more than once in the message's headers, + exactly which of those field values will be returned is undefined. Use the + :meth:`get_all` method to get the values of all the extant named headers. + + +.. method:: Message.__setitem__(name, val) + + Add a header to the message with field name *name* and value *val*. The field + is appended to the end of the message's existing fields. + + Note that this does *not* overwrite or delete any existing header with the same + name. If you want to ensure that the new header is the only one present in the + message with field name *name*, delete the field first, e.g.:: + + del msg['subject'] + msg['subject'] = 'Python roolz!' + + +.. method:: Message.__delitem__(name) + + Delete all occurrences of the field with name *name* from the message's headers. + No exception is raised if the named field isn't present in the headers. + + +.. method:: Message.has_key(name) + + Return true if the message contains a header field named *name*, otherwise + return false. + + +.. method:: Message.keys() + + Return a list of all the message's header field names. + + +.. 
method:: Message.values() + + Return a list of all the message's field values. + + +.. method:: Message.items() + + Return a list of 2-tuples containing all the message's field headers and values. + + +.. method:: Message.get(name[, failobj]) + + Return the value of the named header field. This is identical to + :meth:`__getitem__` except that optional *failobj* is returned if the named + header is missing (defaults to ``None``). + +Here are some additional useful methods: + + +.. method:: Message.get_all(name[, failobj]) + + Return a list of all the values for the field named *name*. If there are no such + named headers in the message, *failobj* is returned (defaults to ``None``). + + +.. method:: Message.add_header(_name, _value, **_params) + + Extended header setting. This method is similar to :meth:`__setitem__` except + that additional header parameters can be provided as keyword arguments. *_name* + is the header field to add and *_value* is the *primary* value for the header. + + For each item in the keyword argument dictionary *_params*, the key is taken as + the parameter name, with underscores converted to dashes (since dashes are + illegal in Python identifiers). Normally, the parameter will be added as + ``key="value"`` unless the value is ``None``, in which case only the key will be + added. + + Here's an example:: + + msg.add_header('Content-Disposition', 'attachment', filename='bud.gif') + + This will add a header that looks like :: + + Content-Disposition: attachment; filename="bud.gif" + + +.. method:: Message.replace_header(_name, _value) + + Replace a header. Replace the first header found in the message that matches + *_name*, retaining header order and field name case. If no matching header was + found, a :exc:`KeyError` is raised. + + .. versionadded:: 2.2.2 + + +.. method:: Message.get_content_type() + + Return the message's content type. The returned string is coerced to lower case + of the form :mimetype:`maintype/subtype`. 
If there was no + :mailheader:`Content-Type` header in the message the default type as given by + :meth:`get_default_type` will be returned. Since according to :rfc:`2045`, + messages always have a default type, :meth:`get_content_type` will always return + a value. + + :rfc:`2045` defines a message's default type to be :mimetype:`text/plain` unless + it appears inside a :mimetype:`multipart/digest` container, in which case it + would be :mimetype:`message/rfc822`. If the :mailheader:`Content-Type` header + has an invalid type specification, :rfc:`2045` mandates that the default type be + :mimetype:`text/plain`. + + .. versionadded:: 2.2.2 + + +.. method:: Message.get_content_maintype() + + Return the message's main content type. This is the :mimetype:`maintype` part + of the string returned by :meth:`get_content_type`. + + .. versionadded:: 2.2.2 + + +.. method:: Message.get_content_subtype() + + Return the message's sub-content type. This is the :mimetype:`subtype` part of + the string returned by :meth:`get_content_type`. + + .. versionadded:: 2.2.2 + + +.. method:: Message.get_default_type() + + Return the default content type. Most messages have a default content type of + :mimetype:`text/plain`, except for messages that are subparts of + :mimetype:`multipart/digest` containers. Such subparts have a default content + type of :mimetype:`message/rfc822`. + + .. versionadded:: 2.2.2 + + +.. method:: Message.set_default_type(ctype) + + Set the default content type. *ctype* should either be :mimetype:`text/plain` + or :mimetype:`message/rfc822`, although this is not enforced. The default + content type is not stored in the :mailheader:`Content-Type` header. + + .. versionadded:: 2.2.2 + + +.. method:: Message.get_params([failobj[, header[, unquote]]]) + + Return the message's :mailheader:`Content-Type` parameters, as a list. The + elements of the returned list are 2-tuples of key/value pairs, as split on the + ``'='`` sign. 
The left hand side of the ``'='`` is the key, while the right + hand side is the value. If there is no ``'='`` sign in the parameter the value + is the empty string, otherwise the value is as described in :meth:`get_param` + and is unquoted if optional *unquote* is ``True`` (the default). + + Optional *failobj* is the object to return if there is no + :mailheader:`Content-Type` header. Optional *header* is the header to search + instead of :mailheader:`Content-Type`. + + .. versionchanged:: 2.2.2 + *unquote* argument added. + + +.. method:: Message.get_param(param[, failobj[, header[, unquote]]]) + + Return the value of the :mailheader:`Content-Type` header's parameter *param* as + a string. If the message has no :mailheader:`Content-Type` header or if there + is no such parameter, then *failobj* is returned (defaults to ``None``). + + Optional *header*, if given, specifies the message header to use instead of + :mailheader:`Content-Type`. + + Parameter keys are always compared case insensitively. The return value can + either be a string, or a 3-tuple if the parameter was :rfc:`2231` encoded. When + it's a 3-tuple, the elements of the value are of the form ``(CHARSET, LANGUAGE, + VALUE)``. Note that both ``CHARSET`` and ``LANGUAGE`` can be ``None``, in which + case you should consider ``VALUE`` to be encoded in the ``us-ascii`` charset. + You can usually ignore ``LANGUAGE``. + + If your application doesn't care whether the parameter was encoded as in + :rfc:`2231`, you can collapse the parameter value by calling + :func:`email.Utils.collapse_rfc2231_value`, passing in the return value from + :meth:`get_param`. This will return a suitably decoded Unicode string when the + value is a tuple, or the original string unquoted if it isn't.
For example:: + + rawparam = msg.get_param('foo') + param = email.Utils.collapse_rfc2231_value(rawparam) + + In any case, the parameter value (either the returned string, or the ``VALUE`` + item in the 3-tuple) is always unquoted, unless *unquote* is set to ``False``. + + .. versionchanged:: 2.2.2 + *unquote* argument added, and 3-tuple return value possible. + + +.. method:: Message.set_param(param, value[, header[, requote[, charset[, language]]]]) + + Set a parameter in the :mailheader:`Content-Type` header. If the parameter + already exists in the header, its value will be replaced with *value*. If the + :mailheader:`Content-Type` header has not yet been defined for this message, it + will be set to :mimetype:`text/plain` and the new parameter value will be + appended as per :rfc:`2045`. + + Optional *header* specifies an alternative header to :mailheader:`Content-Type`, + and all parameters will be quoted as necessary unless optional *requote* is + ``False`` (the default is ``True``). + + If optional *charset* is specified, the parameter will be encoded according to + :rfc:`2231`. Optional *language* specifies the RFC 2231 language, defaulting to + the empty string. Both *charset* and *language* should be strings. + + .. versionadded:: 2.2.2 + + +.. method:: Message.del_param(param[, header[, requote]]) + + Remove the given parameter completely from the :mailheader:`Content-Type` + header. The header will be re-written in place without the parameter or its + value. All values will be quoted as necessary unless *requote* is ``False`` + (the default is ``True``). Optional *header* specifies an alternative to + :mailheader:`Content-Type`. + + .. versionadded:: 2.2.2 + + +.. method:: Message.set_type(type[, header][, requote]) + + Set the main type and subtype for the :mailheader:`Content-Type` header. *type* + must be a string in the form :mimetype:`maintype/subtype`, otherwise a + :exc:`ValueError` is raised.
+ + This method replaces the :mailheader:`Content-Type` header, keeping all the + parameters in place. If *requote* is ``False``, this leaves the existing + header's quoting as is, otherwise the parameters will be quoted (the default). + + An alternative header can be specified in the *header* argument. When the + :mailheader:`Content-Type` header is set a :mailheader:`MIME-Version` header is + also added. + + .. versionadded:: 2.2.2 + + +.. method:: Message.get_filename([failobj]) + + Return the value of the ``filename`` parameter of the + :mailheader:`Content-Disposition` header of the message. If the header does not + have a ``filename`` parameter, this method falls back to looking for the + ``name`` parameter. If neither is found, or the header is missing, then + *failobj* is returned. The returned string will always be unquoted as per + :meth:`Utils.unquote`. + + +.. method:: Message.get_boundary([failobj]) + + Return the value of the ``boundary`` parameter of the :mailheader:`Content-Type` + header of the message, or *failobj* if either the header is missing, or has no + ``boundary`` parameter. The returned string will always be unquoted as per + :meth:`Utils.unquote`. + + +.. method:: Message.set_boundary(boundary) + + Set the ``boundary`` parameter of the :mailheader:`Content-Type` header to + *boundary*. :meth:`set_boundary` will always quote *boundary* if necessary. A + :exc:`HeaderParseError` is raised if the message object has no + :mailheader:`Content-Type` header. + + Note that using this method is subtly different than deleting the old + :mailheader:`Content-Type` header and adding a new one with the new boundary via + :meth:`add_header`, because :meth:`set_boundary` preserves the order of the + :mailheader:`Content-Type` header in the list of headers. However, it does *not* + preserve any continuation lines which may have been present in the original + :mailheader:`Content-Type` header. + + +.. 
method:: Message.get_content_charset([failobj]) + + Return the ``charset`` parameter of the :mailheader:`Content-Type` header, + coerced to lower case. If there is no :mailheader:`Content-Type` header, or if + that header has no ``charset`` parameter, *failobj* is returned. + + Note that this method differs from :meth:`get_charset` which returns the + :class:`Charset` instance for the default encoding of the message body. + + .. versionadded:: 2.2.2 + + +.. method:: Message.get_charsets([failobj]) + + Return a list containing the character set names in the message. If the message + is a :mimetype:`multipart`, then the list will contain one element for each + subpart in the payload, otherwise, it will be a list of length 1. + + Each item in the list will be a string which is the value of the ``charset`` + parameter in the :mailheader:`Content-Type` header for the represented subpart. + However, if the subpart has no :mailheader:`Content-Type` header, no ``charset`` + parameter, or is not of the :mimetype:`text` main MIME type, then that item in + the returned list will be *failobj*. + + +.. method:: Message.walk() + + The :meth:`walk` method is an all-purpose generator which can be used to iterate + over all the parts and subparts of a message object tree, in depth-first + traversal order. You will typically use :meth:`walk` as the iterator in a + ``for`` loop; each iteration returns the next subpart. + + Here's an example that prints the MIME type of every part of a multipart message + structure:: + + >>> for part in msg.walk(): + ... print part.get_content_type() + multipart/report + text/plain + message/delivery-status + text/plain + text/plain + message/rfc822 + +.. versionchanged:: 2.5 + The previously deprecated methods :meth:`get_type`, :meth:`get_main_type`, and + :meth:`get_subtype` were removed. + +:class:`Message` objects can also optionally contain two instance attributes, +which can be used when generating the plain text of a MIME message. + + +.. 
data:: preamble + + The format of a MIME document allows for some text between the blank line + following the headers, and the first multipart boundary string. Normally, this + text is never visible in a MIME-aware mail reader because it falls outside the + standard MIME armor. However, when viewing the raw text of the message, or when + viewing the message in a non-MIME aware reader, this text can become visible. + + The *preamble* attribute contains this leading extra-armor text for MIME + documents. When the :class:`Parser` discovers some text after the headers but + before the first boundary string, it assigns this text to the message's + *preamble* attribute. When the :class:`Generator` is writing out the plain text + representation of a MIME message, and it finds the message has a *preamble* + attribute, it will write this text in the area between the headers and the first + boundary. See :mod:`email.parser` and :mod:`email.generator` for details. + + Note that if the message object has no preamble, the *preamble* attribute will + be ``None``. + + +.. data:: epilogue + + The *epilogue* attribute acts the same way as the *preamble* attribute, except + that it contains text that appears between the last boundary and the end of the + message. + + .. versionchanged:: 2.5 + You do not need to set the epilogue to the empty string in order for the + :class:`Generator` to print a newline at the end of the file. + + +.. data:: defects + + The *defects* attribute contains a list of all the problems found when parsing + this message. See :mod:`email.errors` for a detailed description of the + possible parsing defects. + + .. versionadded:: 2.4 + diff --git a/Doc/library/email.mime.rst b/Doc/library/email.mime.rst new file mode 100644 index 0000000..6f1b0ae --- /dev/null +++ b/Doc/library/email.mime.rst @@ -0,0 +1,175 @@ +:mod:`email`: Creating email and MIME objects from scratch +---------------------------------------------------------- + +.. 
module:: email.mime + :synopsis: Build MIME messages. + + +Ordinarily, you get a message object structure by passing a file or some text to +a parser, which parses the text and returns the root message object. However +you can also build a complete message structure from scratch, or even individual +:class:`Message` objects by hand. In fact, you can also take an existing +structure and add new :class:`Message` objects, move them around, etc. This +makes a very convenient interface for slicing-and-dicing MIME messages. + +You can create a new object structure by creating :class:`Message` instances, +adding attachments and all the appropriate headers manually. For MIME messages +though, the :mod:`email` package provides some convenient subclasses to make +things easier. + +Here are the classes: + + +.. class:: MIMEBase(_maintype, _subtype, **_params) + + Module: :mod:`email.mime.base` + + This is the base class for all the MIME-specific subclasses of :class:`Message`. + Ordinarily you won't create instances specifically of :class:`MIMEBase`, + although you could. :class:`MIMEBase` is provided primarily as a convenient + base class for more specific MIME-aware subclasses. + + *_maintype* is the :mailheader:`Content-Type` major type (e.g. :mimetype:`text` + or :mimetype:`image`), and *_subtype* is the :mailheader:`Content-Type` minor + type (e.g. :mimetype:`plain` or :mimetype:`gif`). *_params* is a parameter + key/value dictionary and is passed directly to :meth:`Message.add_header`. + + The :class:`MIMEBase` class always adds a :mailheader:`Content-Type` header + (based on *_maintype*, *_subtype*, and *_params*), and a + :mailheader:`MIME-Version` header (always set to ``1.0``). + + +.. class:: MIMENonMultipart() + + Module: :mod:`email.mime.nonmultipart` + + A subclass of :class:`MIMEBase`, this is an intermediate base class for MIME + messages that are not :mimetype:`multipart`. 
The primary purpose of this class + is to prevent the use of the :meth:`attach` method, which only makes sense for + :mimetype:`multipart` messages. If :meth:`attach` is called, a + :exc:`MultipartConversionError` exception is raised. + + .. versionadded:: 2.2.2 + + +.. class:: MIMEMultipart([_subtype[, boundary[, _subparts[, _params]]]]) + + Module: :mod:`email.mime.multipart` + + A subclass of :class:`MIMEBase`, this is an intermediate base class for MIME + messages that are :mimetype:`multipart`. Optional *_subtype* defaults to + :mimetype:`mixed`, but can be used to specify the subtype of the message. A + :mailheader:`Content-Type` header of :mimetype:`multipart/`\ *_subtype* will be + added to the message object. A :mailheader:`MIME-Version` header will also be + added. + + Optional *boundary* is the multipart boundary string. When ``None`` (the + default), the boundary is calculated when needed. + + *_subparts* is a sequence of initial subparts for the payload. It must be + possible to convert this sequence to a list. You can always attach new subparts + to the message by using the :meth:`Message.attach` method. + + Additional parameters for the :mailheader:`Content-Type` header are taken from + the keyword arguments, or passed into the *_params* argument, which is a keyword + dictionary. + + .. versionadded:: 2.2.2 + + +.. class:: MIMEApplication(_data[, _subtype[, _encoder[, **_params]]]) + + Module: :mod:`email.mime.application` + + A subclass of :class:`MIMENonMultipart`, the :class:`MIMEApplication` class is + used to represent MIME message objects of major type :mimetype:`application`. + *_data* is a string containing the raw byte data. Optional *_subtype* specifies + the MIME subtype and defaults to :mimetype:`octet-stream`. + + Optional *_encoder* is a callable (i.e. function) which will perform the actual + encoding of the data for transport. This callable takes one argument, which is + the :class:`MIMEApplication` instance.
It should use :meth:`get_payload` and + :meth:`set_payload` to change the payload to encoded form. It should also add + any :mailheader:`Content-Transfer-Encoding` or other headers to the message + object as necessary. The default encoding is base64. See the + :mod:`email.encoders` module for a list of the built-in encoders. + + *_params* are passed straight through to the base class constructor. + + .. versionadded:: 2.5 + + +.. class:: MIMEAudio(_audiodata[, _subtype[, _encoder[, **_params]]]) + + Module: :mod:`email.mime.audio` + + A subclass of :class:`MIMENonMultipart`, the :class:`MIMEAudio` class is used to + create MIME message objects of major type :mimetype:`audio`. *_audiodata* is a + string containing the raw audio data. If this data can be decoded by the + standard Python module :mod:`sndhdr`, then the subtype will be automatically + included in the :mailheader:`Content-Type` header. Otherwise you can explicitly + specify the audio subtype via the *_subtype* parameter. If the minor type could + not be guessed and *_subtype* was not given, then :exc:`TypeError` is raised. + + Optional *_encoder* is a callable (i.e. function) which will perform the actual + encoding of the audio data for transport. This callable takes one argument, + which is the :class:`MIMEAudio` instance. It should use :meth:`get_payload` and + :meth:`set_payload` to change the payload to encoded form. It should also add + any :mailheader:`Content-Transfer-Encoding` or other headers to the message + object as necessary. The default encoding is base64. See the + :mod:`email.encoders` module for a list of the built-in encoders. + + *_params* are passed straight through to the base class constructor. + + +.. class:: MIMEImage(_imagedata[, _subtype[, _encoder[, **_params]]]) + + Module: :mod:`email.mime.image` + + A subclass of :class:`MIMENonMultipart`, the :class:`MIMEImage` class is used to + create MIME message objects of major type :mimetype:`image`. 
*_imagedata* is a + string containing the raw image data. If this data can be decoded by the + standard Python module :mod:`imghdr`, then the subtype will be automatically + included in the :mailheader:`Content-Type` header. Otherwise you can explicitly + specify the image subtype via the *_subtype* parameter. If the minor type could + not be guessed and *_subtype* was not given, then :exc:`TypeError` is raised. + + Optional *_encoder* is a callable (i.e. function) which will perform the actual + encoding of the image data for transport. This callable takes one argument, + which is the :class:`MIMEImage` instance. It should use :meth:`get_payload` and + :meth:`set_payload` to change the payload to encoded form. It should also add + any :mailheader:`Content-Transfer-Encoding` or other headers to the message + object as necessary. The default encoding is base64. See the + :mod:`email.encoders` module for a list of the built-in encoders. + + *_params* are passed straight through to the :class:`MIMEBase` constructor. + + +.. class:: MIMEMessage(_msg[, _subtype]) + + Module: :mod:`email.mime.message` + + A subclass of :class:`MIMENonMultipart`, the :class:`MIMEMessage` class is used + to create MIME objects of main type :mimetype:`message`. *_msg* is used as the + payload, and must be an instance of class :class:`Message` (or a subclass + thereof), otherwise a :exc:`TypeError` is raised. + + Optional *_subtype* sets the subtype of the message; it defaults to + :mimetype:`rfc822`. + + +.. class:: MIMEText(_text[, _subtype[, _charset]]) + + Module: :mod:`email.mime.text` + + A subclass of :class:`MIMENonMultipart`, the :class:`MIMEText` class is used to + create MIME objects of major type :mimetype:`text`. *_text* is the string for + the payload. *_subtype* is the minor type and defaults to :mimetype:`plain`. + *_charset* is the character set of the text and is passed as a parameter to the + :class:`MIMENonMultipart` constructor; it defaults to ``us-ascii``. 
No guessing + or encoding is performed on the text data. + + .. versionchanged:: 2.4 + The previously deprecated *_encoding* argument has been removed. Encoding + happens implicitly based on the *_charset* argument. + diff --git a/Doc/library/email.parser.rst b/Doc/library/email.parser.rst new file mode 100644 index 0000000..048ed22 --- /dev/null +++ b/Doc/library/email.parser.rst @@ -0,0 +1,220 @@ +:mod:`email`: Parsing email messages +------------------------------------ + +.. module:: email.parser + :synopsis: Parse flat text email messages to produce a message object structure. + + +Message object structures can be created in one of two ways: they can be created +from whole cloth by instantiating :class:`Message` objects and stringing them +together via :meth:`attach` and :meth:`set_payload` calls, or they can be +created by parsing a flat text representation of the email message. + +The :mod:`email` package provides a standard parser that understands most email +document structures, including MIME documents. You can pass the parser a string +or a file object, and the parser will return to you the root :class:`Message` +instance of the object structure. For simple, non-MIME messages the payload of +this root object will likely be a string containing the text of the message. +For MIME messages, the root object will return ``True`` from its +:meth:`is_multipart` method, and the subparts can be accessed via the +:meth:`get_payload` and :meth:`walk` methods. + +There are actually two parser interfaces available for use, the classic +:class:`Parser` API and the incremental :class:`FeedParser` API. The classic +:class:`Parser` API is fine if you have the entire text of the message in memory +as a string, or if the entire message lives in a file on the file system. +:class:`FeedParser` is more appropriate for when you're reading the message from +a stream which might block waiting for more input (e.g. reading an email message +from a socket). 
The :class:`FeedParser` can consume and parse the message +incrementally, and only returns the root object when you close the parser [#]_. + +Note that the parser can be extended in limited ways, and of course you can +implement your own parser completely from scratch. There is no magical +connection between the :mod:`email` package's bundled parser and the +:class:`Message` class, so your custom parser can create message object trees +any way it finds necessary. + + +FeedParser API +^^^^^^^^^^^^^^ + +.. versionadded:: 2.4 + +The :class:`FeedParser`, imported from the :mod:`email.feedparser` module, +provides an API that is conducive to incremental parsing of email messages, such +as would be necessary when reading the text of an email message from a source +that can block (e.g. a socket). The :class:`FeedParser` can of course be used +to parse an email message fully contained in a string or a file, but the classic +:class:`Parser` API may be more convenient for such use cases. The semantics +and results of the two parser APIs are identical. + +The :class:`FeedParser`'s API is simple; you create an instance, feed it a bunch +of text until there's no more to feed it, then close the parser to retrieve the +root message object. The :class:`FeedParser` is extremely accurate when parsing +standards-compliant messages, and it does a very good job of parsing +non-compliant messages, providing information about how a message was deemed +broken. It will populate a message object's *defects* attribute with a list of +any problems it found in a message. See the :mod:`email.errors` module for the +list of defects that it can find. + +Here is the API for the :class:`FeedParser`: + + +.. class:: FeedParser([_factory]) + + Create a :class:`FeedParser` instance. Optional *_factory* is a no-argument + callable that will be called whenever a new message object is needed. It + defaults to the :class:`email.message.Message` class. + + +.. 
method:: FeedParser.feed(data) + + Feed the :class:`FeedParser` some more data. *data* should be a string + containing one or more lines. The lines can be partial and the + :class:`FeedParser` will stitch such partial lines together properly. The lines + in the string can have any of the common three line endings, carriage return, + newline, or carriage return and newline (they can even be mixed). + + +.. method:: FeedParser.close() + + Closing a :class:`FeedParser` completes the parsing of all previously fed data, + and returns the root message object. It is undefined what happens if you feed + more data to a closed :class:`FeedParser`. + + +Parser class API +^^^^^^^^^^^^^^^^ + +The :class:`Parser` class, imported from the :mod:`email.parser` module, +provides an API that can be used to parse a message when the complete contents +of the message are available in a string or file. The :mod:`email.parser` +module also provides a second class, called :class:`HeaderParser` which can be +used if you're only interested in the headers of the message. +:class:`HeaderParser` can be much faster in these situations, since it does not +attempt to parse the message body, instead setting the payload to the raw body +as a string. :class:`HeaderParser` has the same API as the :class:`Parser` +class. + + +.. class:: Parser([_class]) + + The constructor for the :class:`Parser` class takes an optional argument + *_class*. This must be a callable factory (such as a function or a class), and + it is used whenever a sub-message object needs to be created. It defaults to + :class:`Message` (see :mod:`email.message`). The factory will be called without + arguments. + + The optional *strict* flag is ignored. + + .. deprecated:: 2.4 + Because the :class:`Parser` class is a backward compatible API wrapper + around the new-in-Python 2.4 :class:`FeedParser`, *all* parsing is + effectively non-strict. You should simply stop passing a *strict* flag to + the :class:`Parser` constructor. + + .. 
versionchanged:: 2.2.2 + The *strict* flag was added. + + .. versionchanged:: 2.4 + The *strict* flag was deprecated. + +The other public :class:`Parser` methods are: + + +.. method:: Parser.parse(fp[, headersonly]) + + Read all the data from the file-like object *fp*, parse the resulting text, and + return the root message object. *fp* must support both the :meth:`readline` and + the :meth:`read` methods on file-like objects. + + The text contained in *fp* must be formatted as a block of :rfc:`2822` style + headers and header continuation lines, optionally preceded by an envelope + header. The header block is terminated either by the end of the data or by a + blank line. Following the header block is the body of the message (which may + contain MIME-encoded subparts). + + Optional *headersonly* is as with the :meth:`parsestr` method. + + .. versionchanged:: 2.2.2 + The *headersonly* flag was added. + + +.. method:: Parser.parsestr(text[, headersonly]) + + Similar to the :meth:`parse` method, except it takes a string object instead of + a file-like object. Calling this method on a string is exactly equivalent to + wrapping *text* in a :class:`StringIO` instance first and calling :meth:`parse`. + + Optional *headersonly* is a flag specifying whether to stop parsing after + reading the headers or not. The default is ``False``, meaning it parses the + entire contents of the file. + + .. versionchanged:: 2.2.2 + The *headersonly* flag was added. + +Since creating a message object structure from a string or a file object is such +a common task, two functions are provided as a convenience. They are available +in the top-level :mod:`email` package namespace. + + +.. function:: message_from_string(s[, _class[, strict]]) + + Return a message object structure from a string. This is exactly equivalent to + ``Parser().parsestr(s)``. Optional *_class* and *strict* are interpreted as + with the :class:`Parser` class constructor. + + ..
versionchanged:: 2.2.2 + The *strict* flag was added. + + +.. function:: message_from_file(fp[, _class[, strict]]) + + Return a message object structure tree from an open file object. This is + exactly equivalent to ``Parser().parse(fp)``. Optional *_class* and *strict* + are interpreted as with the :class:`Parser` class constructor. + + .. versionchanged:: 2.2.2 + The *strict* flag was added. + +Here's an example of how you might use this at an interactive Python prompt:: + + >>> import email + >>> msg = email.message_from_string(myString) + + +Additional notes +^^^^^^^^^^^^^^^^ + +Here are some notes on the parsing semantics: + +* Most non-\ :mimetype:`multipart` type messages are parsed as a single message + object with a string payload. These objects will return ``False`` for + :meth:`is_multipart`. Their :meth:`get_payload` method will return a string + object. + +* All :mimetype:`multipart` type messages will be parsed as a container message + object with a list of sub-message objects for their payload. The outer + container message will return ``True`` for :meth:`is_multipart` and its + :meth:`get_payload` method will return the list of :class:`Message` subparts. + +* Most messages with a content type of :mimetype:`message/\*` (e.g. + :mimetype:`message/delivery-status` and :mimetype:`message/rfc822`) will also be + parsed as a container object containing a list payload of length 1. Their + :meth:`is_multipart` method will return ``True``. The single element in the + list payload will be a sub-message object. + +* Some non-standards compliant messages may not be internally consistent about + their :mimetype:`multipart`\ -edness. Such messages may have a + :mailheader:`Content-Type` header of type :mimetype:`multipart`, but their + :meth:`is_multipart` method may return ``False``.
If such messages were parsed + with the :class:`FeedParser`, they will have an instance of the + :class:`MultipartInvariantViolationDefect` class in their *defects* attribute + list. See :mod:`email.errors` for details. + +.. rubric:: Footnotes + +.. [#] As of email package version 3.0, introduced in Python 2.4, the classic + :class:`Parser` was re-implemented in terms of the :class:`FeedParser`, so the + semantics and results are identical between the two parsers. + diff --git a/Doc/library/email.rst b/Doc/library/email.rst new file mode 100644 index 0000000..212c321 --- /dev/null +++ b/Doc/library/email.rst @@ -0,0 +1,324 @@ +.. % Copyright (C) 2001-2007 Python Software Foundation +.. % Author: barry@python.org (Barry Warsaw) + + +:mod:`email` --- An email and MIME handling package +=================================================== + +.. module:: email + :synopsis: Package supporting the parsing, manipulating, and generating email messages, + including MIME documents. +.. moduleauthor:: Barry A. Warsaw +.. sectionauthor:: Barry A. Warsaw + + +.. versionadded:: 2.2 + +The :mod:`email` package is a library for managing email messages, including +MIME and other :rfc:`2822`\ -based message documents. It subsumes most of the +functionality in several older standard modules such as :mod:`rfc822`, +:mod:`mimetools`, :mod:`multifile`, and other non-standard packages such as +:mod:`mimecntl`. It is specifically *not* designed to do any sending of email +messages to SMTP (:rfc:`2821`), NNTP, or other servers; those are functions of +modules such as :mod:`smtplib` and :mod:`nntplib`. The :mod:`email` package +attempts to be as RFC-compliant as possible, supporting in addition to +:rfc:`2822`, such MIME-related RFCs as :rfc:`2045`, :rfc:`2046`, :rfc:`2047`, +and :rfc:`2231`. + +The primary distinguishing feature of the :mod:`email` package is that it splits +the parsing and generating of email messages from the internal *object model* +representation of email. 
Applications using the :mod:`email` package deal +primarily with objects; you can add sub-objects to messages, remove sub-objects +from messages, completely re-arrange the contents, etc. There are a separate +parser and a separate generator which handle the transformation from flat text +to the object model, and then back to flat text again. There are also handy +subclasses for some common MIME object types, and a few miscellaneous utilities +that help with such common tasks as extracting and parsing message field values, +creating RFC-compliant dates, etc. + +The following sections describe the functionality of the :mod:`email` package. +The ordering follows a progression that should be common in applications: an +email message is read as flat text from a file or other source, the text is +parsed to produce the object structure of the email message, this structure is +manipulated, and finally, the object tree is rendered back into flat text. + +It is perfectly feasible to create the object structure out of whole cloth --- +i.e. completely from scratch. From there, a similar progression can be taken as +above. + +Also included are detailed specifications of all the classes and modules that +the :mod:`email` package provides, the exception classes you might encounter +while using the :mod:`email` package, some auxiliary utilities, and a few +examples. For users of the older :mod:`mimelib` package, or previous versions +of the :mod:`email` package, a section on differences and porting is provided. + +Contents of the :mod:`email` package documentation: + +.. toctree:: + + email.message.rst + email.parser.rst + email.generator.rst + email.mime.rst + email.header.rst + email.charset.rst + email.encoders.rst + email.errors.rst + email.util.rst + email.iterators.rst + email-examples.rst + + +.. seealso:: + + Module :mod:`smtplib` + SMTP protocol client + + Module :mod:`nntplib` + NNTP protocol client + + +..
_email-pkg-history: + +Package History +--------------- + +This table describes the release history of the email package, corresponding to +the version of Python that the package was released with. For purposes of this +document, when you see a note about change or added versions, these refer to the +Python version the change was made in, *not* the email package version. This +table also describes the Python compatibility of each version of the package. + ++---------------+------------------------------+-----------------------+ +| email version | distributed with | compatible with | ++===============+==============================+=======================+ +| :const:`1.x` | Python 2.2.0 to Python 2.2.1 | *no longer supported* | ++---------------+------------------------------+-----------------------+ +| :const:`2.5` | Python 2.2.2+ and Python 2.3 | Python 2.1 to 2.5 | ++---------------+------------------------------+-----------------------+ +| :const:`3.0` | Python 2.4 | Python 2.3 to 2.5 | ++---------------+------------------------------+-----------------------+ +| :const:`4.0` | Python 2.5 | Python 2.3 to 2.5 | ++---------------+------------------------------+-----------------------+ + +Here are the major differences between :mod:`email` version 4 and version 3: + +* All modules have been renamed according to :pep:`8` standards. For example, + the version 3 module :mod:`email.Message` was renamed to :mod:`email.message` in + version 4. + +* A new subpackage :mod:`email.mime` was added and all the version 3 + :mod:`email.MIME\*` modules were renamed and situated into the :mod:`email.mime` + subpackage. For example, the version 3 module :mod:`email.MIMEText` was renamed + to :mod:`email.mime.text`. + + *Note that the version 3 names will continue to work until Python 2.6*. + +* The :mod:`email.mime.application` module was added, which contains the + :class:`MIMEApplication` class. + +* Methods that were deprecated in version 3 have been removed. 
These include + :meth:`Generator.__call__`, :meth:`Message.get_type`, + :meth:`Message.get_main_type`, and :meth:`Message.get_subtype`. + +* Fixes have been added for :rfc:`2231` support which can change some of the + return types for :func:`Message.get_param` and friends. Under some + circumstances, values which used to return a 3-tuple now return simple strings + (specifically, if all extended parameter segments were unencoded, there is no + language and charset designation expected, so the return type is now a simple + string). Also, %-decoding used to be done for both encoded and unencoded + segments; this decoding is now done only for encoded segments. + +Here are the major differences between :mod:`email` version 3 and version 2: + +* The :class:`FeedParser` class was introduced, and the :class:`Parser` class + was implemented in terms of the :class:`FeedParser`. All parsing therefore is + non-strict, and parsing will make a best effort never to raise an exception. + Problems found while parsing messages are stored in the message's *defects* + attribute. + +* All aspects of the API which raised :exc:`DeprecationWarning`\ s in version 2 + have been removed. These include the *_encoder* argument to the + :class:`MIMEText` constructor, the :meth:`Message.add_payload` method, the + :func:`Utils.dump_address_pair` function, and the functions :func:`Utils.decode` + and :func:`Utils.encode`. + +* New :exc:`DeprecationWarning`\ s have been added to: + :meth:`Generator.__call__`, :meth:`Message.get_type`, + :meth:`Message.get_main_type`, :meth:`Message.get_subtype`, and the *strict* + argument to the :class:`Parser` class. These are expected to be removed in + future versions. + +* Support for Pythons earlier than 2.3 has been removed. + +Here are the differences between :mod:`email` version 2 and version 1: + +* The :mod:`email.Header` and :mod:`email.Charset` modules have been added. + +* The pickle format for :class:`Message` instances has changed.
Since this was + never (and still isn't) formally defined, this isn't considered a backward + incompatibility. However if your application pickles and unpickles + :class:`Message` instances, be aware that in :mod:`email` version 2, + :class:`Message` instances now have private variables *_charset* and + *_default_type*. + +* Several methods in the :class:`Message` class have been deprecated, or their + signatures changed. Also, many new methods have been added. See the + documentation for the :class:`Message` class for details. The changes should be + completely backward compatible. + +* The object structure has changed in the face of :mimetype:`message/rfc822` + content types. In :mod:`email` version 1, such a type would be represented by a + scalar payload, i.e. the container message's :meth:`is_multipart` returned + false, :meth:`get_payload` was not a list object, but a single :class:`Message` + instance. + + This structure was inconsistent with the rest of the package, so the object + representation for :mimetype:`message/rfc822` content types was changed. In + :mod:`email` version 2, the container *does* return ``True`` from + :meth:`is_multipart`, and :meth:`get_payload` returns a list containing a single + :class:`Message` item. + + Note that this is one place that backward compatibility could not be completely + maintained. However, if you're already testing the return type of + :meth:`get_payload`, you should be fine. You just need to make sure your code + doesn't do a :meth:`set_payload` with a :class:`Message` instance on a container + with a content type of :mimetype:`message/rfc822`. + +* The :class:`Parser` constructor's *strict* argument was added, and its + :meth:`parse` and :meth:`parsestr` methods grew a *headersonly* argument. The + *strict* flag was also added to functions :func:`email.message_from_file` and + :func:`email.message_from_string`. + +* :meth:`Generator.__call__` is deprecated; use :meth:`Generator.flatten` + instead. 
The :class:`Generator` class has also grown the :meth:`clone` method. + +* The :class:`DecodedGenerator` class in the :mod:`email.Generator` module was + added. + +* The intermediate base classes :class:`MIMENonMultipart` and + :class:`MIMEMultipart` have been added, and interposed in the class hierarchy + for most of the other MIME-related derived classes. + +* The *_encoder* argument to the :class:`MIMEText` constructor has been + deprecated. Encoding now happens implicitly based on the *_charset* argument. + +* The following functions in the :mod:`email.Utils` module have been deprecated: + :func:`dump_address_pair`, :func:`decode`, and :func:`encode`. The following + functions have been added to the module: :func:`make_msgid`, + :func:`decode_rfc2231`, :func:`encode_rfc2231`, and :func:`decode_params`. + +* The non-public function :func:`email.Iterators._structure` was added. + + +Differences from :mod:`mimelib` +------------------------------- + +The :mod:`email` package was originally prototyped as a separate library called +`mimelib <http://mimelib.sf.net/>`_. Changes have been made so that method names +are more consistent, and some methods or modules have either been added or +removed. The semantics of some of the methods have also changed. For the most +part, any functionality available in :mod:`mimelib` is still available in the +:mod:`email` package, albeit often in a different way. Backward compatibility +between the :mod:`mimelib` package and the :mod:`email` package was not a +priority. + +Here is a brief description of the differences between the :mod:`mimelib` and +the :mod:`email` packages, along with hints on how to port your applications. + +Of course, the most visible difference between the two packages is that the +package name has been changed to :mod:`email`. In addition, the top-level +package has the following differences: + +* :func:`messageFromString` has been renamed to :func:`message_from_string`.
+ +* :func:`messageFromFile` has been renamed to :func:`message_from_file`. + +The :class:`Message` class has the following differences: + +* The method :meth:`asString` was renamed to :meth:`as_string`. + +* The method :meth:`ismultipart` was renamed to :meth:`is_multipart`. + +* The :meth:`get_payload` method has grown a *decode* optional argument. + +* The method :meth:`getall` was renamed to :meth:`get_all`. + +* The method :meth:`addheader` was renamed to :meth:`add_header`. + +* The method :meth:`gettype` was renamed to :meth:`get_type`. + +* The method :meth:`getmaintype` was renamed to :meth:`get_main_type`. + +* The method :meth:`getsubtype` was renamed to :meth:`get_subtype`. + +* The method :meth:`getparams` was renamed to :meth:`get_params`. Also, whereas + :meth:`getparams` returned a list of strings, :meth:`get_params` returns a list + of 2-tuples, effectively the key/value pairs of the parameters, split on the + ``'='`` sign. + +* The method :meth:`getparam` was renamed to :meth:`get_param`. + +* The method :meth:`getcharsets` was renamed to :meth:`get_charsets`. + +* The method :meth:`getfilename` was renamed to :meth:`get_filename`. + +* The method :meth:`getboundary` was renamed to :meth:`get_boundary`. + +* The method :meth:`setboundary` was renamed to :meth:`set_boundary`. + +* The method :meth:`getdecodedpayload` was removed. To get similar + functionality, pass the value 1 to the *decode* flag of the get_payload() + method. + +* The method :meth:`getpayloadastext` was removed. Similar functionality is + supported by the :class:`DecodedGenerator` class in the :mod:`email.generator` + module. + +* The method :meth:`getbodyastext` was removed. You can get similar + functionality by creating an iterator with :func:`typed_subpart_iterator` in the + :mod:`email.iterators` module. + +The :class:`Parser` class has no differences in its public interface. 
It does +have some additional smarts to recognize :mimetype:`message/delivery-status` +type messages, which it represents as a :class:`Message` instance containing +separate :class:`Message` subparts for each header block in the delivery status +notification [#]_. + +The :class:`Generator` class has no differences in its public interface. There +is a new class in the :mod:`email.generator` module though, called +:class:`DecodedGenerator` which provides most of the functionality previously +available in the :meth:`Message.getpayloadastext` method. + +The following modules and classes have been changed: + +* The :class:`MIMEBase` class constructor arguments *_major* and *_minor* have + changed to *_maintype* and *_subtype* respectively. + +* The ``Image`` class/module has been renamed to ``MIMEImage``. The *_minor* + argument has been renamed to *_subtype*. + +* The ``Text`` class/module has been renamed to ``MIMEText``. The *_minor* + argument has been renamed to *_subtype*. + +* The ``MessageRFC822`` class/module has been renamed to ``MIMEMessage``. Note + that an earlier version of :mod:`mimelib` called this class/module ``RFC822``, + but that clashed with the Python standard library module :mod:`rfc822` on some + case-insensitive file systems. + + Also, the :class:`MIMEMessage` class now represents any kind of MIME message + with main type :mimetype:`message`. It takes an optional argument *_subtype* + which is used to set the MIME subtype. *_subtype* defaults to + :mimetype:`rfc822`. + +:mod:`mimelib` provided some utility functions in its :mod:`address` and +:mod:`date` modules. All of these functions have been moved to the +:mod:`email.utils` module. + +The ``MsgReader`` class/module has been removed. Its functionality is most +closely supported in the :func:`body_line_iterator` function in the +:mod:`email.iterators` module. + +.. rubric:: Footnotes + +.. [#] Delivery Status Notifications (DSN) are defined in :rfc:`1894`. 
diff --git a/Doc/library/email.util.rst b/Doc/library/email.util.rst new file mode 100644 index 0000000..aa67885 --- /dev/null +++ b/Doc/library/email.util.rst @@ -0,0 +1,166 @@ +:mod:`email`: Miscellaneous utilities +------------------------------------- + +.. module:: email.utils + :synopsis: Miscellaneous email package utilities. + + +There are several useful utilities provided in the :mod:`email.utils` module: + + +.. function:: quote(str) + + Return a new string with backslashes in *str* replaced by two backslashes, and + double quotes replaced by backslash-double quote. + + +.. function:: unquote(str) + + Return a new string which is an *unquoted* version of *str*. If *str* ends and + begins with double quotes, they are stripped off. Likewise if *str* ends and + begins with angle brackets, they are stripped off. + + +.. function:: parseaddr(address) + + Parse address -- which should be the value of some address-containing field such + as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and + *email address* parts. Returns a tuple of that information, unless the parse + fails, in which case a 2-tuple of ``('', '')`` is returned. + + +.. function:: formataddr(pair) + + The inverse of :func:`parseaddr`, this takes a 2-tuple of the form ``(realname, + email_address)`` and returns the string value suitable for a :mailheader:`To` or + :mailheader:`Cc` header. If the first element of *pair* is false, then the + second element is returned unmodified. + + +.. function:: getaddresses(fieldvalues) + + This function returns a list of 2-tuples of the form returned by :func:`parseaddr`. + *fieldvalues* is a sequence of header field values as might be returned by + :meth:`Message.get_all`.
Here's a simple example that gets all the recipients + of a message:: + + from email.utils import getaddresses + + tos = msg.get_all('to', []) + ccs = msg.get_all('cc', []) + resent_tos = msg.get_all('resent-to', []) + resent_ccs = msg.get_all('resent-cc', []) + all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) + + +.. function:: parsedate(date) + + Attempts to parse a date according to the rules in :rfc:`2822`. However, some + mailers don't follow that format as specified, so :func:`parsedate` tries to + guess correctly in such cases. *date* is a string containing an :rfc:`2822` + date, such as ``"Mon, 20 Nov 1995 19:12:08 -0500"``. If it succeeds in parsing + the date, :func:`parsedate` returns a 9-tuple that can be passed directly to + :func:`time.mktime`; otherwise ``None`` will be returned. Note that indexes 6, + 7, and 8 of the result tuple are not usable. + + +.. function:: parsedate_tz(date) + + Performs the same function as :func:`parsedate`, but returns either ``None`` or + a 10-tuple; the first 9 elements make up a tuple that can be passed directly to + :func:`time.mktime`, and the tenth is the offset of the date's timezone from UTC + (which is the official term for Greenwich Mean Time) [#]_. If the input string + has no timezone, the last element of the tuple returned is ``None``. Note that + indexes 6, 7, and 8 of the result tuple are not usable. + + +.. function:: mktime_tz(tuple) + + Turn a 10-tuple as returned by :func:`parsedate_tz` into a UTC timestamp. If + the timezone item in the tuple is ``None``, assume local time. Minor + deficiency: :func:`mktime_tz` interprets the first 8 elements of *tuple* as a + local time and then compensates for the timezone difference. This may yield a + slight error around changes in daylight savings time, though not worth worrying + about for common use. + + +..
function:: formatdate([timeval[, localtime][, usegmt]]) + + Returns a date string as per :rfc:`2822`, e.g.:: + + Fri, 09 Nov 2001 01:08:47 -0000 + + Optional *timeval* if given is a floating point time value as accepted by + :func:`time.gmtime` and :func:`time.localtime`, otherwise the current time is + used. + + Optional *localtime* is a flag that when ``True``, interprets *timeval*, and + returns a date relative to the local timezone instead of UTC, properly taking + daylight savings time into account. The default is ``False`` meaning UTC is + used. + + Optional *usegmt* is a flag that when ``True``, outputs a date string with the + timezone as an ascii string ``GMT``, rather than a numeric ``-0000``. This is + needed for some protocols (such as HTTP). This only applies when *localtime* is + ``False``. + + .. versionadded:: 2.4 + + +.. function:: make_msgid([idstring]) + + Returns a string suitable for an :rfc:`2822`\ -compliant + :mailheader:`Message-ID` header. Optional *idstring* if given, is a string used + to strengthen the uniqueness of the message id. + + +.. function:: decode_rfc2231(s) + + Decode the string *s* according to :rfc:`2231`. + + +.. function:: encode_rfc2231(s[, charset[, language]]) + + Encode the string *s* according to :rfc:`2231`. Optional *charset* and + *language*, if given, are the character set name and language name to use. If + neither is given, *s* is returned as-is. If *charset* is given but *language* + is not, the string is encoded using the empty string for *language*. + + +.. function:: collapse_rfc2231_value(value[, errors[, fallback_charset]]) + + When a header parameter is encoded in :rfc:`2231` format, + :meth:`Message.get_param` may return a 3-tuple containing the character set, + language, and value. :func:`collapse_rfc2231_value` turns this into a unicode + string. Optional *errors* is passed to the *errors* argument of the built-in + :func:`unicode` function; it defaults to ``replace``.
Optional + *fallback_charset* specifies the character set to use if the one in the + :rfc:`2231` header is not known by Python; it defaults to ``us-ascii``. + + For convenience, if the *value* passed to :func:`collapse_rfc2231_value` is not + a tuple, it should be a string and it is returned unquoted. + + +.. function:: decode_params(params) + + Decode parameters list according to :rfc:`2231`. *params* is a sequence of + 2-tuples containing elements of the form ``(content-type, string-value)``. + +.. versionchanged:: 2.4 + The :func:`dump_address_pair` function has been removed; use :func:`formataddr` + instead. + +.. versionchanged:: 2.4 + The :func:`decode` function has been removed; use the + :meth:`Header.decode_header` method instead. + +.. versionchanged:: 2.4 + The :func:`encode` function has been removed; use the :meth:`Header.encode` + method instead. + +.. rubric:: Footnotes + +.. [#] Note that the sign of the timezone offset is the opposite of the sign of the + ``time.timezone`` variable for the same timezone; the latter variable follows + the POSIX standard while this module follows :rfc:`2822`. + diff --git a/Doc/library/errno.rst b/Doc/library/errno.rst new file mode 100644 index 0000000..daf9ff0 --- /dev/null +++ b/Doc/library/errno.rst @@ -0,0 +1,636 @@ + +:mod:`errno` --- Standard errno system symbols +============================================== + +.. module:: errno + :synopsis: Standard errno system symbols. + + +This module makes available standard ``errno`` system symbols. The value of each +symbol is the corresponding integer value. The names and descriptions are +borrowed from :file:`linux/include/errno.h`, which should be pretty +all-inclusive. + + +.. data:: errorcode + + Dictionary providing a mapping from the errno value to the string name in the + underlying system. For instance, ``errno.errorcode[errno.EPERM]`` maps to + ``'EPERM'``. + +To translate a numeric error code to an error message, use :func:`os.strerror`. 
+ +Of the following list, symbols that are not used on the current platform are not +defined by the module. The specific list of defined symbols is available as +``errno.errorcode.keys()``. Symbols available can include: + + +.. data:: EPERM + + Operation not permitted + + +.. data:: ENOENT + + No such file or directory + + +.. data:: ESRCH + + No such process + + +.. data:: EINTR + + Interrupted system call + + +.. data:: EIO + + I/O error + + +.. data:: ENXIO + + No such device or address + + +.. data:: E2BIG + + Arg list too long + + +.. data:: ENOEXEC + + Exec format error + + +.. data:: EBADF + + Bad file number + + +.. data:: ECHILD + + No child processes + + +.. data:: EAGAIN + + Try again + + +.. data:: ENOMEM + + Out of memory + + +.. data:: EACCES + + Permission denied + + +.. data:: EFAULT + + Bad address + + +.. data:: ENOTBLK + + Block device required + + +.. data:: EBUSY + + Device or resource busy + + +.. data:: EEXIST + + File exists + + +.. data:: EXDEV + + Cross-device link + + +.. data:: ENODEV + + No such device + + +.. data:: ENOTDIR + + Not a directory + + +.. data:: EISDIR + + Is a directory + + +.. data:: EINVAL + + Invalid argument + + +.. data:: ENFILE + + File table overflow + + +.. data:: EMFILE + + Too many open files + + +.. data:: ENOTTY + + Not a typewriter + + +.. data:: ETXTBSY + + Text file busy + + +.. data:: EFBIG + + File too large + + +.. data:: ENOSPC + + No space left on device + + +.. data:: ESPIPE + + Illegal seek + + +.. data:: EROFS + + Read-only file system + + +.. data:: EMLINK + + Too many links + + +.. data:: EPIPE + + Broken pipe + + +.. data:: EDOM + + Math argument out of domain of func + + +.. data:: ERANGE + + Math result not representable + + +.. data:: EDEADLK + + Resource deadlock would occur + + +.. data:: ENAMETOOLONG + + File name too long + + +.. data:: ENOLCK + + No record locks available + + +.. data:: ENOSYS + + Function not implemented + + +.. data:: ENOTEMPTY + + Directory not empty + + +.. 
data:: ELOOP + + Too many symbolic links encountered + + +.. data:: EWOULDBLOCK + + Operation would block + + +.. data:: ENOMSG + + No message of desired type + + +.. data:: EIDRM + + Identifier removed + + +.. data:: ECHRNG + + Channel number out of range + + +.. data:: EL2NSYNC + + Level 2 not synchronized + + +.. data:: EL3HLT + + Level 3 halted + + +.. data:: EL3RST + + Level 3 reset + + +.. data:: ELNRNG + + Link number out of range + + +.. data:: EUNATCH + + Protocol driver not attached + + +.. data:: ENOCSI + + No CSI structure available + + +.. data:: EL2HLT + + Level 2 halted + + +.. data:: EBADE + + Invalid exchange + + +.. data:: EBADR + + Invalid request descriptor + + +.. data:: EXFULL + + Exchange full + + +.. data:: ENOANO + + No anode + + +.. data:: EBADRQC + + Invalid request code + + +.. data:: EBADSLT + + Invalid slot + + +.. data:: EDEADLOCK + + File locking deadlock error + + +.. data:: EBFONT + + Bad font file format + + +.. data:: ENOSTR + + Device not a stream + + +.. data:: ENODATA + + No data available + + +.. data:: ETIME + + Timer expired + + +.. data:: ENOSR + + Out of streams resources + + +.. data:: ENONET + + Machine is not on the network + + +.. data:: ENOPKG + + Package not installed + + +.. data:: EREMOTE + + Object is remote + + +.. data:: ENOLINK + + Link has been severed + + +.. data:: EADV + + Advertise error + + +.. data:: ESRMNT + + Srmount error + + +.. data:: ECOMM + + Communication error on send + + +.. data:: EPROTO + + Protocol error + + +.. data:: EMULTIHOP + + Multihop attempted + + +.. data:: EDOTDOT + + RFS specific error + + +.. data:: EBADMSG + + Not a data message + + +.. data:: EOVERFLOW + + Value too large for defined data type + + +.. data:: ENOTUNIQ + + Name not unique on network + + +.. data:: EBADFD + + File descriptor in bad state + + +.. data:: EREMCHG + + Remote address changed + + +.. data:: ELIBACC + + Can not access a needed shared library + + +.. 
data:: ELIBBAD + + Accessing a corrupted shared library + + +.. data:: ELIBSCN + + .lib section in a.out corrupted + + +.. data:: ELIBMAX + + Attempting to link in too many shared libraries + + +.. data:: ELIBEXEC + + Cannot exec a shared library directly + + +.. data:: EILSEQ + + Illegal byte sequence + + +.. data:: ERESTART + + Interrupted system call should be restarted + + +.. data:: ESTRPIPE + + Streams pipe error + + +.. data:: EUSERS + + Too many users + + +.. data:: ENOTSOCK + + Socket operation on non-socket + + +.. data:: EDESTADDRREQ + + Destination address required + + +.. data:: EMSGSIZE + + Message too long + + +.. data:: EPROTOTYPE + + Protocol wrong type for socket + + +.. data:: ENOPROTOOPT + + Protocol not available + + +.. data:: EPROTONOSUPPORT + + Protocol not supported + + +.. data:: ESOCKTNOSUPPORT + + Socket type not supported + + +.. data:: EOPNOTSUPP + + Operation not supported on transport endpoint + + +.. data:: EPFNOSUPPORT + + Protocol family not supported + + +.. data:: EAFNOSUPPORT + + Address family not supported by protocol + + +.. data:: EADDRINUSE + + Address already in use + + +.. data:: EADDRNOTAVAIL + + Cannot assign requested address + + +.. data:: ENETDOWN + + Network is down + + +.. data:: ENETUNREACH + + Network is unreachable + + +.. data:: ENETRESET + + Network dropped connection because of reset + + +.. data:: ECONNABORTED + + Software caused connection abort + + +.. data:: ECONNRESET + + Connection reset by peer + + +.. data:: ENOBUFS + + No buffer space available + + +.. data:: EISCONN + + Transport endpoint is already connected + + +.. data:: ENOTCONN + + Transport endpoint is not connected + + +.. data:: ESHUTDOWN + + Cannot send after transport endpoint shutdown + + +.. data:: ETOOMANYREFS + + Too many references: cannot splice + + +.. data:: ETIMEDOUT + + Connection timed out + + +.. data:: ECONNREFUSED + + Connection refused + + +.. data:: EHOSTDOWN + + Host is down + + +.. 
data:: EHOSTUNREACH + + No route to host + + +.. data:: EALREADY + + Operation already in progress + + +.. data:: EINPROGRESS + + Operation now in progress + + +.. data:: ESTALE + + Stale NFS file handle + + +.. data:: EUCLEAN + + Structure needs cleaning + + +.. data:: ENOTNAM + + Not a XENIX named type file + + +.. data:: ENAVAIL + + No XENIX semaphores available + + +.. data:: EISNAM + + Is a named type file + + +.. data:: EREMOTEIO + + Remote I/O error + + +.. data:: EDQUOT + + Quota exceeded + diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst new file mode 100644 index 0000000..d6a64fc --- /dev/null +++ b/Doc/library/exceptions.rst @@ -0,0 +1,475 @@ +.. _bltin-exceptions: + +Built-in Exceptions +=================== + +.. module:: exceptions + :synopsis: Standard exception classes. + + +Exceptions should be class objects. The exceptions are defined in the module +:mod:`exceptions`. This module never needs to be imported explicitly: the +exceptions are provided in the built-in namespace as well as the +:mod:`exceptions` module. + +.. index:: + statement: try + statement: except + +For class exceptions, in a :keyword:`try` statement with an :keyword:`except` +clause that mentions a particular class, that clause also handles any exception +classes derived from that class (but not exception classes from which *it* is +derived). Two exception classes that are not related via subclassing are never +equivalent, even if they have the same name. + +.. index:: statement: raise + +The built-in exceptions listed below can be generated by the interpreter or +built-in functions. Except where mentioned, they have an "associated value" +indicating the detailed cause of the error. This may be a string or a tuple +containing several items of information (e.g., an error code and a string +explaining the code). The associated value is the second argument to the +:keyword:`raise` statement. 
If the exception class is derived from the standard +root class :exc:`BaseException`, the associated value is present as the +exception instance's :attr:`args` attribute. + +User code can raise built-in exceptions. This can be used to test an exception +handler or to report an error condition "just like" the situation in which the +interpreter raises the same exception; but beware that there is nothing to +prevent user code from raising an inappropriate error. + +The built-in exception classes can be sub-classed to define new exceptions; +programmers are encouraged to at least derive new exceptions from the +:exc:`Exception` class and not :exc:`BaseException`. More information on +defining exceptions is available in the Python Tutorial under +:ref:`tut-userexceptions`. + +The following exceptions are only used as base classes for other exceptions. + + +.. exception:: BaseException + + The base class for all built-in exceptions. It is not meant to be directly + inherited by user-defined classes (for that use :exc:`Exception`). If + :func:`str` or :func:`unicode` is called on an instance of this class, the + representation of the argument(s) to the instance is returned, or the empty + string when there were no arguments. All arguments are stored in :attr:`args` + as a tuple. + + .. versionadded:: 2.5 + + +.. exception:: Exception + + All built-in, non-system-exiting exceptions are derived from this class. All + user-defined exceptions should also be derived from this class. + + .. versionchanged:: 2.5 + Changed to inherit from :exc:`BaseException`. + + +.. exception:: ArithmeticError + + The base class for those built-in exceptions that are raised for various + arithmetic errors: :exc:`OverflowError`, :exc:`ZeroDivisionError`, + :exc:`FloatingPointError`. + + +.. exception:: LookupError + + The base class for the exceptions that are raised when a key or index used on a + mapping or sequence is invalid: :exc:`IndexError`, :exc:`KeyError`.
This can be + raised directly by :func:`sys.setdefaultencoding`. + + +.. exception:: EnvironmentError + + The base class for exceptions that can occur outside the Python system: + :exc:`IOError`, :exc:`OSError`. When exceptions of this type are created with a + 2-tuple, the first item is available on the instance's :attr:`errno` attribute + (it is assumed to be an error number), and the second item is available on the + :attr:`strerror` attribute (it is usually the associated error message). The + tuple itself is also available on the :attr:`args` attribute. + + .. versionadded:: 1.5.2 + + When an :exc:`EnvironmentError` exception is instantiated with a 3-tuple, the + first two items are available as above, while the third item is available on the + :attr:`filename` attribute. However, for backwards compatibility, the + :attr:`args` attribute contains only a 2-tuple of the first two constructor + arguments. + + The :attr:`filename` attribute is ``None`` when this exception is created with + other than 3 arguments. The :attr:`errno` and :attr:`strerror` attributes are + also ``None`` when the instance was created with other than 2 or 3 arguments. + In this last case, :attr:`args` contains the verbatim constructor arguments as a + tuple. + +The following exceptions are the exceptions that are actually raised. + + +.. exception:: AssertionError + + .. index:: statement: assert + + Raised when an :keyword:`assert` statement fails. + + +.. exception:: AttributeError + + Raised when an attribute reference or assignment fails. (When an object does + not support attribute references or attribute assignments at all, + :exc:`TypeError` is raised.) + + .. % xref to attribute reference? + + +.. exception:: EOFError + + Raised when attempting to read beyond the end of a file. (N.B.: the :meth:`read` + and :meth:`readline` methods of file objects return an empty string when they + hit EOF.) + + .. % XXXJH xrefs here + .. % XXXJH xrefs here + + +.. 
exception:: FloatingPointError + + Raised when a floating point operation fails. This exception is always defined, + but can only be raised when Python is configured with the + :option:`--with-fpectl` option, or the :const:`WANT_SIGFPE_HANDLER` symbol is + defined in the :file:`pyconfig.h` file. + + +.. exception:: GeneratorExit + + Raised when a generator's :meth:`close` method is called. + + .. versionadded:: 2.5 + + .. versionchanged:: 3.0 + Changed to inherit from Exception instead of StandardError. + + +.. exception:: IOError + + Raised when an I/O operation (such as a :keyword:`print` statement, the built-in + :func:`open` function or a method of a file object) fails for an I/O-related + reason, e.g., "file not found" or "disk full". + + .. % XXXJH xrefs here + + This class is derived from :exc:`EnvironmentError`. See the discussion above + for more information on exception instance attributes. + + +.. exception:: ImportError + + Raised when an :keyword:`import` statement fails to find the module definition + or when a ``from ... import`` fails to find a name that is to be imported. + + .. % XXXJH xref to import statement? + + +.. exception:: IndexError + + Raised when a sequence subscript is out of range. (Slice indices are silently + truncated to fall in the allowed range; if an index is not a plain integer, + :exc:`TypeError` is raised.) + + .. % XXXJH xref to sequences + + +.. exception:: KeyError + + Raised when a mapping (dictionary) key is not found in the set of existing keys. + + .. % XXXJH xref to mapping objects? + + +.. exception:: KeyboardInterrupt + + Raised when the user hits the interrupt key (normally :kbd:`Control-C` or + :kbd:`Delete`). During execution, a check for interrupts is made regularly. The + exception inherits from :exc:`BaseException` so as to not be accidentally caught + by code that catches :exc:`Exception` and thus prevent the interpreter from + exiting. + + .. % XXX(hylton) xrefs here + + ..
versionchanged:: 2.5 + Changed to inherit from :exc:`BaseException`. + + +.. exception:: MemoryError + + Raised when an operation runs out of memory but the situation may still be + rescued (by deleting some objects). The associated value is a string indicating + what kind of (internal) operation ran out of memory. Note that because of the + underlying memory management architecture (C's :cfunc:`malloc` function), the + interpreter may not always be able to completely recover from this situation; it + nevertheless raises an exception so that a stack traceback can be printed, in + case a run-away program was the cause. + + +.. exception:: NameError + + Raised when a local or global name is not found. This applies only to + unqualified names. The associated value is an error message that includes the + name that could not be found. + + +.. exception:: NotImplementedError + + This exception is derived from :exc:`RuntimeError`. In user defined base + classes, abstract methods should raise this exception when they require derived + classes to override the method. + + .. versionadded:: 1.5.2 + + +.. exception:: OSError + + This class is derived from :exc:`EnvironmentError` and is used primarily as the + :mod:`os` module's ``os.error`` exception. See :exc:`EnvironmentError` above for + a description of the possible associated values. + + .. % xref for os module + + .. versionadded:: 1.5.2 + + +.. exception:: OverflowError + + Raised when the result of an arithmetic operation is too large to be + represented. This cannot occur for long integers (which would rather raise + :exc:`MemoryError` than give up). Because of the lack of standardization of + floating point exception handling in C, most floating point operations also + aren't checked. For plain integers, all operations that can overflow are + checked except left shift, where typical applications prefer to drop bits than + raise an exception. + + .. % XXXJH reference to long's and/or int's? + + +.. 
exception:: ReferenceError + + This exception is raised when a weak reference proxy, created by the + :func:`weakref.proxy` function, is used to access an attribute of the referent + after it has been garbage collected. For more information on weak references, + see the :mod:`weakref` module. + + .. versionadded:: 2.2 + Previously known as the :exc:`weakref.ReferenceError` exception. + + +.. exception:: RuntimeError + + Raised when an error is detected that doesn't fall in any of the other + categories. The associated value is a string indicating what precisely went + wrong. (This exception is mostly a relic from a previous version of the + interpreter; it is not used very much any more.) + + +.. exception:: StopIteration + + Raised by builtin :func:`next` and an iterator's :meth:`__next__` method to + signal that there are no further values. + + .. versionadded:: 2.2 + + .. versionchanged:: 3.0 + Changed to inherit from Exception instead of StandardError. + + +.. exception:: SyntaxError + + Raised when the parser encounters a syntax error. This may occur in an + :keyword:`import` statement, in a call to the built-in functions :func:`exec` + or :func:`eval`, or when reading the initial script or standard input + (also interactively). + + .. % XXXJH xref to these functions? + + Instances of this class have attributes :attr:`filename`, :attr:`lineno`, + :attr:`offset` and :attr:`text` for easier access to the details. :func:`str` + of the exception instance returns only the message. + + +.. exception:: SystemError + + Raised when the interpreter finds an internal error, but the situation does not + look so serious to cause it to abandon all hope. The associated value is a + string indicating what went wrong (in low-level terms). + + You should report this to the author or maintainer of your Python interpreter. 
+ Be sure to report the version of the Python interpreter (``sys.version``; it is + also printed at the start of an interactive Python session), the exact error + message (the exception's associated value) and if possible the source of the + program that triggered the error. + + +.. exception:: SystemExit + + This exception is raised by the :func:`sys.exit` function. When it is not + handled, the Python interpreter exits; no stack traceback is printed. If the + associated value is a plain integer, it specifies the system exit status (passed + to C's :cfunc:`exit` function); if it is ``None``, the exit status is zero; if + it has another type (such as a string), the object's value is printed and the + exit status is one. + + .. % XXX(hylton) xref to module sys? + + Instances have an attribute :attr:`code` which is set to the proposed exit + status or error message (defaulting to ``None``). Also, this exception derives + directly from :exc:`BaseException` and not :exc:`Exception`, since it is not + technically an error. + + A call to :func:`sys.exit` is translated into an exception so that clean-up + handlers (:keyword:`finally` clauses of :keyword:`try` statements) can be + executed, and so that a debugger can execute a script without running the risk + of losing control. The :func:`os._exit` function can be used if it is + absolutely positively necessary to exit immediately (for example, in the child + process after a call to :func:`fork`). + + The exception inherits from :exc:`BaseException` instead of :exc:`Exception` so + that it is not accidentally caught by code that catches :exc:`Exception`. This + allows the exception to properly propagate up and cause the interpreter to exit. + + .. versionchanged:: 2.5 + Changed to inherit from :exc:`BaseException`. + + +.. exception:: TypeError + + Raised when an operation or function is applied to an object of inappropriate + type. The associated value is a string giving details about the type mismatch. + + +.. 
exception:: UnboundLocalError + + Raised when a reference is made to a local variable in a function or method, but + no value has been bound to that variable. This is a subclass of + :exc:`NameError`. + + .. versionadded:: 2.0 + + +.. exception:: UnicodeError + + Raised when a Unicode-related encoding or decoding error occurs. It is a + subclass of :exc:`ValueError`. + + .. versionadded:: 2.0 + + +.. exception:: UnicodeEncodeError + + Raised when a Unicode-related error occurs during encoding. It is a subclass of + :exc:`UnicodeError`. + + .. versionadded:: 2.3 + + +.. exception:: UnicodeDecodeError + + Raised when a Unicode-related error occurs during decoding. It is a subclass of + :exc:`UnicodeError`. + + .. versionadded:: 2.3 + + +.. exception:: UnicodeTranslateError + + Raised when a Unicode-related error occurs during translating. It is a subclass + of :exc:`UnicodeError`. + + .. versionadded:: 2.3 + + +.. exception:: ValueError + + Raised when a built-in operation or function receives an argument that has the + right type but an inappropriate value, and the situation is not described by a + more precise exception such as :exc:`IndexError`. + + +.. exception:: WindowsError + + Raised when a Windows-specific error occurs or when the error number does not + correspond to an :cdata:`errno` value. The :attr:`winerror` and + :attr:`strerror` values are created from the return values of the + :cfunc:`GetLastError` and :cfunc:`FormatMessage` functions from the Windows + Platform API. The :attr:`errno` value maps the :attr:`winerror` value to + corresponding ``errno.h`` values. This is a subclass of :exc:`OSError`. + + .. versionadded:: 2.0 + + .. versionchanged:: 2.5 + Previous versions put the :cfunc:`GetLastError` codes into :attr:`errno`. + + +.. exception:: ZeroDivisionError + + Raised when the second argument of a division or modulo operation is zero. The + associated value is a string indicating the type of the operands and the + operation. 
+ +The following exceptions are used as warning categories; see the :mod:`warnings` +module for more information. + + +.. exception:: Warning + + Base class for warning categories. + + +.. exception:: UserWarning + + Base class for warnings generated by user code. + + +.. exception:: DeprecationWarning + + Base class for warnings about deprecated features. + + +.. exception:: PendingDeprecationWarning + + Base class for warnings about features which will be deprecated in the future. + + +.. exception:: SyntaxWarning + + Base class for warnings about dubious syntax + + +.. exception:: RuntimeWarning + + Base class for warnings about dubious runtime behavior. + + +.. exception:: FutureWarning + + Base class for warnings about constructs that will change semantically in the + future. + + +.. exception:: ImportWarning + + Base class for warnings about probable mistakes in module imports. + + .. versionadded:: 2.5 + + +.. exception:: UnicodeWarning + + Base class for warnings related to Unicode. + + .. versionadded:: 2.5 + +The class hierarchy for built-in exceptions is: + + +.. literalinclude:: ../../Lib/test/exception_hierarchy.txt diff --git a/Doc/library/fcntl.rst b/Doc/library/fcntl.rst new file mode 100644 index 0000000..2d7bb9c --- /dev/null +++ b/Doc/library/fcntl.rst @@ -0,0 +1,155 @@ + +:mod:`fcntl` --- The :func:`fcntl` and :func:`ioctl` system calls +================================================================= + +.. module:: fcntl + :platform: Unix + :synopsis: The fcntl() and ioctl() system calls. +.. sectionauthor:: Jaap Vermeulen + + +.. index:: + pair: UNIX@Unix; file control + pair: UNIX@Unix; I/O control + +This module performs file control and I/O control on file descriptors. It is an +interface to the :cfunc:`fcntl` and :cfunc:`ioctl` Unix routines. + +All functions in this module take a file descriptor *fd* as their first +argument. 
This can be an integer file descriptor, such as returned by +``sys.stdin.fileno()``, or a file object, such as ``sys.stdin`` itself, which +provides a :meth:`fileno` which returns a genuine file descriptor. + +The module defines the following functions: + + +.. function:: fcntl(fd, op[, arg]) + + Perform the requested operation on file descriptor *fd* (file objects providing + a :meth:`fileno` method are accepted as well). The operation is defined by *op* + and is operating system dependent. These codes are also found in the + :mod:`fcntl` module. The argument *arg* is optional, and defaults to the integer + value ``0``. When present, it can either be an integer value, or a string. + With the argument missing or an integer value, the return value of this function + is the integer return value of the C :cfunc:`fcntl` call. When the argument is + a string it represents a binary structure, e.g. created by :func:`struct.pack`. + The binary data is copied to a buffer whose address is passed to the C + :cfunc:`fcntl` call. The return value after a successful call is the contents + of the buffer, converted to a string object. The length of the returned string + will be the same as the length of the *arg* argument. This is limited to 1024 + bytes. If the information returned in the buffer by the operating system is + larger than 1024 bytes, this is most likely to result in a segmentation + violation or a more subtle data corruption. + + If the :cfunc:`fcntl` fails, an :exc:`IOError` is raised. + + +.. function:: ioctl(fd, op[, arg[, mutate_flag]]) + + This function is identical to the :func:`fcntl` function, except that the + operations are typically defined in the library module :mod:`termios` and the + argument handling is even more complicated. 
+ + The parameter *arg* can be one of an integer, absent (treated identically to the + integer ``0``), an object supporting the read-only buffer interface (most likely + a plain Python string) or an object supporting the read-write buffer interface. + + In all but the last case, behaviour is as for the :func:`fcntl` function. + + If a mutable buffer is passed, then the behaviour is determined by the value of + the *mutate_flag* parameter. + + If it is false, the buffer's mutability is ignored and behaviour is as for a + read-only buffer, except that the 1024 byte limit mentioned above is avoided -- + so long as the buffer you pass is at least as long as what the operating system + wants to put there, things should work. + + If *mutate_flag* is true, then the buffer is (in effect) passed to the + underlying :func:`ioctl` system call, the latter's return code is passed back to + the calling Python, and the buffer's new contents reflect the action of the + :func:`ioctl`. This is a slight simplification, because if the supplied buffer + is less than 1024 bytes long it is first copied into a static buffer 1024 bytes + long which is then passed to :func:`ioctl` and copied back into the supplied + buffer. + + If *mutate_flag* is not supplied, then from Python 2.5 it defaults to true, + which is a change from versions 2.3 and 2.4. Supply the argument explicitly if + version portability is a priority. + + An example:: + + >>> import array, fcntl, struct, termios, os + >>> os.getpgrp() + 13341 + >>> struct.unpack('h', fcntl.ioctl(0, termios.TIOCGPGRP, " "))[0] + 13341 + >>> buf = array.array('h', [0]) + >>> fcntl.ioctl(0, termios.TIOCGPGRP, buf, 1) + 0 + >>> buf + array('h', [13341]) + + +.. function:: flock(fd, op) + + Perform the lock operation *op* on file descriptor *fd* (file objects providing + a :meth:`fileno` method are accepted as well). See the Unix manual + :manpage:`flock(3)` for details. (On some systems, this function is emulated + using :cfunc:`fcntl`.)
+ + +.. function:: lockf(fd, operation, [length, [start, [whence]]]) + + This is essentially a wrapper around the :func:`fcntl` locking calls. *fd* is + the file descriptor of the file to lock or unlock, and *operation* is one of the + following values: + + * :const:`LOCK_UN` -- unlock + * :const:`LOCK_SH` -- acquire a shared lock + * :const:`LOCK_EX` -- acquire an exclusive lock + + When *operation* is :const:`LOCK_SH` or :const:`LOCK_EX`, it can also be + bit-wise OR'd with :const:`LOCK_NB` to avoid blocking on lock acquisition. + If :const:`LOCK_NB` is used and the lock cannot be acquired, an + :exc:`IOError` will be raised and the exception will have an *errno* + attribute set to :const:`EACCES` or :const:`EAGAIN` (depending on the + operating system; for portability, check for both values). On at least some + systems, :const:`LOCK_EX` can only be used if the file descriptor refers to a + file opened for writing. + + *length* is the number of bytes to lock, *start* is the byte offset at which the + lock starts, relative to *whence*, and *whence* is as with :func:`fileobj.seek`, + specifically: + + * :const:`0` -- relative to the start of the file (:const:`SEEK_SET`) + * :const:`1` -- relative to the current buffer position (:const:`SEEK_CUR`) + * :const:`2` -- relative to the end of the file (:const:`SEEK_END`) + + The default for *start* is 0, which means to start at the beginning of the file. + The default for *length* is 0 which means to lock to the end of the file. The + default for *whence* is also 0. + +Examples (all on a SVR4 compliant system):: + + import struct, fcntl, os + + f = open(...) + rv = fcntl.fcntl(f, fcntl.F_SETFL, os.O_NDELAY) + + lockdata = struct.pack('hhllhh', fcntl.F_WRLCK, 0, 0, 0, 0, 0) + rv = fcntl.fcntl(f, fcntl.F_SETLKW, lockdata) + +Note that in the first example the return value variable *rv* will hold an +integer value; in the second example it will hold a string value. 
The structure +lay-out for the *lockdata* variable is system dependent --- therefore using the +:func:`flock` call may be better. + + +.. seealso:: + + Module :mod:`os` + If the locking flags :const:`O_SHLOCK` and :const:`O_EXLOCK` are present + in the :mod:`os` module, the :func:`os.open` function provides a more + platform-independent alternative to the :func:`lockf` and :func:`flock` + functions. + diff --git a/Doc/library/filecmp.rst b/Doc/library/filecmp.rst new file mode 100644 index 0000000..6004214 --- /dev/null +++ b/Doc/library/filecmp.rst @@ -0,0 +1,152 @@ + +:mod:`filecmp` --- File and Directory Comparisons +================================================= + +.. module:: filecmp + :synopsis: Compare files efficiently. +.. sectionauthor:: Moshe Zadka + + +The :mod:`filecmp` module defines functions to compare files and directories, +with various optional time/correctness trade-offs. + +The :mod:`filecmp` module defines the following functions: + + +.. function:: cmp(f1, f2[, shallow]) + + Compare the files named *f1* and *f2*, returning ``True`` if they seem equal, + ``False`` otherwise. + + Unless *shallow* is given and is false, files with identical :func:`os.stat` + signatures are taken to be equal. + + Files that were compared using this function will not be compared again unless + their :func:`os.stat` signature changes. + + Note that no external programs are called from this function, giving it + portability and efficiency. + + +.. function:: cmpfiles(dir1, dir2, common[, shallow]) + + Returns three lists of file names: *match*, *mismatch*, *errors*. *match* + contains the list of files that match in both directories, *mismatch* includes the + names of those that don't, and *errors* lists the names of files which could not + be compared. Files may be listed in *errors* because the user may lack + permission to read them or for many other reasons, but always because the comparison + could not be done for some reason.
+ + The *common* parameter is a list of file names found in both directories. The + *shallow* parameter has the same meaning and default value as for + :func:`filecmp.cmp`. + +Example:: + + >>> import filecmp + >>> filecmp.cmp('undoc.rst', 'undoc.rst') + True + >>> filecmp.cmp('undoc.rst', 'index.rst') + False + + +.. _dircmp-objects: + +The :class:`dircmp` class +------------------------- + +:class:`dircmp` instances are built using this constructor: + + +.. class:: dircmp(a, b[, ignore[, hide]]) + + Construct a new directory comparison object, to compare the directories *a* and + *b*. *ignore* is a list of names to ignore, and defaults to ``['RCS', 'CVS', + 'tags']``. *hide* is a list of names to hide, and defaults to ``[os.curdir, + os.pardir]``. + +The :class:`dircmp` class provides the following methods: + + +.. method:: dircmp.report() + + Print (to ``sys.stdout``) a comparison between *a* and *b*. + + +.. method:: dircmp.report_partial_closure() + + Print a comparison between *a* and *b* and common immediate subdirectories. + + +.. method:: dircmp.report_full_closure() + + Print a comparison between *a* and *b* and common subdirectories (recursively). + +The :class:`dircmp` offers a number of interesting attributes that may be used +to get various bits of information about the directory trees being compared. + +Note that via :meth:`__getattr__` hooks, all attributes are computed lazily, so +there is no speed penalty if only those attributes which are lightweight to +compute are used. + + +.. attribute:: dircmp.left_list + + Files and subdirectories in *a*, filtered by *hide* and *ignore*. + + +.. attribute:: dircmp.right_list + + Files and subdirectories in *b*, filtered by *hide* and *ignore*. + + +.. attribute:: dircmp.common + + Files and subdirectories in both *a* and *b*. + + +.. attribute:: dircmp.left_only + + Files and subdirectories only in *a*. + + +.. attribute:: dircmp.right_only + + Files and subdirectories only in *b*. + + +.. 
attribute:: dircmp.common_dirs + + Subdirectories in both *a* and *b*. + + +.. attribute:: dircmp.common_files + + Files in both *a* and *b* + + +.. attribute:: dircmp.common_funny + + Names in both *a* and *b*, such that the type differs between the directories, + or names for which :func:`os.stat` reports an error. + + +.. attribute:: dircmp.same_files + + Files which are identical in both *a* and *b*. + + +.. attribute:: dircmp.diff_files + + Files which are in both *a* and *b*, whose contents differ. + + +.. attribute:: dircmp.funny_files + + Files which are in both *a* and *b*, but could not be compared. + + +.. attribute:: dircmp.subdirs + + A dictionary mapping names in :attr:`common_dirs` to :class:`dircmp` objects. + diff --git a/Doc/library/fileformats.rst b/Doc/library/fileformats.rst new file mode 100644 index 0000000..c0c2eed --- /dev/null +++ b/Doc/library/fileformats.rst @@ -0,0 +1,18 @@ + +.. _fileformats: + +************ +File Formats +************ + +The modules described in this chapter parse various miscellaneous file formats +that aren't markup languages or are related to e-mail. + + +.. toctree:: + + csv.rst + configparser.rst + robotparser.rst + netrc.rst + xdrlib.rst diff --git a/Doc/library/fileinput.rst b/Doc/library/fileinput.rst new file mode 100644 index 0000000..d0a3ed9 --- /dev/null +++ b/Doc/library/fileinput.rst @@ -0,0 +1,183 @@ +:mod:`fileinput` --- Iterate over lines from multiple input streams +=================================================================== + +.. module:: fileinput + :synopsis: Loop over standard input or a list of files. +.. moduleauthor:: Guido van Rossum +.. sectionauthor:: Fred L. Drake, Jr. + + +This module implements a helper class and functions to quickly write a loop over +standard input or a list of files. 
+ +The typical use is:: + + import fileinput + for line in fileinput.input(): + process(line) + +This iterates over the lines of all files listed in ``sys.argv[1:]``, defaulting +to ``sys.stdin`` if the list is empty. If a filename is ``'-'``, it is also +replaced by ``sys.stdin``. To specify an alternative list of filenames, pass it +as the first argument to :func:`input`. A single file name is also allowed. + +All files are opened in text mode by default, but you can override this by +specifying the *mode* parameter in the call to :func:`input` or +:class:`FileInput()`. If an I/O error occurs during opening or reading a file, +:exc:`IOError` is raised. + +If ``sys.stdin`` is used more than once, the second and further use will return +no lines, except perhaps for interactive use, or if it has been explicitly reset +(e.g. using ``sys.stdin.seek(0)``). + +Empty files are opened and immediately closed; the only time their presence in +the list of filenames is noticeable at all is when the last file opened is +empty. + +Lines are returned with any newlines intact, which means that the last line in +a file may not have one. + +You can control how files are opened by providing an opening hook via the +*openhook* parameter to :func:`fileinput.input` or :class:`FileInput()`. The +hook must be a function that takes two arguments, *filename* and *mode*, and +returns an accordingly opened file-like object. Two useful hooks are already +provided by this module. + +The following function is the primary interface of this module: + + +.. function:: input([files[, inplace[, backup[, mode[, openhook]]]]]) + + Create an instance of the :class:`FileInput` class. The instance will be used + as global state for the functions of this module, and is also returned to use + during iteration. The parameters to this function will be passed along to the + constructor of the :class:`FileInput` class. + + .. versionchanged:: 2.5 + Added the *mode* and *openhook* parameters. 
+ +The following functions use the global state created by :func:`fileinput.input`; +if there is no active state, :exc:`RuntimeError` is raised. + + +.. function:: filename() + + Return the name of the file currently being read. Before the first line has + been read, returns ``None``. + + +.. function:: fileno() + + Return the integer "file descriptor" for the current file. When no file is + opened (before the first line and between files), returns ``-1``. + + .. versionadded:: 2.5 + + +.. function:: lineno() + + Return the cumulative line number of the line that has just been read. Before + the first line has been read, returns ``0``. After the last line of the last + file has been read, returns the line number of that line. + + +.. function:: filelineno() + + Return the line number in the current file. Before the first line has been + read, returns ``0``. After the last line of the last file has been read, + returns the line number of that line within the file. + + +.. function:: isfirstline() + + Returns true if the line just read is the first line of its file, otherwise + returns false. + + +.. function:: isstdin() + + Returns true if the last line was read from ``sys.stdin``, otherwise returns + false. + + +.. function:: nextfile() + + Close the current file so that the next iteration will read the first line from + the next file (if any); lines not read from the file will not count towards the + cumulative line count. The filename is not changed until after the first line + of the next file has been read. Before the first line has been read, this + function has no effect; it cannot be used to skip the first file. After the + last line of the last file has been read, this function has no effect. + + +.. function:: close() + + Close the sequence. + +The class which implements the sequence behavior provided by the module is +available for subclassing as well: + + +.. 
class:: FileInput([files[, inplace[, backup[, mode[, openhook]]]]]) + + Class :class:`FileInput` is the implementation; its methods :meth:`filename`, + :meth:`fileno`, :meth:`lineno`, :meth:`filelineno`, :meth:`isfirstline`, + :meth:`isstdin`, :meth:`nextfile` and :meth:`close` correspond to the functions + of the same name in the module. In addition it has a :meth:`readline` method + which returns the next input line, and a :meth:`__getitem__` method which + implements the sequence behavior. The sequence must be accessed in strictly + sequential order; random access and :meth:`readline` cannot be mixed. + + With *mode* you can specify which file mode will be passed to :func:`open`. It + must be one of ``'r'``, ``'rU'``, ``'U'`` and ``'rb'``. + + The *openhook*, when given, must be a function that takes two arguments, + *filename* and *mode*, and returns an accordingly opened file-like object. You + cannot use *inplace* and *openhook* together. + + .. versionchanged:: 2.5 + Added the *mode* and *openhook* parameters. + +**Optional in-place filtering:** if the keyword argument ``inplace=1`` is passed +to :func:`fileinput.input` or to the :class:`FileInput` constructor, the file is +moved to a backup file and standard output is directed to the input file (if a +file of the same name as the backup file already exists, it will be replaced +silently). This makes it possible to write a filter that rewrites its input +file in place. If the *backup* parameter is given (typically as +``backup='.<some extension>'``), it specifies the extension for the backup file, +and the backup file remains around; by default, the extension is ``'.bak'`` and +it is deleted when the output file is closed. In-place filtering is disabled +when standard input is read. + +**Caveat:** The current implementation does not work for MS-DOS 8+3 filesystems. + +The two following opening hooks are provided by this module: + + +..
function:: hook_compressed(filename, mode) + + Transparently opens files compressed with gzip and bzip2 (recognized by the + extensions ``'.gz'`` and ``'.bz2'``) using the :mod:`gzip` and :mod:`bz2` + modules. If the filename extension is not ``'.gz'`` or ``'.bz2'``, the file is + opened normally (ie, using :func:`open` without any decompression). + + Usage example: ``fi = fileinput.FileInput(openhook=fileinput.hook_compressed)`` + + .. versionadded:: 2.5 + + +.. function:: hook_encoded(encoding) + + Returns a hook which opens each file with :func:`codecs.open`, using the given + *encoding* to read the file. + + Usage example: ``fi = + fileinput.FileInput(openhook=fileinput.hook_encoded("iso-8859-1"))`` + + .. note:: + + With this hook, :class:`FileInput` might return Unicode strings depending on the + specified *encoding*. + + .. versionadded:: 2.5 + diff --git a/Doc/library/filesys.rst b/Doc/library/filesys.rst new file mode 100644 index 0000000..e5b5e44 --- /dev/null +++ b/Doc/library/filesys.rst @@ -0,0 +1,38 @@ + +.. _filesys: + +************************* +File and Directory Access +************************* + +The modules described in this chapter deal with disk files and directories. For +example, there are modules for reading the properties of files, manipulating +paths in a portable way, and creating temporary files. The full list of modules +in this chapter is: + + +.. toctree:: + + os.path.rst + fileinput.rst + stat.rst + statvfs.rst + filecmp.rst + tempfile.rst + glob.rst + fnmatch.rst + linecache.rst + shutil.rst + dircache.rst + macpath.rst + + +.. seealso:: + + Section :ref:`bltin-file-objects` + A description of Python's built-in file objects. + + Module :mod:`os` + Operating system interfaces, including functions to work with files at a lower + level than the built-in file object. 
+ diff --git a/Doc/library/fnmatch.rst b/Doc/library/fnmatch.rst new file mode 100644 index 0000000..244bad9 --- /dev/null +++ b/Doc/library/fnmatch.rst @@ -0,0 +1,91 @@ + +:mod:`fnmatch` --- Unix filename pattern matching +================================================= + +.. module:: fnmatch + :synopsis: Unix shell style filename pattern matching. + + +.. index:: single: filenames; wildcard expansion + +.. index:: module: re + +This module provides support for Unix shell-style wildcards, which are *not* the +same as regular expressions (which are documented in the :mod:`re` module). The +special characters used in shell-style wildcards are: + ++------------+------------------------------------+ +| Pattern | Meaning | ++============+====================================+ +| ``*`` | matches everything | ++------------+------------------------------------+ +| ``?`` | matches any single character | ++------------+------------------------------------+ +| ``[seq]`` | matches any character in *seq* | ++------------+------------------------------------+ +| ``[!seq]`` | matches any character not in *seq* | ++------------+------------------------------------+ + +.. index:: module: glob + +Note that the filename separator (``'/'`` on Unix) is *not* special to this +module. See module :mod:`glob` for pathname expansion (:mod:`glob` uses +:func:`fnmatch` to match pathname segments). Similarly, filenames starting with +a period are not special for this module, and are matched by the ``*`` and ``?`` +patterns. + + +.. function:: fnmatch(filename, pattern) + + Test whether the *filename* string matches the *pattern* string, returning true + or false. If the operating system is case-insensitive, then both parameters + will be normalized to all lower- or upper-case before the comparison is + performed. If you require a case-sensitive comparison regardless of whether + that's standard for your operating system, use :func:`fnmatchcase` instead. 
+ + This example will print all file names in the current directory with the + extension ``.txt``:: + + import fnmatch + import os + + for file in os.listdir('.'): + if fnmatch.fnmatch(file, '*.txt'): + print file + + +.. function:: fnmatchcase(filename, pattern) + + Test whether *filename* matches *pattern*, returning true or false; the + comparison is case-sensitive. + + +.. function:: filter(names, pattern) + + Return the subset of the list of *names* that match *pattern*. It is the same as + ``[n for n in names if fnmatch(n, pattern)]``, but implemented more efficiently. + + .. versionadded:: 2.2 + + +.. function:: translate(pattern) + + Return the shell-style *pattern* converted to a regular expression. + + Example:: + + >>> import fnmatch, re + >>> + >>> regex = fnmatch.translate('*.txt') + >>> regex + '.*\\.txt$' + >>> reobj = re.compile(regex) + >>> print reobj.match('foobar.txt') + <_sre.SRE_Match object at 0x...> + + +.. seealso:: + + Module :mod:`glob` + Unix shell-style path expansion. + diff --git a/Doc/library/formatter.rst b/Doc/library/formatter.rst new file mode 100644 index 0000000..2774a2b --- /dev/null +++ b/Doc/library/formatter.rst @@ -0,0 +1,350 @@ + +:mod:`formatter` --- Generic output formatting +============================================== + +.. module:: formatter + :synopsis: Generic output formatter and device interface. + + +.. index:: single: HTMLParser (class in htmllib) + +This module supports two interface definitions, each with multiple +implementations. The *formatter* interface is used by the :class:`HTMLParser` +class of the :mod:`htmllib` module, and the *writer* interface is required by +the formatter interface. + +Formatter objects transform an abstract flow of formatting events into specific +output events on writer objects. 
Formatters manage several stack structures to +allow various properties of a writer object to be changed and restored; writers +need not be able to handle relative changes nor any sort of "change back" +operation. Specific writer properties which may be controlled via formatter +objects are horizontal alignment, font, and left margin indentations. A +mechanism is provided which supports providing arbitrary, non-exclusive style +settings to a writer as well. Additional interfaces facilitate formatting +events which are not reversible, such as paragraph separation. + +Writer objects encapsulate device interfaces. Abstract devices, such as file +formats, are supported as well as physical devices. The provided +implementations all work with abstract devices. The interface makes available +mechanisms for setting the properties which formatter objects manage and +inserting data into the output. + + +.. _formatter-interface: + +The Formatter Interface +----------------------- + +Interfaces to create formatters are dependent on the specific formatter class +being instantiated. The interfaces described below are the required interfaces +which all formatters must support once initialized. + +One data element is defined at the module level: + + +.. data:: AS_IS + + Value which can be used in the font specification passed to the ``push_font()`` + method described below, or as the new value to any other ``push_property()`` + method. Pushing the ``AS_IS`` value allows the corresponding ``pop_property()`` + method to be called without having to track whether the property was changed. + +The following attributes are defined for formatter instance objects: + + +.. attribute:: formatter.writer + + The writer instance with which the formatter interacts. + + +.. method:: formatter.end_paragraph(blanklines) + + Close any open paragraphs and insert at least *blanklines* before the next + paragraph. + + +.. 
method:: formatter.add_line_break() + + Add a hard line break if one does not already exist. This does not break the + logical paragraph. + + +.. method:: formatter.add_hor_rule(*args, **kw) + + Insert a horizontal rule in the output. A hard break is inserted if there is + data in the current paragraph, but the logical paragraph is not broken. The + arguments and keywords are passed on to the writer's :meth:`send_hor_rule` + method. + + +.. method:: formatter.add_flowing_data(data) + + Provide data which should be formatted with collapsed whitespace. Whitespace + from preceding and successive calls to :meth:`add_flowing_data` is considered as + well when the whitespace collapse is performed. The data which is passed to + this method is expected to be word-wrapped by the output device. Note that any + word-wrapping still must be performed by the writer object due to the need to + rely on device and font information. + + +.. method:: formatter.add_literal_data(data) + + Provide data which should be passed to the writer unchanged. Whitespace, + including newline and tab characters, is considered legal in the value of + *data*. + + +.. method:: formatter.add_label_data(format, counter) + + Insert a label which should be placed to the left of the current left margin. + This should be used for constructing bulleted or numbered lists. If the + *format* value is a string, it is interpreted as a format specification for + *counter*, which should be an integer. The result of this formatting becomes the + value of the label; if *format* is not a string it is used as the label value + directly. The label value is passed as the only argument to the writer's + :meth:`send_label_data` method. Interpretation of non-string label values is + dependent on the associated writer. + + Format specifications are strings which, in combination with a counter value, + are used to compute label values.
Each character in the format string is copied + to the label value, with some characters recognized to indicate a transform on + the counter value. Specifically, the character ``'1'`` represents the counter + value formatted as an Arabic number, the characters ``'A'`` and ``'a'`` + represent alphabetic representations of the counter value in upper and lower + case, respectively, and ``'I'`` and ``'i'`` represent the counter value in Roman + numerals, in upper and lower case. Note that the alphabetic and Roman + transforms require that the counter value be greater than zero. + + +.. method:: formatter.flush_softspace() + + Send any pending whitespace buffered from a previous call to + :meth:`add_flowing_data` to the associated writer object. This should be called + before any direct manipulation of the writer object. + + +.. method:: formatter.push_alignment(align) + + Push a new alignment setting onto the alignment stack. This may be + :const:`AS_IS` if no change is desired. If the alignment value is changed from + the previous setting, the writer's :meth:`new_alignment` method is called with + the *align* value. + + +.. method:: formatter.pop_alignment() + + Restore the previous alignment. + + +.. method:: formatter.push_font((size, italic, bold, teletype)) + + Change some or all font properties of the writer object. Properties which are + not set to :const:`AS_IS` are set to the values passed in while others are + maintained at their current settings. The writer's :meth:`new_font` method is + called with the fully resolved font specification. + + +.. method:: formatter.pop_font() + + Restore the previous font. + + +.. method:: formatter.push_margin(margin) + + Increase the number of left margin indentations by one, associating the logical + tag *margin* with the new indentation. The initial margin level is ``0``. + Changed values of the logical tag must be true values; false values other than + :const:`AS_IS` are not sufficient to change the margin. + + +..
method:: formatter.pop_margin() + + Restore the previous margin. + + +.. method:: formatter.push_style(*styles) + + Push any number of arbitrary style specifications. All styles are pushed onto + the styles stack in order. A tuple representing the entire stack, including + :const:`AS_IS` values, is passed to the writer's :meth:`new_styles` method. + + +.. method:: formatter.pop_style([n=1]) + + Pop the last *n* style specifications passed to :meth:`push_style`. A tuple + representing the revised stack, including :const:`AS_IS` values, is passed to + the writer's :meth:`new_styles` method. + + +.. method:: formatter.set_spacing(spacing) + + Set the spacing style for the writer. + + +.. method:: formatter.assert_line_data([flag=1]) + + Inform the formatter that data has been added to the current paragraph + out-of-band. This should be used when the writer has been manipulated + directly. The optional *flag* argument can be set to false if the writer + manipulations produced a hard line break at the end of the output. + + +.. _formatter-impls: + +Formatter Implementations +------------------------- + +Two implementations of formatter objects are provided by this module. Most +applications may use one of these classes without modification or subclassing. + + +.. class:: NullFormatter([writer]) + + A formatter which does nothing. If *writer* is omitted, a :class:`NullWriter` + instance is created. No methods of the writer are called by + :class:`NullFormatter` instances. Implementations should inherit from this + class if implementing a writer interface but don't need to inherit any + implementation. + + +.. class:: AbstractFormatter(writer) + + The standard formatter. This implementation has demonstrated wide applicability + to many writers, and may be used directly in most circumstances. It has been + used to implement a full-featured World Wide Web browser. + + +.. 
_writer-interface: + +The Writer Interface +-------------------- + +Interfaces to create writers are dependent on the specific writer class being +instantiated. The interfaces described below are the required interfaces which +all writers must support once initialized. Note that while most applications can +use the :class:`AbstractFormatter` class as a formatter, the writer must +typically be provided by the application. + + +.. method:: writer.flush() + + Flush any buffered output or device control events. + + +.. method:: writer.new_alignment(align) + + Set the alignment style. The *align* value can be any object, but by convention + is a string or ``None``, where ``None`` indicates that the writer's "preferred" + alignment should be used. Conventional *align* values are ``'left'``, + ``'center'``, ``'right'``, and ``'justify'``. + + +.. method:: writer.new_font(font) + + Set the font style. The value of *font* will be ``None``, indicating that the + device's default font should be used, or a tuple of the form ``(``*size*, + *italic*, *bold*, *teletype*``)``. Size will be a string indicating the size of + font that should be used; specific strings and their interpretation must be + defined by the application. The *italic*, *bold*, and *teletype* values are + Boolean values specifying which of those font attributes should be used. + + +.. method:: writer.new_margin(margin, level) + + Set the margin level to the integer *level* and the logical tag to *margin*. + Interpretation of the logical tag is at the writer's discretion; the only + restriction on the value of the logical tag is that it not be a false value for + non-zero values of *level*. + + +.. method:: writer.new_spacing(spacing) + + Set the spacing style to *spacing*. + + +.. method:: writer.new_styles(styles) + + Set additional styles. The *styles* value is a tuple of arbitrary values; the + value :const:`AS_IS` should be ignored. 
The *styles* tuple may be interpreted + either as a set or as a stack depending on the requirements of the application + and writer implementation. + + +.. method:: writer.send_line_break() + + Break the current line. + + +.. method:: writer.send_paragraph(blankline) + + Produce a paragraph separation of at least *blankline* blank lines, or the + equivalent. The *blankline* value will be an integer. Note that the + implementation will receive a call to :meth:`send_line_break` before this call + if a line break is needed; this method should not include ending the last line + of the paragraph. It is only responsible for vertical spacing between + paragraphs. + + +.. method:: writer.send_hor_rule(*args, **kw) + + Display a horizontal rule on the output device. The arguments to this method + are entirely application- and writer-specific, and should be interpreted with + care. The method implementation may assume that a line break has already been + issued via :meth:`send_line_break`. + + +.. method:: writer.send_flowing_data(data) + + Output character data which may be word-wrapped and re-flowed as needed. Within + any sequence of calls to this method, the writer may assume that spans of + multiple whitespace characters have been collapsed to single space characters. + + +.. method:: writer.send_literal_data(data) + + Output character data which has already been formatted for display. Generally, + this should be interpreted to mean that line breaks indicated by newline + characters should be preserved and no new line breaks should be introduced. The + data may contain embedded newline and tab characters, unlike data provided to + the :meth:`send_flowing_data` interface. + + +.. method:: writer.send_label_data(data) + + Set *data* to the left of the current left margin, if possible. The value of + *data* is not restricted; treatment of non-string values is entirely + application- and writer-dependent. This method will only be called at the + beginning of a line.
+ + +.. _writer-impls: + +Writer Implementations +---------------------- + +Three implementations of the writer object interface are provided as examples by +this module. Most applications will need to derive new writer classes from the +:class:`NullWriter` class. + + +.. class:: NullWriter() + + A writer which only provides the interface definition; no actions are taken on + any methods. This should be the base class for all writers which do not need to + inherit any implementation methods. + + +.. class:: AbstractWriter() + + A writer which can be used in debugging formatters, but not much else. Each + method simply announces itself by printing its name and arguments on standard + output. + + +.. class:: DumbWriter([file[, maxcol=72]]) + + Simple writer class which writes output on the file object passed in as *file* + or, if *file* is omitted, on standard output. The output is simply word-wrapped + to the number of columns specified by *maxcol*. This class is suitable for + reflowing a sequence of paragraphs. + diff --git a/Doc/library/fpectl.rst b/Doc/library/fpectl.rst new file mode 100644 index 0000000..ef030f0 --- /dev/null +++ b/Doc/library/fpectl.rst @@ -0,0 +1,120 @@ + +:mod:`fpectl` --- Floating point exception control +================================================== + +.. module:: fpectl + :platform: Unix + :synopsis: Provide control for floating point exception handling. +.. moduleauthor:: Lee Busby +.. sectionauthor:: Lee Busby + + +.. note:: + + The :mod:`fpectl` module is not built by default, and its usage is discouraged + and may be dangerous except in the hands of experts. See also the section + :ref:`fpectl-limitations` on limitations for more details. + +.. index:: single: IEEE-754 + +Most computers carry out floating point operations in conformance with the +so-called IEEE-754 standard. On any real computer, some floating point +operations produce results that cannot be expressed as a normal floating point +value. 
For example, try :: + + >>> import math + >>> math.exp(1000) + inf + >>> math.exp(1000) / math.exp(1000) + nan + +(The example above will work on many platforms. DEC Alpha may be one exception.) +"Inf" is a special, non-numeric value in IEEE-754 that stands for "infinity", +and "nan" means "not a number." Note that, other than the non-numeric results, +nothing special happened when you asked Python to carry out those calculations. +That is in fact the default behaviour prescribed in the IEEE-754 standard, and +if it works for you, stop reading now. + +In some circumstances, it would be better to raise an exception and stop +processing at the point where the faulty operation was attempted. The +:mod:`fpectl` module is for use in that situation. It provides control over +floating point units from several hardware manufacturers, allowing the user to +turn on the generation of :const:`SIGFPE` whenever any of the IEEE-754 +exceptions Division by Zero, Overflow, or Invalid Operation occurs. In tandem +with a pair of wrapper macros that are inserted into the C code comprising your +python system, :const:`SIGFPE` is trapped and converted into the Python +:exc:`FloatingPointError` exception. + +The :mod:`fpectl` module defines the following functions and may raise the given +exception: + + +.. function:: turnon_sigfpe() + + Turn on the generation of :const:`SIGFPE`, and set up an appropriate signal + handler. + + +.. function:: turnoff_sigfpe() + + Reset default handling of floating point exceptions. + + +.. exception:: FloatingPointError + + After :func:`turnon_sigfpe` has been executed, a floating point operation that + raises one of the IEEE-754 exceptions Division by Zero, Overflow, or Invalid + operation will in turn raise this standard Python exception. + + +.. _fpectl-example: + +Example +------- + +The following example demonstrates how to start up and test operation of the +:mod:`fpectl` module. 
:: + + >>> import fpectl + >>> import fpetest + >>> fpectl.turnon_sigfpe() + >>> fpetest.test() + overflow PASS + FloatingPointError: Overflow + + div by 0 PASS + FloatingPointError: Division by zero + [ more output from test elided ] + >>> import math + >>> math.exp(1000) + Traceback (most recent call last): + File "<stdin>", line 1, in ? + FloatingPointError: in math_1 + + +.. _fpectl-limitations: + +Limitations and other considerations +------------------------------------ + +Setting up a given processor to trap IEEE-754 floating point errors currently +requires custom code on a per-architecture basis. You may have to modify +:mod:`fpectl` to control your particular hardware. + +Conversion of an IEEE-754 exception to a Python exception requires that the +wrapper macros ``PyFPE_START_PROTECT`` and ``PyFPE_END_PROTECT`` be inserted +into your code in an appropriate fashion. Python itself has been modified to +support the :mod:`fpectl` module, but many other codes of interest to numerical +analysts have not. + +The :mod:`fpectl` module is not thread-safe. + + +.. seealso:: + + Some files in the source distribution may be interesting in learning more about + how this module operates. The include file :file:`Include/pyfpe.h` discusses the + implementation of this module at some length. :file:`Modules/fpetestmodule.c` + gives several examples of use. Many additional examples can be found in + :file:`Objects/floatobject.c`. + diff --git a/Doc/library/fpformat.rst b/Doc/library/fpformat.rst new file mode 100644 index 0000000..33655fb --- /dev/null +++ b/Doc/library/fpformat.rst @@ -0,0 +1,56 @@ + +:mod:`fpformat` --- Floating point conversions +============================================== + +.. module:: fpformat + :synopsis: General floating point formatting functions. +.. sectionauthor:: Moshe Zadka + + +The :mod:`fpformat` module defines functions for dealing with floating point +number representations in 100% pure Python. + +..
note:: + + This module is unneeded: everything here could be done via the ``%`` string + interpolation operator. + +The :mod:`fpformat` module defines the following functions and an exception: + + +.. function:: fix(x, digs) + + Format *x* as ``[-]ddd.ddd`` with *digs* digits after the point and at least one + digit before. If ``digs <= 0``, the decimal point is suppressed. + + *x* can be either a number or a string that looks like one. *digs* is an + integer. + + Return value is a string. + + +.. function:: sci(x, digs) + + Format *x* as ``[-]d.dddE[+-]ddd`` with *digs* digits after the point and + exactly one digit before. If ``digs <= 0``, one digit is kept and the point is + suppressed. + + *x* can be either a real number, or a string that looks like one. *digs* is an + integer. + + Return value is a string. + + +.. exception:: NotANumber + + Exception raised when a string passed to :func:`fix` or :func:`sci` as the *x* + parameter does not look like a number. This is a subclass of :exc:`ValueError` + when the standard exceptions are strings. The exception value is the improperly + formatted string that caused the exception to be raised. + +Example:: + + >>> import fpformat + >>> fpformat.fix(1.23, 1) + '1.2' + diff --git a/Doc/library/framework.rst b/Doc/library/framework.rst new file mode 100644 index 0000000..c665fb7 --- /dev/null +++ b/Doc/library/framework.rst @@ -0,0 +1,335 @@ + +:mod:`FrameWork` --- Interactive application framework +====================================================== + +.. module:: FrameWork + :platform: Mac + :synopsis: Interactive application framework. + + +The :mod:`FrameWork` module contains classes that together provide a framework +for an interactive Macintosh application. The programmer builds an application +by creating subclasses that override various methods of the base classes, +thereby implementing the functionality wanted. Overriding functionality can +often be done on various different levels, i.e.
to handle clicks in a single +dialog window in a non-standard way it is not necessary to override the complete +event handling. + +Work on the :mod:`FrameWork` has pretty much stopped, now that :mod:`PyObjC` is +available for full Cocoa access from Python, and the documentation describes +only the most important functionality, and not in the most logical manner at +that. Examine the source or the examples for more details. The following are +some comments posted on the MacPython newsgroup about the strengths and +limitations of :mod:`FrameWork`: + + +.. epigraph:: + + The strong point of :mod:`FrameWork` is that it allows you to break into the + control-flow at many different places. :mod:`W`, for instance, uses a different + way to enable/disable menus and that plugs right in leaving the rest intact. + The weak points of :mod:`FrameWork` are that it has no abstract command + interface (but that shouldn't be difficult), that its dialog support is minimal + and that its control/toolbar support is non-existent. + +The :mod:`FrameWork` module defines the following functions: + + +.. function:: Application() + + An object representing the complete application. See below for a description of + the methods. The default :meth:`__init__` routine creates an empty window + dictionary and a menu bar with an apple menu. + + +.. function:: MenuBar() + + An object representing the menubar. This object is usually not created by the + user. + + +.. function:: Menu(bar, title[, after]) + + An object representing a menu. Upon creation you pass the ``MenuBar`` the menu + appears in, the *title* string and a position (1-based) *after* where the menu + should appear (default: at the end). + + +.. function:: MenuItem(menu, title[, shortcut, callback]) + + Create a menu item object. The arguments are the menu to create, the item title + string and optionally the keyboard shortcut and a callback routine. 
The callback + is called with the arguments menu-id, item number within menu (1-based), current + front window and the event record. + + Instead of a callable object the callback can also be a string. In this case + menu selection causes the lookup of a method in the topmost window and the + application. The method name is the callback string with ``'domenu_'`` + prepended. + + Calling the ``MenuBar`` :meth:`fixmenudimstate` method sets the correct dimming + for all menu items based on the current front window. + + +.. function:: Separator(menu) + + Add a separator to the end of a menu. + + +.. function:: SubMenu(menu, label) + + Create a submenu named *label* under menu *menu*. The menu object is returned. + + +.. function:: Window(parent) + + Creates a (modeless) window. *Parent* is the application object to which the + window belongs. The window is not displayed until later. + + +.. function:: DialogWindow(parent) + + Creates a modeless dialog window. + + +.. function:: windowbounds(width, height) + + Return a ``(left, top, right, bottom)`` tuple suitable for creation of a window + of given width and height. The window will be staggered with respect to previous + windows, and an attempt is made to keep the whole window on-screen. However, the + window will however always be the exact size given, so parts may be offscreen. + + +.. function:: setwatchcursor() + + Set the mouse cursor to a watch. + + +.. function:: setarrowcursor() + + Set the mouse cursor to an arrow. + + +.. _application-objects: + +Application Objects +------------------- + +Application objects have the following methods, among others: + + +.. method:: Application.makeusermenus() + + Override this method if you need menus in your application. Append the menus to + the attribute :attr:`menubar`. + + +.. method:: Application.getabouttext() + + Override this method to return a text string describing your application. 
+ Alternatively, override the :meth:`do_about` method for more elaborate "about" + messages. + + +.. method:: Application.mainloop([mask[, wait]]) + + This routine is the main event loop, call it to set your application rolling. + *Mask* is the mask of events you want to handle, *wait* is the number of ticks + you want to leave to other concurrent application (default 0, which is probably + not a good idea). While raising *self* to exit the mainloop is still supported + it is not recommended: call ``self._quit()`` instead. + + The event loop is split into many small parts, each of which can be overridden. + The default methods take care of dispatching events to windows and dialogs, + handling drags and resizes, Apple Events, events for non-FrameWork windows, etc. + + In general, all event handlers should return ``1`` if the event is fully handled + and ``0`` otherwise (because the front window was not a FrameWork window, for + instance). This is needed so that update events and such can be passed on to + other windows like the Sioux console window. Calling :func:`MacOS.HandleEvent` + is not allowed within *our_dispatch* or its callees, since this may result in an + infinite loop if the code is called through the Python inner-loop event handler. + + +.. method:: Application.asyncevents(onoff) + + Call this method with a nonzero parameter to enable asynchronous event handling. + This will tell the inner interpreter loop to call the application event handler + *async_dispatch* whenever events are available. This will cause FrameWork window + updates and the user interface to remain working during long computations, but + will slow the interpreter down and may cause surprising results in non-reentrant + code (such as FrameWork itself). By default *async_dispatch* will immediately + call *our_dispatch* but you may override this to handle only certain events + asynchronously. Events you do not handle will be passed to Sioux and such. + + The old on/off value is returned. 
+ + +.. method:: Application._quit() + + Terminate the running :meth:`mainloop` call at the next convenient moment. + + +.. method:: Application.do_char(c, event) + + The user typed character *c*. The complete details of the event can be found in + the *event* structure. This method can also be provided in a ``Window`` object, + which overrides the application-wide handler if the window is frontmost. + + +.. method:: Application.do_dialogevent(event) + + Called early in the event loop to handle modeless dialog events. The default + method simply dispatches the event to the relevant dialog (not through the + ``DialogWindow`` object involved). Override if you need special handling of + dialog events (keyboard shortcuts, etc). + + +.. method:: Application.idle(event) + + Called by the main event loop when no events are available. The null-event is + passed (so you can look at mouse position, etc). + + +.. _window-objects: + +Window Objects +-------------- + +Window objects have the following methods, among others: + + +.. method:: Window.open() + + Override this method to open a window. Store the MacOS window-id in + :attr:`self.wid` and call the :meth:`do_postopen` method to register the window + with the parent application. + + +.. method:: Window.close() + + Override this method to do any special processing on window close. Call the + :meth:`do_postclose` method to cleanup the parent state. + + +.. method:: Window.do_postresize(width, height, macoswindowid) + + Called after the window is resized. Override if more needs to be done than + calling ``InvalRect``. + + +.. method:: Window.do_contentclick(local, modifiers, event) + + The user clicked in the content part of a window. The arguments are the + coordinates (window-relative), the key modifiers and the raw event. + + +.. method:: Window.do_update(macoswindowid, event) + + An update event for the window was received. Redraw the window. + + +.. 
method:: Window.do_activate(activate, event) + + The window was activated (``activate == 1``) or deactivated (``activate == 0``). + Handle things like focus highlighting, etc. + + +.. _controlswindow-object: + +ControlsWindow Object +--------------------- + +ControlsWindow objects have the following methods besides those of ``Window`` +objects: + + +.. method:: ControlsWindow.do_controlhit(window, control, pcode, event) + + Part *pcode* of control *control* was hit by the user. Tracking and such has + already been taken care of. + + +.. _scrolledwindow-object: + +ScrolledWindow Object +--------------------- + +ScrolledWindow objects are ControlsWindow objects with the following extra +methods: + + +.. method:: ScrolledWindow.scrollbars([wantx[, wanty]]) + + Create (or destroy) horizontal and vertical scrollbars. The arguments specify + which you want (default: both). The scrollbars always have minimum ``0`` and + maximum ``32767``. + + +.. method:: ScrolledWindow.getscrollbarvalues() + + You must supply this method. It should return a tuple ``(x, y)`` giving the + current position of the scrollbars (between ``0`` and ``32767``). You can return + ``None`` for either to indicate the whole document is visible in that direction. + + +.. method:: ScrolledWindow.updatescrollbars() + + Call this method when the document has changed. It will call + :meth:`getscrollbarvalues` and update the scrollbars. + + +.. method:: ScrolledWindow.scrollbar_callback(which, what, value) + + Supplied by you and called after user interaction. *which* will be ``'x'`` or + ``'y'``, *what* will be ``'-'``, ``'--'``, ``'set'``, ``'++'`` or ``'+'``. For + ``'set'``, *value* will contain the new scrollbar position. + + +.. method:: ScrolledWindow.scalebarvalues(absmin, absmax, curmin, curmax) + + Auxiliary method to help you calculate values to return from + :meth:`getscrollbarvalues`. 
You pass document minimum and maximum value and + topmost (leftmost) and bottommost (rightmost) visible values and it returns the + correct number or ``None``. + + +.. method:: ScrolledWindow.do_activate(onoff, event) + + Takes care of dimming/highlighting scrollbars when a window becomes frontmost. + If you override this method, call this one at the end of your method. + + +.. method:: ScrolledWindow.do_postresize(width, height, window) + + Moves scrollbars to the correct position. Call this method initially if you + override it. + + +.. method:: ScrolledWindow.do_controlhit(window, control, pcode, event) + + Handles scrollbar interaction. If you override it call this method first, a + nonzero return value indicates the hit was in the scrollbars and has been + handled. + + +.. _dialogwindow-objects: + +DialogWindow Objects +-------------------- + +DialogWindow objects have the following methods besides those of ``Window`` +objects: + + +.. method:: DialogWindow.open(resid) + + Create the dialog window, from the DLOG resource with id *resid*. The dialog + object is stored in :attr:`self.wid`. + + +.. method:: DialogWindow.do_itemhit(item, event) + + Item number *item* was hit. You are responsible for redrawing toggle buttons, + etc. + diff --git a/Doc/library/frameworks.rst b/Doc/library/frameworks.rst new file mode 100644 index 0000000..5d8dad5 --- /dev/null +++ b/Doc/library/frameworks.rst @@ -0,0 +1,18 @@ + +.. _frameworks: + +****************** +Program Frameworks +****************** + +The modules described in this chapter are frameworks that will largely dictate +the structure of your program. Currently the modules described here are all +oriented toward writing command-line interfaces. + +The full list of modules described in this chapter is: + + +.. 
toctree:: + + cmd.rst + shlex.rst diff --git a/Doc/library/ftplib.rst b/Doc/library/ftplib.rst new file mode 100644 index 0000000..60e88cf --- /dev/null +++ b/Doc/library/ftplib.rst @@ -0,0 +1,320 @@ + +:mod:`ftplib` --- FTP protocol client +===================================== + +.. module:: ftplib + :synopsis: FTP protocol client (requires sockets). + + +.. index:: + pair: FTP; protocol + single: FTP; ftplib (standard module) + +This module defines the class :class:`FTP` and a few related items. The +:class:`FTP` class implements the client side of the FTP protocol. You can use +this to write Python programs that perform a variety of automated FTP jobs, such +as mirroring other ftp servers. It is also used by the module :mod:`urllib` to +handle URLs that use FTP. For more information on FTP (File Transfer Protocol), +see Internet :rfc:`959`. + +Here's a sample session using the :mod:`ftplib` module:: + + >>> from ftplib import FTP + >>> ftp = FTP('ftp.cwi.nl') # connect to host, default port + >>> ftp.login() # user anonymous, passwd anonymous@ + >>> ftp.retrlines('LIST') # list directory contents + total 24418 + drwxrwsr-x 5 ftp-usr pdmaint 1536 Mar 20 09:48 . + dr-xr-srwt 105 ftp-usr pdmaint 1536 Mar 21 14:32 .. + -rw-r--r-- 1 ftp-usr pdmaint 5305 Mar 20 09:48 INDEX + . + . + . + >>> ftp.retrbinary('RETR README', open('README', 'wb').write) + '226 Transfer complete.' + >>> ftp.quit() + +The module defines the following items: + + +.. class:: FTP([host[, user[, passwd[, acct[, timeout]]]]]) + + Return a new instance of the :class:`FTP` class. When *host* is given, the + method call ``connect(host)`` is made. When *user* is given, additionally the + method call ``login(user, passwd, acct)`` is made (where *passwd* and *acct* + default to the empty string when not given). The optional *timeout* parameter + specifies a timeout in seconds for the connection attempt (if is not specified, + or passed as None, the global default timeout setting will be used). + + .. 
versionchanged:: 2.6 + *timeout* was added. + + +.. data:: all_errors + + The set of all exceptions (as a tuple) that methods of :class:`FTP` instances + may raise as a result of problems with the FTP connection (as opposed to + programming errors made by the caller). This set includes the four exceptions + listed below as well as :exc:`socket.error` and :exc:`IOError`. + + +.. exception:: error_reply + + Exception raised when an unexpected reply is received from the server. + + +.. exception:: error_temp + + Exception raised when an error code in the range 400--499 is received. + + +.. exception:: error_perm + + Exception raised when an error code in the range 500--599 is received. + + +.. exception:: error_proto + + Exception raised when a reply is received from the server that does not begin + with a digit in the range 1--5. + + +.. seealso:: + + Module :mod:`netrc` + Parser for the :file:`.netrc` file format. The file :file:`.netrc` is typically + used by FTP clients to load user authentication information before prompting the + user. + + .. index:: single: ftpmirror.py + + The file :file:`Tools/scripts/ftpmirror.py` in the Python source distribution is + a script that can mirror FTP sites, or portions thereof, using the :mod:`ftplib` + module. It can be used as an extended example that applies this module. + + +.. _ftp-objects: + +FTP Objects +----------- + +Several methods are available in two flavors: one for handling text files and +another for binary files. These are named for the command which is used +followed by ``lines`` for the text version or ``binary`` for the binary version. + +:class:`FTP` instances have the following methods: + + +.. method:: FTP.set_debuglevel(level) + + Set the instance's debugging level. This controls the amount of debugging + output printed. The default, ``0``, produces no debugging output. A value of + ``1`` produces a moderate amount of debugging output, generally a single line + per request. 
A value of ``2`` or higher produces the maximum amount of + debugging output, logging each line sent and received on the control connection. + + +.. method:: FTP.connect(host[, port[, timeout]]) + + Connect to the given host and port. The default port number is ``21``, as + specified by the FTP protocol specification. It is rarely needed to specify a + different port number. This function should be called only once for each + instance; it should not be called at all if a host was given when the instance + was created. All other methods can only be used after a connection has been + made. + + The optional *timeout* parameter specifies a timeout in seconds for the + connection attempt. If is not specified, or passed as None, the object timeout + is used (the timeout that you passed when instantiating the class); if the + object timeout is also None, the global default timeout setting will be used. + + .. versionchanged:: 2.6 + *timeout* was added. + + +.. method:: FTP.getwelcome() + + Return the welcome message sent by the server in reply to the initial + connection. (This message sometimes contains disclaimers or help information + that may be relevant to the user.) + + +.. method:: FTP.login([user[, passwd[, acct]]]) + + Log in as the given *user*. The *passwd* and *acct* parameters are optional and + default to the empty string. If no *user* is specified, it defaults to + ``'anonymous'``. If *user* is ``'anonymous'``, the default *passwd* is + ``'anonymous@'``. This function should be called only once for each instance, + after a connection has been established; it should not be called at all if a + host and user were given when the instance was created. Most FTP commands are + only allowed after the client has logged in. + + +.. method:: FTP.abort() + + Abort a file transfer that is in progress. Using this does not always work, but + it's worth a try. + + +.. 
method:: FTP.sendcmd(command) + + Send a simple command string to the server and return the response string. + + +.. method:: FTP.voidcmd(command) + + Send a simple command string to the server and handle the response. Return + nothing if a response code in the range 200--299 is received. Raise an exception + otherwise. + + +.. method:: FTP.retrbinary(command, callback[, maxblocksize[, rest]]) + + Retrieve a file in binary transfer mode. *command* should be an appropriate + ``RETR`` command: ``'RETR filename'``. The *callback* function is called for + each block of data received, with a single string argument giving the data + block. The optional *maxblocksize* argument specifies the maximum chunk size to + read on the low-level socket object created to do the actual transfer (which + will also be the largest size of the data blocks passed to *callback*). A + reasonable default is chosen. *rest* means the same thing as in the + :meth:`transfercmd` method. + + +.. method:: FTP.retrlines(command[, callback]) + + Retrieve a file or directory listing in ASCII transfer mode. *command* should be + an appropriate ``RETR`` command (see :meth:`retrbinary`) or a ``LIST`` command + (usually just the string ``'LIST'``). The *callback* function is called for + each line, with the trailing CRLF stripped. The default *callback* prints the + line to ``sys.stdout``. + + +.. method:: FTP.set_pasv(boolean) + + Enable "passive" mode if *boolean* is true, other disable passive mode. (In + Python 2.0 and before, passive mode was off by default; in Python 2.1 and later, + it is on by default.) + + +.. method:: FTP.storbinary(command, file[, blocksize]) + + Store a file in binary transfer mode. *command* should be an appropriate + ``STOR`` command: ``"STOR filename"``. *file* is an open file object which is + read until EOF using its :meth:`read` method in blocks of size *blocksize* to + provide the data to be stored. The *blocksize* argument defaults to 8192. + + .. 
versionchanged:: 2.1 + default for *blocksize* added. + + +.. method:: FTP.storlines(command, file) + + Store a file in ASCII transfer mode. *command* should be an appropriate + ``STOR`` command (see :meth:`storbinary`). Lines are read until EOF from the + open file object *file* using its :meth:`readline` method to provide the data to + be stored. + + +.. method:: FTP.transfercmd(cmd[, rest]) + + Initiate a transfer over the data connection. If the transfer is active, send a + ``EPRT`` or ``PORT`` command and the transfer command specified by *cmd*, and + accept the connection. If the server is passive, send a ``EPSV`` or ``PASV`` + command, connect to it, and start the transfer command. Either way, return the + socket for the connection. + + If optional *rest* is given, a ``REST`` command is sent to the server, passing + *rest* as an argument. *rest* is usually a byte offset into the requested file, + telling the server to restart sending the file's bytes at the requested offset, + skipping over the initial bytes. Note however that RFC 959 requires only that + *rest* be a string containing characters in the printable range from ASCII code + 33 to ASCII code 126. The :meth:`transfercmd` method, therefore, converts + *rest* to a string, but no check is performed on the string's contents. If the + server does not recognize the ``REST`` command, an :exc:`error_reply` exception + will be raised. If this happens, simply call :meth:`transfercmd` without a + *rest* argument. + + +.. method:: FTP.ntransfercmd(cmd[, rest]) + + Like :meth:`transfercmd`, but returns a tuple of the data connection and the + expected size of the data. If the expected size could not be computed, ``None`` + will be returned as the expected size. *cmd* and *rest* means the same thing as + in :meth:`transfercmd`. + + +.. method:: FTP.nlst(argument[, ...]) + + Return a list of files as returned by the ``NLST`` command. 
The optional + *argument* is a directory to list (default is the current server directory). + Multiple arguments can be used to pass non-standard options to the ``NLST`` + command. + + +.. method:: FTP.dir(argument[, ...]) + + Produce a directory listing as returned by the ``LIST`` command, printing it to + standard output. The optional *argument* is a directory to list (default is the + current server directory). Multiple arguments can be used to pass non-standard + options to the ``LIST`` command. If the last argument is a function, it is used + as a *callback* function as for :meth:`retrlines`; the default prints to + ``sys.stdout``. This method returns ``None``. + + +.. method:: FTP.rename(fromname, toname) + + Rename file *fromname* on the server to *toname*. + + +.. method:: FTP.delete(filename) + + Remove the file named *filename* from the server. If successful, returns the + text of the response, otherwise raises :exc:`error_perm` on permission errors or + :exc:`error_reply` on other errors. + + +.. method:: FTP.cwd(pathname) + + Set the current directory on the server. + + +.. method:: FTP.mkd(pathname) + + Create a new directory on the server. + + +.. method:: FTP.pwd() + + Return the pathname of the current directory on the server. + + +.. method:: FTP.rmd(dirname) + + Remove the directory named *dirname* on the server. + + +.. method:: FTP.size(filename) + + Request the size of the file named *filename* on the server. On success, the + size of the file is returned as an integer, otherwise ``None`` is returned. + Note that the ``SIZE`` command is not standardized, but is supported by many + common server implementations. + + +.. method:: FTP.quit() + + Send a ``QUIT`` command to the server and close the connection. This is the + "polite" way to close a connection, but it may raise an exception of the server + reponds with an error to the ``QUIT`` command. 
This implies a call to the + :meth:`close` method which renders the :class:`FTP` instance useless for + subsequent calls (see below). + + +.. method:: FTP.close() + + Close the connection unilaterally. This should not be applied to an already + closed connection such as after a successful call to :meth:`quit`. After this + call the :class:`FTP` instance should not be used any more (after a call to + :meth:`close` or :meth:`quit` you cannot reopen the connection by issuing + another :meth:`login` method). + diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst new file mode 100644 index 0000000..b0a5577c --- /dev/null +++ b/Doc/library/functions.rst @@ -0,0 +1,1138 @@ + +.. _built-in-funcs: + +Built-in Functions +================== + +The Python interpreter has a number of functions built into it that are always +available. They are listed here in alphabetical order. + + +.. function:: __import__(name[, globals[, locals[, fromlist[, level]]]]) + + .. index:: + statement: import + module: ihooks + module: rexec + module: imp + + .. note:: + + This is an advanced function that is not needed in everyday Python + programming. + + The function is invoked by the :keyword:`import` statement. It mainly exists + so that you can replace it with another function that has a compatible + interface, in order to change the semantics of the :keyword:`import` statement. + For examples of why and how you would do this, see the standard library modules + :mod:`ihooks` and :mod:`rexec`. See also the built-in module :mod:`imp`, which + defines some useful operations out of which you can build your own + :func:`__import__` function. + + For example, the statement ``import spam`` results in the following call: + ``__import__('spam',`` ``globals(),`` ``locals(), [], -1)``; the statement + ``from spam.ham import eggs`` results in ``__import__('spam.ham', globals(), + locals(), ['eggs'], -1)``. 
Note that even though ``locals()`` and ``['eggs']`` + are passed in as arguments, the :func:`__import__` function does not set the + local variable named ``eggs``; this is done by subsequent code that is generated + for the import statement. (In fact, the standard implementation does not use + its *locals* argument at all, and uses its *globals* only to determine the + package context of the :keyword:`import` statement.) + + When the *name* variable is of the form ``package.module``, normally, the + top-level package (the name up till the first dot) is returned, *not* the + module named by *name*. However, when a non-empty *fromlist* argument is + given, the module named by *name* is returned. This is done for + compatibility with the bytecode generated for the different kinds of import + statement; when using ``import spam.ham.eggs``, the top-level package + :mod:`spam` must be placed in the importing namespace, but when using ``from + spam.ham import eggs``, the ``spam.ham`` subpackage must be used to find the + ``eggs`` variable. As a workaround for this behavior, use :func:`getattr` to + extract the desired components. For example, you could define the following + helper:: + + def my_import(name): + mod = __import__(name) + components = name.split('.') + for comp in components[1:]: + mod = getattr(mod, comp) + return mod + + *level* specifies whether to use absolute or relative imports. The default is + ``-1`` which indicates both absolute and relative imports will be attempted. + ``0`` means only perform absolute imports. Positive values for *level* indicate + the number of parent directories to search relative to the directory of the + module calling :func:`__import__`. + + .. versionchanged:: 2.5 + The level parameter was added. + + .. versionchanged:: 2.5 + Keyword support for parameters was added. + + +.. function:: abs(x) + + Return the absolute value of a number. The argument may be a plain or long + integer or a floating point number. 
If the argument is a complex number, its + magnitude is returned. + + +.. function:: all(iterable) + + Return True if all elements of the *iterable* are true. Equivalent to:: + + def all(iterable): + for element in iterable: + if not element: + return False + return True + + .. versionadded:: 2.5 + + +.. function:: any(iterable) + + Return True if any element of the *iterable* is true. Equivalent to:: + + def any(iterable): + for element in iterable: + if element: + return True + return False + + .. versionadded:: 2.5 + + +.. function:: basestring() + + This abstract type is the superclass for :class:`str`. It + cannot be called or instantiated, but it can be used to test whether an object + is an instance of :class:`str` (or a user-defined type inherited from + :class:`basestring`). + + .. versionadded:: 2.3 + + +.. function:: bin(x) + + Convert an integer number to a binary string. The result is a valid Python + expression. If *x* is not a Python :class:`int` object, it has to define an + :meth:`__index__` method that returns an integer. + + .. versionadded:: 3.0 + + +.. function:: bool([x]) + + Convert a value to a Boolean, using the standard truth testing procedure. If + *x* is false or omitted, this returns :const:`False`; otherwise it returns + :const:`True`. :class:`bool` is also a class, which is a subclass of + :class:`int`. Class :class:`bool` cannot be subclassed further. Its only + instances are :const:`False` and :const:`True`. + + .. index:: pair: Boolean; type + + .. versionadded:: 2.2.1 + + .. versionchanged:: 2.3 + If no argument is given, this function returns :const:`False`. + + +.. function:: chr(i) + + Return the string of one character whose Unicode codepoint is the integer *i*. For + example, ``chr(97)`` returns the string ``'a'``. This is the inverse of + :func:`ord`. The valid range for the argument depends how Python was + configured -- it may be either UCS2 [0..0xFFFF] or UCS4 [0..0x10FFFF]. 
+ :exc:`ValueError` will be raised if *i* is outside that range. + + +.. function:: classmethod(function) + + Return a class method for *function*. + + A class method receives the class as implicit first argument, just like an + instance method receives the instance. To declare a class method, use this + idiom:: + + class C: + @classmethod + def f(cls, arg1, arg2, ...): ... + + The ``@classmethod`` form is a function decorator -- see the description of + function definitions in :ref:`function` for details. + + It can be called either on the class (such as ``C.f()``) or on an instance (such + as ``C().f()``). The instance is ignored except for its class. If a class + method is called for a derived class, the derived class object is passed as the + implied first argument. + + Class methods are different than C++ or Java static methods. If you want those, + see :func:`staticmethod` in this section. + + For more information on class methods, consult the documentation on the standard + type hierarchy in :ref:`types`. + + .. versionadded:: 2.2 + + .. versionchanged:: 2.4 + Function decorator syntax added. + + +.. function:: cmp(x, y) + + Compare the two objects *x* and *y* and return an integer according to the + outcome. The return value is negative if ``x < y``, zero if ``x == y`` and + strictly positive if ``x > y``. + + +.. function:: compile(source, filename, mode[, flags[, dont_inherit]]) + + Compile the *source* into a code object. Code objects can be executed by a call + to :func:`exec` or evaluated by a call to :func:`eval`. The *filename* argument + should give the file from which the code was read; pass some recognizable value + if it wasn't read from a file (``''`` is commonly used). 
The *mode* + argument specifies what kind of code must be compiled; it can be ``'exec'`` if + *source* consists of a sequence of statements, ``'eval'`` if it consists of a + single expression, or ``'single'`` if it consists of a single interactive + statement (in the latter case, expression statements that evaluate to something + else than ``None`` will be printed). + + When compiling multi-line statements, two caveats apply: line endings must be + represented by a single newline character (``'\n'``), and the input must be + terminated by at least one newline character. If line endings are represented + by ``'\r\n'``, use the string :meth:`replace` method to change them into + ``'\n'``. + + The optional arguments *flags* and *dont_inherit* (which are new in Python 2.2) + control which future statements (see :pep:`236`) affect the compilation of + *source*. If neither is present (or both are zero) the code is compiled with + those future statements that are in effect in the code that is calling compile. + If the *flags* argument is given and *dont_inherit* is not (or is zero) then the + future statements specified by the *flags* argument are used in addition to + those that would be used anyway. If *dont_inherit* is a non-zero integer then + the *flags* argument is it -- the future statements in effect around the call to + compile are ignored. + + Future statements are specified by bits which can be bitwise or-ed together to + specify multiple statements. The bitfield required to specify a given feature + can be found as the :attr:`compiler_flag` attribute on the :class:`_Feature` + instance in the :mod:`__future__` module. + + +.. function:: complex([real[, imag]]) + + Create a complex number with the value *real* + *imag*\*j or convert a string or + number to a complex number. If the first parameter is a string, it will be + interpreted as a complex number and the function must be called without a second + parameter. The second parameter can never be a string. 
Each argument may be any + numeric type (including complex). If *imag* is omitted, it defaults to zero and + the function serves as a numeric conversion function like :func:`int`, + :func:`long` and :func:`float`. If both arguments are omitted, returns ``0j``. + + The complex type is described in :ref:`typesnumeric`. + + +.. function:: delattr(object, name) + + This is a relative of :func:`setattr`. The arguments are an object and a + string. The string must be the name of one of the object's attributes. The + function deletes the named attribute, provided the object allows it. For + example, ``delattr(x, 'foobar')`` is equivalent to ``del x.foobar``. + + +.. function:: dict([arg]) + :noindex: + + Create a new data dictionary, optionally with items taken from *arg*. + The dictionary type is described in :ref:`typesmapping`. + + For other containers see the built in :class:`list`, :class:`set`, and + :class:`tuple` classes, and the :mod:`collections` module. + + +.. function:: dir([object]) + + Without arguments, return the list of names in the current local scope. With an + argument, attempt to return a list of valid attributes for that object. + + If the object has a method named :meth:`__dir__`, this method will be called and + must return the list of attributes. This allows objects that implement a custom + :func:`__getattr__` or :func:`__getattribute__` function to customize the way + :func:`dir` reports their attributes. + + If the object does not provide :meth:`__dir__`, the function tries its best to + gather information from the object's :attr:`__dict__` attribute, if defined, and + from its type object. The resulting list is not necessarily complete, and may + be inaccurate when the object has a custom :func:`__getattr__`. 
+ + The default :func:`dir` mechanism behaves differently with different types of + objects, as it attempts to produce the most relevant, rather than complete, + information: + + * If the object is a module object, the list contains the names of the module's + attributes. + + * If the object is a type or class object, the list contains the names of its + attributes, and recursively of the attributes of its bases. + + * Otherwise, the list contains the object's attributes' names, the names of its + class's attributes, and recursively of the attributes of its class's base + classes. + + The resulting list is sorted alphabetically. For example:: + + >>> import struct + >>> dir() + ['__builtins__', '__doc__', '__name__', 'struct'] + >>> dir(struct) + ['__doc__', '__name__', 'calcsize', 'error', 'pack', 'unpack'] + >>> class Foo(object): + ... def __dir__(self): + ... return ["kan", "ga", "roo"] + ... + >>> f = Foo() + >>> dir(f) + ['ga', 'kan', 'roo'] + + .. note:: + + Because :func:`dir` is supplied primarily as a convenience for use at an + interactive prompt, it tries to supply an interesting set of names more than it + tries to supply a rigorously or consistently defined set of names, and its + detailed behavior may change across releases. + + +.. function:: divmod(a, b) + + Take two (non complex) numbers as arguments and return a pair of numbers + consisting of their quotient and remainder when using long division. With mixed + operand types, the rules for binary arithmetic operators apply. For plain and + long integers, the result is the same as ``(a // b, a % b)``. For floating point + numbers the result is ``(q, a % b)``, where *q* is usually ``math.floor(a / b)`` + but may be 1 less than that. In any case ``q * b + a % b`` is very close to + *a*, if ``a % b`` is non-zero it has the same sign as *b*, and ``0 <= abs(a % b) + < abs(b)``. + + .. versionchanged:: 2.3 + Using :func:`divmod` with complex numbers is deprecated. + + +.. 
function:: enumerate(iterable) + + Return an enumerate object. *iterable* must be a sequence, an iterator, or some + other object which supports iteration. The :meth:`__next__` method of the + iterator returned by :func:`enumerate` returns a tuple containing a count (from + zero) and the corresponding value obtained from iterating over *iterable*. + :func:`enumerate` is useful for obtaining an indexed series: ``(0, seq[0])``, + ``(1, seq[1])``, ``(2, seq[2])``, .... For example:: + + >>> for i, season in enumerate(['Spring', 'Summer', 'Fall', 'Winter']): + ... print i, season + 0 Spring + 1 Summer + 2 Fall + 3 Winter + + .. versionadded:: 2.3 + + +.. function:: eval(expression[, globals[, locals]]) + + The arguments are a string and optional globals and locals. If provided, + *globals* must be a dictionary. If provided, *locals* can be any mapping + object. + + .. versionchanged:: 2.4 + formerly *locals* was required to be a dictionary. + + The *expression* argument is parsed and evaluated as a Python expression + (technically speaking, a condition list) using the *globals* and *locals* + dictionaries as global and local name space. If the *globals* dictionary is + present and lacks '__builtins__', the current globals are copied into *globals* + before *expression* is parsed. This means that *expression* normally has full + access to the standard :mod:`__builtin__` module and restricted environments are + propagated. If the *locals* dictionary is omitted it defaults to the *globals* + dictionary. If both dictionaries are omitted, the expression is executed in the + environment where :func:`eval` is called. The return value is the result of + the evaluated expression. Syntax errors are reported as exceptions. Example:: + + >>> x = 1 + >>> print eval('x+1') + 2 + + This function can also be used to execute arbitrary code objects (such as those + created by :func:`compile`). In this case pass a code object instead of a + string. 
The code object must have been compiled passing ``'eval'`` as the + *mode* argument. + + Hints: dynamic execution of statements is supported by the :func:`exec` + function. The :func:`globals` and :func:`locals` functions + return the current global and local dictionary, respectively, which may be + useful to pass around for use by :func:`eval` or :func:`exec`. + + +.. function:: exec(object[, globals[, locals]]) + + This function supports dynamic execution of Python code. *object* must be either + a string, an open file object, or a code object. If it is a string, the string + is parsed as a suite of Python statements which is then executed (unless a + syntax error occurs). If it is an open file, the file is parsed until EOF and + executed. If it is a code object, it is simply executed. In all cases, the + code that's executed is expected to be valid as file input (see the section + "File input" in the Reference Manual). Be aware that the :keyword:`return` and + :keyword:`yield` statements may not be used outside of function definitions even + within the context of code passed to the :func:`exec` function. The return value + is ``None``. + + In all cases, if the optional parts are omitted, the code is executed in the + current scope. If only *globals* is provided, it must be a dictionary, which + will be used for both the global and the local variables. If *globals* and + *locals* are given, they are used for the global and local variables, + respectively. If provided, *locals* can be any mapping object. + + If the *globals* dictionary does not contain a value for the key + ``__builtins__``, a reference to the dictionary of the built-in module + :mod:`__builtin__` is inserted under that key. That way you can control what + builtins are available to the executed code by inserting your own + ``__builtins__`` dictionary into *globals* before passing it to :func:`exec`. + + .. 
note:: + + The built-in functions :func:`globals` and :func:`locals` return the current + global and local dictionary, respectively, which may be useful to pass around + for use as the second and third argument to :func:`exec`. + + .. warning:: + + The default *locals* act as described for function :func:`locals` below: + modifications to the default *locals* dictionary should not be attempted. Pass + an explicit *locals* dictionary if you need to see effects of the code on + *locals* after function :func:`exec` returns. :func:`exec` cannot be + used reliably to modify a function's locals. + + +.. function:: filter(function, iterable) + + Construct a list from those elements of *iterable* for which *function* returns + true. *iterable* may be either a sequence, a container which supports + iteration, or an iterator. If *iterable* is a string or a tuple, the result + also has that type; otherwise it is always a list. If *function* is ``None``, + the identity function is assumed, that is, all elements of *iterable* that are + false are removed. + + Note that ``filter(function, iterable)`` is equivalent to ``[item for item in + iterable if function(item)]`` if function is not ``None`` and ``[item for item + in iterable if item]`` if function is ``None``. + + +.. function:: float([x]) + + Convert a string or a number to floating point. If the argument is a string, it + must contain a possibly signed decimal or floating point number, possibly + embedded in whitespace. Otherwise, the argument may be a plain or long integer + or a floating point number, and a floating point number with the same value + (within Python's floating point precision) is returned. If no argument is + given, returns ``0.0``. + + .. note:: + + .. index:: + single: NaN + single: Infinity + + When passing in a string, values for NaN and Infinity may be returned, depending + on the underlying C library. 
The specific set of strings accepted which cause + these values to be returned depends entirely on the C library and is known to + vary. + + The float type is described in :ref:`typesnumeric`. + +.. function:: frozenset([iterable]) + :noindex: + + Return a frozenset object, optionally with elements taken from *iterable*. + The frozenset type is described in :ref:`types-set`. + + For other containers see the built in :class:`dict`, :class:`list`, and + :class:`tuple` classes, and the :mod:`collections` module. + + .. versionadded:: 2.4 + + +.. function:: getattr(object, name[, default]) + + Return the value of the named attribute of *object*. *name* must be a string. + If the string is the name of one of the object's attributes, the result is the + value of that attribute. For example, ``getattr(x, 'foobar')`` is equivalent to + ``x.foobar``. If the named attribute does not exist, *default* is returned if + provided, otherwise :exc:`AttributeError` is raised. + + +.. function:: globals() + + Return a dictionary representing the current global symbol table. This is always + the dictionary of the current module (inside a function or method, this is the + module where it is defined, not the module from which it is called). + + +.. function:: hasattr(object, name) + + The arguments are an object and a string. The result is ``True`` if the string + is the name of one of the object's attributes, ``False`` if not. (This is + implemented by calling ``getattr(object, name)`` and seeing whether it raises an + exception or not.) + + +.. function:: hash(object) + + Return the hash value of the object (if it has one). Hash values are integers. + They are used to quickly compare dictionary keys during a dictionary lookup. + Numeric values that compare equal have the same hash value (even if they are of + different types, as is the case for 1 and 1.0). + + +.. function:: help([object]) + + Invoke the built-in help system. (This function is intended for interactive + use.) 
If no argument is given, the interactive help system starts on the + interpreter console. If the argument is a string, then the string is looked up + as the name of a module, function, class, method, keyword, or documentation + topic, and a help page is printed on the console. If the argument is any other + kind of object, a help page on the object is generated. + + .. versionadded:: 2.2 + + +.. function:: hex(x) + + Convert an integer number to a hexadecimal string. The result is a valid Python + expression. If *x* is not a Python :class:`int` object, it has to define an + :meth:`__index__` method that returns an integer. + + .. versionchanged:: 2.4 + Formerly only returned an unsigned literal. + + +.. function:: id(object) + + Return the "identity" of an object. This is an integer (or long integer) which + is guaranteed to be unique and constant for this object during its lifetime. + Two objects with non-overlapping lifetimes may have the same :func:`id` value. + (Implementation note: this is the address of the object.) + + +.. function:: int([x[, radix]]) + + Convert a string or number to an integer. If the argument is a string, it + must contain a possibly signed number of arbitrary size, + possibly embedded in whitespace. The *radix* parameter gives the base for the + conversion and may be any integer in the range [2, 36], or zero. If *radix* is + zero, the interpretation is the same as for integer literals. If *radix* is + specified and *x* is not a string, :exc:`TypeError` is raised. Otherwise, the + argument may be another integer, a floating point number or any other object + that has an :meth:`__int__` method. Conversion + of floating point numbers to integers truncates (towards zero). If no + arguments are given, returns ``0``. + + The integer type is described in :ref:`typesnumeric`. + + +.. 
function:: isinstance(object, classinfo) + + Return true if the *object* argument is an instance of the *classinfo* argument, + or of a (direct or indirect) subclass thereof. Also return true if *classinfo* + is a type object (new-style class) and *object* is an object of that type or of + a (direct or indirect) subclass thereof. If *object* is not a class instance or + an object of the given type, the function always returns false. If *classinfo* + is neither a class object nor a type object, it may be a tuple of class or type + objects, or may recursively contain other such tuples (other sequence types are + not accepted). If *classinfo* is not a class, type, or tuple of classes, types, + and such tuples, a :exc:`TypeError` exception is raised. + + .. versionchanged:: 2.2 + Support for a tuple of type information was added. + + +.. function:: issubclass(class, classinfo) + + Return true if *class* is a subclass (direct or indirect) of *classinfo*. A + class is considered a subclass of itself. *classinfo* may be a tuple of class + objects, in which case every entry in *classinfo* will be checked. In any other + case, a :exc:`TypeError` exception is raised. + + .. versionchanged:: 2.3 + Support for a tuple of type information was added. + + +.. function:: iter(o[, sentinel]) + + Return an iterator object. The first argument is interpreted very differently + depending on the presence of the second argument. Without a second argument, *o* + must be a collection object which supports the iteration protocol (the + :meth:`__iter__` method), or it must support the sequence protocol (the + :meth:`__getitem__` method with integer arguments starting at ``0``). If it + does not support either of those protocols, :exc:`TypeError` is raised. If the + second argument, *sentinel*, is given, then *o* must be a callable object. 
The + iterator created in this case will call *o* with no arguments for each call to + its :meth:`__next__` method; if the value returned is equal to *sentinel*, + :exc:`StopIteration` will be raised, otherwise the value will be returned. + + .. versionadded:: 2.2 + + +.. function:: len(s) + + Return the length (the number of items) of an object. The argument may be a + sequence (string, tuple or list) or a mapping (dictionary). + + +.. function:: list([iterable]) + + Return a list whose items are the same and in the same order as *iterable*'s + items. *iterable* may be either a sequence, a container that supports + iteration, or an iterator object. If *iterable* is already a list, a copy is + made and returned, similar to ``iterable[:]``. For instance, ``list('abc')`` + returns ``['a', 'b', 'c']`` and ``list( (1, 2, 3) )`` returns ``[1, 2, 3]``. If + no argument is given, returns a new empty list, ``[]``. + + :class:`list` is a mutable sequence type, as documented in + :ref:`typesseq`. For other containers see the built in :class:`dict`, + :class:`set`, and :class:`tuple` classes, and the :mod:`collections` module. + + +.. function:: locals() + + Update and return a dictionary representing the current local symbol table. + + .. warning:: + + The contents of this dictionary should not be modified; changes may not affect + the values of local variables used by the interpreter. + + Free variables are returned by *locals* when it is called in a function block. + Modifications of free variables may not affect the values used by the + interpreter. Free variables are not returned in class blocks. + + +.. function:: map(function, iterable, ...) + + Apply *function* to every item of *iterable* and return a list of the results. + If additional *iterable* arguments are passed, *function* must take that many + arguments and is applied to the items from all iterables in parallel. If one + iterable is shorter than another it is assumed to be extended with ``None`` + items. 
If *function* is ``None``, the identity function is assumed; if there + are multiple arguments, :func:`map` returns a list consisting of tuples + containing the corresponding items from all iterables (a kind of transpose + operation). The *iterable* arguments may be a sequence or any iterable object; + the result is always a list. + + +.. function:: max(iterable[, args...][key]) + + With a single argument *iterable*, return the largest item of a non-empty + iterable (such as a string, tuple or list). With more than one argument, return + the largest of the arguments. + + The optional *key* argument specifies a one-argument ordering function like that + used for :meth:`list.sort`. The *key* argument, if supplied, must be in keyword + form (for example, ``max(a,b,c,key=func)``). + + .. versionchanged:: 2.5 + Added support for the optional *key* argument. + + +.. function:: min(iterable[, args...][key]) + + With a single argument *iterable*, return the smallest item of a non-empty + iterable (such as a string, tuple or list). With more than one argument, return + the smallest of the arguments. + + The optional *key* argument specifies a one-argument ordering function like that + used for :meth:`list.sort`. The *key* argument, if supplied, must be in keyword + form (for example, ``min(a,b,c,key=func)``). + + .. versionchanged:: 2.5 + Added support for the optional *key* argument. + + +.. function:: next(iterator[, default]) + + Retrieve the next item from the *iterator* by calling its :meth:`__next__` + method. If *default* is given, it is returned if the iterator is exhausted, + otherwise :exc:`StopIteration` is raised. + + +.. function:: object() + + Return a new featureless object. :class:`object` is a base for all new style + classes. It has the methods that are common to all instances of new style + classes. + + .. versionadded:: 2.2 + + .. versionchanged:: 2.3 + This function does not accept any arguments. Formerly, it accepted arguments but + ignored them. 
+ + +.. function:: oct(x) + + Convert an integer number to an octal string. The result is a valid Python + expression. If *x* is not a Python :class:`int` object, it has to define an + :meth:`__index__` method that returns an integer. + + .. versionchanged:: 2.4 + Formerly only returned an unsigned literal. + + +.. function:: open(filename[, mode[, bufsize]]) + + Open a file, returning an object of the :class:`file` type described in + section :ref:`bltin-file-objects`. If the file cannot be opened, + :exc:`IOError` is raised. When opening a file, it's preferable to use + :func:`open` instead of invoking the :class:`file` constructor directly. + + The first two arguments are the same as for ``stdio``'s :cfunc:`fopen`: + *filename* is the file name to be opened, and *mode* is a string indicating how + the file is to be opened. + + The most commonly-used values of *mode* are ``'r'`` for reading, ``'w'`` for + writing (truncating the file if it already exists), and ``'a'`` for appending + (which on *some* Unix systems means that *all* writes append to the end of the + file regardless of the current seek position). If *mode* is omitted, it + defaults to ``'r'``. When opening a binary file, you should append ``'b'`` to + the *mode* value to open the file in binary mode, which will improve + portability. (Appending ``'b'`` is useful even on systems that don't treat + binary and text files differently, where it serves as documentation.) See below + for more possible values of *mode*. + + .. index:: + single: line-buffered I/O + single: unbuffered I/O + single: buffer size, I/O + single: I/O control; buffering + + The optional *bufsize* argument specifies the file's desired buffer size: 0 + means unbuffered, 1 means line buffered, any other positive value means use a + buffer of (approximately) that size. A negative *bufsize* means to use the + system default, which is usually line buffered for tty devices and fully + buffered for other files. 
If omitted, the system default is used. [#]_ + + Modes ``'r+'``, ``'w+'`` and ``'a+'`` open the file for updating (note that + ``'w+'`` truncates the file). Append ``'b'`` to the mode to open the file in + binary mode, on systems that differentiate between binary and text files; on + systems that don't have this distinction, adding the ``'b'`` has no effect. + + In addition to the standard :cfunc:`fopen` values *mode* may be ``'U'`` or + ``'rU'``. Python is usually built with universal newline support; supplying + ``'U'`` opens the file as a text file, but lines may be terminated by any of the + following: the Unix end-of-line convention ``'\n'``, the Macintosh convention + ``'\r'``, or the Windows convention ``'\r\n'``. All of these external + representations are seen as ``'\n'`` by the Python program. If Python is built + without universal newline support a *mode* with ``'U'`` is the same as normal + text mode. Note that file objects so opened also have an attribute called + :attr:`newlines` which has a value of ``None`` (if no newlines have yet been + seen), ``'\n'``, ``'\r'``, ``'\r\n'``, or a tuple containing all the newline + types seen. + + Python enforces that the mode, after stripping ``'U'``, begins with ``'r'``, + ``'w'`` or ``'a'``. + + See also the :mod:`fileinput` module. + + .. versionchanged:: 2.5 + Restriction on first letter of mode string introduced. + + +.. function:: ord(c) + + Given a string of length one, return an integer representing the Unicode code + point of the character when the argument is a unicode object, or the value of + the byte when the argument is an 8-bit string. For example, ``ord('a')`` returns + the integer ``97``, ``ord(u'\u2020')`` returns ``8224``. This is the inverse of + :func:`chr` for 8-bit strings and of :func:`unichr` for unicode objects. 
If a + unicode argument is given and Python was built with UCS2 Unicode, then the + character's code point must be in the range [0..65535] inclusive; otherwise the + string length is two, and a :exc:`TypeError` will be raised. + + +.. function:: pow(x, y[, z]) + + Return *x* to the power *y*; if *z* is present, return *x* to the power *y*, + modulo *z* (computed more efficiently than ``pow(x, y) % z``). The two-argument + form ``pow(x, y)`` is equivalent to using the power operator: ``x**y``. + + The arguments must have numeric types. With mixed operand types, the coercion + rules for binary arithmetic operators apply. For int and long int operands, the + result has the same type as the operands (after coercion) unless the second + argument is negative; in that case, all arguments are converted to float and a + float result is delivered. For example, ``10**2`` returns ``100``, but + ``10**-2`` returns ``0.01``. (This last feature was added in Python 2.2. In + Python 2.1 and before, if both arguments were of integer types and the second + argument was negative, an exception was raised.) If the second argument is + negative, the third argument must be omitted. If *z* is present, *x* and *y* + must be of integer types, and *y* must be non-negative. (This restriction was + added in Python 2.2. In Python 2.1 and before, floating 3-argument ``pow()`` + returned platform-dependent results depending on floating-point rounding + accidents.) + + +.. function:: property([fget[, fset[, fdel[, doc]]]]) + + Return a property attribute for new-style classes (classes that derive from + :class:`object`). + + *fget* is a function for getting an attribute value, likewise *fset* is a + function for setting, and *fdel* a function for del'ing, an attribute. 
Typical + use is to define a managed attribute x:: + + class C(object): + def __init__(self): self._x = None + def getx(self): return self._x + def setx(self, value): self._x = value + def delx(self): del self._x + x = property(getx, setx, delx, "I'm the 'x' property.") + + If given, *doc* will be the docstring of the property attribute. Otherwise, the + property will copy *fget*'s docstring (if it exists). This makes it possible to + create read-only properties easily using :func:`property` as a decorator:: + + class Parrot(object): + def __init__(self): + self._voltage = 100000 + + @property + def voltage(self): + """Get the current voltage.""" + return self._voltage + + turns the :meth:`voltage` method into a "getter" for a read-only attribute with + the same name. + + .. versionadded:: 2.2 + + .. versionchanged:: 2.5 + Use *fget*'s docstring if no *doc* given. + + +.. function:: range([start,] stop[, step]) + + This is a versatile function to create sequences containing arithmetic + progressions. It is most often used in :keyword:`for` loops. The arguments + must be plain integers. If the *step* argument is omitted, it defaults to + ``1``. If the *start* argument is omitted, it defaults to ``0``. The full form + returns a list of plain integers ``[start, start + step, start + 2 * step, + ...]``. If *step* is positive, the last element is the largest ``start + i * + step`` less than *stop*; if *step* is negative, the last element is the smallest + ``start + i * step`` greater than *stop*. *step* must not be zero (or else + :exc:`ValueError` is raised). Example:: + + >>> list(range(10)) + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + >>> list(range(1, 11)) + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + >>> list(range(0, 30, 5)) + [0, 5, 10, 15, 20, 25] + >>> list(range(0, 10, 3)) + [0, 3, 6, 9] + >>> list(range(0, -10, -1)) + [0, -1, -2, -3, -4, -5, -6, -7, -8, -9] + >>> list(range(0)) + [] + >>> list(range(1, 0)) + [] + + +.. 
function:: repr(object) + + Return a string containing a printable representation of an object. This is the + same value yielded by conversions (reverse quotes). It is sometimes useful to be + able to access this operation as an ordinary function. For many types, this + function makes an attempt to return a string that would yield an object with the + same value when passed to :func:`eval`. + + +.. function:: reversed(seq) + + Return a reverse iterator. *seq* must be an object which supports the sequence + protocol (the :meth:`__len__` method and the :meth:`__getitem__` method with + integer arguments starting at ``0``). + + .. versionadded:: 2.4 + + +.. function:: round(x[, n]) + + Return the floating point value *x* rounded to *n* digits after the decimal + point. If *n* is omitted, it defaults to zero. The result is a floating point + number. Values are rounded to the closest multiple of 10 to the power minus + *n*; if two multiples are equally close, rounding is done away from 0 (so, for + example, ``round(0.5)`` is ``1.0`` and ``round(-0.5)`` is ``-1.0``). + + +.. function:: set([iterable]) + :noindex: + + Return a new set, optionally with elements taken from *iterable*. + The set type is described in :ref:`types-set`. + + For other containers see the built in :class:`dict`, :class:`list`, and + :class:`tuple` classes, and the :mod:`collections` module. + + .. versionadded:: 2.4 + + +.. function:: setattr(object, name, value) + + This is the counterpart of :func:`getattr`. The arguments are an object, a + string and an arbitrary value. The string may name an existing attribute or a + new attribute. The function assigns the value to the attribute, provided the + object allows it. For example, ``setattr(x, 'foobar', 123)`` is equivalent to + ``x.foobar = 123``. + + +.. function:: slice([start,] stop[, step]) + + .. index:: single: Numerical Python + + Return a slice object representing the set of indices specified by + ``range(start, stop, step)``. 
The *start* and *step* arguments default to + ``None``. Slice objects have read-only data attributes :attr:`start`, + :attr:`stop` and :attr:`step` which merely return the argument values (or their + default). They have no other explicit functionality; however they are used by + Numerical Python and other third party extensions. Slice objects are also + generated when extended indexing syntax is used. For example: + ``a[start:stop:step]`` or ``a[start:stop, i]``. + + +.. function:: sorted(iterable[, cmp[, key[, reverse]]]) + + Return a new sorted list from the items in *iterable*. + + The optional arguments *cmp*, *key*, and *reverse* have the same meaning as + those for the :meth:`list.sort` method (described in section + :ref:`typesseq-mutable`). + + *cmp* specifies a custom comparison function of two arguments (iterable + elements) which should return a negative, zero or positive number depending on + whether the first argument is considered smaller than, equal to, or larger than + the second argument: ``cmp=lambda x,y: cmp(x.lower(), y.lower())`` + + *key* specifies a function of one argument that is used to extract a comparison + key from each list element: ``key=str.lower`` + + *reverse* is a boolean value. If set to ``True``, then the list elements are + sorted as if each comparison were reversed. + + In general, the *key* and *reverse* conversion processes are much faster than + specifying an equivalent *cmp* function. This is because *cmp* is called + multiple times for each list element while *key* and *reverse* touch each + element only once. + + .. versionadded:: 2.4 + + +.. function:: staticmethod(function) + + Return a static method for *function*. + + A static method does not receive an implicit first argument. To declare a static + method, use this idiom:: + + class C: + @staticmethod + def f(arg1, arg2, ...): ... + + The ``@staticmethod`` form is a function decorator -- see the description of + function definitions in :ref:`function` for details. 
+ + It can be called either on the class (such as ``C.f()``) or on an instance (such + as ``C().f()``). The instance is ignored except for its class. + + Static methods in Python are similar to those found in Java or C++. For a more + advanced concept, see :func:`classmethod` in this section. + + For more information on static methods, consult the documentation on the + standard type hierarchy in :ref:`types`. + + .. versionadded:: 2.2 + + .. versionchanged:: 2.4 + Function decorator syntax added. + + +.. function:: str([object[, encoding[, errors]]]) + + Return a string version of an object, using one of the following modes: + + If *encoding* and/or *errors* are given, :func:`str` will decode the + *object* which can either be a byte string or a character buffer using + the codec for *encoding*. The *encoding* parameter is a string giving + the name of an encoding; if the encoding is not known, :exc:`LookupError` + is raised. Error handling is done according to *errors*; this specifies the + treatment of characters which are invalid in the input encoding. If + *errors* is ``'strict'`` (the default), a :exc:`ValueError` is raised on + errors, while a value of ``'ignore'`` causes errors to be silently ignored, + and a value of ``'replace'`` causes the official Unicode replacement character, + U+FFFD, to be used to replace input characters which cannot be decoded. + See also the :mod:`codecs` module. + + When only *object* is given, this returns its nicely printable representation. + For strings, this is the string itself. The difference with ``repr(object)`` + is that ``str(object)`` does not always attempt to return a string that is + acceptable to :func:`eval`; its goal is to return a printable string. + With no arguments, this returns the empty string. + + Objects can specify what ``str(object)`` returns by defining a :meth:`__str__` + special method. 
+ + For more information on strings see :ref:`typesseq` which describes sequence + functionality (strings are sequences), and also the string-specific methods + described in the :ref:`string-methods` section. To output formatted strings + use template strings or the ``%`` operator described in the + :ref:`string-formatting` section. In addition see the :ref:`stringservices` + section. See also :func:`unicode`. + + +.. function:: sum(iterable[, start]) + + Sums *start* and the items of an *iterable* from left to right and returns the + total. *start* defaults to ``0``. The *iterable*'s items are normally numbers, + and are not allowed to be strings. The fast, correct way to concatenate a + sequence of strings is by calling ``''.join(sequence)``. + + .. versionadded:: 2.3 + + +.. function:: super(type[, object-or-type]) + + Return the superclass of *type*. If the second argument is omitted the super + object returned is unbound. If the second argument is an object, + ``isinstance(obj, type)`` must be true. If the second argument is a type, + ``issubclass(type2, type)`` must be true. :func:`super` only works for new-style + classes. + + A typical use for calling a cooperative superclass method is:: + + class C(B): + def meth(self, arg): + super(C, self).meth(arg) + + Note that :func:`super` is implemented as part of the binding process for + explicit dotted attribute lookups such as ``super(C, self).__getitem__(name)``. + Accordingly, :func:`super` is undefined for implicit lookups using statements or + operators such as ``super(C, self)[name]``. + + .. versionadded:: 2.2 + + +.. function:: tuple([iterable]) + + Return a tuple whose items are the same and in the same order as *iterable*'s + items. *iterable* may be a sequence, a container that supports iteration, or an + iterator object. If *iterable* is already a tuple, it is returned unchanged. + For instance, ``tuple('abc')`` returns ``('a', 'b', 'c')`` and ``tuple([1, 2, + 3])`` returns ``(1, 2, 3)``. 
If no argument is given, returns a new empty + tuple, ``()``. + + :class:`tuple` is an immutable sequence type, as documented in + :ref:`typesseq`. For other containers see the built in :class:`dict`, + :class:`list`, and :class:`set` classes, and the :mod:`collections` module. + + +.. function:: type(object) + + .. index:: object: type + + Return the type of an *object*. The return value is a type object. The + :func:`isinstance` built-in function is recommended for testing the type of an + object. + + With three arguments, :func:`type` functions as a constructor as detailed below. + + +.. function:: type(name, bases, dict) + :noindex: + + Return a new type object. This is essentially a dynamic form of the + :keyword:`class` statement. The *name* string is the class name and becomes the + :attr:`__name__` attribute; the *bases* tuple itemizes the base classes and + becomes the :attr:`__bases__` attribute; and the *dict* dictionary is the + namespace containing definitions for class body and becomes the :attr:`__dict__` + attribute. For example, the following two statements create identical + :class:`type` objects:: + + >>> class X(object): + ... a = 1 + ... + >>> X = type('X', (object,), dict(a=1)) + + .. versionadded:: 2.2 + + +.. function:: vars([object]) + + Without arguments, return a dictionary corresponding to the current local symbol + table. With a module, class or class instance object as argument (or anything + else that has a :attr:`__dict__` attribute), returns a dictionary corresponding + to the object's symbol table. The returned dictionary should not be modified: + the effects on the corresponding symbol table are undefined. [#]_ + + +.. function:: zip([iterable, ...]) + + This function returns a list of tuples, where the *i*-th tuple contains the + *i*-th element from each of the argument sequences or iterables. The returned + list is truncated in length to the length of the shortest argument sequence. 
+ When there are multiple arguments which are all of the same length, :func:`zip` + is similar to :func:`map` with an initial argument of ``None``. With a single + sequence argument, it returns a list of 1-tuples. With no arguments, it returns + an empty list. + + .. versionadded:: 2.0 + + .. versionchanged:: 2.4 + Formerly, :func:`zip` required at least one argument and ``zip()`` raised a + :exc:`TypeError` instead of returning an empty list. + +.. % --------------------------------------------------------------------------- + + +.. _non-essential-built-in-funcs: + +Non-essential Built-in Functions +================================ + +There are several built-in functions that are no longer essential to learn, know +or use in modern Python programming. They have been kept here to maintain +backwards compatibility with programs written for older versions of Python. + +Python programmers, trainers, students and bookwriters should feel free to +bypass these functions without concerns about missing something important. + + +.. function:: buffer(object[, offset[, size]]) + + The *object* argument must be an object that supports the buffer call interface + (such as strings, arrays, and buffers). A new buffer object will be created + which references the *object* argument. The buffer object will be a slice from + the beginning of *object* (or from the specified *offset*). The slice will + extend to the end of *object* (or will have a length given by the *size* + argument). + + + +.. rubric:: Footnotes + +.. [#] Specifying a buffer size currently has no effect on systems that don't have + :cfunc:`setvbuf`. The interface to specify the buffer size is not done using a + method that calls :cfunc:`setvbuf`, because that may dump core when called after + any I/O has been performed, and there's no reliable way to determine whether + this is the case. + +.. 
[#] In the current implementation, local variable bindings cannot normally be + affected this way, but variables retrieved from other scopes (such as modules) + can be. This may change. + diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst new file mode 100644 index 0000000..4874b55 --- /dev/null +++ b/Doc/library/functools.rst @@ -0,0 +1,145 @@ +:mod:`functools` --- Higher order functions and operations on callable objects +============================================================================== + +.. module:: functools + :synopsis: Higher order functions and operations on callable objects. +.. moduleauthor:: Peter Harris +.. moduleauthor:: Raymond Hettinger +.. moduleauthor:: Nick Coghlan +.. sectionauthor:: Peter Harris + + +.. versionadded:: 2.5 + +The :mod:`functools` module is for higher-order functions: functions that act on +or return other functions. In general, any callable object can be treated as a +function for the purposes of this module. + +The :mod:`functools` module defines the following function: + + +.. function:: partial(func[,*args][, **keywords]) + + Return a new :class:`partial` object which when called will behave like *func* + called with the positional arguments *args* and keyword arguments *keywords*. If + more arguments are supplied to the call, they are appended to *args*. If + additional keyword arguments are supplied, they extend and override *keywords*. + Roughly equivalent to:: + + def partial(func, *args, **keywords): + def newfunc(*fargs, **fkeywords): + newkeywords = keywords.copy() + newkeywords.update(fkeywords) + return func(*(args + fargs), **newkeywords) + newfunc.func = func + newfunc.args = args + newfunc.keywords = keywords + return newfunc + + The :func:`partial` is used for partial function application which "freezes" + some portion of a function's arguments and/or keywords resulting in a new object + with a simplified signature. 
For example, :func:`partial` can be used to create + a callable that behaves like the :func:`int` function where the *base* argument + defaults to two:: + + >>> basetwo = partial(int, base=2) + >>> basetwo.__doc__ = 'Convert base 2 string to an int.' + >>> basetwo('10010') + 18 + + +.. function:: reduce(function, sequence[, initializer]) + + Apply *function* of two arguments cumulatively to the items of *sequence*, from + left to right, so as to reduce the sequence to a single value. For example, + ``reduce(lambda x, y: x+y, [1, 2, 3, 4, 5])`` calculates ``((((1+2)+3)+4)+5)``. + The left argument, *x*, is the accumulated value and the right argument, *y*, is + the update value from the *sequence*. If the optional *initializer* is present, + it is placed before the items of the sequence in the calculation, and serves as + a default when the sequence is empty. If *initializer* is not given and + *sequence* contains only one item, the first item is returned. + + +.. function:: update_wrapper(wrapper, wrapped[, assigned][, updated]) + + Update a *wrapper* function to look like the *wrapped* function. The optional + arguments are tuples to specify which attributes of the original function are + assigned directly to the matching attributes on the wrapper function and which + attributes of the wrapper function are updated with the corresponding attributes + from the original function. The default values for these arguments are the + module level constants *WRAPPER_ASSIGNMENTS* (which assigns to the wrapper + function's *__name__*, *__module__* and *__doc__*, the documentation string) and + *WRAPPER_UPDATES* (which updates the wrapper function's *__dict__*, i.e. the + instance dictionary). + + The main intended use for this function is in decorator functions which wrap the + decorated function and return the wrapper. 
If the wrapper function is not + updated, the metadata of the returned function will reflect the wrapper + definition rather than the original function definition, which is typically less + than helpful. + + +.. function:: wraps(wrapped[, assigned][, updated]) + + This is a convenience function for invoking ``partial(update_wrapper, + wrapped=wrapped, assigned=assigned, updated=updated)`` as a function decorator + when defining a wrapper function. For example:: + + >>> def my_decorator(f): + ... @wraps(f) + ... def wrapper(*args, **kwds): + ... print 'Calling decorated function' + ... return f(*args, **kwds) + ... return wrapper + ... + >>> @my_decorator + ... def example(): + ... """Docstring""" + ... print 'Called example function' + ... + >>> example() + Calling decorated function + Called example function + >>> example.__name__ + 'example' + >>> example.__doc__ + 'Docstring' + + Without the use of this decorator factory, the name of the example function + would have been ``'wrapper'``, and the docstring of the original :func:`example` + would have been lost. + + +.. _partial-objects: + +:class:`partial` Objects +------------------------ + +:class:`partial` objects are callable objects created by :func:`partial`. They +have three read-only attributes: + + +.. attribute:: partial.func + + A callable object or function. Calls to the :class:`partial` object will be + forwarded to :attr:`func` with new arguments and keywords. + + +.. attribute:: partial.args + + The leftmost positional arguments that will be prepended to the positional + arguments provided to a :class:`partial` object call. + + +.. attribute:: partial.keywords + + The keyword arguments that will be supplied when the :class:`partial` object is + called. + +:class:`partial` objects are like :class:`function` objects in that they are +callable, weak referencable, and can have attributes. There are some important +differences. 
For instance, the :attr:`__name__` and :attr:`__doc__` attributes +are not created automatically. Also, :class:`partial` objects defined in +classes behave like static methods and do not transform into bound methods +during instance attribute look-up. + diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst new file mode 100644 index 0000000..70e4a6b --- /dev/null +++ b/Doc/library/gc.rst @@ -0,0 +1,211 @@ + +:mod:`gc` --- Garbage Collector interface +========================================= + +.. module:: gc + :synopsis: Interface to the cycle-detecting garbage collector. +.. moduleauthor:: Neil Schemenauer +.. sectionauthor:: Neil Schemenauer + + +This module provides an interface to the optional garbage collector. It +provides the ability to disable the collector, tune the collection frequency, +and set debugging options. It also provides access to unreachable objects that +the collector found but cannot free. Since the collector supplements the +reference counting already used in Python, you can disable the collector if you +are sure your program does not create reference cycles. Automatic collection +can be disabled by calling ``gc.disable()``. To debug a leaking program call +``gc.set_debug(gc.DEBUG_LEAK)``. Notice that this includes +``gc.DEBUG_SAVEALL``, causing garbage-collected objects to be saved in +gc.garbage for inspection. + +The :mod:`gc` module provides the following functions: + + +.. function:: enable() + + Enable automatic garbage collection. + + +.. function:: disable() + + Disable automatic garbage collection. + + +.. function:: isenabled() + + Returns true if automatic collection is enabled. + + +.. function:: collect([generation]) + + With no arguments, run a full collection. The optional argument *generation* + may be an integer specifying which generation to collect (from 0 to 2). A + :exc:`ValueError` is raised if the generation number is invalid. The number of + unreachable objects found is returned. + + .. 
versionchanged:: 2.5 + The optional *generation* argument was added. + + +.. function:: set_debug(flags) + + Set the garbage collection debugging flags. Debugging information will be + written to ``sys.stderr``. See below for a list of debugging flags which can be + combined using bit operations to control debugging. + + +.. function:: get_debug() + + Return the debugging flags currently set. + + +.. function:: get_objects() + + Returns a list of all objects tracked by the collector, excluding the list + returned. + + .. versionadded:: 2.2 + + +.. function:: set_threshold(threshold0[, threshold1[, threshold2]]) + + Set the garbage collection thresholds (the collection frequency). Setting + *threshold0* to zero disables collection. + + The GC classifies objects into three generations depending on how many + collection sweeps they have survived. New objects are placed in the youngest + generation (generation ``0``). If an object survives a collection it is moved + into the next older generation. Since generation ``2`` is the oldest + generation, objects in that generation remain there after a collection. In + order to decide when to run, the collector keeps track of the number of object + allocations and deallocations since the last collection. When the number of + allocations minus the number of deallocations exceeds *threshold0*, collection + starts. Initially only generation ``0`` is examined. If generation ``0`` has + been examined more than *threshold1* times since generation ``1`` has been + examined, then generation ``1`` is examined as well. Similarly, *threshold2* + controls the number of collections of generation ``1`` before collecting + generation ``2``. + + +.. function:: get_count() + + Return the current collection counts as a tuple of ``(count0, count1, + count2)``. + + .. versionadded:: 2.5 + + +.. function:: get_threshold() + + Return the current collection thresholds as a tuple of ``(threshold0, + threshold1, threshold2)``. + + +..
function:: get_referrers(*objs) + + Return the list of objects that directly refer to any of objs. This function + will only locate those containers which support garbage collection; extension + types which do refer to other objects but do not support garbage collection will + not be found. + + Note that objects which have already been dereferenced, but which live in cycles + and have not yet been collected by the garbage collector can be listed among the + resulting referrers. To get only currently live objects, call :func:`collect` + before calling :func:`get_referrers`. + + Care must be taken when using objects returned by :func:`get_referrers` because + some of them could still be under construction and hence in a temporarily + invalid state. Avoid using :func:`get_referrers` for any purpose other than + debugging. + + .. versionadded:: 2.2 + + +.. function:: get_referents(*objs) + + Return a list of objects directly referred to by any of the arguments. The + referents returned are those objects visited by the arguments' C-level + :attr:`tp_traverse` methods (if any), and may not be all objects actually + directly reachable. :attr:`tp_traverse` methods are supported only by objects + that support garbage collection, and are only required to visit objects that may + be involved in a cycle. So, for example, if an integer is directly reachable + from an argument, that integer object may or may not appear in the result list. + + .. versionadded:: 2.3 + +The following variable is provided for read-only access (you can mutate its +value but should not rebind it): + + +.. data:: garbage + + A list of objects which the collector found to be unreachable but could not be + freed (uncollectable objects). By default, this list contains only objects with + :meth:`__del__` methods. 
[#]_ Objects that have :meth:`__del__` methods and are + part of a reference cycle cause the entire reference cycle to be uncollectable, + including objects not necessarily in the cycle but reachable only from it. + Python doesn't collect such cycles automatically because, in general, it isn't + possible for Python to guess a safe order in which to run the :meth:`__del__` + methods. If you know a safe order, you can force the issue by examining the + *garbage* list, and explicitly breaking cycles due to your objects within the + list. Note that these objects are kept alive even so by virtue of being in the + *garbage* list, so they should be removed from *garbage* too. For example, + after breaking cycles, do ``del gc.garbage[:]`` to empty the list. It's + generally better to avoid the issue by not creating cycles containing objects + with :meth:`__del__` methods, and *garbage* can be examined in that case to + verify that no such cycles are being created. + + If :const:`DEBUG_SAVEALL` is set, then all unreachable objects will be added to + this list rather than freed. + +The following constants are provided for use with :func:`set_debug`: + + +.. data:: DEBUG_STATS + + Print statistics during collection. This information can be useful when tuning + the collection frequency. + + +.. data:: DEBUG_COLLECTABLE + + Print information on collectable objects found. + + +.. data:: DEBUG_UNCOLLECTABLE + + Print information on uncollectable objects found (objects which are not + reachable but cannot be freed by the collector). These objects will be added to + the ``garbage`` list. + + +.. data:: DEBUG_INSTANCES + + When :const:`DEBUG_COLLECTABLE` or :const:`DEBUG_UNCOLLECTABLE` is set, print + information about instance objects found. + + +.. data:: DEBUG_OBJECTS + + When :const:`DEBUG_COLLECTABLE` or :const:`DEBUG_UNCOLLECTABLE` is set, print + information about objects other than instance objects found. + + +..
data:: DEBUG_SAVEALL + + When set, all unreachable objects found will be appended to *garbage* rather + than being freed. This can be useful for debugging a leaking program. + + +.. data:: DEBUG_LEAK + + The debugging flags necessary for the collector to print information about a + leaking program (equal to ``DEBUG_COLLECTABLE | DEBUG_UNCOLLECTABLE | + DEBUG_INSTANCES | DEBUG_OBJECTS | DEBUG_SAVEALL``). + +.. rubric:: Footnotes + +.. [#] Prior to Python 2.2, the list contained all instance objects in unreachable + cycles, not only those with :meth:`__del__` methods. + diff --git a/Doc/library/gdbm.rst b/Doc/library/gdbm.rst new file mode 100644 index 0000000..ce27f6c --- /dev/null +++ b/Doc/library/gdbm.rst @@ -0,0 +1,122 @@ + +:mod:`gdbm` --- GNU's reinterpretation of dbm +============================================= + +.. module:: gdbm + :platform: Unix + :synopsis: GNU's reinterpretation of dbm. + + +.. index:: module: dbm + +This module is quite similar to the :mod:`dbm` module, but uses ``gdbm`` instead +to provide some additional functionality. Please note that the file formats +created by ``gdbm`` and ``dbm`` are incompatible. + +The :mod:`gdbm` module provides an interface to the GNU DBM library. ``gdbm`` +objects behave like mappings (dictionaries), except that keys and values are +always strings. Printing a ``gdbm`` object doesn't print the keys and values, +and the :meth:`items` and :meth:`values` methods are not supported. + +The module defines the following constant and functions: + + +.. exception:: error + + Raised on ``gdbm``\ -specific errors, such as I/O errors. :exc:`KeyError` is + raised for general mapping errors like specifying an incorrect key. + + +.. function:: open(filename, [flag, [mode]]) + + Open a ``gdbm`` database and return a ``gdbm`` object. The *filename* argument + is the name of the database file. 
+ + The optional *flag* argument can be: + + +---------+-------------------------------------------+ + | Value | Meaning | + +=========+===========================================+ + | ``'r'`` | Open existing database for reading only | + | | (default) | + +---------+-------------------------------------------+ + | ``'w'`` | Open existing database for reading and | + | | writing | + +---------+-------------------------------------------+ + | ``'c'`` | Open database for reading and writing, | + | | creating it if it doesn't exist | + +---------+-------------------------------------------+ + | ``'n'`` | Always create a new, empty database, open | + | | for reading and writing | + +---------+-------------------------------------------+ + + The following additional characters may be appended to the flag to control + how the database is opened: + + +---------+--------------------------------------------+ + | Value | Meaning | + +=========+============================================+ + | ``'f'`` | Open the database in fast mode. Writes | + | | to the database will not be synchronized. | + +---------+--------------------------------------------+ + | ``'s'`` | Synchronized mode. This will cause changes | + | | to the database to be immediately written | + | | to the file. | + +---------+--------------------------------------------+ + | ``'u'`` | Do not lock database. | + +---------+--------------------------------------------+ + + Not all flags are valid for all versions of ``gdbm``. The module constant + :const:`open_flags` is a string of supported flag characters. The exception + :exc:`error` is raised if an invalid flag is specified. + + The optional *mode* argument is the Unix mode of the file, used only when the + database has to be created. It defaults to octal ``0666``. + +In addition to the dictionary-like methods, ``gdbm`` objects have the following +methods: + + +.. 
function:: firstkey() + + It's possible to loop over every key in the database using this method and the + :meth:`nextkey` method. The traversal is ordered by ``gdbm``'s internal hash + values, and won't be sorted by the key values. This method returns the starting + key. + + +.. function:: nextkey(key) + + Returns the key that follows *key* in the traversal. The following code prints + every key in the database ``db``, without having to create a list in memory that + contains them all:: + + k = db.firstkey() + while k != None: + print k + k = db.nextkey(k) + + +.. function:: reorganize() + + If you have carried out a lot of deletions and would like to shrink the space + used by the ``gdbm`` file, this routine will reorganize the database. ``gdbm`` + will not shorten the length of a database file except by using this + reorganization; otherwise, deleted file space will be kept and reused as new + (key, value) pairs are added. + + +.. function:: sync() + + When the database has been opened in fast mode, this method forces any + unwritten data to be written to the disk. + + +.. seealso:: + + Module :mod:`anydbm` + Generic interface to ``dbm``\ -style databases. + + Module :mod:`whichdb` + Utility module used to determine the type of an existing database. + diff --git a/Doc/library/gensuitemodule.rst b/Doc/library/gensuitemodule.rst new file mode 100644 index 0000000..3fc5254 --- /dev/null +++ b/Doc/library/gensuitemodule.rst @@ -0,0 +1,63 @@ + +:mod:`gensuitemodule` --- Generate OSA stub packages +==================================================== + +.. module:: gensuitemodule + :platform: Mac + :synopsis: Create a stub package from an OSA dictionary +.. sectionauthor:: Jack Jansen + + +.. % \moduleauthor{Jack Jansen?}{email} + +The :mod:`gensuitemodule` module creates a Python package implementing stub code +for the AppleScript suites that are implemented by a specific application, +according to its AppleScript dictionary. 
+ +It is usually invoked by the user through the :program:`PythonIDE`, but it can +also be run as a script from the command line (pass :option:`--help` for help on +the options) or imported from Python code. For an example of its use see +:file:`Mac/scripts/genallsuites.py` in a source distribution, which generates +the stub packages that are included in the standard library. + +It defines the following public functions: + + +.. function:: is_scriptable(application) + + Returns true if ``application``, which should be passed as a pathname, appears + to be scriptable. Take the return value with a grain of salt: :program:`Internet + Explorer` appears not to be scriptable but definitely is. + + +.. function:: processfile(application[, output, basepkgname, edit_modnames, creatorsignature, dump, verbose]) + + Create a stub package for ``application``, which should be passed as a full + pathname. For a :file:`.app` bundle this is the pathname to the bundle, not to + the executable inside the bundle; for an unbundled CFM application you pass the + filename of the application binary. + + This function asks the application for its OSA terminology resources, decodes + these resources and uses the resultant data to create the Python code for the + package implementing the client stubs. + + ``output`` is the pathname where the resulting package is stored; if not + specified a standard "save file as" dialog is presented to the user. + ``basepkgname`` is the base package on which this package will build, and + defaults to :mod:`StdSuites`. Only when generating :mod:`StdSuites` itself do + you need to specify this. ``edit_modnames`` is a dictionary that can be used to + change module names that are too ugly after name mangling. ``creatorsignature`` + can be used to override the 4-char creator code, which is normally obtained from + the :file:`PkgInfo` file in the package or from the CFM file creator signature.
+ When ``dump`` is given it should refer to a file object, and ``processfile`` + will stop after decoding the resources and dump the Python representation of the + terminology resources to this file. ``verbose`` should also be a file object, + and specifying it will cause ``processfile`` to tell you what it is doing. + + +.. function:: processfile_fromresource(application[, output, basepkgname, edit_modnames, creatorsignature, dump, verbose]) + + This function does the same as ``processfile``, except that it uses a different + method to get the terminology resources. It opens ``application`` as a resource + file and reads all ``"aete"`` and ``"aeut"`` resources from this file. + diff --git a/Doc/library/getopt.rst b/Doc/library/getopt.rst new file mode 100644 index 0000000..0d9641d --- /dev/null +++ b/Doc/library/getopt.rst @@ -0,0 +1,147 @@ + +:mod:`getopt` --- Parser for command line options +================================================= + +.. module:: getopt + :synopsis: Portable parser for command line options; support both short and long option + names. + + +This module helps scripts to parse the command line arguments in ``sys.argv``. +It supports the same conventions as the Unix :cfunc:`getopt` function (including +the special meanings of arguments of the form '``-``' and '``-``\ ``-``'). Long +options similar to those supported by GNU software may be used as well via an +optional third argument. This module provides a single function and an +exception: + +.. % That's to fool latex2html into leaving the two hyphens alone! + + +.. function:: getopt(args, options[, long_options]) + + Parses command line options and parameter list. *args* is the argument list to + be parsed, without the leading reference to the running program. Typically, this + means ``sys.argv[1:]``. 
*options* is the string of option letters that the + script wants to recognize, with options that require an argument followed by a + colon (``':'``; i.e., the same format that Unix :cfunc:`getopt` uses). + + .. note:: + + Unlike GNU :cfunc:`getopt`, after a non-option argument, all further arguments + are considered also non-options. This is similar to the way non-GNU Unix systems + work. + + *long_options*, if specified, must be a list of strings with the names of the + long options which should be supported. The leading ``'-``\ ``-'`` characters + should not be included in the option name. Long options which require an + argument should be followed by an equal sign (``'='``). To accept only long + options, *options* should be an empty string. Long options on the command line + can be recognized so long as they provide a prefix of the option name that + matches exactly one of the accepted options. For example, if *long_options* is + ``['foo', 'frob']``, the option :option:`--fo` will match as :option:`--foo`, + but :option:`--f` will not match uniquely, so :exc:`GetoptError` will be raised. + + The return value consists of two elements: the first is a list of ``(option, + value)`` pairs; the second is the list of program arguments left after the + option list was stripped (this is a trailing slice of *args*). Each + option-and-value pair returned has the option as its first element, prefixed + with a hyphen for short options (e.g., ``'-x'``) or two hyphens for long + options (e.g., ``'-``\ ``-long-option'``), and the option argument as its + second element, or an empty string if the option has no argument. The + options occur in the list in the same order in which they were found, thus + allowing multiple occurrences. Long and short options may be mixed. + + +.. function:: gnu_getopt(args, options[, long_options]) + + This function works like :func:`getopt`, except that GNU style scanning mode is + used by default. 
This means that option and non-option arguments may be + intermixed. The :func:`getopt` function stops processing options as soon as a + non-option argument is encountered. + + If the first character of the option string is '+', or if the environment + variable POSIXLY_CORRECT is set, then option processing stops as soon as a + non-option argument is encountered. + + .. versionadded:: 2.3 + + +.. exception:: GetoptError + + This is raised when an unrecognized option is found in the argument list or when + an option requiring an argument is given none. The argument to the exception is + a string indicating the cause of the error. For long options, an argument given + to an option which does not require one will also cause this exception to be + raised. The attributes :attr:`msg` and :attr:`opt` give the error message and + related option; if there is no specific option to which the exception relates, + :attr:`opt` is an empty string. + + .. versionchanged:: 1.6 + Introduced :exc:`GetoptError` as a synonym for :exc:`error`. + + +.. exception:: error + + Alias for :exc:`GetoptError`; for backward compatibility. + +An example using only Unix style options:: + + >>> import getopt + >>> args = '-a -b -cfoo -d bar a1 a2'.split() + >>> args + ['-a', '-b', '-cfoo', '-d', 'bar', 'a1', 'a2'] + >>> optlist, args = getopt.getopt(args, 'abc:d:') + >>> optlist + [('-a', ''), ('-b', ''), ('-c', 'foo'), ('-d', 'bar')] + >>> args + ['a1', 'a2'] + +Using long option names is equally easy:: + + >>> s = '--condition=foo --testing --output-file abc.def -x a1 a2' + >>> args = s.split() + >>> args + ['--condition=foo', '--testing', '--output-file', 'abc.def', '-x', 'a1', 'a2'] + >>> optlist, args = getopt.getopt(args, 'x', [ + ... 
'condition=', 'output-file=', 'testing']) + >>> optlist + [('--condition', 'foo'), ('--testing', ''), ('--output-file', 'abc.def'), ('-x', + '')] + >>> args + ['a1', 'a2'] + +In a script, typical usage is something like this:: + + import getopt, sys + + def main(): + try: + opts, args = getopt.getopt(sys.argv[1:], "ho:v", ["help", "output="]) + except getopt.GetoptError as err: + # print help information and exit: + print str(err) # will print something like "option -a not recognized" + usage() + sys.exit(2) + output = None + verbose = False + for o, a in opts: + if o == "-v": + verbose = True + elif o in ("-h", "--help"): + usage() + sys.exit() + elif o in ("-o", "--output"): + output = a + else: + assert False, "unhandled option" + # ... + + if __name__ == "__main__": + main() + + +.. seealso:: + + Module :mod:`optparse` + More object-oriented command line option parsing. + diff --git a/Doc/library/getpass.rst b/Doc/library/getpass.rst new file mode 100644 index 0000000..45c6e53 --- /dev/null +++ b/Doc/library/getpass.rst @@ -0,0 +1,38 @@ + +:mod:`getpass` --- Portable password input +========================================== + +.. module:: getpass + :synopsis: Portable reading of passwords and retrieval of the userid. +.. moduleauthor:: Piers Lauder +.. sectionauthor:: Fred L. Drake, Jr. + + +.. % Windows (& Mac?) support by Guido van Rossum. + +The :mod:`getpass` module provides two functions: + + +.. function:: getpass([prompt[, stream]]) + + Prompt the user for a password without echoing. The user is prompted using the + string *prompt*, which defaults to ``'Password: '``. On Unix, the prompt is + written to the file-like object *stream*, which defaults to ``sys.stdout`` (this + argument is ignored on Windows). + + Availability: Macintosh, Unix, Windows. + + .. versionchanged:: 2.5 + The *stream* parameter was added. + + +.. function:: getuser() + + Return the "login name" of the user. Availability: Unix, Windows. 
+ + This function checks the environment variables :envvar:`LOGNAME`, + :envvar:`USER`, :envvar:`LNAME` and :envvar:`USERNAME`, in order, and returns + the value of the first one which is set to a non-empty string. If none are set, + the login name from the password database is returned on systems which support + the :mod:`pwd` module, otherwise, an exception is raised. + diff --git a/Doc/library/gettext.rst b/Doc/library/gettext.rst new file mode 100644 index 0000000..51628e6 --- /dev/null +++ b/Doc/library/gettext.rst @@ -0,0 +1,765 @@ + +:mod:`gettext` --- Multilingual internationalization services +============================================================= + +.. module:: gettext + :synopsis: Multilingual internationalization services. +.. moduleauthor:: Barry A. Warsaw +.. sectionauthor:: Barry A. Warsaw + + +The :mod:`gettext` module provides internationalization (I18N) and localization +(L10N) services for your Python modules and applications. It supports both the +GNU ``gettext`` message catalog API and a higher level, class-based API that may +be more appropriate for Python files. The interface described below allows you +to write your module and application messages in one natural language, and +provide a catalog of translated messages for running under different natural +languages. + +Some hints on localizing your Python modules and applications are also given. + + +GNU :program:`gettext` API +-------------------------- + +The :mod:`gettext` module defines the following API, which is very similar to +the GNU :program:`gettext` API. If you use this API you will affect the +translation of your entire application globally. Often this is what you want if +your application is monolingual, with the choice of language dependent on the +locale of your user. If you are localizing a Python module, or if your +application needs to switch languages on the fly, you probably want to use the +class-based API instead. + + +.. 
function:: bindtextdomain(domain[, localedir]) + + Bind the *domain* to the locale directory *localedir*. More concretely, + :mod:`gettext` will look for binary :file:`.mo` files for the given domain using + the path (on Unix): :file:`localedir/language/LC_MESSAGES/domain.mo`, where + *language* is searched for in the environment variables :envvar:`LANGUAGE`, + :envvar:`LC_ALL`, :envvar:`LC_MESSAGES`, and :envvar:`LANG` respectively. + + If *localedir* is omitted or ``None``, then the current binding for *domain* is + returned. [#]_ + + +.. function:: bind_textdomain_codeset(domain[, codeset]) + + Bind the *domain* to *codeset*, changing the encoding of strings returned by the + :func:`gettext` family of functions. If *codeset* is omitted, then the current + binding is returned. + + .. versionadded:: 2.4 + + +.. function:: textdomain([domain]) + + Change or query the current global domain. If *domain* is ``None``, then the + current global domain is returned, otherwise the global domain is set to + *domain*, which is returned. + + +.. function:: gettext(message) + + Return the localized translation of *message*, based on the current global + domain, language, and locale directory. This function is usually aliased as + :func:`_` in the local namespace (see examples below). + + +.. function:: lgettext(message) + + Equivalent to :func:`gettext`, but the translation is returned in the preferred + system encoding, if no other encoding was explicitly set with + :func:`bind_textdomain_codeset`. + + .. versionadded:: 2.4 + + +.. function:: dgettext(domain, message) + + Like :func:`gettext`, but look the message up in the specified *domain*. + + +.. function:: ldgettext(domain, message) + + Equivalent to :func:`dgettext`, but the translation is returned in the preferred + system encoding, if no other encoding was explicitly set with + :func:`bind_textdomain_codeset`. + + .. versionadded:: 2.4 + + +..
function:: ngettext(singular, plural, n) + + Like :func:`gettext`, but consider plural forms. If a translation is found, + apply the plural formula to *n*, and return the resulting message (some + languages have more than two plural forms). If no translation is found, return + *singular* if *n* is 1; return *plural* otherwise. + + The Plural formula is taken from the catalog header. It is a C or Python + expression that has a free variable *n*; the expression evaluates to the index + of the plural in the catalog. See the GNU gettext documentation for the precise + syntax to be used in :file:`.po` files and the formulas for a variety of + languages. + + .. versionadded:: 2.3 + + +.. function:: lngettext(singular, plural, n) + + Equivalent to :func:`ngettext`, but the translation is returned in the preferred + system encoding, if no other encoding was explicitly set with + :func:`bind_textdomain_codeset`. + + .. versionadded:: 2.4 + + +.. function:: dngettext(domain, singular, plural, n) + + Like :func:`ngettext`, but look the message up in the specified *domain*. + + .. versionadded:: 2.3 + + +.. function:: ldngettext(domain, singular, plural, n) + + Equivalent to :func:`dngettext`, but the translation is returned in the + preferred system encoding, if no other encoding was explicitly set with + :func:`bind_textdomain_codeset`. + + .. versionadded:: 2.4 + +Note that GNU :program:`gettext` also defines a :func:`dcgettext` method, but +this was deemed not useful and so it is currently unimplemented. + +Here's an example of typical usage for this API:: + + import gettext + gettext.bindtextdomain('myapplication', '/path/to/my/language/directory') + gettext.textdomain('myapplication') + _ = gettext.gettext + # ... + print _('This is a translatable string.') + + +Class-based API +--------------- + +The class-based API of the :mod:`gettext` module gives you more flexibility and +greater convenience than the GNU :program:`gettext` API. 
It is the recommended +way of localizing your Python applications and modules. :mod:`gettext` defines +a "translations" class which implements the parsing of GNU :file:`.mo` format +files, and has methods for returning either standard 8-bit strings or Unicode +strings. Instances of this "translations" class can also install themselves in +the built-in namespace as the function :func:`_`. + + +.. function:: find(domain[, localedir[, languages[, all]]]) + + This function implements the standard :file:`.mo` file search algorithm. It + takes a *domain*, identical to what :func:`textdomain` takes. Optional + *localedir* is as in :func:`bindtextdomain`. Optional *languages* is a list of + strings, where each string is a language code. + + If *localedir* is not given, then the default system locale directory is used. + [#]_ If *languages* is not given, then the following environment variables are + searched: :envvar:`LANGUAGE`, :envvar:`LC_ALL`, :envvar:`LC_MESSAGES`, and + :envvar:`LANG`. The first one returning a non-empty value is used for the + *languages* variable. The environment variables should contain a colon separated + list of languages, which will be split on the colon to produce the expected list + of language code strings. + + :func:`find` then expands and normalizes the languages, and then iterates + through them, searching for an existing file built of these components: + + :file:`localedir/language/LC_MESSAGES/domain.mo` + + The first such file name that exists is returned by :func:`find`. If no such + file is found, then ``None`` is returned. If *all* is given, it returns a list + of all file names, in the order in which they appear in the languages list or + the environment variables. + + +..
function:: translation(domain[, localedir[, languages[, class_[, fallback[, codeset]]]]]) + + Return a :class:`Translations` instance based on the *domain*, *localedir*, and + *languages*, which are first passed to :func:`find` to get a list of the + associated :file:`.mo` file paths. Instances with identical :file:`.mo` file + names are cached. The actual class instantiated is either *class_* if provided, + otherwise :class:`GNUTranslations`. The class's constructor must take a single + file object argument. If provided, *codeset* will change the charset used to + encode translated strings. + + If multiple files are found, later files are used as fallbacks for earlier ones. + To allow setting the fallback, :func:`copy.copy` is used to clone each + translation object from the cache; the actual instance data is still shared with + the cache. + + If no :file:`.mo` file is found, this function raises :exc:`IOError` if + *fallback* is false (which is the default), and returns a + :class:`NullTranslations` instance if *fallback* is true. + + .. versionchanged:: 2.4 + Added the *codeset* parameter. + + +.. function:: install(domain[, localedir[, unicode [, codeset[, names]]]]) + + This installs the function :func:`_` in Python's builtin namespace, based on + *domain*, *localedir*, and *codeset* which are passed to the function + :func:`translation`. The *unicode* flag is passed to the resulting translation + object's :meth:`install` method. + + For the *names* parameter, please see the description of the translation + object's :meth:`install` method. + + As seen below, you usually mark the strings in your application that are + candidates for translation, by wrapping them in a call to the :func:`_` + function, like this:: + + print _('This string will be translated.') + + For convenience, you want the :func:`_` function to be installed in Python's + builtin namespace, so it is easily accessible in all modules of your + application. + + .. 
versionchanged:: 2.4 + Added the *codeset* parameter. + + .. versionchanged:: 2.5 + Added the *names* parameter. + + +The :class:`NullTranslations` class +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Translation classes are what actually implement the translation of original +source file message strings to translated message strings. The base class used +by all translation classes is :class:`NullTranslations`; this provides the basic +interface you can use to write your own specialized translation classes. Here +are the methods of :class:`NullTranslations`: + + +.. method:: NullTranslations.__init__([fp]) + + Takes an optional file object *fp*, which is ignored by the base class. + Initializes "protected" instance variables *_info* and *_charset* which are set + by derived classes, as well as *_fallback*, which is set through + :meth:`add_fallback`. It then calls ``self._parse(fp)`` if *fp* is not + ``None``. + + +.. method:: NullTranslations._parse(fp) + + No-op'd in the base class, this method takes file object *fp*, and reads the + data from the file, initializing its message catalog. If you have an + unsupported message catalog file format, you should override this method to + parse your format. + + +.. method:: NullTranslations.add_fallback(fallback) + + Add *fallback* as the fallback object for the current translation object. A + translation object should consult the fallback if it cannot provide a + translation for a given message. + + +.. method:: NullTranslations.gettext(message) + + If a fallback has been set, forward :meth:`gettext` to the fallback. Otherwise, + return the translated message. Overridden in derived classes. + + +.. method:: NullTranslations.lgettext(message) + + If a fallback has been set, forward :meth:`lgettext` to the fallback. Otherwise, + return the translated message. Overridden in derived classes. + + .. versionadded:: 2.4 + + +.. 
method:: NullTranslations.ugettext(message) + + If a fallback has been set, forward :meth:`ugettext` to the fallback. Otherwise, + return the translated message as a Unicode string. Overridden in derived + classes. + + +.. method:: NullTranslations.ngettext(singular, plural, n) + + If a fallback has been set, forward :meth:`ngettext` to the fallback. Otherwise, + return the translated message. Overridden in derived classes. + + .. versionadded:: 2.3 + + +.. method:: NullTranslations.lngettext(singular, plural, n) + + If a fallback has been set, forward :meth:`lngettext` to the fallback. Otherwise, + return the translated message. Overridden in derived classes. + + .. versionadded:: 2.4 + + +.. method:: NullTranslations.ungettext(singular, plural, n) + + If a fallback has been set, forward :meth:`ungettext` to the fallback. + Otherwise, return the translated message as a Unicode string. Overridden in + derived classes. + + .. versionadded:: 2.3 + + +.. method:: NullTranslations.info() + + Return the "protected" :attr:`_info` variable. + + +.. method:: NullTranslations.charset() + + Return the "protected" :attr:`_charset` variable. + + +.. method:: NullTranslations.output_charset() + + Return the "protected" :attr:`_output_charset` variable, which defines the + encoding used to return translated messages. + + .. versionadded:: 2.4 + + +.. method:: NullTranslations.set_output_charset(charset) + + Change the "protected" :attr:`_output_charset` variable, which defines the + encoding used to return translated messages. + + .. versionadded:: 2.4 + + +.. method:: NullTranslations.install([unicode [, names]]) + + If the *unicode* flag is false, this method installs :meth:`self.gettext` into + the built-in namespace, binding it to ``_``. If *unicode* is true, it binds + :meth:`self.ugettext` instead. By default, *unicode* is false.
+ + If the *names* parameter is given, it must be a sequence containing the names of + functions you want to install in the builtin namespace in addition to :func:`_`. + Supported names are ``'gettext'`` (bound to :meth:`self.gettext` or + :meth:`self.ugettext` according to the *unicode* flag), ``'ngettext'`` (bound to + :meth:`self.ngettext` or :meth:`self.ungettext` according to the *unicode* + flag), ``'lgettext'`` and ``'lngettext'``. + + Note that this is only one way, albeit the most convenient way, to make the + :func:`_` function available to your application. Because it affects the entire + application globally, and specifically the built-in namespace, localized modules + should never install :func:`_`. Instead, they should use this code to make + :func:`_` available to their module:: + + import gettext + t = gettext.translation('mymodule', ...) + _ = t.gettext + + This puts :func:`_` only in the module's global namespace and so only affects + calls within this module. + + .. versionchanged:: 2.5 + Added the *names* parameter. + + +The :class:`GNUTranslations` class +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :mod:`gettext` module provides one additional class derived from +:class:`NullTranslations`: :class:`GNUTranslations`. This class overrides +:meth:`_parse` to enable reading GNU :program:`gettext` format :file:`.mo` files +in both big-endian and little-endian format. It also coerces both message ids +and message strings to Unicode. + +:class:`GNUTranslations` parses optional meta-data out of the translation +catalog. It is convention with GNU :program:`gettext` to include meta-data as +the translation for the empty string. This meta-data is in :rfc:`822`\ -style +``key: value`` pairs, and should contain the ``Project-Id-Version`` key. If the +key ``Content-Type`` is found, then the ``charset`` property is used to +initialize the "protected" :attr:`_charset` instance variable, defaulting to +``None`` if not found. 
If the charset encoding is specified, then all message +ids and message strings read from the catalog are converted to Unicode using +this encoding. The :meth:`ugettext` method always returns a Unicode, while the +:meth:`gettext` returns an encoded 8-bit string. For the message id arguments +of both methods, either Unicode strings or 8-bit strings containing only +US-ASCII characters are acceptable. Note that the Unicode version of the +methods (i.e. :meth:`ugettext` and :meth:`ungettext`) are the recommended +interface to use for internationalized Python programs. + +The entire set of key/value pairs are placed into a dictionary and set as the +"protected" :attr:`_info` instance variable. + +If the :file:`.mo` file's magic number is invalid, or if other problems occur +while reading the file, instantiating a :class:`GNUTranslations` class can raise +:exc:`IOError`. + +The following methods are overridden from the base class implementation: + + +.. method:: GNUTranslations.gettext(message) + + Look up the *message* id in the catalog and return the corresponding message + string, as an 8-bit string encoded with the catalog's charset encoding, if + known. If there is no entry in the catalog for the *message* id, and a fallback + has been set, the look up is forwarded to the fallback's :meth:`gettext` method. + Otherwise, the *message* id is returned. + + +.. method:: GNUTranslations.lgettext(message) + + Equivalent to :meth:`gettext`, but the translation is returned in the preferred + system encoding, if no other encoding was explicitly set with + :meth:`set_output_charset`. + + .. versionadded:: 2.4 + + +.. method:: GNUTranslations.ugettext(message) + + Look up the *message* id in the catalog and return the corresponding message + string, as a Unicode string. If there is no entry in the catalog for the + *message* id, and a fallback has been set, the look up is forwarded to the + fallback's :meth:`ugettext` method. Otherwise, the *message* id is returned. + + +.. 
method:: GNUTranslations.ngettext(singular, plural, n) + + Do a plural-forms lookup of a message id. *singular* is used as the message id + for purposes of lookup in the catalog, while *n* is used to determine which + plural form to use. The returned message string is an 8-bit string encoded with + the catalog's charset encoding, if known. + + If the message id is not found in the catalog, and a fallback is specified, the + request is forwarded to the fallback's :meth:`ngettext` method. Otherwise, when + *n* is 1 *singular* is returned, and *plural* is returned in all other cases. + + .. versionadded:: 2.3 + + +.. method:: GNUTranslations.lngettext(singular, plural, n) + + Equivalent to :meth:`ngettext`, but the translation is returned in the preferred + system encoding, if no other encoding was explicitly set with + :meth:`set_output_charset`. + + .. versionadded:: 2.4 + + +.. method:: GNUTranslations.ungettext(singular, plural, n) + + Do a plural-forms lookup of a message id. *singular* is used as the message id + for purposes of lookup in the catalog, while *n* is used to determine which + plural form to use. The returned message string is a Unicode string. + + If the message id is not found in the catalog, and a fallback is specified, the + request is forwarded to the fallback's :meth:`ungettext` method. Otherwise, + when *n* is 1 *singular* is returned, and *plural* is returned in all other + cases. + + Here is an example:: + + n = len(os.listdir('.')) + cat = GNUTranslations(somefile) + message = cat.ungettext( + 'There is %(num)d file in this directory', + 'There are %(num)d files in this directory', + n) % {'num': n} + + .. versionadded:: 2.3 + + +Solaris message catalog support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The Solaris operating system defines its own binary :file:`.mo` file format, but +since no documentation can be found on this format, it is not supported at this +time. + + +The Catalog constructor +^^^^^^^^^^^^^^^^^^^^^^^ + +..
index:: single: GNOME + +GNOME uses a version of the :mod:`gettext` module by James Henstridge, but this +version has a slightly different API. Its documented usage was:: + + import gettext + cat = gettext.Catalog(domain, localedir) + _ = cat.gettext + print _('hello world') + +For compatibility with this older module, the function :func:`Catalog` is an +alias for the :func:`translation` function described above. + +One difference between this module and Henstridge's: his catalog objects +supported access through a mapping API, but this appears to be unused and so is +not currently supported. + + +Internationalizing your programs and modules +-------------------------------------------- + +Internationalization (I18N) refers to the operation by which a program is made +aware of multiple languages. Localization (L10N) refers to the adaptation of +your program, once internationalized, to the local language and cultural habits. +In order to provide multilingual messages for your Python programs, you need to +take the following steps: + +#. prepare your program or module by specially marking translatable strings + +#. run a suite of tools over your marked files to generate raw messages catalogs + +#. create language specific translations of the message catalogs + +#. use the :mod:`gettext` module so that message strings are properly translated + +In order to prepare your code for I18N, you need to look at all the strings in +your files. Any string that needs to be translated should be marked by wrapping +it in ``_('...')`` --- that is, a call to the function :func:`_`. For example:: + + filename = 'mylog.txt' + message = _('writing a log message') + fp = open(filename, 'w') + fp.write(message) + fp.close() + +In this example, the string ``'writing a log message'`` is marked as a candidate +for translation, while the strings ``'mylog.txt'`` and ``'w'`` are not. 
+ +The Python distribution comes with two tools which help you generate the message +catalogs once you've prepared your source code. These may or may not be +available from a binary distribution, but they can be found in a source +distribution, in the :file:`Tools/i18n` directory. + +The :program:`pygettext` [#]_ program scans all your Python source code looking +for the strings you previously marked as translatable. It is similar to the GNU +:program:`gettext` program except that it understands all the intricacies of +Python source code, but knows nothing about C or C++ source code. You don't +need GNU ``gettext`` unless you're also going to be translating C code (such as +C extension modules). + +:program:`pygettext` generates textual Uniforum-style human readable message +catalog :file:`.pot` files, essentially structured human readable files which +contain every marked string in the source code, along with a placeholder for the +translation strings. :program:`pygettext` is a command line script that supports +a similar command line interface as :program:`xgettext`; for details on its use, +run:: + + pygettext.py --help + +Copies of these :file:`.pot` files are then handed over to the individual human +translators who write language-specific versions for every supported natural +language. They send you back the filled in language-specific versions as a +:file:`.po` file. Using the :program:`msgfmt.py` [#]_ program (in the +:file:`Tools/i18n` directory), you take the :file:`.po` files from your +translators and generate the machine-readable :file:`.mo` binary catalog files. +The :file:`.mo` files are what the :mod:`gettext` module uses for the actual +translation processing during run-time. + +How you use the :mod:`gettext` module in your code depends on whether you are +internationalizing a single module or your entire application. The next two +sections will discuss each case. 
+ + +Localizing your module +^^^^^^^^^^^^^^^^^^^^^^ + +If you are localizing your module, you must take care not to make global +changes, e.g. to the built-in namespace. You should not use the GNU ``gettext`` +API but instead the class-based API. + +Let's say your module is called "spam" and the module's various natural language +translation :file:`.mo` files reside in :file:`/usr/share/locale` in GNU +:program:`gettext` format. Here's what you would put at the top of your +module:: + + import gettext + t = gettext.translation('spam', '/usr/share/locale') + _ = t.lgettext + +If your translators were providing you with Unicode strings in their :file:`.po` +files, you'd instead do:: + + import gettext + t = gettext.translation('spam', '/usr/share/locale') + _ = t.ugettext + + +Localizing your application +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you are localizing your application, you can install the :func:`_` function +globally into the built-in namespace, usually in the main driver file of your +application. This will let all your application-specific files just use +``_('...')`` without having to explicitly install it in each file. 
+ +In the simple case then, you need only add the following bit of code to the main +driver file of your application:: + + import gettext + gettext.install('myapplication') + +If you need to set the locale directory or the *unicode* flag, you can pass +these into the :func:`install` function:: + + import gettext + gettext.install('myapplication', '/usr/share/locale', unicode=1) + + +Changing languages on the fly +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If your program needs to support many languages at the same time, you may want +to create multiple translation instances and then switch between them +explicitly, like so:: + + import gettext + + lang1 = gettext.translation('myapplication', languages=['en']) + lang2 = gettext.translation('myapplication', languages=['fr']) + lang3 = gettext.translation('myapplication', languages=['de']) + + # start by using language1 + lang1.install() + + # ... time goes by, user selects language 2 + lang2.install() + + # ... more time goes by, user selects language 3 + lang3.install() + + +Deferred translations +^^^^^^^^^^^^^^^^^^^^^ + +In most coding situations, strings are translated where they are coded. +Occasionally however, you need to mark strings for translation, but defer actual +translation until later. A classic example is:: + + animals = ['mollusk', + 'albatross', + 'rat', + 'penguin', + 'python', + ] + # ... + for a in animals: + print a + +Here, you want to mark the strings in the ``animals`` list as being +translatable, but you don't actually want to translate them until they are +printed. + +Here is one way you can handle this situation:: + + def _(message): return message + + animals = [_('mollusk'), + _('albatross'), + _('rat'), + _('penguin'), + _('python'), + ] + + del _ + + # ... + for a in animals: + print _(a) + +This works because the dummy definition of :func:`_` simply returns the string +unchanged. 
And this dummy definition will temporarily override any definition +of :func:`_` in the built-in namespace (until the :keyword:`del` command). Take +care, though if you have a previous definition of :func:`_` in the local +namespace. + +Note that the second use of :func:`_` will not identify "a" as being +translatable to the :program:`pygettext` program, since it is not a string. + +Another way to handle this is with the following example:: + + def N_(message): return message + + animals = [N_('mollusk'), + N_('albatross'), + N_('rat'), + N_('penguin'), + N_('python'), + ] + + # ... + for a in animals: + print _(a) + +In this case, you are marking translatable strings with the function :func:`N_`, +[#]_ which won't conflict with any definition of :func:`_`. However, you will +need to teach your message extraction program to look for translatable strings +marked with :func:`N_`. :program:`pygettext` and :program:`xpot` both support +this through the use of command line switches. + + +:func:`gettext` vs. :func:`lgettext` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In Python 2.4 the :func:`lgettext` family of functions were introduced. The +intention of these functions is to provide an alternative which is more +compliant with the current implementation of GNU gettext. Unlike +:func:`gettext`, which returns strings encoded with the same codeset used in the +translation file, :func:`lgettext` will return strings encoded with the +preferred system encoding, as returned by :func:`locale.getpreferredencoding`. +Also notice that Python 2.4 introduces new functions to explicitly choose the +codeset used in translated strings. If a codeset is explicitly set, even +:func:`lgettext` will return translated strings in the requested codeset, as +would be expected in the GNU gettext implementation. 
+ + +Acknowledgements +---------------- + +The following people contributed code, feedback, design suggestions, previous +implementations, and valuable experience to the creation of this module: + +* Peter Funk + +* James Henstridge + +* Juan David Ibáñez Palomar + +* Marc-André Lemburg + +* Martin von Löwis + +* François Pinard + +* Barry Warsaw + +* Gustavo Niemeyer + +.. rubric:: Footnotes + +.. [#] The default locale directory is system dependent; for example, on RedHat Linux + it is :file:`/usr/share/locale`, but on Solaris it is :file:`/usr/lib/locale`. + The :mod:`gettext` module does not try to support these system dependent + defaults; instead its default is :file:`sys.prefix/share/locale`. For this + reason, it is always best to call :func:`bindtextdomain` with an explicit + absolute path at the start of your application. + +.. [#] See the footnote for :func:`bindtextdomain` above. + +.. [#] François Pinard has written a program called :program:`xpot` which does a + similar job. It is available as part of his :program:`po-utils` package at + http://po-utils.progiciels-bpi.ca/. + +.. [#] :program:`msgfmt.py` is binary compatible with GNU :program:`msgfmt` except that + it provides a simpler, all-Python implementation. With this and + :program:`pygettext.py`, you generally won't need to install the GNU + :program:`gettext` package to internationalize your Python applications. + +.. [#] The choice of :func:`N_` here is totally arbitrary; it could have just as easily + been :func:`MarkThisStringForTranslation`. + diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst new file mode 100644 index 0000000..80bdac2 --- /dev/null +++ b/Doc/library/glob.rst @@ -0,0 +1,54 @@ + +:mod:`glob` --- Unix style pathname pattern expansion +===================================================== + +.. module:: glob + :synopsis: Unix shell style pathname pattern expansion. + + +..
index:: single: filenames; pathname expansion + +The :mod:`glob` module finds all the pathnames matching a specified pattern +according to the rules used by the Unix shell. No tilde expansion is done, but +``*``, ``?``, and character ranges expressed with ``[]`` will be correctly +matched. This is done by using the :func:`os.listdir` and +:func:`fnmatch.fnmatch` functions in concert, and not by actually invoking a +subshell. (For tilde and shell variable expansion, use +:func:`os.path.expanduser` and :func:`os.path.expandvars`.) + + +.. function:: glob(pathname) + + Return a possibly-empty list of path names that match *pathname*, which must be + a string containing a path specification. *pathname* can be either absolute + (like :file:`/usr/src/Python-1.5/Makefile`) or relative (like + :file:`../../Tools/\*/\*.gif`), and can contain shell-style wildcards. Broken + symlinks are included in the results (as in the shell). + + +.. function:: iglob(pathname) + + Return an iterator which yields the same values as :func:`glob` without actually + storing them all simultaneously. + + .. versionadded:: 2.5 + +For example, consider a directory containing only the following files: +:file:`1.gif`, :file:`2.txt`, and :file:`card.gif`. :func:`glob` will produce +the following results. Notice how any leading components of the path are +preserved. :: + + >>> import glob + >>> glob.glob('./[0-9].*') + ['./1.gif', './2.txt'] + >>> glob.glob('*.gif') + ['1.gif', 'card.gif'] + >>> glob.glob('?.gif') + ['1.gif'] + + +.. seealso:: + + Module :mod:`fnmatch` + Shell-style filename (not path) expansion + diff --git a/Doc/library/grp.rst b/Doc/library/grp.rst new file mode 100644 index 0000000..a71c308 --- /dev/null +++ b/Doc/library/grp.rst @@ -0,0 +1,63 @@ + +:mod:`grp` --- The group database +================================= + +.. module:: grp + :platform: Unix + :synopsis: The group database (getgrnam() and friends). + + +This module provides access to the Unix group database. 
It is available on all +Unix versions. + +Group database entries are reported as a tuple-like object, whose attributes +correspond to the members of the ``group`` structure (Attribute field below, see +``<grp.h>``): + ++-------+-----------+---------------------------------+ +| Index | Attribute | Meaning | ++=======+===========+=================================+ +| 0 | gr_name | the name of the group | ++-------+-----------+---------------------------------+ +| 1 | gr_passwd | the (encrypted) group password; | +| | | often empty | ++-------+-----------+---------------------------------+ +| 2 | gr_gid | the numerical group ID | ++-------+-----------+---------------------------------+ +| 3 | gr_mem | all the group member's user | +| | | names | ++-------+-----------+---------------------------------+ + +The gid is an integer, name and password are strings, and the member list is a +list of strings. (Note that most users are not explicitly listed as members of +the group they are in according to the password database. Check both databases +to get complete membership information.) + +It defines the following items: + + +.. function:: getgrgid(gid) + + Return the group database entry for the given numeric group ID. :exc:`KeyError` + is raised if the entry asked for cannot be found. + + +.. function:: getgrnam(name) + + Return the group database entry for the given group name. :exc:`KeyError` is + raised if the entry asked for cannot be found. + + +.. function:: getgrall() + + Return a list of all available group entries, in arbitrary order. + + +.. seealso:: + + Module :mod:`pwd` + An interface to the user database, similar to this. + + Module :mod:`spwd` + An interface to the shadow password database, similar to this.
+ diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst new file mode 100644 index 0000000..5978031 --- /dev/null +++ b/Doc/library/gzip.rst @@ -0,0 +1,68 @@ + +:mod:`gzip` --- Support for :program:`gzip` files +================================================= + +.. module:: gzip + :synopsis: Interfaces for gzip compression and decompression using file objects. + + +The data compression provided by the ``zlib`` module is compatible with that +used by the GNU compression program :program:`gzip`. Accordingly, the +:mod:`gzip` module provides the :class:`GzipFile` class to read and write +:program:`gzip`\ -format files, automatically compressing or decompressing the +data so it looks like an ordinary file object. Note that additional file +formats which can be decompressed by the :program:`gzip` and :program:`gunzip` +programs, such as those produced by :program:`compress` and :program:`pack`, +are not supported by this module. + +The module defines the following items: + + +.. class:: GzipFile([filename[, mode[, compresslevel[, fileobj]]]]) + + Constructor for the :class:`GzipFile` class, which simulates most of the methods + of a file object, with the exception of the :meth:`readinto` and + :meth:`truncate` methods. At least one of *fileobj* and *filename* must be + given a non-trivial value. + + The new class instance is based on *fileobj*, which can be a regular file, a + :class:`StringIO` object, or any other object which simulates a file. It + defaults to ``None``, in which case *filename* is opened to provide a file + object. + + When *fileobj* is not ``None``, the *filename* argument is only used to be + included in the :program:`gzip` file header, which may include the original + filename of the uncompressed file. It defaults to the filename of *fileobj*, if + discernible; otherwise, it defaults to the empty string, and in this case the + original filename is not included in the header.
+ + The *mode* argument can be any of ``'r'``, ``'rb'``, ``'a'``, ``'ab'``, ``'w'``, + or ``'wb'``, depending on whether the file will be read or written. The default + is the mode of *fileobj* if discernible; otherwise, the default is ``'rb'``. If + not given, the 'b' flag will be added to the mode to ensure the file is opened + in binary mode for cross-platform portability. + + The *compresslevel* argument is an integer from ``1`` to ``9`` controlling the + level of compression; ``1`` is fastest and produces the least compression, and + ``9`` is slowest and produces the most compression. The default is ``9``. + + Calling a :class:`GzipFile` object's :meth:`close` method does not close + *fileobj*, since you might wish to append more material after the compressed + data. This also allows you to pass a :class:`StringIO` object opened for + writing as *fileobj*, and retrieve the resulting memory buffer using the + :class:`StringIO` object's :meth:`getvalue` method. + + +.. function:: open(filename[, mode[, compresslevel]]) + + This is a shorthand for ``GzipFile(filename,`` ``mode,`` ``compresslevel)``. + The *filename* argument is required; *mode* defaults to ``'rb'`` and + *compresslevel* defaults to ``9``. + + +.. seealso:: + + Module :mod:`zlib` + The basic data compression module needed to support the :program:`gzip` file + format. + diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst new file mode 100644 index 0000000..f255554 --- /dev/null +++ b/Doc/library/hashlib.rst @@ -0,0 +1,121 @@ + +:mod:`hashlib` --- Secure hashes and message digests +==================================================== + +.. module:: hashlib + :synopsis: Secure hash and message digest algorithms. +.. moduleauthor:: Gregory P. Smith +.. sectionauthor:: Gregory P. Smith + + +.. versionadded:: 2.5 + +.. 
index:: + single: message digest, MD5 + single: secure hash algorithm, SHA1, SHA224, SHA256, SHA384, SHA512 + +This module implements a common interface to many different secure hash and +message digest algorithms. Included are the FIPS secure hash algorithms SHA1, +SHA224, SHA256, SHA384, and SHA512 (defined in FIPS 180-2) as well as RSA's MD5 +algorithm (defined in Internet :rfc:`1321`). The terms secure hash and message +digest are interchangeable. Older algorithms were called message digests. The +modern term is secure hash. + +.. warning:: + + Some algorithms have known hash collision weaknesses, see the FAQ at the end. + +There is one constructor method named for each type of :dfn:`hash`. All return +a hash object with the same simple interface. For example: use :func:`sha1` to +create a SHA1 hash object. You can now feed this object with arbitrary strings +using the :meth:`update` method. At any point you can ask it for the +:dfn:`digest` of the concatenation of the strings fed to it so far using the +:meth:`digest` or :meth:`hexdigest` methods. + +.. index:: single: OpenSSL + +Constructors for hash algorithms that are always present in this module are +:func:`md5`, :func:`sha1`, :func:`sha224`, :func:`sha256`, :func:`sha384`, and +:func:`sha512`. Additional algorithms may also be available depending upon the +OpenSSL library that Python uses on your platform. 
+ +For example, to obtain the digest of the string ``'Nobody inspects the spammish +repetition'``:: + + >>> import hashlib + >>> m = hashlib.md5() + >>> m.update("Nobody inspects") + >>> m.update(" the spammish repetition") + >>> m.digest() + '\xbbd\x9c\x83\xdd\x1e\xa5\xc9\xd9\xde\xc9\xa1\x8d\xf0\xff\xe9' + +More condensed:: + + >>> hashlib.sha224("Nobody inspects the spammish repetition").hexdigest() + 'a4337bc45a8fc544c03f52dc550cd6e1e87021bc896588bd79e901e2' + +A generic :func:`new` constructor that takes the string name of the desired +algorithm as its first parameter also exists to allow access to the above listed +hashes as well as any other algorithms that your OpenSSL library may offer. The +named constructors are much faster than :func:`new` and should be preferred. + +Using :func:`new` with an algorithm provided by OpenSSL:: + + >>> h = hashlib.new('ripemd160') + >>> h.update("Nobody inspects the spammish repetition") + >>> h.hexdigest() + 'cc4a5ce1b3df48aec5d22d1f16b894a0b894eccc' + +The following values are provided as constant attributes of the hash objects +returned by the constructors: + + +.. data:: digest_size + + The size of the resulting digest in bytes. + +A hash object has the following methods: + + +.. method:: hash.update(arg) + + Update the hash object with the string *arg*. Repeated calls are equivalent to + a single call with the concatenation of all the arguments: ``m.update(a); + m.update(b)`` is equivalent to ``m.update(a+b)``. + + +.. method:: hash.digest() + + Return the digest of the strings passed to the :meth:`update` method so far. + This is a string of :attr:`digest_size` bytes which may contain non-ASCII + characters, including null bytes. + + +.. method:: hash.hexdigest() + + Like :meth:`digest` except the digest is returned as a string of double length, + containing only hexadecimal digits. This may be used to exchange the value + safely in email or other non-binary environments. + + +.. 
method:: hash.copy() + + Return a copy ("clone") of the hash object. This can be used to efficiently + compute the digests of strings that share a common initial substring. + + +.. seealso:: + + Module :mod:`hmac` + A module to generate message authentication codes using hashes. + + Module :mod:`base64` + Another way to encode binary hashes for non-binary environments. + + http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf + The FIPS 180-2 publication on Secure Hash Algorithms. + + http://www.cryptography.com/cnews/hash.html + Hash Collision FAQ with information on which algorithms have known issues and + what that means regarding their use. + diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst new file mode 100644 index 0000000..2d38c26 --- /dev/null +++ b/Doc/library/heapq.rst @@ -0,0 +1,224 @@ + +:mod:`heapq` --- Heap queue algorithm +===================================== + +.. module:: heapq + :synopsis: Heap queue algorithm (a.k.a. priority queue). +.. moduleauthor:: Kevin O'Connor +.. sectionauthor:: Guido van Rossum +.. sectionauthor:: François Pinard + + +.. % Theoretical explanation: + +.. versionadded:: 2.3 + +This module provides an implementation of the heap queue algorithm, also known +as the priority queue algorithm. + +Heaps are arrays for which ``heap[k] <= heap[2*k+1]`` and ``heap[k] <= +heap[2*k+2]`` for all *k*, counting elements from zero. For the sake of +comparison, non-existing elements are considered to be infinite. The +interesting property of a heap is that ``heap[0]`` is always its smallest +element. + +The API below differs from textbook heap algorithms in two aspects: (a) We use +zero-based indexing. This makes the relationship between the index for a node +and the indexes for its children slightly less obvious, but is more suitable +since Python uses zero-based indexing. 
(b) Our pop method returns the smallest +item, not the largest (called a "min heap" in textbooks; a "max heap" is more +common in texts because of its suitability for in-place sorting). + +These two make it possible to view the heap as a regular Python list without +surprises: ``heap[0]`` is the smallest item, and ``heap.sort()`` maintains the +heap invariant! + +To create a heap, use a list initialized to ``[]``, or you can transform a +populated list into a heap via function :func:`heapify`. + +The following functions are provided: + + +.. function:: heappush(heap, item) + + Push the value *item* onto the *heap*, maintaining the heap invariant. + + +.. function:: heappop(heap) + + Pop and return the smallest item from the *heap*, maintaining the heap + invariant. If the heap is empty, :exc:`IndexError` is raised. + + +.. function:: heapify(x) + + Transform list *x* into a heap, in-place, in linear time. + + +.. function:: heapreplace(heap, item) + + Pop and return the smallest item from the *heap*, and also push the new *item*. + The heap size doesn't change. If the heap is empty, :exc:`IndexError` is raised. + This is more efficient than :func:`heappop` followed by :func:`heappush`, and + can be more appropriate when using a fixed-size heap. Note that the value + returned may be larger than *item*! That constrains reasonable uses of this + routine unless written as part of a conditional replacement:: + + if item > heap[0]: + item = heapreplace(heap, item) + +Example of use:: + + >>> from heapq import heappush, heappop + >>> heap = [] + >>> data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 0] + >>> for item in data: + ... heappush(heap, item) + ... + >>> ordered = [] + >>> while heap: + ... ordered.append(heappop(heap)) + ... + >>> print ordered + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + >>> data.sort() + >>> print data == ordered + True + >>> + +The module also offers three general purpose functions based on heaps. + + +.. 
function:: merge(*iterables) + + Merge multiple sorted inputs into a single sorted output (for example, merge + timestamped entries from multiple log files). Returns an iterator over the + sorted values. + + Similar to ``sorted(itertools.chain(*iterables))`` but returns an iterable, does + not pull the data into memory all at once, and assumes that each of the input + streams is already sorted (smallest to largest). + + .. versionadded:: 2.6 + + +.. function:: nlargest(n, iterable[, key]) + + Return a list with the *n* largest elements from the dataset defined by + *iterable*. *key*, if provided, specifies a function of one argument that is + used to extract a comparison key from each element in the iterable (for + example, ``key=str.lower``). Equivalent to: ``sorted(iterable, key=key, + reverse=True)[:n]`` + + .. versionadded:: 2.4 + + .. versionchanged:: 2.5 + Added the optional *key* argument. + + +.. function:: nsmallest(n, iterable[, key]) + + Return a list with the *n* smallest elements from the dataset defined by + *iterable*. *key*, if provided, specifies a function of one argument that is + used to extract a comparison key from each element in the iterable (for + example, ``key=str.lower``). Equivalent to: ``sorted(iterable, key=key)[:n]`` + + .. versionadded:: 2.4 + + .. versionchanged:: 2.5 + Added the optional *key* argument. + +The latter two functions perform best for smaller values of *n*. For larger +values, it is more efficient to use the :func:`sorted` function. Also, when +``n==1``, it is more efficient to use the builtin :func:`min` and :func:`max` +functions. + + +Theory +------ + +(This explanation is due to François Pinard. The Python code for this module +was contributed by Kevin O'Connor.) + +Heaps are arrays for which ``a[k] <= a[2*k+1]`` and ``a[k] <= a[2*k+2]`` for all +*k*, counting elements from 0. For the sake of comparison, non-existing +elements are considered to be infinite.
The interesting property of a heap is +that ``a[0]`` is always its smallest element. + +The strange invariant above is meant to be an efficient memory representation +for a tournament. The numbers below are *k*, not ``a[k]``:: + + 0 + + 1 2 + + 3 4 5 6 + + 7 8 9 10 11 12 13 14 + + 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 + +In the tree above, each cell *k* is topping ``2*k+1`` and ``2*k+2``. In a usual +binary tournament we see in sports, each cell is the winner over the two cells +it tops, and we can trace the winner down the tree to see all opponents s/he +had. However, in many computer applications of such tournaments, we do not need +to trace the history of a winner. To be more memory efficient, when a winner is +promoted, we try to replace it by something else at a lower level, and the rule +becomes that a cell and the two cells it tops contain three different items, but +the top cell "wins" over the two topped cells. + +If this heap invariant is protected at all times, index 0 is clearly the overall +winner. The simplest algorithmic way to remove it and find the "next" winner is +to move some loser (let's say cell 30 in the diagram above) into the 0 position, +and then percolate this new 0 down the tree, exchanging values, until the +invariant is re-established. This is clearly logarithmic on the total number of +items in the tree. By iterating over all items, you get an O(n log n) sort. + +A nice feature of this sort is that you can efficiently insert new items while +the sort is going on, provided that the inserted items are not "better" than the +last 0'th element you extracted. This is especially useful in simulation +contexts, where the tree holds all incoming events, and the "win" condition +means the smallest scheduled time. When an event schedules other events for +execution, they are scheduled into the future, so they can easily go into the +heap.
So, a heap is a good structure for implementing schedulers (this is what +I used for my MIDI sequencer :-). + +Various structures for implementing schedulers have been extensively studied, +and heaps are good for this, as they are reasonably speedy, the speed is almost +constant, and the worst case is not much different than the average case. +However, there are other representations which are more efficient overall, yet +the worst cases might be terrible. + +Heaps are also very useful in big disk sorts. You most probably all know that a +big sort implies producing "runs" (which are pre-sorted sequences, whose size is +usually related to the amount of CPU memory), followed by merging passes for +these runs, which merging is often very cleverly organised [#]_. It is very +important that the initial sort produces the longest runs possible. Tournaments +are a good way to achieve that. If, using all the memory available to hold a +tournament, you replace and percolate items that happen to fit the current run, +you'll produce runs which are twice the size of the memory for random input, and +much better for input fuzzily ordered. + +Moreover, if you output the 0'th item on disk and get an input which may not fit +in the current tournament (because the value "wins" over the last output value), +it cannot fit in the heap, so the size of the heap decreases. The freed memory +could be cleverly reused immediately for progressively building a second heap, +which grows at exactly the same rate the first heap is melting. When the first +heap completely vanishes, you switch heaps and start a new run. Clever and +quite effective! + +In a word, heaps are useful memory structures to know. I use them in a few +applications, and I think it is good to keep a 'heap' module around. :-) + +.. rubric:: Footnotes + +.. [#] The disk balancing algorithms which are current, nowadays, are more annoying + than clever, and this is a consequence of the seeking capabilities of the disks.
+ On devices which cannot seek, like big tape drives, the story was quite + different, and one had to be very clever to ensure (far in advance) that each + tape movement will be the most effective possible (that is, will best + participate at "progressing" the merge). Some tapes were even able to read + backwards, and this was also used to avoid the rewinding time. Believe me, real + good tape sorts were quite spectacular to watch! From all times, sorting has + always been a Great Art! :-) + diff --git a/Doc/library/hmac.rst b/Doc/library/hmac.rst new file mode 100644 index 0000000..10d41f7 --- /dev/null +++ b/Doc/library/hmac.rst @@ -0,0 +1,61 @@ + +:mod:`hmac` --- Keyed-Hashing for Message Authentication +======================================================== + +.. module:: hmac + :synopsis: Keyed-Hashing for Message Authentication (HMAC) implementation for Python. +.. moduleauthor:: Gerhard Häring +.. sectionauthor:: Gerhard Häring + + +.. versionadded:: 2.2 + +This module implements the HMAC algorithm as described by :rfc:`2104`. + + +.. function:: new(key[, msg[, digestmod]]) + + Return a new hmac object. If *msg* is present, the method call ``update(msg)`` + is made. *digestmod* is the digest constructor or module for the HMAC object to + use. It defaults to the :func:`hashlib.md5` constructor. + + .. note:: + + The md5 hash has known weaknesses but remains the default for backwards + compatibility. Choose a better one for your application. + +An HMAC object has the following methods: + + +.. method:: hmac.update(msg) + + Update the hmac object with the string *msg*. Repeated calls are equivalent to + a single call with the concatenation of all the arguments: ``m.update(a); + m.update(b)`` is equivalent to ``m.update(a + b)``. + + +.. method:: hmac.digest() + + Return the digest of the strings passed to the :meth:`update` method so far. + This string will be the same length as the *digest_size* of the digest given to + the constructor. 
It may contain non-ASCII characters, including NUL bytes. + + +.. method:: hmac.hexdigest() + + Like :meth:`digest` except the digest is returned as a string twice the length + containing only hexadecimal digits. This may be used to exchange the value + safely in email or other non-binary environments. + + +.. method:: hmac.copy() + + Return a copy ("clone") of the hmac object. This can be used to efficiently + compute the digests of strings that share a common initial substring. + + +.. seealso:: + + Module :mod:`hashlib` + The python module providing secure hash functions. + diff --git a/Doc/library/hotshot.rst b/Doc/library/hotshot.rst new file mode 100644 index 0000000..f6b5b13 --- /dev/null +++ b/Doc/library/hotshot.rst @@ -0,0 +1,152 @@ + +:mod:`hotshot` --- High performance logging profiler +==================================================== + +.. module:: hotshot + :synopsis: High performance logging profiler, mostly written in C. +.. moduleauthor:: Fred L. Drake, Jr. +.. sectionauthor:: Anthony Baxter + + +.. versionadded:: 2.2 + +This module provides a nicer interface to the :mod:`_hotshot` C module. Hotshot +is a replacement for the existing :mod:`profile` module. As it's written mostly +in C, it should result in a much smaller performance impact than the existing +:mod:`profile` module. + +.. note:: + + The :mod:`hotshot` module focuses on minimizing the overhead while profiling, at + the expense of long data post-processing times. For common usages it is + recommended to use :mod:`cProfile` instead. :mod:`hotshot` is not maintained and + might be removed from the standard library in the future. + +.. versionchanged:: 2.5 + the results should be more meaningful than in the past: the timing core + contained a critical bug. + +.. warning:: + + The :mod:`hotshot` profiler does not yet work well with threads. It is useful to + use an unthreaded script to run the profiler over the code you're interested in + measuring if at all possible. + + +.. 
class:: Profile(logfile[, lineevents[, linetimings]]) + + The profiler object. The argument *logfile* is the name of a log file to use for + logged profile data. The argument *lineevents* specifies whether to generate + events for every source line, or just on function call/return. It defaults to + ``0`` (only log function call/return). The argument *linetimings* specifies + whether to record timing information. It defaults to ``1`` (store timing + information). + + +.. _hotshot-objects: + +Profile Objects +--------------- + +Profile objects have the following methods: + + +.. method:: Profile.addinfo(key, value) + + Add an arbitrary labelled value to the profile output. + + +.. method:: Profile.close() + + Close the logfile and terminate the profiler. + + +.. method:: Profile.fileno() + + Return the file descriptor of the profiler's log file. + + +.. method:: Profile.run(cmd) + + Profile an :func:`exec`\ -compatible string in the script environment. The + globals from the :mod:`__main__` module are used as both the globals and locals + for the script. + + +.. method:: Profile.runcall(func, *args, **keywords) + + Profile a single call of a callable. Additional positional and keyword arguments + may be passed along; the result of the call is returned, and exceptions are + allowed to propagate cleanly, while ensuring that profiling is disabled on the + way out. + + +.. method:: Profile.runctx(cmd, globals, locals) + + Profile an :func:`exec`\ -compatible string in a specific environment. The + string is compiled before profiling begins. + + +.. method:: Profile.start() + + Start the profiler. + + +.. method:: Profile.stop() + + Stop the profiler. + + +Using hotshot data +------------------ + +.. module:: hotshot.stats + :synopsis: Statistical analysis for Hotshot + + +.. versionadded:: 2.2 + +This module loads hotshot profiling data into the standard :mod:`pstats` Stats +objects. + + +.. function:: load(filename) + + Load hotshot data from *filename*. 
Returns an instance of the + :class:`pstats.Stats` class. + + +.. seealso:: + + Module :mod:`profile` + The :mod:`profile` module's :class:`Stats` class + + +.. _hotshot-example: + +Example Usage +------------- + +Note that this example runs the python "benchmark" pystones. It can take some +time to run, and will produce large output files. :: + + >>> import hotshot, hotshot.stats, test.pystone + >>> prof = hotshot.Profile("stones.prof") + >>> benchtime, stones = prof.runcall(test.pystone.pystones) + >>> prof.close() + >>> stats = hotshot.stats.load("stones.prof") + >>> stats.strip_dirs() + >>> stats.sort_stats('time', 'calls') + >>> stats.print_stats(20) + 850004 function calls in 10.090 CPU seconds + + Ordered by: internal time, call count + + ncalls tottime percall cumtime percall filename:lineno(function) + 1 3.295 3.295 10.090 10.090 pystone.py:79(Proc0) + 150000 1.315 0.000 1.315 0.000 pystone.py:203(Proc7) + 50000 1.313 0.000 1.463 0.000 pystone.py:229(Func2) + . + . + . + diff --git a/Doc/library/htmllib.rst b/Doc/library/htmllib.rst new file mode 100644 index 0000000..96a7d08 --- /dev/null +++ b/Doc/library/htmllib.rst @@ -0,0 +1,186 @@ + +:mod:`htmllib` --- A parser for HTML documents +============================================== + +.. module:: htmllib + :synopsis: A parser for HTML documents. + + +.. index:: + single: HTML + single: hypertext + +.. index:: + module: sgmllib + module: formatter + single: SGMLParser (in module sgmllib) + +This module defines a class which can serve as a base for parsing text files +formatted in the HyperText Mark-up Language (HTML). The class is not directly +concerned with I/O --- it must be provided with input in string form via a +method, and makes calls to methods of a "formatter" object in order to produce +output. The :class:`HTMLParser` class is designed to be used as a base class +for other classes in order to add functionality, and allows most of its methods +to be extended or overridden. 
In turn, this class is derived from and extends +the :class:`SGMLParser` class defined in module :mod:`sgmllib`. The +:class:`HTMLParser` implementation supports the HTML 2.0 language as described +in :rfc:`1866`. Two implementations of formatter objects are provided in the +:mod:`formatter` module; refer to the documentation for that module for +information on the formatter interface. + +The following is a summary of the interface defined by +:class:`sgmllib.SGMLParser`: + +* The interface to feed data to an instance is through the :meth:`feed` method, + which takes a string argument. This can be called with as little or as much + text at a time as desired; ``p.feed(a); p.feed(b)`` has the same effect as + ``p.feed(a+b)``. When the data contains complete HTML markup constructs, these + are processed immediately; incomplete constructs are saved in a buffer. To + force processing of all unprocessed data, call the :meth:`close` method. + + For example, to parse the entire contents of a file, use:: + + parser.feed(open('myfile.html').read()) + parser.close() + +* The interface to define semantics for HTML tags is very simple: derive a class + and define methods called :meth:`start_tag`, :meth:`end_tag`, or :meth:`do_tag`. + The parser will call these at appropriate moments: :meth:`start_tag` or + :meth:`do_tag` is called when an opening tag of the form ``<tag ...>`` is + encountered; :meth:`end_tag` is called when a closing tag of the form ``</tag>`` + is encountered. If an opening tag requires a corresponding closing tag, like + ``<H1>`` ... ``</H1>``, the class should define the :meth:`start_tag` method; if + a tag requires no closing tag, like ``<P>``, the class should define the + :meth:`do_tag` method. + +The module defines a parser class and an exception: + + +.. class:: HTMLParser(formatter) + + This is the basic HTML parser class. It supports all entity names required by + the XHTML 1.0 Recommendation (http://www.w3.org/TR/xhtml1). It also defines + handlers for all HTML 2.0 and many HTML 3.0 and 3.2 elements. + + +.. exception:: HTMLParseError + + Exception raised by the :class:`HTMLParser` class when it encounters an error + while parsing. + + .. versionadded:: 2.4 + + +.. seealso:: + + Module :mod:`formatter` + Interface definition for transforming an abstract flow of formatting events into + specific output events on writer objects. + + Module :mod:`HTMLParser` + Alternate HTML parser that offers a slightly lower-level view of the input, but + is designed to work with XHTML, and does not implement some of the SGML syntax + not used in "HTML as deployed" and which isn't legal for XHTML. + + Module :mod:`htmlentitydefs` + Definition of replacement text for XHTML 1.0 entities. + + Module :mod:`sgmllib` + Base class for :class:`HTMLParser`. + + +.. _html-parser-objects: + +HTMLParser Objects +------------------ + +In addition to tag methods, the :class:`HTMLParser` class provides some +additional methods and instance variables for use within tag methods. + + +.. attribute:: HTMLParser.formatter + + This is the formatter instance associated with the parser. + + +.. attribute:: HTMLParser.nofill + + Boolean flag which should be true when whitespace should not be collapsed, or + false when it should be. In general, this should only be true when character + data is to be treated as "preformatted" text, as within a ``<PRE>`` element.
    +   The default value is false.  This affects the operation of :meth:`handle_data`
    +   and :meth:`save_end`.
    +
    +
    +.. method:: HTMLParser.anchor_bgn(href, name, type)
    +
    +   This method is called at the start of an anchor region.  The arguments
    +   correspond to the attributes of the ``<A>`` tag with the same names.  The
    +   default implementation maintains a list of hyperlinks (defined by the ``HREF``
    +   attribute for ``<A>`` tags) within the document.  The list of hyperlinks is
    +   available as the data attribute :attr:`anchorlist`.
    +
    +
    +.. method:: HTMLParser.anchor_end()
    +
    +   This method is called at the end of an anchor region.  The default
    +   implementation adds a textual footnote marker using an index into the list of
    +   hyperlinks created by :meth:`anchor_bgn`.
    +
    +
    +.. method:: HTMLParser.handle_image(source, alt[, ismap[, align[, width[, height]]]])
    +
    +   This method is called to handle images.  The default implementation simply
    +   passes the *alt* value to the :meth:`handle_data` method.
    +
    +
    +.. method:: HTMLParser.save_bgn()
    +
    +   Begins saving character data in a buffer instead of sending it to the formatter
    +   object.  Retrieve the stored data via :meth:`save_end`. Use of the
    +   :meth:`save_bgn` / :meth:`save_end` pair may not be nested.
    +
    +
    +.. method:: HTMLParser.save_end()
    +
    +   Ends buffering character data and returns all data saved since the preceding
    +   call to :meth:`save_bgn`.  If the :attr:`nofill` flag is false, whitespace is
    +   collapsed to single spaces.  A call to this method without a preceding call to
    +   :meth:`save_bgn` will raise a :exc:`TypeError` exception.
    +
    +
    +:mod:`htmlentitydefs` --- Definitions of HTML general entities
    +==============================================================
    +
    +.. module:: htmlentitydefs
    +   :synopsis: Definitions of HTML general entities.
    +.. sectionauthor:: Fred L. Drake, Jr. 
    +
    +
    +This module defines three dictionaries, ``name2codepoint``, ``codepoint2name``,
    +and ``entitydefs``. ``entitydefs`` is used by the :mod:`htmllib` module to
    +provide the :attr:`entitydefs` member of the :class:`HTMLParser` class.  The
    +definition provided here contains all the entities defined by XHTML 1.0  that
    +can be handled using simple textual substitution in the Latin-1 character set
    +(ISO-8859-1).
    +
    +
    +.. data:: entitydefs
    +
    +   A dictionary mapping XHTML 1.0 entity definitions to their replacement text in
    +   ISO Latin-1.
    +
    +
    +.. data:: name2codepoint
    +
    +   A dictionary that maps HTML entity names to the Unicode codepoints.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: codepoint2name
    +
    +   A dictionary that maps Unicode codepoints to HTML entity names.
    +
    +   .. versionadded:: 2.3
    +
    diff --git a/Doc/library/htmlparser.rst b/Doc/library/htmlparser.rst
    new file mode 100644
    index 0000000..85a38fb
    --- /dev/null
    +++ b/Doc/library/htmlparser.rst
    @@ -0,0 +1,183 @@
    +
    +:mod:`HTMLParser` --- Simple HTML and XHTML parser
    +==================================================
    +
    +.. module:: HTMLParser
    +   :synopsis: A simple parser that can handle HTML and XHTML.
    +
    +
    +.. versionadded:: 2.2
    +
    +.. index::
    +   single: HTML
    +   single: XHTML
    +
    +This module defines a class :class:`HTMLParser` which serves as the basis for
    +parsing text files formatted in HTML (HyperText Mark-up Language) and XHTML.
    +Unlike the parser in :mod:`htmllib`, this parser is not based on the SGML parser
    +in :mod:`sgmllib`.
    +
    +
    +.. class:: HTMLParser()
    +
    +   The :class:`HTMLParser` class is instantiated without arguments.
    +
    +   An HTMLParser instance is fed HTML data and calls handler functions when tags
    +   begin and end.  The :class:`HTMLParser` class is meant to be overridden by the
    +   user to provide a desired behavior.
    +
    +   Unlike the parser in :mod:`htmllib`, this parser does not check that end tags
    +   match start tags or call the end-tag handler for elements which are closed
    +   implicitly by closing an outer element.
    +
    +An exception is defined as well:
    +
    +
    +.. exception:: HTMLParseError
    +
    +   Exception raised by the :class:`HTMLParser` class when it encounters an error
    +   while parsing.  This exception provides three attributes: :attr:`msg` is a brief
    +   message explaining the error, :attr:`lineno` is the number of the line on which
    +   the broken construct was detected, and :attr:`offset` is the number of
    +   characters into the line at which the construct starts.
    +
    +:class:`HTMLParser` instances have the following methods:
    +
    +
    +.. method:: HTMLParser.reset()
    +
    +   Reset the instance.  Loses all unprocessed data.  This is called implicitly at
    +   instantiation time.
    +
    +
    +.. method:: HTMLParser.feed(data)
    +
    +   Feed some text to the parser.  It is processed insofar as it consists of
    +   complete elements; incomplete data is buffered until more data is fed or
    +   :meth:`close` is called.
    +
    +
    +.. method:: HTMLParser.close()
    +
    +   Force processing of all buffered data as if it were followed by an end-of-file
    +   mark.  This method may be redefined by a derived class to define additional
    +   processing at the end of the input, but the redefined version should always call
    +   the :class:`HTMLParser` base class method :meth:`close`.
    +
    +
    +.. method:: HTMLParser.getpos()
    +
    +   Return current line number and offset.
    +
    +
    +.. method:: HTMLParser.get_starttag_text()
    +
    +   Return the text of the most recently opened start tag.  This should not normally
    +   be needed for structured processing, but may be useful in dealing with HTML "as
    +   deployed" or for re-generating input with minimal changes (whitespace between
    +   attributes can be preserved, etc.).
    +
    +
    +.. method:: HTMLParser.handle_starttag(tag, attrs)
    +
    +   This method is called to handle the start of a tag.  It is intended to be
    +   overridden by a derived class; the base class implementation does nothing.
    +
    +   The *tag* argument is the name of the tag converted to lower case. The *attrs*
    +   argument is a list of ``(name, value)`` pairs containing the attributes found
    +   inside the tag's ``<>`` brackets.  The *name* will be translated to lower case,
    +   and quotes in the *value* have been removed, and character and entity references
    +   have been replaced.  For instance, for the tag
    +   ``<A HREF="http://www.cwi.nl/">``, this method would be called as
    +   ``handle_starttag('a', [('href', 'http://www.cwi.nl/')])``.
    +
    +   .. versionchanged:: 2.6
    +      All entity references from htmlentitydefs are now replaced in the attribute
    +      values.
    +
    +
    +.. method:: HTMLParser.handle_startendtag(tag, attrs)
    +
    +   Similar to :meth:`handle_starttag`, but called when the parser encounters an
    +   XHTML-style empty tag (``<a .../>``).  This method may be overridden by
    +   subclasses which require this particular lexical information; the default
    +   implementation simply calls :meth:`handle_starttag` and :meth:`handle_endtag`.
    +
    +
    +.. method:: HTMLParser.handle_endtag(tag)
    +
    +   This method is called to handle the end tag of an element.  It is intended to be
    +   overridden by a derived class; the base class implementation does nothing.  The
    +   *tag* argument is the name of the tag converted to lower case.
    +
    +
    +.. method:: HTMLParser.handle_data(data)
    +
    +   This method is called to process arbitrary data.  It is intended to be
    +   overridden by a derived class; the base class implementation does nothing.
    +
    +
    +.. method:: HTMLParser.handle_charref(name)
    +
    +   This method is called to process a character reference of the form ``&#ref;``.
    +   It is intended to be overridden by a derived class; the base class
    +   implementation does nothing.
    +
    +
    +.. method:: HTMLParser.handle_entityref(name)
    +
    +   This method is called to process a general entity reference of the form
    +   ``&name;`` where *name* is a general entity reference.  It is intended to be
    +   overridden by a derived class; the base class implementation does nothing.
    +
    +
    +.. method:: HTMLParser.handle_comment(data)
    +
    +   This method is called when a comment is encountered.  The *data* argument is
    +   a string containing the text between the ``--`` and ``--`` delimiters, but not
    +   the delimiters themselves.  For example, the comment ``<!--text-->`` will cause
    +   this method to be called with the argument ``'text'``.  It is intended to be
    +   overridden by a derived class; the base class implementation does nothing.
    +
    +
    +.. method:: HTMLParser.handle_decl(decl)
    +
    +   Method called when an SGML declaration is read by the parser.  The *decl*
    +   parameter will be the entire contents of the declaration inside the
    +   ``<!...>`` markup.  It is intended to be overridden by a derived class; the
    +   base class implementation does nothing.
    +
    +
    +.. method:: HTMLParser.handle_pi(data)
    +
    +   Method called when a processing instruction is encountered.  The *data*
    +   parameter will contain the entire processing instruction. For example, for the
    +   processing instruction ``<?proc color='red'>``, this method would be called as
    +   ``handle_pi("proc color='red'")``.  It is intended to be overridden by a derived
    +   class; the base class implementation does nothing.
    +
    +   .. note::
    +
    +      The :class:`HTMLParser` class uses the SGML syntactic rules for processing
    +      instructions.  An XHTML processing instruction using the trailing ``'?'`` will
    +      cause the ``'?'`` to be included in *data*.
    +
    +
    +.. _htmlparser-example:
    +
    +Example HTML Parser Application
    +-------------------------------
    +
    +As a basic example, below is a simple HTML parser that uses the
    +:class:`HTMLParser` class to print out tags as they are encountered::
    +
    +   from HTMLParser import HTMLParser
    +
    +   class MyHTMLParser(HTMLParser):
    +
    +       def handle_starttag(self, tag, attrs):
    +           print "Encountered the beginning of a %s tag" % tag
    +
    +       def handle_endtag(self, tag):
    +           print "Encountered the end of a %s tag" % tag
    +
    diff --git a/Doc/library/httplib.rst b/Doc/library/httplib.rst
    new file mode 100644
    index 0000000..aae2219
    --- /dev/null
    +++ b/Doc/library/httplib.rst
    @@ -0,0 +1,552 @@
    +
    +:mod:`httplib` --- HTTP protocol client
    +=======================================
    +
    +.. module:: httplib
    +   :synopsis: HTTP and HTTPS protocol client (requires sockets).
    +
    +
    +.. index::
    +   pair: HTTP; protocol
    +   single: HTTP; httplib (standard module)
    +
    +.. index:: module: urllib
    +
    +This module defines classes which implement the client side of the HTTP and
    +HTTPS protocols.  It is normally not used directly --- the module :mod:`urllib`
    +uses it to handle URLs that use HTTP and HTTPS.
    +
    +.. note::
    +
    +   HTTPS support is only available if the :mod:`socket` module was compiled with
    +   SSL support.
    +
    +.. note::
    +
    +   The public interface for this module changed substantially in Python 2.0.  The
    +   :class:`HTTP` class is retained only for backward compatibility with 1.5.2.  It
    +   should not be used in new code.  Refer to the online docstrings for usage.
    +
    +The module provides the following classes:
    +
    +
    +.. class:: HTTPConnection(host[, port[, strict[, timeout]]])
    +
    +   An :class:`HTTPConnection` instance represents one transaction with an HTTP
    +   server.  It should be instantiated passing it a host and optional port number.
    +   If no port number is passed, the port is extracted from the host string if it
    +   has the form ``host:port``, else the default HTTP port (80) is used.  When True,
    +   the optional parameter *strict* causes ``BadStatusLine`` to be raised if the
    +   status line can't be parsed as a valid HTTP/1.0 or 1.1 status line.  If the
    +   optional *timeout* parameter is given, connection attempts will time out after
    +   that many seconds (if it is not given or ``None``, the global default timeout
    +   setting is used).
    +
    +   For example, the following calls all create instances that connect to the server
    +   at the same host and port::
    +
    +      >>> h1 = httplib.HTTPConnection('www.cwi.nl')
    +      >>> h2 = httplib.HTTPConnection('www.cwi.nl:80')
    +      >>> h3 = httplib.HTTPConnection('www.cwi.nl', 80)
    +      >>> h4 = httplib.HTTPConnection('www.cwi.nl', 80, timeout=10)
    +
    +   .. versionadded:: 2.0
    +
    +   .. versionchanged:: 2.6
    +      *timeout* was added.
    +
    +
    +.. class:: HTTPSConnection(host[, port[, key_file[, cert_file[, strict[, timeout]]]]])
    +
    +   A subclass of :class:`HTTPConnection` that uses SSL for communication with
    +   secure servers.  Default port is ``443``. *key_file* is the name of a PEM
    +   formatted file that contains your private key. *cert_file* is a PEM formatted
    +   certificate chain file.
    +
    +   .. warning::
    +
    +      This does not do any certificate verification!
    +
    +   .. versionadded:: 2.0
    +
    +   .. versionchanged:: 2.6
    +      *timeout* was added.
    +
    +
    +.. class:: HTTPResponse(sock[, debuglevel=0][, strict=0])
    +
    +   Class whose instances are returned upon successful connection.  Not instantiated
    +   directly by user.
    +
    +   .. versionadded:: 2.0
    +
    +The following exceptions are raised as appropriate:
    +
    +
    +.. exception:: HTTPException
    +
    +   The base class of the other exceptions in this module.  It is a subclass of
    +   :exc:`Exception`.
    +
    +   .. versionadded:: 2.0
    +
    +
    +.. exception:: NotConnected
    +
    +   A subclass of :exc:`HTTPException`.
    +
    +   .. versionadded:: 2.0
    +
    +
    +.. exception:: InvalidURL
    +
    +   A subclass of :exc:`HTTPException`, raised if a port is given and is either
    +   non-numeric or empty.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. exception:: UnknownProtocol
    +
    +   A subclass of :exc:`HTTPException`.
    +
    +   .. versionadded:: 2.0
    +
    +
    +.. exception:: UnknownTransferEncoding
    +
    +   A subclass of :exc:`HTTPException`.
    +
    +   .. versionadded:: 2.0
    +
    +
    +.. exception:: UnimplementedFileMode
    +
    +   A subclass of :exc:`HTTPException`.
    +
    +   .. versionadded:: 2.0
    +
    +
    +.. exception:: IncompleteRead
    +
    +   A subclass of :exc:`HTTPException`.
    +
    +   .. versionadded:: 2.0
    +
    +
    +.. exception:: ImproperConnectionState
    +
    +   A subclass of :exc:`HTTPException`.
    +
    +   .. versionadded:: 2.0
    +
    +
    +.. exception:: CannotSendRequest
    +
    +   A subclass of :exc:`ImproperConnectionState`.
    +
    +   .. versionadded:: 2.0
    +
    +
    +.. exception:: CannotSendHeader
    +
    +   A subclass of :exc:`ImproperConnectionState`.
    +
    +   .. versionadded:: 2.0
    +
    +
    +.. exception:: ResponseNotReady
    +
    +   A subclass of :exc:`ImproperConnectionState`.
    +
    +   .. versionadded:: 2.0
    +
    +
    +.. exception:: BadStatusLine
    +
    +   A subclass of :exc:`HTTPException`.  Raised if a server responds with an HTTP
    +   status code that we don't understand.
    +
    +   .. versionadded:: 2.0
    +
    +The constants defined in this module are:
    +
    +
    +.. data:: HTTP_PORT
    +
    +   The default port for the HTTP protocol (always ``80``).
    +
    +
    +.. data:: HTTPS_PORT
    +
    +   The default port for the HTTPS protocol (always ``443``).
    +
    +and also the following constants for integer status codes:
    +
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| Constant                                 | Value   | Definition                                                            |
    ++==========================================+=========+=======================================================================+
    +| :const:`CONTINUE`                        | ``100`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.1.1                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.1.1>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`SWITCHING_PROTOCOLS`             | ``101`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.1.2                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.1.2>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`PROCESSING`                      | ``102`` | WEBDAV, `RFC 2518, Section 10.1                                       |
    +|                                          |         | <http://www.webdav.org/specs/rfc2518.html#STATUS_102>`_               |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`OK`                              | ``200`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.2.1                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.2.1>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`CREATED`                         | ``201`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.2.2                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.2.2>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`ACCEPTED`                        | ``202`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.2.3                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.2.3>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`NON_AUTHORITATIVE_INFORMATION`   | ``203`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.2.4                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.2.4>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`NO_CONTENT`                      | ``204`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.2.5                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.2.5>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`RESET_CONTENT`                   | ``205`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.2.6                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.2.6>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`PARTIAL_CONTENT`                 | ``206`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.2.7                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.2.7>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`MULTI_STATUS`                    | ``207`` | WEBDAV, `RFC 2518, Section 10.2                                       |
    +|                                          |         | <http://www.webdav.org/specs/rfc2518.html#STATUS_207>`_               |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`IM_USED`                         | ``226`` | Delta encoding in HTTP,                                               |
    +|                                          |         | :rfc:`3229`, Section 10.4.1                                           |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`MULTIPLE_CHOICES`                | ``300`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.3.1                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.1>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`MOVED_PERMANENTLY`               | ``301`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.3.2                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.2>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`FOUND`                           | ``302`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.3.3                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.3>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`SEE_OTHER`                       | ``303`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.3.4                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`NOT_MODIFIED`                    | ``304`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.3.5                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.5>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`USE_PROXY`                       | ``305`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.3.6                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.6>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`TEMPORARY_REDIRECT`              | ``307`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.3.8                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.8>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`BAD_REQUEST`                     | ``400`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.1                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.1>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`UNAUTHORIZED`                    | ``401`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.2                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.2>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`PAYMENT_REQUIRED`                | ``402`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.3                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.3>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`FORBIDDEN`                       | ``403`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.4                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.4>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`NOT_FOUND`                       | ``404`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.5                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.5>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`METHOD_NOT_ALLOWED`              | ``405`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.6                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.6>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`NOT_ACCEPTABLE`                  | ``406`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.7                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.7>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`PROXY_AUTHENTICATION_REQUIRED`   | ``407`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.8                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.8>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`REQUEST_TIMEOUT`                 | ``408`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.9                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.9>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`CONFLICT`                        | ``409`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.10                                                               |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.10>`_ |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`GONE`                            | ``410`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.11                                                               |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.11>`_ |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`LENGTH_REQUIRED`                 | ``411`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.12                                                               |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.12>`_ |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`PRECONDITION_FAILED`             | ``412`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.13                                                               |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.13>`_ |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`REQUEST_ENTITY_TOO_LARGE`        | ``413`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.14                                                               |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.14>`_ |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`REQUEST_URI_TOO_LONG`            | ``414`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.15                                                               |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.15>`_ |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`UNSUPPORTED_MEDIA_TYPE`          | ``415`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.16                                                               |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.16>`_ |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`REQUESTED_RANGE_NOT_SATISFIABLE` | ``416`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.17                                                               |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.17>`_ |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`EXPECTATION_FAILED`              | ``417`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.4.18                                                               |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.18>`_ |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`UNPROCESSABLE_ENTITY`            | ``422`` | WEBDAV, `RFC 2518, Section 10.3                                       |
    +|                                          |         | <http://www.webdav.org/specs/rfc2518.html#STATUS_422>`_               |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`LOCKED`                          | ``423`` | WEBDAV, `RFC 2518, Section 10.4                                       |
    +|                                          |         | <http://www.webdav.org/specs/rfc2518.html#STATUS_423>`_               |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`FAILED_DEPENDENCY`               | ``424`` | WEBDAV, `RFC 2518, Section 10.5                                       |
    +|                                          |         | `_               |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`UPGRADE_REQUIRED`                | ``426`` | HTTP Upgrade to TLS,                                                  |
    +|                                          |         | :rfc:`2817`, Section 6                                                |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`INTERNAL_SERVER_ERROR`           | ``500`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.5.1                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.5.1>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`NOT_IMPLEMENTED`                 | ``501`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.5.2                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.5.2>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`BAD_GATEWAY`                     | ``502`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.5.3                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.5.3>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`SERVICE_UNAVAILABLE`             | ``503`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.5.4                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.5.4>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`GATEWAY_TIMEOUT`                 | ``504`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.5.5                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.5.5>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`HTTP_VERSION_NOT_SUPPORTED`      | ``505`` | HTTP/1.1, `RFC 2616, Section                                          |
    +|                                          |         | 10.5.6                                                                |
    +|                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.5.6>`_  |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`INSUFFICIENT_STORAGE`            | ``507`` | WEBDAV, `RFC 2518, Section 10.6                                       |
    +|                                          |         | <http://www.webdav.org/specs/rfc2518.html#STATUS_507>`_               |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +| :const:`NOT_EXTENDED`                    | ``510`` | An HTTP Extension Framework,                                          |
    +|                                          |         | :rfc:`2774`, Section 7                                                |
    ++------------------------------------------+---------+-----------------------------------------------------------------------+
    +
    +
    +.. data:: responses
    +
    +   This dictionary maps the HTTP 1.1 status codes to the W3C names.
    +
    +   Example: ``httplib.responses[httplib.NOT_FOUND]`` is ``'Not Found'``.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. _httpconnection-objects:
    +
    +HTTPConnection Objects
    +----------------------
    +
    +:class:`HTTPConnection` instances have the following methods:
    +
    +
    +.. method:: HTTPConnection.request(method, url[, body[, headers]])
    +
    +   This will send a request to the server using the HTTP request method *method*
    +   and the selector *url*.  If the *body* argument is present, it should be a
    +   string of data to send after the headers are finished. Alternatively, it may
    +   be an open file object, in which case the contents of the file are sent; this
    +   file object should support ``fileno()`` and ``read()`` methods. The header
    +   Content-Length is automatically set to the correct value. The *headers*
    +   argument should be a mapping of extra HTTP headers to send with the request.
    +
    +   .. versionchanged:: 2.6
    +      *body* can be a file object.
    +
    +
    +.. method:: HTTPConnection.getresponse()
    +
    +   Should be called after a request is sent to get the response from the server.
    +   Returns an :class:`HTTPResponse` instance.
    +
    +   .. note::
    +
    +      You must have read the whole response before you can send a new
    +      request to the server.
    +
    +
    +.. method:: HTTPConnection.set_debuglevel(level)
    +
    +   Set the debugging level (the amount of debugging output printed). The default
    +   debug level is ``0``, meaning no debugging output is printed.
    +
    +
    +.. method:: HTTPConnection.connect()
    +
    +   Connect to the server specified when the object was created.
    +
    +
    +.. method:: HTTPConnection.close()
    +
    +   Close the connection to the server.
    +
    +As an alternative to using the :meth:`request` method described above, you can
    +also send your request step by step, by using the four functions below.
    +
    +
    +.. method:: HTTPConnection.putrequest(request, selector[, skip_host[, skip_accept_encoding]])
    +
    +   This should be the first call after the connection to the server has been made.
    +   It sends a line to the server consisting of the *request* string, the *selector*
    +   string, and the HTTP version (``HTTP/1.1``).  To disable automatic sending of
    +   ``Host:`` or ``Accept-Encoding:`` headers (for example to accept additional
    +   content encodings), specify *skip_host* or *skip_accept_encoding* with non-False
    +   values.
    +
    +   .. versionchanged:: 2.4
    +      *skip_accept_encoding* argument added.
    +
    +
    +.. method:: HTTPConnection.putheader(header, argument[, ...])
    +
    +   Send an :rfc:`822`\ -style header to the server.  It sends a line to the server
    +   consisting of the header, a colon and a space, and the first argument.  If more
    +   arguments are given, continuation lines are sent, each consisting of a tab and
    +   an argument.
    +
    +
    +.. method:: HTTPConnection.endheaders()
    +
    +   Send a blank line to the server, signalling the end of the headers.
    +
    +
    +.. method:: HTTPConnection.send(data)
    +
    +   Send data to the server.  This should be used directly only after the
    +   :meth:`endheaders` method has been called and before :meth:`getresponse` is
    +   called.
    +
    +
    +.. _httpresponse-objects:
    +
    +HTTPResponse Objects
    +--------------------
    +
    +:class:`HTTPResponse` instances have the following methods and attributes:
    +
    +
    +.. method:: HTTPResponse.read([amt])
    +
    +   Reads and returns the response body, or up to the next *amt* bytes.
    +
    +
    +.. method:: HTTPResponse.getheader(name[, default])
    +
    +   Get the contents of the header *name*, or *default* if there is no matching
    +   header.
    +
    +
    +.. method:: HTTPResponse.getheaders()
    +
    +   Return a list of (header, value) tuples.
    +
    +   .. versionadded:: 2.4
    +
    +
    +.. attribute:: HTTPResponse.msg
    +
    +   A :class:`mimetools.Message` instance containing the response headers.
    +
    +
    +.. attribute:: HTTPResponse.version
    +
    +   HTTP protocol version used by server.  10 for HTTP/1.0, 11 for HTTP/1.1.
    +
    +
    +.. attribute:: HTTPResponse.status
    +
    +   Status code returned by server.
    +
    +
    +.. attribute:: HTTPResponse.reason
    +
    +   Reason phrase returned by server.
    +
    +
    +.. _httplib-examples:
    +
    +Examples
    +--------
    +
    +Here is an example session that uses the ``GET`` method::
    +
    +   >>> import httplib
    +   >>> conn = httplib.HTTPConnection("www.python.org")
    +   >>> conn.request("GET", "/index.html")
    +   >>> r1 = conn.getresponse()
    +   >>> print r1.status, r1.reason
    +   200 OK
    +   >>> data1 = r1.read()
    +   >>> conn.request("GET", "/parrot.spam")
    +   >>> r2 = conn.getresponse()
    +   >>> print r2.status, r2.reason
    +   404 Not Found
    +   >>> data2 = r2.read()
    +   >>> conn.close()
    +
    +Here is an example session that shows how to ``POST`` requests::
    +
    +   >>> import httplib, urllib
    +   >>> params = urllib.urlencode({'spam': 1, 'eggs': 2, 'bacon': 0})
    +   >>> headers = {"Content-type": "application/x-www-form-urlencoded",
    +   ...            "Accept": "text/plain"}
    +   >>> conn = httplib.HTTPConnection("musi-cal.mojam.com:80")
    +   >>> conn.request("POST", "/cgi-bin/query", params, headers)
    +   >>> response = conn.getresponse()
    +   >>> print response.status, response.reason
    +   200 OK
    +   >>> data = response.read()
    +   >>> conn.close()
    +
    diff --git a/Doc/library/i18n.rst b/Doc/library/i18n.rst
    new file mode 100644
    index 0000000..8e57102
    --- /dev/null
    +++ b/Doc/library/i18n.rst
    @@ -0,0 +1,19 @@
    +
    +.. _i18n:
    +
    +********************
    +Internationalization
    +********************
    +
    +The modules described in this chapter help you write software that is
    +independent of language and locale by providing mechanisms for selecting a
    +language to be used in program messages or by tailoring output to match local
    +conventions.
    +
    +The list of modules described in this chapter is:
    +
    +
    +.. toctree::
    +
    +   gettext.rst
    +   locale.rst
    diff --git a/Doc/library/ic.rst b/Doc/library/ic.rst
    new file mode 100644
    index 0000000..d5e03bd
    --- /dev/null
    +++ b/Doc/library/ic.rst
    @@ -0,0 +1,119 @@
    +
    +:mod:`ic` --- Access to the Mac OS X Internet Config
    +====================================================
    +
    +.. module:: ic
    +   :platform: Mac
    +   :synopsis: Access to the Mac OS X Internet Config.
    +
    +
    +This module provides access to various internet-related preferences set through
    +:program:`System Preferences` or the :program:`Finder`.
    +
    +.. index:: module: icglue
    +
    +There is a low-level companion module :mod:`icglue` which provides the basic
    +Internet Config access functionality.  This low-level module is not documented,
    +but the docstrings of the routines document the parameters and the routine names
    +are the same as for the Pascal or C API to Internet Config, so the standard IC
    +programmers' documentation can be used if this module is needed.
    +
    +The :mod:`ic` module defines the :exc:`error` exception and symbolic names for
    +all error codes Internet Config can produce; see the source for details.
    +
    +
    +.. exception:: error
    +
    +   Exception raised on errors in the :mod:`ic` module.
    +
    +The :mod:`ic` module defines the following class and function:
    +
    +
    +.. class:: IC([signature[, ic]])
    +
    +   Create an Internet Config object. The *signature* is a 4-character creator code of
    +   the current application (default ``'Pyth'``) which may influence some of IC's
    +   settings. The optional *ic* argument is a low-level ``icglue.icinstance``
    +   created beforehand; this may be useful if you want to get preferences from a
    +   different config file, etc.
    +
    +
    +.. function:: launchurl(url[, hint])
    +              parseurl(data[, start[, end[, hint]]])
    +              mapfile(file)
    +              maptypecreator(type, creator[, filename])
    +              settypecreator(file)
    +
    +   These functions are "shortcuts" to the methods of the same name, described
    +   below.
    +
    +
    +IC Objects
    +----------
    +
    +:class:`IC` objects have a mapping interface, hence to obtain the mail address
    +you simply get ``ic['MailAddress']``. Assignment also works, and changes the
    +option in the configuration file.
    +
    +The module knows about various datatypes, and converts the internal IC
    +representation to a "logical" Python data structure. Running the :mod:`ic`
    +module standalone will run a test program that lists all keys and values in your
    +IC database; this will have to serve as documentation.
    +
    +If the module does not know how to represent the data it returns an instance of
    +the ``ICOpaqueData`` type, with the raw data in its :attr:`data` attribute.
    +Objects of this type are also acceptable values for assignment.
    +
    +Besides the dictionary interface, :class:`IC` objects have the following
    +methods:
    +
    +
    +.. method:: IC.launchurl(url[, hint])
    +
    +   Parse the given URL, launch the correct application and pass it the URL. The
    +   optional *hint* can be a scheme name such as ``'mailto:'``, in which case
    +   incomplete URLs are completed with this scheme.  If *hint* is not provided,
    +   incomplete URLs are invalid.
    +
    +
    +.. method:: IC.parseurl(data[, start[, end[, hint]]])
    +
    +   Find a URL somewhere in *data* and return start position, end position and the
    +   URL. The optional *start* and *end* can be used to limit the search, so for
    +   instance if a user clicks in a long text field you can pass the whole text field
    +   and the click-position in *start* and this routine will return the whole URL in
    +   which the user clicked.  As above, *hint* is an optional scheme used to complete
    +   incomplete URLs.
    +
    +
    +.. method:: IC.mapfile(file)
    +
    +   Return the mapping entry for the given *file*, which can be passed as either a
    +   filename or an :func:`FSSpec` result, and which need not exist.
    +
    +   The mapping entry is returned as a tuple ``(version, type, creator, postcreator,
    +   flags, extension, appname, postappname, mimetype, entryname)``, where *version*
    +   is the entry version number, *type* is the 4-character filetype, *creator* is
    +   the 4-character creator type, *postcreator* is the 4-character creator code of
    +   an optional application to post-process the file after downloading, *flags* are
    +   various bits specifying whether to transfer in binary or ascii and such,
    +   *extension* is the filename extension for this file type, *appname* is the
    +   printable name of the application to which this file belongs, *postappname* is
    +   the name of the postprocessing application, *mimetype* is the MIME type of this
    +   file and *entryname* is the name of this entry.
    +
    +
    +.. method:: IC.maptypecreator(type, creator[, filename])
    +
    +   Return the mapping entry for files with given 4-character *type* and *creator*
    +   codes. The optional *filename* may be specified to further help finding the
    +   correct entry (if the creator code is ``'????'``, for instance).
    +
    +   The mapping entry is returned in the same format as for :meth:`mapfile`.
    +
    +
    +.. method:: IC.settypecreator(file)
    +
    +   Given an existing *file*, specified either as a filename or as an :func:`FSSpec`
    +   result, set its creator and type correctly based on its extension.  The finder
    +   is told about the change, so the finder icon will be updated quickly.
    diff --git a/Doc/library/idle.rst b/Doc/library/idle.rst
    new file mode 100644
    index 0000000..44b59e9
    --- /dev/null
    +++ b/Doc/library/idle.rst
    @@ -0,0 +1,288 @@
    +.. _idle:
    +
    +Idle
    +====
    +
    +.. moduleauthor:: Guido van Rossum 
    +
    +
    +.. % \declaremodule{standard}{idle}
    +.. % \modulesynopsis{A Python Integrated Development Environment}
    +
    +.. index::
    +   single: Idle
    +   single: Python Editor
    +   single: Integrated Development Environment
    +
    +IDLE is the Python IDE built with the :mod:`Tkinter` GUI toolkit.
    +
    +IDLE has the following features:
    +
    +* coded in 100% pure Python, using the :mod:`Tkinter` GUI toolkit
    +
    +* cross-platform: works on Windows and Unix (on Mac OS, there are currently
    +  problems with Tcl/Tk)
    +
    +* multi-window text editor with multiple undo, Python colorizing and many other
    +  features, e.g. smart indent and call tips
    +
    +* Python shell window (a.k.a. interactive interpreter)
    +
    +* debugger (not complete, but you can set breakpoints, view and step)
    +
    +
    +Menus
    +-----
    +
    +
    +File menu
    +^^^^^^^^^
    +
    +New window
    +   create a new editing window
    +
    +Open...
    +   open an existing file
    +
    +Open module...
    +   open an existing module (searches sys.path)
    +
    +Class browser
    +   show classes and methods in current file
    +
    +Path browser
    +   show sys.path directories, modules, classes and methods
    +
    +.. index::
    +   single: Class browser
    +   single: Path browser
    +
    +Save
    +   save current window to the associated file (unsaved windows have a \* before and
    +   after the window title)
    +
    +Save As...
    +   save current window to new file, which becomes the associated file
    +
    +Save Copy As...
    +   save current window to different file without changing the associated file
    +
    +Close
    +   close current window (asks to save if unsaved)
    +
    +Exit
    +   close all windows and quit IDLE (asks to save if unsaved)
    +
    +
    +Edit menu
    +^^^^^^^^^
    +
    +Undo
    +   Undo last change to current window (max 1000 changes)
    +
    +Redo
    +   Redo last undone change to current window
    +
    +Cut
    +   Copy selection into system-wide clipboard; then delete selection
    +
    +Copy
    +   Copy selection into system-wide clipboard
    +
    +Paste
    +   Insert system-wide clipboard into window
    +
    +Select All
    +   Select the entire contents of the edit buffer
    +
    +Find...
    +   Open a search dialog box with many options
    +
    +Find again
    +   Repeat last search
    +
    +Find selection
    +   Search for the string in the selection
    +
    +Find in Files...
    +   Open a search dialog box for searching files
    +
    +Replace...
    +   Open a search-and-replace dialog box
    +
    +Go to line
    +   Ask for a line number and show that line
    +
    +Indent region
    +   Shift selected lines right 4 spaces
    +
    +Dedent region
    +   Shift selected lines left 4 spaces
    +
    +Comment out region
    +   Insert ## in front of selected lines
    +
    +Uncomment region
    +   Remove leading # or ## from selected lines
    +
    +Tabify region
    +   Turn *leading* stretches of spaces into tabs
    +
    +Untabify region
    +   Turn *all* tabs into the right number of spaces
    +
    +Expand word
    +   Expand the word you have typed to match another word in the same buffer; repeat
    +   to get a different expansion
    +
    +Format Paragraph
    +   Reformat the current blank-line-separated paragraph
    +
    +Import module
    +   Import or reload the current module
    +
    +Run script
    +   Execute the current file in the __main__ namespace
    +
    +.. index::
    +   single: Import module
    +   single: Run script
    +
    +
    +Windows menu
    +^^^^^^^^^^^^
    +
    +Zoom Height
    +   toggles the window between normal size (24x80) and maximum height.
    +
    +The rest of this menu lists the names of all open windows; select one to bring
    +it to the foreground (deiconifying it if necessary).
    +
    +
    +Debug menu (in the Python Shell window only)
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +Go to file/line
    +   look around the insert point for a filename and linenumber, open the file, and
    +   show the line.
    +
    +Open stack viewer
    +   show the stack traceback of the last exception
    +
    +Debugger toggle
    +   Run commands in the shell under the debugger
    +
    +JIT Stack viewer toggle
    +   Open stack viewer on traceback
    +
    +.. index::
    +   single: stack viewer
    +   single: debugger
    +
    +
    +Basic editing and navigation
    +----------------------------
    +
    +* :kbd:`Backspace` deletes to the left; :kbd:`Del` deletes to the right
    +
    +* Arrow keys and :kbd:`Page Up`/:kbd:`Page Down` to move around
    +
    +* :kbd:`Home`/:kbd:`End` go to begin/end of line
    +
    +* :kbd:`C-Home`/:kbd:`C-End` go to begin/end of file
    +
    +* Some :program:`Emacs` bindings may also work, including :kbd:`C-B`,
    +  :kbd:`C-P`, :kbd:`C-A`, :kbd:`C-E`, :kbd:`C-D`, :kbd:`C-L`
    +
    +
    +Automatic indentation
    +^^^^^^^^^^^^^^^^^^^^^
    +
    +After a block-opening statement, the next line is indented by 4 spaces (in the
    +Python Shell window by one tab).  After certain keywords (break, return etc.)
    +the next line is dedented.  In leading indentation, :kbd:`Backspace` deletes up
    +to 4 spaces if they are there. :kbd:`Tab` inserts 1-4 spaces (in the Python
    +Shell window one tab). See also the indent/dedent region commands in the edit
    +menu.
    +
    +
    +Python Shell window
    +^^^^^^^^^^^^^^^^^^^
    +
    +* :kbd:`C-C` interrupts executing command
    +
    +* :kbd:`C-D` sends end-of-file; closes window if typed at a ``>>>`` prompt
    +
    +* :kbd:`Alt-p` retrieves previous command matching what you have typed
    +
    +* :kbd:`Alt-n` retrieves next
    +
    +* :kbd:`Return` while on any previous command retrieves that command
    +
    +* :kbd:`Alt-/` (Expand word) is also useful here
    +
    +.. index:: single: indentation
    +
    +
    +Syntax colors
    +-------------
    +
    +The coloring is applied in a background "thread," so you may occasionally see
    +uncolorized text.  To change the color scheme, edit the ``[Colors]`` section in
    +:file:`config.txt`.
    +
    +Python syntax colors:
    +   Keywords
    +      orange
    +
    +   Strings 
    +      green
    +
    +   Comments
    +      red
    +
    +   Definitions
    +      blue
    +
    +Shell colors:
    +   Console output
    +      brown
    +
    +   stdout
    +      blue
    +
    +   stderr
    +      dark green
    +
    +   stdin
    +      black
    +
    +
    +Command line usage
    +^^^^^^^^^^^^^^^^^^
    +
    +::
    +
    +   idle.py [-c command] [-d] [-e] [-s] [-t title] [arg] ...
    +
    +   -c command  run this command
    +   -d          enable debugger
    +   -e          edit mode; arguments are files to be edited
    +   -s          run $IDLESTARTUP or $PYTHONSTARTUP first
    +   -t title    set title of shell window
    +
    +If there are arguments:
    +
    +#. If :option:`-e` is used, arguments are files opened for editing and
    +   ``sys.argv`` reflects the arguments passed to IDLE itself.
    +
    +#. Otherwise, if :option:`-c` is used, all arguments are placed in
    +   ``sys.argv[1:...]``, with ``sys.argv[0]`` set to ``'-c'``.
    +
    +#. Otherwise, if neither :option:`-e` nor :option:`-c` is used, the first
    +   argument is a script which is executed with the remaining arguments in
    +   ``sys.argv[1:...]`` and ``sys.argv[0]`` set to the script name.  If the script
    +   name is '-', no script is executed but an interactive Python session is started;
    +   the arguments are still available in ``sys.argv``.
    +
    +
    diff --git a/Doc/library/imaplib.rst b/Doc/library/imaplib.rst
    new file mode 100644
    index 0000000..fc7c230
    --- /dev/null
    +++ b/Doc/library/imaplib.rst
    @@ -0,0 +1,540 @@
    +
    +:mod:`imaplib` --- IMAP4 protocol client
    +========================================
    +
    +.. module:: imaplib
    +   :synopsis: IMAP4 protocol client (requires sockets).
    +.. moduleauthor:: Piers Lauder 
    +.. sectionauthor:: Piers Lauder 
    +
    +
    +.. index::
    +   pair: IMAP4; protocol
    +   pair: IMAP4_SSL; protocol
    +   pair: IMAP4_stream; protocol
    +
    +.. % Based on HTML documentation by
    +.. % Piers Lauder;
    +.. % converted by Fred L. Drake, Jr.
    +.. % Revised by ESR, January 2000.
    +.. % Changes for IMAP4_SSL by Tino Lange, March 2002
    +.. % Changes for IMAP4_stream by
    +.. % Piers Lauder, November 2002
    +
    +This module defines three classes, :class:`IMAP4`, :class:`IMAP4_SSL` and
    +:class:`IMAP4_stream`, which encapsulate a connection to an IMAP4 server and
    +implement a large subset of the IMAP4rev1 client protocol as defined in
    +:rfc:`2060`. It is backward compatible with IMAP4 (:rfc:`1730`) servers, but
    +note that the ``STATUS`` command is not supported in IMAP4.
    +
    +Three classes are provided by the :mod:`imaplib` module; :class:`IMAP4` is the
    +base class:
    +
    +
    +.. class:: IMAP4([host[, port]])
    +
    +   This class implements the actual IMAP4 protocol.  The connection is created and
    +   protocol version (IMAP4 or IMAP4rev1) is determined when the instance is
    +   initialized. If *host* is not specified, ``''`` (the local host) is used. If
    +   *port* is omitted, the standard IMAP4 port (143) is used.
    +
    +Three exceptions are defined as attributes of the :class:`IMAP4` class:
    +
    +
    +.. exception:: IMAP4.error
    +
    +   Exception raised on any errors.  The reason for the exception is passed to the
    +   constructor as a string.
    +
    +
    +.. exception:: IMAP4.abort
    +
    +   IMAP4 server errors cause this exception to be raised.  This is a sub-class of
    +   :exc:`IMAP4.error`.  Note that closing the instance and instantiating a new one
    +   will usually allow recovery from this exception.
    +
    +
    +.. exception:: IMAP4.readonly
    +
    +   This exception is raised when a writable mailbox has its status changed by the
    +   server.  This is a sub-class of :exc:`IMAP4.error`.  Some other client now has
    +   write permission, and the mailbox will need to be re-opened to re-obtain write
    +   permission.
    +
    +There's also a subclass for secure connections:
    +
    +
    +.. class:: IMAP4_SSL([host[, port[, keyfile[, certfile]]]])
    +
    +   This is a subclass derived from :class:`IMAP4` that connects over an SSL
    +   encrypted socket (to use this class you need a socket module that was compiled
    +   with SSL support).  If *host* is not specified, ``''`` (the local host) is used.
    +   If *port* is omitted, the standard IMAP4-over-SSL port (993) is used.  *keyfile*
    +   and *certfile* are also optional - they can contain a PEM formatted private key
    +   and certificate chain file for the SSL connection.
    +
    +The second subclass allows for connections created by a child process:
    +
    +
    +.. class:: IMAP4_stream(command)
    +
    +   This is a subclass derived from :class:`IMAP4` that connects to the
    +   ``stdin/stdout`` file descriptors created by passing *command* to
    +   ``os.popen2()``.
    +
    +   .. versionadded:: 2.3
    +
    +The following utility functions are defined:
    +
    +
    +.. function:: Internaldate2tuple(datestr)
    +
    +   Converts an IMAP4 INTERNALDATE string to Coordinated Universal Time. Returns a
    +   :mod:`time` module tuple.
    +
    +
    +.. function:: Int2AP(num)
    +
    +   Converts an integer into a string representation using characters from the set
    +   [``A`` .. ``P``].
    +
    +
    +.. function:: ParseFlags(flagstr)
    +
    +   Converts an IMAP4 ``FLAGS`` response to a tuple of individual flags.
    +
    +
    +.. function:: Time2Internaldate(date_time)
    +
    +   Converts a :mod:`time` module tuple to an IMAP4 ``INTERNALDATE`` representation.
    +   Returns a string in the form: ``"DD-Mmm-YYYY HH:MM:SS +HHMM"`` (including
    +   double-quotes).
    +
    +Note that IMAP4 message numbers change as the mailbox changes; in particular,
    +after an ``EXPUNGE`` command performs deletions the remaining messages are
    +renumbered. So it is highly advisable to use UIDs instead, with the UID command.
    +
    +At the end of the module, there is a test section that contains a more extensive
    +example of usage.
    +
    +
    +.. seealso::
    +
    +   Documents describing the protocol, and sources and binaries for servers
    +   implementing it, can all be found at the University of Washington's *IMAP
    +   Information Center* (http://www.cac.washington.edu/imap/).
    +
    +
    +.. _imap4-objects:
    +
    +IMAP4 Objects
    +-------------
    +
    +All IMAP4rev1 commands are represented by methods of the same name, either
    +upper-case or lower-case.
    +
    +All arguments to commands are converted to strings, except for ``AUTHENTICATE``,
    +and the last argument to ``APPEND`` which is passed as an IMAP4 literal.  If
    +necessary (the string contains IMAP4 protocol-sensitive characters and isn't
    +enclosed with either parentheses or double quotes) each string is quoted.
    +However, the *password* argument to the ``LOGIN`` command is always quoted. If
    +you want to avoid having an argument string quoted (eg: the *flags* argument to
    +``STORE``) then enclose the string in parentheses (eg: ``r'(\Deleted)'``).
    +
    +Each command returns a tuple: ``(type, [data, ...])`` where *type* is usually
    +``'OK'`` or ``'NO'``, and *data* is either the text from the command response,
    +or mandated results from the command. Each *data* is either a string, or a
    +tuple. If a tuple, then the first part is the header of the response, and the
    +second part contains the data (ie: 'literal' value).
    +
    +The *message_set* option to commands below is a string specifying one or more
    +messages to be acted upon.  It may be a simple message number (``'1'``), a range
    +of message numbers (``'2:4'``), or a group of non-contiguous ranges separated by
    +commas (``'1:3,6:9'``).  A range can contain an asterisk to indicate an infinite
    +upper bound (``'3:*'``).
    +
    +An :class:`IMAP4` instance has the following methods:
    +
    +
    +.. method:: IMAP4.append(mailbox, flags, date_time, message)
    +
    +   Append *message* to named mailbox.
    +
    +
    +.. method:: IMAP4.authenticate(mechanism, authobject)
    +
    +   Authenticate command --- requires response processing.
    +
    +   *mechanism* specifies which authentication mechanism is to be used - it should
    +   appear in the instance variable ``capabilities`` in the form ``AUTH=mechanism``.
    +
    +   *authobject* must be a callable object::
    +
    +      data = authobject(response)
    +
    +   It will be called to process server continuation responses. It should return
    +   ``data`` that will be encoded and sent to server. It should return ``None`` if
    +   the client abort response ``*`` should be sent instead.
    +
    +
    +.. method:: IMAP4.check()
    +
    +   Checkpoint mailbox on server.
    +
    +
    +.. method:: IMAP4.close()
    +
    +   Close currently selected mailbox. Deleted messages are removed from writable
    +   mailbox. This is the recommended command before ``LOGOUT``.
    +
    +
    +.. method:: IMAP4.copy(message_set, new_mailbox)
    +
    +   Copy *message_set* messages onto end of *new_mailbox*.
    +
    +
    +.. method:: IMAP4.create(mailbox)
    +
    +   Create new mailbox named *mailbox*.
    +
    +
    +.. method:: IMAP4.delete(mailbox)
    +
    +   Delete old mailbox named *mailbox*.
    +
    +
    +.. method:: IMAP4.deleteacl(mailbox, who)
    +
    +   Delete the ACLs (remove any rights) set for *who* on *mailbox*.
    +
    +   .. versionadded:: 2.4
    +
    +
    +.. method:: IMAP4.expunge()
    +
    +   Permanently remove deleted items from selected mailbox. Generates an ``EXPUNGE``
    +   response for each deleted message. Returned data contains a list of ``EXPUNGE``
    +   message numbers in order received.
    +
    +
    +.. method:: IMAP4.fetch(message_set, message_parts)
    +
    +   Fetch (parts of) messages.  *message_parts* should be a string of message part
    +   names enclosed within parentheses, eg: ``"(UID BODY[TEXT])"``.  Returned data
    +   are tuples of message part envelope and data.
    +
    +
    +.. method:: IMAP4.getacl(mailbox)
    +
    +   Get the ``ACL``\ s for *mailbox*. The method is non-standard, but is supported
    +   by the ``Cyrus`` server.
    +
    +
    +.. method:: IMAP4.getannotation(mailbox, entry, attribute)
    +
    +   Retrieve the specified ``ANNOTATION``\ s for *mailbox*. The method is
    +   non-standard, but is supported by the ``Cyrus`` server.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. method:: IMAP4.getquota(root)
    +
    +   Get the ``quota`` *root*'s resource usage and limits. This method is part of the
    +   IMAP4 QUOTA extension defined in :rfc:`2087`.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. method:: IMAP4.getquotaroot(mailbox)
    +
    +   Get the list of ``quota`` ``roots`` for the named *mailbox*. This method is part
    +   of the IMAP4 QUOTA extension defined in :rfc:`2087`.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. method:: IMAP4.list([directory[, pattern]])
    +
    +   List mailbox names in *directory* matching *pattern*.  *directory* defaults to
    +   the top-level mail folder, and *pattern* defaults to match anything.  Returned
    +   data contains a list of ``LIST`` responses.
    +
    +
    +.. method:: IMAP4.login(user, password)
    +
    +   Identify the client using a plaintext password. The *password* will be quoted.
    +
    +
    +.. method:: IMAP4.login_cram_md5(user, password)
    +
    +   Force use of ``CRAM-MD5`` authentication when identifying the client to protect
    +   the password.  Will only work if the server ``CAPABILITY`` response includes the
    +   phrase ``AUTH=CRAM-MD5``.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. method:: IMAP4.logout()
    +
    +   Shutdown connection to server. Returns server ``BYE`` response.
    +
    +
    +.. method:: IMAP4.lsub([directory[, pattern]])
    +
    +   List subscribed mailbox names in *directory* matching *pattern*. *directory*
    +   defaults to the top level directory and *pattern* defaults to match any mailbox.
    +   Returned data are tuples of message part envelope and data.
    +
    +
    +.. method:: IMAP4.myrights(mailbox)
    +
    +   Show my ACLs for a mailbox (i.e. the rights that I have on mailbox).
    +
    +   .. versionadded:: 2.4
    +
    +
    +.. method:: IMAP4.namespace()
    +
    +   Returns IMAP namespaces as defined in :rfc:`2342`.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. method:: IMAP4.noop()
    +
    +   Send ``NOOP`` to server.
    +
    +
    +.. method:: IMAP4.open(host, port)
    +
    +   Opens socket to *port* at *host*. The connection objects established by this
    +   method will be used in the ``read``, ``readline``, ``send``, and ``shutdown``
    +   methods. You may override this method.
    +
    +
    +.. method:: IMAP4.partial(message_num, message_part, start, length)
    +
    +   Fetch truncated part of a message. Returned data is a tuple of message part
    +   envelope and data.
    +
    +
    +.. method:: IMAP4.proxyauth(user)
    +
    +   Assume authentication as *user*. Allows an authorised administrator to proxy
    +   into any user's mailbox.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. method:: IMAP4.read(size)
    +
    +   Reads *size* bytes from the remote server. You may override this method.
    +
    +
    +.. method:: IMAP4.readline()
    +
    +   Reads one line from the remote server. You may override this method.
    +
    +
    +.. method:: IMAP4.recent()
    +
    +   Prompt server for an update. Returned data is ``None`` if no new messages, else
    +   value of ``RECENT`` response.
    +
    +
    +.. method:: IMAP4.rename(oldmailbox, newmailbox)
    +
    +   Rename mailbox named *oldmailbox* to *newmailbox*.
    +
    +
    +.. method:: IMAP4.response(code)
    +
    +   Return data for response *code* if received, or ``None``. Returns the given
    +   code, instead of the usual type.
    +
    +
    +.. method:: IMAP4.search(charset, criterion[, ...])
    +
    +   Search mailbox for matching messages.  *charset* may be ``None``, in which case
    +   no ``CHARSET`` will be specified in the request to the server.  The IMAP
    +   protocol requires that at least one criterion be specified; an exception will be
    +   raised when the server returns an error.
    +
    +   Example::
    +
    +      # M is a connected IMAP4 instance...
    +      typ, msgnums = M.search(None, 'FROM', '"LDJ"')
    +
    +      # or:
    +      typ, msgnums = M.search(None, '(FROM "LDJ")')
    +
    +
    +.. method:: IMAP4.select([mailbox[, readonly]])
    +
    +   Select a mailbox. Returned data is the count of messages in *mailbox*
    +   (``EXISTS`` response).  The default *mailbox* is ``'INBOX'``.  If the *readonly*
    +   flag is set, modifications to the mailbox are not allowed.
    +
    +
    +.. method:: IMAP4.send(data)
    +
    +   Sends ``data`` to the remote server. You may override this method.
    +
    +
    +.. method:: IMAP4.setacl(mailbox, who, what)
    +
    +   Set an ``ACL`` for *mailbox*. The method is non-standard, but is supported by
    +   the ``Cyrus`` server.
    +
    +
    +.. method:: IMAP4.setannotation(mailbox, entry, attribute[, ...])
    +
    +   Set ``ANNOTATION``\ s for *mailbox*. The method is non-standard, but is
    +   supported by the ``Cyrus`` server.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. method:: IMAP4.setquota(root, limits)
    +
    +   Set the ``quota`` *root*'s resource *limits*. This method is part of the IMAP4
    +   QUOTA extension defined in :rfc:`2087`.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. method:: IMAP4.shutdown()
    +
    +   Close connection established in ``open``. You may override this method.
    +
    +
    +.. method:: IMAP4.socket()
    +
    +   Returns socket instance used to connect to server.
    +
    +
    +.. method:: IMAP4.sort(sort_criteria, charset, search_criterion[, ...])
    +
    +   The ``sort`` command is a variant of ``search`` with sorting semantics for the
    +   results.  Returned data contains a space separated list of matching message
    +   numbers.
    +
    +   Sort has two arguments before the *search_criterion* argument(s); a
    +   parenthesized list of *sort_criteria*, and the searching *charset*.  Note that
    +   unlike ``search``, the searching *charset* argument is mandatory.  There is also
    +   a ``uid sort`` command which corresponds to ``sort`` the way that ``uid search``
    +   corresponds to ``search``.  The ``sort`` command first searches the mailbox for
    +   messages that match the given searching criteria using the charset argument for
    +   the interpretation of strings in the searching criteria.  It then returns the
    +   numbers of matching messages.
    +
    +   This is an ``IMAP4rev1`` extension command.
    +
    +
    +.. method:: IMAP4.status(mailbox, names)
    +
    +   Request named status conditions for *mailbox*.
    +
    +
    +.. method:: IMAP4.store(message_set, command, flag_list)
    +
    +   Alters flag dispositions for messages in mailbox.  *command* is specified by
    +   section 6.4.6 of :rfc:`2060` as being one of "FLAGS", "+FLAGS", or "-FLAGS",
    +   optionally with a suffix of ".SILENT".
    +
    +   For example, to set the delete flag on all messages::
    +
    +      typ, data = M.search(None, 'ALL')
    +      for num in data[0].split():
    +         M.store(num, '+FLAGS', '\\Deleted')
    +      M.expunge()
    +
    +
    +.. method:: IMAP4.subscribe(mailbox)
    +
    +   Subscribe to new mailbox.
    +
    +
    +.. method:: IMAP4.thread(threading_algorithm, charset, search_criterion[, ...])
    +
    +   The ``thread`` command is a variant of ``search`` with threading semantics for
    +   the results.  Returned data contains a space separated list of thread members.
    +
    +   Thread members consist of zero or more message numbers, delimited by spaces,
    +   indicating successive parent and child.
    +
    +   Thread has two arguments before the *search_criterion* argument(s); a
    +   *threading_algorithm*, and the searching *charset*.  Note that unlike
    +   ``search``, the searching *charset* argument is mandatory.  There is also a
    +   ``uid thread`` command which corresponds to ``thread`` the way that ``uid
    +   search`` corresponds to ``search``.  The ``thread`` command first searches the
    +   mailbox for messages that match the given searching criteria using the charset
    +   argument for the interpretation of strings in the searching criteria. It then
    +   returns the matching messages threaded according to the specified threading
    +   algorithm.
    +
    +   This is an ``IMAP4rev1`` extension command.
    +
    +   .. versionadded:: 2.4
    +
    +
    +.. method:: IMAP4.uid(command, arg[, ...])
    +
    +   Execute command args with messages identified by UID, rather than message
    +   number.  Returns response appropriate to command.  At least one argument must be
    +   supplied; if none are provided, the server will return an error and an exception
    +   will be raised.
    +
    +
    +.. method:: IMAP4.unsubscribe(mailbox)
    +
    +   Unsubscribe from old mailbox.
    +
    +
    +.. method:: IMAP4.xatom(name[, arg[, ...]])
    +
    +   Allow simple extension commands notified by server in ``CAPABILITY`` response.
    +
    +Instances of :class:`IMAP4_SSL` have just one additional method:
    +
    +
    +.. method:: IMAP4_SSL.ssl()
    +
    +   Returns SSLObject instance used for the secure connection with the server.
    +
    +The following attributes are defined on instances of :class:`IMAP4`:
    +
    +
    +.. attribute:: IMAP4.PROTOCOL_VERSION
    +
    +   The most recent supported protocol in the ``CAPABILITY`` response from the
    +   server.
    +
    +
    +.. attribute:: IMAP4.debug
    +
    +   Integer value to control debugging output.  The initial value is taken from
    +   the module variable ``Debug``.  Values greater than three trace each command.
    +
    +
    +.. _imap4-example:
    +
    +IMAP4 Example
    +-------------
    +
    +Here is a minimal example (without error checking) that opens a mailbox and
    +retrieves and prints all messages::
    +
    +   import getpass, imaplib
    +
    +   M = imaplib.IMAP4()
    +   M.login(getpass.getuser(), getpass.getpass())
    +   M.select()
    +   typ, data = M.search(None, 'ALL')
    +   for num in data[0].split():
    +       typ, data = M.fetch(num, '(RFC822)')
    +       print 'Message %s\n%s\n' % (num, data[0][1])
    +   M.close()
    +   M.logout()
    +
    diff --git a/Doc/library/imghdr.rst b/Doc/library/imghdr.rst
    new file mode 100644
    index 0000000..90a8304
    --- /dev/null
    +++ b/Doc/library/imghdr.rst
    @@ -0,0 +1,71 @@
    +
    +:mod:`imghdr` --- Determine the type of an image
    +================================================
    +
    +.. module:: imghdr
    +   :synopsis: Determine the type of image contained in a file or byte stream.
    +
    +
    +The :mod:`imghdr` module determines the type of image contained in a file or
    +byte stream.
    +
    +The :mod:`imghdr` module defines the following function:
    +
    +
    +.. function:: what(filename[, h])
    +
    +   Tests the image data contained in the file named by *filename*, and returns a
    +   string describing the image type.  If optional *h* is provided, the *filename*
    +   is ignored and *h* is assumed to contain the byte stream to test.
    +
    +The following image types are recognized, as listed below with the return value
    +from :func:`what`:
    +
    ++------------+-----------------------------------+
    +| Value      | Image format                      |
    ++============+===================================+
    +| ``'rgb'``  | SGI ImgLib Files                  |
    ++------------+-----------------------------------+
    +| ``'gif'``  | GIF 87a and 89a Files             |
    ++------------+-----------------------------------+
    +| ``'pbm'``  | Portable Bitmap Files             |
    ++------------+-----------------------------------+
    +| ``'pgm'``  | Portable Graymap Files            |
    ++------------+-----------------------------------+
    +| ``'ppm'``  | Portable Pixmap Files             |
    ++------------+-----------------------------------+
    +| ``'tiff'`` | TIFF Files                        |
    ++------------+-----------------------------------+
    +| ``'rast'`` | Sun Raster Files                  |
    ++------------+-----------------------------------+
    +| ``'xbm'``  | X Bitmap Files                    |
    ++------------+-----------------------------------+
    +| ``'jpeg'`` | JPEG data in JFIF or Exif formats |
    ++------------+-----------------------------------+
    +| ``'bmp'``  | BMP files                         |
    ++------------+-----------------------------------+
    +| ``'png'``  | Portable Network Graphics         |
    ++------------+-----------------------------------+
    +
    +.. versionadded:: 2.5
    +   Exif detection.
    +
    +You can extend the list of file types :mod:`imghdr` can recognize by appending
    +to this variable:
    +
    +
    +.. data:: tests
    +
    +   A list of functions performing the individual tests.  Each function takes two
    +   arguments: the byte-stream and an open file-like object. When :func:`what` is
    +   called with a byte-stream, the file-like object will be ``None``.
    +
    +   The test function should return a string describing the image type if the test
    +   succeeded, or ``None`` if it failed.
    +
    +Example::
    +
    +   >>> import imghdr
    +   >>> imghdr.what('/tmp/bass.gif')
    +   'gif'
    +
    diff --git a/Doc/library/imp.rst b/Doc/library/imp.rst
    new file mode 100644
    index 0000000..f80bea3
    --- /dev/null
    +++ b/Doc/library/imp.rst
    @@ -0,0 +1,298 @@
    +
    +:mod:`imp` --- Access the :keyword:`import` internals
    +=====================================================
    +
    +.. module:: imp
    +   :synopsis: Access the implementation of the import statement.
    +
    +
    +.. index:: statement: import
    +
    +This module provides an interface to the mechanisms used to implement the
    +:keyword:`import` statement.  It defines the following constants and functions:
    +
    +
    +.. function:: get_magic()
    +
    +   .. index:: pair: file; byte-code
    +
    +   Return the magic string value used to recognize byte-compiled code files
    +   (:file:`.pyc` files).  (This value may be different for each Python version.)
    +
    +
    +.. function:: get_suffixes()
    +
    +   Return a list of triples, each describing a particular type of module. Each
    +   triple has the form ``(suffix, mode, type)``, where *suffix* is a string to be
    +   appended to the module name to form the filename to search for, *mode* is the
    +   mode string to pass to the built-in :func:`open` function to open the file (this
    +   can be ``'r'`` for text files or ``'rb'`` for binary files), and *type* is the
    +   file type, which has one of the values :const:`PY_SOURCE`, :const:`PY_COMPILED`,
    +   or :const:`C_EXTENSION`, described below.
    +
    +
    +.. function:: find_module(name[, path])
    +
    +   Try to find the module *name* on the search path *path*.  If *path* is a list of
    +   directory names, each directory is searched for files with any of the suffixes
    +   returned by :func:`get_suffixes` above.  Invalid names in the list are silently
    +   ignored (but all list items must be strings).  If *path* is omitted or ``None``,
    +   the list of directory names given by ``sys.path`` is searched, but first it
    +   searches a few special places: it tries to find a built-in module with the given
    +   name (:const:`C_BUILTIN`), then a frozen module (:const:`PY_FROZEN`), and on
    +   some systems some other places are looked in as well (on the Mac, it looks for a
    +   resource (:const:`PY_RESOURCE`); on Windows, it looks in the registry which may
    +   point to a specific file).
    +
    +   If search is successful, the return value is a triple ``(file, pathname,
    +   description)`` where *file* is an open file object positioned at the beginning,
    +   *pathname* is the pathname of the file found, and *description* is a triple as
    +   contained in the list returned by :func:`get_suffixes` describing the kind of
    +   module found. If the module does not live in a file, the returned *file* is
    +   ``None``, *pathname* is the empty string, and the *description* tuple contains
    +   empty strings for its suffix and mode; the module type is as indicated in
    +   parentheses above.  If the search is unsuccessful, :exc:`ImportError` is raised.
    +   Other exceptions indicate problems with the arguments or environment.
    +
    +   This function does not handle hierarchical module names (names containing dots).
    +   In order to find *P*.*M*, that is, submodule *M* of package *P*, use
    +   :func:`find_module` and :func:`load_module` to find and load package *P*, and
    +   then use :func:`find_module` with the *path* argument set to ``P.__path__``.
    +   When *P* itself has a dotted name, apply this recipe recursively.
    +
    +
    +.. function:: load_module(name, file, filename, description)
    +
    +   Load a module that was previously found by :func:`find_module` (or by an
    +   otherwise conducted search yielding compatible results).  This function does
    +   more than importing the module: if the module was already imported, it will
    +   reload the module! The *name* argument indicates the full module name (including
    +   the package name, if this is a submodule of a package).  The *file* argument is
    +   an open file, and *filename* is the corresponding file name; these can be
    +   ``None`` and ``''``, respectively, when the module is not being loaded from a
    +   file.  The *description* argument is a tuple, as would be returned by
    +   :func:`get_suffixes`, describing what kind of module must be loaded.
    +
    +   If the load is successful, the return value is the module object; otherwise, an
    +   exception (usually :exc:`ImportError`) is raised.
    +
    +   **Important:** the caller is responsible for closing the *file* argument, if it
    +   was not ``None``, even when an exception is raised.  This is best done using a
    +   :keyword:`try` ... :keyword:`finally` statement.
    +
    +
    +.. function:: new_module(name)
    +
    +   Return a new empty module object called *name*.  This object is *not* inserted
    +   in ``sys.modules``.
    +
    +
    +.. function:: lock_held()
    +
    +   Return ``True`` if the import lock is currently held, else ``False``. On
    +   platforms without threads, always return ``False``.
    +
    +   On platforms with threads, a thread executing an import holds an internal lock
    +   until the import is complete. This lock blocks other threads from doing an
    +   import until the original import completes, which in turn prevents other threads
    +   from seeing incomplete module objects constructed by the original thread while
    +   in the process of completing its import (and the imports, if any, triggered by
    +   that).
    +
    +
    +.. function:: acquire_lock()
    +
    +   Acquires the interpreter's import lock for the current thread.  This lock should
    +   be used by import hooks to ensure thread-safety when importing modules. On
    +   platforms without threads, this function does nothing.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: release_lock()
    +
    +   Release the interpreter's import lock. On platforms without threads, this
    +   function does nothing.
    +
    +   .. versionadded:: 2.3
    +
    +The following constants with integer values, defined in this module, are used to
    +indicate the search result of :func:`find_module`.
    +
    +
    +.. data:: PY_SOURCE
    +
    +   The module was found as a source file.
    +
    +
    +.. data:: PY_COMPILED
    +
    +   The module was found as a compiled code object file.
    +
    +
    +.. data:: C_EXTENSION
    +
    +   The module was found as a dynamically loadable shared library.
    +
    +
    +.. data:: PY_RESOURCE
    +
    +   The module was found as a Mac OS 9 resource.  This value can only be returned on
    +   a Mac OS 9 or earlier Macintosh.
    +
    +
    +.. data:: PKG_DIRECTORY
    +
    +   The module was found as a package directory.
    +
    +
    +.. data:: C_BUILTIN
    +
    +   The module was found as a built-in module.
    +
    +
    +.. data:: PY_FROZEN
    +
    +   The module was found as a frozen module (see :func:`init_frozen`).
    +
    +The following constant and functions are obsolete; their functionality is
    +available through :func:`find_module` or :func:`load_module`. They are kept
    +around for backward compatibility:
    +
    +
    +.. data:: SEARCH_ERROR
    +
    +   Unused.
    +
    +
    +.. function:: init_builtin(name)
    +
    +   Initialize the built-in module called *name* and return its module object along
    +   with storing it in ``sys.modules``.  If the module was already initialized, it
    +   will be initialized *again*.  Re-initialization involves the copying of the
    +   built-in module's ``__dict__`` from the cached module over the module's entry in
    +   ``sys.modules``.  If there is no built-in module called *name*, ``None`` is
    +   returned.
    +
    +
    +.. function:: init_frozen(name)
    +
    +   Initialize the frozen module called *name* and return its module object.  If
    +   the module was already initialized, it will be initialized *again*.  If there
    +   is no frozen module called *name*, ``None`` is returned.  (Frozen modules are
    +   modules written in Python whose compiled byte-code object is incorporated
    +   into a custom-built Python interpreter by Python's :program:`freeze`
    +   utility. See :file:`Tools/freeze/` for now.)
    +
    +
    +.. function:: is_builtin(name)
    +
    +   Return ``1`` if there is a built-in module called *name* which can be
    +   initialized again.  Return ``-1`` if there is a built-in module called *name*
    +   which cannot be initialized again (see :func:`init_builtin`).  Return ``0`` if
    +   there is no built-in module called *name*.
    +
    +
    +.. function:: is_frozen(name)
    +
    +   Return ``True`` if there is a frozen module (see :func:`init_frozen`) called
    +   *name*, or ``False`` if there is no such module.
    +
    +
    +.. function:: load_compiled(name, pathname[, file])
    +
    +   .. index:: pair: file; byte-code
    +
    +   Load and initialize a module implemented as a byte-compiled code file and return
    +   its module object.  If the module was already initialized, it will be
    +   initialized *again*.  The *name* argument is used to create or access a module
    +   object.  The *pathname* argument points to the byte-compiled code file.  The
    +   *file* argument is the byte-compiled code file, open for reading in binary mode,
    +   from the beginning. It must currently be a real file object, not a user-defined
    +   class emulating a file.
    +
    +
    +.. function:: load_dynamic(name, pathname[, file])
    +
    +   Load and initialize a module implemented as a dynamically loadable shared
    +   library and return its module object.  If the module was already initialized, it
    +   will be initialized *again*. Re-initialization involves copying the ``__dict__``
    +   attribute of the cached instance of the module over the value used in the module
    +   cached in ``sys.modules``.  The *pathname* argument must point to the shared
    +   library.  The *name* argument is used to construct the name of the
    +   initialization function: an external C function called ``initname()`` in the
    +   shared library is called.  The optional *file* argument is ignored.  (Note:
    +   using shared libraries is highly system dependent, and not all systems support
    +   it.)
    +
    +
    +.. function:: load_source(name, pathname[, file])
    +
    +   Load and initialize a module implemented as a Python source file and return its
    +   module object.  If the module was already initialized, it will be initialized
    +   *again*.  The *name* argument is used to create or access a module object.  The
    +   *pathname* argument points to the source file.  The *file* argument is the
    +   source file, open for reading as text, from the beginning. It must currently be
    +   a real file object, not a user-defined class emulating a file.  Note that if a
    +   properly matching byte-compiled file (with suffix :file:`.pyc` or :file:`.pyo`)
    +   exists, it will be used instead of parsing the given source file.
    +
    +
    +.. class:: NullImporter(path_string)
    +
    +   The :class:`NullImporter` type is a :pep:`302` import hook that handles
    +   non-directory path strings by failing to find any modules.  Calling this type
    +   with an existing directory or empty string raises :exc:`ImportError`.
    +   Otherwise, a :class:`NullImporter` instance is returned.
    +
    +   Python adds instances of this type to ``sys.path_importer_cache`` for any path
    +   entries that are not directories and are not handled by any other path hooks on
    +   ``sys.path_hooks``.  Instances have only one method:
    +
    +
    +   .. method:: NullImporter.find_module(fullname [, path])
    +
    +      This method always returns ``None``, indicating that the requested module could
    +      not be found.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. _examples-imp:
    +
    +Examples
    +--------
    +
    +The following function emulates what was the standard import statement up to
    +Python 1.4 (no hierarchical module names).  (This *implementation* wouldn't work
    +in that version, since :func:`find_module` has been extended and
    +:func:`load_module` has been added in 1.4.) ::
    +
    +   import imp
    +   import sys
    +
    +   def __import__(name, globals=None, locals=None, fromlist=None):
    +       # Fast path: see if the module has already been imported.
    +       try:
    +           return sys.modules[name]
    +       except KeyError:
    +           pass
    +
    +       # If any of the following calls raises an exception,
    +       # there's a problem we can't handle -- let the caller handle it.
    +
    +       fp, pathname, description = imp.find_module(name)
    +
    +       try:
    +           return imp.load_module(name, fp, pathname, description)
    +       finally:
    +           # Since we may exit via an exception, close fp explicitly.
    +           if fp:
    +               fp.close()
    +
    +.. index:: module: knee
    +
    +A more complete example that implements hierarchical module names and includes a
    +:func:`reload` function can be found in the module :mod:`knee`.  The :mod:`knee`
    +module can be found in :file:`Demo/imputil/` in the Python source distribution.
    +
    diff --git a/Doc/library/index.rst b/Doc/library/index.rst
    new file mode 100644
    index 0000000..1e872ac
    --- /dev/null
    +++ b/Doc/library/index.rst
    @@ -0,0 +1,81 @@
    +.. _library-index:
    +
    +###############################
    +  The Python Standard Library
    +###############################
    +
    +:Release: |version|
    +:Date: |today|
    +
    +While the :ref:`reference-index` describes the exact syntax and
    +semantics of the Python language, this library reference manual
    +describes the standard library that is distributed with Python. It also
    +describes some of the optional components that are commonly included
    +in Python distributions.
    +
    +Python's standard library is very extensive, offering a wide range of
    +facilities as indicated by the long table of contents listed below. The
    +library contains built-in modules (written in C) that provide access to
    +system functionality such as file I/O that would otherwise be
    +inaccessible to Python programmers, as well as modules written in Python
    +that provide standardized solutions for many problems that occur in
    +everyday programming. Some of these modules are explicitly designed to
    +encourage and enhance the portability of Python programs by abstracting
    +away platform-specifics into platform-neutral APIs.
    +
    +The Python installers for the Windows and Mac platforms usually include
    +the entire standard library and often also include many additional
    +components. For Unix-like operating systems Python is normally provided
    +as a collection of packages, so it may be necessary to use the packaging
    +tools provided with the operating system to obtain some or all of the
    +optional components.
    +
    +In addition to the standard library, there is a growing collection of
    +over 2500 additional components available from the `Python Package Index
    +<http://pypi.python.org/pypi>`_.
    +
    +
    +.. toctree::
    +   :maxdepth: 2
    +
    +   intro.rst
    +   functions.rst
    +   constants.rst
    +   objects.rst
    +   stdtypes.rst
    +   exceptions.rst
    +
    +   strings.rst
    +   datatypes.rst
    +   numeric.rst
    +   filesys.rst
    +   persistence.rst
    +   archiving.rst
    +   fileformats.rst
    +   crypto.rst
    +   allos.rst
    +   someos.rst
    +   ipc.rst
    +   netdata.rst
    +   markup.rst
    +   internet.rst
    +   mm.rst
    +   i18n.rst
    +   frameworks.rst
    +   tk.rst
    +   development.rst
    +   pdb.rst
    +   profile.rst
    +   hotshot.rst
    +   timeit.rst
    +   trace.rst
    +   python.rst
    +   custominterp.rst
    +   modules.rst
    +   language.rst
    +   misc.rst
    +   windows.rst
    +   unix.rst
    +   mac.rst
    +   macosa.rst
    +   undoc.rst
    diff --git a/Doc/library/inspect.rst b/Doc/library/inspect.rst
    new file mode 100644
    index 0000000..edec9d5
    --- /dev/null
    +++ b/Doc/library/inspect.rst
    @@ -0,0 +1,507 @@
    +
    +:mod:`inspect` --- Inspect live objects
    +=======================================
    +
    +.. module:: inspect
    +   :synopsis: Extract information and source code from live objects.
    +.. moduleauthor:: Ka-Ping Yee 
    +.. sectionauthor:: Ka-Ping Yee 
    +
    +
    +.. versionadded:: 2.1
    +
    +The :mod:`inspect` module provides several useful functions to help get
    +information about live objects such as modules, classes, methods, functions,
    +tracebacks, frame objects, and code objects.  For example, it can help you
    +examine the contents of a class, retrieve the source code of a method, extract
    +and format the argument list for a function, or get all the information you need
    +to display a detailed traceback.
    +
    +There are four main kinds of services provided by this module: type checking,
    +getting source code, inspecting classes and functions, and examining the
    +interpreter stack.
    +
    +
    +.. _inspect-types:
    +
    +Types and members
    +-----------------
    +
    +The :func:`getmembers` function retrieves the members of an object such as a
    +class or module. The thirteen functions whose names begin with "is" are mainly
    +provided as convenient choices for the second argument to :func:`getmembers`.
    +They also help you determine when you can expect to find the following special
    +attributes:
    +
    ++-----------+-----------------+---------------------------+-------+
    +| Type      | Attribute       | Description               | Notes |
    ++===========+=================+===========================+=======+
    +| module    | __doc__         | documentation string      |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | __file__        | filename (missing for     |       |
    +|           |                 | built-in modules)         |       |
    ++-----------+-----------------+---------------------------+-------+
    +| class     | __doc__         | documentation string      |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | __module__      | name of module in which   |       |
    +|           |                 | this class was defined    |       |
    ++-----------+-----------------+---------------------------+-------+
    +| method    | __doc__         | documentation string      |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | __name__        | name with which this      |       |
    +|           |                 | method was defined        |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | im_class        | class object that asked   | \(1)  |
    +|           |                 | for this method           |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | im_func         | function object           |       |
    +|           |                 | containing implementation |       |
    +|           |                 | of method                 |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | im_self         | instance to which this    |       |
    +|           |                 | method is bound, or       |       |
    +|           |                 | ``None``                  |       |
    ++-----------+-----------------+---------------------------+-------+
    +| function  | __doc__         | documentation string      |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | __name__        | name with which this      |       |
    +|           |                 | function was defined      |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | __code__        | code object containing    |       |
    +|           |                 | compiled function         |       |
    +|           |                 | bytecode                  |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | __defaults__    | tuple of any default      |       |
    +|           |                 | values for arguments      |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | __globals__     | global namespace in which |       |
    +|           |                 | this function was defined |       |
    ++-----------+-----------------+---------------------------+-------+
    +| traceback | tb_frame        | frame object at this      |       |
    +|           |                 | level                     |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | tb_lasti        | index of last attempted   |       |
    +|           |                 | instruction in bytecode   |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | tb_lineno       | current line number in    |       |
    +|           |                 | Python source code        |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | tb_next         | next inner traceback      |       |
    +|           |                 | object (called by this    |       |
    +|           |                 | level)                    |       |
    ++-----------+-----------------+---------------------------+-------+
    +| frame     | f_back          | next outer frame object   |       |
    +|           |                 | (this frame's caller)     |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | f_builtins      | built-in namespace seen   |       |
    +|           |                 | by this frame             |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | f_code          | code object being         |       |
    +|           |                 | executed in this frame    |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | f_exc_traceback | traceback if raised in    |       |
    +|           |                 | this frame, or ``None``   |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | f_exc_type      | exception type if raised  |       |
    +|           |                 | in this frame, or         |       |
    +|           |                 | ``None``                  |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | f_exc_value     | exception value if raised |       |
    +|           |                 | in this frame, or         |       |
    +|           |                 | ``None``                  |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | f_globals       | global namespace seen by  |       |
    +|           |                 | this frame                |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | f_lasti         | index of last attempted   |       |
    +|           |                 | instruction in bytecode   |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | f_lineno        | current line number in    |       |
    +|           |                 | Python source code        |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | f_locals        | local namespace seen by   |       |
    +|           |                 | this frame                |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | f_restricted    | 0 or 1 if frame is in     |       |
    +|           |                 | restricted execution mode |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | f_trace         | tracing function for this |       |
    +|           |                 | frame, or ``None``        |       |
    ++-----------+-----------------+---------------------------+-------+
    +| code      | co_argcount     | number of arguments (not  |       |
    +|           |                 | including \* or \*\*      |       |
    +|           |                 | args)                     |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | co_code         | string of raw compiled    |       |
    +|           |                 | bytecode                  |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | co_consts       | tuple of constants used   |       |
    +|           |                 | in the bytecode           |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | co_filename     | name of file in which     |       |
    +|           |                 | this code object was      |       |
    +|           |                 | created                   |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | co_firstlineno  | number of first line in   |       |
    +|           |                 | Python source code        |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | co_flags        | bitmap: 1=optimized ``|`` |       |
    +|           |                 | 2=newlocals ``|`` 4=\*arg |       |
    +|           |                 | ``|`` 8=\*\*arg           |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | co_lnotab       | encoded mapping of line   |       |
    +|           |                 | numbers to bytecode       |       |
    +|           |                 | indices                   |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | co_name         | name with which this code |       |
    +|           |                 | object was defined        |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | co_names        | tuple of names of local   |       |
    +|           |                 | variables                 |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | co_nlocals      | number of local variables |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | co_stacksize    | virtual machine stack     |       |
    +|           |                 | space required            |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | co_varnames     | tuple of names of         |       |
    +|           |                 | arguments and local       |       |
    +|           |                 | variables                 |       |
    ++-----------+-----------------+---------------------------+-------+
    +| builtin   | __doc__         | documentation string      |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | __name__        | original name of this     |       |
    +|           |                 | function or method        |       |
    ++-----------+-----------------+---------------------------+-------+
    +|           | __self__        | instance to which a       |       |
    +|           |                 | method is bound, or       |       |
    +|           |                 | ``None``                  |       |
    ++-----------+-----------------+---------------------------+-------+
    +
    +Note:
    +
    +(1)
    +   .. versionchanged:: 2.2
    +      :attr:`im_class` used to refer to the class that defined the method.
    +
    +
    +.. function:: getmembers(object[, predicate])
    +
    +   Return all the members of an object in a list of (name, value) pairs sorted by
    +   name.  If the optional *predicate* argument is supplied, only members for which
    +   the predicate returns a true value are included.
    +
    +
    +.. function:: getmoduleinfo(path)
    +
    +   Return a tuple of values that describe how Python will interpret the file
    +   identified by *path* if it is a module, or ``None`` if it would not be
    +   identified as a module.  The return tuple is ``(name, suffix, mode, mtype)``,
    +   where *name* is the name of the module without the name of any enclosing
    +   package, *suffix* is the trailing part of the file name (which may not be a
    +   dot-delimited extension), *mode* is the :func:`open` mode that would be used
    +   (``'r'`` or ``'rb'``), and *mtype* is an integer giving the type of the
    +   module.  *mtype* will have a value which can be compared to the constants
    +   defined in the :mod:`imp` module; see the documentation for that module for
    +   more information on module types.
    +
    +
    +.. function:: getmodulename(path)
    +
    +   Return the name of the module named by the file *path*, without including the
    +   names of enclosing packages.  This uses the same algorithm as the interpreter
    +   uses when searching for modules.  If the name cannot be matched according to the
    +   interpreter's rules, ``None`` is returned.
    +
    +
    +.. function:: ismodule(object)
    +
    +   Return true if the object is a module.
    +
    +
    +.. function:: isclass(object)
    +
    +   Return true if the object is a class.
    +
    +
    +.. function:: ismethod(object)
    +
    +   Return true if the object is a method.
    +
    +
    +.. function:: isfunction(object)
    +
    +   Return true if the object is a Python function or unnamed (lambda) function.
    +
    +
    +.. function:: istraceback(object)
    +
    +   Return true if the object is a traceback.
    +
    +
    +.. function:: isframe(object)
    +
    +   Return true if the object is a frame.
    +
    +
    +.. function:: iscode(object)
    +
    +   Return true if the object is a code object.
    +
    +
    +.. function:: isbuiltin(object)
    +
    +   Return true if the object is a built-in function.
    +
    +
    +.. function:: isroutine(object)
    +
    +   Return true if the object is a user-defined or built-in function or method.
    +
    +
    +.. function:: ismethoddescriptor(object)
    +
    +   Return true if the object is a method descriptor, but not if ismethod() or
    +   isclass() or isfunction() are true.
    +
    +   This is new as of Python 2.2, and, for example, is true of int.__add__. An
    +   object passing this test has a __get__ attribute but not a __set__ attribute,
    +   but beyond that the set of attributes varies.  __name__ is usually sensible, and
    +   __doc__ often is.
    +
    +   Methods implemented via descriptors that also pass one of the other tests return
    +   false from the ismethoddescriptor() test, simply because the other tests promise
    +   more -- you can, e.g., count on having the im_func attribute (etc) when an
    +   object passes ismethod().
    +
    +
    +.. function:: isdatadescriptor(object)
    +
    +   Return true if the object is a data descriptor.
    +
    +   Data descriptors have both a __get__ and a __set__ attribute.  Examples are
    +   properties (defined in Python), getsets, and members.  The latter two are
    +   defined in C and there are more specific tests available for those types, which
    +   are robust across Python implementations.  Typically, data descriptors will also
    +   have __name__ and __doc__ attributes (properties, getsets, and members have both
    +   of these attributes), but this is not guaranteed.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: isgetsetdescriptor(object)
    +
    +   Return true if the object is a getset descriptor.
    +
    +   getsets are attributes defined in extension modules via ``PyGetSetDef``
    +   structures.  For Python implementations without such types, this method will
    +   always return ``False``.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: ismemberdescriptor(object)
    +
    +   Return true if the object is a member descriptor.
    +
    +   Member descriptors are attributes defined in extension modules via
    +   ``PyMemberDef`` structures.  For Python implementations without such types, this
    +   method will always return ``False``.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. _inspect-source:
    +
    +Retrieving source code
    +----------------------
    +
    +
    +.. function:: getdoc(object)
    +
    +   Get the documentation string for an object. All tabs are expanded to spaces.  To
    +   clean up docstrings that are indented to line up with blocks of code, any
    +   whitespace that can be uniformly removed from the second line onwards is
    +   removed.
    +
    +
    +.. function:: getcomments(object)
    +
    +   Return in a single string any lines of comments immediately preceding the
    +   object's source code (for a class, function, or method), or at the top of the
    +   Python source file (if the object is a module).
    +
    +
    +.. function:: getfile(object)
    +
    +   Return the name of the (text or binary) file in which an object was defined.
    +   This will fail with a :exc:`TypeError` if the object is a built-in module,
    +   class, or function.
    +
    +
    +.. function:: getmodule(object)
    +
    +   Try to guess which module an object was defined in.
    +
    +
    +.. function:: getsourcefile(object)
    +
    +   Return the name of the Python source file in which an object was defined.  This
    +   will fail with a :exc:`TypeError` if the object is a built-in module, class, or
    +   function.
    +
    +
    +.. function:: getsourcelines(object)
    +
    +   Return a list of source lines and starting line number for an object. The
    +   argument may be a module, class, method, function, traceback, frame, or code
    +   object.  The source code is returned as a list of the lines corresponding to the
    +   object and the line number indicates where in the original source file the first
    +   line of code was found.  An :exc:`IOError` is raised if the source code cannot
    +   be retrieved.
    +
    +
    +.. function:: getsource(object)
    +
    +   Return the text of the source code for an object. The argument may be a module,
    +   class, method, function, traceback, frame, or code object.  The source code is
    +   returned as a single string.  An :exc:`IOError` is raised if the source code
    +   cannot be retrieved.
    +
    +
    +.. _inspect-classes-functions:
    +
    +Classes and functions
    +---------------------
    +
    +
    +.. function:: getclasstree(classes[, unique])
    +
    +   Arrange the given list of classes into a hierarchy of nested lists. Where a
    +   nested list appears, it contains classes derived from the class whose entry
    +   immediately precedes the list.  Each entry is a 2-tuple containing a class and a
    +   tuple of its base classes.  If the *unique* argument is true, exactly one entry
    +   appears in the returned structure for each class in the given list.  Otherwise,
    +   classes using multiple inheritance and their descendants will appear multiple
    +   times.
    +
    +
    +.. function:: getargspec(func)
    +
    +   Get the names and default values of a function's arguments. A tuple of four
    +   things is returned: ``(args, varargs, varkw, defaults)``. *args* is a list of
    +   the argument names (it may contain nested lists). *varargs* and *varkw* are the
    +   names of the ``*`` and ``**`` arguments or ``None``. *defaults* is a tuple of
    +   default argument values or None if there are no default arguments; if this tuple
    +   has *n* elements, they correspond to the last *n* elements listed in *args*.
    +
    +
    +.. function:: getargvalues(frame)
    +
    +   Get information about arguments passed into a particular frame. A tuple of four
    +   things is returned: ``(args, varargs, varkw, locals)``. *args* is a list of the
    +   argument names (it may contain nested lists). *varargs* and *varkw* are the
    +   names of the ``*`` and ``**`` arguments or ``None``. *locals* is the locals
    +   dictionary of the given frame.
    +
    +
    +.. function:: formatargspec(args[, varargs, varkw, defaults, formatarg, formatvarargs, formatvarkw, formatvalue, join])
    +
    +   Format a pretty argument spec from the four values returned by
    +   :func:`getargspec`.  The format\* arguments are the corresponding optional
    +   formatting functions that are called to turn names and values into strings.
    +
    +
    +.. function:: formatargvalues(args[, varargs, varkw, locals, formatarg, formatvarargs, formatvarkw, formatvalue, join])
    +
    +   Format a pretty argument spec from the four values returned by
    +   :func:`getargvalues`.  The format\* arguments are the corresponding optional
    +   formatting functions that are called to turn names and values into strings.
    +
    +
    +.. function:: getmro(cls)
    +
    +   Return a tuple of class cls's base classes, including cls, in method resolution
    +   order.  No class appears more than once in this tuple. Note that the method
    +   resolution order depends on cls's type.  Unless a very peculiar user-defined
    +   metatype is in use, cls will be the first element of the tuple.
    +
    +
    +.. _inspect-stack:
    +
    +The interpreter stack
    +---------------------
    +
    +When the following functions return "frame records," each record is a tuple of
    +six items: the frame object, the filename, the line number of the current line,
    +the function name, a list of lines of context from the source code, and the
    +index of the current line within that list.
    +
    +.. warning::
    +
    +   Keeping references to frame objects, as found in the first element of the frame
    +   records these functions return, can cause your program to create reference
    +   cycles.  Once a reference cycle has been created, the lifespan of all objects
    +   which can be accessed from the objects which form the cycle can become much
    +   longer even if Python's optional cycle detector is enabled.  If such cycles must
    +   be created, it is important to ensure they are explicitly broken to avoid the
    +   delayed destruction of objects and increased memory consumption which occurs.
    +
    +   Though the cycle detector will catch these, destruction of the frames (and local
    +   variables) can be made deterministic by removing the cycle in a
    +   :keyword:`finally` clause.  This is also important if the cycle detector was
    +   disabled when Python was compiled or using :func:`gc.disable`.  For example::
    +
    +      def handle_stackframe_without_leak():
    +          frame = inspect.currentframe()
    +          try:
    +              # do something with the frame
    +          finally:
    +              del frame
    +
    +The optional *context* argument supported by most of these functions specifies
    +the number of lines of context to return, which are centered around the current
    +line.
    +
    +
    +.. function:: getframeinfo(frame[, context])
    +
    +   Get information about a frame or traceback object.  A 5-tuple is returned, the
    +   last five elements of the frame's frame record.
    +
    +
    +.. function:: getouterframes(frame[, context])
    +
    +   Get a list of frame records for a frame and all outer frames.  These frames
    +   represent the calls that lead to the creation of *frame*. The first entry in the
    +   returned list represents *frame*; the last entry represents the outermost call
    +   on *frame*'s stack.
    +
    +
    +.. function:: getinnerframes(traceback[, context])
    +
    +   Get a list of frame records for a traceback's frame and all inner frames.  These
    +   frames represent calls made as a consequence of *frame*.  The first entry in the
    +   list represents *traceback*; the last entry represents where the exception was
    +   raised.
    +
    +
    +.. function:: currentframe()
    +
    +   Return the frame object for the caller's stack frame.
    +
    +
    +.. function:: stack([context])
    +
    +   Return a list of frame records for the caller's stack.  The first entry in the
    +   returned list represents the caller; the last entry represents the outermost
    +   call on the stack.
    +
    +
    +.. function:: trace([context])
    +
    +   Return a list of frame records for the stack between the current frame and the
    +   frame in which an exception currently being handled was raised.  The first
    +   entry in the list represents the caller; the last entry represents where the
    +   exception was raised.
    +
    diff --git a/Doc/library/internet.rst b/Doc/library/internet.rst
    new file mode 100644
    index 0000000..16b0a44
    --- /dev/null
    +++ b/Doc/library/internet.rst
    @@ -0,0 +1,47 @@
    +
    +.. _internet:
    +
    +******************************
    +Internet Protocols and Support
    +******************************
    +
    +.. index::
    +   single: WWW
    +   single: Internet
    +   single: World Wide Web
    +
    +.. index:: module: socket
    +
    +The modules described in this chapter implement Internet protocols and support
    +for related technology.  They are all implemented in Python. Most of these
    +modules require the presence of the system-dependent module :mod:`socket`, which
    +is currently supported on most popular platforms.  Here is an overview:
    +
    +
    +.. toctree::
    +
    +   webbrowser.rst
    +   cgi.rst
    +   cgitb.rst
    +   wsgiref.rst
    +   urllib.rst
    +   urllib2.rst
    +   httplib.rst
    +   ftplib.rst
    +   poplib.rst
    +   imaplib.rst
    +   nntplib.rst
    +   smtplib.rst
    +   smtpd.rst
    +   telnetlib.rst
    +   uuid.rst
    +   urlparse.rst
    +   socketserver.rst
    +   basehttpserver.rst
    +   simplehttpserver.rst
    +   cgihttpserver.rst
    +   cookielib.rst
    +   cookie.rst
    +   xmlrpclib.rst
    +   simplexmlrpcserver.rst
    +   docxmlrpcserver.rst
    diff --git a/Doc/library/intro.rst b/Doc/library/intro.rst
    new file mode 100644
    index 0000000..33bdefd
    --- /dev/null
    +++ b/Doc/library/intro.rst
    @@ -0,0 +1,51 @@
    +
    +.. _library-intro:
    +
    +************
    +Introduction
    +************
    +
    +The "Python library" contains several different kinds of components.
    +
    +It contains data types that would normally be considered part of the "core" of a
    +language, such as numbers and lists.  For these types, the Python language core
    +defines the form of literals and places some constraints on their semantics, but
    +does not fully define the semantics.  (On the other hand, the language core does
    +define syntactic properties like the spelling and priorities of operators.)
    +
    +The library also contains built-in functions and exceptions --- objects that can
    +be used by all Python code without the need of an :keyword:`import` statement.
    +Some of these are defined by the core language, but many are not essential for
    +the core semantics and are only described here.
    +
    +The bulk of the library, however, consists of a collection of modules. There are
    +many ways to dissect this collection.  Some modules are written in C and built
    +in to the Python interpreter; others are written in Python and imported in
    +source form.  Some modules provide interfaces that are highly specific to
    +Python, like printing a stack trace; some provide interfaces that are specific
    +to particular operating systems, such as access to specific hardware; others
    +provide interfaces that are specific to a particular application domain, like
    +the World Wide Web. Some modules are available in all versions and ports of
    +Python; others are only available when the underlying system supports or
    +requires them; yet others are available only when a particular configuration
    +option was chosen at the time when Python was compiled and installed.
    +
    +This manual is organized "from the inside out:" it first describes the built-in
    +data types, then the built-in functions and exceptions, and finally the modules,
    +grouped in chapters of related modules.  The ordering of the chapters as well as
    +the ordering of the modules within each chapter is roughly from most relevant to
    +least important.
    +
    +This means that if you start reading this manual from the start, and skip to the
    +next chapter when you get bored, you will get a reasonable overview of the
    +available modules and application areas that are supported by the Python
    +library.  Of course, you don't *have* to read it like a novel --- you can also
    +browse the table of contents (in front of the manual), or look for a specific
    +function, module or term in the index (in the back).  And finally, if you enjoy
    +learning about random subjects, you choose a random page number (see module
    +:mod:`random`) and read a section or two.  Regardless of the order in which you
    +read the sections of this manual, it helps to start with chapter :ref:`builtin`,
    +as the remainder of the manual assumes familiarity with this material.
    +
    +Let the show begin!
    +
    diff --git a/Doc/library/ipc.rst b/Doc/library/ipc.rst
    new file mode 100644
    index 0000000..fd425ed
    --- /dev/null
    +++ b/Doc/library/ipc.rst
    @@ -0,0 +1,24 @@
    +
    +.. _ipc:
    +
    +*****************************************
    +Interprocess Communication and Networking
    +*****************************************
    +
    +The modules described in this chapter provide mechanisms for different processes
    +to communicate.
    +
    +Some modules only work for two processes that are on the same machine, e.g.
    +:mod:`signal` and :mod:`subprocess`.  Other modules support networking protocols
    +that two or more processes can use to communicate across machines.
    +
    +The list of modules described in this chapter is:
    +
    +
    +.. toctree::
    +
    +   subprocess.rst
    +   socket.rst
    +   signal.rst
    +   asyncore.rst
    +   asynchat.rst
    diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst
    new file mode 100644
    index 0000000..9f9cb24
    --- /dev/null
    +++ b/Doc/library/itertools.rst
    @@ -0,0 +1,547 @@
    +
    +:mod:`itertools` --- Functions creating iterators for efficient looping
    +=======================================================================
    +
    +.. module:: itertools
    +   :synopsis: Functions creating iterators for efficient looping.
    +.. moduleauthor:: Raymond Hettinger 
    +.. sectionauthor:: Raymond Hettinger 
    +
    +
    +.. versionadded:: 2.3
    +
    +This module implements a number of iterator building blocks inspired by
    +constructs from the Haskell and SML programming languages.  Each has been recast
    +in a form suitable for Python.
    +
    +The module standardizes a core set of fast, memory efficient tools that are
    +useful by themselves or in combination.  Standardization helps avoid the
    +readability and reliability problems which arise when many different individuals
    +create their own slightly varying implementations, each with their own quirks
    +and naming conventions.
    +
    +The tools are designed to combine readily with one another.  This makes it easy
    +to construct more specialized tools succinctly and efficiently in pure Python.
    +
    +For instance, SML provides a tabulation tool: ``tabulate(f)`` which produces a
    +sequence ``f(0), f(1), ...``.  This toolbox provides :func:`imap` and
    +:func:`count` which can be combined to form ``imap(f, count())`` and produce an
    +equivalent result.
    +
    +Likewise, the functional tools are designed to work well with the high-speed
    +functions provided by the :mod:`operator` module.
    +
    +The module author welcomes suggestions for other basic building blocks to be
    +added to future versions of the module.
    +
    +Whether cast in pure Python form or compiled code, tools that use iterators are
    +more memory efficient (and faster) than their list based counterparts. Adopting
    +the principles of just-in-time manufacturing, they create data when and where
    +needed instead of consuming memory with the computer equivalent of "inventory".
    +
    +The performance advantage of iterators becomes more acute as the number of
    +elements increases -- at some point, lists grow large enough to severely impact
    +memory cache performance and start running slowly.
    +
    +
    +.. seealso::
    +
    +   The Standard ML Basis Library, `The Standard ML Basis Library
    +   <http://www.standardml.org/Basis/>`_.
    +
    +   Haskell, A Purely Functional Language, `Definition of Haskell and the Standard
    +   Libraries <http://www.haskell.org/definition/>`_.
    +
    +
    +.. _itertools-functions:
    +
    +Itertool functions
    +------------------
    +
    +The following module functions all construct and return iterators. Some provide
    +streams of infinite length, so they should only be accessed by functions or
    +loops that truncate the stream.
    +
    +
    +.. function:: chain(*iterables)
    +
    +   Make an iterator that returns elements from the first iterable until it is
    +   exhausted, then proceeds to the next iterable, until all of the iterables are
    +   exhausted.  Used for treating consecutive sequences as a single sequence.
    +   Equivalent to::
    +
    +      def chain(*iterables):
    +          for it in iterables:
    +              for element in it:
    +                  yield element
    +
    +
    +.. function:: count([n])
    +
    +   Make an iterator that returns consecutive integers starting with *n*. If not
    +   specified, *n* defaults to zero.  Does not currently support Python long
    +   integers.  Often used as an argument to :func:`imap` to generate consecutive
    +   data points. Also, used with :func:`izip` to add sequence numbers.  Equivalent
    +   to::
    +
    +      def count(n=0):
    +          while True:
    +              yield n
    +              n += 1
    +
    +   Note, :func:`count` does not check for overflow and will return negative numbers
    +   after exceeding ``sys.maxint``.  This behavior may change in the future.
    +
    +
    +.. function:: cycle(iterable)
    +
    +   Make an iterator returning elements from the iterable and saving a copy of each.
    +   When the iterable is exhausted, return elements from the saved copy.  Repeats
    +   indefinitely.  Equivalent to::
    +
    +      def cycle(iterable):
    +          saved = []
    +          for element in iterable:
    +              yield element
    +              saved.append(element)
    +          while saved:
    +              for element in saved:
    +                    yield element
    +
    +   Note, this member of the toolkit may require significant auxiliary storage
    +   (depending on the length of the iterable).
    +
    +
    +.. function:: dropwhile(predicate, iterable)
    +
    +   Make an iterator that drops elements from the iterable as long as the predicate
    +   is true; afterwards, returns every element.  Note, the iterator does not produce
    +   *any* output until the predicate first becomes false, so it may have a lengthy
    +   start-up time.  Equivalent to::
    +
    +      def dropwhile(predicate, iterable):
    +          iterable = iter(iterable)
    +          for x in iterable:
    +              if not predicate(x):
    +                  yield x
    +                  break
    +          for x in iterable:
    +              yield x
    +
    +
    +.. function:: groupby(iterable[, key])
    +
    +   Make an iterator that returns consecutive keys and groups from the *iterable*.
    +   The *key* is a function computing a key value for each element.  If *key* is
    +   not specified or is ``None``, it defaults to an identity function that returns
    +   the element unchanged.  Generally, the iterable needs to already be sorted on
    +   the same key function.
    +
    +   The operation of :func:`groupby` is similar to the ``uniq`` filter in Unix.  It
    +   generates a break or new group every time the value of the key function changes
    +   (which is why it is usually necessary to have sorted the data using the same key
    +   function).  That behavior differs from SQL's GROUP BY which aggregates common
    +   elements regardless of their input order.
    +
    +   The returned group is itself an iterator that shares the underlying iterable
    +   with :func:`groupby`.  Because the source is shared, when the :func:`groupby`
    +   object is advanced, the previous group is no longer visible.  So, if that data
    +   is needed later, it should be stored as a list::
    +
    +      groups = []
    +      uniquekeys = []
    +      data = sorted(data, key=keyfunc)
    +      for k, g in groupby(data, keyfunc):
    +          groups.append(list(g))      # Store group iterator as a list
    +          uniquekeys.append(k)
    +
    +   :func:`groupby` is equivalent to::
    +
    +      class groupby(object):
    +          def __init__(self, iterable, key=None):
    +              if key is None:
    +                  key = lambda x: x
    +              self.keyfunc = key
    +              self.it = iter(iterable)
    +              self.tgtkey = self.currkey = self.currvalue = []
    +          def __iter__(self):
    +              return self
    +          def __next__(self):
    +              while self.currkey == self.tgtkey:
    +                  self.currvalue = next(self.it) # Exit on StopIteration
    +                  self.currkey = self.keyfunc(self.currvalue)
    +              self.tgtkey = self.currkey
    +              return (self.currkey, self._grouper(self.tgtkey))
    +          def _grouper(self, tgtkey):
    +              while self.currkey == tgtkey:
    +                  yield self.currvalue
    +                  self.currvalue = next(self.it) # Exit on StopIteration
    +                  self.currkey = self.keyfunc(self.currvalue)
    +
    +   .. versionadded:: 2.4
    +
    +
    +.. function:: ifilter(predicate, iterable)
    +
    +   Make an iterator that filters elements from iterable returning only those for
    +   which the predicate is ``True``. If *predicate* is ``None``, return the items
    +   that are true. Equivalent to::
    +
    +      def ifilter(predicate, iterable):
    +          if predicate is None:
    +              predicate = bool
    +          for x in iterable:
    +              if predicate(x):
    +                  yield x
    +
    +
    +.. function:: ifilterfalse(predicate, iterable)
    +
    +   Make an iterator that filters elements from iterable returning only those for
    +   which the predicate is ``False``. If *predicate* is ``None``, return the items
    +   that are false. Equivalent to::
    +
    +      def ifilterfalse(predicate, iterable):
    +          if predicate is None:
    +              predicate = bool
    +          for x in iterable:
    +              if not predicate(x):
    +                  yield x
    +
    +
    +.. function:: imap(function, *iterables)
    +
    +   Make an iterator that computes the function using arguments from each of the
    +   iterables.  If *function* is set to ``None``, then :func:`imap` returns the
    +   arguments as a tuple.  Like :func:`map` but stops when the shortest iterable is
    +   exhausted instead of filling in ``None`` for shorter iterables.  The reason for
    +   the difference is that infinite iterator arguments are typically an error for
    +   :func:`map` (because the output is fully evaluated) but represent a common and
    +   useful way of supplying arguments to :func:`imap`. Equivalent to::
    +
    +      def imap(function, *iterables):
    +          iterables = map(iter, iterables)
    +          while True:
    +              args = [next(i) for i in iterables]
    +              if function is None:
    +                  yield tuple(args)
    +              else:
    +                  yield function(*args)
    +
    +
    +.. function:: islice(iterable, [start,] stop [, step])
    +
    +   Make an iterator that returns selected elements from the iterable. If *start* is
    +   non-zero, then elements from the iterable are skipped until start is reached.
    +   Afterward, elements are returned consecutively unless *step* is set higher than
    +   one which results in items being skipped.  If *stop* is ``None``, then iteration
    +   continues until the iterator is exhausted, if at all; otherwise, it stops at the
    +   specified position.  Unlike regular slicing, :func:`islice` does not support
    +   negative values for *start*, *stop*, or *step*.  Can be used to extract related
    +   fields from data where the internal structure has been flattened (for example, a
    +   multi-line report may list a name field on every third line).  Equivalent to::
    +
    +      def islice(iterable, *args):
    +          s = slice(*args)
    +          it = iter(range(s.start or 0, s.stop or sys.maxint, s.step or 1))
    +          nexti = next(it)
    +          for i, element in enumerate(iterable):
    +              if i == nexti:
    +                  yield element
    +                  nexti = next(it)
    +
    +   If *start* is ``None``, then iteration starts at zero. If *step* is ``None``,
    +   then the step defaults to one.
    +
    +   .. versionchanged:: 2.5
    +      accept ``None`` values for default *start* and *step*.
    +
    +
    +.. function:: izip(*iterables)
    +
    +   Make an iterator that aggregates elements from each of the iterables. Like
    +   :func:`zip` except that it returns an iterator instead of a list.  Used for
    +   lock-step iteration over several iterables at a time.  Equivalent to::
    +
    +      def izip(*iterables):
    +          iterables = map(iter, iterables)
    +          while iterables:
    +              result = [next(it) for it in iterables]
    +              yield tuple(result)
    +
    +   .. versionchanged:: 2.4
    +      When no iterables are specified, returns a zero length iterator instead of
    +      raising a :exc:`TypeError` exception.
    +
    +   Note, the left-to-right evaluation order of the iterables is guaranteed. This
    +   makes possible an idiom for clustering a data series into n-length groups using
    +   ``izip(*[iter(s)]*n)``.  For data that doesn't fit n-length groups exactly, the
    +   last tuple can be pre-padded with fill values using ``izip(*[chain(s,
    +   [None]*(n-1))]*n)``.
    +
    +   Note, when :func:`izip` is used with unequal length inputs, subsequent
    +   iteration over the longer iterables cannot reliably be continued after
    +   :func:`izip` terminates.  Potentially, up to one entry will be missing from
    +   each of the left-over iterables. This occurs because a value is fetched from
    +   each iterator in turn, but the process ends when one of the iterators
    +   terminates.  This leaves the last fetched values in limbo (they cannot be
    +   returned in a final, incomplete tuple and they cannot be pushed back into
    +   the iterator for retrieval with ``next(it)``).  In general, :func:`izip`
    +   should only be used with unequal length inputs when you don't care about
    +   trailing, unmatched values from the longer iterables.
    +
    +
    +.. function:: izip_longest(*iterables[, fillvalue])
    +
    +   Make an iterator that aggregates elements from each of the iterables. If the
    +   iterables are of uneven length, missing values are filled-in with *fillvalue*.
    +   Iteration continues until the longest iterable is exhausted.  Equivalent to::
    +
    +      def izip_longest(*args, **kwds):
    +          fillvalue = kwds.get('fillvalue')
    +          def sentinel(counter = ([fillvalue]*(len(args)-1)).pop):
    +              yield counter()         # yields the fillvalue, or raises IndexError
    +          fillers = repeat(fillvalue)
    +          iters = [chain(it, sentinel(), fillers) for it in args]
    +          try:
    +              for tup in izip(*iters):
    +                  yield tup
    +          except IndexError:
    +              pass
    +
    +   If one of the iterables is potentially infinite, then the :func:`izip_longest`
    +   function should be wrapped with something that limits the number of calls (for
    +   example :func:`islice` or :func:`takewhile`).
    +
    +   .. versionadded:: 2.6
    +
    +
    +.. function:: repeat(object[, times])
    +
    +   Make an iterator that returns *object* over and over again. Runs indefinitely
    +   unless the *times* argument is specified. Used as argument to :func:`imap` for
    +   invariant parameters to the called function.  Also used with :func:`izip` to
    +   create an invariant part of a tuple record.  Equivalent to::
    +
    +      def repeat(object, times=None):
    +          if times is None:
    +              while True:
    +                  yield object
    +          else:
    +              for i in range(times):
    +                  yield object
    +
    +
    +.. function:: starmap(function, iterable)
    +
    +   Make an iterator that computes the function using argument tuples obtained from
    +   the iterable.  Used instead of :func:`imap` when argument parameters are already
    +   grouped in tuples from a single iterable (the data has been "pre-zipped").  The
    +   difference between :func:`imap` and :func:`starmap` parallels the distinction
    +   between ``function(a,b)`` and ``function(*c)``. Equivalent to::
    +
    +      def starmap(function, iterable):
    +          iterable = iter(iterable)
    +          while True:
    +              yield function(*next(iterable))
    +
    +
    +.. function:: takewhile(predicate, iterable)
    +
    +   Make an iterator that returns elements from the iterable as long as the
    +   predicate is true.  Equivalent to::
    +
    +      def takewhile(predicate, iterable):
    +          for x in iterable:
    +              if predicate(x):
    +                  yield x
    +              else:
    +                  break
    +
    +
    +.. function:: tee(iterable[, n=2])
    +
    +   Return *n* independent iterators from a single iterable. The case where ``n==2``
    +   is equivalent to::
    +
    +      def tee(iterable):
    +          def gen(next, data={}, cnt=[0]):
    +              for i in count():
    +                  if i == cnt[0]:
    +                      item = data[i] = next()
    +                      cnt[0] += 1
    +                  else:
    +                      item = data.pop(i)
    +                  yield item
    +          it = iter(iterable)
    +          return (gen(it.__next__), gen(it.__next__))
    +
    +   Note, once :func:`tee` has made a split, the original *iterable* should not be
    +   used anywhere else; otherwise, the *iterable* could get advanced without the tee
    +   objects being informed.
    +
    +   Note, this member of the toolkit may require significant auxiliary storage
    +   (depending on how much temporary data needs to be stored). In general, if one
    +   iterator is going to use most or all of the data before the other iterator, it
    +   is faster to use :func:`list` instead of :func:`tee`.
    +
    +   .. versionadded:: 2.4
    +
    +
    +.. _itertools-example:
    +
    +Examples
    +--------
    +
    +The following examples show common uses for each tool and demonstrate ways they
    +can be combined. ::
    +
    +   >>> amounts = [120.15, 764.05, 823.14]
    +   >>> for checknum, amount in izip(count(1200), amounts):
    +   ...     print 'Check %d is for $%.2f' % (checknum, amount)
    +   ...
    +   Check 1200 is for $120.15
    +   Check 1201 is for $764.05
    +   Check 1202 is for $823.14
    +
    +   >>> import operator
    +   >>> for cube in imap(operator.pow, range(1,5), repeat(3)):
    +   ...    print cube
    +   ...
    +   1
    +   8
    +   27
    +   64
    +
    +   >>> reportlines = ['EuroPython', 'Roster', '', 'alex', '', 'laura',
    +   ...                '', 'martin', '', 'walter', '', 'mark']
    +   >>> for name in islice(reportlines, 3, None, 2):
    +   ...    print name.title()
    +   ...
    +   Alex
    +   Laura
    +   Martin
    +   Walter
    +   Mark
    +
    +   # Show a dictionary sorted and grouped by value
    +   >>> from operator import itemgetter
    +   >>> d = dict(a=1, b=2, c=1, d=2, e=1, f=2, g=3)
    +   >>> di = sorted(d.iteritems(), key=itemgetter(1))
    +   >>> for k, g in groupby(di, key=itemgetter(1)):
    +   ...     print k, map(itemgetter(0), g)
    +   ...
    +   1 ['a', 'c', 'e']
    +   2 ['b', 'd', 'f']
    +   3 ['g']
    +
    +   # Find runs of consecutive numbers using groupby.  The key to the solution
    +   # is differencing with a range so that consecutive numbers all appear in
    +   # same group.
    +   >>> data = [ 1,  4,5,6, 10, 15,16,17,18, 22, 25,26,27,28]
    +   >>> for k, g in groupby(enumerate(data), lambda t:t[0]-t[1]):
    +   ...     print map(operator.itemgetter(1), g)
    +   ... 
    +   [1]
    +   [4, 5, 6]
    +   [10]
    +   [15, 16, 17, 18]
    +   [22]
    +   [25, 26, 27, 28]
    +
    +
    +
    +.. _itertools-recipes:
    +
    +Recipes
    +-------
    +
    +This section shows recipes for creating an extended toolset using the existing
    +itertools as building blocks.
    +
    +The extended tools offer the same high performance as the underlying toolset.
    +The superior memory performance is kept by processing elements one at a time
    +rather than bringing the whole iterable into memory all at once. Code volume is
    +kept small by linking the tools together in a functional style which helps
    +eliminate temporary variables.  High speed is retained by preferring
    +"vectorized" building blocks over the use of for-loops and generators which
    +incur interpreter overhead. ::
    +
    +   def take(n, seq):
    +       return list(islice(seq, n))
    +
    +   def enumerate(iterable):
    +       return izip(count(), iterable)
    +
    +   def tabulate(function):
    +       "Return function(0), function(1), ..."
    +       return imap(function, count())
    +
    +   def iteritems(mapping):
    +       return izip(mapping.iterkeys(), mapping.itervalues())
    +
    +   def nth(iterable, n):
    +       "Returns the nth item or raise StopIteration"
    +       return islice(iterable, n, None).next()
    +
    +   def all(seq, pred=None):
    +       "Returns True if pred(x) is true for every element in the iterable"
    +       for elem in ifilterfalse(pred, seq):
    +           return False
    +       return True
    +
    +   def any(seq, pred=None):
    +       "Returns True if pred(x) is true for at least one element in the iterable"
    +       for elem in ifilter(pred, seq):
    +           return True
    +       return False
    +
    +   def no(seq, pred=None):
    +       "Returns True if pred(x) is false for every element in the iterable"
    +       for elem in ifilter(pred, seq):
    +           return False
    +       return True
    +
    +   def quantify(seq, pred=None):
    +       "Count how many times the predicate is true in the sequence"
    +       return sum(imap(pred, seq))
    +
    +   def padnone(seq):
    +       """Returns the sequence elements and then returns None indefinitely.
    +
    +       Useful for emulating the behavior of the built-in map() function.
    +       """
    +       return chain(seq, repeat(None))
    +
    +   def ncycles(seq, n):
    +       "Returns the sequence elements n times"
    +       return chain(*repeat(seq, n))
    +
    +   def dotproduct(vec1, vec2):
    +       return sum(imap(operator.mul, vec1, vec2))
    +
    +   def flatten(listOfLists):
    +       return list(chain(*listOfLists))
    +
    +   def repeatfunc(func, times=None, *args):
    +       """Repeat calls to func with specified arguments.
    +
    +       Example:  repeatfunc(random.random)
    +       """
    +       if times is None:
    +           return starmap(func, repeat(args))
    +       else:
    +           return starmap(func, repeat(args, times))
    +
    +   def pairwise(iterable):
    +       "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    +       a, b = tee(iterable)
    +       next(b, None)
    +       return izip(a, b)
    +
    +   def grouper(n, iterable, padvalue=None):
    +       "grouper(3, 'abcdefg', 'x') --> ('a','b','c'), ('d','e','f'), ('g','x','x')"
    +       return izip(*[chain(iterable, repeat(padvalue, n-1))]*n)
    +
    +
    +
    diff --git a/Doc/library/keyword.rst b/Doc/library/keyword.rst
    new file mode 100644
    index 0000000..32a2d34
    --- /dev/null
    +++ b/Doc/library/keyword.rst
    @@ -0,0 +1,22 @@
    +
    +:mod:`keyword` --- Testing for Python keywords
    +==============================================
    +
    +.. module:: keyword
    +   :synopsis: Test whether a string is a keyword in Python.
    +
    +
    +This module allows a Python program to determine if a string is a keyword.
    +
    +
    +.. function:: iskeyword(s)
    +
    +   Return true if *s* is a Python keyword.
    +
    +
    +.. data:: kwlist
    +
    +   Sequence containing all the keywords defined for the interpreter.  If any
    +   keywords are defined to only be active when particular :mod:`__future__`
    +   statements are in effect, these will be included as well.
    +
    diff --git a/Doc/library/language.rst b/Doc/library/language.rst
    new file mode 100644
    index 0000000..7d6af7d
    --- /dev/null
    +++ b/Doc/library/language.rst
    @@ -0,0 +1,29 @@
    +
    +.. _language:
    +
    +************************
    +Python Language Services
    +************************
    +
    +Python provides a number of modules to assist in working with the Python
    +language.  These modules support tokenizing, parsing, syntax analysis, bytecode
    +disassembly, and various other facilities.
    +
    +These modules include:
    +
    +
    +.. toctree::
    +
    +   parser.rst
    +   _ast.rst
    +   symbol.rst
    +   token.rst
    +   keyword.rst
    +   tokenize.rst
    +   tabnanny.rst
    +   pyclbr.rst
    +   py_compile.rst
    +   compileall.rst
    +   dis.rst
    +   pickletools.rst
    +   distutils.rst
    diff --git a/Doc/library/linecache.rst b/Doc/library/linecache.rst
    new file mode 100644
    index 0000000..f3d8379
    --- /dev/null
    +++ b/Doc/library/linecache.rst
    @@ -0,0 +1,52 @@
    +
    +:mod:`linecache` --- Random access to text lines
    +================================================
    +
    +.. module:: linecache
    +   :synopsis: This module provides random access to individual lines from text files.
    +.. sectionauthor:: Moshe Zadka 
    +
    +
    +The :mod:`linecache` module allows one to get any line from any file, while
    +attempting to optimize internally, using a cache, the common case where many
    +lines are read from a single file.  This is used by the :mod:`traceback` module
    +to retrieve source lines for inclusion in the formatted traceback.
    +
    +The :mod:`linecache` module defines the following functions:
    +
    +
    +.. function:: getline(filename, lineno[, module_globals])
    +
    +   Get line *lineno* from file named *filename*. This function will never raise an
    +   exception --- it will return ``''`` on errors (the terminating newline character
    +   will be included for lines that are found).
    +
    +   .. index:: triple: module; search; path
    +
    +   If a file named *filename* is not found, the function will look for it in the
    +   module search path, ``sys.path``, after first checking for a :pep:`302`
    +   ``__loader__`` in *module_globals*, in case the module was imported from a
    +   zipfile or other non-filesystem import source.
    +
    +   .. versionadded:: 2.5
    +      The *module_globals* parameter was added.
    +
    +
    +.. function:: clearcache()
    +
    +   Clear the cache.  Use this function if you no longer need lines from files
    +   previously read using :func:`getline`.
    +
    +
    +.. function:: checkcache([filename])
    +
    +   Check the cache for validity.  Use this function if files in the cache may have
    +   changed on disk, and you require the updated version.  If *filename* is omitted,
    +   it will check all the entries in the cache.
    +
    +Example::
    +
    +   >>> import linecache
    +   >>> linecache.getline('/etc/passwd', 4)
    +   'sys:x:3:3:sys:/dev:/bin/sh\n'
    +
    diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst
    new file mode 100644
    index 0000000..6d427b7
    --- /dev/null
    +++ b/Doc/library/locale.rst
    @@ -0,0 +1,578 @@
    +
    +:mod:`locale` --- Internationalization services
    +===============================================
    +
    +.. module:: locale
    +   :synopsis: Internationalization services.
    +.. moduleauthor:: Martin von Löwis 
    +.. sectionauthor:: Martin von Löwis 
    +
    +
    +The :mod:`locale` module opens access to the POSIX locale database and
    +functionality. The POSIX locale mechanism allows programmers to deal with
    +certain cultural issues in an application, without requiring the programmer to
    +know all the specifics of each country where the software is executed.
    +
    +.. index:: module: _locale
    +
    +The :mod:`locale` module is implemented on top of the :mod:`_locale` module,
    +which in turn uses an ANSI C locale implementation if available.
    +
    +The :mod:`locale` module defines the following exception and functions:
    +
    +
    +.. exception:: Error
    +
    +   Exception raised when :func:`setlocale` fails.
    +
    +
    +.. function:: setlocale(category[, locale])
    +
    +   If *locale* is specified, it may be a string, a tuple of the form ``(language
    +   code, encoding)``, or ``None``. If it is a tuple, it is converted to a string
    +   using the locale aliasing engine.  If *locale* is given and not ``None``,
    +   :func:`setlocale` modifies the locale setting for the *category*.  The available
    +   categories are listed in the data description below.  The value is the name of a
    +   locale.  An empty string specifies the user's default settings. If the
    +   modification of the locale fails, the exception :exc:`Error` is raised.  If
    +   successful, the new locale setting is returned.
    +
    +   If *locale* is omitted or ``None``, the current setting for *category* is
    +   returned.
    +
    +   :func:`setlocale` is not thread safe on most systems. Applications typically
    +   start with a call of ::
    +
    +      import locale
    +      locale.setlocale(locale.LC_ALL, '')
    +
    +   This sets the locale for all categories to the user's default setting (typically
    +   specified in the :envvar:`LANG` environment variable).  If the locale is not
    +   changed thereafter, using multithreading should not cause problems.
    +
    +   .. versionchanged:: 2.0
    +      Added support for tuple values of the *locale* parameter.
    +
    +
    +.. function:: localeconv()
    +
    +   Returns the database of the local conventions as a dictionary. This dictionary
    +   has the following strings as keys:
    +
    +   +----------------------+-------------------------------------+--------------------------------+
    +   | Category             | Key                                 | Meaning                        |
    +   +======================+=====================================+================================+
    +   | :const:`LC_NUMERIC`  | ``'decimal_point'``                 | Decimal point character.       |
    +   +----------------------+-------------------------------------+--------------------------------+
    +   |                      | ``'grouping'``                      | Sequence of numbers specifying |
    +   |                      |                                     | which relative positions the   |
    +   |                      |                                     | ``'thousands_sep'`` is         |
    +   |                      |                                     | expected.  If the sequence is  |
    +   |                      |                                     | terminated with                |
    +   |                      |                                     | :const:`CHAR_MAX`, no further  |
    +   |                      |                                     | grouping is performed. If the  |
    +   |                      |                                     | sequence terminates with a     |
    +   |                      |                                     | ``0``,  the last group size is |
    +   |                      |                                     | repeatedly used.               |
    +   +----------------------+-------------------------------------+--------------------------------+
    +   |                      | ``'thousands_sep'``                 | Character used between groups. |
    +   +----------------------+-------------------------------------+--------------------------------+
    +   | :const:`LC_MONETARY` | ``'int_curr_symbol'``               | International currency symbol. |
    +   +----------------------+-------------------------------------+--------------------------------+
    +   |                      | ``'currency_symbol'``               | Local currency symbol.         |
    +   +----------------------+-------------------------------------+--------------------------------+
    +   |                      | ``'p_cs_precedes/n_cs_precedes'``   | Whether the currency symbol    |
    +   |                      |                                     | precedes the value (for        |
    +   |                      |                                     | positive resp. negative        |
    +   |                      |                                     | values).                       |
    +   +----------------------+-------------------------------------+--------------------------------+
    +   |                      | ``'p_sep_by_space/n_sep_by_space'`` | Whether the currency symbol is |
    +   |                      |                                     | separated from the value  by a |
    +   |                      |                                     | space (for positive resp.      |
    +   |                      |                                     | negative values).              |
    +   +----------------------+-------------------------------------+--------------------------------+
    +   |                      | ``'mon_decimal_point'``             | Decimal point used for         |
    +   |                      |                                     | monetary values.               |
    +   +----------------------+-------------------------------------+--------------------------------+
    +   |                      | ``'frac_digits'``                   | Number of fractional digits    |
    +   |                      |                                     | used in local formatting of    |
    +   |                      |                                     | monetary values.               |
    +   +----------------------+-------------------------------------+--------------------------------+
    +   |                      | ``'int_frac_digits'``               | Number of fractional digits    |
    +   |                      |                                     | used in international          |
    +   |                      |                                     | formatting of monetary values. |
    +   +----------------------+-------------------------------------+--------------------------------+
    +   |                      | ``'mon_thousands_sep'``             | Group separator used for       |
    +   |                      |                                     | monetary values.               |
    +   +----------------------+-------------------------------------+--------------------------------+
    +   |                      | ``'mon_grouping'``                  | Equivalent to ``'grouping'``,  |
    +   |                      |                                     | used for monetary values.      |
    +   +----------------------+-------------------------------------+--------------------------------+
    +   |                      | ``'positive_sign'``                 | Symbol used to annotate a      |
    +   |                      |                                     | positive monetary value.       |
    +   +----------------------+-------------------------------------+--------------------------------+
    +   |                      | ``'negative_sign'``                 | Symbol used to annotate a      |
    +   |                      |                                     | negative monetary value.       |
    +   +----------------------+-------------------------------------+--------------------------------+
    +   |                      | ``'p_sign_posn/n_sign_posn'``       | The position of the sign (for  |
    +   |                      |                                     | positive resp. negative        |
    +   |                      |                                     | values), see below.            |
    +   +----------------------+-------------------------------------+--------------------------------+
    +
    +   All numeric values can be set to :const:`CHAR_MAX` to indicate that there is no
    +   value specified in this locale.
    +
    +   The possible values for ``'p_sign_posn'`` and ``'n_sign_posn'`` are given below.
    +
    +   +--------------+-----------------------------------------+
    +   | Value        | Explanation                             |
    +   +==============+=========================================+
    +   | ``0``        | Currency and value are surrounded by    |
    +   |              | parentheses.                            |
    +   +--------------+-----------------------------------------+
    +   | ``1``        | The sign should precede the value and   |
    +   |              | currency symbol.                        |
    +   +--------------+-----------------------------------------+
    +   | ``2``        | The sign should follow the value and    |
    +   |              | currency symbol.                        |
    +   +--------------+-----------------------------------------+
    +   | ``3``        | The sign should immediately precede the |
    +   |              | value.                                  |
    +   +--------------+-----------------------------------------+
    +   | ``4``        | The sign should immediately follow the  |
    +   |              | value.                                  |
    +   +--------------+-----------------------------------------+
    +   | ``CHAR_MAX`` | Nothing is specified in this locale.    |
    +   +--------------+-----------------------------------------+
    +
    +
    +.. function:: nl_langinfo(option)
    +
    +   Return some locale-specific information as a string. This function is not
    +   available on all systems, and the set of possible options might also vary across
    +   platforms. The possible argument values are numbers, for which symbolic
    +   constants are available in the locale module.
    +
    +
    +.. function:: getdefaultlocale([envvars])
    +
    +   Tries to determine the default locale settings and returns them as a tuple of
    +   the form ``(language code, encoding)``.
    +
    +   According to POSIX, a program which has not called ``setlocale(LC_ALL, '')``
    +   runs using the portable ``'C'`` locale.  Calling ``setlocale(LC_ALL, '')`` lets
    +   it use the default locale as defined by the :envvar:`LANG` variable.  Since we
    +   do not want to interfere with the current locale setting we thus emulate the
    +   behavior in the way described above.
    +
    +   To maintain compatibility with other platforms, not only the :envvar:`LANG`
    +   variable is tested, but a list of variables given as the *envvars* parameter.  The
    +   first found to be defined will be used.  *envvars* defaults to the search path
    +   used in GNU gettext; it must always contain the variable name ``LANG``.  The GNU
    +   gettext search path contains ``'LANGUAGE'``, ``'LC_ALL'``, ``'LC_CTYPE'``, and
    +   ``'LANG'``, in that order.
    +
    +   Except for the code ``'C'``, the language code corresponds to :rfc:`1766`.
    +   *language code* and *encoding* may be ``None`` if their values cannot be
    +   determined.
    +
    +   .. versionadded:: 2.0
    +
    +
    +.. function:: getlocale([category])
    +
    +   Returns the current setting for the given locale category as a sequence containing
    +   *language code*, *encoding*. *category* may be one of the :const:`LC_\*` values
    +   except :const:`LC_ALL`.  It defaults to :const:`LC_CTYPE`.
    +
    +   Except for the code ``'C'``, the language code corresponds to :rfc:`1766`.
    +   *language code* and *encoding* may be ``None`` if their values cannot be
    +   determined.
    +
    +   .. versionadded:: 2.0
    +
    +
    +.. function:: getpreferredencoding([do_setlocale])
    +
    +   Return the encoding used for text data, according to user preferences.  User
    +   preferences are expressed differently on different systems, and might not be
    +   available programmatically on some systems, so this function only returns a
    +   guess.
    +
    +   On some systems, it is necessary to invoke :func:`setlocale` to obtain the user
    +   preferences, so this function is not thread-safe. If invoking setlocale is not
    +   necessary or desired, *do_setlocale* should be set to ``False``.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: normalize(localename)
    +
    +   Returns a normalized locale code for the given locale name.  The returned locale
    +   code is formatted for use with :func:`setlocale`.  If normalization fails, the
    +   original name is returned unchanged.
    +
    +   If the given encoding is not known, the function defaults to the default
    +   encoding for the locale code just like :func:`setlocale`.
    +
    +   .. versionadded:: 2.0
    +
    +
    +.. function:: resetlocale([category])
    +
    +   Sets the locale for *category* to the default setting.
    +
    +   The default setting is determined by calling :func:`getdefaultlocale`.
    +   *category* defaults to :const:`LC_ALL`.
    +
    +   .. versionadded:: 2.0
    +
    +
    +.. function:: strcoll(string1, string2)
    +
    +   Compares two strings according to the current :const:`LC_COLLATE` setting. Like
    +   any other compare function, returns a negative value, a positive value, or
    +   ``0``, depending on whether *string1* collates before or after *string2* or is
    +   equal to it.
    +
    +
    +.. function:: strxfrm(string)
    +
    +   .. index:: builtin: cmp
    +
    +   Transforms a string to one that can be used for the built-in function
    +   :func:`cmp`, and still returns locale-aware results.  This function can be used
    +   when the same string is compared repeatedly, e.g. when collating a sequence of
    +   strings.
    +
    +
    +.. function:: format(format, val[, grouping[, monetary]])
    +
    +   Formats a number *val* according to the current :const:`LC_NUMERIC` setting.
    +   The format follows the conventions of the ``%`` operator.  For floating point
    +   values, the decimal point is modified if appropriate.  If *grouping* is true,
    +   also takes the grouping into account.
    +
    +   If *monetary* is true, the conversion uses monetary thousands separator and
    +   grouping strings.
    +
    +   Please note that this function will only work for exactly one %char specifier.
    +   For whole format strings, use :func:`format_string`.
    +
    +   .. versionchanged:: 2.5
    +      Added the *monetary* parameter.
    +
    +
    +.. function:: format_string(format, val[, grouping])
    +
    +   Processes formatting specifiers as in ``format % val``, but takes the current
    +   locale settings into account.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: currency(val[, symbol[, grouping[, international]]])
    +
    +   Formats a number *val* according to the current :const:`LC_MONETARY` settings.
    +
    +   The returned string includes the currency symbol if *symbol* is true, which is
    +   the default. If *grouping* is true (which is not the default), grouping is done
    +   with the value. If *international* is true (which is not the default), the
    +   international currency symbol is used.
    +
    +   Note that this function will not work with the 'C' locale, so you have to set a
    +   locale via :func:`setlocale` first.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: str(float)
    +
    +   Formats a floating point number using the same format as the built-in function
    +   ``str(float)``, but takes the decimal point into account.
    +
    +
    +.. function:: atof(string)
    +
    +   Converts a string to a floating point number, following the :const:`LC_NUMERIC`
    +   settings.
    +
    +
    +.. function:: atoi(string)
    +
    +   Converts a string to an integer, following the :const:`LC_NUMERIC` conventions.
    +
    +
    +.. data:: LC_CTYPE
    +
    +   .. index:: module: string
    +
    +   Locale category for the character type functions.  Depending on the settings of
    +   this category, the functions of module :mod:`string` dealing with case change
    +   their behaviour.
    +
    +
    +.. data:: LC_COLLATE
    +
    +   Locale category for sorting strings.  The functions :func:`strcoll` and
    +   :func:`strxfrm` of the :mod:`locale` module are affected.
    +
    +
    +.. data:: LC_TIME
    +
    +   Locale category for the formatting of time.  The function :func:`time.strftime`
    +   follows these conventions.
    +
    +
    +.. data:: LC_MONETARY
    +
    +   Locale category for formatting of monetary values.  The available options can
    +   be obtained from the :func:`localeconv` function.
    +
    +
    +.. data:: LC_MESSAGES
    +
    +   Locale category for message display. Python currently does not support
    +   application specific locale-aware messages.  Messages displayed by the operating
    +   system, like those returned by :func:`os.strerror` might be affected by this
    +   category.
    +
    +
    +.. data:: LC_NUMERIC
    +
    +   Locale category for formatting numbers.  The functions :func:`format`,
    +   :func:`atoi`, :func:`atof` and :func:`str` of the :mod:`locale` module are
    +   affected by that category.  All other numeric formatting operations are not
    +   affected.
    +
    +
    +.. data:: LC_ALL
    +
    +   Combination of all locale settings.  If this flag is used when the locale is
    +   changed, setting the locale for all categories is attempted. If that fails for
    +   any category, no category is changed at all.  When the locale is retrieved using
    +   this flag, a string indicating the setting for all categories is returned. This
    +   string can be later used to restore the settings.
    +
    +
    +.. data:: CHAR_MAX
    +
    +   This is a symbolic constant used for different values returned by
    +   :func:`localeconv`.
    +
    +The :func:`nl_langinfo` function accepts one of the following keys. Most
    +descriptions are taken from the corresponding description in the GNU C library.
    +
    +
    +.. data:: CODESET
    +
    +   Return a string with the name of the character encoding used in the selected
    +   locale.
    +
    +
    +.. data:: D_T_FMT
    +
    +   Return a string that can be used as a format string for strftime(3) to represent
    +   time and date in a locale-specific way.
    +
    +
    +.. data:: D_FMT
    +
    +   Return a string that can be used as a format string for strftime(3) to represent
    +   a date in a locale-specific way.
    +
    +
    +.. data:: T_FMT
    +
    +   Return a string that can be used as a format string for strftime(3) to represent
    +   a time in a locale-specific way.
    +
    +
    +.. data:: T_FMT_AMPM
    +
    +   The return value can be used as a format string for 'strftime' to represent time
    +   in the am/pm format.
    +
    +
    +.. data:: DAY_1 ... DAY_7
    +
    +   Return name of the n-th day of the week.
    +
    +   .. warning::
    +
    +      This follows the US convention of :const:`DAY_1` being Sunday, not the
    +      international convention (ISO 8601) that Monday is the first day of the week.
    +
    +
    +.. data:: ABDAY_1 ... ABDAY_7
    +
    +   Return abbreviated name of the n-th day of the week.
    +
    +
    +.. data:: MON_1 ... MON_12
    +
    +   Return name of the n-th month.
    +
    +
    +.. data:: ABMON_1 ... ABMON_12
    +
    +   Return abbreviated name of the n-th month.
    +
    +
    +.. data:: RADIXCHAR
    +
    +   Return radix character (decimal dot, decimal comma, etc.)
    +
    +
    +.. data:: THOUSEP
    +
    +   Return separator character for thousands (groups of three digits).
    +
    +
    +.. data:: YESEXPR
    +
    +   Return a regular expression that can be used with the regex(3) function to
    +   recognize a positive response to a yes/no question.
    +
    +   .. warning::
    +
    +      The expression is in the syntax suitable for the :cfunc:`regex` function from
    +      the C library, which might differ from the syntax used in :mod:`re`.
    +
    +
    +.. data:: NOEXPR
    +
    +   Return a regular expression that can be used with the regex(3) function to
    +   recognize a negative response to a yes/no question.
    +
    +
    +.. data:: CRNCYSTR
    +
    +   Return the currency symbol, preceded by "-" if the symbol should appear before
    +   the value, "+" if the symbol should appear after the value, or "." if the symbol
    +   should replace the radix character.
    +
    +
    +.. data:: ERA
    +
    +   The return value represents the era used in the current locale.
    +
    +   Most locales do not define this value.  An example of a locale which does define
    +   this value is the Japanese one.  In Japan, the traditional representation of
    +   dates includes the name of the era corresponding to the then-emperor's reign.
    +
    +   Normally it should not be necessary to use this value directly. Specifying the
    +   ``E`` modifier in their format strings causes the :func:`strftime` function to
    +   use this information.  The format of the returned string is not specified, and
    +   therefore you should not assume knowledge of it on different systems.
    +
    +
    +.. data:: ERA_YEAR
    +
    +   The return value gives the year in the relevant era of the locale.
    +
    +
    +.. data:: ERA_D_T_FMT
    +
    +   This return value can be used as a format string for :func:`strftime` to
    +   represent dates and times in a locale-specific era-based way.
    +
    +
    +.. data:: ERA_D_FMT
    +
    +   This return value can be used as a format string for :func:`strftime` to
    +   represent a date in a locale-specific era-based way.
    +
    +
    +.. data:: ALT_DIGITS
    +
    +   The return value is a representation of up to 100 values used to represent the
    +   values 0 to 99.
    +
    +Example::
    +
    +   >>> import locale
    +   >>> loc = locale.getlocale() # get current locale
    +   >>> locale.setlocale(locale.LC_ALL, 'de_DE') # use German locale; name might vary with platform
    +   >>> locale.strcoll('f\xe4n', 'foo') # compare a string containing an umlaut 
    +   >>> locale.setlocale(locale.LC_ALL, '') # use user's preferred locale
    +   >>> locale.setlocale(locale.LC_ALL, 'C') # use default (C) locale
    +   >>> locale.setlocale(locale.LC_ALL, loc) # restore saved locale
    +
    +
    +Background, details, hints, tips and caveats
    +--------------------------------------------
    +
    +The C standard defines the locale as a program-wide property that may be
    +relatively expensive to change.  On top of that, some implementations are broken
    +in such a way that frequent locale changes may cause core dumps.  This makes the
    +locale somewhat painful to use correctly.
    +
    +Initially, when a program is started, the locale is the ``C`` locale, no matter
    +what the user's preferred locale is.  The program must explicitly say that it
    +wants the user's preferred locale settings by calling ``setlocale(LC_ALL, '')``.
    +
    +It is generally a bad idea to call :func:`setlocale` in some library routine,
    +since as a side effect it affects the entire program.  Saving and restoring it
    +is almost as bad: it is expensive and affects other threads that happen to run
    +before the settings have been restored.
    +
    +If, when coding a module for general use, you need a locale independent version
    +of an operation that is affected by the locale (such as :func:`string.lower`, or
    +certain formats used with :func:`time.strftime`), you will have to find a way to
    +do it without using the standard library routine.  Even better is convincing
    +yourself that using locale settings is okay.  Only as a last resort should you
    +document that your module is not compatible with non-\ ``C`` locale settings.
    +
    +.. index:: module: string
    +
    +The case conversion functions in the :mod:`string` module are affected by the
    +locale settings.  When a call to the :func:`setlocale` function changes the
    +:const:`LC_CTYPE` settings, the variables ``string.lowercase``,
    +``string.uppercase`` and ``string.letters`` are recalculated.  Note that code
    +that uses these variables through ':keyword:`from` ... :keyword:`import` ...',
    +e.g. ``from string import letters``, is not affected by subsequent
    +:func:`setlocale` calls.
    +
    +The only way to perform numeric operations according to the locale is to use the
    +special functions defined by this module: :func:`atof`, :func:`atoi`,
    +:func:`format`, :func:`str`.
    +
    +
    +.. _embedding-locale:
    +
    +For extension writers and programs that embed Python
    +----------------------------------------------------
    +
    +Extension modules should never call :func:`setlocale`, except to find out what
    +the current locale is.  But since the return value can only be used portably to
    +restore it, that is not very useful (except perhaps to find out whether or not
    +the locale is ``C``).
    +
    +When Python code uses the :mod:`locale` module to change the locale, this also
    +affects the embedding application.  If the embedding application doesn't want
    +this to happen, it should remove the :mod:`_locale` extension module (which does
    +all the work) from the table of built-in modules in the :file:`config.c` file,
    +and make sure that the :mod:`_locale` module is not accessible as a shared
    +library.
    +
    +
    +.. _locale-gettext:
    +
    +Access to message catalogs
    +--------------------------
    +
    +The locale module exposes the C library's gettext interface on systems that
    +provide this interface.  It consists of the functions :func:`gettext`,
    +:func:`dgettext`, :func:`dcgettext`, :func:`textdomain`, :func:`bindtextdomain`,
    +and :func:`bind_textdomain_codeset`.  These are similar to the same functions in
    +the :mod:`gettext` module, but use the C library's binary format for message
    +catalogs, and the C library's search algorithms for locating message catalogs.
    +
    +Python applications should normally find no need to invoke these functions, and
    +should use :mod:`gettext` instead.  A known exception to this rule are
    +applications that link with additional C libraries which internally invoke
    +:cfunc:`gettext` or :cfunc:`dcgettext`.  For these applications, it may be
    +necessary to bind the text domain, so that the libraries can properly locate
    +their message catalogs.
    +
    diff --git a/Doc/library/logging.rst b/Doc/library/logging.rst
    new file mode 100644
    index 0000000..218fb0d
    --- /dev/null
    +++ b/Doc/library/logging.rst
    @@ -0,0 +1,1857 @@
    +:mod:`logging` --- Logging facility for Python
    +==============================================
    +
    +.. module:: logging
    +   :synopsis: Flexible error logging system for applications.
    +
    +
    +.. moduleauthor:: Vinay Sajip 
    +.. sectionauthor:: Vinay Sajip 
    +
    +
    +.. % These apply to all modules, and may be given more than once:
    +
    +
    +
    +.. index:: pair: Errors; logging
    +
    +.. versionadded:: 2.3
    +
    +This module defines functions and classes which implement a flexible error
    +logging system for applications.
    +
    +Logging is performed by calling methods on instances of the :class:`Logger`
    +class (hereafter called :dfn:`loggers`). Each instance has a name, and they are
    +conceptually arranged in a name space hierarchy using dots (periods) as
    +separators. For example, a logger named "scan" is the parent of loggers
    +"scan.text", "scan.html" and "scan.pdf". Logger names can be anything you want,
    +and indicate the area of an application in which a logged message originates.
    +
    +Logged messages also have levels of importance associated with them. The default
    +levels provided are :const:`DEBUG`, :const:`INFO`, :const:`WARNING`,
    +:const:`ERROR` and :const:`CRITICAL`. As a convenience, you indicate the
    +importance of a logged message by calling an appropriate method of
    +:class:`Logger`. The methods are :meth:`debug`, :meth:`info`, :meth:`warning`,
    +:meth:`error` and :meth:`critical`, which mirror the default levels. You are not
    +constrained to use these levels: you can specify your own and use a more general
    +:class:`Logger` method, :meth:`log`, which takes an explicit level argument.
    +
    +The numeric values of logging levels are given in the following table. These are
    +primarily of interest if you want to define your own levels, and need them to
    +have specific values relative to the predefined levels. If you define a level
    +with the same numeric value, it overwrites the predefined value; the predefined
    +name is lost.
    +
    ++--------------+---------------+
    +| Level        | Numeric value |
    ++==============+===============+
    +| ``CRITICAL`` | 50            |
    ++--------------+---------------+
    +| ``ERROR``    | 40            |
    ++--------------+---------------+
    +| ``WARNING``  | 30            |
    ++--------------+---------------+
    +| ``INFO``     | 20            |
    ++--------------+---------------+
    +| ``DEBUG``    | 10            |
    ++--------------+---------------+
    +| ``NOTSET``   | 0             |
    ++--------------+---------------+
    +
    +Levels can also be associated with loggers, being set either by the developer or
    +through loading a saved logging configuration. When a logging method is called
    +on a logger, the logger compares its own level with the level associated with
    +the method call. If the logger's level is higher than the method call's, no
    +logging message is actually generated. This is the basic mechanism controlling
    +the verbosity of logging output.
    +
    +Logging messages are encoded as instances of the :class:`LogRecord` class. When
    +a logger decides to actually log an event, a :class:`LogRecord` instance is
    +created from the logging message.
    +
    +Logging messages are subjected to a dispatch mechanism through the use of
    +:dfn:`handlers`, which are instances of subclasses of the :class:`Handler`
    +class. Handlers are responsible for ensuring that a logged message (in the form
    +of a :class:`LogRecord`) ends up in a particular location (or set of locations)
    +which is useful for the target audience for that message (such as end users,
    +support desk staff, system administrators, developers). Handlers are passed
    +:class:`LogRecord` instances intended for particular destinations. Each logger
    +can have zero, one or more handlers associated with it (via the
    +:meth:`addHandler` method of :class:`Logger`). In addition to any handlers
    +directly associated with a logger, *all handlers associated with all ancestors
    +of the logger* are called to dispatch the message.
    +
    +Just as for loggers, handlers can have levels associated with them. A handler's
    +level acts as a filter in the same way as a logger's level does. If a handler
    +decides to actually dispatch an event, the :meth:`emit` method is used to send
    +the message to its destination. Most user-defined subclasses of :class:`Handler`
    +will need to override this :meth:`emit`.
    +
    +In addition to the base :class:`Handler` class, many useful subclasses are
    +provided:
    +
    +#. :class:`StreamHandler` instances send error messages to streams (file-like
    +   objects).
    +
    +#. :class:`FileHandler` instances send error messages to disk files.
    +
    +#. :class:`BaseRotatingHandler` is the base class for handlers that rotate log
    +   files at a certain point. It is not meant to be instantiated directly. Instead,
    +   use :class:`RotatingFileHandler` or :class:`TimedRotatingFileHandler`.
    +
    +#. :class:`RotatingFileHandler` instances send error messages to disk files,
    +   with support for maximum log file sizes and log file rotation.
    +
    +#. :class:`TimedRotatingFileHandler` instances send error messages to disk files
    +   rotating the log file at certain timed intervals.
    +
    +#. :class:`SocketHandler` instances send error messages to TCP/IP sockets.
    +
    +#. :class:`DatagramHandler` instances send error messages to UDP sockets.
    +
    +#. :class:`SMTPHandler` instances send error messages to a designated email
    +   address.
    +
    +#. :class:`SysLogHandler` instances send error messages to a Unix syslog daemon,
    +   possibly on a remote machine.
    +
    +#. :class:`NTEventLogHandler` instances send error messages to a Windows
    +   NT/2000/XP event log.
    +
    +#. :class:`MemoryHandler` instances send error messages to a buffer in memory,
    +   which is flushed whenever specific criteria are met.
    +
    +#. :class:`HTTPHandler` instances send error messages to an HTTP server using
    +   either ``GET`` or ``POST`` semantics.
    +
    +The :class:`StreamHandler` and :class:`FileHandler` classes are defined in the
    +core logging package. The other handlers are defined in a sub-module,
    +:mod:`logging.handlers`. (There is also another sub-module,
    +:mod:`logging.config`, for configuration functionality.)
    +
    +Logged messages are formatted for presentation through instances of the
    +:class:`Formatter` class. They are initialized with a format string suitable for
    +use with the % operator and a dictionary.
    +
    +For formatting multiple messages in a batch, instances of
    +:class:`BufferingFormatter` can be used. In addition to the format string (which
    +is applied to each message in the batch), there is provision for header and
    +trailer format strings.
    +
    +When filtering based on logger level and/or handler level is not enough,
    +instances of :class:`Filter` can be added to both :class:`Logger` and
    +:class:`Handler` instances (through their :meth:`addFilter` method). Before
    +deciding to process a message further, both loggers and handlers consult all
    +their filters for permission. If any filter returns a false value, the message
    +is not processed further.
    +
    +The basic :class:`Filter` functionality allows filtering by specific logger
    +name. If this feature is used, messages sent to the named logger and its
    +children are allowed through the filter, and all others dropped.
    +
    +In addition to the classes described above, there are a number of module-level
    +functions.
    +
    +
    +.. function:: getLogger([name])
    +
    +   Return a logger with the specified name or, if no name is specified, return a
    +   logger which is the root logger of the hierarchy. If specified, the name is
    +   typically a dot-separated hierarchical name like *"a"*, *"a.b"* or *"a.b.c.d"*.
    +   Choice of these names is entirely up to the developer who is using logging.
    +
    +   All calls to this function with a given name return the same logger instance.
    +   This means that logger instances never need to be passed between different parts
    +   of an application.
    +
    +
    +.. function:: getLoggerClass()
    +
    +   Return either the standard :class:`Logger` class, or the last class passed to
    +   :func:`setLoggerClass`. This function may be called from within a new class
    +   definition, to ensure that installing a customised :class:`Logger` class will
    +   not undo customisations already applied by other code. For example::
    +
    +      class MyLogger(logging.getLoggerClass()):
    +          # ... override behaviour here
    +
    +
    +.. function:: debug(msg[, *args[, **kwargs]])
    +
    +   Logs a message with level :const:`DEBUG` on the root logger. The *msg* is the
    +   message format string, and the *args* are the arguments which are merged into
    +   *msg* using the string formatting operator. (Note that this means that you can
    +   use keywords in the format string, together with a single dictionary argument.)
    +
    +   There are two keyword arguments in *kwargs* which are inspected: *exc_info*
    +   which, if it does not evaluate as false, causes exception information to be
    +   added to the logging message. If an exception tuple (in the format returned by
    +   :func:`sys.exc_info`) is provided, it is used; otherwise, :func:`sys.exc_info`
    +   is called to get the exception information.
    +
    +   The other optional keyword argument is *extra* which can be used to pass a
    +   dictionary which is used to populate the __dict__ of the LogRecord created for
    +   the logging event with user-defined attributes. These custom attributes can then
    +   be used as you like. For example, they could be incorporated into logged
    +   messages::
    +
    +      FORMAT = "%(asctime)-15s %(clientip)s %(user)-8s %(message)s"
    +      logging.basicConfig(format=FORMAT)
    +      d = {'clientip': '192.168.0.1', 'user': 'fbloggs'}
    +      logging.warning("Protocol problem: %s", "connection reset", extra=d)
    +
    +   would print something like  ::
    +
    +      2006-02-08 22:20:02,165 192.168.0.1 fbloggs  Protocol problem: connection reset
    +
    +   The keys in the dictionary passed in *extra* should not clash with the keys used
    +   by the logging system. (See the :class:`Formatter` documentation for more
    +   information on which keys are used by the logging system.)
    +
    +   If you choose to use these attributes in logged messages, you need to exercise
    +   some care. In the above example, for instance, the :class:`Formatter` has been
    +   set up with a format string which expects 'clientip' and 'user' in the attribute
    +   dictionary of the LogRecord. If these are missing, the message will not be
    +   logged because a string formatting exception will occur. So in this case, you
    +   always need to pass the *extra* dictionary with these keys.
    +
    +   While this might be annoying, this feature is intended for use in specialized
    +   circumstances, such as multi-threaded servers where the same code executes in
    +   many contexts, and interesting conditions which arise are dependent on this
    +   context (such as remote client IP address and authenticated user name, in the
    +   above example). In such circumstances, it is likely that specialized
    +   :class:`Formatter`\ s would be used with particular :class:`Handler`\ s.
    +
    +   .. versionchanged:: 2.5
    +      *extra* was added.
    +
    +
    +.. function:: info(msg[, *args[, **kwargs]])
    +
    +   Logs a message with level :const:`INFO` on the root logger. The arguments are
    +   interpreted as for :func:`debug`.
    +
    +
    +.. function:: warning(msg[, *args[, **kwargs]])
    +
    +   Logs a message with level :const:`WARNING` on the root logger. The arguments are
    +   interpreted as for :func:`debug`.
    +
    +
    +.. function:: error(msg[, *args[, **kwargs]])
    +
    +   Logs a message with level :const:`ERROR` on the root logger. The arguments are
    +   interpreted as for :func:`debug`.
    +
    +
    +.. function:: critical(msg[, *args[, **kwargs]])
    +
    +   Logs a message with level :const:`CRITICAL` on the root logger. The arguments
    +   are interpreted as for :func:`debug`.
    +
    +
    +.. function:: exception(msg[, *args])
    +
    +   Logs a message with level :const:`ERROR` on the root logger. The arguments are
    +   interpreted as for :func:`debug`. Exception info is added to the logging
    +   message. This function should only be called from an exception handler.
    +
    +
    +.. function:: log(level, msg[, *args[, **kwargs]])
    +
    +   Logs a message with level *level* on the root logger. The other arguments are
    +   interpreted as for :func:`debug`.
    +
    +
    +.. function:: disable(lvl)
    +
    +   Provides an overriding level *lvl* for all loggers which takes precedence over
    +   the logger's own level. When the need arises to temporarily throttle logging
    +   output down across the whole application, this function can be useful.
    +
    +
    +.. function:: addLevelName(lvl, levelName)
    +
    +   Associates level *lvl* with text *levelName* in an internal dictionary, which is
    +   used to map numeric levels to a textual representation, for example when a
    +   :class:`Formatter` formats a message. This function can also be used to define
    +   your own levels. The only constraints are that all levels used must be
    +   registered using this function, levels should be positive integers and they
    +   should increase in increasing order of severity.
    +
    +
    +.. function:: getLevelName(lvl)
    +
    +   Returns the textual representation of logging level *lvl*. If the level is one
    +   of the predefined levels :const:`CRITICAL`, :const:`ERROR`, :const:`WARNING`,
    +   :const:`INFO` or :const:`DEBUG` then you get the corresponding string. If you
    +   have associated levels with names using :func:`addLevelName` then the name you
    +   have associated with *lvl* is returned. If a numeric value corresponding to one
    +   of the defined levels is passed in, the corresponding string representation is
    +   returned. Otherwise, the string ``"Level %s" % lvl`` is returned.
    +
    +
    +.. function:: makeLogRecord(attrdict)
    +
    +   Creates and returns a new :class:`LogRecord` instance whose attributes are
    +   defined by *attrdict*. This function is useful for taking a pickled
    +   :class:`LogRecord` attribute dictionary, sent over a socket, and reconstituting
    +   it as a :class:`LogRecord` instance at the receiving end.
    +
    +
    +.. function:: basicConfig([**kwargs])
    +
    +   Does basic configuration for the logging system by creating a
    +   :class:`StreamHandler` with a default :class:`Formatter` and adding it to the
    +   root logger. The functions :func:`debug`, :func:`info`, :func:`warning`,
    +   :func:`error` and :func:`critical` will call :func:`basicConfig` automatically
    +   if no handlers are defined for the root logger.
    +
    +   .. versionchanged:: 2.4
    +      Formerly, :func:`basicConfig` did not take any keyword arguments.
    +
    +   The following keyword arguments are supported.
    +
    +   +--------------+---------------------------------------------+
    +   | Keyword      | Description                                 |
    +   +==============+=============================================+
    +   | ``filename`` | Specifies that a FileHandler be created,    |
    +   |              | using the specified filename, rather than a |
    +   |              | StreamHandler.                              |
    +   +--------------+---------------------------------------------+
    +   | ``filemode`` | Specifies the mode to open the file, if     |
    +   |              | filename is specified (if filemode is       |
    +   |              | unspecified, it defaults to 'a').           |
    +   +--------------+---------------------------------------------+
    +   | ``format``   | Use the specified format string for the     |
    +   |              | handler.                                    |
    +   +--------------+---------------------------------------------+
    +   | ``datefmt``  | Use the specified date/time format.         |
    +   +--------------+---------------------------------------------+
    +   | ``level``    | Set the root logger level to the specified  |
    +   |              | level.                                      |
    +   +--------------+---------------------------------------------+
    +   | ``stream``   | Use the specified stream to initialize the  |
    +   |              | StreamHandler. Note that this argument is   |
    +   |              | incompatible with 'filename' - if both are  |
    +   |              | present, 'stream' is ignored.               |
    +   +--------------+---------------------------------------------+
    +
    +
    +.. function:: shutdown()
    +
    +   Informs the logging system to perform an orderly shutdown by flushing and
    +   closing all handlers.
    +
    +
    +.. function:: setLoggerClass(klass)
    +
    +   Tells the logging system to use the class *klass* when instantiating a logger.
    +   The class should define :meth:`__init__` such that only a name argument is
    +   required, and the :meth:`__init__` should call :meth:`Logger.__init__`. This
    +   function is typically called before any loggers are instantiated by applications
    +   which need to use custom logger behavior.
    +
    +
    +.. seealso::
    +
    +   :pep:`282` - A Logging System
    +      The proposal which described this feature for inclusion in the Python standard
    +      library.
    +
    +   `Original Python logging package <http://www.red-dove.com/python_logging.html>`_
    +      This is the original source for the :mod:`logging` package.  The version of the
    +      package available from this site is suitable for use with Python 1.5.2, 2.1.x
    +      and 2.2.x, which do not include the :mod:`logging` package in the standard
    +      library.
    +
    +
    +Logger Objects
    +--------------
    +
    +Loggers have the following attributes and methods. Note that Loggers are never
    +instantiated directly, but always through the module-level function
    +``logging.getLogger(name)``.
    +
    +
    +.. attribute:: Logger.propagate
    +
    +   If this evaluates to false, logging messages are not passed by this logger or by
    +   child loggers to higher level (ancestor) loggers. The constructor sets this
    +   attribute to 1.
    +
    +
    +.. method:: Logger.setLevel(lvl)
    +
    +   Sets the threshold for this logger to *lvl*. Logging messages which are less
    +   severe than *lvl* will be ignored. When a logger is created, the level is set to
    +   :const:`NOTSET` (which causes all messages to be processed when the logger is
    +   the root logger, or delegation to the parent when the logger is a non-root
    +   logger). Note that the root logger is created with level :const:`WARNING`.
    +
    +   The term "delegation to the parent" means that if a logger has a level of
    +   NOTSET, its chain of ancestor loggers is traversed until either an ancestor with
    +   a level other than NOTSET is found, or the root is reached.
    +
    +   If an ancestor is found with a level other than NOTSET, then that ancestor's
    +   level is treated as the effective level of the logger where the ancestor search
    +   began, and is used to determine how a logging event is handled.
    +
    +   If the root is reached, and it has a level of NOTSET, then all messages will be
    +   processed. Otherwise, the root's level will be used as the effective level.
    +
    +
    +.. method:: Logger.isEnabledFor(lvl)
    +
    +   Indicates if a message of severity *lvl* would be processed by this logger.
    +   This method checks first the module-level level set by
    +   ``logging.disable(lvl)`` and then the logger's effective level as determined
    +   by :meth:`getEffectiveLevel`.
    +
    +
    +.. method:: Logger.getEffectiveLevel()
    +
    +   Indicates the effective level for this logger. If a value other than
    +   :const:`NOTSET` has been set using :meth:`setLevel`, it is returned. Otherwise,
    +   the hierarchy is traversed towards the root until a value other than
    +   :const:`NOTSET` is found, and that value is returned.
    +
    +
    +.. method:: Logger.debug(msg[, *args[, **kwargs]])
    +
    +   Logs a message with level :const:`DEBUG` on this logger. The *msg* is the
    +   message format string, and the *args* are the arguments which are merged into
    +   *msg* using the string formatting operator. (Note that this means that you can
    +   use keywords in the format string, together with a single dictionary argument.)
    +
    +   There are two keyword arguments in *kwargs* which are inspected: *exc_info*
    +   which, if it does not evaluate as false, causes exception information to be
    +   added to the logging message. If an exception tuple (in the format returned by
    +   :func:`sys.exc_info`) is provided, it is used; otherwise, :func:`sys.exc_info`
    +   is called to get the exception information.
    +
    +   The other optional keyword argument is *extra* which can be used to pass a
    +   dictionary which is used to populate the __dict__ of the LogRecord created for
    +   the logging event with user-defined attributes. These custom attributes can then
    +   be used as you like; they could, for instance, be incorporated into logged
    +   messages. For example::
    +
    +      FORMAT = "%(asctime)-15s %(clientip)s %(user)-8s %(message)s"
    +      logging.basicConfig(format=FORMAT)
    +      d = {'clientip': '192.168.0.1', 'user': 'fbloggs'}
    +      logger = logging.getLogger("tcpserver")
    +      logger.warning("Protocol problem: %s", "connection reset", extra=d)
    +
    +   would print something like  ::
    +
    +      2006-02-08 22:20:02,165 192.168.0.1 fbloggs  Protocol problem: connection reset
    +
    +   The keys in the dictionary passed in *extra* should not clash with the keys used
    +   by the logging system. (See the :class:`Formatter` documentation for more
    +   information on which keys are used by the logging system.)
    +
    +   If you choose to use these attributes in logged messages, you need to exercise
    +   some care. In the above example, for instance, the :class:`Formatter` has been
    +   set up with a format string which expects 'clientip' and 'user' in the attribute
    +   dictionary of the LogRecord. If these are missing, the message will not be
    +   logged because a string formatting exception will occur. So in this case, you
    +   always need to pass the *extra* dictionary with these keys.
    +
    +   While this might be annoying, this feature is intended for use in specialized
    +   circumstances, such as multi-threaded servers where the same code executes in
    +   many contexts, and interesting conditions which arise are dependent on this
    +   context (such as remote client IP address and authenticated user name, in the
    +   above example). In such circumstances, it is likely that specialized
    +   :class:`Formatter`\ s would be used with particular :class:`Handler`\ s.
    +
    +   .. versionchanged:: 2.5
    +      *extra* was added.
    +
    +
    +.. method:: Logger.info(msg[, *args[, **kwargs]])
    +
    +   Logs a message with level :const:`INFO` on this logger. The arguments are
    +   interpreted as for :meth:`debug`.
    +
    +
    +.. method:: Logger.warning(msg[, *args[, **kwargs]])
    +
    +   Logs a message with level :const:`WARNING` on this logger. The arguments are
    +   interpreted as for :meth:`debug`.
    +
    +
    +.. method:: Logger.error(msg[, *args[, **kwargs]])
    +
    +   Logs a message with level :const:`ERROR` on this logger. The arguments are
    +   interpreted as for :meth:`debug`.
    +
    +
    +.. method:: Logger.critical(msg[, *args[, **kwargs]])
    +
    +   Logs a message with level :const:`CRITICAL` on this logger. The arguments are
    +   interpreted as for :meth:`debug`.
    +
    +
    +.. method:: Logger.log(lvl, msg[, *args[, **kwargs]])
    +
    +   Logs a message with integer level *lvl* on this logger. The other arguments are
    +   interpreted as for :meth:`debug`.
    +
    +
    +.. method:: Logger.exception(msg[, *args])
    +
    +   Logs a message with level :const:`ERROR` on this logger. The arguments are
    +   interpreted as for :meth:`debug`. Exception info is added to the logging
    +   message. This method should only be called from an exception handler.
    +
    +
    +.. method:: Logger.addFilter(filt)
    +
    +   Adds the specified filter *filt* to this logger.
    +
    +
    +.. method:: Logger.removeFilter(filt)
    +
    +   Removes the specified filter *filt* from this logger.
    +
    +
    +.. method:: Logger.filter(record)
    +
    +   Applies this logger's filters to the record and returns a true value if the
    +   record is to be processed.
    +
    +
    +.. method:: Logger.addHandler(hdlr)
    +
    +   Adds the specified handler *hdlr* to this logger.
    +
    +
    +.. method:: Logger.removeHandler(hdlr)
    +
    +   Removes the specified handler *hdlr* from this logger.
    +
    +
    +.. method:: Logger.findCaller()
    +
    +   Finds the caller's source filename and line number. Returns the filename, line
    +   number and function name as a 3-element tuple.
    +
    +   .. versionchanged:: 2.5
    +      The function name was added. In earlier versions, the filename and line number
    +      were returned as a 2-element tuple.
    +
    +
    +.. method:: Logger.handle(record)
    +
    +   Handles a record by passing it to all handlers associated with this logger and
    +   its ancestors (until a false value of *propagate* is found). This method is used
    +   for unpickled records received from a socket, as well as those created locally.
    +   Logger-level filtering is applied using :meth:`filter`.
    +
    +
    +.. method:: Logger.makeRecord(name, lvl, fn, lno, msg, args, exc_info [, func, extra])
    +
    +   This is a factory method which can be overridden in subclasses to create
    +   specialized :class:`LogRecord` instances.
    +
    +   .. versionchanged:: 2.5
    +      *func* and *extra* were added.
    +
    +
    +.. _minimal-example:
    +
    +Basic example
    +-------------
    +
    +.. versionchanged:: 2.4
    +   Formerly, :func:`basicConfig` did not take any keyword arguments.
    +
    +The :mod:`logging` package provides a lot of flexibility, and its configuration
    +can appear daunting.  This section demonstrates that simple use of the logging
    +package is possible.
    +
    +The simplest example shows logging to the console::
    +
    +   import logging
    +
    +   logging.debug('A debug message')
    +   logging.info('Some information')
    +   logging.warning('A shot across the bows')
    +
    +If you run the above script, you'll see this::
    +
    +   WARNING:root:A shot across the bows
    +
    +Because no particular logger was specified, the system used the root logger. The
    +debug and info messages didn't appear because by default, the root logger is
    +configured to only handle messages with a severity of WARNING or above. The
    +message format is also a configuration default, as is the output destination of
    +the messages - ``sys.stderr``. The severity level, the message format and
    +destination can be easily changed, as shown in the example below::
    +
    +   import logging
    +
    +   logging.basicConfig(level=logging.DEBUG,
    +                       format='%(asctime)s %(levelname)s %(message)s',
    +                       filename='/tmp/myapp.log',
    +                       filemode='w')
    +   logging.debug('A debug message')
    +   logging.info('Some information')
    +   logging.warning('A shot across the bows')
    +
    +The :func:`basicConfig` function is used to change the configuration defaults,
    +which results in output (written to ``/tmp/myapp.log``) which should look
    +something like the following::
    +
    +   2004-07-02 13:00:08,743 DEBUG A debug message
    +   2004-07-02 13:00:08,743 INFO Some information
    +   2004-07-02 13:00:08,743 WARNING A shot across the bows
    +
    +This time, all messages with a severity of DEBUG or above were handled, and the
    +format of the messages was also changed, and output went to the specified file
    +rather than the console.
    +
    +Formatting uses standard Python string formatting - see section
    +:ref:`string-formatting`. The format string takes the following common
    +specifiers. For a complete list of specifiers, consult the :class:`Formatter`
    +documentation.
    +
    ++-------------------+-----------------------------------------------+
    +| Format            | Description                                   |
    ++===================+===============================================+
    +| ``%(name)s``      | Name of the logger (logging channel).         |
    ++-------------------+-----------------------------------------------+
    +| ``%(levelname)s`` | Text logging level for the message            |
    +|                   | (``'DEBUG'``, ``'INFO'``, ``'WARNING'``,      |
    +|                   | ``'ERROR'``, ``'CRITICAL'``).                 |
    ++-------------------+-----------------------------------------------+
    +| ``%(asctime)s``   | Human-readable time when the                  |
    +|                   | :class:`LogRecord` was created.  By default   |
    +|                   | this is of the form "2003-07-08 16:49:45,896" |
    +|                   | (the numbers after the comma are the          |
    +|                   | millisecond portion of the time).             |
    ++-------------------+-----------------------------------------------+
    +| ``%(message)s``   | The logged message.                           |
    ++-------------------+-----------------------------------------------+
    +
    +To change the date/time format, you can pass an additional keyword parameter,
    +*datefmt*, as in the following::
    +
    +   import logging
    +
    +   logging.basicConfig(level=logging.DEBUG,
    +                       format='%(asctime)s %(levelname)-8s %(message)s',
    +                       datefmt='%a, %d %b %Y %H:%M:%S',
    +                       filename='/temp/myapp.log',
    +                       filemode='w')
    +   logging.debug('A debug message')
    +   logging.info('Some information')
    +   logging.warning('A shot across the bows')
    +
    +which would result in output like ::
    +
    +   Fri, 02 Jul 2004 13:06:18 DEBUG    A debug message
    +   Fri, 02 Jul 2004 13:06:18 INFO     Some information
    +   Fri, 02 Jul 2004 13:06:18 WARNING  A shot across the bows
    +
    +The date format string follows the requirements of :func:`strftime` - see the
    +documentation for the :mod:`time` module.
    +
    +If, instead of sending logging output to the console or a file, you'd rather use
    +a file-like object which you have created separately, you can pass it to
    +:func:`basicConfig` using the *stream* keyword argument. Note that if both
    +*stream* and *filename* keyword arguments are passed, the *stream* argument is
    +ignored.
    +
    +Of course, you can put variable information in your output. To do this, simply
    +have the message be a format string and pass in additional arguments containing
    +the variable information, as in the following example::
    +
    +   import logging
    +
    +   logging.basicConfig(level=logging.DEBUG,
    +                       format='%(asctime)s %(levelname)-8s %(message)s',
    +                       datefmt='%a, %d %b %Y %H:%M:%S',
    +                       filename='/temp/myapp.log',
    +                       filemode='w')
    +   logging.error('Pack my box with %d dozen %s', 5, 'liquor jugs')
    +
    +which would result in ::
    +
    +   Wed, 21 Jul 2004 15:35:16 ERROR    Pack my box with 5 dozen liquor jugs
    +
    +
    +.. _multiple-destinations:
    +
    +Logging to multiple destinations
    +--------------------------------
    +
    +Let's say you want to log to console and file with different message formats and
    +in differing circumstances. Say you want to log messages with levels of DEBUG
    +and higher to file, and those messages at level INFO and higher to the console.
    +Let's also assume that the file should contain timestamps, but the console
    +messages should not. Here's how you can achieve this::
    +
    +   import logging
    +
    +   # set up logging to file - see previous section for more details
    +   logging.basicConfig(level=logging.DEBUG,
    +                       format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
    +                       datefmt='%m-%d %H:%M',
    +                       filename='/temp/myapp.log',
    +                       filemode='w')
    +   # define a Handler which writes INFO messages or higher to the sys.stderr
    +   console = logging.StreamHandler()
    +   console.setLevel(logging.INFO)
    +   # set a format which is simpler for console use
    +   formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
    +   # tell the handler to use this format
    +   console.setFormatter(formatter)
    +   # add the handler to the root logger
    +   logging.getLogger('').addHandler(console)
    +
    +   # Now, we can log to the root logger, or any other logger. First the root...
    +   logging.info('Jackdaws love my big sphinx of quartz.')
    +
    +   # Now, define a couple of other loggers which might represent areas in your
    +   # application:
    +
    +   logger1 = logging.getLogger('myapp.area1')
    +   logger2 = logging.getLogger('myapp.area2')
    +
    +   logger1.debug('Quick zephyrs blow, vexing daft Jim.')
    +   logger1.info('How quickly daft jumping zebras vex.')
    +   logger2.warning('Jail zesty vixen who grabbed pay from quack.')
    +   logger2.error('The five boxing wizards jump quickly.')
    +
    +When you run this, on the console you will see ::
    +
    +   root        : INFO     Jackdaws love my big sphinx of quartz.
    +   myapp.area1 : INFO     How quickly daft jumping zebras vex.
    +   myapp.area2 : WARNING  Jail zesty vixen who grabbed pay from quack.
    +   myapp.area2 : ERROR    The five boxing wizards jump quickly.
    +
    +and in the file you will see something like ::
    +
    +   10-22 22:19 root         INFO     Jackdaws love my big sphinx of quartz.
    +   10-22 22:19 myapp.area1  DEBUG    Quick zephyrs blow, vexing daft Jim.
    +   10-22 22:19 myapp.area1  INFO     How quickly daft jumping zebras vex.
    +   10-22 22:19 myapp.area2  WARNING  Jail zesty vixen who grabbed pay from quack.
    +   10-22 22:19 myapp.area2  ERROR    The five boxing wizards jump quickly.
    +
    +As you can see, the DEBUG message only shows up in the file. The other messages
    +are sent to both destinations.
    +
    +This example uses console and file handlers, but you can use any number and
    +combination of handlers you choose.
    +
    +
    +.. _network-logging:
    +
    +Sending and receiving logging events across a network
    +-----------------------------------------------------
    +
    +Let's say you want to send logging events across a network, and handle them at
    +the receiving end. A simple way of doing this is attaching a
    +:class:`SocketHandler` instance to the root logger at the sending end::
    +
    +   import logging, logging.handlers
    +
    +   rootLogger = logging.getLogger('')
    +   rootLogger.setLevel(logging.DEBUG)
    +   socketHandler = logging.handlers.SocketHandler('localhost',
    +                       logging.handlers.DEFAULT_TCP_LOGGING_PORT)
    +   # don't bother with a formatter, since a socket handler sends the event as
    +   # an unformatted pickle
    +   rootLogger.addHandler(socketHandler)
    +
    +   # Now, we can log to the root logger, or any other logger. First the root...
    +   logging.info('Jackdaws love my big sphinx of quartz.')
    +
    +   # Now, define a couple of other loggers which might represent areas in your
    +   # application:
    +
    +   logger1 = logging.getLogger('myapp.area1')
    +   logger2 = logging.getLogger('myapp.area2')
    +
    +   logger1.debug('Quick zephyrs blow, vexing daft Jim.')
    +   logger1.info('How quickly daft jumping zebras vex.')
    +   logger2.warning('Jail zesty vixen who grabbed pay from quack.')
    +   logger2.error('The five boxing wizards jump quickly.')
    +
    +At the receiving end, you can set up a receiver using the :mod:`SocketServer`
    +module. Here is a basic working example::
    +
    +   import cPickle
    +   import logging
    +   import logging.handlers
    +   import SocketServer
    +   import struct
    +
    +
    +   class LogRecordStreamHandler(SocketServer.StreamRequestHandler):
    +       """Handler for a streaming logging request.
    +
    +       This basically logs the record using whatever logging policy is
    +       configured locally.
    +       """
    +
    +       def handle(self):
    +           """
    +           Handle multiple requests - each expected to be a 4-byte length,
    +           followed by the LogRecord in pickle format. Logs the record
    +           according to whatever policy is configured locally.
    +           """
    +           while 1:
    +               chunk = self.connection.recv(4)
    +               if len(chunk) < 4:
    +                   break
    +               slen = struct.unpack(">L", chunk)[0]
    +               chunk = self.connection.recv(slen)
    +               while len(chunk) < slen:
    +                   chunk = chunk + self.connection.recv(slen - len(chunk))
    +               obj = self.unPickle(chunk)
    +               record = logging.makeLogRecord(obj)
    +               self.handleLogRecord(record)
    +
    +       def unPickle(self, data):
    +           return cPickle.loads(data)
    +
    +       def handleLogRecord(self, record):
    +           # if a name is specified, we use the named logger rather than the one
    +           # implied by the record.
    +           if self.server.logname is not None:
    +               name = self.server.logname
    +           else:
    +               name = record.name
    +           logger = logging.getLogger(name)
    +           # N.B. EVERY record gets logged. This is because Logger.handle
    +           # is normally called AFTER logger-level filtering. If you want
    +           # to do filtering, do it at the client end to save wasting
    +           # cycles and network bandwidth!
    +           logger.handle(record)
    +
    +   class LogRecordSocketReceiver(SocketServer.ThreadingTCPServer):
    +       """simple TCP socket-based logging receiver suitable for testing.
    +       """
    +
    +       allow_reuse_address = 1
    +
    +       def __init__(self, host='localhost',
    +                    port=logging.handlers.DEFAULT_TCP_LOGGING_PORT,
    +                    handler=LogRecordStreamHandler):
    +           SocketServer.ThreadingTCPServer.__init__(self, (host, port), handler)
    +           self.abort = 0
    +           self.timeout = 1
    +           self.logname = None
    +
    +       def serve_until_stopped(self):
    +           import select
    +           abort = 0
    +           while not abort:
    +               rd, wr, ex = select.select([self.socket.fileno()],
    +                                          [], [],
    +                                          self.timeout)
    +               if rd:
    +                   self.handle_request()
    +               abort = self.abort
    +
    +   def main():
    +       logging.basicConfig(
    +           format="%(relativeCreated)5d %(name)-15s %(levelname)-8s %(message)s")
    +       tcpserver = LogRecordSocketReceiver()
    +       print "About to start TCP server..."
    +       tcpserver.serve_until_stopped()
    +
    +   if __name__ == "__main__":
    +       main()
    +
    +First run the server, and then the client. On the client side, nothing is
    +printed on the console; on the server side, you should see something like::
    +
    +   About to start TCP server...
    +      59 root            INFO     Jackdaws love my big sphinx of quartz.
    +      59 myapp.area1     DEBUG    Quick zephyrs blow, vexing daft Jim.
    +      69 myapp.area1     INFO     How quickly daft jumping zebras vex.
    +      69 myapp.area2     WARNING  Jail zesty vixen who grabbed pay from quack.
    +      69 myapp.area2     ERROR    The five boxing wizards jump quickly.
    +
    +
    +Handler Objects
    +---------------
    +
    +Handlers have the following attributes and methods. Note that :class:`Handler`
    +is never instantiated directly; this class acts as a base for more useful
    +subclasses. However, the :meth:`__init__` method in subclasses needs to call
    +:meth:`Handler.__init__`.
    +
    +
    +.. method:: Handler.__init__(level=NOTSET)
    +
    +   Initializes the :class:`Handler` instance by setting its level, setting the list
    +   of filters to the empty list and creating a lock (using :meth:`createLock`) for
    +   serializing access to an I/O mechanism.
    +
    +
    +.. method:: Handler.createLock()
    +
    +   Initializes a thread lock which can be used to serialize access to underlying
    +   I/O functionality which may not be threadsafe.
    +
    +
    +.. method:: Handler.acquire()
    +
    +   Acquires the thread lock created with :meth:`createLock`.
    +
    +
    +.. method:: Handler.release()
    +
    +   Releases the thread lock acquired with :meth:`acquire`.
    +
    +
    +.. method:: Handler.setLevel(lvl)
    +
    +   Sets the threshold for this handler to *lvl*. Logging messages which are less
    +   severe than *lvl* will be ignored. When a handler is created, the level is set
    +   to :const:`NOTSET` (which causes all messages to be processed).
    +
    +
    +.. method:: Handler.setFormatter(form)
    +
    +   Sets the :class:`Formatter` for this handler to *form*.
    +
    +
    +.. method:: Handler.addFilter(filt)
    +
    +   Adds the specified filter *filt* to this handler.
    +
    +
    +.. method:: Handler.removeFilter(filt)
    +
    +   Removes the specified filter *filt* from this handler.
    +
    +
    +.. method:: Handler.filter(record)
    +
    +   Applies this handler's filters to the record and returns a true value if the
    +   record is to be processed.
    +
    +
    +.. method:: Handler.flush()
    +
    +   Ensure all logging output has been flushed. This version does nothing and is
    +   intended to be implemented by subclasses.
    +
    +
    +.. method:: Handler.close()
    +
    +   Tidy up any resources used by the handler. This version does nothing and is
    +   intended to be implemented by subclasses.
    +
    +
    +.. method:: Handler.handle(record)
    +
    +   Conditionally emits the specified logging record, depending on filters which may
    +   have been added to the handler. Wraps the actual emission of the record with
    +   acquisition/release of the I/O thread lock.
    +
    +
    +.. method:: Handler.handleError(record)
    +
    +   This method should be called from handlers when an exception is encountered
    +   during an :meth:`emit` call. By default it does nothing, which means that
    +   exceptions get silently ignored. This is what is mostly wanted for a logging
    +   system - most users will not care about errors in the logging system, they are
    +   more interested in application errors. You could, however, replace this with a
    +   custom handler if you wish. The specified record is the one which was being
    +   processed when the exception occurred.
    +
    +
    +.. method:: Handler.format(record)
    +
    +   Do formatting for a record - if a formatter is set, use it. Otherwise, use the
    +   default formatter for the module.
    +
    +
    +.. method:: Handler.emit(record)
    +
    +   Do whatever it takes to actually log the specified logging record. This version
    +   is intended to be implemented by subclasses and so raises a
    +   :exc:`NotImplementedError`.
    +
    +
    +StreamHandler
    +^^^^^^^^^^^^^
    +
    +The :class:`StreamHandler` class, located in the core :mod:`logging` package,
    +sends logging output to streams such as *sys.stdout*, *sys.stderr* or any
    +file-like object (or, more precisely, any object which supports :meth:`write`
    +and :meth:`flush` methods).
    +
    +
    +.. class:: StreamHandler([strm])
    +
    +   Returns a new instance of the :class:`StreamHandler` class. If *strm* is
    +   specified, the instance will use it for logging output; otherwise, *sys.stderr*
    +   will be used.
    +
    +
    +.. method:: StreamHandler.emit(record)
    +
    +   If a formatter is specified, it is used to format the record. The record is then
    +   written to the stream with a trailing newline. If exception information is
    +   present, it is formatted using :func:`traceback.print_exception` and appended to
    +   the stream.
    +
    +
    +.. method:: StreamHandler.flush()
    +
    +   Flushes the stream by calling its :meth:`flush` method. Note that the
    +   :meth:`close` method is inherited from :class:`Handler` and so does nothing, so
    +   an explicit :meth:`flush` call may be needed at times.
    +
    +
    +FileHandler
    +^^^^^^^^^^^
    +
    +The :class:`FileHandler` class, located in the core :mod:`logging` package,
    +sends logging output to a disk file.  It inherits the output functionality from
    +:class:`StreamHandler`.
    +
    +
    +.. class:: FileHandler(filename[, mode[, encoding]])
    +
    +   Returns a new instance of the :class:`FileHandler` class. The specified file is
    +   opened and used as the stream for logging. If *mode* is not specified,
    +   :const:`'a'` is used.  If *encoding* is not *None*, it is used to open the file
    +   with that encoding.  By default, the file grows indefinitely.
    +
    +
    +.. method:: FileHandler.close()
    +
    +   Closes the file.
    +
    +
    +.. method:: FileHandler.emit(record)
    +
    +   Outputs the record to the file.
    +
    +
    +WatchedFileHandler
    +^^^^^^^^^^^^^^^^^^
    +
    +.. versionadded:: 2.6
    +
    +The :class:`WatchedFileHandler` class, located in the :mod:`logging.handlers`
    +module, is a :class:`FileHandler` which watches the file it is logging to. If
    +the file changes, it is closed and reopened using the file name.
    +
    +A file change can happen because of usage of programs such as *newsyslog* and
    +*logrotate* which perform log file rotation. This handler, intended for use
    +under Unix/Linux, watches the file to see if it has changed since the last emit.
    +(A file is deemed to have changed if its device or inode has changed.) If the
    +file has changed, the old file stream is closed, and the file opened to get a
    +new stream.
    +
    +This handler is not appropriate for use under Windows, because under Windows
    +open log files cannot be moved or renamed - logging opens the files with
    +exclusive locks - and so there is no need for such a handler. Furthermore,
    +*ST_INO* is not supported under Windows; :func:`stat` always returns zero for
    +this value.
    +
    +
    +.. class:: WatchedFileHandler(filename[, mode[, encoding]])
    +
    +   Returns a new instance of the :class:`WatchedFileHandler` class. The specified
    +   file is opened and used as the stream for logging. If *mode* is not specified,
    +   :const:`'a'` is used.  If *encoding* is not *None*, it is used to open the file
    +   with that encoding.  By default, the file grows indefinitely.
    +
    +
    +.. method:: WatchedFileHandler.emit(record)
    +
    +   Outputs the record to the file, but first checks to see if the file has changed.
    +   If it has, the existing stream is flushed and closed and the file opened again,
    +   before outputting the record to the file.
    +
    +
    +RotatingFileHandler
    +^^^^^^^^^^^^^^^^^^^
    +
    +The :class:`RotatingFileHandler` class, located in the :mod:`logging.handlers`
    +module, supports rotation of disk log files.
    +
    +
    +.. class:: RotatingFileHandler(filename[, mode[, maxBytes[, backupCount]]])
    +
    +   Returns a new instance of the :class:`RotatingFileHandler` class. The specified
    +   file is opened and used as the stream for logging. If *mode* is not specified,
    +   ``'a'`` is used. By default, the file grows indefinitely.
    +
    +   You can use the *maxBytes* and *backupCount* values to allow the file to
    +   :dfn:`rollover` at a predetermined size. When the size is about to be exceeded,
    +   the file is closed and a new file is silently opened for output. Rollover occurs
    +   whenever the current log file is nearly *maxBytes* in length; if *maxBytes* is
    +   zero, rollover never occurs.  If *backupCount* is non-zero, the system will save
    +   old log files by appending the extensions ".1", ".2" etc., to the filename. For
    +   example, with a *backupCount* of 5 and a base file name of :file:`app.log`, you
    +   would get :file:`app.log`, :file:`app.log.1`, :file:`app.log.2`, up to
    +   :file:`app.log.5`. The file being written to is always :file:`app.log`.  When
    +   this file is filled, it is closed and renamed to :file:`app.log.1`, and if files
    +   :file:`app.log.1`, :file:`app.log.2`, etc.  exist, then they are renamed to
    +   :file:`app.log.2`, :file:`app.log.3` etc.  respectively.
    +
    +
    +.. method:: RotatingFileHandler.doRollover()
    +
    +   Does a rollover, as described above.
    +
    +
    +.. method:: RotatingFileHandler.emit(record)
    +
    +   Outputs the record to the file, catering for rollover as described previously.
    +
    +
    +TimedRotatingFileHandler
    +^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +The :class:`TimedRotatingFileHandler` class, located in the
    +:mod:`logging.handlers` module, supports rotation of disk log files at certain
    +timed intervals.
    +
    +
    +.. class:: TimedRotatingFileHandler(filename[, when[, interval[, backupCount]]])
    +
    +   Returns a new instance of the :class:`TimedRotatingFileHandler` class. The
    +   specified file is opened and used as the stream for logging. On rotating it also
    +   sets the filename suffix. Rotating happens based on the product of *when* and
    +   *interval*.
    +
    +   You can use the *when* to specify the type of *interval*. The list of possible
    +   values is below.  Note that they are not case sensitive.
    +
    +   +----------+-----------------------+
    +   | Value    | Type of interval      |
    +   +==========+=======================+
    +   | S        | Seconds               |
    +   +----------+-----------------------+
    +   | M        | Minutes               |
    +   +----------+-----------------------+
    +   | H        | Hours                 |
    +   +----------+-----------------------+
    +   | D        | Days                  |
    +   +----------+-----------------------+
    +   | W        | Week day (0=Monday)   |
    +   +----------+-----------------------+
    +   | midnight | Roll over at midnight |
    +   +----------+-----------------------+
    +
    +   If *backupCount* is non-zero, the system will save old log files by appending
    +   extensions to the filename. The extensions are date-and-time based, using the
    +   strftime format ``%Y-%m-%d_%H-%M-%S`` or a leading portion thereof, depending on
    +   the rollover interval. At most *backupCount* files will be kept, and if more
    +   would be created when rollover occurs, the oldest one is deleted.
    +
    +
    +.. method:: TimedRotatingFileHandler.doRollover()
    +
    +   Does a rollover, as described above.
    +
    +
    +.. method:: TimedRotatingFileHandler.emit(record)
    +
    +   Outputs the record to the file, catering for rollover as described above.
    +
    +
    +SocketHandler
    +^^^^^^^^^^^^^
    +
    +The :class:`SocketHandler` class, located in the :mod:`logging.handlers` module,
    +sends logging output to a network socket. The base class uses a TCP socket.
    +
    +
    +.. class:: SocketHandler(host, port)
    +
    +   Returns a new instance of the :class:`SocketHandler` class intended to
    +   communicate with a remote machine whose address is given by *host* and *port*.
    +
    +
    +.. method:: SocketHandler.close()
    +
    +   Closes the socket.
    +
    +
    +.. method:: SocketHandler.emit(record)
    +
    +   Pickles the record's attribute dictionary and writes it to the socket in binary
    +   format. If there is an error with the socket, silently drops the packet. If the
    +   connection was previously lost, re-establishes the connection. To unpickle the
    +   record at the receiving end into a :class:`LogRecord`, use the
    +   :func:`makeLogRecord` function.
    +
    +
    +.. method:: SocketHandler.handleError(record)
    +
    +   Handles an error which has occurred during :meth:`emit`. The most likely cause
    +   is a lost connection. Closes the socket so that we can retry on the next event.
    +
    +
    +.. method:: SocketHandler.makeSocket()
    +
    +   This is a factory method which allows subclasses to define the precise type of
    +   socket they want. The default implementation creates a TCP socket
    +   (:const:`socket.SOCK_STREAM`).
    +
    +
    +.. method:: SocketHandler.makePickle(record)
    +
    +   Pickles the record's attribute dictionary in binary format with a length prefix,
    +   and returns it ready for transmission across the socket.
    +
    +
    +.. method:: SocketHandler.send(packet)
    +
    +   Send a pickled string *packet* to the socket. This function allows for partial
    +   sends which can happen when the network is busy.
    +
    +
    +DatagramHandler
    +^^^^^^^^^^^^^^^
    +
    +The :class:`DatagramHandler` class, located in the :mod:`logging.handlers`
    +module, inherits from :class:`SocketHandler` to support sending logging messages
    +over UDP sockets.
    +
    +
    +.. class:: DatagramHandler(host, port)
    +
    +   Returns a new instance of the :class:`DatagramHandler` class intended to
    +   communicate with a remote machine whose address is given by *host* and *port*.
    +
    +
    +.. method:: DatagramHandler.emit(record)
    +
    +   Pickles the record's attribute dictionary and writes it to the socket in binary
    +   format. If there is an error with the socket, silently drops the packet. To
    +   unpickle the record at the receiving end into a :class:`LogRecord`, use the
    +   :func:`makeLogRecord` function.
    +
    +
    +.. method:: DatagramHandler.makeSocket()
    +
    +   The factory method of :class:`SocketHandler` is here overridden to create a UDP
    +   socket (:const:`socket.SOCK_DGRAM`).
    +
    +
    +.. method:: DatagramHandler.send(s)
    +
    +   Send a pickled string to a socket.
    +
    +
    +SysLogHandler
    +^^^^^^^^^^^^^
    +
    +The :class:`SysLogHandler` class, located in the :mod:`logging.handlers` module,
    +supports sending logging messages to a remote or local Unix syslog.
    +
    +
    +.. class:: SysLogHandler([address[, facility]])
    +
    +   Returns a new instance of the :class:`SysLogHandler` class intended to
    +   communicate with a remote Unix machine whose address is given by *address* in
    +   the form of a ``(host, port)`` tuple.  If *address* is not specified,
    +   ``('localhost', 514)`` is used.  The address is used to open a UDP socket.  An
    +   alternative to providing a ``(host, port)`` tuple is providing an address as a
    +   string, for example "/dev/log". In this case, a Unix domain socket is used to
    +   send the message to the syslog. If *facility* is not specified,
    +   :const:`LOG_USER` is used.
    +
    +
    +.. method:: SysLogHandler.close()
    +
    +   Closes the socket to the remote host.
    +
    +
    +.. method:: SysLogHandler.emit(record)
    +
    +   The record is formatted, and then sent to the syslog server. If exception
    +   information is present, it is *not* sent to the server.
    +
    +
    +.. method:: SysLogHandler.encodePriority(facility, priority)
    +
    +   Encodes the facility and priority into an integer. You can pass in strings or
    +   integers - if strings are passed, internal mapping dictionaries are used to
    +   convert them to integers.
    +
    +
    +NTEventLogHandler
    +^^^^^^^^^^^^^^^^^
    +
    +The :class:`NTEventLogHandler` class, located in the :mod:`logging.handlers`
    +module, supports sending logging messages to a local Windows NT, Windows 2000 or
    +Windows XP event log. Before you can use it, you need Mark Hammond's Win32
    +extensions for Python installed.
    +
    +
    +.. class:: NTEventLogHandler(appname[, dllname[, logtype]])
    +
    +   Returns a new instance of the :class:`NTEventLogHandler` class. The *appname* is
    +   used to define the application name as it appears in the event log. An
    +   appropriate registry entry is created using this name. The *dllname* should give
    +   the fully qualified pathname of a .dll or .exe which contains message
    +   definitions to hold in the log (if not specified, ``'win32service.pyd'`` is used
    +   - this is installed with the Win32 extensions and contains some basic
    +   placeholder message definitions. Note that use of these placeholders will make
    +   your event logs big, as the entire message source is held in the log. If you
    +   want slimmer logs, you have to pass in the name of your own .dll or .exe which
    +   contains the message definitions you want to use in the event log). The
    +   *logtype* is one of ``'Application'``, ``'System'`` or ``'Security'``, and
    +   defaults to ``'Application'``.
    +
    +
    +.. method:: NTEventLogHandler.close()
    +
    +   At this point, you can remove the application name from the registry as a source
    +   of event log entries. However, if you do this, you will not be able to see the
    +   events as you intended in the Event Log Viewer - it needs to be able to access
    +   the registry to get the .dll name. The current version does not do this (in fact
    +   it doesn't do anything).
    +
    +
    +.. method:: NTEventLogHandler.emit(record)
    +
    +   Determines the message ID, event category and event type, and then logs the
    +   message in the NT event log.
    +
    +
    +.. method:: NTEventLogHandler.getEventCategory(record)
    +
    +   Returns the event category for the record. Override this if you want to specify
    +   your own categories. This version returns 0.
    +
    +
    +.. method:: NTEventLogHandler.getEventType(record)
    +
    +   Returns the event type for the record. Override this if you want to specify your
    +   own types. This version does a mapping using the handler's typemap attribute,
    +   which is set up in :meth:`__init__` to a dictionary which contains mappings for
    +   :const:`DEBUG`, :const:`INFO`, :const:`WARNING`, :const:`ERROR` and
    +   :const:`CRITICAL`. If you are using your own levels, you will either need to
    +   override this method or place a suitable dictionary in the handler's *typemap*
    +   attribute.
    +
    +
    +.. method:: NTEventLogHandler.getMessageID(record)
    +
    +   Returns the message ID for the record. If you are using your own messages, you
    +   could do this by having the *msg* passed to the logger being an ID rather than a
    +   format string. Then, in here, you could use a dictionary lookup to get the
    +   message ID. This version returns 1, which is the base message ID in
    +   :file:`win32service.pyd`.
    +
    +
    +SMTPHandler
    +^^^^^^^^^^^
    +
    +The :class:`SMTPHandler` class, located in the :mod:`logging.handlers` module,
    +supports sending logging messages to an email address via SMTP.
    +
    +
    +.. class:: SMTPHandler(mailhost, fromaddr, toaddrs, subject[, credentials])
    +
    +   Returns a new instance of the :class:`SMTPHandler` class. The instance is
    +   initialized with the from and to addresses and subject line of the email. The
    +   *toaddrs* should be a list of strings. To specify a non-standard SMTP port, use
    +   the (host, port) tuple format for the *mailhost* argument. If you use a string,
    +   the standard SMTP port is used. If your SMTP server requires authentication, you
    +   can specify a (username, password) tuple for the *credentials* argument.
    +
    +   .. versionchanged:: 2.6
    +      *credentials* was added.
    +
    +
    +.. method:: SMTPHandler.emit(record)
    +
    +   Formats the record and sends it to the specified addressees.
    +
    +
    +.. method:: SMTPHandler.getSubject(record)
    +
    +   If you want to specify a subject line which is record-dependent, override this
    +   method.
    +
    +
    +MemoryHandler
    +^^^^^^^^^^^^^
    +
    +The :class:`MemoryHandler` class, located in the :mod:`logging.handlers` module,
    +supports buffering of logging records in memory, periodically flushing them to a
    +:dfn:`target` handler. Flushing occurs whenever the buffer is full, or when an
    +event of a certain severity or greater is seen.
    +
    +:class:`MemoryHandler` is a subclass of the more general
    +:class:`BufferingHandler`, which is an abstract class. This buffers logging
    +records in memory. Whenever a record is added to the buffer, a check is made
    +by calling :meth:`shouldFlush` to see if the buffer should be flushed.  If it
    +should, then :meth:`flush` is expected to do the flushing.
    +
    +
    +.. class:: BufferingHandler(capacity)
    +
    +   Initializes the handler with a buffer of the specified capacity.
    +
    +
    +.. method:: BufferingHandler.emit(record)
    +
    +   Appends the record to the buffer. If :meth:`shouldFlush` returns true, calls
    +   :meth:`flush` to process the buffer.
    +
    +
    +.. method:: BufferingHandler.flush()
    +
    +   You can override this to implement custom flushing behavior. This version just
    +   zaps the buffer to empty.
    +
    +
    +.. method:: BufferingHandler.shouldFlush(record)
    +
    +   Returns true if the buffer is up to capacity. This method can be overridden to
    +   implement custom flushing strategies.
    +
    +
    +.. class:: MemoryHandler(capacity[, flushLevel[, target]])
    +
    +   Returns a new instance of the :class:`MemoryHandler` class. The instance is
    +   initialized with a buffer size of *capacity*. If *flushLevel* is not specified,
    +   :const:`ERROR` is used. If no *target* is specified, the target will need to be
    +   set using :meth:`setTarget` before this handler does anything useful.
    +
    +
    +.. method:: MemoryHandler.close()
    +
    +   Calls :meth:`flush`, sets the target to :const:`None` and clears the buffer.
    +
    +
    +.. method:: MemoryHandler.flush()
    +
    +   For a :class:`MemoryHandler`, flushing means just sending the buffered records
    +   to the target, if there is one. Override if you want different behavior.
    +
    +
    +.. method:: MemoryHandler.setTarget(target)
    +
    +   Sets the target handler for this handler.
    +
    +
    +.. method:: MemoryHandler.shouldFlush(record)
    +
    +   Checks for buffer full or a record at the *flushLevel* or higher.
    +
    +
    +HTTPHandler
    +^^^^^^^^^^^
    +
    +The :class:`HTTPHandler` class, located in the :mod:`logging.handlers` module,
    +supports sending logging messages to a Web server, using either ``GET`` or
    +``POST`` semantics.
    +
    +
    +.. class:: HTTPHandler(host, url[, method])
    +
    +   Returns a new instance of the :class:`HTTPHandler` class. The instance is
    +   initialized with a host address, url and HTTP method. The *host* can be of the
    +   form ``host:port``, should you need to use a specific port number. If no
    +   *method* is specified, ``GET`` is used.
    +
    +
    +.. method:: HTTPHandler.emit(record)
    +
    +   Sends the record to the Web server as a URL-encoded dictionary.
    +
    +
    +Formatter Objects
    +-----------------
    +
    +:class:`Formatter`\ s have the following attributes and methods. They are
    +responsible for converting a :class:`LogRecord` to (usually) a string which can
    +be interpreted by either a human or an external system. The base
    +:class:`Formatter` allows a formatting string to be specified. If none is
    +supplied, the default value of ``'%(message)s'`` is used.
    +
    +A Formatter can be initialized with a format string which makes use of knowledge
    +of the :class:`LogRecord` attributes - such as the default value mentioned above
    +making use of the fact that the user's message and arguments are pre-formatted
    +into a :class:`LogRecord`'s *message* attribute.  This format string contains
    +standard python %-style mapping keys. See section :ref:`string-formatting`
    +for more information on string formatting.
    +
    +Currently, the useful mapping keys in a :class:`LogRecord` are:
    +
    ++-------------------------+-----------------------------------------------+
    +| Format                  | Description                                   |
    ++=========================+===============================================+
    +| ``%(name)s``            | Name of the logger (logging channel).         |
    ++-------------------------+-----------------------------------------------+
    +| ``%(levelno)s``         | Numeric logging level for the message         |
    +|                         | (:const:`DEBUG`, :const:`INFO`,               |
    +|                         | :const:`WARNING`, :const:`ERROR`,             |
    +|                         | :const:`CRITICAL`).                           |
    ++-------------------------+-----------------------------------------------+
    +| ``%(levelname)s``       | Text logging level for the message            |
    +|                         | (``'DEBUG'``, ``'INFO'``, ``'WARNING'``,      |
    +|                         | ``'ERROR'``, ``'CRITICAL'``).                 |
    ++-------------------------+-----------------------------------------------+
    +| ``%(pathname)s``        | Full pathname of the source file where the    |
    +|                         | logging call was issued (if available).       |
    ++-------------------------+-----------------------------------------------+
    +| ``%(filename)s``        | Filename portion of pathname.                 |
    ++-------------------------+-----------------------------------------------+
    +| ``%(module)s``          | Module (name portion of filename).            |
    ++-------------------------+-----------------------------------------------+
    +| ``%(funcName)s``        | Name of function containing the logging call. |
    ++-------------------------+-----------------------------------------------+
    +| ``%(lineno)d``          | Source line number where the logging call was |
    +|                         | issued (if available).                        |
    ++-------------------------+-----------------------------------------------+
    +| ``%(created)f``         | Time when the :class:`LogRecord` was created  |
    +|                         | (as returned by :func:`time.time`).           |
    ++-------------------------+-----------------------------------------------+
    +| ``%(relativeCreated)d`` | Time in milliseconds when the LogRecord was   |
    +|                         | created, relative to the time the logging     |
    +|                         | module was loaded.                            |
    ++-------------------------+-----------------------------------------------+
    +| ``%(asctime)s``         | Human-readable time when the                  |
    +|                         | :class:`LogRecord` was created.  By default   |
    +|                         | this is of the form "2003-07-08 16:49:45,896" |
    +|                         | (the numbers after the comma are the          |
    +|                         | millisecond portion of the time).             |
    ++-------------------------+-----------------------------------------------+
    +| ``%(msecs)d``           | Millisecond portion of the time when the      |
    +|                         | :class:`LogRecord` was created.               |
    ++-------------------------+-----------------------------------------------+
    +| ``%(thread)d``          | Thread ID (if available).                     |
    ++-------------------------+-----------------------------------------------+
    +| ``%(threadName)s``      | Thread name (if available).                   |
    ++-------------------------+-----------------------------------------------+
    +| ``%(process)d``         | Process ID (if available).                    |
    ++-------------------------+-----------------------------------------------+
    +| ``%(message)s``         | The logged message, computed as ``msg %       |
    +|                         | args``.                                       |
    ++-------------------------+-----------------------------------------------+
    +
    +.. versionchanged:: 2.5
    +   *funcName* was added.
    +
    +
    +.. class:: Formatter([fmt[, datefmt]])
    +
    +   Returns a new instance of the :class:`Formatter` class. The instance is
    +   initialized with a format string for the message as a whole, as well as a format
    +   string for the date/time portion of a message. If no *fmt* is specified,
    +   ``'%(message)s'`` is used. If no *datefmt* is specified, the ISO8601 date format
    +   is used.
    +
    +
    +.. method:: Formatter.format(record)
    +
    +   The record's attribute dictionary is used as the operand to a string formatting
    +   operation. Returns the resulting string. Before formatting the dictionary, a
    +   couple of preparatory steps are carried out. The *message* attribute of the
    +   record is computed using *msg* % *args*. If the formatting string contains
    +   ``'(asctime)'``, :meth:`formatTime` is called to format the event time. If there
    +   is exception information, it is formatted using :meth:`formatException` and
    +   appended to the message.
    +
    +
    +.. method:: Formatter.formatTime(record[, datefmt])
    +
    +   This method should be called from :meth:`format` by a formatter which wants to
    +   make use of a formatted time. This method can be overridden in formatters to
    +   provide for any specific requirement, but the basic behavior is as follows: if
    +   *datefmt* (a string) is specified, it is used with :func:`time.strftime` to
    +   format the creation time of the record. Otherwise, the ISO8601 format is used.
    +   The resulting string is returned.
    +
    +
    +.. method:: Formatter.formatException(exc_info)
    +
    +   Formats the specified exception information (a standard exception tuple as
    +   returned by :func:`sys.exc_info`) as a string. This default implementation just
    +   uses :func:`traceback.print_exception`. The resulting string is returned.
    +
    +
    +Filter Objects
    +--------------
    +
    +:class:`Filter`\ s can be used by :class:`Handler`\ s and :class:`Logger`\ s for
    +more sophisticated filtering than is provided by levels. The base filter class
    +only allows events which are below a certain point in the logger hierarchy. For
    +example, a filter initialized with "A.B" will allow events logged by loggers
    +"A.B", "A.B.C", "A.B.C.D", "A.B.D" etc. but not "A.BB", "B.A.B" etc. If
    +initialized with the empty string, all events are passed.
    +
    +
    +.. class:: Filter([name])
    +
    +   Returns an instance of the :class:`Filter` class. If *name* is specified, it
    +   names a logger which, together with its children, will have its events allowed
    +   through the filter. If no name is specified, allows every event.
    +
    +
    +.. method:: Filter.filter(record)
    +
    +   Is the specified record to be logged? Returns zero for no, nonzero for yes. If
    +   deemed appropriate, the record may be modified in-place by this method.
    +
    +
    +LogRecord Objects
    +-----------------
    +
    +:class:`LogRecord` instances are created every time something is logged. They
    +contain all the information pertinent to the event being logged. The main
    +information passed in is in msg and args, which are combined using msg % args to
    +create the message field of the record. The record also includes information
    +such as when the record was created, the source line where the logging call was
    +made, and any exception information to be logged.
    +
    +
    +.. class:: LogRecord(name, lvl, pathname, lineno, msg, args, exc_info[, func])
    +
    +   Returns an instance of :class:`LogRecord` initialized with interesting
    +   information. The *name* is the logger name; *lvl* is the numeric level;
    +   *pathname* is the absolute pathname of the source file in which the logging
    +   call was made; *lineno* is the line number in that file where the logging
    +   call is found; *msg* is the user-supplied message (a format string); *args*
    +   is the tuple which, together with *msg*, makes up the user message; and
    +   *exc_info* is the exception tuple obtained by calling :func:`sys.exc_info`
    +   (or :const:`None`, if no exception information is available). The *func* is
    +   the name of the function from which the logging call was made. If not
    +   specified, it defaults to ``None``.
    +
    +   .. versionchanged:: 2.5
    +      *func* was added.
    +
    +
    +.. method:: LogRecord.getMessage()
    +
    +   Returns the message for this :class:`LogRecord` instance after merging any
    +   user-supplied arguments with the message.
    +
    +
    +Thread Safety
    +-------------
    +
    +The logging module is intended to be thread-safe without any special work
    +needing to be done by its clients. It achieves this through using threading
    +locks; there is one lock to serialize access to the module's shared data, and
    +each handler also creates a lock to serialize access to its underlying I/O.
    +
    +
    +Configuration
    +-------------
    +
    +
    +.. _logging-config-api:
    +
    +Configuration functions
    +^^^^^^^^^^^^^^^^^^^^^^^
    +
    +.. % 
    +
    +The following functions configure the logging module. They are located in the
    +:mod:`logging.config` module.  Their use is optional --- you can configure the
    +logging module using these functions or by making calls to the main API (defined
    +in :mod:`logging` itself) and defining handlers which are declared either in
    +:mod:`logging` or :mod:`logging.handlers`.
    +
    +
    +.. function:: fileConfig(fname[, defaults])
    +
    +   Reads the logging configuration from a ConfigParser-format file named *fname*.
    +   This function can be called several times from an application, allowing an end
    +   user the ability to select from various pre-canned configurations (if the
    +   developer provides a mechanism to present the choices and load the chosen
    +   configuration). Defaults to be passed to ConfigParser can be specified in the
    +   *defaults* argument.
    +
    +
    +.. function:: listen([port])
    +
    +   Starts up a socket server on the specified port, and listens for new
    +   configurations. If no port is specified, the module's default
    +   :const:`DEFAULT_LOGGING_CONFIG_PORT` is used. Logging configurations will be
    +   sent as a file suitable for processing by :func:`fileConfig`. Returns a
    +   :class:`Thread` instance on which you can call :meth:`start` to start the
    +   server, and which you can :meth:`join` when appropriate. To stop the server,
    +   call :func:`stopListening`. To send a configuration to the socket, read in the
    +   configuration file and send it to the socket as a string of bytes preceded by a
    +   four-byte length packed in binary using ``struct.pack('>L', n)``.
    +
    +
    +.. function:: stopListening()
    +
    +   Stops the listening server which was created with a call to :func:`listen`. This
    +   is typically called before calling :meth:`join` on the return value from
    +   :func:`listen`.
    +
    +
    +.. _logging-config-fileformat:
    +
    +Configuration file format
    +^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +.. % 
    +
    +The configuration file format understood by :func:`fileConfig` is based on
    +ConfigParser functionality. The file must contain sections called ``[loggers]``,
    +``[handlers]`` and ``[formatters]`` which identify by name the entities of each
    +type which are defined in the file. For each such entity, there is a separate
    +section which identifies how that entity is configured. Thus, for a logger named
    +``log01`` in the ``[loggers]`` section, the relevant configuration details are
    +held in a section ``[logger_log01]``. Similarly, a handler called ``hand01`` in
    +the ``[handlers]`` section will have its configuration held in a section called
    +``[handler_hand01]``, while a formatter called ``form01`` in the
    +``[formatters]`` section will have its configuration specified in a section
    +called ``[formatter_form01]``. The root logger configuration must be specified
    +in a section called ``[logger_root]``.
    +
    +Examples of these sections in the file are given below. ::
    +
    +   [loggers]
    +   keys=root,log02,log03,log04,log05,log06,log07
    +
    +   [handlers]
    +   keys=hand01,hand02,hand03,hand04,hand05,hand06,hand07,hand08,hand09
    +
    +   [formatters]
    +   keys=form01,form02,form03,form04,form05,form06,form07,form08,form09
    +
    +The root logger must specify a level and a list of handlers. An example of a
    +root logger section is given below. ::
    +
    +   [logger_root]
    +   level=NOTSET
    +   handlers=hand01
    +
    +The ``level`` entry can be one of ``DEBUG, INFO, WARNING, ERROR, CRITICAL`` or
    +``NOTSET``. For the root logger only, ``NOTSET`` means that all messages will be
    +logged. Level values are :func:`eval`\ uated in the context of the ``logging``
    +package's namespace.
    +
    +The ``handlers`` entry is a comma-separated list of handler names. Each name
    +listed must appear in the ``[handlers]`` section and must have a
    +corresponding section (for example, ``[handler_hand01]``) in the configuration
    +file.
    +
    +For loggers other than the root logger, some additional information is required.
    +This is illustrated by the following example. ::
    +
    +   [logger_parser]
    +   level=DEBUG
    +   handlers=hand01
    +   propagate=1
    +   qualname=compiler.parser
    +
    +The ``level`` and ``handlers`` entries are interpreted as for the root logger,
    +except that if a non-root logger's level is specified as ``NOTSET``, the system
    +consults loggers higher up the hierarchy to determine the effective level of the
    +logger. The ``propagate`` entry is set to 1 to indicate that messages must
    +propagate to handlers higher up the logger hierarchy from this logger, or 0 to
    +indicate that messages are **not** propagated to handlers up the hierarchy. The
    +``qualname`` entry is the hierarchical channel name of the logger, that is to
    +say the name used by the application to get the logger.
    +
    +Sections which specify handler configuration are exemplified by the following.
    +::
    +
    +   [handler_hand01]
    +   class=StreamHandler
    +   level=NOTSET
    +   formatter=form01
    +   args=(sys.stdout,)
    +
    +The ``class`` entry indicates the handler's class (as determined by :func:`eval`
    +in the ``logging`` package's namespace). The ``level`` is interpreted as for
    +loggers, and ``NOTSET`` is taken to mean "log everything".
    +
    +The ``formatter`` entry indicates the key name of the formatter for this
    +handler. If blank, a default formatter (``logging._defaultFormatter``) is used.
    +If a name is specified, it must appear in the ``[formatters]`` section and have
    +a corresponding section in the configuration file.
    +
    +The ``args`` entry, when :func:`eval`\ uated in the context of the ``logging``
    +package's namespace, is the list of arguments to the constructor for the handler
    +class. Refer to the constructors for the relevant handlers, or to the examples
    +below, to see how typical entries are constructed. ::
    +
    +   [handler_hand02]
    +   class=FileHandler
    +   level=DEBUG
    +   formatter=form02
    +   args=('python.log', 'w')
    +
    +   [handler_hand03]
    +   class=handlers.SocketHandler
    +   level=INFO
    +   formatter=form03
    +   args=('localhost', handlers.DEFAULT_TCP_LOGGING_PORT)
    +
    +   [handler_hand04]
    +   class=handlers.DatagramHandler
    +   level=WARN
    +   formatter=form04
    +   args=('localhost', handlers.DEFAULT_UDP_LOGGING_PORT)
    +
    +   [handler_hand05]
    +   class=handlers.SysLogHandler
    +   level=ERROR
    +   formatter=form05
    +   args=(('localhost', handlers.SYSLOG_UDP_PORT), handlers.SysLogHandler.LOG_USER)
    +
    +   [handler_hand06]
    +   class=handlers.NTEventLogHandler
    +   level=CRITICAL
    +   formatter=form06
    +   args=('Python Application', '', 'Application')
    +
    +   [handler_hand07]
    +   class=handlers.SMTPHandler
    +   level=WARN
    +   formatter=form07
    +   args=('localhost', 'from@abc', ['user1@abc', 'user2@xyz'], 'Logger Subject')
    +
    +   [handler_hand08]
    +   class=handlers.MemoryHandler
    +   level=NOTSET
    +   formatter=form08
    +   target=
    +   args=(10, ERROR)
    +
    +   [handler_hand09]
    +   class=handlers.HTTPHandler
    +   level=NOTSET
    +   formatter=form09
    +   args=('localhost:9022', '/log', 'GET')
    +
    +Sections which specify formatter configuration are typified by the following. ::
    +
    +   [formatter_form01]
    +   format=F1 %(asctime)s %(levelname)s %(message)s
    +   datefmt=
    +   class=logging.Formatter
    +
    +The ``format`` entry is the overall format string, and the ``datefmt`` entry is
    +the :func:`strftime`\ -compatible date/time format string. If empty, the package
    +substitutes ISO8601 format date/times, which is almost equivalent to specifying
    +the date format string ``"%Y-%m-%d %H:%M:%S"``. The ISO8601 format also
    +specifies milliseconds, which are appended to the result of using the above
    +format string, with a comma separator. An example time in ISO8601 format is
    +``2003-01-23 00:29:50,411``.
    +
    +
    +The ``class`` entry is optional.  It indicates the name of the formatter's class
    +(as a dotted module and class name.)  This option is useful for instantiating a
    +:class:`Formatter` subclass.  Subclasses of :class:`Formatter` can present
    +exception tracebacks in an expanded or condensed format.
    +
    diff --git a/Doc/library/mac.rst b/Doc/library/mac.rst
    new file mode 100644
    index 0000000..791eb81
    --- /dev/null
    +++ b/Doc/library/mac.rst
    @@ -0,0 +1,23 @@
    +.. _mac-specific-services:
    +
    +*************************
    +MacOS X specific services
    +*************************
    +
    +This chapter describes modules that are only available on the Mac OS X platform.
    +
    +See the chapters :ref:`mac-scripting` and :ref:`undoc-mac-modules` for more
    +modules, and the HOWTO :ref:`using-on-mac` for a general introduction to
    +Mac-specific Python programming.
    +
    +
    +.. toctree::
    +
    +   ic.rst
    +   macos.rst
    +   macostools.rst
    +   easydialogs.rst
    +   framework.rst
    +   autogil.rst
    +   carbon.rst
    +   colorpicker.rst
    diff --git a/Doc/library/macos.rst b/Doc/library/macos.rst
    new file mode 100644
    index 0000000..543f868
    --- /dev/null
    +++ b/Doc/library/macos.rst
    @@ -0,0 +1,95 @@
    +
    +:mod:`MacOS` --- Access to Mac OS interpreter features
    +======================================================
    +
    +.. module:: MacOS
    +   :platform: Mac
    +   :synopsis: Access to Mac OS-specific interpreter features.
    +
    +
    +This module provides access to MacOS specific functionality in the Python
    +interpreter, such as how the interpreter event loop functions and the like. Use
    +with care.
    +
    +Note the capitalization of the module name; this is a historical artifact.
    +
    +
    +.. data:: runtimemodel
    +
    +   Always ``'macho'``, from Python 2.4 on. In earlier versions of Python the value
    +   could also be ``'ppc'`` for the classic Mac OS 8 runtime model or ``'carbon'``
    +   for the Mac OS 9 runtime model.
    +
    +
    +.. data:: linkmodel
    +
    +   The way the interpreter has been linked. As extension modules may be
    +   incompatible between linking models, packages could use this information to give
    +   more decent error messages. The value is one of ``'static'`` for a statically
    +   linked Python, ``'framework'`` for Python in a Mac OS X framework, ``'shared'``
    +   for Python in a standard Unix shared library. Older Pythons could also have the
    +   value ``'cfm'`` for Mac OS 9-compatible Python.
    +
    +
    +.. exception:: Error
    +
    +   .. index:: module: macerrors
    +
    +   This exception is raised on MacOS generated errors, either from functions in
    +   this module or from other mac-specific modules like the toolbox interfaces. The
    +   arguments are the integer error code (the :cdata:`OSErr` value) and a textual
    +   description of the error code. Symbolic names for all known error codes are
    +   defined in the standard module :mod:`macerrors`.
    +
    +
    +.. function:: GetErrorString(errno)
    +
    +   Return the textual description of MacOS error code *errno*.
    +
    +
    +.. function:: DebugStr(message [, object])
    +
    +   On Mac OS X the string is simply printed to stderr (on older Mac OS systems more
    +   elaborate functionality was available), but it provides a convenient location to
    +   attach a breakpoint in a low-level debugger like :program:`gdb`.
    +
    +
    +.. function:: SysBeep()
    +
    +   Ring the bell.
    +
    +
    +.. function:: GetTicks()
    +
    +   Get the number of clock ticks (1/60th of a second) since system boot.
    +
    +
    +.. function:: GetCreatorAndType(file)
    +
    +   Return the file creator and file type as two four-character strings. The *file*
    +   parameter can be a pathname or an ``FSSpec`` or  ``FSRef`` object.
    +
    +
    +.. function:: SetCreatorAndType(file, creator, type)
    +
    +   Set the file creator and file type. The *file* parameter can be a pathname or an
    +   ``FSSpec`` or  ``FSRef`` object. *creator* and *type* must be four character
    +   strings.
    +
    +
    +.. function:: openrf(name [, mode])
    +
    +   Open the resource fork of a file. Arguments are the same as for the built-in
    +   function :func:`open`. The object returned has file-like semantics, but it is
    +   not a Python file object, so there may be subtle differences.
    +
    +
    +.. function:: WMAvailable()
    +
    +   Checks whether the current process has access to the window manager. The method
    +   will return ``False`` if the window manager is not available, for instance when
    +   running on Mac OS X Server or when logged in via ssh, or when the current
    +   interpreter is not running from a full-blown application bundle. A script runs
    +   from an application bundle either when it has been started with
    +   :program:`pythonw` instead of :program:`python` or when running  as an applet.
    +
    diff --git a/Doc/library/macosa.rst b/Doc/library/macosa.rst
    new file mode 100644
    index 0000000..67475ed
    --- /dev/null
    +++ b/Doc/library/macosa.rst
    @@ -0,0 +1,92 @@
    +
    +.. _mac-scripting:
    +
    +*********************
    +MacPython OSA Modules
    +*********************
    +
    +This chapter describes the current implementation of the Open Scripting
    +Architecture (OSA, also commonly referred to as AppleScript) for Python, allowing
    +you to control scriptable applications from your Python program, and with a
    +fairly pythonic interface. Development on this set of modules has stopped, and a
    +replacement is expected for Python 2.5.
    +
    +For a description of the various components of AppleScript and OSA, and to get
    +an understanding of the architecture and terminology, you should read Apple's
    +documentation. The "AppleScript Language Guide" explains the conceptual model
    +and the terminology, and documents the standard suite. The "Open Scripting
    +Architecture" document explains how to use OSA from an application programmer's
    +point of view. In the Apple Help Viewer these books are located in the Developer
    +Documentation, Core Technologies section.
    +
    +As an example of scripting an application, the following piece of AppleScript
    +will get the name of the frontmost :program:`Finder` window and print it::
    +
    +   tell application "Finder"
    +       get name of window 1
    +   end tell
    +
    +In Python, the following code fragment will do the same::
    +
    +   import Finder
    +
    +   f = Finder.Finder()
    +   print f.get(f.window(1).name)
    +
    +As distributed the Python library includes packages that implement the standard
    +suites, plus packages that interface to a small number of common applications.
    +
    +To send AppleEvents to an application you must first create the Python package
    +interfacing to the terminology of the application (what :program:`Script Editor`
    +calls the "Dictionary"). This can be done from within the :program:`PythonIDE`
    +or by running the :file:`gensuitemodule.py` module as a standalone program from
    +the command line.
    +
    +The generated output is a package with a number of modules, one for every suite
    +used in the program plus an :mod:`__init__` module to glue it all together. The
    +Python inheritance graph follows the AppleScript inheritance graph, so if a
    +program's dictionary specifies that it includes support for the Standard Suite,
    +but extends one or two verbs with extra arguments then the output suite will
    +contain a module :mod:`Standard_Suite` that imports and re-exports everything
    +from :mod:`StdSuites.Standard_Suite` but overrides the methods that have extra
    +functionality. The output of :mod:`gensuitemodule` is pretty readable, and
    +contains the documentation that was in the original AppleScript dictionary in
    +Python docstrings, so reading it is a good source of documentation.
    +
    +The output package implements a main class with the same name as the package
    +which contains all the AppleScript verbs as methods, with the direct object as
    +the first argument and all optional parameters as keyword arguments. AppleScript
    +classes are also implemented as Python classes, as are comparisons and all the
    +other thingies.
    +
    +The main Python class implementing the verbs also allows access to the
    +properties and elements declared in the AppleScript class "application". In the
    +current release that is as far as the object orientation goes, so in the example
    +above we need to use ``f.get(f.window(1).name)`` instead of the more Pythonic
    +``f.window(1).name.get()``.
    +
    +If an AppleScript identifier is not a Python identifier the name is mangled
    +according to a small number of rules:
    +
    +* spaces are replaced with underscores
    +
    +* other non-alphanumeric characters are replaced with ``_xx_`` where ``xx`` is
    +  the hexadecimal character value
    +
    +* any Python reserved word gets an underscore appended
    +
    +Python also has support for creating scriptable applications in Python. The
    +following modules are relevant to MacPython AppleScript support:
    +
    +.. toctree::
    +
    +   gensuitemodule.rst
    +   aetools.rst
    +   aepack.rst
    +   aetypes.rst
    +   miniaeframe.rst
    +
    +
    +In addition, support modules have been pre-generated for :mod:`Finder`,
    +:mod:`Terminal`, :mod:`Explorer`, :mod:`Netscape`, :mod:`CodeWarrior`,
    +:mod:`SystemEvents` and :mod:`StdSuites`.
    diff --git a/Doc/library/macostools.rst b/Doc/library/macostools.rst
    new file mode 100644
    index 0000000..275100e
    --- /dev/null
    +++ b/Doc/library/macostools.rst
    @@ -0,0 +1,115 @@
    +
    +:mod:`macostools` --- Convenience routines for file manipulation
    +================================================================
    +
    +.. module:: macostools
    +   :platform: Mac
    +   :synopsis: Convenience routines for file manipulation.
    +
    +
    +This module contains some convenience routines for file-manipulation on the
    +Macintosh. All file parameters can be specified as pathnames, :class:`FSRef` or
    +:class:`FSSpec` objects.  This module expects a filesystem which supports forked
    +files, so it should not be used on UFS partitions.
    +
    +The :mod:`macostools` module defines the following functions:
    +
    +
    +.. function:: copy(src, dst[, createpath[, copytimes]])
    +
    +   Copy file *src* to *dst*.  If *createpath* is non-zero the folders leading to
    +   *dst* are created if necessary. The method copies data and resource fork and
    +   some finder information (creator, type, flags) and optionally the creation,
    +   modification and backup times (default is to copy them). Custom icons, comments
    +   and icon position are not copied.
    +
    +
    +.. function:: copytree(src, dst)
    +
    +   Recursively copy a file tree from *src* to *dst*, creating folders as needed.
    +   *src* and *dst* should be specified as pathnames.
    +
    +
    +.. function:: mkalias(src, dst)
    +
    +   Create a finder alias *dst* pointing to *src*.
    +
    +
    +.. function:: touched(dst)
    +
    +   Tell the finder that some bits of finder-information such as creator or type for
    +   file *dst* have changed. The file can be specified by pathname or fsspec. This
    +   call should tell the finder to redraw the file's icon.
    +
    +   .. deprecated:: 2.6
    +      The function is a no-op on OS X.
    +
    +
    +.. data:: BUFSIZ
    +
    +   The buffer size for ``copy``, default 1 megabyte.
    +
    +Note that the process of creating finder aliases is not specified in the Apple
    +documentation. Hence, aliases created with :func:`mkalias` could conceivably
    +have incompatible behaviour in some cases.
    +
    +
    +:mod:`findertools` --- The :program:`finder`'s Apple Events interface
    +=====================================================================
    +
    +.. module:: findertools
    +   :platform: Mac
    +   :synopsis: Wrappers around the finder's Apple Events interface.
    +
    +
    +.. index:: single: AppleEvents
    +
    +This module contains routines that give Python programs access to some
    +functionality provided by the finder. They are implemented as wrappers around
    +the AppleEvent interface to the finder.
    +
    +All file and folder parameters can be specified either as full pathnames, or as
    +:class:`FSRef` or :class:`FSSpec` objects.
    +
    +The :mod:`findertools` module defines the following functions:
    +
    +
    +.. function:: launch(file)
    +
    +   Tell the finder to launch *file*. What launching means depends on the file:
    +   applications are started, folders are opened and documents are opened in the
    +   correct application.
    +
    +
    +.. function:: Print(file)
    +
    +   Tell the finder to print a file. The behaviour is identical to selecting the
    +   file and using the print command in the finder's file menu.
    +
    +
    +.. function:: copy(file, destdir)
    +
    +   Tell the finder to copy a file or folder *file* to folder *destdir*. The
    +   function returns an :class:`Alias` object pointing to the new file.
    +
    +
    +.. function:: move(file, destdir)
    +
    +   Tell the finder to move a file or folder *file* to folder *destdir*. The
    +   function returns an :class:`Alias` object pointing to the new file.
    +
    +
    +.. function:: sleep()
    +
    +   Tell the finder to put the Macintosh to sleep, if your machine supports it.
    +
    +
    +.. function:: restart()
    +
    +   Tell the finder to perform an orderly restart of the machine.
    +
    +
    +.. function:: shutdown()
    +
    +   Tell the finder to perform an orderly shutdown of the machine.
    +
    diff --git a/Doc/library/macpath.rst b/Doc/library/macpath.rst
    new file mode 100644
    index 0000000..66c54e5
    --- /dev/null
    +++ b/Doc/library/macpath.rst
    @@ -0,0 +1,17 @@
    +
    +:mod:`macpath` --- MacOS 9 path manipulation functions
    +======================================================
    +
    +.. module:: macpath
    +   :synopsis: MacOS 9 path manipulation functions.
    +
    +
    +This module is the Mac OS 9 (and earlier) implementation of the :mod:`os.path`
    +module. It can be used to manipulate old-style Macintosh pathnames on Mac OS X
    +(or any other platform).
    +
    +The following functions are available in this module: :func:`normcase`,
    +:func:`normpath`, :func:`isabs`, :func:`join`, :func:`split`, :func:`isdir`,
    +:func:`isfile`, :func:`walk`, :func:`exists`. For other functions available in
    +:mod:`os.path` dummy counterparts are available.
    +
    diff --git a/Doc/library/mailbox.rst b/Doc/library/mailbox.rst
    new file mode 100644
    index 0000000..ce8dc59
    --- /dev/null
    +++ b/Doc/library/mailbox.rst
    @@ -0,0 +1,1679 @@
    +
    +:mod:`mailbox` --- Manipulate mailboxes in various formats
    +==========================================================
    +
    +.. module:: mailbox
    +   :synopsis: Manipulate mailboxes in various formats
    +.. moduleauthor:: Gregory K. Johnson 
    +.. sectionauthor:: Gregory K. Johnson 
    +
    +
    +This module defines two classes, :class:`Mailbox` and :class:`Message`, for
    +accessing and manipulating on-disk mailboxes and the messages they contain.
    +:class:`Mailbox` offers a dictionary-like mapping from keys to messages.
    +:class:`Message` extends the :mod:`email.Message` module's :class:`Message`
    +class with format-specific state and behavior. Supported mailbox formats are
    +Maildir, mbox, MH, Babyl, and MMDF.
    +
    +
    +.. seealso::
    +
    +   Module :mod:`email`
    +      Represent and manipulate messages.
    +
    +
    +.. _mailbox-objects:
    +
    +:class:`Mailbox` objects
    +------------------------
    +
    +
    +.. class:: Mailbox
    +
    +   A mailbox, which may be inspected and modified.
    +
    +The :class:`Mailbox` class defines an interface and is not intended to be
    +instantiated.  Instead, format-specific subclasses should inherit from
    +:class:`Mailbox` and your code should instantiate a particular subclass.
    +
    +The :class:`Mailbox` interface is dictionary-like, with small keys corresponding
    +to messages. Keys are issued by the :class:`Mailbox` instance with which they
    +will be used and are only meaningful to that :class:`Mailbox` instance. A key
    +continues to identify a message even if the corresponding message is modified,
    +such as by replacing it with another message.
    +
    +Messages may be added to a :class:`Mailbox` instance using the set-like method
    +:meth:`add` and removed using a ``del`` statement or the set-like methods
    +:meth:`remove` and :meth:`discard`.
    +
    +:class:`Mailbox` interface semantics differ from dictionary semantics in some
    +noteworthy ways. Each time a message is requested, a new representation
    +(typically a :class:`Message` instance) is generated based upon the current
    +state of the mailbox. Similarly, when a message is added to a :class:`Mailbox`
    +instance, the provided message representation's contents are copied. In neither
    +case is a reference to the message representation kept by the :class:`Mailbox`
    +instance.
    +
    +The default :class:`Mailbox` iterator iterates over message representations, not
    +keys as the default dictionary iterator does. Moreover, modification of a
    +mailbox during iteration is safe and well-defined. Messages added to the mailbox
    +after an iterator is created will not be seen by the iterator. Messages removed
    +from the mailbox before the iterator yields them will be silently skipped,
    +though using a key from an iterator may result in a :exc:`KeyError` exception if
    +the corresponding message is subsequently removed.
    +
    +.. warning::
    +
    +   Be very cautious when modifying mailboxes that might be simultaneously changed
    +   by some other process.  The safest mailbox format to use for such tasks is
    +   Maildir; try to avoid using single-file formats such as mbox for concurrent
    +   writing.  If you're modifying a mailbox, you *must* lock it by calling the
    +   :meth:`lock` and :meth:`unlock` methods *before* reading any messages in the
    +   file or making any changes by adding or deleting a message.  Failing to lock the
    +   mailbox runs the risk of losing messages or corrupting the entire mailbox.
    +
    +:class:`Mailbox` instances have the following methods:
    +
    +
    +.. method:: Mailbox.add(message)
    +
    +   Add *message* to the mailbox and return the key that has been assigned to it.
    +
    +   Parameter *message* may be a :class:`Message` instance, an
    +   :class:`email.Message.Message` instance, a string, or a file-like object (which
    +   should be open in text mode). If *message* is an instance of the appropriate
    +   format-specific :class:`Message` subclass (e.g., if it's an :class:`mboxMessage`
    +   instance and this is an :class:`mbox` instance), its format-specific information
    +   is used. Otherwise, reasonable defaults for format-specific information are
    +   used.
    +
    +
    +.. method:: Mailbox.remove(key)
    +            Mailbox.__delitem__(key)
    +            Mailbox.discard(key)
    +
    +   Delete the message corresponding to *key* from the mailbox.
    +
    +   If no such message exists, a :exc:`KeyError` exception is raised if the method
    +   was called as :meth:`remove` or :meth:`__delitem__` but no exception is raised
    +   if the method was called as :meth:`discard`. The behavior of :meth:`discard` may
    +   be preferred if the underlying mailbox format supports concurrent modification
    +   by other processes.
    +
    +
    +.. method:: Mailbox.__setitem__(key, message)
    +
    +   Replace the message corresponding to *key* with *message*. Raise a
    +   :exc:`KeyError` exception if no message already corresponds to *key*.
    +
    +   As with :meth:`add`, parameter *message* may be a :class:`Message` instance, an
    +   :class:`email.Message.Message` instance, a string, or a file-like object (which
    +   should be open in text mode). If *message* is an instance of the appropriate
    +   format-specific :class:`Message` subclass (e.g., if it's an :class:`mboxMessage`
    +   instance and this is an :class:`mbox` instance), its format-specific information
    +   is used. Otherwise, the format-specific information of the message that
    +   currently corresponds to *key* is left unchanged.
    +
    +
    +.. method:: Mailbox.iterkeys()
    +            Mailbox.keys()
    +
    +   Return an iterator over all keys if called as :meth:`iterkeys` or return a list
    +   of keys if called as :meth:`keys`.
    +
    +
    +.. method:: Mailbox.itervalues()
    +            Mailbox.__iter__()
    +            Mailbox.values()
    +
    +   Return an iterator over representations of all messages if called as
    +   :meth:`itervalues` or :meth:`__iter__` or return a list of such representations
    +   if called as :meth:`values`. The messages are represented as instances of the
    +   appropriate format-specific :class:`Message` subclass unless a custom message
    +   factory was specified when the :class:`Mailbox` instance was initialized.
    +
    +   .. note::
    +
    +      The behavior of :meth:`__iter__` is unlike that of dictionaries, which iterate
    +      over keys.
    +
    +
    +.. method:: Mailbox.iteritems()
    +            Mailbox.items()
    +
    +   Return an iterator over (*key*, *message*) pairs, where *key* is a key and
    +   *message* is a message representation, if called as :meth:`iteritems` or return
    +   a list of such pairs if called as :meth:`items`. The messages are represented as
    +   instances of the appropriate format-specific :class:`Message` subclass unless a
    +   custom message factory was specified when the :class:`Mailbox` instance was
    +   initialized.
    +
    +
    +.. method:: Mailbox.get(key[, default=None])
    +            Mailbox.__getitem__(key)
    +
    +   Return a representation of the message corresponding to *key*. If no such
    +   message exists, *default* is returned if the method was called as :meth:`get`
    +   and a :exc:`KeyError` exception is raised if the method was called as
    +   :meth:`__getitem__`. The message is represented as an instance of the
    +   appropriate format-specific :class:`Message` subclass unless a custom message
    +   factory was specified when the :class:`Mailbox` instance was initialized.
    +
    +
    +.. method:: Mailbox.get_message(key)
    +
    +   Return a representation of the message corresponding to *key* as an instance of
    +   the appropriate format-specific :class:`Message` subclass, or raise a
    +   :exc:`KeyError` exception if no such message exists.
    +
    +
    +.. method:: Mailbox.get_string(key)
    +
    +   Return a string representation of the message corresponding to *key*, or raise a
    +   :exc:`KeyError` exception if no such message exists.
    +
    +
    +.. method:: Mailbox.get_file(key)
    +
    +   Return a file-like representation of the message corresponding to *key*, or
    +   raise a :exc:`KeyError` exception if no such message exists. The file-like
    +   object behaves as if open in binary mode. This file should be closed once it is
    +   no longer needed.
    +
    +   .. note::
    +
    +      Unlike other representations of messages, file-like representations are not
    +      necessarily independent of the :class:`Mailbox` instance that created them or of
    +      the underlying mailbox. More specific documentation is provided by each
    +      subclass.
    +
    +
    +.. method:: Mailbox.has_key(key)
    +            Mailbox.__contains__(key)
    +
    +   Return ``True`` if *key* corresponds to a message, ``False`` otherwise.
    +
    +
    +.. method:: Mailbox.__len__()
    +
    +   Return a count of messages in the mailbox.
    +
    +
    +.. method:: Mailbox.clear()
    +
    +   Delete all messages from the mailbox.
    +
    +
    +.. method:: Mailbox.pop(key[, default])
    +
    +   Return a representation of the message corresponding to *key* and delete the
    +   message. If no such message exists, return *default* if it was supplied or else
    +   raise a :exc:`KeyError` exception. The message is represented as an instance of
    +   the appropriate format-specific :class:`Message` subclass unless a custom
    +   message factory was specified when the :class:`Mailbox` instance was
    +   initialized.
    +
    +
    +.. method:: Mailbox.popitem()
    +
    +   Return an arbitrary (*key*, *message*) pair, where *key* is a key and *message*
    +   is a message representation, and delete the corresponding message. If the
    +   mailbox is empty, raise a :exc:`KeyError` exception. The message is represented
    +   as an instance of the appropriate format-specific :class:`Message` subclass
    +   unless a custom message factory was specified when the :class:`Mailbox` instance
    +   was initialized.
    +
    +
    +.. method:: Mailbox.update(arg)
    +
    +   Parameter *arg* should be a *key*-to-*message* mapping or an iterable of (*key*,
    +   *message*) pairs. Updates the mailbox so that, for each given *key* and
    +   *message*, the message corresponding to *key* is set to *message* as if by using
    +   :meth:`__setitem__`. As with :meth:`__setitem__`, each *key* must already
    +   correspond to a message in the mailbox or else a :exc:`KeyError` exception will
    +   be raised, so in general it is incorrect for *arg* to be a :class:`Mailbox`
    +   instance.
    +
    +   .. note::
    +
    +      Unlike with dictionaries, keyword arguments are not supported.
    +
    +
    +.. method:: Mailbox.flush()
    +
    +   Write any pending changes to the filesystem. For some :class:`Mailbox`
    +   subclasses, changes are always written immediately and :meth:`flush` does
    +   nothing, but you should still make a habit of calling this method.
    +
    +
    +.. method:: Mailbox.lock()
    +
    +   Acquire an exclusive advisory lock on the mailbox so that other processes know
    +   not to modify it. An :exc:`ExternalClashError` is raised if the lock is not
    +   available. The particular locking mechanisms used depend upon the mailbox
    +   format.  You should *always* lock the mailbox before making any modifications
    +   to its contents.
    +
    +
    +.. method:: Mailbox.unlock()
    +
    +   Release the lock on the mailbox, if any.
    +
    +
    +.. method:: Mailbox.close()
    +
    +   Flush the mailbox, unlock it if necessary, and close any open files. For some
    +   :class:`Mailbox` subclasses, this method does nothing.
    +
    +
    +.. _mailbox-maildir:
    +
    +:class:`Maildir`
    +^^^^^^^^^^^^^^^^
    +
    +
    +.. class:: Maildir(dirname[, factory=rfc822.Message[, create=True]])
    +
    +   A subclass of :class:`Mailbox` for mailboxes in Maildir format. Parameter
    +   *factory* is a callable object that accepts a file-like message representation
    +   (which behaves as if opened in binary mode) and returns a custom representation.
    +   If *factory* is ``None``, :class:`MaildirMessage` is used as the default message
    +   representation. If *create* is ``True``, the mailbox is created if it does not
    +   exist.
    +
    +   It is for historical reasons that *factory* defaults to :class:`rfc822.Message`
    +   and that *dirname* is named as such rather than *path*. For a :class:`Maildir`
    +   instance that behaves like instances of other :class:`Mailbox` subclasses, set
    +   *factory* to ``None``.
    +
    +Maildir is a directory-based mailbox format invented for the qmail mail transfer
    +agent and now widely supported by other programs. Messages in a Maildir mailbox
    +are stored in separate files within a common directory structure. This design
    +allows Maildir mailboxes to be accessed and modified by multiple unrelated
    +programs without data corruption, so file locking is unnecessary.
    +
    +Maildir mailboxes contain three subdirectories, namely: :file:`tmp`,
    +:file:`new`, and :file:`cur`. Messages are created momentarily in the
    +:file:`tmp` subdirectory and then moved to the :file:`new` subdirectory to
    +finalize delivery. A mail user agent may subsequently move the message to the
    +:file:`cur` subdirectory and store information about the state of the message in
    +a special "info" section appended to its file name.
    +
    +Folders of the style introduced by the Courier mail transfer agent are also
    +supported. Any subdirectory of the main mailbox is considered a folder if
    +``'.'`` is the first character in its name. Folder names are represented by
    +:class:`Maildir` without the leading ``'.'``. Each folder is itself a Maildir
    +mailbox but should not contain other folders. Instead, a logical nesting is
    +indicated using ``'.'`` to delimit levels, e.g., "Archived.2005.07".
    +
    +.. note::
    +
    +   The Maildir specification requires the use of a colon (``':'``) in certain
    +   message file names. However, some operating systems do not permit this character
    +   in file names. If you wish to use a Maildir-like format on such an operating
    +   system, you should specify another character to use instead. The exclamation
    +   point (``'!'``) is a popular choice. For example::
    +
    +      import mailbox
    +      mailbox.Maildir.colon = '!'
    +
    +   The :attr:`colon` attribute may also be set on a per-instance basis.
    +
    +:class:`Maildir` instances have all of the methods of :class:`Mailbox` in
    +addition to the following:
    +
    +
    +.. method:: Maildir.list_folders()
    +
    +   Return a list of the names of all folders.
    +
    +
    +.. method:: Maildir.get_folder(folder)
    +
    +   Return a :class:`Maildir` instance representing the folder whose name is
    +   *folder*. A :exc:`NoSuchMailboxError` exception is raised if the folder does not
    +   exist.
    +
    +
    +.. method:: Maildir.add_folder(folder)
    +
    +   Create a folder whose name is *folder* and return a :class:`Maildir` instance
    +   representing it.
    +
    +
    +.. method:: Maildir.remove_folder(folder)
    +
    +   Delete the folder whose name is *folder*. If the folder contains any messages, a
    +   :exc:`NotEmptyError` exception will be raised and the folder will not be
    +   deleted.
    +
    +
    +.. method:: Maildir.clean()
    +
    +   Delete temporary files from the mailbox that have not been accessed in the last
    +   36 hours. The Maildir specification says that mail-reading programs should do
    +   this occasionally.
    +
    +Some :class:`Mailbox` methods implemented by :class:`Maildir` deserve special
    +remarks:
    +
    +
    +.. method:: Maildir.add(message)
    +            Maildir.__setitem__(key, message)
    +            Maildir.update(arg)
    +
    +   .. warning::
    +
    +      These methods generate unique file names based upon the current process ID. When
    +      using multiple threads, undetected name clashes may occur and cause corruption
    +      of the mailbox unless threads are coordinated to avoid using these methods to
    +      manipulate the same mailbox simultaneously.
    +
    +
    +.. method:: Maildir.flush()
    +
    +   All changes to Maildir mailboxes are immediately applied, so this method does
    +   nothing.
    +
    +
    +.. method:: Maildir.lock()
    +            Maildir.unlock()
    +
    +   Maildir mailboxes do not support (or require) locking, so these methods do
    +   nothing.
    +
    +
    +.. method:: Maildir.close()
    +
    +   :class:`Maildir` instances do not keep any open files and the underlying
    +   mailboxes do not support locking, so this method does nothing.
    +
    +
    +.. method:: Maildir.get_file(key)
    +
    +   Depending upon the host platform, it may not be possible to modify or remove the
    +   underlying message while the returned file remains open.
    +
    +
    +.. seealso::
    +
    +   `maildir man page from qmail `_
    +      The original specification of the format.
    +
    +   `Using maildir format `_
    +      Notes on Maildir by its inventor. Includes an updated name-creation scheme and
    +      details on "info" semantics.
    +
    +   `maildir man page from Courier `_
    +      Another specification of the format. Describes a common extension for supporting
    +      folders.
    +
    +
    +.. _mailbox-mbox:
    +
    +:class:`mbox`
    +^^^^^^^^^^^^^
    +
    +
    +.. class:: mbox(path[, factory=None[, create=True]])
    +
    +   A subclass of :class:`Mailbox` for mailboxes in mbox format. Parameter *factory*
    +   is a callable object that accepts a file-like message representation (which
    +   behaves as if opened in binary mode) and returns a custom representation. If
    +   *factory* is ``None``, :class:`mboxMessage` is used as the default message
    +   representation. If *create* is ``True``, the mailbox is created if it does not
    +   exist.
    +
    +The mbox format is the classic format for storing mail on Unix systems. All
    +messages in an mbox mailbox are stored in a single file with the beginning of
    +each message indicated by a line whose first five characters are "From ".
    +
    +Several variations of the mbox format exist to address perceived shortcomings in
    +the original. In the interest of compatibility, :class:`mbox` implements the
    +original format, which is sometimes referred to as :dfn:`mboxo`. This means that
    +the :mailheader:`Content-Length` header, if present, is ignored and that any
    +occurrences of "From " at the beginning of a line in a message body are
    +transformed to ">From " when storing the message, although occurrences of ">From
    +" are not transformed to "From " when reading the message.
    +
    +Some :class:`Mailbox` methods implemented by :class:`mbox` deserve special
    +remarks:
    +
    +
    +.. method:: mbox.get_file(key)
    +
    +   Using the file after calling :meth:`flush` or :meth:`close` on the :class:`mbox`
    +   instance may yield unpredictable results or raise an exception.
    +
    +
    +.. method:: mbox.lock()
    +            mbox.unlock()
    +
    +   Three locking mechanisms are used---dot locking and, if available, the
    +   :cfunc:`flock` and :cfunc:`lockf` system calls.
    +
    +
    +.. seealso::
    +
    +   `mbox man page from qmail `_
    +      A specification of the format and its variations.
    +
    +   `mbox man page from tin `_
    +      Another specification of the format, with details on locking.
    +
    +   `Configuring Netscape Mail on Unix: Why The Content-Length Format is Bad `_
    +      An argument for using the original mbox format rather than a variation.
    +
    +   `"mbox" is a family of several mutually incompatible mailbox formats `_
    +      A history of mbox variations.
    +
    +
    +.. _mailbox-mh:
    +
    +:class:`MH`
    +^^^^^^^^^^^
    +
    +
    +.. class:: MH(path[, factory=None[, create=True]])
    +
    +   A subclass of :class:`Mailbox` for mailboxes in MH format. Parameter *factory*
    +   is a callable object that accepts a file-like message representation (which
    +   behaves as if opened in binary mode) and returns a custom representation. If
    +   *factory* is ``None``, :class:`MHMessage` is used as the default message
    +   representation. If *create* is ``True``, the mailbox is created if it does not
    +   exist.
    +
    +MH is a directory-based mailbox format invented for the MH Message Handling
    +System, a mail user agent. Each message in an MH mailbox resides in its own
    +file. An MH mailbox may contain other MH mailboxes (called :dfn:`folders`) in
    +addition to messages. Folders may be nested indefinitely. MH mailboxes also
    +support :dfn:`sequences`, which are named lists used to logically group messages
    +without moving them to sub-folders. Sequences are defined in a file called
    +:file:`.mh_sequences` in each folder.
    +
    +The :class:`MH` class manipulates MH mailboxes, but it does not attempt to
    +emulate all of :program:`mh`'s behaviors. In particular, it does not modify and
    +is not affected by the :file:`context` or :file:`.mh_profile` files that are
    +used by :program:`mh` to store its state and configuration.
    +
    +:class:`MH` instances have all of the methods of :class:`Mailbox` in addition to
    +the following:
    +
    +
    +.. method:: MH.list_folders()
    +
    +   Return a list of the names of all folders.
    +
    +
    +.. method:: MH.get_folder(folder)
    +
    +   Return an :class:`MH` instance representing the folder whose name is *folder*. A
    +   :exc:`NoSuchMailboxError` exception is raised if the folder does not exist.
    +
    +
    +.. method:: MH.add_folder(folder)
    +
    +   Create a folder whose name is *folder* and return an :class:`MH` instance
    +   representing it.
    +
    +
    +.. method:: MH.remove_folder(folder)
    +
    +   Delete the folder whose name is *folder*. If the folder contains any messages, a
    +   :exc:`NotEmptyError` exception will be raised and the folder will not be
    +   deleted.
    +
    +
    +.. method:: MH.get_sequences()
    +
    +   Return a dictionary of sequence names mapped to key lists. If there are no
    +   sequences, the empty dictionary is returned.
    +
    +
    +.. method:: MH.set_sequences(sequences)
    +
    +   Re-define the sequences that exist in the mailbox based upon *sequences*, a
    +   dictionary of names mapped to key lists, like that returned by :meth:`get_sequences`.
    +
    +
    +.. method:: MH.pack()
    +
    +   Rename messages in the mailbox as necessary to eliminate gaps in numbering.
    +   Entries in the sequences list are updated correspondingly.
    +
    +   .. note::
    +
    +      Already-issued keys are invalidated by this operation and should not be
    +      subsequently used.
    +
    +Some :class:`Mailbox` methods implemented by :class:`MH` deserve special
    +remarks:
    +
    +
    +.. method:: MH.remove(key)
    +            MH.__delitem__(key)
    +            MH.discard(key)
    +
    +   These methods immediately delete the message. The MH convention of marking a
    +   message for deletion by prepending a comma to its name is not used.
    +
    +
    +.. method:: MH.lock()
    +            MH.unlock()
    +
    +   Three locking mechanisms are used---dot locking and, if available, the
    +   :cfunc:`flock` and :cfunc:`lockf` system calls. For MH mailboxes, locking the
    +   mailbox means locking the :file:`.mh_sequences` file and, only for the duration
    +   of any operations that affect them, locking individual message files.
    +
    +
    +.. method:: MH.get_file(key)
    +
    +   Depending upon the host platform, it may not be possible to remove the
    +   underlying message while the returned file remains open.
    +
    +
    +.. method:: MH.flush()
    +
    +   All changes to MH mailboxes are immediately applied, so this method does
    +   nothing.
    +
    +
    +.. method:: MH.close()
    +
    +   :class:`MH` instances do not keep any open files, so this method is equivalent
    +   to :meth:`unlock`.
    +
    +
    +.. seealso::
    +
    +   `nmh - Message Handling System `_
    +      Home page of :program:`nmh`, an updated version of the original :program:`mh`.
    +
    +   `MH & nmh: Email for Users & Programmers `_
    +      A GPL-licensed book on :program:`mh` and :program:`nmh`, with some information
    +      on the mailbox format.
    +
    +
    +.. _mailbox-babyl:
    +
    +:class:`Babyl`
    +^^^^^^^^^^^^^^
    +
    +
    +.. class:: Babyl(path[, factory=None[, create=True]])
    +
    +   A subclass of :class:`Mailbox` for mailboxes in Babyl format. Parameter
    +   *factory* is a callable object that accepts a file-like message representation
    +   (which behaves as if opened in binary mode) and returns a custom representation.
    +   If *factory* is ``None``, :class:`BabylMessage` is used as the default message
    +   representation. If *create* is ``True``, the mailbox is created if it does not
    +   exist.
    +
    +Babyl is a single-file mailbox format used by the Rmail mail user agent included
    +with Emacs. The beginning of a message is indicated by a line containing the two
    +characters Control-Underscore (``'\037'``) and Control-L (``'\014'``). The end
    +of a message is indicated by the start of the next message or, in the case of
    +the last message, a line containing a Control-Underscore (``'\037'``)
    +character.
    +
    +Messages in a Babyl mailbox have two sets of headers, original headers and
    +so-called visible headers. Visible headers are typically a subset of the
    +original headers that have been reformatted or abridged to be more
    +attractive. Each message in a Babyl mailbox also has an accompanying list of
    +:dfn:`labels`, or short strings that record extra information about the message,
    +and a list of all user-defined labels found in the mailbox is kept in the Babyl
    +options section.
    +
    +:class:`Babyl` instances have all of the methods of :class:`Mailbox` in addition
    +to the following:
    +
    +
    +.. method:: Babyl.get_labels()
    +
    +   Return a list of the names of all user-defined labels used in the mailbox.
    +
    +   .. note::
    +
    +      The actual messages are inspected to determine which labels exist in the mailbox
    +      rather than consulting the list of labels in the Babyl options section, but the
    +      Babyl section is updated whenever the mailbox is modified.
    +
    +Some :class:`Mailbox` methods implemented by :class:`Babyl` deserve special
    +remarks:
    +
    +
    +.. method:: Babyl.get_file(key)
    +
    +   In Babyl mailboxes, the headers of a message are not stored contiguously with
    +   the body of the message. To generate a file-like representation, the headers and
    +   body are copied together into a :class:`StringIO` instance (from the
    +   :mod:`StringIO` module), which has an API identical to that of a file. As a
    +   result, the file-like object is truly independent of the underlying mailbox but
    +   does not save memory compared to a string representation.
    +
    +
    +.. method:: Babyl.lock()
    +            Babyl.unlock()
    +
    +   Three locking mechanisms are used---dot locking and, if available, the
    +   :cfunc:`flock` and :cfunc:`lockf` system calls.
    +
    +
    +.. seealso::
    +
    +   `Format of Version 5 Babyl Files `_
    +      A specification of the Babyl format.
    +
    +   `Reading Mail with Rmail `_
    +      The Rmail manual, with some information on Babyl semantics.
    +
    +
    +.. _mailbox-mmdf:
    +
    +:class:`MMDF`
    +^^^^^^^^^^^^^
    +
    +
    +.. class:: MMDF(path[, factory=None[, create=True]])
    +
    +   A subclass of :class:`Mailbox` for mailboxes in MMDF format. Parameter *factory*
    +   is a callable object that accepts a file-like message representation (which
    +   behaves as if opened in binary mode) and returns a custom representation. If
    +   *factory* is ``None``, :class:`MMDFMessage` is used as the default message
    +   representation. If *create* is ``True``, the mailbox is created if it does not
    +   exist.
    +
    +MMDF is a single-file mailbox format invented for the Multichannel Memorandum
    +Distribution Facility, a mail transfer agent. Each message is in the same form
    +as an mbox message but is bracketed before and after by lines containing four
    +Control-A (``'\001'``) characters. As with the mbox format, the beginning of
    +each message is indicated by a line whose first five characters are "From ", but
    +additional occurrences of "From " are not transformed to ">From " when storing
    +messages because the extra message separator lines prevent mistaking such
    +occurrences for the starts of subsequent messages.
    +
    +Some :class:`Mailbox` methods implemented by :class:`MMDF` deserve special
    +remarks:
    +
    +
    +.. method:: MMDF.get_file(key)
    +
    +   Using the file after calling :meth:`flush` or :meth:`close` on the :class:`MMDF`
    +   instance may yield unpredictable results or raise an exception.
    +
    +
    +.. method:: MMDF.lock()
    +            MMDF.unlock()
    +
    +   Three locking mechanisms are used---dot locking and, if available, the
    +   :cfunc:`flock` and :cfunc:`lockf` system calls.
    +
    +
    +.. seealso::
    +
    +   `mmdf man page from tin `_
    +      A specification of MMDF format from the documentation of tin, a newsreader.
    +
    +   `MMDF `_
    +      A Wikipedia article describing the Multichannel Memorandum Distribution
    +      Facility.
    +
    +
    +.. _mailbox-message-objects:
    +
    +:class:`Message` objects
    +------------------------
    +
    +
    +.. class:: Message([message])
    +
    +   A subclass of the :mod:`email.Message` module's :class:`Message`. Subclasses of
    +   :class:`mailbox.Message` add mailbox-format-specific state and behavior.
    +
    +   If *message* is omitted, the new instance is created in a default, empty state.
    +   If *message* is an :class:`email.Message.Message` instance, its contents are
    +   copied; furthermore, any format-specific information is converted insofar as
    +   possible if *message* is a :class:`Message` instance. If *message* is a string
    +   or a file, it should contain an :rfc:`2822`\ -compliant message, which is read
    +   and parsed.
    +
    +The format-specific state and behaviors offered by subclasses vary, but in
    +general it is only the properties that are not specific to a particular mailbox
    +that are supported (although presumably the properties are specific to a
    +particular mailbox format). For example, file offsets for single-file mailbox
    +formats and file names for directory-based mailbox formats are not retained,
    +because they are only applicable to the original mailbox. But state such as
    +whether a message has been read by the user or marked as important is retained,
    +because it applies to the message itself.
    +
    +There is no requirement that :class:`Message` instances be used to represent
    +messages retrieved using :class:`Mailbox` instances. In some situations, the
    +time and memory required to generate :class:`Message` representations might not
    +be acceptable. For such situations, :class:`Mailbox` instances also offer
    +string and file-like representations, and a custom message factory may be
    +specified when a :class:`Mailbox` instance is initialized.
    +
    +
    +.. _mailbox-maildirmessage:
    +
    +:class:`MaildirMessage`
    +^^^^^^^^^^^^^^^^^^^^^^^
    +
    +
    +.. class:: MaildirMessage([message])
    +
    +   A message with Maildir-specific behaviors. Parameter *message* has the same
    +   meaning as with the :class:`Message` constructor.
    +
    +Typically, a mail user agent application moves all of the messages in the
    +:file:`new` subdirectory to the :file:`cur` subdirectory after the first time
    +the user opens and closes the mailbox, recording that the messages are old
    +whether or not they've actually been read. Each message in :file:`cur` has an
    +"info" section added to its file name to store information about its state.
    +(Some mail readers may also add an "info" section to messages in :file:`new`.)
    +The "info" section may take one of two forms: it may contain "2," followed by a
    +list of standardized flags (e.g., "2,FR") or it may contain "1," followed by
    +so-called experimental information. Standard flags for Maildir messages are as
    +follows:
    +
    ++------+---------+--------------------------------+
    +| Flag | Meaning | Explanation                    |
    ++======+=========+================================+
    +| D    | Draft   | Under composition              |
    ++------+---------+--------------------------------+
    +| F    | Flagged | Marked as important            |
    ++------+---------+--------------------------------+
    +| P    | Passed  | Forwarded, resent, or bounced  |
    ++------+---------+--------------------------------+
    +| R    | Replied | Replied to                     |
    ++------+---------+--------------------------------+
    +| S    | Seen    | Read                           |
    ++------+---------+--------------------------------+
    +| T    | Trashed | Marked for subsequent deletion |
    ++------+---------+--------------------------------+
    +
    +:class:`MaildirMessage` instances offer the following methods:
    +
    +
    +.. method:: MaildirMessage.get_subdir()
    +
    +   Return either "new" (if the message should be stored in the :file:`new`
    +   subdirectory) or "cur" (if the message should be stored in the :file:`cur`
    +   subdirectory).
    +
    +   .. note::
    +
    +      A message is typically moved from :file:`new` to :file:`cur` after its mailbox
    +      has been accessed, whether or not the message has been read. A message
    +      ``msg`` has been read if ``"S" in msg.get_flags()`` is ``True``.
    +
    +
    +.. method:: MaildirMessage.set_subdir(subdir)
    +
    +   Set the subdirectory the message should be stored in. Parameter *subdir* must be
    +   either "new" or "cur".
    +
    +
    +.. method:: MaildirMessage.get_flags()
    +
    +   Return a string specifying the flags that are currently set. If the message
    +   complies with the standard Maildir format, the result is the concatenation in
    +   alphabetical order of zero or one occurrence of each of ``'D'``, ``'F'``,
    +   ``'P'``, ``'R'``, ``'S'``, and ``'T'``. The empty string is returned if no flags
    +   are set or if "info" contains experimental semantics.
    +
    +
    +.. method:: MaildirMessage.set_flags(flags)
    +
    +   Set the flags specified by *flags* and unset all others.
    +
    +
    +.. method:: MaildirMessage.add_flag(flag)
    +
    +   Set the flag(s) specified by *flag* without changing other flags. To add more
    +   than one flag at a time, *flag* may be a string of more than one character. The
    +   current "info" is overwritten whether or not it contains experimental
    +   information rather than flags.
    +
    +
    +.. method:: MaildirMessage.remove_flag(flag)
    +
    +   Unset the flag(s) specified by *flag* without changing other flags. To remove
    +   more than one flag at a time, *flag* may be a string of more than one character.
    +   If "info" contains experimental information rather than flags, the current
    +   "info" is not modified.
    +
    +
    +.. method:: MaildirMessage.get_date()
    +
    +   Return the delivery date of the message as a floating-point number representing
    +   seconds since the epoch.
    +
    +
    +.. method:: MaildirMessage.set_date(date)
    +
    +   Set the delivery date of the message to *date*, a floating-point number
    +   representing seconds since the epoch.
    +
    +
    +.. method:: MaildirMessage.get_info()
    +
    +   Return a string containing the "info" for a message. This is useful for
    +   accessing and modifying "info" that is experimental (i.e., not a list of flags).
    +
    +
    +.. method:: MaildirMessage.set_info(info)
    +
    +   Set "info" to *info*, which should be a string.
    +
    +When a :class:`MaildirMessage` instance is created based upon an
    +:class:`mboxMessage` or :class:`MMDFMessage` instance, the :mailheader:`Status`
    +and :mailheader:`X-Status` headers are omitted and the following conversions
    +take place:
    +
    ++--------------------+----------------------------------------------+
    +| Resulting state    | :class:`mboxMessage` or :class:`MMDFMessage` |
    +|                    | state                                        |
    ++====================+==============================================+
    +| "cur" subdirectory | O flag                                       |
    ++--------------------+----------------------------------------------+
    +| F flag             | F flag                                       |
    ++--------------------+----------------------------------------------+
    +| R flag             | A flag                                       |
    ++--------------------+----------------------------------------------+
    +| S flag             | R flag                                       |
    ++--------------------+----------------------------------------------+
    +| T flag             | D flag                                       |
    ++--------------------+----------------------------------------------+
    +
    +When a :class:`MaildirMessage` instance is created based upon an
    +:class:`MHMessage` instance, the following conversions take place:
    +
    ++-------------------------------+--------------------------+
    +| Resulting state               | :class:`MHMessage` state |
    ++===============================+==========================+
    +| "cur" subdirectory            | "unseen" sequence        |
    ++-------------------------------+--------------------------+
    +| "cur" subdirectory and S flag | no "unseen" sequence     |
    ++-------------------------------+--------------------------+
    +| F flag                        | "flagged" sequence       |
    ++-------------------------------+--------------------------+
    +| R flag                        | "replied" sequence       |
    ++-------------------------------+--------------------------+
    +
    +When a :class:`MaildirMessage` instance is created based upon a
    +:class:`BabylMessage` instance, the following conversions take place:
    +
    ++-------------------------------+-------------------------------+
    +| Resulting state               | :class:`BabylMessage` state   |
    ++===============================+===============================+
    +| "cur" subdirectory            | "unseen" label                |
    ++-------------------------------+-------------------------------+
    +| "cur" subdirectory and S flag | no "unseen" label             |
    ++-------------------------------+-------------------------------+
    +| P flag                        | "forwarded" or "resent" label |
    ++-------------------------------+-------------------------------+
    +| R flag                        | "answered" label              |
    ++-------------------------------+-------------------------------+
    +| T flag                        | "deleted" label               |
    ++-------------------------------+-------------------------------+
    +
    +
    +.. _mailbox-mboxmessage:
    +
    +:class:`mboxMessage`
    +^^^^^^^^^^^^^^^^^^^^
    +
    +
    +.. class:: mboxMessage([message])
    +
    +   A message with mbox-specific behaviors. Parameter *message* has the same meaning
    +   as with the :class:`Message` constructor.
    +
    +Messages in an mbox mailbox are stored together in a single file. The sender's
    +envelope address and the time of delivery are typically stored in a line
    +beginning with "From " that is used to indicate the start of a message, though
    +there is considerable variation in the exact format of this data among mbox
    +implementations. Flags that indicate the state of the message, such as whether
    +it has been read or marked as important, are typically stored in
    +:mailheader:`Status` and :mailheader:`X-Status` headers.
    +
    +Conventional flags for mbox messages are as follows:
    +
    ++------+----------+--------------------------------+
    +| Flag | Meaning  | Explanation                    |
    ++======+==========+================================+
    +| R    | Read     | Read                           |
    ++------+----------+--------------------------------+
    +| O    | Old      | Previously detected by MUA     |
    ++------+----------+--------------------------------+
    +| D    | Deleted  | Marked for subsequent deletion |
    ++------+----------+--------------------------------+
    +| F    | Flagged  | Marked as important            |
    ++------+----------+--------------------------------+
    +| A    | Answered | Replied to                     |
    ++------+----------+--------------------------------+
    +
    +The "R" and "O" flags are stored in the :mailheader:`Status` header, and the
    +"D", "F", and "A" flags are stored in the :mailheader:`X-Status` header. The
    +flags and headers typically appear in the order mentioned.
    +
    +:class:`mboxMessage` instances offer the following methods:
    +
    +
    +.. method:: mboxMessage.get_from()
    +
    +   Return a string representing the "From " line that marks the start of the
    +   message in an mbox mailbox. The leading "From " and the trailing newline are
    +   excluded.
    +
    +
    +.. method:: mboxMessage.set_from(from_[, time_=None])
    +
    +   Set the "From " line to *from_*, which should be specified without a leading
    +   "From " or trailing newline. For convenience, *time_* may be specified and will
    +   be formatted appropriately and appended to *from_*. If *time_* is specified, it
    +   should be a :class:`struct_time` instance, a tuple suitable for passing to
    +   :meth:`time.strftime`, or ``True`` (to use :meth:`time.gmtime`).
    +
    +
    +.. method:: mboxMessage.get_flags()
    +
    +   Return a string specifying the flags that are currently set. If the message
    +   complies with the conventional format, the result is the concatenation in the
    +   following order of zero or one occurrence of each of ``'R'``, ``'O'``, ``'D'``,
    +   ``'F'``, and ``'A'``.
    +
    +
    +.. method:: mboxMessage.set_flags(flags)
    +
    +   Set the flags specified by *flags* and unset all others. Parameter *flags*
    +   should be the concatenation in any order of zero or more occurrences of each of
    +   ``'R'``, ``'O'``, ``'D'``, ``'F'``, and ``'A'``.
    +
    +
    +.. method:: mboxMessage.add_flag(flag)
    +
    +   Set the flag(s) specified by *flag* without changing other flags. To add more
    +   than one flag at a time, *flag* may be a string of more than one character.
    +
    +
    +.. method:: mboxMessage.remove_flag(flag)
    +
    +   Unset the flag(s) specified by *flag* without changing other flags. To remove
    +   more than one flag at a time, *flag* may be a string of more than one character.
    +
    +When an :class:`mboxMessage` instance is created based upon a
    +:class:`MaildirMessage` instance, a "From " line is generated based upon the
    +:class:`MaildirMessage` instance's delivery date, and the following conversions
    +take place:
    +
    ++-----------------+-------------------------------+
    +| Resulting state | :class:`MaildirMessage` state |
    ++=================+===============================+
    +| R flag          | S flag                        |
    ++-----------------+-------------------------------+
    +| O flag          | "cur" subdirectory            |
    ++-----------------+-------------------------------+
    +| D flag          | T flag                        |
    ++-----------------+-------------------------------+
    +| F flag          | F flag                        |
    ++-----------------+-------------------------------+
    +| A flag          | R flag                        |
    ++-----------------+-------------------------------+
    +
    +When an :class:`mboxMessage` instance is created based upon an
    +:class:`MHMessage` instance, the following conversions take place:
    +
    ++-------------------+--------------------------+
    +| Resulting state   | :class:`MHMessage` state |
    ++===================+==========================+
    +| R flag and O flag | no "unseen" sequence     |
    ++-------------------+--------------------------+
    +| O flag            | "unseen" sequence        |
    ++-------------------+--------------------------+
    +| F flag            | "flagged" sequence       |
    ++-------------------+--------------------------+
    +| A flag            | "replied" sequence       |
    ++-------------------+--------------------------+
    +
    +When an :class:`mboxMessage` instance is created based upon a
    +:class:`BabylMessage` instance, the following conversions take place:
    +
    ++-------------------+-----------------------------+
    +| Resulting state   | :class:`BabylMessage` state |
    ++===================+=============================+
    +| R flag and O flag | no "unseen" label           |
    ++-------------------+-----------------------------+
    +| O flag            | "unseen" label              |
    ++-------------------+-----------------------------+
    +| D flag            | "deleted" label             |
    ++-------------------+-----------------------------+
    +| A flag            | "answered" label            |
    ++-------------------+-----------------------------+
    +
    +When an :class:`mboxMessage` instance is created based upon an
    +:class:`MMDFMessage` instance, the "From " line is copied and all flags directly
    +correspond:
    +
    ++-----------------+----------------------------+
    +| Resulting state | :class:`MMDFMessage` state |
    ++=================+============================+
    +| R flag          | R flag                     |
    ++-----------------+----------------------------+
    +| O flag          | O flag                     |
    ++-----------------+----------------------------+
    +| D flag          | D flag                     |
    ++-----------------+----------------------------+
    +| F flag          | F flag                     |
    ++-----------------+----------------------------+
    +| A flag          | A flag                     |
    ++-----------------+----------------------------+
    +
    +
    +.. _mailbox-mhmessage:
    +
    +:class:`MHMessage`
    +^^^^^^^^^^^^^^^^^^
    +
    +
    +.. class:: MHMessage([message])
    +
    +   A message with MH-specific behaviors. Parameter *message* has the same meaning
    +   as with the :class:`Message` constructor.
    +
    +MH messages do not support marks or flags in the traditional sense, but they do
    +support sequences, which are logical groupings of arbitrary messages. Some mail
    +reading programs (although not the standard :program:`mh` and :program:`nmh`)
    +use sequences in much the same way flags are used with other formats, as
    +follows:
    +
    ++----------+------------------------------------------+
    +| Sequence | Explanation                              |
    ++==========+==========================================+
    +| unseen   | Not read, but previously detected by MUA |
    ++----------+------------------------------------------+
    +| replied  | Replied to                               |
    ++----------+------------------------------------------+
    +| flagged  | Marked as important                      |
    ++----------+------------------------------------------+
    +
    +:class:`MHMessage` instances offer the following methods:
    +
    +
    +.. method:: MHMessage.get_sequences()
    +
    +   Return a list of the names of sequences that include this message.
    +
    +
    +.. method:: MHMessage.set_sequences(sequences)
    +
    +   Set the list of sequences that include this message.
    +
    +
    +.. method:: MHMessage.add_sequence(sequence)
    +
    +   Add *sequence* to the list of sequences that include this message.
    +
    +
    +.. method:: MHMessage.remove_sequence(sequence)
    +
    +   Remove *sequence* from the list of sequences that include this message.
    +
    +When an :class:`MHMessage` instance is created based upon a
    +:class:`MaildirMessage` instance, the following conversions take place:
    +
    ++--------------------+-------------------------------+
    +| Resulting state    | :class:`MaildirMessage` state |
    ++====================+===============================+
    +| "unseen" sequence  | no S flag                     |
    ++--------------------+-------------------------------+
    +| "replied" sequence | R flag                        |
    ++--------------------+-------------------------------+
    +| "flagged" sequence | F flag                        |
    ++--------------------+-------------------------------+
    +
    +When an :class:`MHMessage` instance is created based upon an
    +:class:`mboxMessage` or :class:`MMDFMessage` instance, the :mailheader:`Status`
    +and :mailheader:`X-Status` headers are omitted and the following conversions
    +take place:
    +
    ++--------------------+----------------------------------------------+
    +| Resulting state    | :class:`mboxMessage` or :class:`MMDFMessage` |
    +|                    | state                                        |
    ++====================+==============================================+
    +| "unseen" sequence  | no R flag                                    |
    ++--------------------+----------------------------------------------+
    +| "replied" sequence | A flag                                       |
    ++--------------------+----------------------------------------------+
    +| "flagged" sequence | F flag                                       |
    ++--------------------+----------------------------------------------+
    +
    +When an :class:`MHMessage` instance is created based upon a
    +:class:`BabylMessage` instance, the following conversions take place:
    +
    ++--------------------+-----------------------------+
    +| Resulting state    | :class:`BabylMessage` state |
    ++====================+=============================+
    +| "unseen" sequence  | "unseen" label              |
    ++--------------------+-----------------------------+
    +| "replied" sequence | "answered" label            |
    ++--------------------+-----------------------------+
    +
    +
    +.. _mailbox-babylmessage:
    +
    +:class:`BabylMessage`
    +^^^^^^^^^^^^^^^^^^^^^
    +
    +
    +.. class:: BabylMessage([message])
    +
    +   A message with Babyl-specific behaviors. Parameter *message* has the same
    +   meaning as with the :class:`Message` constructor.
    +
    +Certain message labels, called :dfn:`attributes`, are defined by convention to
    +have special meanings. The attributes are as follows:
    +
    ++-----------+------------------------------------------+
    +| Label     | Explanation                              |
    ++===========+==========================================+
    +| unseen    | Not read, but previously detected by MUA |
    ++-----------+------------------------------------------+
    +| deleted   | Marked for subsequent deletion           |
    ++-----------+------------------------------------------+
    +| filed     | Copied to another file or mailbox        |
    ++-----------+------------------------------------------+
    +| answered  | Replied to                               |
    ++-----------+------------------------------------------+
    +| forwarded | Forwarded                                |
    ++-----------+------------------------------------------+
    +| edited    | Modified by the user                     |
    ++-----------+------------------------------------------+
    +| resent    | Resent                                   |
    ++-----------+------------------------------------------+
    +
    +By default, Rmail displays only visible headers. The :class:`BabylMessage`
    +class, though, uses the original headers because they are more complete. Visible
    +headers may be accessed explicitly if desired.
    +
    +:class:`BabylMessage` instances offer the following methods:
    +
    +
    +.. method:: BabylMessage.get_labels()
    +
    +   Return a list of labels on the message.
    +
    +
    +.. method:: BabylMessage.set_labels(labels)
    +
    +   Set the list of labels on the message to *labels*.
    +
    +
    +.. method:: BabylMessage.add_label(label)
    +
    +   Add *label* to the list of labels on the message.
    +
    +
    +.. method:: BabylMessage.remove_label(label)
    +
    +   Remove *label* from the list of labels on the message.
    +
    +
    +.. method:: BabylMessage.get_visible()
    +
    +   Return a :class:`Message` instance whose headers are the message's visible
    +   headers and whose body is empty.
    +
    +
    +.. method:: BabylMessage.set_visible(visible)
    +
    +   Set the message's visible headers to be the same as the headers in *visible*.
    +   Parameter *visible* should be a :class:`Message` instance, an
    +   :class:`email.Message.Message` instance, a string, or a file-like object (which
    +   should be open in text mode).
    +
    +
    +.. method:: BabylMessage.update_visible()
    +
    +   When a :class:`BabylMessage` instance's original headers are modified, the
    +   visible headers are not automatically modified to correspond. This method
    +   updates the visible headers as follows: each visible header with a corresponding
    +   original header is set to the value of the original header, each visible header
    +   without a corresponding original header is removed, and any of
    +   :mailheader:`Date`, :mailheader:`From`, :mailheader:`Reply-To`,
    +   :mailheader:`To`, :mailheader:`CC`, and :mailheader:`Subject` that are present
    +   in the original headers but not the visible headers are added to the visible
    +   headers.
    +
    +When a :class:`BabylMessage` instance is created based upon a
    +:class:`MaildirMessage` instance, the following conversions take place:
    +
    ++-------------------+-------------------------------+
    +| Resulting state   | :class:`MaildirMessage` state |
    ++===================+===============================+
    +| "unseen" label    | no S flag                     |
    ++-------------------+-------------------------------+
    +| "deleted" label   | T flag                        |
    ++-------------------+-------------------------------+
    +| "answered" label  | R flag                        |
    ++-------------------+-------------------------------+
    +| "forwarded" label | P flag                        |
    ++-------------------+-------------------------------+
    +
    +When a :class:`BabylMessage` instance is created based upon an
    +:class:`mboxMessage` or :class:`MMDFMessage` instance, the :mailheader:`Status`
    +and :mailheader:`X-Status` headers are omitted and the following conversions
    +take place:
    +
    ++------------------+----------------------------------------------+
    +| Resulting state  | :class:`mboxMessage` or :class:`MMDFMessage` |
    +|                  | state                                        |
    ++==================+==============================================+
    +| "unseen" label   | no R flag                                    |
    ++------------------+----------------------------------------------+
    +| "deleted" label  | D flag                                       |
    ++------------------+----------------------------------------------+
    +| "answered" label | A flag                                       |
    ++------------------+----------------------------------------------+
    +
    +When a :class:`BabylMessage` instance is created based upon an
    +:class:`MHMessage` instance, the following conversions take place:
    +
    ++------------------+--------------------------+
    +| Resulting state  | :class:`MHMessage` state |
    ++==================+==========================+
    +| "unseen" label   | "unseen" sequence        |
    ++------------------+--------------------------+
    +| "answered" label | "replied" sequence       |
    ++------------------+--------------------------+
    +
    +
    +.. _mailbox-mmdfmessage:
    +
    +:class:`MMDFMessage`
    +^^^^^^^^^^^^^^^^^^^^
    +
    +
    +.. class:: MMDFMessage([message])
    +
    +   A message with MMDF-specific behaviors. Parameter *message* has the same meaning
    +   as with the :class:`Message` constructor.
    +
    +As with messages in an mbox mailbox, MMDF messages are stored with the sender's
    +address and the delivery date in an initial line beginning with "From ".
    +Likewise, flags that indicate the state of the message are typically stored in
    +:mailheader:`Status` and :mailheader:`X-Status` headers.
    +
    +Conventional flags for MMDF messages are identical to those of mbox messages and
    +are as follows:
    +
    ++------+----------+--------------------------------+
    +| Flag | Meaning  | Explanation                    |
    ++======+==========+================================+
    +| R    | Read     | Read                           |
    ++------+----------+--------------------------------+
    +| O    | Old      | Previously detected by MUA     |
    ++------+----------+--------------------------------+
    +| D    | Deleted  | Marked for subsequent deletion |
    ++------+----------+--------------------------------+
    +| F    | Flagged  | Marked as important            |
    ++------+----------+--------------------------------+
    +| A    | Answered | Replied to                     |
    ++------+----------+--------------------------------+
    +
    +The "R" and "O" flags are stored in the :mailheader:`Status` header, and the
    +"D", "F", and "A" flags are stored in the :mailheader:`X-Status` header. The
    +flags and headers typically appear in the order mentioned.
    +
    +:class:`MMDFMessage` instances offer the following methods, which are identical
    +to those offered by :class:`mboxMessage`:
    +
    +
    +.. method:: MMDFMessage.get_from()
    +
    +   Return a string representing the "From " line that marks the start of the
    +   message in an MMDF mailbox. The leading "From " and the trailing newline are
    +   excluded.
    +
    +
    +.. method:: MMDFMessage.set_from(from_[, time_=None])
    +
    +   Set the "From " line to *from_*, which should be specified without a leading
    +   "From " or trailing newline. For convenience, *time_* may be specified and will
    +   be formatted appropriately and appended to *from_*. If *time_* is specified, it
    +   should be a :class:`struct_time` instance, a tuple suitable for passing to
    +   :meth:`time.strftime`, or ``True`` (to use :meth:`time.gmtime`).
    +
    +
    +.. method:: MMDFMessage.get_flags()
    +
    +   Return a string specifying the flags that are currently set. If the message
    +   complies with the conventional format, the result is the concatenation in the
    +   following order of zero or one occurrence of each of ``'R'``, ``'O'``, ``'D'``,
    +   ``'F'``, and ``'A'``.
    +
    +
    +.. method:: MMDFMessage.set_flags(flags)
    +
    +   Set the flags specified by *flags* and unset all others. Parameter *flags*
    +   should be the concatenation in any order of zero or more occurrences of each of
    +   ``'R'``, ``'O'``, ``'D'``, ``'F'``, and ``'A'``.
    +
    +
    +.. method:: MMDFMessage.add_flag(flag)
    +
    +   Set the flag(s) specified by *flag* without changing other flags. To add more
    +   than one flag at a time, *flag* may be a string of more than one character.
    +
    +
    +.. method:: MMDFMessage.remove_flag(flag)
    +
    +   Unset the flag(s) specified by *flag* without changing other flags. To remove
    +   more than one flag at a time, *flag* may be a string of more than one character.
    +
    +When an :class:`MMDFMessage` instance is created based upon a
    +:class:`MaildirMessage` instance, a "From " line is generated based upon the
    +:class:`MaildirMessage` instance's delivery date, and the following conversions
    +take place:
    +
    ++-----------------+-------------------------------+
    +| Resulting state | :class:`MaildirMessage` state |
    ++=================+===============================+
    +| R flag          | S flag                        |
    ++-----------------+-------------------------------+
    +| O flag          | "cur" subdirectory            |
    ++-----------------+-------------------------------+
    +| D flag          | T flag                        |
    ++-----------------+-------------------------------+
    +| F flag          | F flag                        |
    ++-----------------+-------------------------------+
    +| A flag          | R flag                        |
    ++-----------------+-------------------------------+
    +
    +When an :class:`MMDFMessage` instance is created based upon an
    +:class:`MHMessage` instance, the following conversions take place:
    +
    ++-------------------+--------------------------+
    +| Resulting state   | :class:`MHMessage` state |
    ++===================+==========================+
    +| R flag and O flag | no "unseen" sequence     |
    ++-------------------+--------------------------+
    +| O flag            | "unseen" sequence        |
    ++-------------------+--------------------------+
    +| F flag            | "flagged" sequence       |
    ++-------------------+--------------------------+
    +| A flag            | "replied" sequence       |
    ++-------------------+--------------------------+
    +
    +When an :class:`MMDFMessage` instance is created based upon a
    +:class:`BabylMessage` instance, the following conversions take place:
    +
    ++-------------------+-----------------------------+
    +| Resulting state   | :class:`BabylMessage` state |
    ++===================+=============================+
    +| R flag and O flag | no "unseen" label           |
    ++-------------------+-----------------------------+
    +| O flag            | "unseen" label              |
    ++-------------------+-----------------------------+
    +| D flag            | "deleted" label             |
    ++-------------------+-----------------------------+
    +| A flag            | "answered" label            |
    ++-------------------+-----------------------------+
    +
    +When an :class:`MMDFMessage` instance is created based upon an
    +:class:`mboxMessage` instance, the "From " line is copied and all flags directly
    +correspond:
    +
    ++-----------------+----------------------------+
    +| Resulting state | :class:`mboxMessage` state |
    ++=================+============================+
    +| R flag          | R flag                     |
    ++-----------------+----------------------------+
    +| O flag          | O flag                     |
    ++-----------------+----------------------------+
    +| D flag          | D flag                     |
    ++-----------------+----------------------------+
    +| F flag          | F flag                     |
    ++-----------------+----------------------------+
    +| A flag          | A flag                     |
    ++-----------------+----------------------------+
    +
    +
    +Exceptions
    +----------
    +
    +The following exception classes are defined in the :mod:`mailbox` module:
    +
    +
    +.. class:: Error()
    +
    +   The base class for all other module-specific exceptions.
    +
    +
    +.. class:: NoSuchMailboxError()
    +
    +   Raised when a mailbox is expected but is not found, such as when instantiating a
    +   :class:`Mailbox` subclass with a path that does not exist (and with the *create*
    +   parameter set to ``False``), or when opening a folder that does not exist.
    +
    +
    +.. class:: NotEmptyError()
    +
    +   Raised when a mailbox is not empty but is expected to be, such as when deleting
    +   a folder that contains messages.
    +
    +
    +.. class:: ExternalClashError()
    +
    +   Raised when some mailbox-related condition beyond the control of the program
    +   causes it to be unable to proceed, such as when failing to acquire a lock that
    +   another program already holds, or when a uniquely-generated file name
    +   already exists.
    +
    +
    +.. class:: FormatError()
    +
    +   Raised when the data in a file cannot be parsed, such as when an :class:`MH`
    +   instance attempts to read a corrupted :file:`.mh_sequences` file.
    +
    +
    +.. _mailbox-deprecated:
    +
    +Deprecated classes and methods
    +------------------------------
    +
    +Older versions of the :mod:`mailbox` module do not support modification of
    +mailboxes, such as adding or removing messages, and do not provide classes to
    +represent format-specific message properties. For backward compatibility, the
    +older mailbox classes are still available, but the newer classes should be used
    +in preference to them.
    +
    +Older mailbox objects support only iteration and provide a single public method:
    +
    +
    +.. method:: oldmailbox.next()
    +
    +   Return the next message in the mailbox, created with the optional *factory*
    +   argument passed into the mailbox object's constructor. By default this is an
    +   :class:`rfc822.Message` object (see the :mod:`rfc822` module).  Depending on the
    +   mailbox implementation the *fp* attribute of this object may be a true file
    +   object or a class instance simulating a file object, taking care of things like
    +   message boundaries if multiple mail messages are contained in a single file,
    +   etc.  If no more messages are available, this method returns ``None``.
    +
    +Most of the older mailbox classes have names that differ from the current
    +mailbox class names, except for :class:`Maildir`. For this reason, the new
    +:class:`Maildir` class defines a :meth:`next` method and its constructor differs
    +slightly from those of the other new mailbox classes.
    +
    +The older mailbox classes whose names are not the same as their newer
    +counterparts are as follows:
    +
    +
    +.. class:: UnixMailbox(fp[, factory])
    +
    +   Access to a classic Unix-style mailbox, where all messages are contained in a
    +   single file and separated by ``From`` (a.k.a. ``From_``) lines.  The file object
    +   *fp* points to the mailbox file.  The optional *factory* parameter is a callable
    +   that should create new message objects.  *factory* is called with one argument,
    +   *fp*, by the :meth:`next` method of the mailbox object.  The default is the
    +   :class:`rfc822.Message` class (see the :mod:`rfc822` module -- and the note
    +   below).
    +
    +   .. note::
    +
    +      For reasons of this module's internal implementation, you will probably want to
    +      open the *fp* object in binary mode.  This is especially important on Windows.
    +
    +   For maximum portability, messages in a Unix-style mailbox are separated by any
    +   line that begins exactly with the string ``'From '`` (note the trailing space)
    +   if preceded by exactly two newlines. Because of the wide range of variations in
    +   practice, nothing else on the ``From_`` line should be considered.  However, the
    +   current implementation doesn't check for the leading two newlines.  This is
    +   usually fine for most applications.
    +
    +   The :class:`UnixMailbox` class implements a more strict version of ``From_``
    +   line checking, using a regular expression that usually correctly matches
    +   ``From_`` delimiters.  It considers messages to be separated by ``From
    +   name time`` lines.  For maximum portability, use the
    +   :class:`PortableUnixMailbox` class instead.  This class is identical to
    +   :class:`UnixMailbox` except that individual messages are separated by only
    +   ``From`` lines.
    +
    +   For more information, see `Configuring Netscape Mail on Unix: Why the
    +   Content-Length Format is Bad
    +   <http://home.netscape.com/eng/mozilla/2.0/relnotes/demo/content-length.html>`_.
    +
    +
    +.. class:: PortableUnixMailbox(fp[, factory])
    +
    +   A less-strict version of :class:`UnixMailbox`, which considers only the ``From``
    +   at the beginning of the line separating messages.  The "*name* *time*" portion
    +   of the From line is ignored, to protect against some variations that are
    +   observed in practice.  This works since lines in the message which begin with
    +   ``'From '`` are quoted by mail handling software at delivery-time.
    +
    +
    +.. class:: MmdfMailbox(fp[, factory])
    +
    +   Access an MMDF-style mailbox, where all messages are contained in a single file
    +   and separated by lines consisting of 4 control-A characters.  The file object
    +   *fp* points to the mailbox file. Optional *factory* is as with the
    +   :class:`UnixMailbox` class.
    +
    +
    +.. class:: MHMailbox(dirname[, factory])
    +
    +   Access an MH mailbox, a directory with each message in a separate file with a
    +   numeric name. The name of the mailbox directory is passed in *dirname*.
    +   *factory* is as with the :class:`UnixMailbox` class.
    +
    +
    +.. class:: BabylMailbox(fp[, factory])
    +
    +   Access a Babyl mailbox, which is similar to an MMDF mailbox.  In Babyl format,
    +   each message has two sets of headers, the *original* headers and the *visible*
    +   headers.  The original headers appear before a line containing only ``'*** EOOH
    +   ***'`` (End-Of-Original-Headers) and the visible headers appear after the
    +   ``EOOH`` line.  Babyl-compliant mail readers will show you only the visible
    +   headers, and :class:`BabylMailbox` objects will return messages containing only
    +   the visible headers.  You'll have to do your own parsing of the mailbox file to
    +   get at the original headers.  Mail messages start with the EOOH line and end
    +   with a line containing only ``'\037\014'``.  *factory* is as with the
    +   :class:`UnixMailbox` class.
    +
    +If you wish to use the older mailbox classes with the :mod:`email` module rather
    +than the deprecated :mod:`rfc822` module, you can do so as follows::
    +
    +   import email
    +   import email.Errors
    +   import mailbox
    +
    +   def msgfactory(fp):
    +       try:
    +           return email.message_from_file(fp)
    +       except email.Errors.MessageParseError:
    +           # Don't return None since that will
    +           # stop the mailbox iterator
    +           return ''
    +
    +   mbox = mailbox.UnixMailbox(fp, msgfactory)
    +
    +Alternatively, if you know your mailbox contains only well-formed MIME messages,
    +you can simplify this to::
    +
    +   import email
    +   import mailbox
    +
    +   mbox = mailbox.UnixMailbox(fp, email.message_from_file)
    +
    +
    +.. _mailbox-examples:
    +
    +Examples
    +--------
    +
    +A simple example of printing the subjects of all messages in a mailbox that seem
    +interesting::
    +
    +   import mailbox
    +   for message in mailbox.mbox('~/mbox'):
    +       subject = message['subject']       # Could possibly be None.
    +       if subject and 'python' in subject.lower():
    +           print subject
    +
    +To copy all mail from a Babyl mailbox to an MH mailbox, converting all of the
    +format-specific information that can be converted::
    +
    +   import mailbox
    +   destination = mailbox.MH('~/Mail')
    +   destination.lock()
    +   for message in mailbox.Babyl('~/RMAIL'):
    +       destination.add(mailbox.MHMessage(message))
    +   destination.flush()
    +   destination.unlock()
    +
    +This example sorts mail from several mailing lists into different mailboxes,
    +being careful to avoid mail corruption due to concurrent modification by other
    +programs, mail loss due to interruption of the program, or premature termination
    +due to malformed messages in the mailbox::
    +
    +   import mailbox
    +   import email.Errors
    +
    +   list_names = ('python-list', 'python-dev', 'python-bugs')
    +
    +   boxes = dict((name, mailbox.mbox('~/email/%s' % name)) for name in list_names)
    +   inbox = mailbox.Maildir('~/Maildir', factory=None)
    +
    +   for key in inbox.iterkeys():
    +       try:
    +           message = inbox[key]
    +       except email.Errors.MessageParseError:
    +           continue                # The message is malformed. Just leave it.
    +
    +       for name in list_names:
    +           list_id = message['list-id']
    +           if list_id and name in list_id:
    +               # Get mailbox to use
    +               box = boxes[name]
    +
    +               # Write copy to disk before removing original.
    +               # If there's a crash, you might duplicate a message, but
    +               # that's better than losing a message completely.
    +               box.lock()
    +               box.add(message)
    +               box.flush()
    +               box.unlock()
    +
    +               # Remove original message
    +               inbox.lock()
    +               inbox.discard(key)
    +               inbox.flush()
    +               inbox.unlock()
    +               break               # Found destination, so stop looking.
    +
    +   for box in boxes.itervalues():
    +       box.close()
    +
    diff --git a/Doc/library/mailcap.rst b/Doc/library/mailcap.rst
    new file mode 100644
    index 0000000..8dcb1ec
    --- /dev/null
    +++ b/Doc/library/mailcap.rst
    @@ -0,0 +1,74 @@
    +:mod:`mailcap` --- Mailcap file handling
    +========================================
    +
    +.. module:: mailcap
    +   :synopsis: Mailcap file handling.
    +
    +
    +
    +Mailcap files are used to configure how MIME-aware applications such as mail
    +readers and Web browsers react to files with different MIME types. (The name
    +"mailcap" is derived from the phrase "mail capability".)  For example, a mailcap
    +file might contain a line like ``video/mpeg; xmpeg %s``.  Then, if the user
    +encounters an email message or Web document with the MIME type
    +:mimetype:`video/mpeg`, ``%s`` will be replaced by a filename (usually one
    +belonging to a temporary file) and the :program:`xmpeg` program can be
    +automatically started to view the file.
    +
    +The mailcap format is documented in :rfc:`1524`, "A User Agent Configuration
    +Mechanism For Multimedia Mail Format Information," but is not an Internet
    +standard.  However, mailcap files are supported on most Unix systems.
    +
    +
    +.. function:: findmatch(caps, MIMEtype[, key[, filename[, plist]]])
    +
    +   Return a 2-tuple; the first element is a string containing the command line to
    +   be executed (which can be passed to :func:`os.system`), and the second element
    +   is the mailcap entry for a given MIME type.  If no matching MIME type can be
    +   found, ``(None, None)`` is returned.
    +
    +   *key* is the name of the field desired, which represents the type of activity to
    +   be performed; the default value is 'view', since in the most common case you
    +   simply want to view the body of the MIME-typed data.  Other possible values
    +   might be 'compose' and 'edit', if you wanted to create a new body of the given
    +   MIME type or alter the existing body data.  See :rfc:`1524` for a complete list
    +   of these fields.
    +
    +   *filename* is the filename to be substituted for ``%s`` in the command line; the
    +   default value is ``'/dev/null'`` which is almost certainly not what you want, so
    +   usually you'll override it by specifying a filename.
    +
    +   *plist* can be a list containing named parameters; the default value is simply
    +   an empty list.  Each entry in the list must be a string containing the parameter
    +   name, an equals sign (``'='``), and the parameter's value.  Mailcap entries can
    +   contain named parameters like ``%{foo}``, which will be replaced by the value
    +   of the parameter named 'foo'.  For example, if the command line ``showpartial
    +   %{id} %{number} %{total}`` was in a mailcap file, and *plist* was set to
    +   ``['id=1', 'number=2', 'total=3']``, the resulting command line would be
    +   ``'showpartial 1 2 3'``.
    +
    +   In a mailcap file, the "test" field can optionally be specified to test some
    +   external condition (such as the machine architecture, or the window system in
    +   use) to determine whether or not the mailcap line applies.  :func:`findmatch`
    +   will automatically check such conditions and skip the entry if the check fails.
    +
    +
    +.. function:: getcaps()
    +
    +   Returns a dictionary mapping MIME types to a list of mailcap file entries. This
    +   dictionary must be passed to the :func:`findmatch` function.  An entry is stored
    +   as a list of dictionaries, but it shouldn't be necessary to know the details of
    +   this representation.
    +
    +   The information is derived from all of the mailcap files found on the system.
    +   Settings in the user's mailcap file :file:`$HOME/.mailcap` will override
    +   settings in the system mailcap files :file:`/etc/mailcap`,
    +   :file:`/usr/etc/mailcap`, and :file:`/usr/local/etc/mailcap`.
    +
    +An example usage::
    +
    +   >>> import mailcap
    +   >>> d=mailcap.getcaps()
    +   >>> mailcap.findmatch(d, 'video/mpeg', filename='/tmp/tmp1223')
    +   ('xmpeg /tmp/tmp1223', {'view': 'xmpeg %s'})
    +
    diff --git a/Doc/library/markup.rst b/Doc/library/markup.rst
    new file mode 100644
    index 0000000..dd0dd8f
    --- /dev/null
    +++ b/Doc/library/markup.rst
    @@ -0,0 +1,44 @@
    +
    +.. _markup:
    +
    +**********************************
    +Structured Markup Processing Tools
    +**********************************
    +
    +Python supports a variety of modules to work with various forms of structured
    +data markup.  This includes modules to work with the Standard Generalized Markup
    +Language (SGML) and the Hypertext Markup Language (HTML), and several interfaces
    +for working with the Extensible Markup Language (XML).
    +
    +It is important to note that modules in the :mod:`xml` package require that
    +there be at least one SAX-compliant XML parser available. Starting with Python
    +2.3, the Expat parser is included with Python, so the :mod:`xml.parsers.expat`
    +module will always be available. You may still want to be aware of the `PyXML
    +add-on package <http://pyxml.sourceforge.net/>`_; that package provides an
    +extended set of XML libraries for Python.
    +
    +The documentation for the :mod:`xml.dom` and :mod:`xml.sax` packages is the
    +definition of the Python bindings for the DOM and SAX interfaces.
    +
    +
    +.. toctree::
    +
    +   htmlparser.rst
    +   sgmllib.rst
    +   htmllib.rst
    +   pyexpat.rst
    +   xml.dom.rst
    +   xml.dom.minidom.rst
    +   xml.dom.pulldom.rst
    +   xml.sax.rst
    +   xml.sax.handler.rst
    +   xml.sax.utils.rst
    +   xml.sax.reader.rst
    +   xml.etree.elementtree.rst
    +
    +.. seealso::
    +
    +   `Python/XML Libraries <http://pyxml.sourceforge.net/>`_
    +      Home page for the PyXML package, containing an extension of the :mod:`xml` package
    +      bundled with Python.
    +
    diff --git a/Doc/library/marshal.rst b/Doc/library/marshal.rst
    new file mode 100644
    index 0000000..010ebc3
    --- /dev/null
    +++ b/Doc/library/marshal.rst
    @@ -0,0 +1,127 @@
    +
    +:mod:`marshal` --- Internal Python object serialization
    +=======================================================
    +
    +.. module:: marshal
    +   :synopsis: Convert Python objects to streams of bytes and back (with different
    +              constraints).
    +
    +
    +This module contains functions that can read and write Python values in a binary
    +format.  The format is specific to Python, but independent of machine
    +architecture issues (e.g., you can write a Python value to a file on a PC,
    +transport the file to a Sun, and read it back there).  Details of the format are
    +undocumented on purpose; it may change between Python versions (although it
    +rarely does). [#]_
    +
    +.. index::
    +   module: pickle
    +   module: shelve
    +   object: code
    +
    +This is not a general "persistence" module.  For general persistence and
    +transfer of Python objects through RPC calls, see the modules :mod:`pickle` and
    +:mod:`shelve`.  The :mod:`marshal` module exists mainly to support reading and
    +writing the "pseudo-compiled" code for Python modules of :file:`.pyc` files.
    +Therefore, the Python maintainers reserve the right to modify the marshal format
    +in backward incompatible ways should the need arise.  If you're serializing and
    +de-serializing Python objects, use the :mod:`pickle` module instead.
    +
    +.. warning::
    +
    +   The :mod:`marshal` module is not intended to be secure against erroneous or
    +   maliciously constructed data.  Never unmarshal data received from an
    +   untrusted or unauthenticated source.
    +
    +Not all Python object types are supported; in general, only objects whose value
    +is independent from a particular invocation of Python can be written and read by
    +this module.  The following types are supported: ``None``, integers, long
    +integers, floating point numbers, strings, Unicode objects, tuples, lists,
    +dictionaries, and code objects, where it should be understood that tuples, lists
    +and dictionaries are only supported as long as the values contained therein are
    +themselves supported; and recursive lists and dictionaries should not be written
    +(they will cause infinite loops).
    +
    +**Caveat:** On machines where C's ``long int`` type has more than 32 bits (such
    +as the DEC Alpha), it is possible to create plain Python integers that are
    +longer than 32 bits. If such an integer is marshaled and read back in on a
    +machine where C's ``long int`` type has only 32 bits, a Python long integer
    +object is returned instead.  While of a different type, the numeric value is the
    +same.  (This behavior is new in Python 2.2.  In earlier versions, all but the
    +least-significant 32 bits of the value were lost, and a warning message was
    +printed.)
    +
    +There are functions that read/write files as well as functions operating on
    +strings.
    +
    +The module defines these functions:
    +
    +
    +.. function:: dump(value, file[, version])
    +
    +   Write the value on the open file.  The value must be a supported type.  The
    +   file must be an open file object such as ``sys.stdout`` or returned by
    +   :func:`open` or :func:`os.popen`.  It must be opened in binary mode (``'wb'``
    +   or ``'w+b'``).
    +
    +   If the value has (or contains an object that has) an unsupported type, a
    +   :exc:`ValueError` exception is raised --- but garbage data will also be written
    +   to the file.  The object will not be properly read back by :func:`load`.
    +
    +   .. versionadded:: 2.4
    +      The *version* argument indicates the data format that ``dump`` should use
    +      (see below).
    +
    +
    +.. function:: load(file)
    +
    +   Read one value from the open file and return it.  If no valid value is read
    +   (e.g. because the data has a different Python version's incompatible marshal
    +   format), raise :exc:`EOFError`, :exc:`ValueError` or :exc:`TypeError`.  The
    +   file must be an open file object opened in binary mode (``'rb'`` or
    +   ``'r+b'``).
    +
    +   .. warning::
    +
    +      If an object containing an unsupported type was marshalled with :func:`dump`,
    +      :func:`load` will substitute ``None`` for the unmarshallable type.
    +
    +
    +.. function:: dumps(value[, version])
    +
    +   Return the string that would be written to a file by ``dump(value, file)``.  The
    +   value must be a supported type.  Raise a :exc:`ValueError` exception if value
    +   has (or contains an object that has) an unsupported type.
    +
    +   .. versionadded:: 2.4
    +      The *version* argument indicates the data format that ``dumps`` should use
    +      (see below).
    +
    +
    +.. function:: loads(string)
    +
    +   Convert the string to a value.  If no valid value is found, raise
    +   :exc:`EOFError`, :exc:`ValueError` or :exc:`TypeError`.  Extra characters in the
    +   string are ignored.
    +
    +
    +In addition, the following constants are defined:
    +
    +.. data:: version
    +
    +   Indicates the format that the module uses. Version 0 is the historical format,
    +   version 1 (added in Python 2.4) shares interned strings and version 2 (added in
    +   Python 2.5) uses a binary format for floating point numbers. The current version
    +   is 2.
    +
    +   .. versionadded:: 2.4
    +
    +
    +.. rubric:: Footnotes
    +
    +.. [#] The name of this module stems from a bit of terminology used by the designers of
    +   Modula-3 (amongst others), who use the term "marshalling" for shipping of data
    +   around in a self-contained form. Strictly speaking, "to marshal" means to
    +   convert some data from internal to external form (in an RPC buffer for instance)
    +   and "unmarshalling" refers to the reverse process.
    +
    diff --git a/Doc/library/math.rst b/Doc/library/math.rst
    new file mode 100644
    index 0000000..17c75d3
    --- /dev/null
    +++ b/Doc/library/math.rst
    @@ -0,0 +1,227 @@
    +
    +:mod:`math` --- Mathematical functions
    +======================================
    +
    +.. module:: math
    +   :synopsis: Mathematical functions (sin() etc.).
    +
    +
    +This module is always available.  It provides access to the mathematical
    +functions defined by the C standard.
    +
    +These functions cannot be used with complex numbers; use the functions of the
    +same name from the :mod:`cmath` module if you require support for complex
    +numbers.  The distinction between functions which support complex numbers and
    +those which don't is made since most users do not want to learn quite as much
    +mathematics as required to understand complex numbers.  Receiving an exception
    +instead of a complex result allows earlier detection of the unexpected complex
    +number used as a parameter, so that the programmer can determine how and why it
    +was generated in the first place.
    +
    +The following functions are provided by this module.  Except when explicitly
    +noted otherwise, all return values are floats.
    +
    +Number-theoretic and representation functions:
    +
    +
    +.. function:: ceil(x)
    +
    +   Return the ceiling of *x* as a float, the smallest integer value greater than or
    +   equal to *x*.
    +
    +
    +.. function:: fabs(x)
    +
    +   Return the absolute value of *x*.
    +
    +
    +.. function:: floor(x)
    +
    +   Return the floor of *x* as a float, the largest integer value less than or equal
    +   to *x*.
    +
    +
    +.. function:: fmod(x, y)
    +
    +   Return ``fmod(x, y)``, as defined by the platform C library. Note that the
    +   Python expression ``x % y`` may not return the same result.  The intent of the C
    +   standard is that ``fmod(x, y)`` be exactly (mathematically; to infinite
    +   precision) equal to ``x - n*y`` for some integer *n* such that the result has
    +   the same sign as *x* and magnitude less than ``abs(y)``.  Python's ``x % y``
    +   returns a result with the sign of *y* instead, and may not be exactly computable
    +   for float arguments. For example, ``fmod(-1e-100, 1e100)`` is ``-1e-100``, but
    +   the result of Python's ``-1e-100 % 1e100`` is ``1e100-1e-100``, which cannot be
    +   represented exactly as a float, and rounds to the surprising ``1e100``.  For
    +   this reason, function :func:`fmod` is generally preferred when working with
    +   floats, while Python's ``x % y`` is preferred when working with integers.
    +
    +
    +.. function:: frexp(x)
    +
    +   Return the mantissa and exponent of *x* as the pair ``(m, e)``.  *m* is a float
    +   and *e* is an integer such that ``x == m * 2**e`` exactly. If *x* is zero,
    +   returns ``(0.0, 0)``, otherwise ``0.5 <= abs(m) < 1``.  This is used to "pick
    +   apart" the internal representation of a float in a portable way.
    +
    +
    +.. function:: ldexp(x, i)
    +
    +   Return ``x * (2**i)``.  This is essentially the inverse of function
    +   :func:`frexp`.
    +
    +
    +.. function:: modf(x)
    +
    +   Return the fractional and integer parts of *x*.  Both results carry the sign of
    +   *x*, and both are floats.
    +
    +Note that :func:`frexp` and :func:`modf` have a different call/return pattern
    +than their C equivalents: they take a single argument and return a pair of
    +values, rather than returning their second return value through an 'output
    +parameter' (there is no such thing in Python).
    +
    +For the :func:`ceil`, :func:`floor`, and :func:`modf` functions, note that *all*
    +floating-point numbers of sufficiently large magnitude are exact integers.
    +Python floats typically carry no more than 53 bits of precision (the same as the
    +platform C double type), in which case any float *x* with ``abs(x) >= 2**52``
    +necessarily has no fractional bits.
    +
    +Power and logarithmic functions:
    +
    +
    +.. function:: exp(x)
    +
    +   Return ``e**x``.
    +
    +
    +.. function:: log(x[, base])
    +
    +   Return the logarithm of *x* to the given *base*. If the *base* is not specified,
    +   return the natural logarithm of *x* (that is, the logarithm to base *e*).
    +
    +   .. versionchanged:: 2.3
    +      *base* argument added.
    +
    +
    +.. function:: log10(x)
    +
    +   Return the base-10 logarithm of *x*.
    +
    +
    +.. function:: pow(x, y)
    +
    +   Return ``x**y``.
    +
    +
    +.. function:: sqrt(x)
    +
    +   Return the square root of *x*.
    +
    +Trigonometric functions:
    +
    +
    +.. function:: acos(x)
    +
    +   Return the arc cosine of *x*, in radians.
    +
    +
    +.. function:: asin(x)
    +
    +   Return the arc sine of *x*, in radians.
    +
    +
    +.. function:: atan(x)
    +
    +   Return the arc tangent of *x*, in radians.
    +
    +
    +.. function:: atan2(y, x)
    +
    +   Return ``atan(y / x)``, in radians. The result is between ``-pi`` and ``pi``.
    +   The vector in the plane from the origin to point ``(x, y)`` makes this angle
    +   with the positive X axis. The point of :func:`atan2` is that the signs of both
    +   inputs are known to it, so it can compute the correct quadrant for the angle.
    +   For example, ``atan(1)`` and ``atan2(1, 1)`` are both ``pi/4``, but ``atan2(-1,
    +   -1)`` is ``-3*pi/4``.
    +
    +
    +.. function:: cos(x)
    +
    +   Return the cosine of *x* radians.
    +
    +
    +.. function:: hypot(x, y)
    +
    +   Return the Euclidean norm, ``sqrt(x*x + y*y)``. This is the length of the vector
    +   from the origin to point ``(x, y)``.
    +
    +
    +.. function:: sin(x)
    +
    +   Return the sine of *x* radians.
    +
    +
    +.. function:: tan(x)
    +
    +   Return the tangent of *x* radians.
    +
    +Angular conversion:
    +
    +
    +.. function:: degrees(x)
    +
    +   Converts angle *x* from radians to degrees.
    +
    +
    +.. function:: radians(x)
    +
    +   Converts angle *x* from degrees to radians.
    +
    +Hyperbolic functions:
    +
    +
    +.. function:: cosh(x)
    +
    +   Return the hyperbolic cosine of *x*.
    +
    +
    +.. function:: sinh(x)
    +
    +   Return the hyperbolic sine of *x*.
    +
    +
    +.. function:: tanh(x)
    +
    +   Return the hyperbolic tangent of *x*.
    +
    +The module also defines two mathematical constants:
    +
    +
    +.. data:: pi
    +
    +   The mathematical constant *pi*.
    +
    +
    +.. data:: e
    +
    +   The mathematical constant *e*.
    +
    +.. note::
    +
    +   The :mod:`math` module consists mostly of thin wrappers around the platform C
    +   math library functions.  Behavior in exceptional cases is loosely specified
    +   by the C standards, and Python inherits much of its math-function
    +   error-reporting behavior from the platform C implementation.  As a result,
    +   the specific exceptions raised in error cases (and even whether some
    +   arguments are considered to be exceptional at all) are not defined in any
    +   useful cross-platform or cross-release way.  For example, whether
    +   ``math.log(0)`` returns ``-Inf`` or raises :exc:`ValueError` or
    +   :exc:`OverflowError` isn't defined, and in cases where ``math.log(0)`` raises
    +   :exc:`OverflowError`, ``math.log(0L)`` may raise :exc:`ValueError` instead.
    +
    +
    +.. seealso::
    +
    +   Module :mod:`cmath`
    +      Complex number versions of many of these functions.
    +
    diff --git a/Doc/library/mhlib.rst b/Doc/library/mhlib.rst
    new file mode 100644
    index 0000000..15d2b05
    --- /dev/null
    +++ b/Doc/library/mhlib.rst
    @@ -0,0 +1,205 @@
    +
    +:mod:`mhlib` --- Access to MH mailboxes
    +=======================================
    +
    +.. module:: mhlib
    +   :synopsis: Manipulate MH mailboxes from Python.
    +
    +
    +.. % LaTeX'ized from the comments in the module by Skip Montanaro
    +.. % .
    +
    +The :mod:`mhlib` module provides a Python interface to MH folders and their
    +contents.
    +
    +The module contains three basic classes, :class:`MH`, which represents a
    +particular collection of folders, :class:`Folder`, which represents a single
    +folder, and :class:`Message`, which represents a single message.
    +
    +
    +.. class:: MH([path[, profile]])
    +
    +   :class:`MH` represents a collection of MH folders.
    +
    +
    +.. class:: Folder(mh, name)
    +
    +   The :class:`Folder` class represents a single folder and its messages.
    +
    +
    +.. class:: Message(folder, number[, name])
    +
    +   :class:`Message` objects represent individual messages in a folder.  The Message
    +   class is derived from :class:`mimetools.Message`.
    +
    +
    +.. _mh-objects:
    +
    +MH Objects
    +----------
    +
    +:class:`MH` instances have the following methods:
    +
    +
    +.. method:: MH.error(format[, ...])
    +
    +   Print an error message -- can be overridden.
    +
    +
    +.. method:: MH.getprofile(key)
    +
    +   Return a profile entry (``None`` if not set).
    +
    +
    +.. method:: MH.getpath()
    +
    +   Return the mailbox pathname.
    +
    +
    +.. method:: MH.getcontext()
    +
    +   Return the current folder name.
    +
    +
    +.. method:: MH.setcontext(name)
    +
    +   Set the current folder name.
    +
    +
    +.. method:: MH.listfolders()
    +
    +   Return a list of top-level folders.
    +
    +
    +.. method:: MH.listallfolders()
    +
    +   Return a list of all folders.
    +
    +
    +.. method:: MH.listsubfolders(name)
    +
    +   Return a list of direct subfolders of the given folder.
    +
    +
    +.. method:: MH.listallsubfolders(name)
    +
    +   Return a list of all subfolders of the given folder.
    +
    +
    +.. method:: MH.makefolder(name)
    +
    +   Create a new folder.
    +
    +
    +.. method:: MH.deletefolder(name)
    +
    +   Delete a folder -- must have no subfolders.
    +
    +
    +.. method:: MH.openfolder(name)
    +
    +   Return a new open folder object.
    +
    +
    +.. _mh-folder-objects:
    +
    +Folder Objects
    +--------------
    +
    +:class:`Folder` instances represent open folders and have the following methods:
    +
    +
    +.. method:: Folder.error(format[, ...])
    +
    +   Print an error message -- can be overridden.
    +
    +
    +.. method:: Folder.getfullname()
    +
    +   Return the folder's full pathname.
    +
    +
    +.. method:: Folder.getsequencesfilename()
    +
    +   Return the full pathname of the folder's sequences file.
    +
    +
    +.. method:: Folder.getmessagefilename(n)
    +
    +   Return the full pathname of message *n* of the folder.
    +
    +
    +.. method:: Folder.listmessages()
    +
    +   Return a list of messages in the folder (as numbers).
    +
    +
    +.. method:: Folder.getcurrent()
    +
    +   Return the current message number.
    +
    +
    +.. method:: Folder.setcurrent(n)
    +
    +   Set the current message number to *n*.
    +
    +
    +.. method:: Folder.parsesequence(seq)
    +
    +   Parse msgs syntax into list of messages.
    +
    +
    +.. method:: Folder.getlast()
    +
    +   Get last message, or ``0`` if no messages are in the folder.
    +
    +
    +.. method:: Folder.setlast(n)
    +
    +   Set last message (internal use only).
    +
    +
    +.. method:: Folder.getsequences()
    +
    +   Return dictionary of sequences in folder.  The sequence names are used as keys,
    +   and the values are the lists of message numbers in the sequences.
    +
    +
    +.. method:: Folder.putsequences(dict)
    +
    +   Write a dictionary of sequences (name: list) back to the folder.
    +
    +
    +.. method:: Folder.removemessages(list)
    +
    +   Remove messages in list from folder.
    +
    +
    +.. method:: Folder.refilemessages(list, tofolder)
    +
    +   Move messages in list to other folder.
    +
    +
    +.. method:: Folder.movemessage(n, tofolder, ton)
    +
    +   Move one message to a given destination in another folder.
    +
    +
    +.. method:: Folder.copymessage(n, tofolder, ton)
    +
    +   Copy one message to a given destination in another folder.
    +
    +
    +.. _mh-message-objects:
    +
    +Message Objects
    +---------------
    +
    +The :class:`Message` class adds one method to those of
    +:class:`mimetools.Message`:
    +
    +
    +.. method:: Message.openmessage(n)
    +
    +   Return a new open message object (costs a file descriptor).
    +
    diff --git a/Doc/library/mimetools.rst b/Doc/library/mimetools.rst
    new file mode 100644
    index 0000000..603bec6
    --- /dev/null
    +++ b/Doc/library/mimetools.rst
    @@ -0,0 +1,130 @@
    +
    +:mod:`mimetools` --- Tools for parsing MIME messages
    +====================================================
    +
    +.. module:: mimetools
    +   :synopsis: Tools for parsing MIME-style message bodies.
    +
    +
    +.. deprecated:: 2.3
    +   The :mod:`email` package should be used in preference to the :mod:`mimetools`
    +   module.  This module is present only to maintain backward compatibility.
    +
    +.. index:: module: rfc822
    +
    +This module defines a subclass of the :mod:`rfc822` module's :class:`Message`
    +class and a number of utility functions that are useful for the manipulation of
    +MIME multipart or encoded messages.
    +
    +It defines the following items:
    +
    +
    +.. class:: Message(fp[, seekable])
    +
    +   Return a new instance of the :class:`Message` class.  This is a subclass of the
    +   :class:`rfc822.Message` class, with some additional methods (see below).  The
    +   *seekable* argument has the same meaning as for :class:`rfc822.Message`.
    +
    +
    +.. function:: choose_boundary()
    +
    +   Return a unique string that has a high likelihood of being usable as a part
    +   boundary.  The string has the form ``'hostipaddr.uid.pid.timestamp.random'``.
    +
    +
    +.. function:: decode(input, output, encoding)
    +
    +   Read data encoded using the allowed MIME *encoding* from open file object
    +   *input* and write the decoded data to open file object *output*.  Valid values
    +   for *encoding* include ``'base64'``, ``'quoted-printable'``, ``'uuencode'``,
    +   ``'x-uuencode'``, ``'uue'``, ``'x-uue'``, ``'7bit'``, and ``'8bit'``.  Decoding
    +   messages encoded in ``'7bit'`` or ``'8bit'`` has no effect.  The input is simply
    +   copied to the output.
    +
    +
    +.. function:: encode(input, output, encoding)
    +
    +   Read data from open file object *input* and write it encoded using the allowed
    +   MIME *encoding* to open file object *output*. Valid values for *encoding* are
    +   the same as for :func:`decode`.
    +
    +
    +.. function:: copyliteral(input, output)
    +
    +   Read lines from open file *input* until EOF and write them to open file
    +   *output*.
    +
    +
    +.. function:: copybinary(input, output)
    +
    +   Read blocks until EOF from open file *input* and write them to open file
    +   *output*.  The block size is currently fixed at 8192.
    +
    +
    +.. seealso::
    +
    +   Module :mod:`email`
    +      Comprehensive email handling package; supersedes the :mod:`mimetools` module.
    +
    +   Module :mod:`rfc822`
    +      Provides the base class for :class:`mimetools.Message`.
    +
    +   Module :mod:`multifile`
    +      Support for reading files which contain distinct parts, such as MIME data.
    +
    +   http://www.cs.uu.nl/wais/html/na-dir/mail/mime-faq/.html
    +      The MIME Frequently Asked Questions document.  For an overview of MIME, see the
    +      answer to question 1.1 in Part 1 of this document.
    +
    +
    +.. _mimetools-message-objects:
    +
    +Additional Methods of Message Objects
    +-------------------------------------
    +
    +The :class:`Message` class defines the following methods in addition to the
    +:class:`rfc822.Message` methods:
    +
    +
    +.. method:: Message.getplist()
    +
    +   Return the parameter list of the :mailheader:`Content-Type` header. This is a
    +   list of strings.  For parameters of the form ``key=value``, *key* is converted
    +   to lower case but *value* is not.  For example, if the message contains the
    +   header ``Content-type: text/html; spam=1; Spam=2; Spam`` then :meth:`getplist`
    +   will return the Python list ``['spam=1', 'spam=2', 'Spam']``.
    +
    +
    +.. method:: Message.getparam(name)
    +
    +   Return the *value* of the first parameter (as returned by :meth:`getplist`) of
    +   the form ``name=value`` for the given *name*.  If *value* is surrounded by
    +   quotes of the form '``<``...\ ``>``' or '``"``...\ ``"``', these are removed.
    +
    +
    +.. method:: Message.getencoding()
    +
    +   Return the encoding specified in the :mailheader:`Content-Transfer-Encoding`
    +   message header.  If no such header exists, return ``'7bit'``.  The encoding is
    +   converted to lower case.
    +
    +
    +.. method:: Message.gettype()
    +
    +   Return the message type (of the form ``type/subtype``) as specified in the
    +   :mailheader:`Content-Type` header.  If no such header exists, return
    +   ``'text/plain'``.  The type is converted to lower case.
    +
    +
    +.. method:: Message.getmaintype()
    +
    +   Return the main type as specified in the :mailheader:`Content-Type` header.  If
    +   no such header exists, return ``'text'``.  The main type is converted to lower
    +   case.
    +
    +
    +.. method:: Message.getsubtype()
    +
    +   Return the subtype as specified in the :mailheader:`Content-Type` header.  If no
    +   such header exists, return ``'plain'``.  The subtype is converted to lower case.
    +
    diff --git a/Doc/library/mimetypes.rst b/Doc/library/mimetypes.rst
    new file mode 100644
    index 0000000..fd5e12d
    --- /dev/null
    +++ b/Doc/library/mimetypes.rst
    @@ -0,0 +1,232 @@
    +
    +:mod:`mimetypes` --- Map filenames to MIME types
    +================================================
    +
    +.. module:: mimetypes
    +   :synopsis: Mapping of filename extensions to MIME types.
    +.. sectionauthor:: Fred L. Drake, Jr. 
    +
    +
    +.. index:: pair: MIME; content type
    +
    +The :mod:`mimetypes` module converts between a filename or URL and the MIME type
    +associated with the filename extension.  Conversions are provided from filename
    +to MIME type and from MIME type to filename extension; encodings are not
    +supported for the latter conversion.
    +
    +The module provides one class and a number of convenience functions. The
    +functions are the normal interface to this module, but some applications may be
    +interested in the class as well.
    +
    +The functions described below provide the primary interface for this module.  If
    +the module has not been initialized, they will call :func:`init` if they rely on
    +the information :func:`init` sets up.
    +
    +
    +.. function:: guess_type(filename[, strict])
    +
    +   .. index:: pair: MIME; headers
    +
    +   Guess the type of a file based on its filename or URL, given by *filename*.  The
    +   return value is a tuple ``(type, encoding)`` where *type* is ``None`` if the
    +   type can't be guessed (missing or unknown suffix) or a string of the form
    +   ``'type/subtype'``, usable for a MIME :mailheader:`content-type` header.
    +
    +   *encoding* is ``None`` for no encoding or the name of the program used to encode
    +   (e.g. :program:`compress` or :program:`gzip`). The encoding is suitable for use
    +   as a :mailheader:`Content-Encoding` header, *not* as a
    +   :mailheader:`Content-Transfer-Encoding` header. The mappings are table driven.
    +   Encoding suffixes are case sensitive; type suffixes are first tried case
    +   sensitively, then case insensitively.
    +
    +   Optional *strict* is a flag specifying whether the list of known MIME types
    +   is limited to only the official types `registered with IANA
    +   <http://www.iana.org/assignments/media-types/>`_.
    +   When *strict* is true (the default), only the IANA types are supported; when
    +   *strict* is false, some additional non-standard but commonly used MIME types
    +   are also recognized.
    +
    +
    +.. function:: guess_all_extensions(type[, strict])
    +
    +   Guess the extensions for a file based on its MIME type, given by *type*. The
    +   return value is a list of strings giving all possible filename extensions,
    +   including the leading dot (``'.'``).  The extensions are not guaranteed to have
    +   been associated with any particular data stream, but would be mapped to the MIME
    +   type *type* by :func:`guess_type`.
    +
    +   Optional *strict* has the same meaning as with the :func:`guess_type` function.
    +
    +
    +.. function:: guess_extension(type[, strict])
    +
    +   Guess the extension for a file based on its MIME type, given by *type*. The
    +   return value is a string giving a filename extension, including the leading dot
    +   (``'.'``).  The extension is not guaranteed to have been associated with any
    +   particular data stream, but would be mapped to the  MIME type *type* by
    +   :func:`guess_type`.  If no extension can be guessed for *type*, ``None`` is
    +   returned.
    +
    +   Optional *strict* has the same meaning as with the :func:`guess_type` function.
    +
    +Some additional functions and data items are available for controlling the
    +behavior of the module.
    +
    +
    +.. function:: init([files])
    +
    +   Initialize the internal data structures.  If given, *files* must be a sequence
    +   of file names which should be used to augment the default type map.  If omitted,
    +   the file names to use are taken from :const:`knownfiles`.  Each file named in
    +   *files* or :const:`knownfiles` takes precedence over those named before it.
    +   Calling :func:`init` repeatedly is allowed.
    +
    +
    +.. function:: read_mime_types(filename)
    +
    +   Load the type map given in the file *filename*, if it exists.  The  type map is
    +   returned as a dictionary mapping filename extensions, including the leading dot
    +   (``'.'``), to strings of the form ``'type/subtype'``.  If the file *filename*
    +   does not exist or cannot be read, ``None`` is returned.
    +
    +
    +.. function:: add_type(type, ext[, strict])
    +
    +   Add a mapping from the mimetype *type* to the extension *ext*. When the
    +   extension is already known, the new type will replace the old one. When the type
    +   is already known the extension will be added to the list of known extensions.
    +
    +   When *strict* is true, the mapping will be added to the official MIME types,
    +   otherwise to the non-standard ones.
    +
    +
    +.. data:: inited
    +
    +   Flag indicating whether or not the global data structures have been initialized.
    +   This is set to true by :func:`init`.
    +
    +
    +.. data:: knownfiles
    +
    +   .. index:: single: file; mime.types
    +
    +   List of type map file names commonly installed.  These files are typically named
    +   :file:`mime.types` and are installed in different locations by different
    +   packages.
    +
    +
    +.. data:: suffix_map
    +
    +   Dictionary mapping suffixes to suffixes.  This is used to allow recognition of
    +   encoded files for which the encoding and the type are indicated by the same
    +   extension.  For example, the :file:`.tgz` extension is mapped to :file:`.tar.gz`
    +   to allow the encoding and type to be recognized separately.
    +
    +
    +.. data:: encodings_map
    +
    +   Dictionary mapping filename extensions to encoding types.
    +
    +
    +.. data:: types_map
    +
    +   Dictionary mapping filename extensions to MIME types.
    +
    +
    +.. data:: common_types
    +
    +   Dictionary mapping filename extensions to non-standard, but commonly found MIME
    +   types.
    +
    +The :class:`MimeTypes` class may be useful for applications which may want more
    +than one MIME-type database:
    +
    +
    +.. class:: MimeTypes([filenames])
    +
    +   This class represents a MIME-types database.  By default, it provides access to
    +   the same database as the rest of this module. The initial database is a copy of
    +   that provided by the module, and may be extended by loading additional
    +   :file:`mime.types`\ -style files into the database using the :meth:`read` or
    +   :meth:`readfp` methods.  The mapping dictionaries may also be cleared before
    +   loading additional data if the default data is not desired.
    +
    +   The optional *filenames* parameter can be used to cause additional files to be
    +   loaded "on top" of the default database.
    +
    +   .. versionadded:: 2.2
    +
    +An example usage of the module::
    +
    +   >>> import mimetypes
    +   >>> mimetypes.init()
    +   >>> mimetypes.knownfiles
    +   ['/etc/mime.types', '/etc/httpd/mime.types', ... ]
    +   >>> mimetypes.suffix_map['.tgz']
    +   '.tar.gz'
    +   >>> mimetypes.encodings_map['.gz']
    +   'gzip'
    +   >>> mimetypes.types_map['.tgz']
    +   'application/x-tar-gz'
    +
    +
    +.. _mimetypes-objects:
    +
    +MimeTypes Objects
    +-----------------
    +
    +:class:`MimeTypes` instances provide an interface which is very similar to that
    +of the :mod:`mimetypes` module.
    +
    +
    +.. attribute:: MimeTypes.suffix_map
    +
    +   Dictionary mapping suffixes to suffixes.  This is used to allow recognition of
    +   encoded files for which the encoding and the type are indicated by the same
    +   extension.  For example, the :file:`.tgz` extension is mapped to :file:`.tar.gz`
    +   to allow the encoding and type to be recognized separately.  This is initially a
    +   copy of the global ``suffix_map`` defined in the module.
    +
    +
    +.. attribute:: MimeTypes.encodings_map
    +
    +   Dictionary mapping filename extensions to encoding types.  This is initially a
    +   copy of the global ``encodings_map`` defined in the module.
    +
    +
    +.. attribute:: MimeTypes.types_map
    +
    +   Dictionary mapping filename extensions to MIME types.  This is initially a copy
    +   of the global ``types_map`` defined in the module.
    +
    +
    +.. attribute:: MimeTypes.common_types
    +
    +   Dictionary mapping filename extensions to non-standard, but commonly found MIME
    +   types.  This is initially a copy of the global ``common_types`` defined in the
    +   module.
    +
    +
    +.. method:: MimeTypes.guess_extension(type[, strict])
    +
    +   Similar to the :func:`guess_extension` function, using the tables stored as part
    +   of the object.
    +
    +
    +.. method:: MimeTypes.guess_type(url[, strict])
    +
    +   Similar to the :func:`guess_type` function, using the tables stored as part of
    +   the object.
    +
    +
    +.. method:: MimeTypes.read(path)
    +
    +   Load MIME information from a file named *path*.  This uses :meth:`readfp` to
    +   parse the file.
    +
    +
    +.. method:: MimeTypes.readfp(file)
    +
    +   Load MIME type information from an open file.  The file must have the format of
    +   the standard :file:`mime.types` files.
    +
    diff --git a/Doc/library/miniaeframe.rst b/Doc/library/miniaeframe.rst
    new file mode 100644
    index 0000000..5bf1b07
    --- /dev/null
    +++ b/Doc/library/miniaeframe.rst
    @@ -0,0 +1,68 @@
    +
    +:mod:`MiniAEFrame` --- Open Scripting Architecture server support
    +=================================================================
    +
    +.. module:: MiniAEFrame
    +   :platform: Mac
    +   :synopsis: Support to act as an Open Scripting Architecture (OSA) server ("Apple Events").
    +
    +
    +.. index::
    +   single: Open Scripting Architecture
    +   single: AppleEvents
    +   module: FrameWork
    +
    +The module :mod:`MiniAEFrame` provides a framework for an application that can
    +function as an Open Scripting Architecture  (OSA) server, i.e. receive and
    +process AppleEvents. It can be used in conjunction with :mod:`FrameWork` or
    +standalone. As an example, it is used in :program:`PythonCGISlave`.
    +
    +The :mod:`MiniAEFrame` module defines the following classes:
    +
    +
    +.. class:: AEServer()
    +
    +   A class that handles AppleEvent dispatch. Your application should subclass this
    +   class together with either :class:`MiniApplication` or
    +   :class:`FrameWork.Application`. Your :meth:`__init__` method should call the
    +   :meth:`__init__` method for both classes.
    +
    +
    +.. class:: MiniApplication()
    +
    +   A class that is more or less compatible with :class:`FrameWork.Application` but
    +   with less functionality. Its event loop supports the apple menu, command-dot and
    +   AppleEvents; other events are passed on to the Python interpreter and/or Sioux.
    +   Useful if your application wants to use :class:`AEServer` but does not provide
    +   its own windows, etc.
    +
    +
    +.. _aeserver-objects:
    +
    +AEServer Objects
    +----------------
    +
    +
    +.. method:: AEServer.installaehandler(classe, type, callback)
    +
    +   Installs an AppleEvent handler. *classe* and *type* are the four-character OSA
    +   Class and Type designators; ``'****'`` wildcards are allowed. When a matching
    +   AppleEvent is received the parameters are decoded and your callback is invoked.
    +
    +
    +.. method:: AEServer.callback(_object, **kwargs)
    +
    +   Your callback is called with the OSA Direct Object as first positional
    +   parameter. The other parameters are passed as keyword arguments, with the
    +   4-character designator as name. Three extra keyword parameters are passed:
    +   ``_class`` and ``_type`` are the Class and Type designators and ``_attributes``
    +   is a dictionary with the AppleEvent attributes.
    +
    +   The return value of your method is packed with :func:`aetools.packevent` and
    +   sent as reply.
    +
    +Note that there are some serious problems with the current design. AppleEvents
    +which have non-identifier 4-character designators for arguments are not
    +implementable, and it is not possible to return an error to the originator. This
    +will be addressed in a future release.
    +
    diff --git a/Doc/library/misc.rst b/Doc/library/misc.rst
    new file mode 100644
    index 0000000..ee22561
    --- /dev/null
    +++ b/Doc/library/misc.rst
    @@ -0,0 +1,14 @@
    +
    +.. _misc:
    +
    +**********************
    +Miscellaneous Services
    +**********************
    +
    +The modules described in this chapter provide miscellaneous services that are
    +available in all Python versions.  Here's an overview:
    +
    +
    +.. toctree::
    +
    +   formatter.rst
    diff --git a/Doc/library/mm.rst b/Doc/library/mm.rst
    new file mode 100644
    index 0000000..a7fbbec
    --- /dev/null
    +++ b/Doc/library/mm.rst
    @@ -0,0 +1,23 @@
    +
    +.. _mmedia:
    +
    +*******************
    +Multimedia Services
    +*******************
    +
    +The modules described in this chapter implement various algorithms or interfaces
    +that are mainly useful for multimedia applications.  They are available at the
    +discretion of the installation.  Here's an overview:
    +
    +
    +.. toctree::
    +
    +   audioop.rst
    +   aifc.rst
    +   sunau.rst
    +   wave.rst
    +   chunk.rst
    +   colorsys.rst
    +   imghdr.rst
    +   sndhdr.rst
    +   ossaudiodev.rst
    diff --git a/Doc/library/mmap.rst b/Doc/library/mmap.rst
    new file mode 100644
    index 0000000..abe5b7b
    --- /dev/null
    +++ b/Doc/library/mmap.rst
    @@ -0,0 +1,173 @@
    +
    +:mod:`mmap` --- Memory-mapped file support
    +==========================================
    +
    +.. module:: mmap
    +   :synopsis: Interface to memory-mapped files for Unix and Windows.
    +
    +
    +Memory-mapped file objects behave like both strings and file objects.
    +Unlike normal string objects, however, these are mutable.  You can use mmap
    +objects in most places where strings are expected; for example, you can use the
    +:mod:`re` module to search through a memory-mapped file.  Since they're mutable,
    +you can change a single character by doing ``obj[index] = 'a'``, or change a
    +substring by assigning to a slice: ``obj[i1:i2] = '...'``.  You can also read
    +and write data starting at the current file position, and :meth:`seek` through
    +the file to different positions.
    +
    +A memory-mapped file is created by the :func:`mmap` function, which is different
    +on Unix and on Windows.  In either case you must provide a file descriptor for a
    +file opened for update. If you wish to map an existing Python file object, use
    +its :meth:`fileno` method to obtain the correct value for the *fileno*
    +parameter.  Otherwise, you can open the file using the :func:`os.open` function,
    +which returns a file descriptor directly (the file still needs to be closed when
    +done).
    +
    +For both the Unix and Windows versions of the function, *access* may be
    +specified as an optional keyword parameter. *access* accepts one of three
    +values: :const:`ACCESS_READ`, :const:`ACCESS_WRITE`, or :const:`ACCESS_COPY` to
    +specify readonly, write-through or copy-on-write memory respectively. *access*
    +can be used on both Unix and Windows.  If *access* is not specified, Windows
    +mmap returns a write-through mapping.  The initial memory values for all three
    +access types are taken from the specified file.  Assignment to an
    +:const:`ACCESS_READ` memory map raises a :exc:`TypeError` exception.  Assignment
    +to an :const:`ACCESS_WRITE` memory map affects both memory and the underlying
    +file.  Assignment to an :const:`ACCESS_COPY` memory map affects memory but does
    +not update the underlying file.
    +
    +.. versionchanged:: 2.5
    +   To map anonymous memory, -1 should be passed as the fileno along with the
    +   length.
    +
    +
    +.. function:: mmap(fileno, length[, tagname[, access]])
    +
    +   **(Windows version)** Maps *length* bytes from the file specified by the file
    +   handle *fileno*, and returns a mmap object.  If *length* is larger than the
    +   current size of the file, the file is extended to contain *length* bytes.  If
    +   *length* is ``0``, the maximum length of the map is the current size of the
    +   file, except that if the file is empty Windows raises an exception (you cannot
    +   create an empty mapping on Windows).
    +
    +   *tagname*, if specified and not ``None``, is a string giving a tag name for the
    +   mapping.  Windows allows you to have many different mappings against the same
    +   file.  If you specify the name of an existing tag, that tag is opened, otherwise
    +   a new tag of this name is created.  If this parameter is omitted or ``None``,
    +   the mapping is created without a name.  Avoiding the use of the tag parameter
    +   will assist in keeping your code portable between Unix and Windows.
    +
    +
    +.. function:: mmap(fileno, length[, flags[, prot[, access]]])
    +   :noindex:
    +
    +   **(Unix version)** Maps *length* bytes from the file specified by the file
    +   descriptor *fileno*, and returns a mmap object.  If *length* is ``0``, the
    +   maximum length of the map will be the current size of the file when :func:`mmap`
    +   is called.
    +
    +   *flags* specifies the nature of the mapping. :const:`MAP_PRIVATE` creates a
    +   private copy-on-write mapping, so changes to the contents of the mmap object
    +   will be private to this process, and :const:`MAP_SHARED` creates a mapping
    +   that's shared with all other processes mapping the same areas of the file.  The
    +   default value is :const:`MAP_SHARED`.
    +
    +   *prot*, if specified, gives the desired memory protection; the two most useful
    +   values are :const:`PROT_READ` and :const:`PROT_WRITE`, to specify that the pages
    +   may be read or written.  *prot* defaults to :const:`PROT_READ \| PROT_WRITE`.
    +
    +   *access* may be specified in lieu of *flags* and *prot* as an optional keyword
    +   parameter.  It is an error to specify both *flags*, *prot* and *access*.  See
    +   the description of *access* above for information on how to use this parameter.
    +
    +Memory-mapped file objects support the following methods:
    +
    +
    +.. method:: mmap.close()
    +
    +   Close the file.  Subsequent calls to other methods of the object will result in
    +   an exception being raised.
    +
    +
    +.. method:: mmap.find(string[, start])
    +
    +   Returns the lowest index in the object where the substring *string* is found.
    +   Returns ``-1`` on failure.  *start* is the index at which the search begins, and
    +   defaults to zero.
    +
    +
    +.. method:: mmap.flush([offset, size])
    +
    +   Flushes changes made to the in-memory copy of a file back to disk. Without use
    +   of this call there is no guarantee that changes are written back before the
    +   object is destroyed.  If *offset* and *size* are specified, only changes to the
    +   given range of bytes will be flushed to disk; otherwise, the whole extent of the
    +   mapping is flushed.
    +
    +
    +.. method:: mmap.move(dest, src, count)
    +
    +   Copy the *count* bytes starting at offset *src* to the destination index *dest*.
    +   If the mmap was created with :const:`ACCESS_READ`, then calls to move will throw
    +   a :exc:`TypeError` exception.
    +
    +
    +.. method:: mmap.read(num)
    +
    +   Return a string containing up to *num* bytes starting from the current file
    +   position; the file position is updated to point after the bytes that were
    +   returned.
    +
    +
    +.. method:: mmap.read_byte()
    +
    +   Returns a string of length 1 containing the character at the current file
    +   position, and advances the file position by 1.
    +
    +
    +.. method:: mmap.readline()
    +
    +   Returns a single line, starting at the current file position and up to the next
    +   newline.
    +
    +
    +.. method:: mmap.resize(newsize)
    +
    +   Resizes the map and the underlying file, if any. If the mmap was created with
    +   :const:`ACCESS_READ` or :const:`ACCESS_COPY`, resizing the map will throw a
    +   :exc:`TypeError` exception.
    +
    +
    +.. method:: mmap.seek(pos[, whence])
    +
    +   Set the file's current position.  The *whence* argument is optional and
    +   defaults to ``os.SEEK_SET`` or ``0`` (absolute file positioning); other values
    +   are ``os.SEEK_CUR`` or ``1`` (seek relative to the current position) and
    +   ``os.SEEK_END`` or ``2`` (seek relative to the file's end).
    +
    +
    +.. method:: mmap.size()
    +
    +   Return the length of the file, which can be larger than the size of the
    +   memory-mapped area.
    +
    +
    +.. method:: mmap.tell()
    +
    +   Returns the current position of the file pointer.
    +
    +
    +.. method:: mmap.write(string)
    +
    +   Write the bytes in *string* into memory at the current position of the file
    +   pointer; the file position is updated to point after the bytes that were
    +   written. If the mmap was created with :const:`ACCESS_READ`, then writing to it
    +   will throw a :exc:`TypeError` exception.
    +
    +
    +.. method:: mmap.write_byte(byte)
    +
    +   Write the single-character string *byte* into memory at the current position of
    +   the file pointer; the file position is advanced by ``1``. If the mmap was
    +   created with :const:`ACCESS_READ`, then writing to it will throw a
    +   :exc:`TypeError` exception.
    +
    diff --git a/Doc/library/modulefinder.rst b/Doc/library/modulefinder.rst
    new file mode 100644
    index 0000000..334bd5d
    --- /dev/null
    +++ b/Doc/library/modulefinder.rst
    @@ -0,0 +1,52 @@
    +
    +:mod:`modulefinder` --- Find modules used by a script
    +=====================================================
    +
    +.. sectionauthor:: A.M. Kuchling 
    +
    +
    +.. module:: modulefinder
    +   :synopsis: Find modules used by a script.
    +
    +
    +.. versionadded:: 2.3
    +
    +This module provides a :class:`ModuleFinder` class that can be used to determine
    +the set of modules imported by a script. ``modulefinder.py`` can also be run as
    +a script, giving the filename of a Python script as its argument, after which a
    +report of the imported modules will be printed.
    +
    +
    +.. function:: AddPackagePath(pkg_name, path)
    +
    +   Record that the package named *pkg_name* can be found in the specified *path*.
    +
    +
    +.. function:: ReplacePackage(oldname, newname)
    +
    +   Allows specifying that the module named *oldname* is in fact the package named
    +   *newname*.  The most common usage would be  to handle how the :mod:`_xmlplus`
    +   package replaces the :mod:`xml` package.
    +
    +
    +.. class:: ModuleFinder([path=None, debug=0, excludes=[], replace_paths=[]])
    +
    +   This class provides :meth:`run_script` and :meth:`report` methods to determine
    +   the set of modules imported by a script. *path* can be a list of directories to
    +   search for modules; if not specified, ``sys.path`` is used.  *debug* sets the
    +   debugging level; higher values make the class print  debugging messages about
    +   what it's doing. *excludes* is a list of module names to exclude from the
    +   analysis. *replace_paths* is a list of ``(oldpath, newpath)`` tuples that will
    +   be replaced in module paths.
    +
    +
    +.. method:: ModuleFinder.report()
    +
    +   Print a report to standard output that lists the modules imported by the script
    +   and their paths, as well as modules that are missing or seem to be missing.
    +
    +
    +.. method:: ModuleFinder.run_script(pathname)
    +
    +   Analyze the contents of the *pathname* file, which must contain  Python code.
    +
    diff --git a/Doc/library/modules.rst b/Doc/library/modules.rst
    new file mode 100644
    index 0000000..2590a3a
    --- /dev/null
    +++ b/Doc/library/modules.rst
    @@ -0,0 +1,20 @@
    +
    +.. _modules:
    +
    +*****************
    +Importing Modules
    +*****************
    +
    +The modules described in this chapter provide new ways to import other Python
    +modules and hooks for customizing the import process.
    +
    +The full list of modules described in this chapter is:
    +
    +
    +.. toctree::
    +
    +   imp.rst
    +   zipimport.rst
    +   pkgutil.rst
    +   modulefinder.rst
    +   runpy.rst
    diff --git a/Doc/library/msilib.rst b/Doc/library/msilib.rst
    new file mode 100644
    index 0000000..6c7955a
    --- /dev/null
    +++ b/Doc/library/msilib.rst
    @@ -0,0 +1,537 @@
    +
    +:mod:`msilib` --- Read and write Microsoft Installer files
    +==========================================================
    +
    +.. module:: msilib
    +   :platform: Windows
    +   :synopsis: Creation of Microsoft Installer files, and CAB files.
    +.. moduleauthor:: Martin v. Löwis 
    +.. sectionauthor:: Martin v. Löwis 
    +
    +
    +.. index:: single: msi
    +
    +.. versionadded:: 2.5
    +
    +The :mod:`msilib` module supports the creation of Microsoft Installer (``.msi``)
    +files. Because these files often contain an embedded "cabinet" file (``.cab``),
    +it also exposes an API to create CAB files. Support for reading ``.cab`` files is
    +currently not implemented; read support for the ``.msi`` database is possible.
    +
    +This package aims to provide complete access to all tables in an ``.msi`` file;
    +therefore, it is a fairly low-level API. Two primary applications of this
    +package are the :mod:`distutils` command ``bdist_msi``, and the creation of
    +the Python installer package itself (although that currently uses a different
    +version of ``msilib``).
    +
    +The package contents can be roughly split into four parts: low-level CAB
    +routines, low-level MSI routines, higher-level MSI routines, and standard table
    +structures.
    +
    +
    +.. function:: FCICreate(cabname, files)
    +
    +   Create a new CAB file named *cabname*. *files* must be a list of tuples, each
    +   containing the name of the file on disk, and the name of the file inside the CAB
    +   file.
    +
    +   The files are added to the CAB file in the order they appear in the list. All
    +   files are added into a single CAB file, using the MSZIP compression algorithm.
    +
    +   Callbacks to Python for the various steps of MSI creation are currently not
    +   exposed.
    +
    +
    +.. function:: UUIDCreate()
    +
    +   Return the string representation of a new unique identifier. This wraps the
    +   Windows API functions :cfunc:`UuidCreate` and :cfunc:`UuidToString`.
    +
    +
    +.. function:: OpenDatabase(path, persist)
    +
    +   Return a new database object by calling MsiOpenDatabase.   *path* is the file
    +   name of the MSI file; *persist* can be one of the constants
    +   ``MSIDBOPEN_CREATEDIRECT``, ``MSIDBOPEN_CREATE``, ``MSIDBOPEN_DIRECT``,
    +   ``MSIDBOPEN_READONLY``, or ``MSIDBOPEN_TRANSACT``, and may include the flag
    +   ``MSIDBOPEN_PATCHFILE``. See the Microsoft documentation for the meaning of
    +   these flags; depending on the flags, an existing database is opened, or a new
    +   one created.
    +
    +
    +.. function:: CreateRecord(count)
    +
    +   Return a new record object by calling :cfunc:`MSICreateRecord`. *count* is the
    +   number of fields of the record.
    +
    +
    +.. function:: init_database(name, schema, ProductName, ProductCode, ProductVersion, Manufacturer)
    +
    +   Create and return a new database *name*, initialize it  with *schema*,  and set
    +   the properties *ProductName*, *ProductCode*, *ProductVersion*, and
    +   *Manufacturer*.
    +
    +   *schema* must be a module object containing ``tables`` and
    +   ``_Validation_records`` attributes; typically, :mod:`msilib.schema` should be
    +   used.
    +
    +   The database will contain just the schema and the validation records when this
    +   function returns.
    +
    +
    +.. function:: add_data(database, records)
    +
    +   Add all *records* to *database*.  *records* should be a list of tuples, each one
    +   containing all fields of a record according to the schema of the table.  For
    +   optional fields, ``None`` can be passed.
    +
    +   Field values can be int or long numbers, strings, or instances of the Binary
    +   class.
    +
    +
    +.. class:: Binary(filename)
    +
    +   Represents entries in the Binary table; inserting such an object using
    +   :func:`add_data` reads the file named *filename* into the table.
    +
    +
    +.. function:: add_tables(database, module)
    +
    +   Add all table content from *module* to *database*. *module* must contain an
    +   attribute *tables* listing all tables for which content should be added, and one
    +   attribute per table that has the actual content.
    +
    +   This is typically used to install the sequence tables.
    +
    +
    +.. function:: add_stream(database, name, path)
    +
    +   Add the file *path* into the ``_Stream`` table of *database*, with the stream
    +   name *name*.
    +
    +
    +.. function:: gen_uuid()
    +
    +   Return a new UUID, in the format that MSI typically requires (i.e. in curly
    +   braces, and with all hexdigits in upper-case).
    +
    +
    +.. seealso::
    +
    +   `FCICreateFile `_
    +   `UuidCreate `_
    +   `UuidToString `_
    +
    +.. _database-objects:
    +
    +Database Objects
    +----------------
    +
    +
    +.. method:: Database.OpenView(sql)
    +
    +   Return a view object, by calling :cfunc:`MSIDatabaseOpenView`. *sql* is the SQL
    +   statement to execute.
    +
    +
    +.. method:: Database.Commit()
    +
    +   Commit the changes pending in the current transaction, by calling
    +   :cfunc:`MSIDatabaseCommit`.
    +
    +
    +.. method:: Database.GetSummaryInformation(count)
    +
    +   Return a new summary information object, by calling
    +   :cfunc:`MsiGetSummaryInformation`.  *count* is the maximum number of updated
    +   values.
    +
    +
    +.. seealso::
    +
    +   `MSIOpenView `_
    +   `MSIDatabaseCommit `_
    +   `MSIGetSummaryInformation `_
    +
    +.. _view-objects:
    +
    +View Objects
    +------------
    +
    +
    +.. method:: View.Execute([params=None])
    +
    +   Execute the SQL query of the view, through :cfunc:`MSIViewExecute`. *params* is
    +   an optional record describing actual values of the parameter tokens in the
    +   query.
    +
    +
    +.. method:: View.GetColumnInfo(kind)
    +
    +   Return a record describing the columns of the view, through calling
    +   :cfunc:`MsiViewGetColumnInfo`. *kind* can be either ``MSICOLINFO_NAMES`` or
    +   ``MSICOLINFO_TYPES``.
    +
    +
    +.. method:: View.Fetch()
    +
    +   Return a result record of the query, through calling :cfunc:`MsiViewFetch`.
    +
    +
    +.. method:: View.Modify(kind, data)
    +
    +   Modify the view, by calling :cfunc:`MsiViewModify`. *kind* can be one of
    +   ``MSIMODIFY_SEEK``, ``MSIMODIFY_REFRESH``, ``MSIMODIFY_INSERT``,
    +   ``MSIMODIFY_UPDATE``, ``MSIMODIFY_ASSIGN``, ``MSIMODIFY_REPLACE``,
    +   ``MSIMODIFY_MERGE``, ``MSIMODIFY_DELETE``, ``MSIMODIFY_INSERT_TEMPORARY``,
    +   ``MSIMODIFY_VALIDATE``, ``MSIMODIFY_VALIDATE_NEW``,
    +   ``MSIMODIFY_VALIDATE_FIELD``, or ``MSIMODIFY_VALIDATE_DELETE``.
    +
    +   *data* must be a record describing the new data.
    +
    +
    +.. method:: View.Close()
    +
    +   Close the view, through :cfunc:`MsiViewClose`.
    +
    +
    +.. seealso::
    +
    +   `MsiViewExecute `_
    +   `MSIViewGetColumnInfo `_
    +   `MsiViewFetch `_
    +   `MsiViewModify `_
    +   `MsiViewClose `_
    +
    +.. _summary-objects:
    +
    +Summary Information Objects
    +---------------------------
    +
    +
    +.. method:: SummaryInformation.GetProperty(field)
    +
    +   Return a property of the summary, through :cfunc:`MsiSummaryInfoGetProperty`.
    +   *field* is the name of the property, and can be one of the constants
    +   ``PID_CODEPAGE``, ``PID_TITLE``, ``PID_SUBJECT``, ``PID_AUTHOR``,
    +   ``PID_KEYWORDS``, ``PID_COMMENTS``, ``PID_TEMPLATE``, ``PID_LASTAUTHOR``,
    +   ``PID_REVNUMBER``, ``PID_LASTPRINTED``, ``PID_CREATE_DTM``,
    +   ``PID_LASTSAVE_DTM``, ``PID_PAGECOUNT``, ``PID_WORDCOUNT``, ``PID_CHARCOUNT``,
    +   ``PID_APPNAME``, or ``PID_SECURITY``.
    +
    +
    +.. method:: SummaryInformation.GetPropertyCount()
    +
    +   Return the number of summary properties, through
    +   :cfunc:`MsiSummaryInfoGetPropertyCount`.
    +
    +
    +.. method:: SummaryInformation.SetProperty(field, value)
    +
    +   Set a property through :cfunc:`MsiSummaryInfoSetProperty`. *field* can have the
    +   same values as in :meth:`GetProperty`; *value* is the new value of the property.
    +   Possible value types are integer and string.
    +
    +
    +.. method:: SummaryInformation.Persist()
    +
    +   Write the modified properties to the summary information stream, using
    +   :cfunc:`MsiSummaryInfoPersist`.
    +
    +
    +.. seealso::
    +
    +   `MsiSummaryInfoGetProperty `_
    +   `MsiSummaryInfoGetPropertyCount `_
    +   `MsiSummaryInfoSetProperty `_
    +   `MsiSummaryInfoPersist `_
    +
    +.. _record-objects:
    +
    +Record Objects
    +--------------
    +
    +
    +.. method:: Record.GetFieldCount()
    +
    +   Return the number of fields of the record, through
    +   :cfunc:`MsiRecordGetFieldCount`.
    +
    +
    +.. method:: Record.SetString(field, value)
    +
    +   Set *field* to *value* through :cfunc:`MsiRecordSetString`. *field* must be an
    +   integer; *value* a string.
    +
    +
    +.. method:: Record.SetStream(field, value)
    +
    +   Set *field* to the contents of the file named *value*, through
    +   :cfunc:`MsiRecordSetStream`. *field* must be an integer; *value* a string.
    +
    +
    +.. method:: Record.SetInteger(field, value)
    +
    +   Set *field* to *value* through :cfunc:`MsiRecordSetInteger`. Both *field* and
    +   *value* must be integers.
    +
    +
    +.. method:: Record.ClearData()
    +
    +   Set all fields of the record to 0, through :cfunc:`MsiRecordClearData`.
    +
    +
    +.. seealso::
    +
    +   `MsiRecordGetFieldCount `_
    +   `MsiRecordSetString `_
    +   `MsiRecordSetStream `_
    +   `MsiRecordSetInteger `_
    +   `MsiRecordClear `_
    +
    +.. _msi-errors:
    +
    +Errors
    +------
    +
    +All wrappers around MSI functions raise :exc:`MsiError`; the string inside the
    +exception will contain more detail.
    +
    +
    +.. _cab:
    +
    +CAB Objects
    +-----------
    +
    +
    +.. class:: CAB(name)
    +
    +   The class :class:`CAB` represents a CAB file. During MSI construction, files
    +   will be added simultaneously to the ``Files`` table, and to a CAB file. Then,
    +   when all files have been added, the CAB file can be written, then added to the
    +   MSI file.
    +
    +   *name* is the name of the CAB file in the MSI file.
    +
    +
    +.. method:: CAB.append(full, file, logical)
    +
    +   Add the file with the pathname *full* to the CAB file, under the name *logical*.
    +   If there is already a file named *logical*, a new file name is created.
    +
    +   Return the index of the file in the CAB file, and the new name of the file
    +   inside the CAB file.
    +
    +
    +.. method:: CAB.commit(database)
    +
    +   Generate a CAB file, add it as a stream to the MSI file, put it into the
    +   ``Media`` table, and remove the generated file from the disk.
    +
    +
    +.. _msi-directory:
    +
    +Directory Objects
    +-----------------
    +
    +
    +.. class:: Directory(database, cab, basedir, physical,  logical, default, component, [componentflags])
    +
    +   Create a new directory in the Directory table. There is a current component at
    +   each point in time for the directory, which is either explicitly created through
    +   :meth:`start_component`, or implicitly when files are added for the first time.
    +   Files are added into the current component, and into the cab file.  To create a
    +   directory, a base directory object needs to be specified (can be ``None``), the
    +   path to the physical directory, and a logical directory name.  *default*
    +   specifies the DefaultDir slot in the directory table. *componentflags* specifies
    +   the default flags that new components get.
    +
    +
    +.. method:: Directory.start_component([component[, feature[, flags[, keyfile[, uuid]]]]])
    +
    +   Add an entry to the Component table, and make this component the current
    +   component for this directory. If no component name is given, the directory name
    +   is used. If no *feature* is given, the current feature is used. If no *flags*
    +   are given, the directory's default flags are used. If no *keyfile* is given, the
    +   KeyPath is left null in the Component table.
    +
    +
    +.. method:: Directory.add_file(file[, src[, version[, language]]])
    +
    +   Add a file to the current component of the directory, starting a new one if
    +   there is no current component. By default, the file name in the source and the
    +   file table will be identical. If the *src* file is specified, it is interpreted
    +   relative to the current directory. Optionally, a *version* and a *language* can
    +   be specified for the entry in the File table.
    +
    +
    +.. method:: Directory.glob(pattern[, exclude])
    +
    +   Add a list of files to the current component as specified in the glob pattern.
    +   Individual files can be excluded in the *exclude* list.
    +
    +
    +.. method:: Directory.remove_pyc()
    +
    +   Remove ``.pyc``/``.pyo`` files on uninstall.
    +
    +
    +.. seealso::
    +
    +   `Directory Table `_
    +   `File Table `_
    +   `Component Table `_
    +   `FeatureComponents Table `_
    +
    +.. _features:
    +
    +Features
    +--------
    +
    +
    +.. class:: Feature(database, id, title, desc, display[, level=1[, parent[, directory[,  attributes=0]]]])
    +
    +   Add a new record to the ``Feature`` table, using the values *id*, *parent.id*,
    +   *title*, *desc*, *display*, *level*, *directory*, and *attributes*. The
    +   resulting feature object can be passed to the :meth:`start_component` method of
    +   :class:`Directory`.
    +
    +
    +.. method:: Feature.set_current()
    +
    +   Make this feature the current feature of :mod:`msilib`. New components are
    +   automatically added to the default feature, unless a feature is explicitly
    +   specified.
    +
    +
    +.. seealso::
    +
    +   `Feature Table `_
    +
    +.. _msi-gui:
    +
    +GUI classes
    +-----------
    +
    +:mod:`msilib` provides several classes that wrap the GUI tables in an MSI
    +database. However, no standard user interface is provided; use :mod:`bdist_msi`
    +to create MSI files with a user-interface for installing Python packages.
    +
    +
    +.. class:: Control(dlg, name)
    +
    +   Base class of the dialog controls. *dlg* is the dialog object the control
    +   belongs to, and *name* is the control's name.
    +
    +
    +.. method:: Control.event(event, argument[,  condition=1[, ordering]])
    +
    +   Make an entry into the ``ControlEvent`` table for this control.
    +
    +
    +.. method:: Control.mapping(event, attribute)
    +
    +   Make an entry into the ``EventMapping`` table for this control.
    +
    +
    +.. method:: Control.condition(action, condition)
    +
    +   Make an entry into the ``ControlCondition`` table for this control.
    +
    +
    +.. class:: RadioButtonGroup(dlg, name, property)
    +
    +   Create a radio button control named *name*. *property* is the installer property
    +   that gets set when a radio button is selected.
    +
    +
    +.. method:: RadioButtonGroup.add(name, x, y, width, height, text [, value])
    +
    +   Add a radio button named *name* to the group, at the coordinates *x*, *y*,
    +   *width*, *height*, and with the label *text*. If *value* is omitted, it defaults
    +   to *name*.
    +
    +
    +.. class:: Dialog(db, name, x, y, w, h, attr, title, first,  default, cancel)
    +
    +   Return a new :class:`Dialog` object. An entry in the ``Dialog`` table is made,
    +   with the specified coordinates, dialog attributes, title, name of the first,
    +   default, and cancel controls.
    +
    +
    +.. method:: Dialog.control(name, type, x, y, width, height,  attributes, property, text, control_next, help)
    +
    +   Return a new :class:`Control` object. An entry in the ``Control`` table is made
    +   with the specified parameters.
    +
    +   This is a generic method; for specific types, specialized methods are provided.
    +
    +
    +.. method:: Dialog.text(name, x, y, width, height, attributes, text)
    +
    +   Add and return a ``Text`` control.
    +
    +
    +.. method:: Dialog.bitmap(name, x, y, width, height, text)
    +
    +   Add and return a ``Bitmap`` control.
    +
    +
    +.. method:: Dialog.line(name, x, y, width, height)
    +
    +   Add and return a ``Line`` control.
    +
    +
    +.. method:: Dialog.pushbutton(name, x, y, width, height, attributes,  text, next_control)
    +
    +   Add and return a ``PushButton`` control.
    +
    +
    +.. method:: Dialog.radiogroup(name, x, y, width, height,  attributes, property, text, next_control)
    +
    +   Add and return a ``RadioButtonGroup`` control.
    +
    +
    +.. method:: Dialog.checkbox(name, x, y, width, height,  attributes, property, text, next_control)
    +
    +   Add and return a ``CheckBox`` control.
    +
    +
    +.. seealso::
    +
    +   `Dialog Table `_
    +   `Control Table `_
    +   `Control Types `_
    +   `ControlCondition Table `_
    +   `ControlEvent Table `_
    +   `EventMapping Table `_
    +   `RadioButton Table `_
    +
    +.. _msi-tables:
    +
    +Precomputed tables
    +------------------
    +
    +:mod:`msilib` provides a few subpackages that contain only schema and table
    +definitions. Currently, these definitions are based on MSI version 2.0.
    +
    +
    +.. data:: schema
    +
    +   This is the standard MSI schema for MSI 2.0, with the *tables* variable
    +   providing a list of table definitions, and *_Validation_records* providing the
    +   data for MSI validation.
    +
    +
    +.. data:: sequence
    +
    +   This module contains table contents for the standard sequence tables:
    +   *AdminExecuteSequence*, *AdminUISequence*, *AdvtExecuteSequence*,
    +   *InstallExecuteSequence*, and *InstallUISequence*.
    +
    +
    +.. data:: text
    +
    +   This module contains definitions for the UIText and ActionText tables, for the
    +   standard installer actions.
    +
    diff --git a/Doc/library/msvcrt.rst b/Doc/library/msvcrt.rst
    new file mode 100644
    index 0000000..d43bb4c
    --- /dev/null
    +++ b/Doc/library/msvcrt.rst
    @@ -0,0 +1,126 @@
    +
    +:mod:`msvcrt` -- Useful routines from the MS VC++ runtime
    +=========================================================
    +
    +.. module:: msvcrt
    +   :platform: Windows
    +   :synopsis: Miscellaneous useful routines from the MS VC++ runtime.
    +.. sectionauthor:: Fred L. Drake, Jr. 
    +
    +
    +These functions provide access to some useful capabilities on Windows platforms.
    +Some higher-level modules use these functions to build the  Windows
    +implementations of their services.  For example, the :mod:`getpass` module uses
    +these functions in the implementation of the :func:`getpass` function.
    +
    +Further documentation on these functions can be found in the Platform API
    +documentation.
    +
    +
    +.. _msvcrt-files:
    +
    +File Operations
    +---------------
    +
    +
    +.. function:: locking(fd, mode, nbytes)
    +
    +   Lock part of a file based on file descriptor *fd* from the C runtime.  Raises
    +   :exc:`IOError` on failure.  The locked region of the file extends from the
    +   current file position for *nbytes* bytes, and may continue beyond the end of the
    +   file.  *mode* must be one of the :const:`LK_\*` constants listed below. Multiple
    +   regions in a file may be locked at the same time, but may not overlap.  Adjacent
    +   regions are not merged; they must be unlocked individually.
    +
    +
    +.. data:: LK_LOCK
    +          LK_RLCK
    +
    +   Locks the specified bytes. If the bytes cannot be locked, the program
    +   tries again after 1 second.  If, after 10 attempts, the bytes cannot
    +   be locked, :exc:`IOError` is raised.
    +
    +
    +.. data:: LK_NBLCK
    +          LK_NBRLCK
    +
    +   Locks the specified bytes. If the bytes cannot be locked, :exc:`IOError` is
    +   raised.
    +
    +
    +.. data:: LK_UNLCK
    +
    +   Unlocks the specified bytes, which must have been previously locked.
    +
    +
    +.. function:: setmode(fd, flags)
    +
    +   Set the line-end translation mode for the file descriptor *fd*. To set it to
    +   text mode, *flags* should be :const:`os.O_TEXT`; for binary, it should be
    +   :const:`os.O_BINARY`.
    +
    +
    +.. function:: open_osfhandle(handle, flags)
    +
    +   Create a C runtime file descriptor from the file handle *handle*.  The *flags*
    +   parameter should be a bit-wise OR of :const:`os.O_APPEND`, :const:`os.O_RDONLY`,
    +   and :const:`os.O_TEXT`.  The returned file descriptor may be used as a parameter
    +   to :func:`os.fdopen` to create a file object.
    +
    +
    +.. function:: get_osfhandle(fd)
    +
    +   Return the file handle for the file descriptor *fd*.  Raises :exc:`IOError` if
    +   *fd* is not recognized.
    +
    +
    +.. _msvcrt-console:
    +
    +Console I/O
    +-----------
    +
    +
    +.. function:: kbhit()
    +
    +   Return true if a keypress is waiting to be read.
    +
    +
    +.. function:: getch()
    +
    +   Read a keypress and return the resulting character.  Nothing is echoed to the
    +   console.  This call will block if a keypress is not already available, but will
    +   not wait for :kbd:`Enter` to be pressed. If the pressed key was a special
    +   function key, this will return ``'\000'`` or ``'\xe0'``; the next call will
    +   return the keycode.  The :kbd:`Control-C` keypress cannot be read with this
    +   function.
    +
    +
    +.. function:: getche()
    +
    +   Similar to :func:`getch`, but the keypress will be echoed if it  represents a
    +   printable character.
    +
    +
    +.. function:: putch(char)
    +
    +   Print the character *char* to the console without buffering.
    +
    +
    +.. function:: ungetch(char)
    +
    +   Cause the character *char* to be "pushed back" into the console buffer; it will
    +   be the next character read by :func:`getch` or :func:`getche`.
    +
    +
    +.. _msvcrt-other:
    +
    +Other Functions
    +---------------
    +
    +
    +.. function:: heapmin()
    +
    +   Force the :cfunc:`malloc` heap to clean itself up and return unused blocks to
    +   the operating system.  This only works on Windows NT.  On failure, this raises
    +   :exc:`IOError`.
    +
    diff --git a/Doc/library/multifile.rst b/Doc/library/multifile.rst
    new file mode 100644
    index 0000000..c36ccb7
    --- /dev/null
    +++ b/Doc/library/multifile.rst
    @@ -0,0 +1,190 @@
    +
    +:mod:`multifile` --- Support for files containing distinct parts
    +================================================================
    +
    +.. module:: multifile
    +   :synopsis: Support for reading files which contain distinct parts, such as some MIME data.
    +.. sectionauthor:: Eric S. Raymond 
    +
    +
    +.. deprecated:: 2.5
    +   The :mod:`email` package should be used in preference to the :mod:`multifile`
    +   module. This module is present only to maintain backward compatibility.
    +
    +The :class:`MultiFile` object enables you to treat sections of a text file as
    +file-like input objects, with ``''`` being returned by :meth:`readline` when a
    +given delimiter pattern is encountered.  The defaults of this class are designed
    +to make it useful for parsing MIME multipart messages, but by subclassing it and
    +overriding methods  it can be easily adapted for more general use.
    +
    +
    +.. class:: MultiFile(fp[, seekable])
    +
    +   Create a multi-file.  You must instantiate this class with an input object
    +   argument for the :class:`MultiFile` instance to get lines from, such as a file
    +   object returned by :func:`open`.
    +
    +   :class:`MultiFile` only ever looks at the input object's :meth:`readline`,
    +   :meth:`seek` and :meth:`tell` methods, and the latter two are only needed if you
    +   want random access to the individual MIME parts. To use :class:`MultiFile` on a
    +   non-seekable stream object, set the optional *seekable* argument to false; this
    +   will prevent using the input object's :meth:`seek` and :meth:`tell` methods.
    +
    +It will be useful to know that in :class:`MultiFile`'s view of the world, text
    +is composed of three kinds of lines: data, section-dividers, and end-markers.
    +MultiFile is designed to support parsing of messages that may have multiple
    +nested message parts, each with its own pattern for section-divider and
    +end-marker lines.
    +
    +
    +.. seealso::
    +
    +   Module :mod:`email`
    +      Comprehensive email handling package; supersedes the :mod:`multifile` module.
    +
    +
    +.. _multifile-objects:
    +
    +MultiFile Objects
    +-----------------
    +
    +A :class:`MultiFile` instance has the following methods:
    +
    +
    +.. method:: MultiFile.readline()
    +
    +   Read a line.  If the line is data (not a section-divider or end-marker or real
    +   EOF) return it.  If the line matches the most-recently-stacked boundary, return
    +   ``''`` and set ``self.last`` to 1 or 0 according as the match is or is not an
    +   end-marker.  If the line matches any other stacked boundary, raise an error.  On
    +   encountering end-of-file on the underlying stream object, the method raises
    +   :exc:`Error` unless all boundaries have been popped.
    +
    +
    +.. method:: MultiFile.readlines()
    +
    +   Return all lines remaining in this part as a list of strings.
    +
    +
    +.. method:: MultiFile.read()
    +
    +   Read all lines, up to the next section.  Return them as a single (multiline)
    +   string.  Note that this doesn't take a size argument!
    +
    +
    +.. method:: MultiFile.seek(pos[, whence])
    +
    +   Seek.  Seek indices are relative to the start of the current section. The *pos*
    +   and *whence* arguments are interpreted as for a file seek.
    +
    +
    +.. method:: MultiFile.tell()
    +
    +   Return the file position relative to the start of the current section.
    +
    +
    +.. method:: MultiFile.next()
    +
    +   Skip lines to the next section (that is, read lines until a section-divider or
    +   end-marker has been consumed).  Return true if there is such a section, false if
    +   an end-marker is seen.  Re-enable the most-recently-pushed boundary.
    +
    +
    +.. method:: MultiFile.is_data(str)
    +
    +   Return true if *str* is data and false if it might be a section boundary.  As
    +   written, it tests for a prefix other than ``'--'`` at start of line (which
    +   all MIME boundaries have) but it is declared so it can be overridden in derived
    +   classes.
    +
    +   Note that this test is intended as a fast guard for the real boundary
    +   tests; if it always returns false it will merely slow processing, not cause it
    +   to fail.
    +
    +
    +.. method:: MultiFile.push(str)
    +
    +   Push a boundary string.  When a decorated version of this boundary  is found as
    +   an input line, it will be interpreted as a section-divider  or end-marker
    +   (depending on the decoration, see :rfc:`2045`).  All subsequent reads will
    +   return the empty string to indicate end-of-file, until a call to :meth:`pop`
    +   removes the boundary or a :meth:`next` call reenables it.
    +
    +   It is possible to push more than one boundary.  Encountering the
    +   most-recently-pushed boundary will return EOF; encountering any other
    +   boundary will raise an error.
    +
    +
    +.. method:: MultiFile.pop()
    +
    +   Pop a section boundary.  This boundary will no longer be interpreted as EOF.
    +
    +
    +.. method:: MultiFile.section_divider(str)
    +
    +   Turn a boundary into a section-divider line.  By default, this method
    +   prepends ``'--'`` (which MIME section boundaries have) but it is declared so
    +   it can be overridden in derived classes.  This method need not append LF or
    +   CR-LF, as comparison with the result ignores trailing whitespace.
    +
    +
    +.. method:: MultiFile.end_marker(str)
    +
    +   Turn a boundary string into an end-marker line.  By default, this method
    +   prepends ``'--'`` and appends ``'--'`` (like a MIME-multipart end-of-message
    +   marker) but it is declared so it can be overridden in derived classes.  This
    +   method need not append LF or CR-LF, as comparison with the result ignores
    +   trailing whitespace.
    +
    +Finally, :class:`MultiFile` instances have two public instance variables:
    +
    +
    +.. attribute:: MultiFile.level
    +
    +   Nesting depth of the current part.
    +
    +
    +.. attribute:: MultiFile.last
    +
    +   True if the last end-of-file was for an end-of-message marker.
    +
    +
    +.. _multifile-example:
    +
    +:class:`MultiFile` Example
    +--------------------------
    +
    +.. sectionauthor:: Skip Montanaro 
    +
    +
    +::
    +
    +   import mimetools
    +   import multifile
    +   import StringIO
    +
    +   def extract_mime_part_matching(stream, mimetype):
    +       """Return the first element in a multipart MIME message on stream
    +       matching mimetype."""
    +
    +       msg = mimetools.Message(stream)
    +       msgtype = msg.gettype()
    +       params = msg.getplist()
    +
    +       data = StringIO.StringIO()
    +       if msgtype[:10] == "multipart/":
    +
    +           file = multifile.MultiFile(stream)
    +           file.push(msg.getparam("boundary"))
    +           while file.next():
    +               submsg = mimetools.Message(file)
    +               try:
    +                   data = StringIO.StringIO()
    +                   mimetools.decode(file, data, submsg.getencoding())
    +               except ValueError:
    +                   continue
    +               if submsg.gettype() == mimetype:
    +                   break
    +           file.pop()
    +       return data.getvalue()
    +
    diff --git a/Doc/library/mutex.rst b/Doc/library/mutex.rst
    new file mode 100644
    index 0000000..523692f
    --- /dev/null
    +++ b/Doc/library/mutex.rst
    @@ -0,0 +1,62 @@
    +
    +:mod:`mutex` --- Mutual exclusion support
    +=========================================
    +
    +.. module:: mutex
    +   :synopsis: Lock and queue for mutual exclusion.
    +.. sectionauthor:: Moshe Zadka 
    +
    +
    +The :mod:`mutex` module defines a class that allows mutual-exclusion via
    +acquiring and releasing locks. It does not require (or imply) threading or
    +multi-tasking, though it could be useful for those purposes.
    +
    +The :mod:`mutex` module defines the following class:
    +
    +
    +.. class:: mutex()
    +
    +   Create a new (unlocked) mutex.
    +
    +   A mutex has two pieces of state --- a "locked" bit and a queue. When the mutex
    +   is not locked, the queue is empty. Otherwise, the queue contains zero or more
    +   ``(function, argument)`` pairs representing functions (or methods) waiting to
    +   acquire the lock. When the mutex is unlocked while the queue is not empty, the
    +   first queue entry is removed and its  ``function(argument)`` pair called,
    +   implying it now has the lock.
    +
    +   Of course, no multi-threading is implied -- hence the funny interface for
    +   :meth:`lock`, where a function is called once the lock is acquired.
    +
    +
    +.. _mutex-objects:
    +
    +Mutex Objects
    +-------------
    +
    +:class:`mutex` objects have the following methods:
    +
    +
    +.. method:: mutex.test()
    +
    +   Check whether the mutex is locked.
    +
    +
    +.. method:: mutex.testandset()
    +
    +   "Atomic" test-and-set: grab the lock if it is not set and return ``True``;
    +   otherwise, return ``False``.
    +
    +
    +.. method:: mutex.lock(function, argument)
    +
    +   Execute ``function(argument)``, unless the mutex is locked. In the case it is
    +   locked, place the function and argument on the queue. See :meth:`unlock` for
    +   explanation of when ``function(argument)`` is executed in that case.
    +
    +
    +.. method:: mutex.unlock()
    +
    +   Unlock the mutex if the queue is empty, otherwise execute the first element in
    +   the queue.
    +
    diff --git a/Doc/library/netdata.rst b/Doc/library/netdata.rst
    new file mode 100644
    index 0000000..add01d2
    --- /dev/null
    +++ b/Doc/library/netdata.rst
    @@ -0,0 +1,26 @@
    +
    +.. _netdata:
    +
    +**********************
    +Internet Data Handling
    +**********************
    +
    +This chapter describes modules which support handling data formats commonly used
    +on the Internet.
    +
    +
    +.. toctree::
    +
    +   email.rst
    +   mailcap.rst
    +   mailbox.rst
    +   mhlib.rst
    +   mimetools.rst
    +   mimetypes.rst
    +   multifile.rst
    +   rfc822.rst
    +   base64.rst
    +   binhex.rst
    +   binascii.rst
    +   quopri.rst
    +   uu.rst
    diff --git a/Doc/library/netrc.rst b/Doc/library/netrc.rst
    new file mode 100644
    index 0000000..bf3d92e
    --- /dev/null
    +++ b/Doc/library/netrc.rst
    @@ -0,0 +1,78 @@
    +
    +:mod:`netrc` --- netrc file processing
    +======================================
    +
    +.. module:: netrc
    +   :synopsis: Loading of .netrc files.
    +.. moduleauthor:: Eric S. Raymond 
    +.. sectionauthor:: Eric S. Raymond 
    +
    +
    +.. % Note the \protect needed for \file... ;-(
    +
    +.. versionadded:: 1.5.2
    +
    +The :class:`netrc` class parses and encapsulates the netrc file format used by
    +the Unix :program:`ftp` program and other FTP clients.
    +
    +
    +.. class:: netrc([file])
    +
    +   A :class:`netrc` instance or subclass instance encapsulates data from  a netrc
    +   file.  The initialization argument, if present, specifies the file to parse.  If
    +   no argument is given, the file :file:`.netrc` in the user's home directory will
    +   be read.  Parse errors will raise :exc:`NetrcParseError` with diagnostic
    +   information including the file name, line number, and terminating token.
    +
    +
    +.. exception:: NetrcParseError
    +
    +   Exception raised by the :class:`netrc` class when syntactical errors are
    +   encountered in source text.  Instances of this exception provide three
    +   interesting attributes:  :attr:`msg` is a textual explanation of the error,
    +   :attr:`filename` is the name of the source file, and :attr:`lineno` gives the
    +   line number on which the error was found.
    +
    +
    +.. _netrc-objects:
    +
    +netrc Objects
    +-------------
    +
    +A :class:`netrc` instance has the following methods:
    +
    +
    +.. method:: netrc.authenticators(host)
    +
    +   Return a 3-tuple ``(login, account, password)`` of authenticators for *host*.
    +   If the netrc file did not contain an entry for the given host, return the tuple
    +   associated with the 'default' entry.  If neither matching host nor default entry
    +   is available, return ``None``.
    +
    +
    +.. method:: netrc.__repr__()
    +
    +   Dump the class data as a string in the format of a netrc file. (This discards
    +   comments and may reorder the entries.)
    +
    +Instances of :class:`netrc` have public instance variables:
    +
    +
    +.. attribute:: netrc.hosts
    +
    +   Dictionary mapping host names to ``(login, account, password)`` tuples.  The
    +   'default' entry, if any, is represented as a pseudo-host by that name.
    +
    +
    +.. attribute:: netrc.macros
    +
    +   Dictionary mapping macro names to string lists.
    +
    +.. note::
    +
    +   Passwords are limited to a subset of the ASCII character set. Versions of
    +   this module prior to 2.3 were extremely limited.  Starting with 2.3, all
    +   ASCII punctuation is allowed in passwords.  However, note that whitespace and
    +   non-printable characters are not allowed in passwords.  This is a limitation
    +   of the way the .netrc file is parsed and may be removed in the future.
    +
    diff --git a/Doc/library/new.rst b/Doc/library/new.rst
    new file mode 100644
    index 0000000..852fb58
    --- /dev/null
    +++ b/Doc/library/new.rst
    @@ -0,0 +1,53 @@
    +
    +:mod:`new` --- Creation of runtime internal objects
    +===================================================
    +
    +.. module:: new
    +   :synopsis: Interface to the creation of runtime implementation objects.
    +.. sectionauthor:: Moshe Zadka 
    +
    +
    +The :mod:`new` module provides an interface to the interpreter object creation
    +functions. This is for use primarily in marshal-type functions, when a new
    +object needs to be created "magically" and not by using the regular creation
    +functions. This module provides a low-level interface to the interpreter, so
    +care must be exercised when using this module. It is possible to supply
    +nonsensical arguments which crash the interpreter when the object is used.
    +
    +The :mod:`new` module defines the following functions:
    +
    +
    +.. function:: instancemethod(function, instance, class)
    +
    +   This function will return a method object, bound to *instance*, or unbound if
    +   *instance* is ``None``.  *function* must be callable.
    +
    +
    +.. function:: function(code, globals[, name[, argdefs[, closure]]])
    +
    +   Returns a (Python) function with the given code and globals. If *name* is given,
    +   it must be a string or ``None``.  If it is a string, the function will have the
    +   given name, otherwise the function name will be taken from ``code.co_name``.  If
    +   *argdefs* is given, it must be a tuple and will be used to determine the default
    +   values of parameters.  If *closure* is given, it must be ``None`` or a tuple of
    +   cell objects containing objects to bind to the names in ``code.co_freevars``.
    +
    +
    +.. function:: code(argcount, nlocals, stacksize, flags, codestring, constants, names, varnames, filename, name, firstlineno, lnotab)
    +
    +   This function is an interface to the :cfunc:`PyCode_New` C function.
    +
    +   .. % XXX This is still undocumented!!!!!!!!!!!
    +
    +
    +.. function:: module(name[, doc])
    +
    +   This function returns a new module object with name *name*. *name* must be a
    +   string. The optional *doc* argument can have any type.
    +
    +
    +.. function:: classobj(name, baseclasses, dict)
    +
    +   This function returns a new class object, with name *name*, derived from
    +   *baseclasses* (which should be a tuple of classes) and with namespace *dict*.
    +
    diff --git a/Doc/library/nis.rst b/Doc/library/nis.rst
    new file mode 100644
    index 0000000..77684bf
    --- /dev/null
    +++ b/Doc/library/nis.rst
    @@ -0,0 +1,68 @@
    +
    +:mod:`nis` --- Interface to Sun's NIS (Yellow Pages)
    +====================================================
    +
    +.. module:: nis
    +   :platform: Unix
    +   :synopsis: Interface to Sun's NIS (Yellow Pages) library.
    +.. moduleauthor:: Fred Gansevles 
    +.. sectionauthor:: Moshe Zadka 
    +
    +
    +The :mod:`nis` module gives a thin wrapper around the NIS library, useful for
    +central administration of several hosts.
    +
    +Because NIS exists only on Unix systems, this module is only available for Unix.
    +
    +The :mod:`nis` module defines the following functions:
    +
    +
    +.. function:: match(key, mapname[, domain=default_domain])
    +
    +   Return the match for *key* in map *mapname*, or raise an error
    +   (:exc:`nis.error`) if there is none. Both should be strings, *key* is 8-bit
    +   clean. Return value is an arbitrary array of bytes (may contain ``NULL`` and
    +   other joys).
    +
    +   Note that *mapname* is first checked to see if it is an alias for another name.
    +
    +   .. versionchanged:: 2.5
    +      The *domain* argument allows overriding the NIS domain used for the lookup. If
    +      unspecified, lookup is in the default NIS domain.
    +
    +
    +.. function:: cat(mapname[, domain=default_domain])
    +
    +   Return a dictionary mapping *key* to *value* such that ``match(key,
    +   mapname)==value``. Note that both keys and values of the dictionary are
    +   arbitrary arrays of bytes.
    +
    +   Note that *mapname* is first checked to see if it is an alias for another name.
    +
    +   .. versionchanged:: 2.5
    +      The *domain* argument allows overriding the NIS domain used for the lookup. If
    +      unspecified, lookup is in the default NIS domain.
    +
    +
    +.. function:: maps([domain=default_domain])
    +
    +   Return a list of all valid maps.
    +
    +   .. versionchanged:: 2.5
    +      The *domain* argument allows overriding the NIS domain used for the lookup. If
    +      unspecified, lookup is in the default NIS domain.
    +
    +
    +.. function:: get_default_domain()
    +
    +   Return the system default NIS domain.
    +
    +   .. versionadded:: 2.5
    +
    +The :mod:`nis` module defines the following exception:
    +
    +
    +.. exception:: error
    +
    +   An error raised when a NIS function returns an error code.
    +
    diff --git a/Doc/library/nntplib.rst b/Doc/library/nntplib.rst
    new file mode 100644
    index 0000000..5bc947e
    --- /dev/null
    +++ b/Doc/library/nntplib.rst
    @@ -0,0 +1,350 @@
    +
    +:mod:`nntplib` --- NNTP protocol client
    +=======================================
    +
    +.. module:: nntplib
    +   :synopsis: NNTP protocol client (requires sockets).
    +
    +
    +.. index::
    +   pair: NNTP; protocol
    +   single: Network News Transfer Protocol
    +
    +This module defines the class :class:`NNTP` which implements the client side of
    +the NNTP protocol.  It can be used to implement a news reader or poster, or
    +automated news processors.  For more information on NNTP (Network News Transfer
    +Protocol), see Internet :rfc:`977`.
    +
    +Here are two small examples of how it can be used.  To list some statistics
    +about a newsgroup and print the subjects of the last 10 articles::
    +
    +   >>> s = NNTP('news.cwi.nl')
    +   >>> resp, count, first, last, name = s.group('comp.lang.python')
    +   >>> print 'Group', name, 'has', count, 'articles, range', first, 'to', last
    +   Group comp.lang.python has 59 articles, range 3742 to 3803
    +   >>> resp, subs = s.xhdr('subject', first + '-' + last)
    +   >>> for id, sub in subs[-10:]: print id, sub
    +   ... 
    +   3792 Re: Removing elements from a list while iterating...
    +   3793 Re: Who likes Info files?
    +   3794 Emacs and doc strings
    +   3795 a few questions about the Mac implementation
    +   3796 Re: executable python scripts
    +   3797 Re: executable python scripts
    +   3798 Re: a few questions about the Mac implementation 
    +   3799 Re: PROPOSAL: A Generic Python Object Interface for Python C Modules
    +   3802 Re: executable python scripts 
    +   3803 Re: POSIX wait and SIGCHLD
    +   >>> s.quit()
    +   '205 news.cwi.nl closing connection.  Goodbye.'
    +
    +To post an article from a file (this assumes that the article has valid
    +headers)::
    +
    +   >>> s = NNTP('news.cwi.nl')
    +   >>> f = open('/tmp/article')
    +   >>> s.post(f)
    +   '240 Article posted successfully.'
    +   >>> s.quit()
    +   '205 news.cwi.nl closing connection.  Goodbye.'
    +
    +The module itself defines the following items:
    +
    +
    +.. class:: NNTP(host[, port [, user[, password [, readermode] [, usenetrc]]]])
    +
    +   Return a new instance of the :class:`NNTP` class, representing a connection
    +   to the NNTP server running on host *host*, listening at port *port*.  The
    +   default *port* is 119.  If the optional *user* and *password* are provided,
    +   or if suitable credentials are present in :file:`~/.netrc` and the optional
    +   flag *usenetrc* is true (the default), the ``AUTHINFO USER`` and ``AUTHINFO
    +   PASS`` commands are used to identify and authenticate the user to the server.
    +   If the optional flag *readermode* is true, then a ``mode reader`` command is
    +   sent before authentication is performed.  Reader mode is sometimes necessary
    +   if you are connecting to an NNTP server on the local machine and intend to
    +   call reader-specific commands, such as ``group``.  If you get unexpected
    +   :exc:`NNTPPermanentError`\ s, you might need to set *readermode*.
    +   *readermode* defaults to ``None``. *usenetrc* defaults to ``True``.
    +
    +   .. versionchanged:: 2.4
    +      *usenetrc* argument added.
    +
    +
    +.. exception:: NNTPError
    +
    +   Derived from the standard exception :exc:`Exception`, this is the base class for
    +   all exceptions raised by the :mod:`nntplib` module.
    +
    +
    +.. exception:: NNTPReplyError
    +
    +   Exception raised when an unexpected reply is received from the server.  For
    +   backwards compatibility, the exception ``error_reply`` is equivalent to this
    +   class.
    +
    +
    +.. exception:: NNTPTemporaryError
    +
    +   Exception raised when an error code in the range 400--499 is received.  For
    +   backwards compatibility, the exception ``error_temp`` is equivalent to this
    +   class.
    +
    +
    +.. exception:: NNTPPermanentError
    +
    +   Exception raised when an error code in the range 500--599 is received.  For
    +   backwards compatibility, the exception ``error_perm`` is equivalent to this
    +   class.
    +
    +
    +.. exception:: NNTPProtocolError
    +
    +   Exception raised when a reply is received from the server that does not begin
    +   with a digit in the range 1--5.  For backwards compatibility, the exception
    +   ``error_proto`` is equivalent to this class.
    +
    +
    +.. exception:: NNTPDataError
    +
    +   Exception raised when there is some error in the response data.  For backwards
    +   compatibility, the exception ``error_data`` is equivalent to this class.
    +
    +
    +.. _nntp-objects:
    +
    +NNTP Objects
    +------------
    +
    +NNTP instances have the following methods.  The *response* that is returned as
    +the first item in the return tuple of almost all methods is the server's
    +response: a string beginning with a three-digit code. If the server's response
    +indicates an error, the method raises one of the above exceptions.
    +
    +
    +.. method:: NNTP.getwelcome()
    +
    +   Return the welcome message sent by the server in reply to the initial
    +   connection.  (This message sometimes contains disclaimers or help information
    +   that may be relevant to the user.)
    +
    +
    +.. method:: NNTP.set_debuglevel(level)
    +
    +   Set the instance's debugging level.  This controls the amount of debugging
    +   output printed.  The default, ``0``, produces no debugging output.  A value of
    +   ``1`` produces a moderate amount of debugging output, generally a single line
    +   per request or response.  A value of ``2`` or higher produces the maximum amount
    +   of debugging output, logging each line sent and received on the connection
    +   (including message text).
    +
    +
    +.. method:: NNTP.newgroups(date, time, [file])
    +
    +   Send a ``NEWGROUPS`` command.  The *date* argument should be a string of the
    +   form ``'yymmdd'`` indicating the date, and *time* should be a string of the form
    +   ``'hhmmss'`` indicating the time.  Return a pair ``(response, groups)`` where
    +   *groups* is a list of group names that are new since the given date and time. If
    +   the *file* parameter is supplied, then the output of the  ``NEWGROUPS`` command
    +   is stored in a file.  If *file* is a string,  then the method will open a file
    +   object with that name, write to it  then close it.  If *file* is a file object,
    +   then it will start calling :meth:`write` on it to store the lines of the command
    +   output. If *file* is supplied, then the returned *groups* is an empty list.
    +
    +
    +.. method:: NNTP.newnews(group, date, time, [file])
    +
    +   Send a ``NEWNEWS`` command.  Here, *group* is a group name or ``'*'``, and
    +   *date* and *time* have the same meaning as for :meth:`newgroups`.  Return a pair
    +   ``(response, articles)`` where *articles* is a list of message ids. If the
    +   *file* parameter is supplied, then the output of the  ``NEWNEWS`` command is
    +   stored in a file.  If *file* is a string,  then the method will open a file
    +   object with that name, write to it  then close it.  If *file* is a file object,
    +   then it will start calling :meth:`write` on it to store the lines of the command
    +   output. If *file* is supplied, then the returned *articles* is an empty list.
    +
    +
    +.. method:: NNTP.list([file])
    +
    +   Send a ``LIST`` command.  Return a pair ``(response, list)`` where *list* is a
    +   list of tuples.  Each tuple has the form ``(group, last, first, flag)``, where
    +   *group* is a group name, *last* and *first* are the last and first article
    +   numbers (as strings), and *flag* is ``'y'`` if posting is allowed, ``'n'`` if
    +   not, and ``'m'`` if the newsgroup is moderated.  (Note the ordering: *last*,
    +   *first*.) If the *file* parameter is supplied, then the output of the  ``LIST``
    +   command is stored in a file.  If *file* is a string,  then the method will open
    +   a file object with that name, write to it  then close it.  If *file* is a file
    +   object, then it will start calling :meth:`write` on it to store the lines of the
    +   command output. If *file* is supplied, then the returned *list* is an empty
    +   list.
    +
    +
    +.. method:: NNTP.descriptions(grouppattern)
    +
    +   Send a ``LIST NEWSGROUPS`` command, where *grouppattern* is a wildmat string as
    +   specified in :rfc:`2980` (it's essentially the same as DOS or UNIX shell wildcard
    +   strings).  Return a pair ``(response, list)``, where *list* is a list of tuples
    +   containing ``(name, title)``.
    +
    +   .. versionadded:: 2.4
    +
    +
    +.. method:: NNTP.description(group)
    +
    +   Get a description for a single group *group*.  If more than one group matches
    +   (if *group* is a real wildmat string), return the first match.  If no group
    +   matches, return an empty string.
    +
    +   This elides the response code from the server.  If the response code is needed,
    +   use :meth:`descriptions`.
    +
    +   .. versionadded:: 2.4
    +
    +
    +.. method:: NNTP.group(name)
    +
    +   Send a ``GROUP`` command, where *name* is the group name. Return a tuple
    +   ``(response, count, first, last, name)`` where *count* is the (estimated) number
    +   of articles in the group, *first* is the first article number in the group,
    +   *last* is the last article number in the group, and *name* is the group name.
    +   The numbers are returned as strings.
    +
    +
    +.. method:: NNTP.help([file])
    +
    +   Send a ``HELP`` command.  Return a pair ``(response, list)`` where *list* is a
    +   list of help strings. If the *file* parameter is supplied, then the output of
    +   the  ``HELP`` command is stored in a file.  If *file* is a string,  then the
    +   method will open a file object with that name, write to it  then close it.  If
    +   *file* is a file object, then it will start calling :meth:`write` on it to store
    +   the lines of the command output. If *file* is supplied, then the returned *list*
    +   is an empty list.
    +
    +
    +.. method:: NNTP.stat(id)
    +
    +   Send a ``STAT`` command, where *id* is the message id (enclosed in ``'<'`` and
    +   ``'>'``) or an article number (as a string). Return a triple ``(response,
    +   number, id)`` where *number* is the article number (as a string) and *id* is the
    +   message id  (enclosed in ``'<'`` and ``'>'``).
    +
    +
    +.. method:: NNTP.next()
    +
    +   Send a ``NEXT`` command.  Return as for :meth:`stat`.
    +
    +
    +.. method:: NNTP.last()
    +
    +   Send a ``LAST`` command.  Return as for :meth:`stat`.
    +
    +
    +.. method:: NNTP.head(id)
    +
    +   Send a ``HEAD`` command, where *id* has the same meaning as for :meth:`stat`.
    +   Return a tuple ``(response, number, id, list)`` where the first three are the
    +   same as for :meth:`stat`, and *list* is a list of the article's headers (an
    +   uninterpreted list of lines, without trailing newlines).
    +
    +
    +.. method:: NNTP.body(id, [file])
    +
    +   Send a ``BODY`` command, where *id* has the same meaning as for :meth:`stat`.
    +   If the *file* parameter is supplied, then the body is stored in a file.  If
    +   *file* is a string, then the method will open a file object with that name,
    +   write to it then close it. If *file* is a file object, then it will start
    +   calling :meth:`write` on it to store the lines of the body. Return as for
    +   :meth:`head`.  If *file* is supplied, then the returned *list* is an empty list.
    +
    +
    +.. method:: NNTP.article(id)
    +
    +   Send an ``ARTICLE`` command, where *id* has the same meaning as for
    +   :meth:`stat`.  Return as for :meth:`head`.
    +
    +
    +.. method:: NNTP.slave()
    +
    +   Send a ``SLAVE`` command.  Return the server's *response*.
    +
    +
    +.. method:: NNTP.xhdr(header, string, [file])
    +
    +   Send an ``XHDR`` command.  This command is not defined in the RFC but is a
    +   common extension.  The *header* argument is a header keyword, e.g.
    +   ``'subject'``.  The *string* argument should have the form ``'first-last'``
    +   where *first* and *last* are the first and last article numbers to search.
    +   Return a pair ``(response, list)``, where *list* is a list of pairs ``(id,
    +   text)``, where *id* is an article number (as a string) and *text* is the text of
    +   the requested header for that article. If the *file* parameter is supplied, then
    +   the output of the  ``XHDR`` command is stored in a file.  If *file* is a string,
    +   then the method will open a file object with that name, write to it  then close
    +   it.  If *file* is a file object, then it will start calling :meth:`write` on it
    +   to store the lines of the command output. If *file* is supplied, then the
    +   returned *list* is an empty list.
    +
    +
    +.. method:: NNTP.post(file)
    +
    +   Post an article using the ``POST`` command.  The *file* argument is an open file
    +   object which is read until EOF using its :meth:`readline` method.  It should be
    +   a well-formed news article, including the required headers.  The :meth:`post`
    +   method automatically escapes lines beginning with ``.``.
    +
    +
    +.. method:: NNTP.ihave(id, file)
    +
    +   Send an ``IHAVE`` command. *id* is a message id (enclosed in  ``'<'`` and
    +   ``'>'``). If the response is not an error, treat *file* exactly as for the
    +   :meth:`post` method.
    +
    +
    +.. method:: NNTP.date()
    +
    +   Return a triple ``(response, date, time)``, containing the current date and time
    +   in a form suitable for the :meth:`newnews` and :meth:`newgroups` methods. This
    +   is an optional NNTP extension, and may not be supported by all servers.
    +
    +
    +.. method:: NNTP.xgtitle(name, [file])
    +
    +   Process an ``XGTITLE`` command, returning a pair ``(response, list)``, where
    +   *list* is a list of tuples containing ``(name, title)``. If the *file* parameter
    +   is supplied, then the output of the  ``XGTITLE`` command is stored in a file.
    +   If *file* is a string,  then the method will open a file object with that name,
    +   write to it  then close it.  If *file* is a file object, then it will start
    +   calling :meth:`write` on it to store the lines of the command output. If *file*
    +   is supplied, then the returned *list* is an empty list. This is an optional NNTP
    +   extension, and may not be supported by all servers.
    +
    +   .. % XXX huh?  Should that be name, description?
    +
    +   :rfc:`2980` says "It is suggested that this extension be deprecated".  Use
    +   :meth:`descriptions` or :meth:`description` instead.
    +
    +
    +.. method:: NNTP.xover(start, end, [file])
    +
    +   Return a pair ``(resp, list)``.  *list* is a list of tuples, one for each
    +   article in the range delimited by the *start* and *end* article numbers.  Each
    +   tuple is of the form ``(article number, subject, poster, date, id, references,
    +   size, lines)``. If the *file* parameter is supplied, then the output of the
    +   ``XOVER`` command is stored in a file.  If *file* is a string,  then the method
    +   will open a file object with that name, write to it  then close it.  If *file*
    +   is a file object, then it will start calling :meth:`write` on it to store the
    +   lines of the command output. If *file* is supplied, then the returned *list* is
    +   an empty list. This is an optional NNTP extension, and may not be supported by
    +   all servers.
    +
    +
    +.. method:: NNTP.xpath(id)
    +
    +   Return a pair ``(resp, path)``, where *path* is the directory path to the
    +   article with message ID *id*.  This is an optional NNTP extension, and may not
    +   be supported by all servers.
    +
    +
    +.. method:: NNTP.quit()
    +
    +   Send a ``QUIT`` command and close the connection.  Once this method has been
    +   called, no other methods of the NNTP object should be called.
    +
    diff --git a/Doc/library/numeric.rst b/Doc/library/numeric.rst
    new file mode 100644
    index 0000000..0d9d59f
    --- /dev/null
    +++ b/Doc/library/numeric.rst
    @@ -0,0 +1,25 @@
    +
    +.. _numeric:
    +
    +********************************
    +Numeric and Mathematical Modules
    +********************************
    +
    +The modules described in this chapter provide numeric and math-related functions
    +and data types. The :mod:`math` and :mod:`cmath` modules contain various
    +mathematical functions for floating-point and complex numbers. For users more
    +interested in decimal accuracy than in speed, the :mod:`decimal` module supports
    +exact representations of decimal numbers.
    +
    +The following modules are documented in this chapter:
    +
    +
    +.. toctree::
    +
    +   math.rst
    +   cmath.rst
    +   decimal.rst
    +   random.rst
    +   itertools.rst
    +   functools.rst
    +   operator.rst
    diff --git a/Doc/library/objects.rst b/Doc/library/objects.rst
    new file mode 100644
    index 0000000..c6cc9e4
    --- /dev/null
    +++ b/Doc/library/objects.rst
    @@ -0,0 +1,32 @@
    +
    +.. _builtin:
    +
    +****************
    +Built-in Objects
    +****************
    +
    +.. index::
    +   pair: built-in; types
    +   pair: built-in; exceptions
    +   pair: built-in; functions
    +   pair: built-in; constants
    +   single: symbol table
    +
    +Names for built-in exceptions and functions and a number of constants are found
    +in a separate  symbol table.  This table is searched last when the interpreter
    +looks up the meaning of a name, so local and global user-defined names can
    +override built-in names.  Built-in types are described together here for easy
    +reference. [#]_
    +
    +The tables in this chapter document the priorities of operators by listing them
    +in order of ascending priority (within a table) and grouping operators that have
    +the same priority in the same box. Binary operators of the same priority group
    +from left to right. (Unary operators group from right to left, but there you
    +have no real choice.)  See :ref:`operator-summary` for the complete picture on
    +operator priorities.
    +
    +.. rubric:: Footnotes
    +
    +.. [#] Most descriptions sorely lack explanations of the exceptions that may be raised
    +   --- this will be fixed in a future version of this manual.
    +
    diff --git a/Doc/library/operator.rst b/Doc/library/operator.rst
    new file mode 100644
    index 0000000..4e85569
    --- /dev/null
    +++ b/Doc/library/operator.rst
    @@ -0,0 +1,612 @@
    +:mod:`operator` --- Standard operators as functions
    +===================================================
    +
    +.. module:: operator
    +   :synopsis: Functions corresponding to the standard operators.
    +.. sectionauthor:: Skip Montanaro 
    +
    +
    +
    +The :mod:`operator` module exports a set of functions implemented in C
    +corresponding to the intrinsic operators of Python.  For example,
    +``operator.add(x, y)`` is equivalent to the expression ``x+y``.  The function
    +names are those used for special class methods; variants without leading and
    +trailing ``__`` are also provided for convenience.
    +
    +The functions fall into categories that perform object comparisons, logical
    +operations, mathematical operations, sequence operations, and abstract type
    +tests.
    +
    +The object comparison functions are useful for all objects, and are named after
    +the rich comparison operators they support:
    +
    +
    +.. function:: lt(a, b)
    +              le(a, b)
    +              eq(a, b)
    +              ne(a, b)
    +              ge(a, b)
    +              gt(a, b)
    +              __lt__(a, b)
    +              __le__(a, b)
    +              __eq__(a, b)
    +              __ne__(a, b)
    +              __ge__(a, b)
    +              __gt__(a, b)
    +
    +   Perform "rich comparisons" between *a* and *b*. Specifically, ``lt(a, b)`` is
    +   equivalent to ``a < b``, ``le(a, b)`` is equivalent to ``a <= b``, ``eq(a,
    +   b)`` is equivalent to ``a == b``, ``ne(a, b)`` is equivalent to ``a != b``,
    +   ``gt(a, b)`` is equivalent to ``a > b`` and ``ge(a, b)`` is equivalent to ``a
    +   >= b``.  Note that unlike the built-in :func:`cmp`, these functions can
    +   return any value, which may or may not be interpretable as a Boolean value.
    +   See :ref:`comparisons` for more information about rich comparisons.
    +
    +   .. versionadded:: 2.2
    +
    +The logical operations are also generally applicable to all objects, and support
    +truth tests, identity tests, and boolean operations:
    +
    +
    +.. function:: not_(o)
    +              __not__(o)
    +
    +   Return the outcome of :keyword:`not` *o*.  (Note that there is no
    +   :meth:`__not__` method for object instances; only the interpreter core defines
    +   this operation.  The result is affected by the :meth:`__bool__` and
    +   :meth:`__len__` methods.)
    +
    +
    +.. function:: truth(o)
    +
    +   Return :const:`True` if *o* is true, and :const:`False` otherwise.  This is
    +   equivalent to using the :class:`bool` constructor.
    +
    +
    +.. function:: is_(a, b)
    +
    +   Return ``a is b``.  Tests object identity.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: is_not(a, b)
    +
    +   Return ``a is not b``.  Tests object identity.
    +
    +   .. versionadded:: 2.3
    +
    +The mathematical and bitwise operations are the most numerous:
    +
    +
    +.. function:: abs(o)
    +              __abs__(o)
    +
    +   Return the absolute value of *o*.
    +
    +
    +.. function:: add(a, b)
    +              __add__(a, b)
    +
    +   Return ``a + b``, for *a* and *b* numbers.
    +
    +
    +.. function:: and_(a, b)
    +              __and__(a, b)
    +
    +   Return the bitwise and of *a* and *b*.
    +
    +
    +.. function:: div(a, b)
    +              __div__(a, b)
    +
    +   Return ``a / b`` when ``__future__.division`` is not in effect.  This is
    +   also known as "classic" division.
    +
    +
    +.. function:: floordiv(a, b)
    +              __floordiv__(a, b)
    +
    +   Return ``a // b``.
    +
    +   .. versionadded:: 2.2
    +
    +
    +.. function:: inv(o)
    +              invert(o)
    +              __inv__(o)
    +              __invert__(o)
    +
    +   Return the bitwise inverse of the number *o*.  This is equivalent to ``~o``.
    +
    +   .. versionadded:: 2.0
    +      The names :func:`invert` and :func:`__invert__`.
    +
    +
    +.. function:: lshift(a, b)
    +              __lshift__(a, b)
    +
    +   Return *a* shifted left by *b*.
    +
    +
    +.. function:: mod(a, b)
    +              __mod__(a, b)
    +
    +   Return ``a % b``.
    +
    +
    +.. function:: mul(a, b)
    +              __mul__(a, b)
    +
    +   Return ``a * b``, for *a* and *b* numbers.
    +
    +
    +.. function:: neg(o)
    +              __neg__(o)
    +
    +   Return *o* negated.
    +
    +
    +.. function:: or_(a, b)
    +              __or__(a, b)
    +
    +   Return the bitwise or of *a* and *b*.
    +
    +
    +.. function:: pos(o)
    +              __pos__(o)
    +
    +   Return *o* positive (``+o``).
    +
    +
    +.. function:: pow(a, b)
    +              __pow__(a, b)
    +
    +   Return ``a ** b``, for *a* and *b* numbers.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: rshift(a, b)
    +              __rshift__(a, b)
    +
    +   Return *a* shifted right by *b*.
    +
    +
    +.. function:: sub(a, b)
    +              __sub__(a, b)
    +
    +   Return ``a - b``.
    +
    +
    +.. function:: truediv(a, b)
    +              __truediv__(a, b)
    +
    +   Return ``a / b`` when ``__future__.division`` is in effect.  This is also
    +   known as "true" division.
    +
    +   .. versionadded:: 2.2
    +
    +
    +.. function:: xor(a, b)
    +              __xor__(a, b)
    +
    +   Return the bitwise exclusive or of *a* and *b*.
    +
    +
    +.. function:: index(a)
    +              __index__(a)
    +
    +   Return *a* converted to an integer.  Equivalent to ``a.__index__()``.
    +
    +   .. versionadded:: 2.5
    +
    +
    +Operations which work with sequences include:
    +
    +.. function:: concat(a, b)
    +              __concat__(a, b)
    +
    +   Return ``a + b`` for *a* and *b* sequences.
    +
    +
    +.. function:: contains(a, b)
    +              __contains__(a, b)
    +
    +   Return the outcome of the test ``b in a``. Note the reversed operands.
    +
    +   .. versionadded:: 2.0
    +      The name :func:`__contains__`.
    +
    +
    +.. function:: countOf(a, b)
    +
    +   Return the number of occurrences of *b* in *a*.
    +
    +
    +.. function:: delitem(a, b)
    +              __delitem__(a, b)
    +
    +   Remove the value of *a* at index *b*.
    +
    +
    +.. function:: delslice(a, b, c)
    +              __delslice__(a, b, c)
    +
    +   Delete the slice of *a* from index *b* to index *c-1*.
    +
    +
    +.. function:: getitem(a, b)
    +              __getitem__(a, b)
    +
    +   Return the value of *a* at index *b*.
    +
    +
    +.. function:: getslice(a, b, c)
    +              __getslice__(a, b, c)
    +
    +   Return the slice of *a* from index *b* to index *c-1*.
    +
    +
    +.. function:: indexOf(a, b)
    +
    +   Return the index of the first occurrence of *b* in *a*.
    +
    +
    +.. function:: repeat(a, b)
    +              __repeat__(a, b)
    +
    +   Return ``a * b`` where *a* is a sequence and *b* is an integer.
    +
    +
    +.. function:: sequenceIncludes(...)
    +
    +   .. deprecated:: 2.0
    +      Use :func:`contains` instead.
    +
    +   Alias for :func:`contains`.
    +
    +
    +.. function:: setitem(a, b, c)
    +              __setitem__(a, b, c)
    +
    +   Set the value of *a* at index *b* to *c*.
    +
    +
    +.. function:: setslice(a, b, c, v)
    +              __setslice__(a, b, c, v)
    +
    +   Set the slice of *a* from index *b* to index *c-1* to the sequence *v*.
    +
    +Many operations have an "in-place" version.  The following functions provide a
    +more primitive access to in-place operators than the usual syntax does; for
    +example, the statement ``x += y`` is equivalent to ``x = operator.iadd(x, y)``.
    +Another way to put it is to say that ``z = operator.iadd(x, y)`` is equivalent
    +to the compound statement ``z = x; z += y``.
    +
    +
    +.. function:: iadd(a, b)
    +              __iadd__(a, b)
    +
    +   ``a = iadd(a, b)`` is equivalent to ``a += b``.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: iand(a, b)
    +              __iand__(a, b)
    +
    +   ``a = iand(a, b)`` is equivalent to ``a &= b``.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: iconcat(a, b)
    +              __iconcat__(a, b)
    +
    +   ``a = iconcat(a, b)`` is equivalent to ``a += b`` for *a* and *b* sequences.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: idiv(a, b)
    +              __idiv__(a, b)
    +
    +   ``a = idiv(a, b)`` is equivalent to ``a /= b`` when ``__future__.division`` is
    +   not in effect.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: ifloordiv(a, b)
    +              __ifloordiv__(a, b)
    +
    +   ``a = ifloordiv(a, b)`` is equivalent to ``a //= b``.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: ilshift(a, b)
    +              __ilshift__(a, b)
    +
    +   ``a = ilshift(a, b)`` is equivalent to ``a <<= b``.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: imod(a, b)
    +              __imod__(a, b)
    +
    +   ``a = imod(a, b)`` is equivalent to ``a %= b``.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: imul(a, b)
    +              __imul__(a, b)
    +
    +   ``a = imul(a, b)`` is equivalent to ``a *= b``.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: ior(a, b)
    +              __ior__(a, b)
    +
    +   ``a = ior(a, b)`` is equivalent to ``a |= b``.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: ipow(a, b)
    +              __ipow__(a, b)
    +
    +   ``a = ipow(a, b)`` is equivalent to ``a **= b``.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: irepeat(a, b)
    +              __irepeat__(a, b)
    +
    +   ``a = irepeat(a, b)`` is equivalent to ``a *= b`` where *a* is a sequence and
    +   *b* is an integer.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: irshift(a, b)
    +              __irshift__(a, b)
    +
    +   ``a = irshift(a, b)`` is equivalent to ``a >>= b``.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: isub(a, b)
    +              __isub__(a, b)
    +
    +   ``a = isub(a, b)`` is equivalent to ``a -= b``.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: itruediv(a, b)
    +              __itruediv__(a, b)
    +
    +   ``a = itruediv(a, b)`` is equivalent to ``a /= b`` when ``__future__.division``
    +   is in effect.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: ixor(a, b)
    +              __ixor__(a, b)
    +
    +   ``a = ixor(a, b)`` is equivalent to ``a ^= b``.
    +
    +   .. versionadded:: 2.5
    +
    +
    +The :mod:`operator` module also defines a few predicates to test the type of
    +objects.
    +
    +.. note::
    +
    +   Be careful not to misinterpret the results of these functions; only
    +   :func:`isCallable` has any measure of reliability with instance objects.
    +   For example::
    +
    +      >>> class C:
    +      ...     pass
    +      ... 
    +      >>> import operator
    +      >>> o = C()
    +      >>> operator.isMappingType(o)
    +      True
    +
    +
    +.. function:: isCallable(o)
    +
    +   .. deprecated:: 2.0
    +      Use the :func:`callable` built-in function instead.
    +
    +   Returns true if the object *o* can be called like a function, otherwise it
    +   returns false.  True is returned for functions, bound and unbound methods, class
    +   objects, and instance objects which support the :meth:`__call__` method.
    +
    +
    +.. function:: isMappingType(o)
    +
    +   Returns true if the object *o* supports the mapping interface. This is true for
    +   dictionaries and all instance objects defining :meth:`__getitem__`.
    +
    +   .. warning::
    +
    +      There is no reliable way to test if an instance supports the complete mapping
    +      protocol since the interface itself is ill-defined.  This makes this test less
    +      useful than it otherwise might be.
    +
    +
    +.. function:: isNumberType(o)
    +
    +   Returns true if the object *o* represents a number.  This is true for all
    +   numeric types implemented in C.
    +
    +   .. warning::
    +
    +      There is no reliable way to test if an instance supports the complete numeric
    +      interface since the interface itself is ill-defined.  This makes this test less
    +      useful than it otherwise might be.
    +
    +
    +.. function:: isSequenceType(o)
    +
    +   Returns true if the object *o* supports the sequence protocol. This returns true
    +   for all objects which define sequence methods in C, and for all instance objects
    +   defining :meth:`__getitem__`.
    +
    +   .. warning::
    +
    +      There is no reliable way to test if an instance supports the complete sequence
    +      interface since the interface itself is ill-defined.  This makes this test less
    +      useful than it otherwise might be.
    +
    +Example: Build a dictionary that maps the ordinals from ``0`` to ``255`` to
    +their character equivalents. ::
    +
    +   >>> import operator
    +   >>> d = {}
    +   >>> keys = range(256)
    +   >>> vals = map(chr, keys)
    +   >>> map(operator.setitem, [d]*len(keys), keys, vals)
    +
    +.. XXX: find a better, readable, example
    +
    +The :mod:`operator` module also defines tools for generalized attribute and item
    +lookups.  These are useful for making fast field extractors as arguments for
    +:func:`map`, :func:`sorted`, :meth:`itertools.groupby`, or other functions that
    +expect a function argument.
    +
    +
    +.. function:: attrgetter(attr[, args...])
    +
    +   Return a callable object that fetches *attr* from its operand. If more than one
    +   attribute is requested, returns a tuple of attributes. After
    +   ``f=attrgetter('name')``, the call ``f(b)`` returns ``b.name``.  After
    +   ``f=attrgetter('name', 'date')``, the call ``f(b)`` returns ``(b.name,
    +   b.date)``.
    +
    +   .. versionadded:: 2.4
    +
    +   .. versionchanged:: 2.5
    +      Added support for multiple attributes.
    +
    +
    +.. function:: itemgetter(item[, args...])
    +
    +   Return a callable object that fetches *item* from its operand. If more than one
    +   item is requested, returns a tuple of items. After ``f=itemgetter(2)``, the
    +   call ``f(b)`` returns ``b[2]``. After ``f=itemgetter(2,5,3)``, the call
    +   ``f(b)`` returns ``(b[2], b[5], b[3])``.
    +
    +   .. versionadded:: 2.4
    +
    +   .. versionchanged:: 2.5
    +      Added support for multiple item extraction.
    +
    +Examples::
    +
    +   >>> from operator import itemgetter
    +   >>> inventory = [('apple', 3), ('banana', 2), ('pear', 5), ('orange', 1)]
    +   >>> getcount = itemgetter(1)
    +   >>> map(getcount, inventory)
    +   [3, 2, 5, 1]
    +   >>> sorted(inventory, key=getcount)
    +   [('orange', 1), ('banana', 2), ('apple', 3), ('pear', 5)]
    +
    +
    +.. _operator-map:
    +
    +Mapping Operators to Functions
    +------------------------------
    +
    +This table shows how abstract operations correspond to operator symbols in the
    +Python syntax and the functions in the :mod:`operator` module.
    +
    ++-----------------------+-------------------------+---------------------------------+
    +| Operation             | Syntax                  | Function                        |
    ++=======================+=========================+=================================+
    +| Addition              | ``a + b``               | ``add(a, b)``                   |
    ++-----------------------+-------------------------+---------------------------------+
    +| Concatenation         | ``seq1 + seq2``         | ``concat(seq1, seq2)``          |
    ++-----------------------+-------------------------+---------------------------------+
    +| Containment Test      | ``o in seq``            | ``contains(seq, o)``            |
    ++-----------------------+-------------------------+---------------------------------+
    +| Division              | ``a / b``               | ``div(a, b)`` (without          |
    +|                       |                         | ``__future__.division``)        |
    ++-----------------------+-------------------------+---------------------------------+
    +| Division              | ``a / b``               | ``truediv(a, b)`` (with         |
    +|                       |                         | ``__future__.division``)        |
    ++-----------------------+-------------------------+---------------------------------+
    +| Division              | ``a // b``              | ``floordiv(a, b)``              |
    ++-----------------------+-------------------------+---------------------------------+
    +| Bitwise And           | ``a & b``               | ``and_(a, b)``                  |
    ++-----------------------+-------------------------+---------------------------------+
    +| Bitwise Exclusive Or  | ``a ^ b``               | ``xor(a, b)``                   |
    ++-----------------------+-------------------------+---------------------------------+
    +| Bitwise Inversion     | ``~ a``                 | ``invert(a)``                   |
    ++-----------------------+-------------------------+---------------------------------+
    +| Bitwise Or            | ``a | b``               | ``or_(a, b)``                   |
    ++-----------------------+-------------------------+---------------------------------+
    +| Exponentiation        | ``a ** b``              | ``pow(a, b)``                   |
    ++-----------------------+-------------------------+---------------------------------+
    +| Identity              | ``a is b``              | ``is_(a, b)``                   |
    ++-----------------------+-------------------------+---------------------------------+
    +| Identity              | ``a is not b``          | ``is_not(a, b)``                |
    ++-----------------------+-------------------------+---------------------------------+
    +| Indexed Assignment    | ``o[k] = v``            | ``setitem(o, k, v)``            |
    ++-----------------------+-------------------------+---------------------------------+
    +| Indexed Deletion      | ``del o[k]``            | ``delitem(o, k)``               |
    ++-----------------------+-------------------------+---------------------------------+
    +| Indexing              | ``o[k]``                | ``getitem(o, k)``               |
    ++-----------------------+-------------------------+---------------------------------+
    +| Left Shift            | ``a << b``              | ``lshift(a, b)``                |
    ++-----------------------+-------------------------+---------------------------------+
    +| Modulo                | ``a % b``               | ``mod(a, b)``                   |
    ++-----------------------+-------------------------+---------------------------------+
    +| Multiplication        | ``a * b``               | ``mul(a, b)``                   |
    ++-----------------------+-------------------------+---------------------------------+
    +| Negation (Arithmetic) | ``- a``                 | ``neg(a)``                      |
    ++-----------------------+-------------------------+---------------------------------+
    +| Negation (Logical)    | ``not a``               | ``not_(a)``                     |
    ++-----------------------+-------------------------+---------------------------------+
    +| Right Shift           | ``a >> b``              | ``rshift(a, b)``                |
    ++-----------------------+-------------------------+---------------------------------+
    +| Sequence Repetition   | ``seq * i``             | ``repeat(seq, i)``              |
    ++-----------------------+-------------------------+---------------------------------+
    +| Slice Assignment      | ``seq[i:j] = values``   | ``setslice(seq, i, j, values)`` |
    ++-----------------------+-------------------------+---------------------------------+
    +| Slice Deletion        | ``del seq[i:j]``        | ``delslice(seq, i, j)``         |
    ++-----------------------+-------------------------+---------------------------------+
    +| Slicing               | ``seq[i:j]``            | ``getslice(seq, i, j)``         |
    ++-----------------------+-------------------------+---------------------------------+
    +| String Formatting     | ``s % o``               | ``mod(s, o)``                   |
    ++-----------------------+-------------------------+---------------------------------+
    +| Subtraction           | ``a - b``               | ``sub(a, b)``                   |
    ++-----------------------+-------------------------+---------------------------------+
    +| Truth Test            | ``o``                   | ``truth(o)``                    |
    ++-----------------------+-------------------------+---------------------------------+
    +| Ordering              | ``a < b``               | ``lt(a, b)``                    |
    ++-----------------------+-------------------------+---------------------------------+
    +| Ordering              | ``a <= b``              | ``le(a, b)``                    |
    ++-----------------------+-------------------------+---------------------------------+
    +| Equality              | ``a == b``              | ``eq(a, b)``                    |
    ++-----------------------+-------------------------+---------------------------------+
    +| Difference            | ``a != b``              | ``ne(a, b)``                    |
    ++-----------------------+-------------------------+---------------------------------+
    +| Ordering              | ``a >= b``              | ``ge(a, b)``                    |
    ++-----------------------+-------------------------+---------------------------------+
    +| Ordering              | ``a > b``               | ``gt(a, b)``                    |
    ++-----------------------+-------------------------+---------------------------------+
    +
    diff --git a/Doc/library/optparse.rst b/Doc/library/optparse.rst
    new file mode 100644
    index 0000000..cfcd8a6
    --- /dev/null
    +++ b/Doc/library/optparse.rst
    @@ -0,0 +1,1827 @@
    +.. % THIS FILE IS AUTO-GENERATED!  DO NOT EDIT!
    +.. % (Your changes will be lost the next time it is generated.)
    +
    +
    +:mod:`optparse` --- More powerful command line option parser
    +============================================================
    +
    +.. module:: optparse
    +   :synopsis: More convenient, flexible, and powerful command-line parsing library.
    +.. moduleauthor:: Greg Ward 
    +
    +
    +.. versionadded:: 2.3
    +
    +.. sectionauthor:: Greg Ward 
    +
    +
    +``optparse`` is a more convenient, flexible, and powerful library for parsing
    +command-line options than ``getopt``.  ``optparse`` uses a more declarative
    +style of command-line parsing: you create an instance of :class:`OptionParser`,
    +populate it with options, and parse the command line. ``optparse`` allows users
    +to specify options in the conventional GNU/POSIX syntax, and additionally
    +generates usage and help messages for you.
    +
    +.. % An intro blurb used only when generating LaTeX docs for the Python
    +.. % manual (based on README.txt).
    +
    +Here's an example of using ``optparse`` in a simple script::
    +
    +   from optparse import OptionParser
    +   [...]
    +   parser = OptionParser()
    +   parser.add_option("-f", "--file", dest="filename",
    +                     help="write report to FILE", metavar="FILE")
    +   parser.add_option("-q", "--quiet",
    +                     action="store_false", dest="verbose", default=True,
    +                     help="don't print status messages to stdout")
    +
    +   (options, args) = parser.parse_args()
    +
    +With these few lines of code, users of your script can now do the "usual thing"
    +on the command-line, for example::
    +
    +   <yourscript> --file=outfile -q
    +
    +As it parses the command line, ``optparse`` sets attributes of the ``options``
    +object returned by :meth:`parse_args` based on user-supplied command-line
    +values.  When :meth:`parse_args` returns from parsing this command line,
    +``options.filename`` will be ``"outfile"`` and ``options.verbose`` will be
    +``False``.  ``optparse`` supports both long and short options, allows short
    +options to be merged together, and allows options to be associated with their
    +arguments in a variety of ways.  Thus, the following command lines are all
    +equivalent to the above example::
    +
    +   <yourscript> -f outfile --quiet
    +   <yourscript> --quiet --file outfile
    +   <yourscript> -q -foutfile
    +   <yourscript> -qfoutfile
    +
    +Additionally, users can run one of  ::
    +
    +   <yourscript> -h
    +   <yourscript> --help
    +
    +and ``optparse`` will print out a brief summary of your script's options::
    +
    +   usage: <yourscript> [options]
    +
    +   options:
    +     -h, --help            show this help message and exit
    +     -f FILE, --file=FILE  write report to FILE
    +     -q, --quiet           don't print status messages to stdout
    +
    +where the value of *yourscript* is determined at runtime (normally from
    +``sys.argv[0]``).
    +
    +.. % $Id: intro.txt 413 2004-09-28 00:59:13Z greg $
    +
    +
    +.. _optparse-background:
    +
    +Background
    +----------
    +
    +:mod:`optparse` was explicitly designed to encourage the creation of programs
    +with straightforward, conventional command-line interfaces.  To that end, it
    +supports only the most common command-line syntax and semantics conventionally
    +used under Unix.  If you are unfamiliar with these conventions, read this
    +section to acquaint yourself with them.
    +
    +
    +.. _optparse-terminology:
    +
    +Terminology
    +^^^^^^^^^^^
    +
    +argument
    +   a string entered on the command-line, and passed by the shell to ``execl()`` or
    +   ``execv()``.  In Python, arguments are elements of ``sys.argv[1:]``
    +   (``sys.argv[0]`` is the name of the program being executed).  Unix shells also
    +   use the term "word".
    +
    +   It is occasionally desirable to substitute an argument list other than
    +   ``sys.argv[1:]``, so you should read "argument" as "an element of
    +   ``sys.argv[1:]``, or of some other list provided as a substitute for
    +   ``sys.argv[1:]``".
    +
    +option   
    +   an argument used to supply extra information to guide or customize the execution
    +   of a program.  There are many different syntaxes for options; the traditional
    +   Unix syntax is a hyphen ("-") followed by a single letter, e.g. ``"-x"`` or
    +   ``"-F"``.  Also, traditional Unix syntax allows multiple options to be merged
    +   into a single argument, e.g.  ``"-x -F"`` is equivalent to ``"-xF"``.  The GNU
    +   project introduced ``"--"`` followed by a series of hyphen-separated words, e.g.
    +   ``"--file"`` or ``"--dry-run"``.  These are the only two option syntaxes
    +   provided by :mod:`optparse`.
    +
    +   Some other option syntaxes that the world has seen include:
    +
    +   * a hyphen followed by a few letters, e.g. ``"-pf"`` (this is *not* the same
    +     as multiple options merged into a single argument)
    +
    +   * a hyphen followed by a whole word, e.g. ``"-file"`` (this is technically
    +     equivalent to the previous syntax, but they aren't usually seen in the same
    +     program)
    +
    +   * a plus sign followed by a single letter, or a few letters, or a word, e.g.
    +     ``"+f"``, ``"+rgb"``
    +
    +   * a slash followed by a letter, or a few letters, or a word, e.g. ``"/f"``,
    +     ``"/file"``
    +
    +   These option syntaxes are not supported by :mod:`optparse`, and they never will
    +   be.  This is deliberate: the first three are non-standard in any environment,
    +   and the last only makes sense if you're exclusively targeting VMS, MS-DOS,
    +   and/or Windows.
    +
    +option argument
    +   an argument that follows an option, is closely associated with that option, and
    +   is consumed from the argument list when that option is. With :mod:`optparse`,
    +   option arguments may either be in a separate argument from their option::
    +
    +      -f foo
    +      --file foo
    +
    +   or included in the same argument::
    +
    +      -ffoo
    +      --file=foo
    +
    +   Typically, a given option either takes an argument or it doesn't. Lots of people
    +   want an "optional option arguments" feature, meaning that some options will take
    +   an argument if they see it, and won't if they don't.  This is somewhat
    +   controversial, because it makes parsing ambiguous: if ``"-a"`` takes an optional
    +   argument and ``"-b"`` is another option entirely, how do we interpret ``"-ab"``?
    +   Because of this ambiguity, :mod:`optparse` does not support this feature.
    +
    +positional argument
    +   something left over in the argument list after options have been parsed, i.e.
    +   after options and their arguments have been parsed and removed from the argument
    +   list.
    +
    +required option
    +   an option that must be supplied on the command-line; note that the phrase
    +   "required option" is self-contradictory in English.  :mod:`optparse` doesn't
    +   prevent you from implementing required options, but doesn't give you much help
    +   at it either.  See ``examples/required_1.py`` and ``examples/required_2.py`` in
    +   the :mod:`optparse` source distribution for two ways to implement required
    +   options with :mod:`optparse`.
    +
    +For example, consider this hypothetical command-line::
    +
    +   prog -v --report /tmp/report.txt foo bar
    +
    +``"-v"`` and ``"--report"`` are both options.  Assuming that :option:`--report`
    +takes one argument, ``"/tmp/report.txt"`` is an option argument.  ``"foo"`` and
    +``"bar"`` are positional arguments.
    +
    +
    +.. _optparse-what-options-for:
    +
    +What are options for?
    +^^^^^^^^^^^^^^^^^^^^^
    +
    +Options are used to provide extra information to tune or customize the execution
    +of a program.  In case it wasn't clear, options are usually *optional*.  A
    +program should be able to run just fine with no options whatsoever.  (Pick a
    +random program from the Unix or GNU toolsets.  Can it run without any options at
    +all and still make sense?  The main exceptions are ``find``, ``tar``, and
    +``dd``\ ---all of which are mutant oddballs that have been rightly criticized
    +for their non-standard syntax and confusing interfaces.)
    +
    +Lots of people want their programs to have "required options".  Think about it.
    +If it's required, then it's *not optional*!  If there is a piece of information
    +that your program absolutely requires in order to run successfully, that's what
    +positional arguments are for.
    +
    +As an example of good command-line interface design, consider the humble ``cp``
    +utility, for copying files.  It doesn't make much sense to try to copy files
    +without supplying a destination and at least one source. Hence, ``cp`` fails if
    +you run it with no arguments.  However, it has a flexible, useful syntax that
    +does not require any options at all::
    +
    +   cp SOURCE DEST
    +   cp SOURCE ... DEST-DIR
    +
    +You can get pretty far with just that.  Most ``cp`` implementations provide a
    +bunch of options to tweak exactly how the files are copied: you can preserve
    +mode and modification time, avoid following symlinks, ask before clobbering
    +existing files, etc.  But none of this distracts from the core mission of
    +``cp``, which is to copy either one file to another, or several files to another
    +directory.
    +
    +
    +.. _optparse-what-positional-arguments-for:
    +
    +What are positional arguments for?
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +Positional arguments are for those pieces of information that your program
    +absolutely, positively requires to run.
    +
    +A good user interface should have as few absolute requirements as possible.  If
    +your program requires 17 distinct pieces of information in order to run
    +successfully, it doesn't much matter *how* you get that information from the
    +user---most people will give up and walk away before they successfully run the
    +program.  This applies whether the user interface is a command-line, a
    +configuration file, or a GUI: if you make that many demands on your users, most
    +of them will simply give up.
    +
    +In short, try to minimize the amount of information that users are absolutely
    +required to supply---use sensible defaults whenever possible.  Of course, you
    +also want to make your programs reasonably flexible.  That's what options are
    +for.  Again, it doesn't matter if they are entries in a config file, widgets in
    +the "Preferences" dialog of a GUI, or command-line options---the more options
    +you implement, the more flexible your program is, and the more complicated its
    +implementation becomes.  Too much flexibility has drawbacks as well, of course;
    +too many options can overwhelm users and make your code much harder to maintain.
    +
    +.. % $Id: tao.txt 413 2004-09-28 00:59:13Z greg $
    +
    +
    +.. _optparse-tutorial:
    +
    +Tutorial
    +--------
    +
    +While :mod:`optparse` is quite flexible and powerful, it's also straightforward
    +to use in most cases.  This section covers the code patterns that are common to
    +any :mod:`optparse`\ -based program.
    +
    +First, you need to import the OptionParser class; then, early in the main
    +program, create an OptionParser instance::
    +
    +   from optparse import OptionParser
    +   [...]
    +   parser = OptionParser()
    +
    +Then you can start defining options.  The basic syntax is::
    +
    +   parser.add_option(opt_str, ...,
    +                     attr=value, ...)
    +
    +Each option has one or more option strings, such as ``"-f"`` or ``"--file"``,
    +and several option attributes that tell :mod:`optparse` what to expect and what
    +to do when it encounters that option on the command line.
    +
    +Typically, each option will have one short option string and one long option
    +string, e.g.::
    +
    +   parser.add_option("-f", "--file", ...)
    +
    +You're free to define as many short option strings and as many long option
    +strings as you like (including zero), as long as there is at least one option
    +string overall.
    +
    +The option strings passed to :meth:`add_option` are effectively labels for the
    +option defined by that call.  For brevity, we will frequently refer to
    +*encountering an option* on the command line; in reality, :mod:`optparse`
    +encounters *option strings* and looks up options from them.
    +
    +Once all of your options are defined, instruct :mod:`optparse` to parse your
    +program's command line::
    +
    +   (options, args) = parser.parse_args()
    +
    +(If you like, you can pass a custom argument list to :meth:`parse_args`, but
    +that's rarely necessary: by default it uses ``sys.argv[1:]``.)
    +
    +:meth:`parse_args` returns two values:
    +
    +* ``options``, an object containing values for all of your options---e.g. if
    +  ``"--file"`` takes a single string argument, then ``options.file`` will be the
    +  filename supplied by the user, or ``None`` if the user did not supply that
    +  option
    +
    +* ``args``, the list of positional arguments left over after parsing options
    +
    +This tutorial section only covers the four most important option attributes:
    +:attr:`action`, :attr:`type`, :attr:`dest` (destination), and :attr:`help`. Of
    +these, :attr:`action` is the most fundamental.
    +
    +
    +.. _optparse-understanding-option-actions:
    +
    +Understanding option actions
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +Actions tell :mod:`optparse` what to do when it encounters an option on the
    +command line.  There is a fixed set of actions hard-coded into :mod:`optparse`;
    +adding new actions is an advanced topic covered in section
    +:ref:`optparse-extending-optparse`. Most actions tell
    +:mod:`optparse` to store a value in some variable---for example, take a string
    +from the command line and store it in an attribute of ``options``.
    +
    +If you don't specify an option action, :mod:`optparse` defaults to ``store``.
    +
    +
    +.. _optparse-store-action:
    +
    +The store action
    +^^^^^^^^^^^^^^^^
    +
    +The most common option action is ``store``, which tells :mod:`optparse` to take
    +the next argument (or the remainder of the current argument), ensure that it is
    +of the correct type, and store it to your chosen destination.
    +
    +For example::
    +
    +   parser.add_option("-f", "--file",
    +                     action="store", type="string", dest="filename")
    +
    +Now let's make up a fake command line and ask :mod:`optparse` to parse it::
    +
    +   args = ["-f", "foo.txt"]
    +   (options, args) = parser.parse_args(args)
    +
    +When :mod:`optparse` sees the option string ``"-f"``, it consumes the next
    +argument, ``"foo.txt"``, and stores it in ``options.filename``.  So, after this
    +call to :meth:`parse_args`, ``options.filename`` is ``"foo.txt"``.
    +
    +Some other option types supported by :mod:`optparse` are ``int`` and ``float``.
    +Here's an option that expects an integer argument::
    +
    +   parser.add_option("-n", type="int", dest="num")
    +
    +Note that this option has no long option string, which is perfectly acceptable.
    +Also, there's no explicit action, since the default is ``store``.
    +
    +Let's parse another fake command-line.  This time, we'll jam the option argument
    +right up against the option: since ``"-n42"`` (one argument) is equivalent to
    +``"-n 42"`` (two arguments), the code  ::
    +
    +   (options, args) = parser.parse_args(["-n42"])
    +   print options.num
    +
    +will print ``"42"``.
    +
    +If you don't specify a type, :mod:`optparse` assumes ``string``.  Combined with
    +the fact that the default action is ``store``, that means our first example can
    +be a lot shorter::
    +
    +   parser.add_option("-f", "--file", dest="filename")
    +
    +If you don't supply a destination, :mod:`optparse` figures out a sensible
    +default from the option strings: if the first long option string is
    +``"--foo-bar"``, then the default destination is ``foo_bar``.  If there are no
    +long option strings, :mod:`optparse` looks at the first short option string: the
    +default destination for ``"-f"`` is ``f``.
    +
    +:mod:`optparse` also includes built-in ``long`` and ``complex`` types.  Adding
    +types is covered in section :ref:`optparse-extending-optparse`.
    +
    +
    +.. _optparse-handling-boolean-options:
    +
    +Handling boolean (flag) options
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +Flag options---set a variable to true or false when a particular option is
    +seen---are quite common.  :mod:`optparse` supports them with two separate
    +actions, ``store_true`` and ``store_false``.  For example, you might have a
    +``verbose`` flag that is turned on with ``"-v"`` and off with ``"-q"``::
    +
    +   parser.add_option("-v", action="store_true", dest="verbose")
    +   parser.add_option("-q", action="store_false", dest="verbose")
    +
    +Here we have two different options with the same destination, which is perfectly
    +OK.  (It just means you have to be a bit careful when setting default
    +values---see below.)
    +
    +When :mod:`optparse` encounters ``"-v"`` on the command line, it sets
    +``options.verbose`` to ``True``; when it encounters ``"-q"``,
    +``options.verbose`` is set to ``False``.
    +
    +
    +.. _optparse-other-actions:
    +
    +Other actions
    +^^^^^^^^^^^^^
    +
    +Some other actions supported by :mod:`optparse` are:
    +
    +``store_const``
    +   store a constant value
    +
    +``append``
    +   append this option's argument to a list
    +
    +``count``
    +   increment a counter by one
    +
    +``callback``
    +   call a specified function
    +
    +These are covered in section :ref:`optparse-reference-guide` and section
    +:ref:`optparse-option-callbacks`.
    +
    +
    +.. _optparse-default-values:
    +
    +Default values
    +^^^^^^^^^^^^^^
    +
    +All of the above examples involve setting some variable (the "destination") when
    +certain command-line options are seen.  What happens if those options are never
    +seen?  Since we didn't supply any defaults, they are all set to ``None``.  This
    +is usually fine, but sometimes you want more control.  :mod:`optparse` lets you
    +supply a default value for each destination, which is assigned before the
    +command line is parsed.
    +
    +First, consider the verbose/quiet example.  If we want :mod:`optparse` to set
    +``verbose`` to ``True`` unless ``"-q"`` is seen, then we can do this::
    +
    +   parser.add_option("-v", action="store_true", dest="verbose", default=True)
    +   parser.add_option("-q", action="store_false", dest="verbose")
    +
    +Since default values apply to the *destination* rather than to any particular
    +option, and these two options happen to have the same destination, this is
    +exactly equivalent::
    +
    +   parser.add_option("-v", action="store_true", dest="verbose")
    +   parser.add_option("-q", action="store_false", dest="verbose", default=True)
    +
    +Consider this::
    +
    +   parser.add_option("-v", action="store_true", dest="verbose", default=False)
    +   parser.add_option("-q", action="store_false", dest="verbose", default=True)
    +
    +Again, the default value for ``verbose`` will be ``True``: the last default
    +value supplied for any particular destination is the one that counts.
    +
    +A clearer way to specify default values is the :meth:`set_defaults` method of
    +OptionParser, which you can call at any time before calling :meth:`parse_args`::
    +
    +   parser.set_defaults(verbose=True)
    +   parser.add_option(...)
    +   (options, args) = parser.parse_args()
    +
    +As before, the last value specified for a given option destination is the one
    +that counts.  For clarity, try to use one method or the other of setting default
    +values, not both.
    +
    +
    +.. _optparse-generating-help:
    +
    +Generating help
    +^^^^^^^^^^^^^^^
    +
    +:mod:`optparse`'s ability to generate help and usage text automatically is
    +useful for creating user-friendly command-line interfaces.  All you have to do
    +is supply a :attr:`help` value for each option, and optionally a short usage
    +message for your whole program.  Here's an OptionParser populated with
    +user-friendly (documented) options::
    +
    +   usage = "usage: %prog [options] arg1 arg2"
    +   parser = OptionParser(usage=usage)
    +   parser.add_option("-v", "--verbose",
    +                     action="store_true", dest="verbose", default=True,
    +                     help="make lots of noise [default]")
    +   parser.add_option("-q", "--quiet",
    +                     action="store_false", dest="verbose", 
    +                     help="be vewwy quiet (I'm hunting wabbits)")
    +   parser.add_option("-f", "--filename",
    +                     metavar="FILE", help="write output to FILE")
    +   parser.add_option("-m", "--mode",
    +                     default="intermediate",
    +                     help="interaction mode: novice, intermediate, "
    +                          "or expert [default: %default]")
    +
    +If :mod:`optparse` encounters either ``"-h"`` or ``"--help"`` on the
    +command-line, or if you just call :meth:`parser.print_help`, it prints the
    +following to standard output::
    +
    +   usage: <yourscript> [options] arg1 arg2
    +
    +   options:
    +     -h, --help            show this help message and exit
    +     -v, --verbose         make lots of noise [default]
    +     -q, --quiet           be vewwy quiet (I'm hunting wabbits)
    +     -f FILE, --filename=FILE
    +                           write output to FILE
    +     -m MODE, --mode=MODE  interaction mode: novice, intermediate, or
    +                           expert [default: intermediate]
    +
    +(If the help output is triggered by a help option, :mod:`optparse` exits after
    +printing the help text.)
    +
    +There's a lot going on here to help :mod:`optparse` generate the best possible
    +help message:
    +
    +* the script defines its own usage message::
    +
    +     usage = "usage: %prog [options] arg1 arg2"
    +
    +  :mod:`optparse` expands ``"%prog"`` in the usage string to the name of the
    +  current program, i.e. ``os.path.basename(sys.argv[0])``.  The expanded string is
    +  then printed before the detailed option help.
    +
    +  If you don't supply a usage string, :mod:`optparse` uses a bland but sensible
    +  default: ``"usage: %prog [options]"``, which is fine if your script doesn't take
    +  any positional arguments.
    +
    +* every option defines a help string, and doesn't worry about line-wrapping---
    +  :mod:`optparse` takes care of wrapping lines and making the help output look
    +  good.
    +
    +* options that take a value indicate this fact in their automatically-generated
    +  help message, e.g. for the "mode" option::
    +
    +     -m MODE, --mode=MODE
    +
    +  Here, "MODE" is called the meta-variable: it stands for the argument that the
    +  user is expected to supply to :option:`-m`/:option:`--mode`.  By default,
    +  :mod:`optparse` converts the destination variable name to uppercase and uses
    +  that for the meta-variable.  Sometimes, that's not what you want---for example,
    +  the :option:`--filename` option explicitly sets ``metavar="FILE"``, resulting in
    +  this automatically-generated option description::
    +
    +     -f FILE, --filename=FILE
    +
    +  This is important for more than just saving space, though: the manually written
    +  help text uses the meta-variable "FILE" to clue the user in that there's a
    +  connection between the semi-formal syntax "-f FILE" and the informal semantic
    +  description "write output to FILE". This is a simple but effective way to make
    +  your help text a lot clearer and more useful for end users.
    +
    +* options that have a default value can include ``%default`` in the help
    +  string---\ :mod:`optparse` will replace it with :func:`str` of the option's
    +  default value.  If an option has no default value (or the default value is
    +  ``None``), ``%default`` expands to ``none``.
    +
    +
    +.. _optparse-printing-version-string:
    +
    +Printing a version string
    +^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +Similar to the brief usage string, :mod:`optparse` can also print a version
    +string for your program.  You have to supply the string as the ``version``
    +argument to OptionParser::
    +
    +   parser = OptionParser(usage="%prog [-f] [-q]", version="%prog 1.0")
    +
    +``"%prog"`` is expanded just like it is in ``usage``.  Apart from that,
    +``version`` can contain anything you like.  When you supply it, :mod:`optparse`
    +automatically adds a ``"--version"`` option to your parser. If it encounters
    +this option on the command line, it expands your ``version`` string (by
    +replacing ``"%prog"``), prints it to stdout, and exits.
    +
    +For example, if your script is called ``/usr/bin/foo``::
    +
    +   $ /usr/bin/foo --version
    +   foo 1.0
    +
    +
    +.. _optparse-how-optparse-handles-errors:
    +
    +How :mod:`optparse` handles errors
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +There are two broad classes of errors that :mod:`optparse` has to worry about:
    +programmer errors and user errors.  Programmer errors are usually erroneous
    +calls to ``parser.add_option()``, e.g. invalid option strings, unknown option
    +attributes, missing option attributes, etc.  These are dealt with in the usual
    +way: raise an exception (either ``optparse.OptionError`` or ``TypeError``) and
    +let the program crash.
    +
    +Handling user errors is much more important, since they are guaranteed to happen
    +no matter how stable your code is.  :mod:`optparse` can automatically detect
    +some user errors, such as bad option arguments (passing ``"-n 4x"`` where
    +:option:`-n` takes an integer argument) and missing arguments (``"-n"`` at the
    +end of the command line, where :option:`-n` takes an argument of any type).
    +Also, you can call ``parser.error()`` to signal an application-defined error
    +condition::
    +
    +   (options, args) = parser.parse_args()
    +   [...]
    +   if options.a and options.b:
    +       parser.error("options -a and -b are mutually exclusive")
    +
    +In either case, :mod:`optparse` handles the error the same way: it prints the
    +program's usage message and an error message to standard error and exits with
    +error status 2.
    +
    +Consider the first example above, where the user passes ``"4x"`` to an option
    +that takes an integer::
    +
    +   $ /usr/bin/foo -n 4x
    +   usage: foo [options]
    +
    +   foo: error: option -n: invalid integer value: '4x'
    +
    +Or, where the user fails to pass a value at all::
    +
    +   $ /usr/bin/foo -n
    +   usage: foo [options]
    +
    +   foo: error: -n option requires an argument
    +
    +:mod:`optparse`\ -generated error messages take care always to mention the
    +option involved in the error; be sure to do the same when calling
    +``parser.error()`` from your application code.
    +
    +If :mod:`optparse`'s default error-handling behaviour does not suit your needs,
    +you'll need to subclass OptionParser and override ``exit()`` and/or
    +:meth:`error`.
    +
    +
    +.. _optparse-putting-it-all-together:
    +
    +Putting it all together
    +^^^^^^^^^^^^^^^^^^^^^^^
    +
    +Here's what :mod:`optparse`\ -based scripts usually look like::
    +
    +   from optparse import OptionParser
    +   [...]
    +   def main():
    +       usage = "usage: %prog [options] arg"
    +       parser = OptionParser(usage)
    +       parser.add_option("-f", "--file", dest="filename",
    +                         help="read data from FILENAME")
    +       parser.add_option("-v", "--verbose",
    +                         action="store_true", dest="verbose")
    +       parser.add_option("-q", "--quiet",
    +                         action="store_false", dest="verbose")
    +       [...]
    +       (options, args) = parser.parse_args()
    +       if len(args) != 1:
    +           parser.error("incorrect number of arguments")
    +       if options.verbose:
    +           print("reading %s..." % options.filename)
    +       [...]
    +
    +   if __name__ == "__main__":
    +       main()
    +
    +.. % $Id: tutorial.txt 515 2006-06-10 15:37:45Z gward $
    +
    +
    +.. _optparse-reference-guide:
    +
    +Reference Guide
    +---------------
    +
    +
    +.. _optparse-creating-parser:
    +
    +Creating the parser
    +^^^^^^^^^^^^^^^^^^^
    +
    +The first step in using :mod:`optparse` is to create an OptionParser instance::
    +
    +   parser = OptionParser(...)
    +
    +The OptionParser constructor has no required arguments, but a number of optional
    +keyword arguments.  You should always pass them as keyword arguments, i.e. do
    +not rely on the order in which the arguments are declared.
    +
    +   ``usage`` (default: ``"%prog [options]"``)
    +      The usage summary to print when your program is run incorrectly or with a help
    +      option.  When :mod:`optparse` prints the usage string, it expands ``%prog`` to
    +      ``os.path.basename(sys.argv[0])`` (or to ``prog`` if you passed that keyword
    +      argument).  To suppress a usage message, pass the special value
    +      ``optparse.SUPPRESS_USAGE``.
    +
    +   ``option_list`` (default: ``[]``)
    +      A list of Option objects to populate the parser with.  The options in
    +      ``option_list`` are added after any options in ``standard_option_list`` (a class
    +      attribute that may be set by OptionParser subclasses), but before any version or
    +      help options. Deprecated; use :meth:`add_option` after creating the parser
    +      instead.
    +
    +   ``option_class`` (default: optparse.Option)
    +      Class to use when adding options to the parser in :meth:`add_option`.
    +
    +   ``version`` (default: ``None``)
    +      A version string to print when the user supplies a version option. If you supply
    +      a true value for ``version``, :mod:`optparse` automatically adds a version
    +      option with the single option string ``"--version"``.  The substring ``"%prog"``
    +      is expanded the same as for ``usage``.
    +
    +   ``conflict_handler`` (default: ``"error"``)
    +      Specifies what to do when options with conflicting option strings are added to
    +      the parser; see section :ref:`optparse-conflicts-between-options`.
    +
    +   ``description`` (default: ``None``)
    +      A paragraph of text giving a brief overview of your program.  :mod:`optparse`
    +      reformats this paragraph to fit the current terminal width and prints it when
    +      the user requests help (after ``usage``, but before the list of options).
    +
    +   ``formatter`` (default: a new IndentedHelpFormatter)
    +      An instance of optparse.HelpFormatter that will be used for printing help text.
    +      :mod:`optparse` provides two concrete classes for this purpose:
    +      IndentedHelpFormatter and TitledHelpFormatter.
    +
    +   ``add_help_option`` (default: ``True``)
    +      If true, :mod:`optparse` will add a help option (with option strings ``"-h"``
    +      and ``"--help"``) to the parser.
    +
    +   ``prog``
    +      The string to use when expanding ``"%prog"`` in ``usage`` and ``version``
    +      instead of ``os.path.basename(sys.argv[0])``.
    +
    +
    +
    +.. _optparse-populating-parser:
    +
    +Populating the parser
    +^^^^^^^^^^^^^^^^^^^^^
    +
    +There are several ways to populate the parser with options.  The preferred way
    +is by using ``OptionParser.add_option()``, as shown in section
    +:ref:`optparse-tutorial`.  :meth:`add_option` can be called in one of two ways:
    +
    +* pass it an Option instance (as returned by :func:`make_option`)
    +
    +* pass it any combination of positional and keyword arguments that are
    +  acceptable to :func:`make_option` (i.e., to the Option constructor), and it will
    +  create the Option instance for you
    +
    +The other alternative is to pass a list of pre-constructed Option instances to
    +the OptionParser constructor, as in::
    +
    +   option_list = [
    +       make_option("-f", "--filename",
    +                   action="store", type="string", dest="filename"),
    +       make_option("-q", "--quiet",
    +                   action="store_false", dest="verbose"),
    +       ]
    +   parser = OptionParser(option_list=option_list)
    +
    +(:func:`make_option` is a factory function for creating Option instances;
    +currently it is an alias for the Option constructor.  A future version of
    +:mod:`optparse` may split Option into several classes, and :func:`make_option`
    +will pick the right class to instantiate.  Do not instantiate Option directly.)
    +
    +
    +.. _optparse-defining-options:
    +
    +Defining options
    +^^^^^^^^^^^^^^^^
    +
    +Each Option instance represents a set of synonymous command-line option strings,
    +e.g. :option:`-f` and :option:`--file`.  You can specify any number of short or
    +long option strings, but you must specify at least one overall option string.
    +
    +The canonical way to create an Option instance is with the :meth:`add_option`
    +method of :class:`OptionParser`::
    +
    +   parser.add_option(opt_str[, ...], attr=value, ...)
    +
    +To define an option with only a short option string::
    +
    +   parser.add_option("-f", attr=value, ...)
    +
    +And to define an option with only a long option string::
    +
    +   parser.add_option("--foo", attr=value, ...)
    +
    +The keyword arguments define attributes of the new Option object.  The most
    +important option attribute is :attr:`action`, and it largely determines which
    +other attributes are relevant or required.  If you pass irrelevant option
    +attributes, or fail to pass required ones, :mod:`optparse` raises an OptionError
    +exception explaining your mistake.
    +
    +An option's *action* determines what :mod:`optparse` does when it encounters
    +this option on the command-line.  The standard option actions hard-coded into
    +:mod:`optparse` are:
    +
    +``store``
    +   store this option's argument (default)
    +
    +``store_const``
    +   store a constant value
    +
    +``store_true``
    +   store a true value
    +
    +``store_false``
    +   store a false value
    +
    +``append``
    +   append this option's argument to a list
    +
    +``append_const``
    +   append a constant value to a list
    +
    +``count``
    +   increment a counter by one
    +
    +``callback``
    +   call a specified function
    +
    +``help``
    +   print a usage message including all options and the documentation for them
    +
    +(If you don't supply an action, the default is ``store``.  For this action, you
    +may also supply :attr:`type` and :attr:`dest` option attributes; see below.)
    +
    +As you can see, most actions involve storing or updating a value somewhere.
    +:mod:`optparse` always creates a special object for this, conventionally called
    +``options`` (it happens to be an instance of ``optparse.Values``).  Option
    +arguments (and various other values) are stored as attributes of this object,
    +according to the :attr:`dest` (destination) option attribute.
    +
    +For example, when you call  ::
    +
    +   parser.parse_args()
    +
    +one of the first things :mod:`optparse` does is create the ``options`` object::
    +
    +   options = Values()
    +
    +If one of the options in this parser is defined with  ::
    +
    +   parser.add_option("-f", "--file", action="store", type="string", dest="filename")
    +
    +and the command-line being parsed includes any of the following::
    +
    +   -ffoo
    +   -f foo
    +   --file=foo
    +   --file foo
    +
    +then :mod:`optparse`, on seeing this option, will do the equivalent of  ::
    +
    +   options.filename = "foo"
    +
    +The :attr:`type` and :attr:`dest` option attributes are almost as important as
    +:attr:`action`, but :attr:`action` is the only one that makes sense for *all*
    +options.
    +
    +
    +.. _optparse-standard-option-actions:
    +
    +Standard option actions
    +^^^^^^^^^^^^^^^^^^^^^^^
    +
    +The various option actions all have slightly different requirements and effects.
    +Most actions have several relevant option attributes which you may specify to
    +guide :mod:`optparse`'s behaviour; a few have required attributes, which you
    +must specify for any option using that action.
    +
    +* ``store`` [relevant: :attr:`type`, :attr:`dest`, ``nargs``, ``choices``]
    +
    +  The option must be followed by an argument, which is converted to a value
    +  according to :attr:`type` and stored in :attr:`dest`.  If ``nargs`` > 1,
    +  multiple arguments will be consumed from the command line; all will be converted
    +  according to :attr:`type` and stored to :attr:`dest` as a tuple.  See the
    +  "Option types" section below.
    +
    +  If ``choices`` is supplied (a list or tuple of strings), the type defaults to
    +  ``choice``.
    +
    +  If :attr:`type` is not supplied, it defaults to ``string``.
    +
    +  If :attr:`dest` is not supplied, :mod:`optparse` derives a destination from the
    +  first long option string (e.g., ``"--foo-bar"`` implies ``foo_bar``). If there
    +  are no long option strings, :mod:`optparse` derives a destination from the first
    +  short option string (e.g., ``"-f"`` implies ``f``).
    +
    +  Example::
    +
    +     parser.add_option("-f")
    +     parser.add_option("-p", type="float", nargs=3, dest="point")
    +
    +  As it parses the command line  ::
    +
    +     -f foo.txt -p 1 -3.5 4 -fbar.txt
    +
    +  :mod:`optparse` will set  ::
    +
    +     options.f = "foo.txt"
    +     options.point = (1.0, -3.5, 4.0)
    +     options.f = "bar.txt"
    +
    +* ``store_const`` [required: ``const``; relevant: :attr:`dest`]
    +
    +  The value ``const`` is stored in :attr:`dest`.
    +
    +  Example::
    +
    +     parser.add_option("-q", "--quiet",
    +                       action="store_const", const=0, dest="verbose")
    +     parser.add_option("-v", "--verbose",
    +                       action="store_const", const=1, dest="verbose")
    +     parser.add_option("--noisy",
    +                       action="store_const", const=2, dest="verbose")
    +
    +  If ``"--noisy"`` is seen, :mod:`optparse` will set  ::
    +
    +     options.verbose = 2
    +
    +* ``store_true`` [relevant: :attr:`dest`]
    +
    +  A special case of ``store_const`` that stores a true value to :attr:`dest`.
    +
    +* ``store_false`` [relevant: :attr:`dest`]
    +
    +  Like ``store_true``, but stores a false value.
    +
    +  Example::
    +
    +     parser.add_option("--clobber", action="store_true", dest="clobber")
    +     parser.add_option("--no-clobber", action="store_false", dest="clobber")
    +
    +* ``append`` [relevant: :attr:`type`, :attr:`dest`, ``nargs``, ``choices``]
    +
    +  The option must be followed by an argument, which is appended to the list in
    +  :attr:`dest`.  If no default value for :attr:`dest` is supplied, an empty list
    +  is automatically created when :mod:`optparse` first encounters this option on
    +  the command-line.  If ``nargs`` > 1, multiple arguments are consumed, and a
    +  tuple of length ``nargs`` is appended to :attr:`dest`.
    +
    +  The defaults for :attr:`type` and :attr:`dest` are the same as for the ``store``
    +  action.
    +
    +  Example::
    +
    +     parser.add_option("-t", "--tracks", action="append", type="int")
    +
    +  If ``"-t3"`` is seen on the command-line, :mod:`optparse` does the equivalent
    +  of::
    +
    +     options.tracks = []
    +     options.tracks.append(int("3"))
    +
    +  If, a little later on, ``"--tracks=4"`` is seen, it does::
    +
    +     options.tracks.append(int("4"))
    +
    +* ``append_const`` [required: ``const``; relevant: :attr:`dest`]
    +
    +  Like ``store_const``, but the value ``const`` is appended to :attr:`dest`; as
    +  with ``append``, :attr:`dest` defaults to ``None``, and an empty list is
    +  automatically created the first time the option is encountered.
    +
    +* ``count`` [relevant: :attr:`dest`]
    +
    +  Increment the integer stored at :attr:`dest`.  If no default value is supplied,
    +  :attr:`dest` is set to zero before being incremented the first time.
    +
    +  Example::
    +
    +     parser.add_option("-v", action="count", dest="verbosity")
    +
    +  The first time ``"-v"`` is seen on the command line, :mod:`optparse` does the
    +  equivalent of::
    +
    +     options.verbosity = 0
    +     options.verbosity += 1
    +
    +  Every subsequent occurrence of ``"-v"`` results in  ::
    +
    +     options.verbosity += 1
    +
    +* ``callback`` [required: ``callback``; relevant: :attr:`type`, ``nargs``,
    +  ``callback_args``, ``callback_kwargs``]
    +
    +  Call the function specified by ``callback``, which is called as  ::
    +
    +     func(option, opt_str, value, parser, *args, **kwargs)
    +
    +  See section :ref:`optparse-option-callbacks` for more detail.
    +
    +* :attr:`help`
    +
    +  Prints a complete help message for all the options in the current option parser.
    +  The help message is constructed from the ``usage`` string passed to
    +  OptionParser's constructor and the :attr:`help` string passed to every option.
    +
    +  If no :attr:`help` string is supplied for an option, it will still be listed in
    +  the help message.  To omit an option entirely, use the special value
    +  ``optparse.SUPPRESS_HELP``.
    +
    +  :mod:`optparse` automatically adds a :attr:`help` option to all OptionParsers,
    +  so you do not normally need to create one.
    +
    +  Example::
    +
    +     from optparse import OptionParser, SUPPRESS_HELP
    +
    +     parser = OptionParser()
    +     parser.add_option("-h", "--help", action="help")
    +     parser.add_option("-v", action="store_true", dest="verbose",
    +                       help="Be moderately verbose")
    +     parser.add_option("--file", dest="filename",
    +                       help="Input file to read data from")
    +     parser.add_option("--secret", help=SUPPRESS_HELP)
    +
    +  If :mod:`optparse` sees either ``"-h"`` or ``"--help"`` on the command line, it
    +  will print something like the following help message to stdout (assuming
    +  ``sys.argv[0]`` is ``"foo.py"``)::
    +
    +     usage: foo.py [options]
    +
    +     options:
    +       -h, --help        Show this help message and exit
    +       -v                Be moderately verbose
    +       --file=FILENAME   Input file to read data from
    +
    +  After printing the help message, :mod:`optparse` terminates your process with
    +  ``sys.exit(0)``.
    +
    +* ``version``
    +
    +  Prints the version number supplied to the OptionParser to stdout and exits.  The
    +  version number is actually formatted and printed by the ``print_version()``
    +  method of OptionParser.  Generally only relevant if the ``version`` argument is
    +  supplied to the OptionParser constructor.  As with :attr:`help` options, you
    +  will rarely create ``version`` options, since :mod:`optparse` automatically adds
    +  them when needed.
    +
    +
    +.. _optparse-option-attributes:
    +
    +Option attributes
    +^^^^^^^^^^^^^^^^^
    +
    +The following option attributes may be passed as keyword arguments to
    +``parser.add_option()``.  If you pass an option attribute that is not relevant
    +to a particular option, or fail to pass a required option attribute,
    +:mod:`optparse` raises OptionError.
    +
    +* :attr:`action` (default: ``"store"``)
    +
    +  Determines :mod:`optparse`'s behaviour when this option is seen on the command
    +  line; the available options are documented above.
    +
    +* :attr:`type` (default: ``"string"``)
    +
    +  The argument type expected by this option (e.g., ``"string"`` or ``"int"``); the
    +  available option types are documented below.
    +
    +* :attr:`dest` (default: derived from option strings)
    +
    +  If the option's action implies writing or modifying a value somewhere, this
    +  tells :mod:`optparse` where to write it: :attr:`dest` names an attribute of the
    +  ``options`` object that :mod:`optparse` builds as it parses the command line.
    +
    +* ``default`` (deprecated)
    +
    +  The value to use for this option's destination if the option is not seen on the
    +  command line.  Deprecated; use ``parser.set_defaults()`` instead.
    +
    +* ``nargs`` (default: 1)
    +
    +  How many arguments of type :attr:`type` should be consumed when this option is
    +  seen.  If > 1, :mod:`optparse` will store a tuple of values to :attr:`dest`.
    +
    +* ``const``
    +
    +  For actions that store a constant value, the constant value to store.
    +
    +* ``choices``
    +
    +  For options of type ``"choice"``, the list of strings the user may choose from.
    +
    +* ``callback``
    +
    +  For options with action ``"callback"``, the callable to call when this option
    +  is seen.  See section :ref:`optparse-option-callbacks` for detail on the
    +  arguments passed to the callable.
    +
    +* ``callback_args``, ``callback_kwargs``
    +
    +  Additional positional and keyword arguments to pass to ``callback`` after the
    +  four standard callback arguments.
    +
    +* :attr:`help`
    +
    +  Help text to print for this option when listing all available options after the
    +  user supplies a :attr:`help` option (such as ``"--help"``). If no help text is
    +  supplied, the option will be listed without help text.  To hide this option, use
    +  the special value ``SUPPRESS_HELP``.
    +
    +* ``metavar`` (default: derived from option strings)
    +
    +  Stand-in for the option argument(s) to use when printing help text. See section
    +  :ref:`optparse-tutorial` for an example.
    +
    +
    +.. _optparse-standard-option-types:
    +
    +Standard option types
    +^^^^^^^^^^^^^^^^^^^^^
    +
    +:mod:`optparse` has six built-in option types: ``string``, ``int``, ``long``,
    +``choice``, ``float`` and ``complex``.  If you need to add new option types, see
    +section :ref:`optparse-extending-optparse`.
    +
    +Arguments to string options are not checked or converted in any way: the text on
    +the command line is stored in the destination (or passed to the callback) as-is.
    +
    +Integer arguments (type ``int`` or ``long``) are parsed as follows:
    +
    +* if the number starts with ``0x``, it is parsed as a hexadecimal number
    +
    +* if the number starts with ``0``, it is parsed as an octal number
    +
    +* if the number starts with ``0b``, it is parsed as a binary number
    +
    +* otherwise, the number is parsed as a decimal number
    +
    +
    +The conversion is done by calling either ``int()`` or ``long()`` with the
    +appropriate base (2, 8, 10, or 16).  If this fails, so will :mod:`optparse`,
    +although with a more useful error message.
    +
    +``float`` and ``complex`` option arguments are converted directly with
    +``float()`` and ``complex()``, with similar error-handling.
    +
    +``choice`` options are a subtype of ``string`` options.  The ``choices`` option
    +attribute (a sequence of strings) defines the set of allowed option arguments.
    +``optparse.check_choice()`` compares user-supplied option arguments against this
    +master list and raises OptionValueError if an invalid string is given.
    +
    +
    +.. _optparse-parsing-arguments:
    +
    +Parsing arguments
    +^^^^^^^^^^^^^^^^^
    +
    +The whole point of creating and populating an OptionParser is to call its
    +:meth:`parse_args` method::
    +
    +   (options, args) = parser.parse_args(args=None, values=None)
    +
    +where the input parameters are
    +
    +``args``
    +   the list of arguments to process (default: ``sys.argv[1:]``)
    +
    +``values``
    +   object to store option arguments in (default: a new instance of optparse.Values)
    +
    +and the return values are
    +
    +``options``
    +   the same object that was passed in as ``values``, or the optparse.Values
    +   instance created by :mod:`optparse`
    +
    +``args``
    +   the leftover positional arguments after all options have been processed
    +
    +The most common usage is to supply neither keyword argument.  If you supply
    +``values``, it will be modified with repeated ``setattr()`` calls (roughly one
    +for every option argument stored to an option destination) and returned by
    +:meth:`parse_args`.
    +
    +If :meth:`parse_args` encounters any errors in the argument list, it calls the
    +OptionParser's :meth:`error` method with an appropriate end-user error message.
    +This ultimately terminates your process with an exit status of 2 (the
    +traditional Unix exit status for command-line errors).
    +
    +
    +.. _optparse-querying-manipulating-option-parser:
    +
    +Querying and manipulating your option parser
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +Sometimes, it's useful to poke around your option parser and see what's there.
    +OptionParser provides a couple of methods to help you out:
    +
    +``has_option(opt_str)``
    +   Return true if the OptionParser has an option with  option string ``opt_str``
    +   (e.g., ``"-q"`` or ``"--verbose"``).
    +
    +``get_option(opt_str)``
    +   Returns the Option instance with the option string ``opt_str``, or ``None`` if
    +   no options have that option string.
    +
    +``remove_option(opt_str)``
    +   If the OptionParser has an option corresponding to ``opt_str``, that option is
    +   removed.  If that option provided any other option strings, all of those option
    +   strings become invalid. If ``opt_str`` does not occur in any option belonging to
    +   this OptionParser, raises ValueError.
    +
    +
    +.. _optparse-conflicts-between-options:
    +
    +Conflicts between options
    +^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +If you're not careful, it's easy to define options with conflicting option
    +strings::
    +
    +   parser.add_option("-n", "--dry-run", ...)
    +   [...]
    +   parser.add_option("-n", "--noisy", ...)
    +
    +(This is particularly true if you've defined your own OptionParser subclass with
    +some standard options.)
    +
    +Every time you add an option, :mod:`optparse` checks for conflicts with existing
    +options.  If it finds any, it invokes the current conflict-handling mechanism.
    +You can set the conflict-handling mechanism either in the constructor::
    +
    +   parser = OptionParser(..., conflict_handler=handler)
    +
    +or with a separate call::
    +
    +   parser.set_conflict_handler(handler)
    +
    +The available conflict handlers are:
    +
    +   ``error`` (default)
    +      assume option conflicts are a programming error and raise OptionConflictError
    +
    +   ``resolve``
    +      resolve option conflicts intelligently (see below)
    +
    +
    +As an example, let's define an OptionParser that resolves conflicts
    +intelligently and add conflicting options to it::
    +
    +   parser = OptionParser(conflict_handler="resolve")
    +   parser.add_option("-n", "--dry-run", ..., help="do no harm")
    +   parser.add_option("-n", "--noisy", ..., help="be noisy")
    +
    +At this point, :mod:`optparse` detects that a previously-added option is already
    +using the ``"-n"`` option string.  Since ``conflict_handler`` is ``"resolve"``,
    +it resolves the situation by removing ``"-n"`` from the earlier option's list of
    +option strings.  Now ``"--dry-run"`` is the only way for the user to activate
    +that option.  If the user asks for help, the help message will reflect that::
    +
    +   options:
    +     --dry-run     do no harm
    +     [...]
    +     -n, --noisy   be noisy
    +
    +It's possible to whittle away the option strings for a previously-added option
    +until there are none left, and the user has no way of invoking that option from
    +the command-line.  In that case, :mod:`optparse` removes that option completely,
    +so it doesn't show up in help text or anywhere else. Carrying on with our
    +existing OptionParser::
    +
    +   parser.add_option("--dry-run", ..., help="new dry-run option")
    +
    +At this point, the original :option:`-n/--dry-run` option is no longer
    +accessible, so :mod:`optparse` removes it, leaving this help text::
    +
    +   options:
    +     [...]
    +     -n, --noisy   be noisy
    +     --dry-run     new dry-run option
    +
    +
    +.. _optparse-cleanup:
    +
    +Cleanup
    +^^^^^^^
    +
    +OptionParser instances have several cyclic references.  This should not be a
    +problem for Python's garbage collector, but you may wish to break the cyclic
    +references explicitly by calling ``destroy()`` on your OptionParser once you are
    +done with it.  This is particularly useful in long-running applications where
    +large object graphs are reachable from your OptionParser.
    +
    +
    +.. _optparse-other-methods:
    +
    +Other methods
    +^^^^^^^^^^^^^
    +
    +OptionParser supports several other public methods:
    +
    +* ``set_usage(usage)``
    +
    +  Set the usage string according to the rules described above for the ``usage``
    +  constructor keyword argument.  Passing ``None`` sets the default usage string;
    +  use ``SUPPRESS_USAGE`` to suppress a usage message.
    +
    +* ``enable_interspersed_args()``, ``disable_interspersed_args()``
    +
    +  Enable/disable positional arguments interspersed with options, similar to GNU
    +  getopt (enabled by default).  For example, if ``"-a"`` and ``"-b"`` are both
    +  simple options that take no arguments, :mod:`optparse` normally accepts this
    +  syntax::
    +
    +     prog -a arg1 -b arg2
    +
    +  and treats it as equivalent to  ::
    +
    +     prog -a -b arg1 arg2
    +
    +  To disable this feature, call ``disable_interspersed_args()``.  This restores
    +  traditional Unix syntax, where option parsing stops with the first non-option
    +  argument.
    +
    +* ``set_defaults(dest=value, ...)``
    +
    +  Set default values for several option destinations at once.  Using
    +  :meth:`set_defaults` is the preferred way to set default values for options,
    +  since multiple options can share the same destination.  For example, if several
    +  "mode" options all set the same destination, any one of them can set the
    +  default, and the last one wins::
    +
    +     parser.add_option("--advanced", action="store_const",
    +                       dest="mode", const="advanced",
    +                       default="novice")    # overridden below
    +     parser.add_option("--novice", action="store_const",
    +                       dest="mode", const="novice",
    +                       default="advanced")  # overrides above setting
    +
    +  To avoid this confusion, use :meth:`set_defaults`::
    +
    +     parser.set_defaults(mode="advanced")
    +     parser.add_option("--advanced", action="store_const",
    +                       dest="mode", const="advanced")
    +     parser.add_option("--novice", action="store_const",
    +                       dest="mode", const="novice")
    +
    +.. % $Id: reference.txt 519 2006-06-11 14:39:11Z gward $
    +
    +
    +.. _optparse-option-callbacks:
    +
    +Option Callbacks
    +----------------
    +
    +When :mod:`optparse`'s built-in actions and types aren't quite enough for your
    +needs, you have two choices: extend :mod:`optparse` or define a callback option.
    +Extending :mod:`optparse` is more general, but overkill for a lot of simple
    +cases.  Quite often a simple callback is all you need.
    +
    +There are two steps to defining a callback option:
    +
    +* define the option itself using the ``callback`` action
    +
    +* write the callback; this is a function (or method) that takes at least four
    +  arguments, as described below
    +
    +
    +.. _optparse-defining-callback-option:
    +
    +Defining a callback option
    +^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +As always, the easiest way to define a callback option is by using the
    +``parser.add_option()`` method.  Apart from :attr:`action`, the only option
    +attribute you must specify is ``callback``, the function to call::
    +
    +   parser.add_option("-c", action="callback", callback=my_callback)
    +
    +``callback`` is a function (or other callable object), so you must have already
    +defined ``my_callback()`` when you create this callback option. In this simple
    +case, :mod:`optparse` doesn't even know if :option:`-c` takes any arguments,
    +which usually means that the option takes no arguments---the mere presence of
    +:option:`-c` on the command-line is all it needs to know.  In some
    +circumstances, though, you might want your callback to consume an arbitrary
    +number of command-line arguments.  This is where writing callbacks gets tricky;
    +it's covered later in this section.
    +
    +:mod:`optparse` always passes four particular arguments to your callback, and it
    +will only pass additional arguments if you specify them via ``callback_args``
    +and ``callback_kwargs``.  Thus, the minimal callback function signature is::
    +
    +   def my_callback(option, opt, value, parser):
    +
    +The four arguments to a callback are described below.
    +
    +There are several other option attributes that you can supply when you define a
    +callback option:
    +
    +:attr:`type`
    +   has its usual meaning: as with the ``store`` or ``append`` actions, it instructs
    +   :mod:`optparse` to consume one argument and convert it to :attr:`type`.  Rather
    +   than storing the converted value(s) anywhere, though, :mod:`optparse` passes it
    +   to your callback function.
    +
    +``nargs``
    +   also has its usual meaning: if it is supplied and > 1, :mod:`optparse` will
    +   consume ``nargs`` arguments, each of which must be convertible to :attr:`type`.
    +   It then passes a tuple of converted values to your callback.
    +
    +``callback_args``
    +   a tuple of extra positional arguments to pass to the callback
    +
    +``callback_kwargs``
    +   a dictionary of extra keyword arguments to pass to the callback
    +
    +
    +.. _optparse-how-callbacks-called:
    +
    +How callbacks are called
    +^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +All callbacks are called as follows::
    +
    +   func(option, opt_str, value, parser, *args, **kwargs)
    +
    +where
    +
    +``option``
    +   is the Option instance that's calling the callback
    +
    +``opt_str``
    +   is the option string seen on the command-line that's triggering the callback.
    +   (If an abbreviated long option was used, ``opt_str`` will be the full, canonical
    +   option string---e.g. if the user puts ``"--foo"`` on the command-line as an
    +   abbreviation for ``"--foobar"``, then ``opt_str`` will be ``"--foobar"``.)
    +
    +``value``
    +   is the argument to this option seen on the command-line.  :mod:`optparse` will
    +   only expect an argument if :attr:`type` is set; the type of ``value`` will be
    +   the type implied by the option's type.  If :attr:`type` for this option is
    +   ``None`` (no argument expected), then ``value`` will be ``None``.  If ``nargs``
    +   > 1, ``value`` will be a tuple of values of the appropriate type.
    +
    +``parser``
    +   is the OptionParser instance driving the whole thing, mainly useful because you
    +   can access some other interesting data through its instance attributes:
    +
    +   ``parser.largs``
    +      the current list of leftover arguments, i.e. arguments that have been consumed
    +      but are neither options nor option arguments. Feel free to modify
    +      ``parser.largs``, e.g. by adding more arguments to it.  (This list will become
    +      ``args``, the second return value of :meth:`parse_args`.)
    +
    +   ``parser.rargs``
    +      the current list of remaining arguments, i.e. with ``opt_str`` and ``value`` (if
    +      applicable) removed, and only the arguments following them still there.  Feel
    +      free to modify ``parser.rargs``, e.g. by consuming more arguments.
    +
    +   ``parser.values``
    +      the object where option values are by default stored (an instance of
    +      optparse.Values).  This lets callbacks use the same mechanism as the rest
    +      of :mod:`optparse` for storing option values; you don't need to mess around with
    +      globals or closures.  You can also access or modify the value(s) of any options
    +      already encountered on the command-line.
    +
    +``args``
    +   is a tuple of arbitrary positional arguments supplied via the ``callback_args``
    +   option attribute.
    +
    +``kwargs``
    +   is a dictionary of arbitrary keyword arguments supplied via ``callback_kwargs``.
    +
    +
    +.. _optparse-raising-errors-in-callback:
    +
    +Raising errors in a callback
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +The callback function should raise OptionValueError if there are any problems
    +with the option or its argument(s).  :mod:`optparse` catches this and terminates
    +the program, printing the error message you supply to stderr.  Your message
    +should be clear, concise, accurate, and mention the option at fault.  Otherwise,
    +the user will have a hard time figuring out what he did wrong.
    +
    +
    +.. _optparse-callback-example-1:
    +
    +Callback example 1: trivial callback
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +Here's an example of a callback option that takes no arguments, and simply
    +records that the option was seen::
    +
    +   def record_foo_seen(option, opt_str, value, parser):
    +       parser.saw_foo = True
    +
    +   parser.add_option("--foo", action="callback", callback=record_foo_seen)
    +
    +Of course, you could do that with the ``store_true`` action.
    +
    +
    +.. _optparse-callback-example-2:
    +
    +Callback example 2: check option order
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +Here's a slightly more interesting example: record the fact that ``"-a"`` is
    +seen, but blow up if it comes after ``"-b"`` in the command-line.  ::
    +
    +   def check_order(option, opt_str, value, parser):
    +       if parser.values.b:
    +           raise OptionValueError("can't use -a after -b")
    +       parser.values.a = 1
    +   [...]
    +   parser.add_option("-a", action="callback", callback=check_order)
    +   parser.add_option("-b", action="store_true", dest="b")
    +
    +
    +.. _optparse-callback-example-3:
    +
    +Callback example 3: check option order (generalized)
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +If you want to re-use this callback for several similar options (set a flag, but
    +blow up if ``"-b"`` has already been seen), it needs a bit of work: the error
    +message and the flag that it sets must be generalized.  ::
    +
    +   def check_order(option, opt_str, value, parser):
    +       if parser.values.b:
    +           raise OptionValueError("can't use %s after -b" % opt_str)
    +       setattr(parser.values, option.dest, 1)
    +   [...]
    +   parser.add_option("-a", action="callback", callback=check_order, dest='a')
    +   parser.add_option("-b", action="store_true", dest="b")
    +   parser.add_option("-c", action="callback", callback=check_order, dest='c')
    +
    +
    +.. _optparse-callback-example-4:
    +
    +Callback example 4: check arbitrary condition
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +Of course, you could put any condition in there---you're not limited to checking
    +the values of already-defined options.  For example, if you have options that
    +should not be called when the moon is full, all you have to do is this::
    +
    +   def check_moon(option, opt_str, value, parser):
    +       if is_moon_full():
    +           raise OptionValueError("%s option invalid when moon is full"
    +                                  % opt_str)
    +       setattr(parser.values, option.dest, 1)
    +   [...]
    +   parser.add_option("--foo",
    +                     action="callback", callback=check_moon, dest="foo")
    +
    +(The definition of ``is_moon_full()`` is left as an exercise for the reader.)
    +
    +
    +.. _optparse-callback-example-5:
    +
    +Callback example 5: fixed arguments
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +Things get slightly more interesting when you define callback options that take
    +a fixed number of arguments.  Specifying that a callback option takes arguments
    +is similar to defining a ``store`` or ``append`` option: if you define
    +:attr:`type`, then the option takes one argument that must be convertible to
    +that type; if you further define ``nargs``, then the option takes ``nargs``
    +arguments.
    +
    +Here's an example that just emulates the standard ``store`` action::
    +
    +   def store_value(option, opt_str, value, parser):
    +       setattr(parser.values, option.dest, value)
    +   [...]
    +   parser.add_option("--foo",
    +                     action="callback", callback=store_value,
    +                     type="int", nargs=3, dest="foo")
    +
    +Note that :mod:`optparse` takes care of consuming 3 arguments and converting
    +them to integers for you; all you have to do is store them.  (Or whatever;
    +obviously you don't need a callback for this example.)
    +
    +
    +.. _optparse-callback-example-6:
    +
    +Callback example 6: variable arguments
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +Things get hairy when you want an option to take a variable number of arguments.
    +For this case, you must write a callback, as :mod:`optparse` doesn't provide any
    +built-in capabilities for it.  And you have to deal with certain intricacies of
    +conventional Unix command-line parsing that :mod:`optparse` normally handles for
    +you.  In particular, callbacks should implement the conventional rules for bare
    +``"--"`` and ``"-"`` arguments:
    +
    +* either ``"--"`` or ``"-"`` can be option arguments
    +
    +* bare ``"--"`` (if not the argument to some option): halt command-line
    +  processing and discard the ``"--"``
    +
    +* bare ``"-"`` (if not the argument to some option): halt command-line
    +  processing but keep the ``"-"`` (append it to ``parser.largs``)
    +
    +If you want an option that takes a variable number of arguments, there are
    +several subtle, tricky issues to worry about.  The exact implementation you
    +choose will be based on which trade-offs you're willing to make for your
    +application (which is why :mod:`optparse` doesn't support this sort of thing
    +directly).
    +
    +Nevertheless, here's a stab at a callback for an option with variable
    +arguments::
    +
    +   def vararg_callback(option, opt_str, value, parser):
    +       assert value is None
    +       done = 0
    +       value = []
    +       rargs = parser.rargs
    +       while rargs:
    +           arg = rargs[0]
    +
    +           # Stop if we hit an arg like "--foo", "-a", "-fx", "--file=f",
    +           # etc.  Note that this also stops on "-3" or "-3.0", so if
    +           # your option takes numeric values, you will need to handle
    +           # this.
    +           if ((arg[:2] == "--" and len(arg) > 2) or
    +               (arg[:1] == "-" and len(arg) > 1 and arg[1] != "-")):
    +               break
    +           else:
    +               value.append(arg)
    +               del rargs[0]
    +
    +       setattr(parser.values, option.dest, value)
    +
    +   [...]
    +   parser.add_option("-c", "--callback", dest="vararg_attr",
    +                     action="callback", callback=vararg_callback)
    +
    +The main weakness with this particular implementation is that negative numbers
    +in the arguments following ``"-c"`` will be interpreted as further options
    +(probably causing an error), rather than as arguments to ``"-c"``.  Fixing this
    +is left as an exercise for the reader.
    +
    +.. % $Id: callbacks.txt 415 2004-09-30 02:26:17Z greg $
    +
    +
    +.. _optparse-extending-optparse:
    +
    +Extending :mod:`optparse`
    +-------------------------
    +
    +Since the two major controlling factors in how :mod:`optparse` interprets
    +command-line options are the action and type of each option, the most likely
    +direction of extension is to add new actions and new types.
    +
    +
    +.. _optparse-adding-new-types:
    +
    +Adding new types
    +^^^^^^^^^^^^^^^^
    +
    +To add new types, you need to define your own subclass of :mod:`optparse`'s
    +Option class.  This class has a couple of attributes that define
    +:mod:`optparse`'s types: :attr:`TYPES` and :attr:`TYPE_CHECKER`.
    +
    +:attr:`TYPES` is a tuple of type names; in your subclass, simply define a new
    +tuple :attr:`TYPES` that builds on the standard one.
    +
    +:attr:`TYPE_CHECKER` is a dictionary mapping type names to type-checking
    +functions.  A type-checking function has the following signature::
    +
    +   def check_mytype(option, opt, value)
    +
    +where ``option`` is an :class:`Option` instance, ``opt`` is an option string
    +(e.g., ``"-f"``), and ``value`` is the string from the command line that must be
    +checked and converted to your desired type.  ``check_mytype()`` should return an
    +object of the hypothetical type ``mytype``.  The value returned by a
    +type-checking function will wind up in the optparse.Values instance returned by
    +:meth:`OptionParser.parse_args`, or be passed to a callback as the ``value``
    +parameter.
    +
    +Your type-checking function should raise OptionValueError if it encounters any
    +problems.  OptionValueError takes a single string argument, which is passed
    +as-is to OptionParser's :meth:`error` method, which in turn prepends the program
    +name and the string ``"error:"`` and prints everything to stderr before
    +terminating the process.
    +
    +Here's a silly example that demonstrates adding a ``complex`` option type to
    +parse Python-style complex numbers on the command line.  (This is even sillier
    +than it used to be, because :mod:`optparse` 1.3 added built-in support for
    +complex numbers, but never mind.)
    +
    +First, the necessary imports::
    +
    +   from copy import copy
    +   from optparse import Option, OptionValueError
    +
    +You need to define your type-checker first, since it's referred to later (in the
    +:attr:`TYPE_CHECKER` class attribute of your Option subclass)::
    +
    +   def check_complex(option, opt, value):
    +       try:
    +           return complex(value)
    +       except ValueError:
    +           raise OptionValueError(
    +               "option %s: invalid complex value: %r" % (opt, value))
    +
    +Finally, the Option subclass::
    +
    +   class MyOption (Option):
    +       TYPES = Option.TYPES + ("complex",)
    +       TYPE_CHECKER = copy(Option.TYPE_CHECKER)
    +       TYPE_CHECKER["complex"] = check_complex
    +
    +(If we didn't make a :func:`copy` of :attr:`Option.TYPE_CHECKER`, we would end
    +up modifying the :attr:`TYPE_CHECKER` attribute of :mod:`optparse`'s Option
    +class. This being Python, nothing stops you from doing that except good manners
    +and common sense.)
    +
    +That's it!  Now you can write a script that uses the new option type just like
    +any other :mod:`optparse`\ -based script, except you have to instruct your
    +OptionParser to use MyOption instead of Option::
    +
    +   parser = OptionParser(option_class=MyOption)
    +   parser.add_option("-c", type="complex")
    +
    +Alternately, you can build your own option list and pass it to OptionParser; if
    +you don't use :meth:`add_option` in the above way, you don't need to tell
    +OptionParser which option class to use::
    +
    +   option_list = [MyOption("-c", action="store", type="complex", dest="c")]
    +   parser = OptionParser(option_list=option_list)
    +
    +
    +.. _optparse-adding-new-actions:
    +
    +Adding new actions
    +^^^^^^^^^^^^^^^^^^
    +
    +Adding new actions is a bit trickier, because you have to understand that
    +:mod:`optparse` has a couple of classifications for actions:
    +
    +"store" actions
    +   actions that result in :mod:`optparse` storing a value to an attribute of the
    +   current optparse.Values instance; these options require a :attr:`dest` attribute
    +   to be supplied to the Option constructor
    +
    +"typed" actions
    +   actions that take a value from the command line and expect it to be of a certain
    +   type; or rather, a string that can be converted to a certain type.  These
    +   options require a :attr:`type` attribute to the Option constructor.
    +
    +These are overlapping sets: some default "store" actions are ``store``,
    +``store_const``, ``append``, and ``count``, while the default "typed" actions
    +are ``store``, ``append``, and ``callback``.
    +
    +When you add an action, you need to categorize it by listing it in at least one
    +of the following class attributes of Option (all are tuples of strings):
    +
    +:attr:`ACTIONS`
    +   all actions must be listed in ACTIONS
    +
    +:attr:`STORE_ACTIONS`
    +   "store" actions are additionally listed here
    +
    +:attr:`TYPED_ACTIONS`
    +   "typed" actions are additionally listed here
    +
    +``ALWAYS_TYPED_ACTIONS``
    +   actions that always take a type (i.e. whose options always take a value) are
    +   additionally listed here.  The only effect of this is that :mod:`optparse`
    +   assigns the default type, ``string``, to options with no explicit type whose
    +   action is listed in ``ALWAYS_TYPED_ACTIONS``.
    +
    +In order to actually implement your new action, you must override Option's
    +:meth:`take_action` method and add a case that recognizes your action.
    +
    +For example, let's add an ``extend`` action.  This is similar to the standard
    +``append`` action, but instead of taking a single value from the command-line
    +and appending it to an existing list, ``extend`` will take multiple values in a
    +single comma-delimited string, and extend an existing list with them.  That is,
    +if ``"--names"`` is an ``extend`` option of type ``string``, the command line
    +::
    +
    +   --names=foo,bar --names blah --names ding,dong
    +
    +would result in a list  ::
    +
    +   ["foo", "bar", "blah", "ding", "dong"]
    +
    +Again we define a subclass of Option::
    +
    +   class MyOption (Option):
    +
    +       ACTIONS = Option.ACTIONS + ("extend",)
    +       STORE_ACTIONS = Option.STORE_ACTIONS + ("extend",)
    +       TYPED_ACTIONS = Option.TYPED_ACTIONS + ("extend",)
    +       ALWAYS_TYPED_ACTIONS = Option.ALWAYS_TYPED_ACTIONS + ("extend",)
    +
    +       def take_action(self, action, dest, opt, value, values, parser):
    +           if action == "extend":
    +               lvalue = value.split(",")
    +               values.ensure_value(dest, []).extend(lvalue)
    +           else:
    +               Option.take_action(
    +                   self, action, dest, opt, value, values, parser)
    +
    +Features of note:
    +
    +* ``extend`` both expects a value on the command-line and stores that value
    +  somewhere, so it goes in both :attr:`STORE_ACTIONS` and :attr:`TYPED_ACTIONS`
    +
    +* to ensure that :mod:`optparse` assigns the default type of ``string`` to
    +  ``extend`` actions, we put the ``extend`` action in ``ALWAYS_TYPED_ACTIONS`` as
    +  well
    +
    +* :meth:`MyOption.take_action` implements just this one new action, and passes
    +  control back to :meth:`Option.take_action` for the standard :mod:`optparse`
    +  actions
    +
    +* ``values`` is an instance of the optparse.Values class, which
    +  provides the very useful :meth:`ensure_value` method. :meth:`ensure_value` is
    +  essentially :func:`getattr` with a safety valve; it is called as  ::
    +
    +     values.ensure_value(attr, value)
    +
    +  If the ``attr`` attribute of ``values`` doesn't exist or is None, then
    +  ensure_value() first sets it to ``value``, and then returns ``value``. This is very
    +  handy for actions like ``extend``, ``append``, and ``count``, all of which
    +  accumulate data in a variable and expect that variable to be of a certain type
    +  (a list for the first two, an integer for the latter).  Using
    +  :meth:`ensure_value` means that scripts using your action don't have to worry
    +  about setting a default value for the option destinations in question; they can
    +  just leave the default as None and :meth:`ensure_value` will take care of
    +  getting it right when it's needed.
    +
    +.. % $Id: extending.txt 517 2006-06-10 16:18:11Z gward $
    +
    diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst
    new file mode 100644
    index 0000000..291d155
    --- /dev/null
    +++ b/Doc/library/os.path.rst
    @@ -0,0 +1,317 @@
    +
    +:mod:`os.path` --- Common pathname manipulations
    +================================================
    +
    +.. module:: os.path
    +   :synopsis: Operations on pathnames.
    +
    +
    +.. index:: single: path; operations
    +
    +This module implements some useful functions on pathnames. To read or
    +write files see :func:`open`, and for accessing the filesystem see the
    +:mod:`os` module.
    +
    +.. warning::
    +
    +   On Windows, many of these functions do not properly support UNC pathnames.
    +   :func:`splitunc` and :func:`ismount` do handle them correctly.
    +
    +
    +.. function:: abspath(path)
    +
    +   Return a normalized absolutized version of the pathname *path*. On most
    +   platforms, this is equivalent to ``normpath(join(os.getcwd(), path))``.
    +
    +   .. versionadded:: 1.5.2
    +
    +
    +.. function:: basename(path)
    +
    +   Return the base name of pathname *path*.  This is the second half of the pair
    +   returned by ``split(path)``.  Note that the result of this function is different
    +   from the Unix :program:`basename` program; where :program:`basename` for
    +   ``'/foo/bar/'`` returns ``'bar'``, the :func:`basename` function returns an
    +   empty string (``''``).
    +
    +
    +.. function:: commonprefix(list)
    +
    +   Return the longest path prefix (taken character-by-character) that is a prefix
    +   of all paths in *list*.  If *list* is empty, return the empty string (``''``).
    +   Note that this may return invalid paths because it works a character at a time.
    +
    +
    +.. function:: dirname(path)
    +
    +   Return the directory name of pathname *path*.  This is the first half of the
    +   pair returned by ``split(path)``.
    +
    +
    +.. function:: exists(path)
    +
    +   Return ``True`` if *path* refers to an existing path.  Returns ``False`` for
    +   broken symbolic links. On some platforms, this function may return ``False`` if
    +   permission is not granted to execute :func:`os.stat` on the requested file, even
    +   if the *path* physically exists.
    +
    +
    +.. function:: lexists(path)
    +
    +   Return ``True`` if *path* refers to an existing path. Returns ``True`` for
    +   broken symbolic links.   Equivalent to :func:`exists` on platforms lacking
    +   :func:`os.lstat`.
    +
    +   .. versionadded:: 2.4
    +
    +
    +.. function:: expanduser(path)
    +
    +   On Unix and Windows, return the argument with an initial component of ``~`` or
    +   ``~user`` replaced by that *user*'s home directory.
    +
    +   .. index:: module: pwd
    +
    +   On Unix, an initial ``~`` is replaced by the environment variable :envvar:`HOME`
    +   if it is set; otherwise the current user's home directory is looked up in the
    +   password directory through the built-in module :mod:`pwd`. An initial ``~user``
    +   is looked up directly in the password directory.
    +
    +   On Windows, :envvar:`HOME` and :envvar:`USERPROFILE` will be used if set,
    +   otherwise a combination of :envvar:`HOMEPATH` and :envvar:`HOMEDRIVE` will be
    +   used.  An initial ``~user`` is handled by stripping the last directory component
    +   from the created user path derived above.
    +
    +   If the expansion fails or if the path does not begin with a tilde, the path is
    +   returned unchanged.
    +
    +
    +.. function:: expandvars(path)
    +
    +   Return the argument with environment variables expanded.  Substrings of the form
    +   ``$name`` or ``${name}`` are replaced by the value of environment variable
    +   *name*.  Malformed variable names and references to non-existing variables are
    +   left unchanged.
    +
    +   On Windows, ``%name%`` expansions are supported in addition to ``$name`` and
    +   ``${name}``.
    +
    +
    +.. function:: getatime(path)
    +
    +   Return the time of last access of *path*.  The return value is a number giving
    +   the number of seconds since the epoch (see the  :mod:`time` module).  Raise
    +   :exc:`os.error` if the file does not exist or is inaccessible.
    +
    +   .. versionadded:: 1.5.2
    +
    +   .. versionchanged:: 2.3
    +      If :func:`os.stat_float_times` returns True, the result is a floating point
    +      number.
    +
    +
    +.. function:: getmtime(path)
    +
    +   Return the time of last modification of *path*.  The return value is a number
    +   giving the number of seconds since the epoch (see the  :mod:`time` module).
    +   Raise :exc:`os.error` if the file does not exist or is inaccessible.
    +
    +   .. versionadded:: 1.5.2
    +
    +   .. versionchanged:: 2.3
    +      If :func:`os.stat_float_times` returns True, the result is a floating point
    +      number.
    +
    +
    +.. function:: getctime(path)
    +
    +   Return the system's ctime which, on some systems (like Unix), is the time of the
    +   last change, and, on others (like Windows), is the creation time for *path*.
    +   The return value is a number giving the number of seconds since the epoch (see
    +   the  :mod:`time` module).  Raise :exc:`os.error` if the file does not exist or
    +   is inaccessible.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: getsize(path)
    +
    +   Return the size, in bytes, of *path*.  Raise :exc:`os.error` if the file does
    +   not exist or is inaccessible.
    +
    +   .. versionadded:: 1.5.2
    +
    +
    +.. function:: isabs(path)
    +
    +   Return ``True`` if *path* is an absolute pathname (begins with a slash).
    +
    +
    +.. function:: isfile(path)
    +
    +   Return ``True`` if *path* is an existing regular file.  This follows symbolic
    +   links, so both :func:`islink` and :func:`isfile` can be true for the same path.
    +
    +
    +.. function:: isdir(path)
    +
    +   Return ``True`` if *path* is an existing directory.  This follows symbolic
    +   links, so both :func:`islink` and :func:`isdir` can be true for the same path.
    +
    +
    +.. function:: islink(path)
    +
    +   Return ``True`` if *path* refers to a directory entry that is a symbolic link.
    +   Always ``False`` if symbolic links are not supported.
    +
    +
    +.. function:: ismount(path)
    +
    +   Return ``True`` if pathname *path* is a :dfn:`mount point`: a point in a file
    +   system where a different file system has been mounted.  The function checks
    +   whether *path*'s parent, :file:`path/..`, is on a different device than *path*,
    +   or whether :file:`path/..` and *path* point to the same i-node on the same
    +   device --- this should detect mount points for all Unix and POSIX variants.
    +
    +
    +.. function:: join(path1[, path2[, ...]])
    +
    +   Join one or more path components intelligently.  If any component is an absolute
    +   path, all previous components (on Windows, including the previous drive letter,
    +   if there was one) are thrown away, and joining continues.  The return value is
    +   the concatenation of *path1*, and optionally *path2*, etc., with exactly one
    +   directory separator (``os.sep``) inserted between components, unless *path2* is
    +   empty.  Note that on Windows, since there is a current directory for each drive,
    +   ``os.path.join("c:", "foo")`` represents a path relative to the current
    +   directory on drive :file:`C:` (:file:`c:foo`), not :file:`c:\\foo`.
    +
    +
    +.. function:: normcase(path)
    +
    +   Normalize the case of a pathname.  On Unix, this returns the path unchanged; on
    +   case-insensitive filesystems, it converts the path to lowercase.  On Windows, it
    +   also converts forward slashes to backward slashes.
    +
    +
    +.. function:: normpath(path)
    +
    +   Normalize a pathname.  This collapses redundant separators and up-level
    +   references so that ``A//B``, ``A/./B`` and ``A/foo/../B`` all become ``A/B``.
    +   It does not normalize the case (use :func:`normcase` for that).  On Windows, it
    +   converts forward slashes to backward slashes. It should be understood that this
    +   may change the meaning of the path if it contains symbolic links!
    +
    +
    +.. function:: realpath(path)
    +
    +   Return the canonical path of the specified filename, eliminating any symbolic
    +   links encountered in the path (if they are supported by the operating system).
    +
    +   .. versionadded:: 2.2
    +
    +
    +.. function:: relpath(path[, start])
    +
    +   Return a relative filepath to *path* either from the current directory or from
    +   an optional *start* point.
    +
    +   *start* defaults to :attr:`os.curdir`. Availability:  Windows, Unix.
    +
    +   .. versionadded:: 2.6
    +
    +
    +.. function:: samefile(path1, path2)
    +
    +   Return ``True`` if both pathname arguments refer to the same file or directory
    +   (as indicated by device number and i-node number). Raise an exception if a
    +   :func:`os.stat` call on either pathname fails. Availability:  Macintosh, Unix.
    +
    +
    +.. function:: sameopenfile(fp1, fp2)
    +
    +   Return ``True`` if the file descriptors *fp1* and *fp2* refer to the same file.
    +   Availability:  Macintosh, Unix.
    +
    +
    +.. function:: samestat(stat1, stat2)
    +
    +   Return ``True`` if the stat tuples *stat1* and *stat2* refer to the same file.
    +   These structures may have been returned by :func:`fstat`, :func:`lstat`, or
    +   :func:`stat`.  This function implements the underlying comparison used by
    +   :func:`samefile` and :func:`sameopenfile`. Availability:  Macintosh, Unix.
    +
    +
    +.. function:: split(path)
    +
    +   Split the pathname *path* into a pair, ``(head, tail)`` where *tail* is the last
    +   pathname component and *head* is everything leading up to that.  The *tail* part
    +   will never contain a slash; if *path* ends in a slash, *tail* will be empty.  If
    +   there is no slash in *path*, *head* will be empty.  If *path* is empty, both
    +   *head* and *tail* are empty.  Trailing slashes are stripped from *head* unless
    +   it is the root (one or more slashes only).  In nearly all cases, ``join(head,
    +   tail)`` equals *path* (the only exception being when there were multiple slashes
    +   separating *head* from *tail*).
    +
    +
    +.. function:: splitdrive(path)
    +
    +   Split the pathname *path* into a pair ``(drive, tail)`` where *drive* is either
    +   a drive specification or the empty string.  On systems which do not use drive
    +   specifications, *drive* will always be the empty string.  In all cases, ``drive
    +   + tail`` will be the same as *path*.
    +
    +   .. versionadded:: 1.3
    +
    +
    +.. function:: splitext(path)
    +
    +   Split the pathname *path* into a pair ``(root, ext)``  such that ``root + ext ==
    +   path``, and *ext* is empty or begins with a period and contains at most one
    +   period. Leading periods on the basename are  ignored; ``splitext('.cshrc')``
    +   returns  ``('.cshrc', '')``.
    +
    +   .. versionchanged:: 2.6
    +      Earlier versions could produce an empty root when the only period was the
    +      first character.
    +
    +
    +.. function:: splitunc(path)
    +
    +   Split the pathname *path* into a pair ``(unc, rest)`` so that *unc* is the UNC
    +   mount point (such as ``r'\\host\mount'``), if present, and *rest* the rest of
    +   the path (such as  ``r'\path\file.ext'``).  For paths containing drive letters,
    +   *unc* will always be the empty string. Availability:  Windows.
    +
    +
    +.. function:: walk(path, visit, arg)
    +
    +   Calls the function *visit* with arguments ``(arg, dirname, names)`` for each
    +   directory in the directory tree rooted at *path* (including *path* itself, if it
    +   is a directory).  The argument *dirname* specifies the visited directory, the
    +   argument *names* lists the files in the directory (gotten from
    +   ``os.listdir(dirname)``). The *visit* function may modify *names* to influence
    +   the set of directories visited below *dirname*, e.g. to avoid visiting certain
    +   parts of the tree.  (The object referred to by *names* must be modified in
    +   place, using :keyword:`del` or slice assignment.)
    +
    +   .. note::
    +
    +      Symbolic links to directories are not treated as subdirectories, and
    +      :func:`walk` therefore will not visit them. To visit linked directories you must
    +      identify them with ``os.path.islink(file)`` and ``os.path.isdir(file)``, and
    +      invoke :func:`walk` as necessary.
    +
    +   .. note::
    +
    +      The newer :func:`os.walk` generator supplies similar functionality and can be
    +      easier to use.
    +
    +
    +.. data:: supports_unicode_filenames
    +
    +   True if arbitrary Unicode strings can be used as file names (within limitations
    +   imposed by the file system), and if :func:`os.listdir` returns Unicode strings
    +   for a Unicode argument.
    +
    +   .. versionadded:: 2.3
    +
    diff --git a/Doc/library/os.rst b/Doc/library/os.rst
    new file mode 100644
    index 0000000..5d057f1
    --- /dev/null
    +++ b/Doc/library/os.rst
    @@ -0,0 +1,2036 @@
    +
    +:mod:`os` --- Miscellaneous operating system interfaces
    +=======================================================
    +
    +.. module:: os
    +   :synopsis: Miscellaneous operating system interfaces.
    +
    +
    +This module provides a more portable way of using operating system dependent
    +functionality than importing an operating system dependent built-in module like
    +:mod:`posix` or :mod:`nt`. (If you just want to read or write a file see
    +:func:`open`, and if you want to manipulate paths, see the :mod:`os.path`
    +module.)
    +
    +This module searches for an operating system dependent built-in module like
    +:mod:`mac` or :mod:`posix` and exports the same functions and data as found
    +there.  The design of all Python's built-in operating system dependent modules
    +is such that as long as the same functionality is available, it uses the same
    +interface; for example, the function ``os.stat(path)`` returns stat information
    +about *path* in the same format (which happens to have originated with the POSIX
    +interface).
    +
    +Extensions peculiar to a particular operating system are also available through
    +the :mod:`os` module, but using them is of course a threat to portability!
    +
    +Note that after the first time :mod:`os` is imported, there is *no* performance
    +penalty in using functions from :mod:`os` instead of directly from the operating
    +system dependent built-in module, so there should be *no* reason not to use
    +:mod:`os`!
    +
    +The :mod:`os` module contains many functions and data values. The items below
    +and in the following sub-sections are all available directly from the :mod:`os`
    +module.
    +
    +.. % Frank Stajano  complained that it
    +.. % wasn't clear that the entries described in the subsections were all
    +.. % available at the module level (most uses of subsections are
    +.. % different); I think this is only a problem for the HTML version,
    +.. % where the relationship may not be as clear.
    +.. % 
    +
    +
    +.. exception:: error
    +
    +   .. index:: module: errno
    +
    +   This exception is raised when a function returns a system-related error (not for
    +   illegal argument types or other incidental errors). This is also known as the
    +   built-in exception :exc:`OSError`.  The accompanying value is a pair containing
    +   the numeric error code from :cdata:`errno` and the corresponding string, as
    +   would be printed by the C function :cfunc:`perror`.  See the module
    +   :mod:`errno`, which contains names for the error codes defined by the underlying
    +   operating system.
    +
    +   When exceptions are classes, this exception carries two attributes,
    +   :attr:`errno` and :attr:`strerror`.  The first holds the value of the C
    +   :cdata:`errno` variable, and the latter holds the corresponding error message
    +   from :cfunc:`strerror`.  For exceptions that involve a file system path (such as
    +   :func:`chdir` or :func:`unlink`), the exception instance will contain a third
    +   attribute, :attr:`filename`, which is the file name passed to the function.
    +
    +
    +.. data:: name
    +
    +   The name of the operating system dependent module imported.  The following names
    +   have currently been registered: ``'posix'``, ``'nt'``, ``'mac'``, ``'os2'``,
    +   ``'ce'``, ``'java'``, ``'riscos'``.
    +
    +
    +.. data:: path
    +
    +   The corresponding operating system dependent standard module for pathname
    +   operations, such as :mod:`posixpath` or :mod:`macpath`.  Thus, given the proper
    +   imports, ``os.path.split(file)`` is equivalent to but more portable than
    +   ``posixpath.split(file)``.  Note that this is also an importable module: it may
    +   be imported directly as :mod:`os.path`.
    +
    +
    +.. _os-procinfo:
    +
    +Process Parameters
    +------------------
    +
    +These functions and data items provide information and operate on the current
    +process and user.
    +
    +
    +.. data:: environ
    +
    +   A mapping object representing the string environment. For example,
    +   ``environ['HOME']`` is the pathname of your home directory (on some platforms),
    +   and is equivalent to ``getenv("HOME")`` in C.
    +
    +   This mapping is captured the first time the :mod:`os` module is imported,
    +   typically during Python startup as part of processing :file:`site.py`.  Changes
    +   to the environment made after this time are not reflected in ``os.environ``,
    +   except for changes made by modifying ``os.environ`` directly.
    +
    +   If the platform supports the :func:`putenv` function, this mapping may be used
    +   to modify the environment as well as query the environment.  :func:`putenv` will
    +   be called automatically when the mapping is modified.
    +
    +   .. note::
    +
    +      Calling :func:`putenv` directly does not change ``os.environ``, so it's better
    +      to modify ``os.environ``.
    +
    +   .. note::
    +
    +      On some platforms, including FreeBSD and Mac OS X, setting ``environ`` may cause
    +      memory leaks.  Refer to the system documentation for :cfunc:`putenv`.
    +
    +   If :func:`putenv` is not provided, a modified copy of this mapping  may be
    +   passed to the appropriate process-creation functions to cause  child processes
    +   to use a modified environment.
    +
    +   If the platform supports the :func:`unsetenv` function, you can  delete items in
    +   this mapping to unset environment variables. :func:`unsetenv` will be called
    +   automatically when an item is deleted from ``os.environ``.
    +
    +
    +.. function:: chdir(path)
    +              fchdir(fd)
    +              getcwd()
    +   :noindex:
    +
    +   These functions are described in :ref:`os-file-dir`.
    +
    +
    +.. function:: ctermid()
    +
    +   Return the filename corresponding to the controlling terminal of the process.
    +   Availability: Unix.
    +
    +
    +.. function:: getegid()
    +
    +   Return the effective group id of the current process.  This corresponds to the
    +   'set id' bit on the file being executed in the current process. Availability:
    +   Unix.
    +
    +
    +.. function:: geteuid()
    +
    +   .. index:: single: user; effective id
    +
    +   Return the current process' effective user id. Availability: Unix.
    +
    +
    +.. function:: getgid()
    +
    +   .. index:: single: process; group
    +
    +   Return the real group id of the current process. Availability: Unix.
    +
    +
    +.. function:: getgroups()
    +
    +   Return a list of supplemental group ids associated with the current process.
    +   Availability: Unix.
    +
    +
    +.. function:: getlogin()
    +
    +   Return the name of the user logged in on the controlling terminal of the
    +   process.  For most purposes, it is more useful to use the environment variable
    +   :envvar:`LOGNAME` to find out who the user is, or
    +   ``pwd.getpwuid(os.getuid())[0]`` to get the login name of the currently
    +   effective user ID. Availability: Unix.
    +
    +
    +.. function:: getpgid(pid)
    +
    +   Return the process group id of the process with process id *pid*. If *pid* is 0,
    +   the process group id of the current process is returned. Availability: Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: getpgrp()
    +
    +   .. index:: single: process; group
    +
    +   Return the id of the current process group. Availability: Unix.
    +
    +
    +.. function:: getpid()
    +
    +   .. index:: single: process; id
    +
    +   Return the current process id. Availability: Unix, Windows.
    +
    +
    +.. function:: getppid()
    +
    +   .. index:: single: process; id of parent
    +
    +   Return the parent's process id. Availability: Unix.
    +
    +
    +.. function:: getuid()
    +
    +   .. index:: single: user; id
    +
    +   Return the current process' user id. Availability: Unix.
    +
    +
    +.. function:: getenv(varname[, value])
    +
    +   Return the value of the environment variable *varname* if it exists, or *value*
    +   if it doesn't.  *value* defaults to ``None``. Availability: most flavors of
    +   Unix, Windows.
    +
    +
    +.. function:: putenv(varname, value)
    +
    +   .. index:: single: environment variables; setting
    +
    +   Set the environment variable named *varname* to the string *value*.  Such
    +   changes to the environment affect subprocesses started with :func:`os.system`,
    +   :func:`popen` or :func:`fork` and :func:`execv`. Availability: most flavors of
    +   Unix, Windows.
    +
    +   .. note::
    +
    +      On some platforms, including FreeBSD and Mac OS X, setting ``environ`` may cause
    +      memory leaks. Refer to the system documentation for :cfunc:`putenv`.
    +
    +   When :func:`putenv` is supported, assignments to items in ``os.environ`` are
    +   automatically translated into corresponding calls to :func:`putenv`; however,
    +   calls to :func:`putenv` don't update ``os.environ``, so it is actually
    +   preferable to assign to items of ``os.environ``.
    +
    +
    +.. function:: setegid(egid)
    +
    +   Set the current process's effective group id. Availability: Unix.
    +
    +
    +.. function:: seteuid(euid)
    +
    +   Set the current process's effective user id. Availability: Unix.
    +
    +
    +.. function:: setgid(gid)
    +
    +   Set the current process' group id. Availability: Unix.
    +
    +
    +.. function:: setgroups(groups)
    +
    +   Set the list of supplemental group ids associated with the current process to
    +   *groups*. *groups* must be a sequence, and each element must be an integer
    +   identifying a group. This operation is typically available only to the superuser.
    +   Availability: Unix.
    +
    +   .. versionadded:: 2.2
    +
    +
    +.. function:: setpgrp()
    +
    +   Calls the system call :cfunc:`setpgrp` or :cfunc:`setpgrp(0, 0)` depending on
    +   which version is implemented (if any).  See the Unix manual for the semantics.
    +   Availability: Unix.
    +
    +
    +.. function:: setpgid(pid, pgrp)
    +
    +   Calls the system call :cfunc:`setpgid` to set the process group id of the
    +   process with id *pid* to the process group with id *pgrp*.  See the Unix manual
    +   for the semantics. Availability: Unix.
    +
    +
    +.. function:: setreuid(ruid, euid)
    +
    +   Set the current process's real and effective user ids. Availability: Unix.
    +
    +
    +.. function:: setregid(rgid, egid)
    +
    +   Set the current process's real and effective group ids. Availability: Unix.
    +
    +
    +.. function:: getsid(pid)
    +
    +   Calls the system call :cfunc:`getsid`.  See the Unix manual for the semantics.
    +   Availability: Unix.
    +
    +   .. versionadded:: 2.4
    +
    +
    +.. function:: setsid()
    +
    +   Calls the system call :cfunc:`setsid`.  See the Unix manual for the semantics.
    +   Availability: Unix.
    +
    +
    +.. function:: setuid(uid)
    +
    +   .. index:: single: user; id, setting
    +
    +   Set the current process' user id. Availability: Unix.
    +
    +.. % placed in this section since it relates to errno.... a little weak
    +
    +
    +.. function:: strerror(code)
    +
    +   Return the error message corresponding to the error code in *code*.
    +   Availability: Unix, Windows.
    +
    +
    +.. function:: umask(mask)
    +
    +   Set the current numeric umask and return the previous umask. Availability:
    +   Unix, Windows.
    +
    +
    +.. function:: uname()
    +
    +   .. index::
    +      single: gethostname() (in module socket)
    +      single: gethostbyaddr() (in module socket)
    +
    +   Return a 5-tuple containing information identifying the current operating
    +   system.  The tuple contains 5 strings: ``(sysname, nodename, release, version,
    +   machine)``.  Some systems truncate the nodename to 8 characters or to the
    +   leading component; a better way to get the hostname is
    +   :func:`socket.gethostname`  or even
    +   ``socket.gethostbyaddr(socket.gethostname())``. Availability: recent flavors of
    +   Unix.
    +
    +
    +.. function:: unsetenv(varname)
    +
    +   .. index:: single: environment variables; deleting
    +
    +   Unset (delete) the environment variable named *varname*. Such changes to the
    +   environment affect subprocesses started with :func:`os.system`, :func:`popen` or
    +   :func:`fork` and :func:`execv`. Availability: most flavors of Unix, Windows.
    +
    +   When :func:`unsetenv` is supported, deletion of items in ``os.environ`` is
    +   automatically translated into a corresponding call to :func:`unsetenv`; however,
    +   calls to :func:`unsetenv` don't update ``os.environ``, so it is actually
    +   preferable to delete items of ``os.environ``.
    +
    +
    +.. _os-newstreams:
    +
    +File Object Creation
    +--------------------
    +
    +These functions create new file objects. (See also :func:`open`.)
    +
    +
    +.. function:: fdopen(fd[, mode[, bufsize]])
    +
    +   .. index:: single: I/O control; buffering
    +
    +   Return an open file object connected to the file descriptor *fd*.  The *mode*
    +   and *bufsize* arguments have the same meaning as the corresponding arguments to
    +   the built-in :func:`open` function. Availability: Macintosh, Unix, Windows.
    +
    +   .. versionchanged:: 2.3
    +      When specified, the *mode* argument must now start with one of the letters
    +      ``'r'``, ``'w'``, or ``'a'``, otherwise a :exc:`ValueError` is raised.
    +
    +   .. versionchanged:: 2.5
    +      On Unix, when the *mode* argument starts with ``'a'``, the *O_APPEND* flag is
    +      set on the file descriptor (which the :cfunc:`fdopen` implementation already
    +      does on most platforms).
    +
    +
    +.. function:: popen(command[, mode[, bufsize]])
    +
    +   Open a pipe to or from *command*.  The return value is an open file object
    +   connected to the pipe, which can be read or written depending on whether *mode*
    +   is ``'r'`` (default) or ``'w'``. The *bufsize* argument has the same meaning as
    +   the corresponding argument to the built-in :func:`open` function.  The exit
    +   status of the command (encoded in the format specified for :func:`wait`) is
    +   available as the return value of the :meth:`close` method of the file object,
    +   except that when the exit status is zero (termination without errors), ``None``
    +   is returned. Availability: Macintosh, Unix, Windows.
    +
    +   .. deprecated:: 2.6
    +      This function is obsolete.  Use the :mod:`subprocess` module.
    +
    +   .. versionchanged:: 2.0
    +      This function worked unreliably under Windows in earlier versions of Python.
    +      This was due to the use of the :cfunc:`_popen` function from the libraries
    +      provided with Windows.  Newer versions of Python do not use the broken
    +      implementation from the Windows libraries.
    +
    +
    +.. function:: tmpfile()
    +
    +   Return a new file object opened in update mode (``w+b``).  The file has no
    +   directory entries associated with it and will be automatically deleted once
    +   there are no file descriptors for the file. Availability: Macintosh, Unix,
    +   Windows.
    +
    +
    +.. _os-fd-ops:
    +
    +File Descriptor Operations
    +--------------------------
    +
    +These functions operate on I/O streams referenced using file descriptors.
    +
    +File descriptors are small integers corresponding to a file that has been opened
    +by the current process.  For example, standard input is usually file descriptor
    +0, standard output is 1, and standard error is 2.  Further files opened by a
    +process will then be assigned 3, 4, 5, and so forth.  The name "file descriptor"
    +is slightly deceptive; on Unix platforms, sockets and pipes are also referenced
    +by file descriptors.
    +
    +
    +.. function:: close(fd)
    +
    +   Close file descriptor *fd*. Availability: Macintosh, Unix, Windows.
    +
    +   .. note::
    +
    +      This function is intended for low-level I/O and must be applied to a file
    +      descriptor as returned by :func:`open` or :func:`pipe`.  To close a "file
    +      object" returned by the built-in function :func:`open` or by :func:`popen` or
    +      :func:`fdopen`, use its :meth:`close` method.
    +
    +
    +.. function:: dup(fd)
    +
    +   Return a duplicate of file descriptor *fd*. Availability: Macintosh, Unix,
    +   Windows.
    +
    +
    +.. function:: dup2(fd, fd2)
    +
    +   Duplicate file descriptor *fd* to *fd2*, closing the latter first if necessary.
    +   Availability: Macintosh, Unix, Windows.
    +
    +
    +.. function:: fdatasync(fd)
    +
    +   Force write of file with file descriptor *fd* to disk. Does not force update of
    +   metadata. Availability: Unix.
    +
    +
    +.. function:: fpathconf(fd, name)
    +
    +   Return system configuration information relevant to an open file. *name*
    +   specifies the configuration value to retrieve; it may be a string which is the
    +   name of a defined system value; these names are specified in a number of
    +   standards (POSIX.1, Unix 95, Unix 98, and others).  Some platforms define
    +   additional names as well.  The names known to the host operating system are
    +   given in the ``pathconf_names`` dictionary.  For configuration variables not
    +   included in that mapping, passing an integer for *name* is also accepted.
    +   Availability: Macintosh, Unix.
    +
    +   If *name* is a string and is not known, :exc:`ValueError` is raised.  If a
    +   specific value for *name* is not supported by the host system, even if it is
    +   included in ``pathconf_names``, an :exc:`OSError` is raised with
    +   :const:`errno.EINVAL` for the error number.
    +
    +
    +.. function:: fstat(fd)
    +
    +   Return status for file descriptor *fd*, like :func:`stat`. Availability:
    +   Macintosh, Unix, Windows.
    +
    +
    +.. function:: fstatvfs(fd)
    +
    +   Return information about the filesystem containing the file associated with file
    +   descriptor *fd*, like :func:`statvfs`. Availability: Unix.
    +
    +
    +.. function:: fsync(fd)
    +
    +   Force write of file with file descriptor *fd* to disk.  On Unix, this calls the
    +   native :cfunc:`fsync` function; on Windows, the MS :cfunc:`_commit` function.
    +
    +   If you're starting with a Python file object *f*, first do ``f.flush()``, and
    +   then do ``os.fsync(f.fileno())``, to ensure that all internal buffers associated
    +   with *f* are written to disk. Availability: Macintosh, Unix, and Windows
    +   starting in 2.2.3.
    +
    +
    +.. function:: ftruncate(fd, length)
    +
    +   Truncate the file corresponding to file descriptor *fd*, so that it is at most
    +   *length* bytes in size. Availability: Macintosh, Unix.
    +
    +
    +.. function:: isatty(fd)
    +
    +   Return ``True`` if the file descriptor *fd* is open and connected to a
    +   tty(-like) device, else ``False``. Availability: Macintosh, Unix.
    +
    +
    +.. function:: lseek(fd, pos, how)
    +
    +   Set the current position of file descriptor *fd* to position *pos*, modified by
    +   *how*: ``0`` to set the position relative to the beginning of the file; ``1`` to
    +   set it relative to the current position; ``2`` to set it relative to the end of
    +   the file. Availability: Macintosh, Unix, Windows.
    +
    +
    +.. function:: open(file, flags[, mode])
    +
    +   Open the file *file* and set various flags according to *flags* and possibly its
    +   mode according to *mode*. The default *mode* is ``0777`` (octal), and the
    +   current umask value is first masked out.  Return the file descriptor for the
    +   newly opened file. Availability: Macintosh, Unix, Windows.
    +
    +   For a description of the flag and mode values, see the C run-time documentation;
    +   flag constants (like :const:`O_RDONLY` and :const:`O_WRONLY`) are defined in
    +   this module too (see below).
    +
    +   .. note::
    +
    +      This function is intended for low-level I/O.  For normal usage, use the built-in
    +      function :func:`open`, which returns a "file object" with :meth:`read` and
    +      :meth:`write` methods (and many more).  To wrap a file descriptor in a "file
    +      object", use :func:`fdopen`.
    +
    +
    +.. function:: openpty()
    +
    +   .. index:: module: pty
    +
    +   Open a new pseudo-terminal pair. Return a pair of file descriptors ``(master,
    +   slave)`` for the pty and the tty, respectively. For a (slightly) more portable
    +   approach, use the :mod:`pty` module. Availability: Macintosh, some flavors of
    +   Unix.
    +
    +
    +.. function:: pipe()
    +
    +   Create a pipe.  Return a pair of file descriptors ``(r, w)`` usable for reading
    +   and writing, respectively. Availability: Macintosh, Unix, Windows.
    +
    +
    +.. function:: read(fd, n)
    +
    +   Read at most *n* bytes from file descriptor *fd*. Return a string containing the
    +   bytes read.  If the end of the file referred to by *fd* has been reached, an
    +   empty string is returned. Availability: Macintosh, Unix, Windows.
    +
    +   .. note::
    +
    +      This function is intended for low-level I/O and must be applied to a file
    +      descriptor as returned by :func:`open` or :func:`pipe`.  To read a "file object"
    +      returned by the built-in function :func:`open` or by :func:`popen` or
    +      :func:`fdopen`, or ``sys.stdin``, use its :meth:`read` or :meth:`readline`
    +      methods.
    +
    +
    +.. function:: tcgetpgrp(fd)
    +
    +   Return the process group associated with the terminal given by *fd* (an open
    +   file descriptor as returned by :func:`open`). Availability: Macintosh, Unix.
    +
    +
    +.. function:: tcsetpgrp(fd, pg)
    +
    +   Set the process group associated with the terminal given by *fd* (an open file
    +   descriptor as returned by :func:`open`) to *pg*. Availability: Macintosh, Unix.
    +
    +
    +.. function:: ttyname(fd)
    +
    +   Return a string which specifies the terminal device associated with
    +   file-descriptor *fd*.  If *fd* is not associated with a terminal device, an
    +   exception is raised. Availability: Macintosh, Unix.
    +
    +
    +.. function:: write(fd, str)
    +
    +   Write the string *str* to file descriptor *fd*. Return the number of bytes
    +   actually written. Availability: Macintosh, Unix, Windows.
    +
    +   .. note::
    +
    +      This function is intended for low-level I/O and must be applied to a file
    +      descriptor as returned by :func:`open` or :func:`pipe`.  To write a "file
    +      object" returned by the built-in function :func:`open` or by :func:`popen` or
    +      :func:`fdopen`, or ``sys.stdout`` or ``sys.stderr``, use its :meth:`write`
    +      method.
    +
    +The following data items are available for use in constructing the *flags*
    +parameter to the :func:`open` function.  Some items will not be available on all
    +platforms.  For descriptions of their availability and use, consult
    +:manpage:`open(2)`.
    +
    +
    +.. data:: O_RDONLY
    +          O_WRONLY
    +          O_RDWR
    +          O_APPEND
    +          O_CREAT
    +          O_EXCL
    +          O_TRUNC
    +
    +   Options for the *flags* argument to the :func:`open` function. These can be
    +   bit-wise OR'd together. Availability: Macintosh, Unix, Windows.
    +
    +
    +.. data:: O_DSYNC
    +          O_RSYNC
    +          O_SYNC
    +          O_NDELAY
    +          O_NONBLOCK
    +          O_NOCTTY
    +          O_SHLOCK
    +          O_EXLOCK
    +
    +   More options for the *flags* argument to the :func:`open` function.
    +   Availability: Macintosh, Unix.
    +
    +
    +.. data:: O_BINARY
    +
    +   Option for the *flags* argument to the :func:`open` function. This can be
    +   bit-wise OR'd together with those listed above. Availability: Windows.
    +
    +   .. % XXX need to check on the availability of this one.
    +
    +
    +.. data:: O_NOINHERIT
    +          O_SHORT_LIVED
    +          O_TEMPORARY
    +          O_RANDOM
    +          O_SEQUENTIAL
    +          O_TEXT
    +
    +   Options for the *flags* argument to the :func:`open` function. These can be
    +   bit-wise OR'd together. Availability: Windows.
    +
    +
    +.. data:: SEEK_SET
    +          SEEK_CUR
    +          SEEK_END
    +
    +   Parameters to the :func:`lseek` function. Their values are 0, 1, and 2,
    +   respectively. Availability: Windows, Macintosh, Unix.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. _os-file-dir:
    +
    +Files and Directories
    +---------------------
    +
    +
    +.. function:: access(path, mode)
    +
    +   Use the real uid/gid to test for access to *path*.  Note that most operations
    +   will use the effective uid/gid, therefore this routine can be used in a
    +   suid/sgid environment to test if the invoking user has the specified access to
    +   *path*.  *mode* should be :const:`F_OK` to test the existence of *path*, or it
    +   can be the inclusive OR of one or more of :const:`R_OK`, :const:`W_OK`, and
    +   :const:`X_OK` to test permissions.  Return :const:`True` if access is allowed,
    +   :const:`False` if not. See the Unix man page :manpage:`access(2)` for more
    +   information. Availability: Macintosh, Unix, Windows.
    +
    +   .. note::
    +
    +      Using :func:`access` to check if a user is authorized to e.g. open a file before
    +      actually doing so using :func:`open` creates a  security hole, because the user
    +      might exploit the short time interval  between checking and opening the file to
    +      manipulate it.
    +
    +   .. note::
    +
    +      I/O operations may fail even when :func:`access` indicates that they would
    +      succeed, particularly for operations on network filesystems which may have
    +      permissions semantics beyond the usual POSIX permission-bit model.
    +
    +
    +.. data:: F_OK
    +
    +   Value to pass as the *mode* parameter of :func:`access` to test the existence of
    +   *path*.
    +
    +
    +.. data:: R_OK
    +
    +   Value to include in the *mode* parameter of :func:`access` to test the
    +   readability of *path*.
    +
    +
    +.. data:: W_OK
    +
    +   Value to include in the *mode* parameter of :func:`access` to test the
    +   writability of *path*.
    +
    +
    +.. data:: X_OK
    +
    +   Value to include in the *mode* parameter of :func:`access` to determine if
    +   *path* can be executed.
    +
    +
    +.. function:: chdir(path)
    +
    +   .. index:: single: directory; changing
    +
    +   Change the current working directory to *path*. Availability: Macintosh, Unix,
    +   Windows.
    +
    +
    +.. function:: fchdir(fd)
    +
    +   Change the current working directory to the directory represented by the file
    +   descriptor *fd*.  The descriptor must refer to an opened directory, not an open
    +   file. Availability: Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: getcwd()
    +
    +   Return a string representing the current working directory. Availability:
    +   Macintosh, Unix, Windows.
    +
    +
    +.. function:: getcwdu()
    +
    +   Return a Unicode object representing the current working directory.
    +   Availability: Macintosh, Unix, Windows.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: chflags(path, flags)
    +
    +   Set the flags of *path* to the numeric *flags*. *flags* may take a combination
    +   (bitwise OR) of the following values (as defined in the :mod:`stat` module):
    +
    +   * ``UF_NODUMP``
    +   * ``UF_IMMUTABLE``
    +   * ``UF_APPEND``
    +   * ``UF_OPAQUE``
    +   * ``UF_NOUNLINK``
    +   * ``SF_ARCHIVED``
    +   * ``SF_IMMUTABLE``
    +   * ``SF_APPEND``
    +   * ``SF_NOUNLINK``
    +   * ``SF_SNAPSHOT``
    +
    +   Availability: Macintosh, Unix.
    +
    +   .. versionadded:: 2.6
    +
    +
    +.. function:: chroot(path)
    +
    +   Change the root directory of the current process to *path*. Availability:
    +   Macintosh, Unix.
    +
    +   .. versionadded:: 2.2
    +
    +
    +.. function:: chmod(path, mode)
    +
    +   Change the mode of *path* to the numeric *mode*. *mode* may take one of the
    +   following values (as defined in the :mod:`stat` module) or bitwise ORed
    +   combinations of them:
    +
    +   * ``stat.S_ISUID``
    +   * ``stat.S_ISGID``
    +   * ``stat.S_ENFMT``
    +   * ``stat.S_ISVTX``
    +   * ``stat.S_IREAD``
    +   * ``stat.S_IWRITE``
    +   * ``stat.S_IEXEC``
    +   * ``stat.S_IRWXU``
    +   * ``stat.S_IRUSR``
    +   * ``stat.S_IWUSR``
    +   * ``stat.S_IXUSR``
    +   * ``stat.S_IRWXG``
    +   * ``stat.S_IRGRP``
    +   * ``stat.S_IWGRP``
    +   * ``stat.S_IXGRP``
    +   * ``stat.S_IRWXO``
    +   * ``stat.S_IROTH``
    +   * ``stat.S_IWOTH``
    +   * ``stat.S_IXOTH``
    +
    +   Availability: Macintosh, Unix, Windows.
    +
    +   .. note::
    +
    +      Although Windows supports :func:`chmod`, you can only  set the file's read-only
    +      flag with it (via the ``stat.S_IWRITE``  and ``stat.S_IREAD``
    +      constants or a corresponding integer value).  All other bits are
    +      ignored.
    +
    +
    +.. function:: chown(path, uid, gid)
    +
    +   Change the owner and group id of *path* to the numeric *uid* and *gid*. To leave
    +   one of the ids unchanged, set it to -1. Availability: Macintosh, Unix.
    +
    +
    +.. function:: lchflags(path, flags)
    +
    +   Set the flags of *path* to the numeric *flags*, like :func:`chflags`, but do not
    +   follow symbolic links. Availability: Unix.
    +
    +   .. versionadded:: 2.6
    +
    +
    +.. function:: lchown(path, uid, gid)
    +
    +   Change the owner and group id of *path* to the numeric *uid* and *gid*. This
    +   function will not follow symbolic links. Availability: Macintosh, Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: link(src, dst)
    +
    +   Create a hard link pointing to *src* named *dst*. Availability: Macintosh, Unix.
    +
    +
    +.. function:: listdir(path)
    +
    +   Return a list containing the names of the entries in the directory given by
    +   *path*. The list is in arbitrary order.  It does not include the special
    +   entries ``'.'`` and ``'..'`` even if they are present in the directory.
    +   Availability: Macintosh, Unix, Windows.
    +
    +   .. versionchanged:: 2.3
    +      On Windows NT/2k/XP and Unix, if *path* is a Unicode object, the result will be
    +      a list of Unicode objects.
    +
    +
    +.. function:: lstat(path)
    +
    +   Like :func:`stat`, but do not follow symbolic links. Availability: Macintosh,
    +   Unix.
    +
    +
    +.. function:: mkfifo(path[, mode])
    +
    +   Create a FIFO (a named pipe) named *path* with numeric mode *mode*.  The default
    +   *mode* is ``0666`` (octal).  The current umask value is first masked out from
    +   the mode. Availability: Macintosh, Unix.
    +
    +   FIFOs are pipes that can be accessed like regular files.  FIFOs exist until they
    +   are deleted (for example with :func:`os.unlink`). Generally, FIFOs are used as
    +   rendezvous between "client" and "server" type processes: the server opens the
    +   FIFO for reading, and the client opens it for writing.  Note that :func:`mkfifo`
    +   doesn't open the FIFO --- it just creates the rendezvous point.
    +
    +
    +.. function:: mknod(filename[, mode=0600, device])
    +
    +   Create a filesystem node (file, device special file or named pipe) named
    +   *filename*. *mode* specifies both the permissions to use and the type of node to
    +   be created, being combined (bitwise OR) with one of ``stat.S_IFREG``,
    +   ``stat.S_IFCHR``, ``stat.S_IFBLK``,
    +   and ``stat.S_IFIFO`` (those constants are available in :mod:`stat`).
    +   For ``stat.S_IFCHR`` and
    +   ``stat.S_IFBLK``, *device* defines the newly created device special file (probably using
    +   :func:`os.makedev`), otherwise it is ignored.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: major(device)
    +
    +   Extracts the device major number from a raw device number (usually the
    +   :attr:`st_dev` or :attr:`st_rdev` field from :ctype:`stat`).
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: minor(device)
    +
    +   Extracts the device minor number from a raw device number (usually the
    +   :attr:`st_dev` or :attr:`st_rdev` field from :ctype:`stat`).
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: makedev(major, minor)
    +
    +   Composes a raw device number from the major and minor device numbers.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: mkdir(path[, mode])
    +
    +   Create a directory named *path* with numeric mode *mode*. The default *mode* is
    +   ``0777`` (octal).  On some systems, *mode* is ignored.  Where it is used, the
    +   current umask value is first masked out. Availability: Macintosh, Unix, Windows.
    +
    +
    +.. function:: makedirs(path[, mode])
    +
    +   .. index::
    +      single: directory; creating
    +      single: UNC paths; and os.makedirs()
    +
    +   Recursive directory creation function.  Like :func:`mkdir`, but makes all
    +   intermediate-level directories needed to contain the leaf directory.  Raises an
    +   :exc:`error` exception if the leaf directory already exists or cannot be
    +   created.  The default *mode* is ``0777`` (octal).  On some systems, *mode* is
    +   ignored. Where it is used, the current umask value is first masked out.
    +
    +   .. note::
    +
    +      :func:`makedirs` will become confused if the path elements to create include
    +      :data:`os.pardir`.
    +
    +   .. versionadded:: 1.5.2
    +
    +   .. versionchanged:: 2.3
    +      This function now handles UNC paths correctly.
    +
    +
    +.. function:: pathconf(path, name)
    +
    +   Return system configuration information relevant to a named file. *name*
    +   specifies the configuration value to retrieve; it may be a string which is the
    +   name of a defined system value; these names are specified in a number of
    +   standards (POSIX.1, Unix 95, Unix 98, and others).  Some platforms define
    +   additional names as well.  The names known to the host operating system are
    +   given in the ``pathconf_names`` dictionary.  For configuration variables not
    +   included in that mapping, passing an integer for *name* is also accepted.
    +   Availability: Macintosh, Unix.
    +
    +   If *name* is a string and is not known, :exc:`ValueError` is raised.  If a
    +   specific value for *name* is not supported by the host system, even if it is
    +   included in ``pathconf_names``, an :exc:`OSError` is raised with
    +   :const:`errno.EINVAL` for the error number.
    +
    +
    +.. data:: pathconf_names
    +
    +   Dictionary mapping names accepted by :func:`pathconf` and :func:`fpathconf` to
    +   the integer values defined for those names by the host operating system.  This
    +   can be used to determine the set of names known to the system. Availability:
    +   Macintosh, Unix.
    +
    +
    +.. function:: readlink(path)
    +
    +   Return a string representing the path to which the symbolic link points.  The
    +   result may be either an absolute or relative pathname; if it is relative, it may
    +   be converted to an absolute pathname using ``os.path.join(os.path.dirname(path),
    +   result)``.
    +
    +   .. versionchanged:: 2.6
    +      If the *path* is a Unicode object the result will also be a Unicode object.
    +
    +   Availability: Macintosh, Unix.
    +
    +
    +.. function:: remove(path)
    +
    +   Remove the file *path*.  If *path* is a directory, :exc:`OSError` is raised; see
    +   :func:`rmdir` below to remove a directory.  This is identical to the
    +   :func:`unlink` function documented below.  On Windows, attempting to remove a
    +   file that is in use causes an exception to be raised; on Unix, the directory
    +   entry is removed but the storage allocated to the file is not made available
    +   until the original file is no longer in use. Availability: Macintosh, Unix,
    +   Windows.
    +
    +
    +.. function:: removedirs(path)
    +
    +   .. index:: single: directory; deleting
    +
    +   Removes directories recursively.  Works like :func:`rmdir` except that, if the
    +   leaf directory is successfully removed, :func:`removedirs`  tries to
    +   successively remove every parent directory mentioned in  *path* until an error
    +   is raised (which is ignored, because it generally means that a parent directory
    +   is not empty). For example, ``os.removedirs('foo/bar/baz')`` will first remove
    +   the directory ``'foo/bar/baz'``, and then remove ``'foo/bar'`` and ``'foo'`` if
    +   they are empty. Raises :exc:`OSError` if the leaf directory could not be
    +   successfully removed.
    +
    +   .. versionadded:: 1.5.2
    +
    +
    +.. function:: rename(src, dst)
    +
    +   Rename the file or directory *src* to *dst*.  If *dst* is a directory,
    +   :exc:`OSError` will be raised.  On Unix, if *dst* exists and is a file, it will
    +   be removed silently if the user has permission.  The operation may fail on some
    +   Unix flavors if *src* and *dst* are on different filesystems.  If successful,
    +   the renaming will be an atomic operation (this is a POSIX requirement).  On
    +   Windows, if *dst* already exists, :exc:`OSError` will be raised even if it is a
    +   file; there may be no way to implement an atomic rename when *dst* names an
    +   existing file. Availability: Macintosh, Unix, Windows.
    +
    +
    +.. function:: renames(old, new)
    +
    +   Recursive directory or file renaming function. Works like :func:`rename`, except
    +   creation of any intermediate directories needed to make the new pathname good is
    +   attempted first. After the rename, directories corresponding to rightmost path
    +   segments of the old name will be pruned away using :func:`removedirs`.
    +
    +   .. versionadded:: 1.5.2
    +
    +   .. note::
    +
    +      This function can fail with the new directory structure made if you lack
    +      permissions needed to remove the leaf directory or file.
    +
    +
    +.. function:: rmdir(path)
    +
    +   Remove the directory *path*. Availability: Macintosh, Unix, Windows.
    +
    +
    +.. function:: stat(path)
    +
    +   Perform a :cfunc:`stat` system call on the given path.  The return value is an
    +   object whose attributes correspond to the members of the :ctype:`stat`
    +   structure, namely: :attr:`st_mode` (protection bits), :attr:`st_ino` (inode
    +   number), :attr:`st_dev` (device), :attr:`st_nlink` (number of hard links),
    +   :attr:`st_uid` (user ID of owner), :attr:`st_gid` (group ID of owner),
    +   :attr:`st_size` (size of file, in bytes), :attr:`st_atime` (time of most recent
    +   access), :attr:`st_mtime` (time of most recent content modification),
    +   :attr:`st_ctime` (platform dependent; time of most recent metadata change on
    +   Unix, or the time of creation on Windows)::
    +
    +      >>> import os
    +      >>> statinfo = os.stat('somefile.txt')
    +      >>> statinfo
    +      (33188, 422511L, 769L, 1, 1032, 100, 926L, 1105022698, 1105022732, 1105022732)
    +      >>> statinfo.st_size
    +      926L
    +      >>>
    +
    +   .. versionchanged:: 2.3
    +      If :func:`stat_float_times` returns true, the time values are floats, measuring
    +      seconds. Fractions of a second may be reported if the system supports that. On
    +      Mac OS, the times are always floats. See :func:`stat_float_times` for further
    +      discussion.
    +
    +   On some Unix systems (such as Linux), the following attributes may also be
    +   available: :attr:`st_blocks` (number of blocks allocated for file),
    +   :attr:`st_blksize` (filesystem blocksize), :attr:`st_rdev` (type of device if an
    +   inode device), :attr:`st_flags` (user defined flags for file).
    +
    +   On other Unix systems (such as FreeBSD), the following attributes may be
    +   available (but may be only filled out if root tries to use them): :attr:`st_gen`
    +   (file generation number), :attr:`st_birthtime` (time of file creation).
    +
    +   On Mac OS systems, the following attributes may also be available:
    +   :attr:`st_rsize`, :attr:`st_creator`, :attr:`st_type`.
    +
    +   On RISCOS systems, the following attributes are also available: :attr:`st_ftype`
    +   (file type), :attr:`st_attrs` (attributes), :attr:`st_obtype` (object type).
    +
    +   .. index:: module: stat
    +
    +   For backward compatibility, the return value of :func:`stat` is also accessible
    +   as a tuple of at least 10 integers giving the most important (and portable)
    +   members of the :ctype:`stat` structure, in the order :attr:`st_mode`,
    +   :attr:`st_ino`, :attr:`st_dev`, :attr:`st_nlink`, :attr:`st_uid`,
    +   :attr:`st_gid`, :attr:`st_size`, :attr:`st_atime`, :attr:`st_mtime`,
    +   :attr:`st_ctime`. More items may be added at the end by some implementations.
    +   The standard module :mod:`stat` defines functions and constants that are useful
    +   for extracting information from a :ctype:`stat` structure. (On Windows, some
    +   items are filled with dummy values.)
    +
    +   .. note::
    +
    +      The exact meaning and resolution of the :attr:`st_atime`, :attr:`st_mtime`, and
    +      :attr:`st_ctime` members depends on the operating system and the file system.
    +      For example, on Windows systems using the FAT or FAT32 file systems,
    +      :attr:`st_mtime` has 2-second resolution, and :attr:`st_atime` has only 1-day
    +      resolution.  See your operating system documentation for details.
    +
    +   Availability: Macintosh, Unix, Windows.
    +
    +   .. versionchanged:: 2.2
    +      Added access to values as attributes of the returned object.
    +
    +   .. versionchanged:: 2.5
    +      Added :attr:`st_gen` and :attr:`st_birthtime`.
    +
    +
    +.. function:: stat_float_times([newvalue])
    +
    +   Determine whether :class:`stat_result` represents time stamps as float objects.
    +   If *newvalue* is ``True``, future calls to :func:`stat` return floats; if it is
    +   ``False``, future calls return ints. If *newvalue* is omitted, return the
    +   current setting.
    +
    +   For compatibility with older Python versions, accessing :class:`stat_result` as
    +   a tuple always returns integers.
    +
    +   .. versionchanged:: 2.5
    +      Python now returns float values by default. Applications which do not work
    +      correctly with floating point time stamps can use this function to restore the
    +      old behaviour.
    +
    +   The resolution of the timestamps (that is, the smallest possible fraction)
    +   depends on the system. Some systems only support second resolution; on these
    +   systems, the fraction will always be zero.
    +
    +   It is recommended that this setting is only changed at program startup time in
    +   the *__main__* module; libraries should never change this setting. If an
    +   application uses a library that works incorrectly if floating point time stamps
    +   are processed, this application should turn the feature off until the library
    +   has been corrected.
    +
    +
    +.. function:: statvfs(path)
    +
    +   Perform a :cfunc:`statvfs` system call on the given path.  The return value is
    +   an object whose attributes describe the filesystem on the given path, and
    +   correspond to the members of the :ctype:`statvfs` structure, namely:
    +   :attr:`f_bsize`, :attr:`f_frsize`, :attr:`f_blocks`, :attr:`f_bfree`,
    +   :attr:`f_bavail`, :attr:`f_files`, :attr:`f_ffree`, :attr:`f_favail`,
    +   :attr:`f_flag`, :attr:`f_namemax`. Availability: Unix.
    +
    +   .. index:: module: statvfs
    +
    +   For backward compatibility, the return value is also accessible as a tuple whose
    +   values correspond to the attributes, in the order given above. The standard
    +   module :mod:`statvfs` defines constants that are useful for extracting
    +   information from a :ctype:`statvfs` structure when accessing it as a sequence;
    +   this remains useful when writing code that needs to work with versions of Python
    +   that don't support accessing the fields as attributes.
    +
    +   .. versionchanged:: 2.2
    +      Added access to values as attributes of the returned object.
    +
    +
    +.. function:: symlink(src, dst)
    +
    +   Create a symbolic link pointing to *src* named *dst*. Availability: Unix.
    +
    +
    +.. function:: tempnam([dir[, prefix]])
    +
    +   Return a unique path name that is reasonable for creating a temporary file.
    +   This will be an absolute path that names a potential directory entry in the
    +   directory *dir* or a common location for temporary files if *dir* is omitted or
    +   ``None``.  If given and not ``None``, *prefix* is used to provide a short prefix
    +   to the filename.  Applications are responsible for properly creating and
    +   managing files created using paths returned by :func:`tempnam`; no automatic
    +   cleanup is provided. On Unix, the environment variable :envvar:`TMPDIR`
    +   overrides *dir*, while on Windows :envvar:`TMP` is used.  The specific
    +   behavior of this function depends on the C library implementation; some aspects
    +   are underspecified in system documentation.
    +
    +   .. warning::
    +
    +      Use of :func:`tempnam` is vulnerable to symlink attacks; consider using
    +      :func:`tmpfile` (section :ref:`os-newstreams`) instead.
    +
    +   Availability: Macintosh, Unix, Windows.
    +
    +
    +.. function:: tmpnam()
    +
    +   Return a unique path name that is reasonable for creating a temporary file.
    +   This will be an absolute path that names a potential directory entry in a common
    +   location for temporary files.  Applications are responsible for properly
    +   creating and managing files created using paths returned by :func:`tmpnam`; no
    +   automatic cleanup is provided.
    +
    +   .. warning::
    +
    +      Use of :func:`tmpnam` is vulnerable to symlink attacks; consider using
    +      :func:`tmpfile` (section :ref:`os-newstreams`) instead.
    +
    +   Availability: Unix, Windows.  This function probably shouldn't be used on
    +   Windows, though: Microsoft's implementation of :func:`tmpnam` always creates a
    +   name in the root directory of the current drive, and that's generally a poor
    +   location for a temp file (depending on privileges, you may not even be able to
    +   open a file using this name).
    +
    +
    +.. data:: TMP_MAX
    +
    +   The maximum number of unique names that :func:`tmpnam` will generate before
    +   reusing names.
    +
    +
    +.. function:: unlink(path)
    +
    +   Remove the file *path*.  This is the same function as :func:`remove`; the
    +   :func:`unlink` name is its traditional Unix name. Availability: Macintosh, Unix,
    +   Windows.
    +
    +
    +.. function:: utime(path, times)
    +
    +   Set the access and modified times of the file specified by *path*. If *times* is
    +   ``None``, then the file's access and modified times are set to the current time.
    +   Otherwise, *times* must be a 2-tuple of numbers, of the form ``(atime, mtime)``
    +   which is used to set the access and modified times, respectively. Whether a
    +   directory can be given for *path* depends on whether the operating system
    +   implements directories as files (for example, Windows does not).  Note that the
    +   exact times you set here may not be returned by a subsequent :func:`stat` call,
    +   depending on the resolution with which your operating system records access and
    +   modification times; see :func:`stat`.
    +
    +   .. versionchanged:: 2.0
    +      Added support for ``None`` for *times*.
    +
    +   Availability: Macintosh, Unix, Windows.
    +
    +
    +.. function:: walk(top[, topdown=True [, onerror=None[, followlinks=False]]])
    +
    +   .. index::
    +      single: directory; walking
    +      single: directory; traversal
    +
    +   :func:`walk` generates the file names in a directory tree, by walking the tree
    +   either top down or bottom up. For each directory in the tree rooted at directory
    +   *top* (including *top* itself), it yields a 3-tuple ``(dirpath, dirnames,
    +   filenames)``.
    +
    +   *dirpath* is a string, the path to the directory.  *dirnames* is a list of the
    +   names of the subdirectories in *dirpath* (excluding ``'.'`` and ``'..'``).
    +   *filenames* is a list of the names of the non-directory files in *dirpath*.
    +   Note that the names in the lists contain no path components.  To get a full path
    +   (which begins with *top*) to a file or directory in *dirpath*, do
    +   ``os.path.join(dirpath, name)``.
    +
    +   If optional argument *topdown* is true or not specified, the triple for a
    +   directory is generated before the triples for any of its subdirectories
    +   (directories are generated top down).  If *topdown* is false, the triple for a
    +   directory is generated after the triples for all of its subdirectories
    +   (directories are generated bottom up).
    +
    +   When *topdown* is true, the caller can modify the *dirnames* list in-place
    +   (perhaps using :keyword:`del` or slice assignment), and :func:`walk` will only
    +   recurse into the subdirectories whose names remain in *dirnames*; this can be
    +   used to prune the search, impose a specific order of visiting, or even to inform
    +   :func:`walk` about directories the caller creates or renames before it resumes
    +   :func:`walk` again.  Modifying *dirnames* when *topdown* is false is
    +   ineffective, because in bottom-up mode the directories in *dirnames* are
    +   generated before *dirpath* itself is generated.
    +
    +   By default errors from the ``os.listdir()`` call are ignored.  If optional
    +   argument *onerror* is specified, it should be a function; it will be called with
    +   one argument, an :exc:`OSError` instance.  It can report the error to continue
    +   with the walk, or raise the exception to abort the walk.  Note that the filename
    +   is available as the ``filename`` attribute of the exception object.
    +
    +   By default, :func:`walk` will not walk down into symbolic links that resolve to
    +   directories. Set *followlinks* to ``True`` to visit directories pointed to by
    +   symlinks, on systems that support them.
    +
    +   .. versionadded:: 2.6
    +      The *followlinks* parameter.
    +
    +   .. note::
    +
    +      Be aware that setting *followlinks* to true can lead to infinite recursion if a
    +      link points to a parent directory of itself. :func:`walk` does not keep track of
    +      the directories it visited already.
    +
    +   .. note::
    +
    +      If you pass a relative pathname, don't change the current working directory
    +      between resumptions of :func:`walk`.  :func:`walk` never changes the current
    +      directory, and assumes that its caller doesn't either.
    +
    +   This example displays the number of bytes taken by non-directory files in each
    +   directory under the starting directory, except that it doesn't look under any
    +   CVS subdirectory::
    +
    +      import os
    +      from os.path import join, getsize
    +      for root, dirs, files in os.walk('python/Lib/email'):
    +          print root, "consumes",
    +          print sum(getsize(join(root, name)) for name in files),
    +          print "bytes in", len(files), "non-directory files"
    +          if 'CVS' in dirs:
    +              dirs.remove('CVS')  # don't visit CVS directories
    +
    +   In the next example, walking the tree bottom up is essential: :func:`rmdir`
    +   doesn't allow deleting a directory before the directory is empty::
    +
    +      # Delete everything reachable from the directory named in 'top',
    +      # assuming there are no symbolic links.
    +      # CAUTION:  This is dangerous!  For example, if top == '/', it
    +      # could delete all your disk files.
    +      import os
    +      for root, dirs, files in os.walk(top, topdown=False):
    +          for name in files:
    +              os.remove(os.path.join(root, name))
    +          for name in dirs:
    +              os.rmdir(os.path.join(root, name))
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. _os-process:
    +
    +Process Management
    +------------------
    +
    +These functions may be used to create and manage processes.
    +
    +The various :func:`exec\*` functions take a list of arguments for the new
    +program loaded into the process.  In each case, the first of these arguments is
    +passed to the new program as its own name rather than as an argument a user may
    +have typed on a command line.  For the C programmer, this is the ``argv[0]``
    +passed to a program's :cfunc:`main`.  For example, ``os.execv('/bin/echo',
    +['foo', 'bar'])`` will only print ``bar`` on standard output; ``foo`` will seem
    +to be ignored.
    +
    +
    +.. function:: abort()
    +
    +   Generate a :const:`SIGABRT` signal to the current process.  On Unix, the default
    +   behavior is to produce a core dump; on Windows, the process immediately returns
    +   an exit code of ``3``.  Be aware that programs which use :func:`signal.signal`
    +   to register a handler for :const:`SIGABRT` will behave differently.
    +   Availability: Macintosh, Unix, Windows.
    +
    +
    +.. function:: execl(path, arg0, arg1, ...)
    +              execle(path, arg0, arg1, ..., env)
    +              execlp(file, arg0, arg1, ...)
    +              execlpe(file, arg0, arg1, ..., env)
    +              execv(path, args)
    +              execve(path, args, env)
    +              execvp(file, args)
    +              execvpe(file, args, env)
    +
    +   These functions all execute a new program, replacing the current process; they
    +   do not return.  On Unix, the new executable is loaded into the current process,
    +   and will have the same process ID as the caller.  Errors will be reported as
    +   :exc:`OSError` exceptions.
    +
    +   The ``'l'`` and ``'v'`` variants of the :func:`exec\*` functions differ in how
    +   command-line arguments are passed.  The ``'l'`` variants are perhaps the easiest
    +   to work with if the number of parameters is fixed when the code is written; the
    +   individual parameters simply become additional parameters to the :func:`execl\*`
    +   functions.  The ``'v'`` variants are good when the number of parameters is
    +   variable, with the arguments being passed in a list or tuple as the *args*
    +   parameter.  In either case, the arguments to the child process should start with
    +   the name of the command being run, but this is not enforced.
    +
    +   The variants which include a ``'p'`` near the end (:func:`execlp`,
    +   :func:`execlpe`, :func:`execvp`, and :func:`execvpe`) will use the
    +   :envvar:`PATH` environment variable to locate the program *file*.  When the
    +   environment is being replaced (using one of the :func:`exec\*e` variants,
    +   discussed in the next paragraph), the new environment is used as the source of
    +   the :envvar:`PATH` variable. The other variants, :func:`execl`, :func:`execle`,
    +   :func:`execv`, and :func:`execve`, will not use the :envvar:`PATH` variable to
    +   locate the executable; *path* must contain an appropriate absolute or relative
    +   path.
    +
    +   For :func:`execle`, :func:`execlpe`, :func:`execve`, and :func:`execvpe` (note
    +   that these all end in ``'e'``), the *env* parameter must be a mapping which is
    +   used to define the environment variables for the new process; the :func:`execl`,
    +   :func:`execlp`, :func:`execv`, and :func:`execvp` all cause the new process to
    +   inherit the environment of the current process. Availability: Macintosh, Unix,
    +   Windows.
    +
    +
    +.. function:: _exit(n)
    +
    +   Exit to the system with status *n*, without calling cleanup handlers, flushing
    +   stdio buffers, etc. Availability: Macintosh, Unix, Windows.
    +
    +   .. note::
    +
    +      The standard way to exit is ``sys.exit(n)``. :func:`_exit` should normally only
    +      be used in the child process after a :func:`fork`.
    +
    +The following exit codes are defined and can be used with :func:`_exit`,
    +although they are not required.  These are typically used for system programs
    +written in Python, such as a mail server's external command delivery program.
    +
    +.. note::
    +
    +   Some of these may not be available on all Unix platforms, since there is some
    +   variation.  Each constant is defined only where the underlying platform
    +   defines it.
    +
    +
    +.. data:: EX_OK
    +
    +   Exit code that means no error occurred. Availability: Macintosh, Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: EX_USAGE
    +
    +   Exit code that means the command was used incorrectly, such as when the wrong
    +   number of arguments are given. Availability: Macintosh, Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: EX_DATAERR
    +
    +   Exit code that means the input data was incorrect. Availability: Macintosh,
    +   Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: EX_NOINPUT
    +
    +   Exit code that means an input file did not exist or was not readable.
    +   Availability: Macintosh, Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: EX_NOUSER
    +
    +   Exit code that means a specified user did not exist. Availability: Macintosh,
    +   Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: EX_NOHOST
    +
    +   Exit code that means a specified host did not exist. Availability: Macintosh,
    +   Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: EX_UNAVAILABLE
    +
    +   Exit code that means that a required service is unavailable. Availability:
    +   Macintosh, Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: EX_SOFTWARE
    +
    +   Exit code that means an internal software error was detected. Availability:
    +   Macintosh, Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: EX_OSERR
    +
    +   Exit code that means an operating system error was detected, such as the
    +   inability to fork or create a pipe. Availability: Macintosh, Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: EX_OSFILE
    +
    +   Exit code that means some system file did not exist, could not be opened, or had
    +   some other kind of error. Availability: Macintosh, Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: EX_CANTCREAT
    +
    +   Exit code that means a user specified output file could not be created.
    +   Availability: Macintosh, Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: EX_IOERR
    +
    +   Exit code that means that an error occurred while doing I/O on some file.
    +   Availability: Macintosh, Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: EX_TEMPFAIL
    +
    +   Exit code that means a temporary failure occurred.  This indicates something
    +   that may not really be an error, such as a network connection that couldn't be
    +   made during a retryable operation. Availability: Macintosh, Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: EX_PROTOCOL
    +
    +   Exit code that means that a protocol exchange was illegal, invalid, or not
    +   understood. Availability: Macintosh, Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: EX_NOPERM
    +
    +   Exit code that means that there were insufficient permissions to perform the
    +   operation (but not intended for file system problems). Availability: Macintosh,
    +   Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: EX_CONFIG
    +
    +   Exit code that means that some kind of configuration error occurred.
    +   Availability: Macintosh, Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: EX_NOTFOUND
    +
    +   Exit code that means something like "an entry was not found". Availability:
    +   Macintosh, Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: fork()
    +
    +   Fork a child process.  Return ``0`` in the child, the child's process id in the
    +   parent. Availability: Macintosh, Unix.
    +
    +
    +.. function:: forkpty()
    +
    +   Fork a child process, using a new pseudo-terminal as the child's controlling
    +   terminal. Return a pair of ``(pid, fd)``, where *pid* is ``0`` in the child, the
    +   new child's process id in the parent, and *fd* is the file descriptor of the
    +   master end of the pseudo-terminal.  For a more portable approach, use the
    +   :mod:`pty` module. Availability: Macintosh, some flavors of Unix.
    +
    +
    +.. function:: kill(pid, sig)
    +
    +   .. index::
    +      single: process; killing
    +      single: process; signalling
    +
    +   Send signal *sig* to the process *pid*.  Constants for the specific signals
    +   available on the host platform are defined in the :mod:`signal` module.
    +   Availability: Macintosh, Unix.
    +
    +
    +.. function:: killpg(pgid, sig)
    +
    +   .. index::
    +      single: process; killing
    +      single: process; signalling
    +
    +   Send the signal *sig* to the process group *pgid*. Availability: Macintosh,
    +   Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: nice(increment)
    +
    +   Add *increment* to the process's "niceness".  Return the new niceness.
    +   Availability: Macintosh, Unix.
    +
    +
    +.. function:: plock(op)
    +
    +   Lock program segments into memory.  The value of *op* (defined in
    +   ``<sys/lock.h>``) determines which segments are locked. Availability: Macintosh,
    +   Unix.
    +
    +
    +.. function:: popen(...)
    +   :noindex:
    +
    +   Run child processes, returning opened pipes for communications.  These functions
    +   are described in section :ref:`os-newstreams`.
    +
    +
    +.. function:: spawnl(mode, path, ...)
    +              spawnle(mode, path, ..., env)
    +              spawnlp(mode, file, ...)
    +              spawnlpe(mode, file, ..., env)
    +              spawnv(mode, path, args)
    +              spawnve(mode, path, args, env)
    +              spawnvp(mode, file, args)
    +              spawnvpe(mode, file, args, env)
    +
    +   Execute the program *path* in a new process.
    +
    +   (Note that the :mod:`subprocess` module provides more powerful facilities for
    +   spawning new processes and retrieving their results; using that module is
    +   preferable to using these functions.)
    +
    +   If *mode* is :const:`P_NOWAIT`, this function returns the process ID of the new
    +   process; if *mode* is :const:`P_WAIT`, returns the process's exit code if it
    +   exits normally, or ``-signal``, where *signal* is the signal that killed the
    +   process.  On Windows, the process ID will actually be the process handle, so can
    +   be used with the :func:`waitpid` function.
    +
    +   The ``'l'`` and ``'v'`` variants of the :func:`spawn\*` functions differ in how
    +   command-line arguments are passed.  The ``'l'`` variants are perhaps the easiest
    +   to work with if the number of parameters is fixed when the code is written; the
    +   individual parameters simply become additional parameters to the
    +   :func:`spawnl\*` functions.  The ``'v'`` variants are good when the number of
    +   parameters is variable, with the arguments being passed in a list or tuple as
    +   the *args* parameter.  In either case, the arguments to the child process must
    +   start with the name of the command being run.
    +
    +   The variants which include a second ``'p'`` near the end (:func:`spawnlp`,
    +   :func:`spawnlpe`, :func:`spawnvp`, and :func:`spawnvpe`) will use the
    +   :envvar:`PATH` environment variable to locate the program *file*.  When the
    +   environment is being replaced (using one of the :func:`spawn\*e` variants,
    +   discussed in the next paragraph), the new environment is used as the source of
    +   the :envvar:`PATH` variable.  The other variants, :func:`spawnl`,
    +   :func:`spawnle`, :func:`spawnv`, and :func:`spawnve`, will not use the
    +   :envvar:`PATH` variable to locate the executable; *path* must contain an
    +   appropriate absolute or relative path.
    +
    +   For :func:`spawnle`, :func:`spawnlpe`, :func:`spawnve`, and :func:`spawnvpe`
    +   (note that these all end in ``'e'``), the *env* parameter must be a mapping
    +   which is used to define the environment variables for the new process; the
    +   :func:`spawnl`, :func:`spawnlp`, :func:`spawnv`, and :func:`spawnvp` all cause
    +   the new process to inherit the environment of the current process.
    +
    +   As an example, the following calls to :func:`spawnlp` and :func:`spawnvpe` are
    +   equivalent::
    +
    +      import os
    +      os.spawnlp(os.P_WAIT, 'cp', 'cp', 'index.html', '/dev/null')
    +
    +      L = ['cp', 'index.html', '/dev/null']
    +      os.spawnvpe(os.P_WAIT, 'cp', L, os.environ)
    +
    +   Availability: Unix, Windows.  :func:`spawnlp`, :func:`spawnlpe`, :func:`spawnvp`
    +   and :func:`spawnvpe` are not available on Windows.
    +
    +   .. versionadded:: 1.6
    +
    +
    +.. data:: P_NOWAIT
    +          P_NOWAITO
    +
    +   Possible values for the *mode* parameter to the :func:`spawn\*` family of
    +   functions.  If either of these values is given, the :func:`spawn\*` functions
    +   will return as soon as the new process has been created, with the process ID as
    +   the return value. Availability: Macintosh, Unix, Windows.
    +
    +   .. versionadded:: 1.6
    +
    +
    +.. data:: P_WAIT
    +
    +   Possible value for the *mode* parameter to the :func:`spawn\*` family of
    +   functions.  If this is given as *mode*, the :func:`spawn\*` functions will not
    +   return until the new process has run to completion and will return the exit code
    +   of the process if the run is successful, or ``-signal`` if a signal kills the
    +   process. Availability: Macintosh, Unix, Windows.
    +
    +   .. versionadded:: 1.6
    +
    +
    +.. data:: P_DETACH
    +          P_OVERLAY
    +
    +   Possible values for the *mode* parameter to the :func:`spawn\*` family of
    +   functions.  These are less portable than those listed above. :const:`P_DETACH`
    +   is similar to :const:`P_NOWAIT`, but the new process is detached from the
    +   console of the calling process. If :const:`P_OVERLAY` is used, the current
    +   process will be replaced; the :func:`spawn\*` function will not return.
    +   Availability: Windows.
    +
    +   .. versionadded:: 1.6
    +
    +
    +.. function:: startfile(path[, operation])
    +
    +   Start a file with its associated application.
    +
    +   When *operation* is not specified or ``'open'``, this acts like double-clicking
    +   the file in Windows Explorer, or giving the file name as an argument to the
    +   :program:`start` command from the interactive command shell: the file is opened
    +   with whatever application (if any) its extension is associated with.
    +
    +   When another *operation* is given, it must be a "command verb" that specifies
    +   what should be done with the file. Common verbs documented by Microsoft are
    +   ``'print'`` and  ``'edit'`` (to be used on files) as well as ``'explore'`` and
    +   ``'find'`` (to be used on directories).
    +
    +   :func:`startfile` returns as soon as the associated application is launched.
    +   There is no option to wait for the application to close, and no way to retrieve
    +   the application's exit status.  The *path* parameter is relative to the current
    +   directory.  If you want to use an absolute path, make sure the first character
    +   is not a slash (``'/'``); the underlying Win32 :cfunc:`ShellExecute` function
    +   doesn't work if it is.  Use the :func:`os.path.normpath` function to ensure that
    +   the path is properly encoded for Win32. Availability: Windows.
    +
    +   .. versionadded:: 2.0
    +
    +   .. versionadded:: 2.5
    +      The *operation* parameter.
    +
    +
    +.. function:: system(command)
    +
    +   Execute the command (a string) in a subshell.  This is implemented by calling
    +   the Standard C function :cfunc:`system`, and has the same limitations.  Changes
    +   to ``posix.environ``, ``sys.stdin``, etc. are not reflected in the environment
    +   of the executed command.
    +
    +   On Unix, the return value is the exit status of the process encoded in the
    +   format specified for :func:`wait`.  Note that POSIX does not specify the meaning
    +   of the return value of the C :cfunc:`system` function, so the return value of
    +   the Python function is system-dependent.
    +
    +   On Windows, the return value is that returned by the system shell after running
    +   *command*, given by the Windows environment variable :envvar:`COMSPEC`: on
    +   :program:`command.com` systems (Windows 95, 98 and ME) this is always ``0``; on
    +   :program:`cmd.exe` systems (Windows NT, 2000 and XP) this is the exit status of
    +   the command run; on systems using a non-native shell, consult your shell
    +   documentation.
    +
    +   Availability: Macintosh, Unix, Windows.
    +
    +   The :mod:`subprocess` module provides more powerful facilities for spawning new
    +   processes and retrieving their results; using that module is preferable to using
    +   this function.
    +
    +
    +.. function:: times()
    +
    +   Return a 5-tuple of floating point numbers indicating accumulated (processor or
    +   other) times, in seconds.  The items are: user time, system time, children's
    +   user time, children's system time, and elapsed real time since a fixed point in
    +   the past, in that order.  See the Unix manual page :manpage:`times(2)` or the
    +   corresponding Windows Platform API documentation. Availability: Macintosh, Unix,
    +   Windows.
    +
    +
    +.. function:: wait()
    +
    +   Wait for completion of a child process, and return a tuple containing its pid
    +   and exit status indication: a 16-bit number, whose low byte is the signal number
    +   that killed the process, and whose high byte is the exit status (if the signal
    +   number is zero); the high bit of the low byte is set if a core file was
    +   produced. Availability: Macintosh, Unix.
    +
    +
    +.. function:: waitpid(pid, options)
    +
    +   The details of this function differ on Unix and Windows.
    +
    +   On Unix: Wait for completion of a child process given by process id *pid*, and
    +   return a tuple containing its process id and exit status indication (encoded as
    +   for :func:`wait`).  The semantics of the call are affected by the value of the
    +   integer *options*, which should be ``0`` for normal operation.
    +
    +   If *pid* is greater than ``0``, :func:`waitpid` requests status information for
    +   that specific process.  If *pid* is ``0``, the request is for the status of any
    +   child in the process group of the current process.  If *pid* is ``-1``, the
    +   request pertains to any child of the current process.  If *pid* is less than
    +   ``-1``, status is requested for any process in the process group ``-pid`` (the
    +   absolute value of *pid*).
    +
    +   On Windows: Wait for completion of a process given by process handle *pid*, and
    +   return a tuple containing *pid*, and its exit status shifted left by 8 bits
    +   (shifting makes cross-platform use of the function easier). A *pid* less than or
    +   equal to ``0`` has no special meaning on Windows, and raises an exception. The
    +   value of integer *options* has no effect. *pid* can refer to any process whose
    +   id is known, not necessarily a child process. The :func:`spawn\*` functions called
    +   with :const:`P_NOWAIT` return suitable process handles.
    +
    +
    +.. function:: wait3([options])
    +
    +   Similar to :func:`waitpid`, except no process id argument is given and a
    +   3-element tuple containing the child's process id, exit status indication, and
    +   resource usage information is returned.  Refer to :mod:`resource`.\
    +   :func:`getrusage` for details on resource usage information.  The option
    +   argument is the same as that provided to :func:`waitpid` and :func:`wait4`.
    +   Availability: Unix.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. function:: wait4(pid, options)
    +
    +   Similar to :func:`waitpid`, except a 3-element tuple, containing the child's
    +   process id, exit status indication, and resource usage information is returned.
    +   Refer to :mod:`resource`.\ :func:`getrusage` for details on resource usage
    +   information.  The arguments to :func:`wait4` are the same as those provided to
    +   :func:`waitpid`. Availability: Unix.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. data:: WNOHANG
    +
    +   The option for :func:`waitpid` to return immediately if no child process status
    +   is available immediately. The function returns ``(0, 0)`` in this case.
    +   Availability: Macintosh, Unix.
    +
    +
    +.. data:: WCONTINUED
    +
    +   This option causes child processes to be reported if they have been continued
    +   from a job control stop since their status was last reported. Availability: Some
    +   Unix systems.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. data:: WUNTRACED
    +
    +   This option causes child processes to be reported if they have been stopped but
    +   their current state has not been reported since they were stopped. Availability:
    +   Macintosh, Unix.
    +
    +   .. versionadded:: 2.3
    +
    +The following functions take a process status code as returned by
    +:func:`system`, :func:`wait`, or :func:`waitpid` as a parameter.  They may be
    +used to determine the disposition of a process.
    +
    +
    +.. function:: WCOREDUMP(status)
    +
    +   Returns ``True`` if a core dump was generated for the process, otherwise it
    +   returns ``False``. Availability: Macintosh, Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: WIFCONTINUED(status)
    +
    +   Returns ``True`` if the process has been continued from a job control stop,
    +   otherwise it returns ``False``. Availability: Unix.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: WIFSTOPPED(status)
    +
    +   Returns ``True`` if the process has been stopped, otherwise it returns
    +   ``False``. Availability: Unix.
    +
    +
    +.. function:: WIFSIGNALED(status)
    +
    +   Returns ``True`` if the process exited due to a signal, otherwise it returns
    +   ``False``. Availability: Macintosh, Unix.
    +
    +
    +.. function:: WIFEXITED(status)
    +
    +   Returns ``True`` if the process exited using the :manpage:`exit(2)` system call,
    +   otherwise it returns ``False``. Availability: Macintosh, Unix.
    +
    +
    +.. function:: WEXITSTATUS(status)
    +
    +   If ``WIFEXITED(status)`` is true, return the integer parameter to the
    +   :manpage:`exit(2)` system call.  Otherwise, the return value is meaningless.
    +   Availability: Macintosh, Unix.
    +
    +
    +.. function:: WSTOPSIG(status)
    +
    +   Return the signal which caused the process to stop. Availability: Macintosh,
    +   Unix.
    +
    +
    +.. function:: WTERMSIG(status)
    +
    +   Return the signal which caused the process to exit. Availability: Macintosh,
    +   Unix.
    +
    +
    +.. _os-path:
    +
    +Miscellaneous System Information
    +--------------------------------
    +
    +
    +.. function:: confstr(name)
    +
    +   Return string-valued system configuration values. *name* specifies the
    +   configuration value to retrieve; it may be a string which is the name of a
    +   defined system value; these names are specified in a number of standards (POSIX,
    +   Unix 95, Unix 98, and others).  Some platforms define additional names as well.
    +   The names known to the host operating system are given as the keys of the
    +   ``confstr_names`` dictionary.  For configuration variables not included in that
    +   mapping, passing an integer for *name* is also accepted. Availability:
    +   Macintosh, Unix.
    +
    +   If the configuration value specified by *name* isn't defined, ``None`` is
    +   returned.
    +
    +   If *name* is a string and is not known, :exc:`ValueError` is raised.  If a
    +   specific value for *name* is not supported by the host system, even if it is
    +   included in ``confstr_names``, an :exc:`OSError` is raised with
    +   :const:`errno.EINVAL` for the error number.
    +
    +
    +.. data:: confstr_names
    +
    +   Dictionary mapping names accepted by :func:`confstr` to the integer values
    +   defined for those names by the host operating system. This can be used to
    +   determine the set of names known to the system. Availability: Macintosh, Unix.
    +
    +
    +.. function:: getloadavg()
    +
    +   Return the number of processes in the system run queue averaged over the last 1,
    +   5, and 15 minutes, or raise :exc:`OSError` if the load average was
    +   unobtainable.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. function:: sysconf(name)
    +
    +   Return integer-valued system configuration values. If the configuration value
    +   specified by *name* isn't defined, ``-1`` is returned.  The comments regarding
    +   the *name* parameter for :func:`confstr` apply here as well; the dictionary that
    +   provides information on the known names is given by ``sysconf_names``.
    +   Availability: Macintosh, Unix.
    +
    +
    +.. data:: sysconf_names
    +
    +   Dictionary mapping names accepted by :func:`sysconf` to the integer values
    +   defined for those names by the host operating system. This can be used to
    +   determine the set of names known to the system. Availability: Macintosh, Unix.
    +
    +The following data values are used to support path manipulation operations.  These
    +are defined for all platforms.
    +
    +Higher-level operations on pathnames are defined in the :mod:`os.path` module.
    +
    +
    +.. data:: curdir
    +
    +   The constant string used by the operating system to refer to the current
    +   directory. For example: ``'.'`` for POSIX or ``':'`` for Mac OS 9. Also
    +   available via :mod:`os.path`.
    +
    +
    +.. data:: pardir
    +
    +   The constant string used by the operating system to refer to the parent
    +   directory. For example: ``'..'`` for POSIX or ``'::'`` for Mac OS 9. Also
    +   available via :mod:`os.path`.
    +
    +
    +.. data:: sep
    +
    +   The character used by the operating system to separate pathname components, for
    +   example, ``'/'`` for POSIX or ``':'`` for Mac OS 9.  Note that knowing this is
    +   not sufficient to be able to parse or concatenate pathnames --- use
    +   :func:`os.path.split` and :func:`os.path.join` --- but it is occasionally
    +   useful. Also available via :mod:`os.path`.
    +
    +
    +.. data:: altsep
    +
    +   An alternative character used by the operating system to separate pathname
    +   components, or ``None`` if only one separator character exists.  This is set to
    +   ``'/'`` on Windows systems where ``sep`` is a backslash. Also available via
    +   :mod:`os.path`.
    +
    +
    +.. data:: extsep
    +
    +   The character which separates the base filename from the extension; for example,
    +   the ``'.'`` in :file:`os.py`. Also available via :mod:`os.path`.
    +
    +   .. versionadded:: 2.2
    +
    +
    +.. data:: pathsep
    +
    +   The character conventionally used by the operating system to separate search
    +   path components (as in :envvar:`PATH`), such as ``':'`` for POSIX or ``';'`` for
    +   Windows. Also available via :mod:`os.path`.
    +
    +
    +.. data:: defpath
    +
    +   The default search path used by :func:`exec\*p\*` and :func:`spawn\*p\*` if the
    +   environment doesn't have a ``'PATH'`` key. Also available via :mod:`os.path`.
    +
    +
    +.. data:: linesep
    +
    +   The string used to separate (or, rather, terminate) lines on the current
    +   platform.  This may be a single character, such as  ``'\n'`` for POSIX or
    +   ``'\r'`` for Mac OS, or multiple  characters, for example, ``'\r\n'`` for
    +   Windows. Do not use *os.linesep* as a line terminator when writing files  opened
    +   in text mode (the default); use a single ``'\n'`` instead,  on all platforms.
    +
    +
    +.. data:: devnull
    +
    +   The file path of the null device. For example: ``'/dev/null'`` for POSIX or
    +   ``'Dev:Nul'`` for Mac OS 9. Also available via :mod:`os.path`.
    +
    +   .. versionadded:: 2.4
    +
    +
    +.. _os-miscfunc:
    +
    +Miscellaneous Functions
    +-----------------------
    +
    +
    +.. function:: urandom(n)
    +
    +   Return a string of *n* random bytes suitable for cryptographic use.
    +
    +   This function returns random bytes from an OS-specific randomness source.  The
    +   returned data should be unpredictable enough for cryptographic applications,
    +   though its exact quality depends on the OS implementation.  On a UNIX-like
    +   system this will query /dev/urandom, and on Windows it will use CryptGenRandom.
    +   If a randomness source is not found, :exc:`NotImplementedError` will be raised.
    +
    +   .. versionadded:: 2.4
    +
    diff --git a/Doc/library/ossaudiodev.rst b/Doc/library/ossaudiodev.rst
    new file mode 100644
    index 0000000..066b26b
    --- /dev/null
    +++ b/Doc/library/ossaudiodev.rst
    @@ -0,0 +1,429 @@
    +
    +:mod:`ossaudiodev` --- Access to OSS-compatible audio devices
    +=============================================================
    +
    +.. module:: ossaudiodev
    +   :platform: Linux, FreeBSD
    +   :synopsis: Access to OSS-compatible audio devices.
    +
    +
    +.. versionadded:: 2.3
    +
    +This module allows you to access the OSS (Open Sound System) audio interface.
    +OSS is available for a wide range of open-source and commercial Unices, and is
    +the standard audio interface for Linux and recent versions of FreeBSD.
    +
    +.. % Things will get more complicated for future Linux versions, since
    +.. % ALSA is in the standard kernel as of 2.5.x.  Presumably if you
    +.. % use ALSA, you'll have to make sure its OSS compatibility layer
    +.. % is active to use ossaudiodev, but you're gonna need it for the vast
    +.. % majority of Linux audio apps anyways.
    +.. % 
    +.. % Sounds like things are also complicated for other BSDs.  In response
    +.. % to my python-dev query, Thomas Wouters said:
    +.. % 
    +.. % > Likewise, googling shows OpenBSD also uses OSS/Free -- the commercial
    +.. % > OSS installation manual tells you to remove references to OSS/Free from the
    +.. % > kernel :)
    +.. % 
    +.. % but Aleksander Piotrowsk actually has an OpenBSD box, and he quotes
    +.. % from its <soundcard.h>:
    +.. % >  * WARNING!  WARNING!
    +.. % >  * This is an OSS (Linux) audio emulator.
    +.. % >  * Use the Native NetBSD API for developing new code, and this
    +.. % >  * only for compiling Linux programs.
    +.. % 
    +.. % There's also an ossaudio manpage on OpenBSD that explains things
    +.. % further.  Presumably NetBSD and OpenBSD have a different standard
    +.. % audio interface.  That's the great thing about standards, there are so
    +.. % many to choose from ... ;-)
    +.. % 
    +.. % This probably all warrants a footnote or two, but I don't understand
    +.. % things well enough right now to write it!   --GPW
    +
    +
    +.. seealso::
    +
    +   `Open Sound System Programmer's Guide <http://www.opensound.com/pguide/oss.pdf>`_
    +      the official documentation for the OSS C API
    +
    +   The module defines a large number of constants supplied by the OSS device
    +   driver; see ``<sys/soundcard.h>`` on either Linux or FreeBSD for a listing.
    +
    +:mod:`ossaudiodev` defines the following variables and functions:
    +
    +
    +.. exception:: OSSAudioError
    +
    +   This exception is raised on certain errors.  The argument is a string describing
    +   what went wrong.
    +
    +   (If :mod:`ossaudiodev` receives an error from a system call such as
    +   :cfunc:`open`, :cfunc:`write`, or :cfunc:`ioctl`, it raises :exc:`IOError`.
    +   Errors detected directly by :mod:`ossaudiodev` result in :exc:`OSSAudioError`.)
    +
    +   (For backwards compatibility, the exception class is also available as
    +   ``ossaudiodev.error``.)
    +
    +
    +.. function:: open([device, ]mode)
    +
    +   Open an audio device and return an OSS audio device object.  This object
    +   supports many file-like methods, such as :meth:`read`, :meth:`write`, and
    +   :meth:`fileno` (although there are subtle differences between conventional Unix
    +   read/write semantics and those of OSS audio devices).  It also supports a number
    +   of audio-specific methods; see below for the complete list of methods.
    +
    +   *device* is the audio device filename to use.  If it is not specified, this
    +   module first looks in the environment variable :envvar:`AUDIODEV` for a device
    +   to use.  If not found, it falls back to :file:`/dev/dsp`.
    +
    +   *mode* is one of ``'r'`` for read-only (record) access, ``'w'`` for
    +   write-only (playback) access and ``'rw'`` for both. Since many sound cards
    +   only allow one process to have the recorder or player open at a time, it is a
    +   good idea to open the device only for the activity needed.  Further, some
    +   sound cards are half-duplex: they can be opened for reading or writing, but
    +   not both at once.
    +
    +   Note the unusual calling syntax: the *first* argument is optional, and the
    +   second is required.  This is a historical artifact for compatibility with the
    +   older :mod:`linuxaudiodev` module which :mod:`ossaudiodev` supersedes.
    +
    +   .. % XXX it might also be motivated
    +   .. % by my unfounded-but-still-possibly-true belief that the default
    +   .. % audio device varies unpredictably across operating systems.  -GW
    +
    +
    +.. function:: openmixer([device])
    +
    +   Open a mixer device and return an OSS mixer device object.   *device* is the
    +   mixer device filename to use.  If it is not specified, this module first looks
    +   in the environment variable :envvar:`MIXERDEV` for a device to use.  If not
    +   found, it falls back to :file:`/dev/mixer`.
    +
    +
    +.. _ossaudio-device-objects:
    +
    +Audio Device Objects
    +--------------------
    +
    +Before you can write to or read from an audio device, you must call three
    +methods in the correct order:
    +
    +#. :meth:`setfmt` to set the output format
    +
    +#. :meth:`channels` to set the number of channels
    +
    +#. :meth:`speed` to set the sample rate
    +
    +Alternately, you can use the :meth:`setparameters` method to set all three audio
    +parameters at once.  This is more convenient, but may not be as flexible in all
    +cases.
    +
    +The audio device objects returned by :func:`open` define the following methods
    +and (read-only) attributes:
    +
    +
    +.. method:: oss_audio_device.close()
    +
    +   Explicitly close the audio device.  When you are done writing to or reading from
    +   an audio device, you should explicitly close it.  A closed device cannot be used
    +   again.
    +
    +
    +.. method:: oss_audio_device.fileno()
    +
    +   Return the file descriptor associated with the device.
    +
    +
    +.. method:: oss_audio_device.read(size)
    +
    +   Read *size* bytes from the audio input and return them as a Python string.
    +   Unlike most Unix device drivers, OSS audio devices in blocking mode (the
    +   default) will block :func:`read` until the entire requested amount of data is
    +   available.
    +
    +
    +.. method:: oss_audio_device.write(data)
    +
    +   Write the Python string *data* to the audio device and return the number of
    +   bytes written.  If the audio device is in blocking mode (the default), the
    +   entire string is always written (again, this is different from usual Unix device
    +   semantics).  If the device is in non-blocking mode, some data may not be written
    +   ---see :meth:`writeall`.
    +
    +
    +.. method:: oss_audio_device.writeall(data)
    +
    +   Write the entire Python string *data* to the audio device: waits until the audio
    +   device is able to accept data, writes as much data as it will accept, and
    +   repeats until *data* has been completely written. If the device is in blocking
    +   mode (the default), this has the same effect as :meth:`write`; :meth:`writeall`
    +   is only useful in non-blocking mode.  Has no return value, since the amount of
    +   data written is always equal to the amount of data supplied.
    +
    +The following methods each map to exactly one :func:`ioctl` system call.  The
    +correspondence is obvious: for example, :meth:`setfmt` corresponds to the
    +``SNDCTL_DSP_SETFMT`` ioctl, and :meth:`sync` to ``SNDCTL_DSP_SYNC`` (this can
    +be useful when consulting the OSS documentation).  If the underlying
    +:func:`ioctl` fails, they all raise :exc:`IOError`.
    +
    +
    +.. method:: oss_audio_device.nonblock()
    +
    +   Put the device into non-blocking mode.  Once in non-blocking mode, there is no
    +   way to return it to blocking mode.
    +
    +
    +.. method:: oss_audio_device.getfmts()
    +
    +   Return a bitmask of the audio output formats supported by the soundcard.  Some
    +   of the formats supported by OSS are:
    +
    +   +-------------------------+---------------------------------------------+
    +   | Format                  | Description                                 |
    +   +=========================+=============================================+
    +   | :const:`AFMT_MU_LAW`    | a logarithmic encoding (used by Sun ``.au`` |
    +   |                         | files and :file:`/dev/audio`)               |
    +   +-------------------------+---------------------------------------------+
    +   | :const:`AFMT_A_LAW`     | a logarithmic encoding                      |
    +   +-------------------------+---------------------------------------------+
    +   | :const:`AFMT_IMA_ADPCM` | a 4:1 compressed format defined by the      |
    +   |                         | Interactive Multimedia Association          |
    +   +-------------------------+---------------------------------------------+
    +   | :const:`AFMT_U8`        | Unsigned, 8-bit audio                       |
    +   +-------------------------+---------------------------------------------+
    +   | :const:`AFMT_S16_LE`    | Signed, 16-bit audio, little-endian byte    |
    +   |                         | order (as used by Intel processors)         |
    +   +-------------------------+---------------------------------------------+
    +   | :const:`AFMT_S16_BE`    | Signed, 16-bit audio, big-endian byte order |
    +   |                         | (as used by 68k, PowerPC, Sparc)            |
    +   +-------------------------+---------------------------------------------+
    +   | :const:`AFMT_S8`        | Signed, 8-bit audio                         |
    +   +-------------------------+---------------------------------------------+
    +   | :const:`AFMT_U16_LE`    | Unsigned, 16-bit little-endian audio        |
    +   +-------------------------+---------------------------------------------+
    +   | :const:`AFMT_U16_BE`    | Unsigned, 16-bit big-endian audio           |
    +   +-------------------------+---------------------------------------------+
    +
    +   Consult the OSS documentation for a full list of audio formats, and note that
    +   most devices support only a subset of these formats.  Some older devices only
    +   support :const:`AFMT_U8`; the most common format used today is
    +   :const:`AFMT_S16_LE`.
    +
    +
    +.. method:: oss_audio_device.setfmt(format)
    +
    +   Try to set the current audio format to *format*---see :meth:`getfmts` for a
    +   list.  Returns the audio format that the device was set to, which may not be the
    +   requested format.  May also be used to return the current audio format---do this
    +   by passing an "audio format" of :const:`AFMT_QUERY`.
    +
    +
    +.. method:: oss_audio_device.channels(nchannels)
    +
    +   Set the number of output channels to *nchannels*.  A value of 1 indicates
    +   monophonic sound, 2 stereophonic.  Some devices may have more than 2 channels,
    +   and some high-end devices may not support mono. Returns the number of channels
    +   the device was set to.
    +
    +
    +.. method:: oss_audio_device.speed(samplerate)
    +
    +   Try to set the audio sampling rate to *samplerate* samples per second.  Returns
    +   the rate actually set.  Most sound devices don't support arbitrary sampling
    +   rates.  Common rates are:
    +
    +   +-------+-------------------------------------------+
    +   | Rate  | Description                               |
    +   +=======+===========================================+
    +   | 8000  | default rate for :file:`/dev/audio`       |
    +   +-------+-------------------------------------------+
    +   | 11025 | speech recording                          |
    +   +-------+-------------------------------------------+
    +   | 22050 |                                           |
    +   +-------+-------------------------------------------+
    +   | 44100 | CD quality audio (at 16 bits/sample and 2 |
    +   |       | channels)                                 |
    +   +-------+-------------------------------------------+
    +   | 96000 | DVD quality audio (at 24 bits/sample)     |
    +   +-------+-------------------------------------------+
    +
    +
    +.. method:: oss_audio_device.sync()
    +
    +   Wait until the sound device has played every byte in its buffer.  (This happens
    +   implicitly when the device is closed.)  The OSS documentation recommends closing
    +   and re-opening the device rather than using :meth:`sync`.
    +
    +
    +.. method:: oss_audio_device.reset()
    +
    +   Immediately stop playing or recording and return the device to a state where it
    +   can accept commands.  The OSS documentation recommends closing and re-opening
    +   the device after calling :meth:`reset`.
    +
    +
    +.. method:: oss_audio_device.post()
    +
    +   Tell the driver that there is likely to be a pause in the output, making it
    +   possible for the device to handle the pause more intelligently.  You might use
    +   this after playing a spot sound effect, before waiting for user input, or before
    +   doing disk I/O.
    +
    +The following convenience methods combine several ioctls, or one ioctl and some
    +simple calculations.
    +
    +
    +.. method:: oss_audio_device.setparameters(format, nchannels, samplerate [, strict=False])
    +
    +   Set the key audio sampling parameters---sample format, number of channels, and
    +   sampling rate---in one method call.  *format*,  *nchannels*, and *samplerate*
    +   should be as specified in the :meth:`setfmt`, :meth:`channels`, and
    +   :meth:`speed`  methods.  If *strict* is true, :meth:`setparameters` checks to
    +   see if each parameter was actually set to the requested value, and raises
    +   :exc:`OSSAudioError` if not.  Returns a tuple (*format*, *nchannels*,
    +   *samplerate*) indicating the parameter values that were actually set by the
    +   device driver (i.e., the same as the return values of :meth:`setfmt`,
    +   :meth:`channels`, and :meth:`speed`).
    +
    +   For example,  ::
    +
    +      (fmt, channels, rate) = dsp.setparameters(fmt, channels, rate)
    +
    +   is equivalent to  ::
    +
    +      fmt = dsp.setfmt(fmt)
    +      channels = dsp.channels(channels)
    +      rate = dsp.speed(rate)
    +
    +
    +.. method:: oss_audio_device.bufsize()
    +
    +   Returns the size of the hardware buffer, in samples.
    +
    +
    +.. method:: oss_audio_device.obufcount()
    +
    +   Returns the number of samples that are in the hardware buffer yet to be played.
    +
    +
    +.. method:: oss_audio_device.obuffree()
    +
    +   Returns the number of samples that could be queued into the hardware buffer to
    +   be played without blocking.
    +
    +Audio device objects also support several read-only attributes:
    +
    +
    +.. attribute:: oss_audio_device.closed
    +
    +   Boolean indicating whether the device has been closed.
    +
    +
    +.. attribute:: oss_audio_device.name
    +
    +   String containing the name of the device file.
    +
    +
    +.. attribute:: oss_audio_device.mode
    +
    +   The I/O mode for the file, either ``"r"``, ``"rw"``, or ``"w"``.
    +
    +
    +.. _mixer-device-objects:
    +
    +Mixer Device Objects
    +--------------------
    +
    +The mixer object provides two file-like methods:
    +
    +
    +.. method:: oss_mixer_device.close()
    +
    +   This method closes the open mixer device file.  Any further attempts to use the
    +   mixer after this file is closed will raise an :exc:`IOError`.
    +
    +
    +.. method:: oss_mixer_device.fileno()
    +
    +   Returns the file handle number of the open mixer device file.
    +
    +The remaining methods are specific to audio mixing:
    +
    +
    +.. method:: oss_mixer_device.controls()
    +
    +   This method returns a bitmask specifying the available mixer controls ("Control"
    +   being a specific mixable "channel", such as :const:`SOUND_MIXER_PCM` or
    +   :const:`SOUND_MIXER_SYNTH`).  This bitmask indicates a subset of all available
    +   mixer controls---the :const:`SOUND_MIXER_\*` constants defined at module level.
    +   To determine if, for example, the current mixer object supports a PCM mixer, use
    +   the following Python code::
    +
    +      mixer=ossaudiodev.openmixer()
    +      if mixer.controls() & (1 << ossaudiodev.SOUND_MIXER_PCM):
    +          # PCM is supported
    +          ... code ...
    +
    +   For most purposes, the :const:`SOUND_MIXER_VOLUME` (master volume) and
    +   :const:`SOUND_MIXER_PCM` controls should suffice---but code that uses the mixer
    +   should be flexible when it comes to choosing mixer controls.  On the Gravis
    +   Ultrasound, for example, :const:`SOUND_MIXER_VOLUME` does not exist.
    +
    +
    +.. method:: oss_mixer_device.stereocontrols()
    +
    +   Returns a bitmask indicating stereo mixer controls.  If a bit is set, the
    +   corresponding control is stereo; if it is unset, the control is either
    +   monophonic or not supported by the mixer (use in combination with
    +   :meth:`controls` to determine which).
    +
    +   See the code example for the :meth:`controls` function for an example of getting
    +   data from a bitmask.
    +
    +
    +.. method:: oss_mixer_device.reccontrols()
    +
    +   Returns a bitmask specifying the mixer controls that may be used to record.  See
    +   the code example for :meth:`controls` for an example of reading from a bitmask.
    +
    +
    +.. method:: oss_mixer_device.get(control)
    +
    +   Returns the volume of a given mixer control.  The returned volume is a 2-tuple
    +   ``(left_volume,right_volume)``.  Volumes are specified as numbers from 0
    +   (silent) to 100 (full volume).  If the control is monophonic, a 2-tuple is still
    +   returned, but both volumes are the same.
    +
    +   Raises :exc:`OSSAudioError` if an invalid control is specified, or
    +   :exc:`IOError` if an unsupported control is specified.
    +
    +
    +.. method:: oss_mixer_device.set(control, (left, right))
    +
    +   Sets the volume for a given mixer control to ``(left,right)``. ``left`` and
    +   ``right`` must be ints and between 0 (silent) and 100 (full volume).  On
    +   success, the new volume is returned as a 2-tuple. Note that this may not be
    +   exactly the same as the volume specified, because of the limited resolution of
    +   some soundcards' mixers.
    +
    +   Raises :exc:`OSSAudioError` if an invalid mixer control was specified, or if the
    +   specified volumes were out-of-range.
    +
    +
    +.. method:: oss_mixer_device.get_recsrc()
    +
    +   This method returns a bitmask indicating which control(s) are currently being
    +   used as a recording source.
    +
    +
    +.. method:: oss_mixer_device.set_recsrc(bitmask)
    +
    +   Call this function to specify a recording source.  Returns a bitmask indicating
    +   the new recording source (or sources) if successful; raises :exc:`IOError` if an
    +   invalid source was specified.  To set the current recording source to the
    +   microphone input::
    +
    +      mixer.set_recsrc(1 << ossaudiodev.SOUND_MIXER_MIC)
    +
    diff --git a/Doc/library/othergui.rst b/Doc/library/othergui.rst
    new file mode 100644
    index 0000000..aadb74d
    --- /dev/null
    +++ b/Doc/library/othergui.rst
    @@ -0,0 +1,84 @@
    +.. _other-gui-packages:
    +
    +Other Graphical User Interface Packages
    +=======================================
    +
    +There are a number of extension widget sets to :mod:`Tkinter`.
    +
    +
    +.. seealso::
    +
    +   `Python megawidgets `_
    +      is a toolkit for building high-level compound widgets in Python using the
    +      :mod:`Tkinter` module.  It consists of a set of base classes and a library of
    +      flexible and extensible megawidgets built on this foundation. These megawidgets
    +      include notebooks, comboboxes, selection widgets, paned widgets, scrolled
    +      widgets, dialog windows, etc.  Also, with the Pmw.Blt interface to BLT, the
    +      busy, graph, stripchart, tabset and vector commands are available.
    +
    +      The initial ideas for Pmw were taken from the Tk ``itcl`` extensions ``[incr
    +      Tk]`` by Michael McLennan and ``[incr Widgets]`` by Mark Ulferts. Several of the
    +      megawidgets are direct translations from the itcl to Python. It offers most of
    +      the range of widgets that ``[incr Widgets]`` does, and is almost as complete as
    +      Tix, lacking however Tix's fast :class:`HList` widget for drawing trees.
    +
    +   `Tkinter3000 Widget Construction Kit (WCK) `_
    +      is a library that allows you to write new Tkinter widgets in pure Python.  The
    +      WCK framework gives you full control over widget creation, configuration, screen
    +      appearance, and event handling.  WCK widgets can be very fast and light-weight,
    +      since they can operate directly on Python data structures, without having to
    +      transfer data through the Tk/Tcl layer.
    +
    +      .. % 
    +
    +The major cross-platform (Windows, Mac OS X, Unix-like) GUI toolkits that are
    +also available for Python:
    +
    +
    +.. seealso::
    +
    +   `PyGTK `_
    +      is a set of bindings for the `GTK `_ widget set. It
    +      provides an object oriented interface that is slightly higher level than the C
    +      one. It comes with many more widgets than Tkinter provides, and
    +      has good Python-specific reference documentation. There are also `bindings
    +      `_ to  `GNOME `_.
    +      One well known PyGTK application is
    +      `PythonCAD `_. An
    +      online `tutorial `_ is
    +      available.
    +
    +   `PyQt `_
    +      PyQt is a :program:`sip`\ -wrapped binding to the Qt toolkit.  Qt is an
    +      extensive C++ GUI application development framework that is
    +      available for Unix, Windows and Mac OS X. :program:`sip` is a tool
    +      for generating bindings for C++ libraries as Python classes, and
    +      is specifically designed for Python. The *PyQt3* bindings have a
    +      book, `GUI Programming with Python: QT Edition
    +      `_ by Boudewijn
    +      Rempt. The *PyQt4* bindings also have a book, `Rapid GUI Programming
    +      with Python and Qt `_, by Mark
    +      Summerfield.
    +
    +   `wxPython `_
    +      wxPython is a cross-platform GUI toolkit for Python that is built around
    +      the popular `wxWidgets `_ (formerly wxWindows)
    +      C++ toolkit.  It provides a native look and feel for applications on
    +      Windows, Mac OS X, and Unix systems by using each platform's native
    +      widgets wherever possible (GTK+ on Unix-like systems).  In addition to
    +      an extensive set of widgets, wxPython provides classes for online
    +      documentation and context sensitive help, printing, HTML viewing,
    +      low-level device context drawing, drag and drop, system clipboard access,
    +      an XML-based resource format and more, including an ever growing library
    +      of user-contributed modules.  wxPython has a book, `wxPython in Action
    +      `_, by Noel Rappin and
    +      Robin Dunn.
    +
    +PyGTK, PyQt, and wxPython all have a modern look and feel and far more
    +widgets and better documentation than Tkinter. In addition,
    +there are many other GUI toolkits for Python, both cross-platform and
    +platform-specific. See the `GUI Programming
    +<http://wiki.python.org/moin/GuiProgramming>`_ page in the Python Wiki for a
    +much more complete list, and also for links to documents where the
    +different GUI toolkits are compared.
    +
    diff --git a/Doc/library/parser.rst b/Doc/library/parser.rst
    new file mode 100644
    index 0000000..b767561
    --- /dev/null
    +++ b/Doc/library/parser.rst
    @@ -0,0 +1,683 @@
    +
    +:mod:`parser` --- Access Python parse trees
    +===========================================
    +
    +.. module:: parser
    +   :synopsis: Access parse trees for Python source code.
    +.. moduleauthor:: Fred L. Drake, Jr. 
    +.. sectionauthor:: Fred L. Drake, Jr. 
    +
    +
    +.. % Copyright 1995 Virginia Polytechnic Institute and State University
    +.. % and Fred L. Drake, Jr.  This copyright notice must be distributed on
    +.. % all copies, but this document otherwise may be distributed as part
    +.. % of the Python distribution.  No fee may be charged for this document
    +.. % in any representation, either on paper or electronically.  This
    +.. % restriction does not affect other elements in a distributed package
    +.. % in any way.
    +
    +.. index:: single: parsing; Python source code
    +
    +The :mod:`parser` module provides an interface to Python's internal parser and
    +byte-code compiler.  The primary purpose for this interface is to allow Python
    +code to edit the parse tree of a Python expression and create executable code
    +from this.  This is better than trying to parse and modify an arbitrary Python
    +code fragment as a string because parsing is performed in a manner identical to
    +the code forming the application.  It is also faster.
    +
    +There are a few things to note about this module which are important to making
    +use of the data structures created.  This is not a tutorial on editing the parse
    +trees for Python code, but some examples of using the :mod:`parser` module are
    +presented.
    +
    +Most importantly, a good understanding of the Python grammar processed by the
    +internal parser is required.  For full information on the language syntax, refer
    +to :ref:`reference-index`.  The parser
    +itself is created from a grammar specification defined in the file
    +:file:`Grammar/Grammar` in the standard Python distribution.  The parse trees
    +stored in the AST objects created by this module are the actual output from the
    +internal parser when created by the :func:`expr` or :func:`suite` functions,
    +described below.  The AST objects created by :func:`sequence2ast` faithfully
    +simulate those structures.  Be aware that the values of the sequences which are
    +considered "correct" will vary from one version of Python to another as the
    +formal grammar for the language is revised.  However, transporting code from one
    +Python version to another as source text will always allow correct parse trees
    +to be created in the target version, with the only restriction being that
    +migrating to an older version of the interpreter will not support more recent
    +language constructs.  The parse trees are not typically compatible from one
    +version to another, whereas source code has always been forward-compatible.
    +
    +Each element of the sequences returned by :func:`ast2list` or :func:`ast2tuple`
    +has a simple form.  Sequences representing non-terminal elements in the grammar
    +always have a length greater than one.  The first element is an integer which
    +identifies a production in the grammar.  These integers are given symbolic names
    +in the C header file :file:`Include/graminit.h` and the Python module
    +:mod:`symbol`.  Each additional element of the sequence represents a component
    +of the production as recognized in the input string: these are always sequences
    +which have the same form as the parent.  An important aspect of this structure
    +which should be noted is that keywords used to identify the parent node type,
    +such as the keyword :keyword:`if` in an :const:`if_stmt`, are included in the
    +node tree without any special treatment.  For example, the :keyword:`if` keyword
    +is represented by the tuple ``(1, 'if')``, where ``1`` is the numeric value
    +associated with all :const:`NAME` tokens, including variable and function names
    +defined by the user.  In an alternate form returned when line number information
    +is requested, the same token might be represented as ``(1, 'if', 12)``, where
    +the ``12`` represents the line number at which the terminal symbol was found.
    +
    +Terminal elements are represented in much the same way, but without any child
    +elements and the addition of the source text which was identified.  The example
    +of the :keyword:`if` keyword above is representative.  The various types of
    +terminal symbols are defined in the C header file :file:`Include/token.h` and
    +the Python module :mod:`token`.
    +
    +The AST objects are not required to support the functionality of this module,
    +but are provided for three purposes: to allow an application to amortize the
    +cost of processing complex parse trees, to provide a parse tree representation
    +which conserves memory space when compared to the Python list or tuple
    +representation, and to ease the creation of additional modules in C which
    +manipulate parse trees.  A simple "wrapper" class may be created in Python to
    +hide the use of AST objects.
    +
    +The :mod:`parser` module defines functions for a few distinct purposes.  The
    +most important purposes are to create AST objects and to convert AST objects to
    +other representations such as parse trees and compiled code objects, but there
    +are also functions which serve to query the type of parse tree represented by an
    +AST object.
    +
    +
    +.. seealso::
    +
    +   Module :mod:`symbol`
    +      Useful constants representing internal nodes of the parse tree.
    +
    +   Module :mod:`token`
    +      Useful constants representing leaf nodes of the parse tree and functions for
    +      testing node values.
    +
    +
    +.. _creating-asts:
    +
    +Creating AST Objects
    +--------------------
    +
    +AST objects may be created from source code or from a parse tree. When creating
    +an AST object from source, different functions are used to create the ``'eval'``
    +and ``'exec'`` forms.
    +
    +
    +.. function:: expr(source)
    +
    +   The :func:`expr` function parses the parameter *source* as if it were an input
    +   to ``compile(source, 'file.py', 'eval')``.  If the parse succeeds, an AST object
    +   is created to hold the internal parse tree representation, otherwise an
    +   appropriate exception is thrown.
    +
    +
    +.. function:: suite(source)
    +
    +   The :func:`suite` function parses the parameter *source* as if it were an input
    +   to ``compile(source, 'file.py', 'exec')``.  If the parse succeeds, an AST object
    +   is created to hold the internal parse tree representation, otherwise an
    +   appropriate exception is thrown.
    +
    +
    +.. function:: sequence2ast(sequence)
    +
    +   This function accepts a parse tree represented as a sequence and builds an
    +   internal representation if possible.  If it can validate that the tree conforms
    +   to the Python grammar and all nodes are valid node types in the host version of
    +   Python, an AST object is created from the internal representation and returned
    +   to the caller.  If there is a problem creating the internal representation, or
    +   if the tree cannot be validated, a :exc:`ParserError` exception is thrown.  An
    +   AST object created this way should not be assumed to compile correctly; normal
    +   exceptions thrown by compilation may still be initiated when the AST object is
    +   passed to :func:`compileast`.  This may indicate problems not related to syntax
    +   (such as a :exc:`MemoryError` exception), but may also be due to constructs such
    +   as the result of parsing ``del f(0)``, which escapes the Python parser but is
    +   checked by the bytecode compiler.
    +
    +   Sequences representing terminal tokens may be represented as either two-element
    +   lists of the form ``(1, 'name')`` or as three-element lists of the form ``(1,
    +   'name', 56)``.  If the third element is present, it is assumed to be a valid
    +   line number.  The line number may be specified for any subset of the terminal
    +   symbols in the input tree.
    +
    +
    +.. function:: tuple2ast(sequence)
    +
    +   This is the same function as :func:`sequence2ast`.  This entry point is
    +   maintained for backward compatibility.
    +
    +
    +.. _converting-asts:
    +
    +Converting AST Objects
    +----------------------
    +
    +AST objects, regardless of the input used to create them, may be converted to
    +parse trees represented as list- or tuple-trees, or may be compiled into
    +executable code objects.  Parse trees may be extracted with or without line
    +numbering information.
    +
    +
    +.. function:: ast2list(ast[, line_info])
    +
    +   This function accepts an AST object from the caller in *ast* and returns a
    +   Python list representing the equivalent parse tree.  The resulting list
    +   representation can be used for inspection or the creation of a new parse tree in
    +   list form.  This function does not fail so long as memory is available to build
    +   the list representation.  If the parse tree will only be used for inspection,
    +   :func:`ast2tuple` should be used instead to reduce memory consumption and
    +   fragmentation.  When the list representation is required, this function is
    +   significantly faster than retrieving a tuple representation and converting that
    +   to nested lists.
    +
    +   If *line_info* is true, line number information will be included for all
    +   terminal tokens as a third element of the list representing the token.  Note
    +   that the line number provided specifies the line on which the token *ends*.
    +   This information is omitted if the flag is false or omitted.
    +
    +
    +.. function:: ast2tuple(ast[, line_info])
    +
    +   This function accepts an AST object from the caller in *ast* and returns a
    +   Python tuple representing the equivalent parse tree.  Other than returning a
    +   tuple instead of a list, this function is identical to :func:`ast2list`.
    +
    +   If *line_info* is true, line number information will be included for all
    +   terminal tokens as a third element of the tuple representing the token.  This
    +   information is omitted if the flag is false or omitted.
    +
    +
    +.. function:: compileast(ast[, filename='<ast>'])
    +
    +   .. index::
    +      builtin: exec
    +      builtin: eval
    +
    +   The Python byte compiler can be invoked on an AST object to produce code objects
    +   which can be used as part of a call to the built-in :func:`exec` or :func:`eval`
    +   functions. This function provides the interface to the compiler, passing the
    +   internal parse tree from *ast* to the parser, using the source file name
    +   specified by the *filename* parameter. The default value supplied for *filename*
    +   indicates that the source was an AST object.
    +
    +   Compiling an AST object may result in exceptions related to compilation; an
    +   example would be a :exc:`SyntaxError` caused by the parse tree for ``del f(0)``:
    +   this statement is considered legal within the formal grammar for Python but is
    +   not a legal language construct.  The :exc:`SyntaxError` raised for this
    +   condition is actually generated by the Python byte-compiler normally, which is
    +   why it can be raised at this point by the :mod:`parser` module.  Most causes of
    +   compilation failure can be diagnosed programmatically by inspection of the parse
    +   tree.
    +
    +
    +.. _querying-asts:
    +
    +Queries on AST Objects
    +----------------------
    +
    +Two functions are provided which allow an application to determine if an AST was
    +created as an expression or a suite.  Neither of these functions can be used to
    +determine if an AST was created from source code via :func:`expr` or
    +:func:`suite` or from a parse tree via :func:`sequence2ast`.
    +
    +
    +.. function:: isexpr(ast)
    +
    +   .. index:: builtin: compile
    +
    +   When *ast* represents an ``'eval'`` form, this function returns true, otherwise
    +   it returns false.  This is useful, since code objects normally cannot be queried
    +   for this information using existing built-in functions.  Note that the code
    +   objects created by :func:`compileast` cannot be queried like this either, and
    +   are identical to those created by the built-in :func:`compile` function.
    +
    +
    +.. function:: issuite(ast)
    +
    +   This function mirrors :func:`isexpr` in that it reports whether an AST object
    +   represents an ``'exec'`` form, commonly known as a "suite."  It is not safe to
    +   assume that this function is equivalent to ``not isexpr(ast)``, as additional
    +   syntactic fragments may be supported in the future.
    +
    +
    +.. _ast-errors:
    +
    +Exceptions and Error Handling
    +-----------------------------
    +
    +The parser module defines a single exception, but may also pass other built-in
    +exceptions from other portions of the Python runtime environment.  See each
    +function for information about the exceptions it can raise.
    +
    +
    +.. exception:: ParserError
    +
    +   Exception raised when a failure occurs within the parser module.  This is
    +   generally produced for validation failures rather than the built-in
    +   :exc:`SyntaxError` thrown during normal parsing. The exception argument is
    +   either a string describing the reason for the failure or a tuple containing a
    +   sequence causing the failure from a parse tree passed to :func:`sequence2ast`
    +   and an explanatory string.  Calls to :func:`sequence2ast` need to be able to
    +   handle either type of exception, while calls to other functions in the module
    +   will only need to be aware of the simple string values.
    +
    +Note that the functions :func:`compileast`, :func:`expr`, and :func:`suite` may
    +throw exceptions which are normally thrown by the parsing and compilation
    +process.  These include the built-in exceptions :exc:`MemoryError`,
    +:exc:`OverflowError`, :exc:`SyntaxError`, and :exc:`SystemError`.  In these
    +cases, these exceptions carry all the meaning normally associated with them.
    +Refer to the descriptions of each function for detailed information.
    +
    +
    +.. _ast-objects:
    +
    +AST Objects
    +-----------
    +
    +Ordered and equality comparisons are supported between AST objects. Pickling of
    +AST objects (using the :mod:`pickle` module) is also supported.
    +
    +
    +.. data:: ASTType
    +
    +   The type of the objects returned by :func:`expr`, :func:`suite` and
    +   :func:`sequence2ast`.
    +
    +AST objects have the following methods:
    +
    +
    +.. method:: AST.compile([filename])
    +
    +   Same as ``compileast(ast, filename)``.
    +
    +
    +.. method:: AST.isexpr()
    +
    +   Same as ``isexpr(ast)``.
    +
    +
    +.. method:: AST.issuite()
    +
    +   Same as ``issuite(ast)``.
    +
    +
    +.. method:: AST.tolist([line_info])
    +
    +   Same as ``ast2list(ast, line_info)``.
    +
    +
    +.. method:: AST.totuple([line_info])
    +
    +   Same as ``ast2tuple(ast, line_info)``.
    +
    +
    +.. _ast-examples:
    +
    +Examples
    +--------
    +
    +.. index:: builtin: compile
    +
    +The parser module allows operations to be performed on the parse tree of Python
    +source code before the bytecode is generated, and provides for inspection of the
    +parse tree for information gathering purposes. Two examples are presented.  The
    +simple example demonstrates emulation of the :func:`compile` built-in function
    +and the complex example shows the use of a parse tree for information discovery.
    +
    +
    +Emulation of :func:`compile`
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +While many useful operations may take place between parsing and bytecode
    +generation, the simplest operation is to do nothing.  For this purpose, using
    +the :mod:`parser` module to produce an intermediate data structure is equivalent
    +to the code ::
    +
    +   >>> code = compile('a + 5', 'file.py', 'eval')
    +   >>> a = 5
    +   >>> eval(code)
    +   10
    +
    +The equivalent operation using the :mod:`parser` module is somewhat longer, and
    +allows the intermediate internal parse tree to be retained as an AST object::
    +
    +   >>> import parser
    +   >>> ast = parser.expr('a + 5')
    +   >>> code = ast.compile('file.py')
    +   >>> a = 5
    +   >>> eval(code)
    +   10
    +
    +An application which needs both AST and code objects can package this code into
    +readily available functions::
    +
    +   import parser
    +
    +   def load_suite(source_string):
    +       ast = parser.suite(source_string)
    +       return ast, ast.compile()
    +
    +   def load_expression(source_string):
    +       ast = parser.expr(source_string)
    +       return ast, ast.compile()
    +
    +
    +Information Discovery
    +^^^^^^^^^^^^^^^^^^^^^
    +
    +.. index::
    +   single: string; documentation
    +   single: docstrings
    +
    +Some applications benefit from direct access to the parse tree.  The remainder
    +of this section demonstrates how the parse tree provides access to module
    +documentation defined in docstrings without requiring that the code being
    +examined be loaded into a running interpreter via :keyword:`import`.  This can
    +be very useful for performing analyses of untrusted code.
    +
    +Generally, the example will demonstrate how the parse tree may be traversed to
    +distill interesting information.  Two functions and a set of classes are
    +developed which provide programmatic access to high level function and class
    +definitions provided by a module.  The classes extract information from the
    +parse tree and provide access to the information at a useful semantic level, one
    +function provides a simple low-level pattern matching capability, and the other
    +function defines a high-level interface to the classes by handling file
    +operations on behalf of the caller.  All source files mentioned here which are
    +not part of the Python installation are located in the :file:`Demo/parser/`
    +directory of the distribution.
    +
    +The dynamic nature of Python allows the programmer a great deal of flexibility,
    +but most modules need only a limited measure of this when defining classes,
    +functions, and methods.  In this example, the only definitions that will be
    +considered are those which are defined in the top level of their context, e.g.,
    +a function defined by a :keyword:`def` statement at column zero of a module, but
    +not a function defined within a branch of an :keyword:`if` ... :keyword:`else`
    +construct, though there are some good reasons for doing so in some situations.
    +Nesting of definitions will be handled by the code developed in the example.
    +
    +To construct the upper-level extraction methods, we need to know what the parse
    +tree structure looks like and how much of it we actually need to be concerned
    +about.  Python uses a moderately deep parse tree so there are a large number of
    +intermediate nodes.  It is important to read and understand the formal grammar
    +used by Python.  This is specified in the file :file:`Grammar/Grammar` in the
    +distribution. Consider the simplest case of interest when searching for
    +docstrings: a module consisting of a docstring and nothing else.  (See file
    +:file:`docstring.py`.) ::
    +
    +   """Some documentation.
    +   """
    +
    +Using the interpreter to take a look at the parse tree, we find a bewildering
    +mass of numbers and parentheses, with the documentation buried deep in nested
    +tuples. ::
    +
    +   >>> import parser
    +   >>> import pprint
    +   >>> ast = parser.suite(open('docstring.py').read())
    +   >>> tup = ast.totuple()
    +   >>> pprint.pprint(tup)
    +   (257,
    +    (264,
    +     (265,
    +      (266,
    +       (267,
    +        (307,
    +         (287,
    +          (288,
    +           (289,
    +            (290,
    +             (292,
    +              (293,
    +               (294,
    +                (295,
    +                 (296,
    +                  (297,
    +                   (298,
    +                    (299,
    +                     (300, (3, '"""Some documentation.\n"""'))))))))))))))))),
    +      (4, ''))),
    +    (4, ''),
    +    (0, ''))
    +
    +The numbers at the first element of each node in the tree are the node types;
    +they map directly to terminal and non-terminal symbols in the grammar.
    +Unfortunately, they are represented as integers in the internal representation,
    +and the Python structures generated do not change that.  However, the
    +:mod:`symbol` and :mod:`token` modules provide symbolic names for the node types
    +and dictionaries which map from the integers to the symbolic names for the node
    +types.
    +
    +In the output presented above, the outermost tuple contains four elements: the
    +integer ``257`` and three additional tuples.  Node type ``257`` has the symbolic
    +name :const:`file_input`.  Each of these inner tuples contains an integer as the
    +first element; these integers, ``264``, ``4``, and ``0``, represent the node
    +types :const:`stmt`, :const:`NEWLINE`, and :const:`ENDMARKER`, respectively.
    +Note that these values may change depending on the version of Python you are
    +using; consult :file:`symbol.py` and :file:`token.py` for details of the
    +mapping.  It should be fairly clear that the outermost node is related primarily
    +to the input source rather than the contents of the file, and may be disregarded
    +for the moment.  The :const:`stmt` node is much more interesting.  In
    +particular, all docstrings are found in subtrees which are formed exactly as
    +this node is formed, with the only difference being the string itself.  The
    +association between the docstring in a similar tree and the defined entity
    +(class, function, or module) which it describes is given by the position of the
    +docstring subtree within the tree defining the described structure.
    +
    +By replacing the actual docstring with something to signify a variable component
    +of the tree, we allow a simple pattern matching approach to check any given
    +subtree for equivalence to the general pattern for docstrings.  Since the
    +example demonstrates information extraction, we can safely require that the tree
    +be in tuple form rather than list form, allowing a simple variable
    +representation to be ``['variable_name']``.  A simple recursive function can
    +implement the pattern matching, returning a Boolean and a dictionary of variable
    +name to value mappings.  (See file :file:`example.py`.) ::
    +
    +   from types import ListType, TupleType
    +
    +   def match(pattern, data, vars=None):
    +       if vars is None:
    +           vars = {}
    +       if type(pattern) is ListType:
    +           vars[pattern[0]] = data
    +           return 1, vars
    +       if type(pattern) is not TupleType:
    +           return (pattern == data), vars
    +       if len(data) != len(pattern):
    +           return 0, vars
    +       for pattern, data in map(None, pattern, data):
    +           same, vars = match(pattern, data, vars)
    +           if not same:
    +               break
    +       return same, vars
    +
    +Using this simple representation for syntactic variables and the symbolic node
    +types, the pattern for the candidate docstring subtrees becomes fairly readable.
    +(See file :file:`example.py`.) ::
    +
    +   import symbol
    +   import token
    +
    +   DOCSTRING_STMT_PATTERN = (
    +       symbol.stmt,
    +       (symbol.simple_stmt,
    +        (symbol.small_stmt,
    +         (symbol.expr_stmt,
    +          (symbol.testlist,
    +           (symbol.test,
    +            (symbol.and_test,
    +             (symbol.not_test,
    +              (symbol.comparison,
    +               (symbol.expr,
    +                (symbol.xor_expr,
    +                 (symbol.and_expr,
    +                  (symbol.shift_expr,
    +                   (symbol.arith_expr,
    +                    (symbol.term,
    +                     (symbol.factor,
    +                      (symbol.power,
    +                       (symbol.atom,
    +                        (token.STRING, ['docstring'])
    +                        )))))))))))))))),
    +        (token.NEWLINE, '')
    +        ))
    +
    +Using the :func:`match` function with this pattern, extracting the module
    +docstring from the parse tree created previously is easy::
    +
    +   >>> found, vars = match(DOCSTRING_STMT_PATTERN, tup[1])
    +   >>> found
    +   1
    +   >>> vars
    +   {'docstring': '"""Some documentation.\n"""'}
    +
    +Once specific data can be extracted from a location where it is expected, the
    +question of where information can be expected needs to be answered.  When
    +dealing with docstrings, the answer is fairly simple: the docstring is the first
    +:const:`stmt` node in a code block (:const:`file_input` or :const:`suite` node
    +types).  A module consists of a single :const:`file_input` node, and class and
    +function definitions each contain exactly one :const:`suite` node.  Classes and
    +functions are readily identified as subtrees of code block nodes which start
    +with ``(stmt, (compound_stmt, (classdef, ...`` or ``(stmt, (compound_stmt,
    +(funcdef, ...``.  Note that these subtrees cannot be matched by :func:`match`
    +since it does not support multiple sibling nodes to match without regard to
    +number.  A more elaborate matching function could be used to overcome this
    +limitation, but this is sufficient for the example.
    +
    +Given the ability to determine whether a statement might be a docstring and
    +extract the actual string from the statement, some work needs to be performed to
    +walk the parse tree for an entire module and extract information about the names
    +defined in each context of the module and associate any docstrings with the
    +names.  The code to perform this work is not complicated, but bears some
    +explanation.
    +
    +The public interface to the classes is straightforward and should probably be
    +somewhat more flexible.  Each "major" block of the module is described by an
    +object providing several methods for inquiry and a constructor which accepts at
    +least the subtree of the complete parse tree which it represents.  The
    +:class:`ModuleInfo` constructor accepts an optional *name* parameter since it
    +cannot otherwise determine the name of the module.
    +
    +The public classes include :class:`ClassInfo`, :class:`FunctionInfo`, and
    +:class:`ModuleInfo`.  All objects provide the methods :meth:`get_name`,
    +:meth:`get_docstring`, :meth:`get_class_names`, and :meth:`get_class_info`.  The
    +:class:`ClassInfo` objects support :meth:`get_method_names` and
    +:meth:`get_method_info` while the other classes provide
    +:meth:`get_function_names` and :meth:`get_function_info`.
    +
    +Within each of the forms of code block that the public classes represent, most
    +of the required information is in the same form and is accessed in the same way,
    +with classes having the distinction that functions defined at the top level are
    +referred to as "methods." Since the difference in nomenclature reflects a real
    +semantic distinction from functions defined outside of a class, the
    +implementation needs to maintain the distinction. Hence, most of the
    +functionality of the public classes can be implemented in a common base class,
    +:class:`SuiteInfoBase`, with the accessors for function and method information
    +provided elsewhere. Note that there is only one class which represents function
    +and method information; this parallels the use of the :keyword:`def` statement
    +to define both types of elements.
    +
    +Most of the accessor functions are declared in :class:`SuiteInfoBase` and do not
    +need to be overridden by subclasses.  More importantly, the extraction of most
    +information from a parse tree is handled through a method called by the
    +:class:`SuiteInfoBase` constructor.  The example code for most of the classes is
    +clear when read alongside the formal grammar, but the method which recursively
    +creates new information objects requires further examination.  Here is the
    +relevant part of the :class:`SuiteInfoBase` definition from :file:`example.py`::
    +
    +   class SuiteInfoBase:
    +       _docstring = ''
    +       _name = ''
    +
    +       def __init__(self, tree = None):
    +           self._class_info = {}
    +           self._function_info = {}
    +           if tree:
    +               self._extract_info(tree)
    +
    +       def _extract_info(self, tree):
    +           # extract docstring
    +           if len(tree) == 2:
    +               found, vars = match(DOCSTRING_STMT_PATTERN[1], tree[1])
    +           else:
    +               found, vars = match(DOCSTRING_STMT_PATTERN, tree[3])
    +           if found:
    +               self._docstring = eval(vars['docstring'])
    +           # discover inner definitions
    +           for node in tree[1:]:
    +               found, vars = match(COMPOUND_STMT_PATTERN, node)
    +               if found:
    +                   cstmt = vars['compound']
    +                   if cstmt[0] == symbol.funcdef:
    +                       name = cstmt[2][1]
    +                       self._function_info[name] = FunctionInfo(cstmt)
    +                   elif cstmt[0] == symbol.classdef:
    +                       name = cstmt[2][1]
    +                       self._class_info[name] = ClassInfo(cstmt)
    +
    +After initializing some internal state, the constructor calls the
    +:meth:`_extract_info` method.  This method performs the bulk of the information
    +extraction which takes place in the entire example.  The extraction has two
    +distinct phases: the location of the docstring for the parse tree passed in, and
    +the discovery of additional definitions within the code block represented by the
    +parse tree.
    +
    +The initial :keyword:`if` test determines whether the nested suite is of the
    +"short form" or the "long form."  The short form is used when the code block is
    +on the same line as the definition of the code block, as in ::
    +
    +   def square(x): "Square an argument."; return x ** 2
    +
    +while the long form uses an indented block and allows nested definitions::
    +
    +   def make_power(exp):
    +       "Make a function that raises an argument to the exponent `exp'."
    +       def raiser(x, y=exp):
    +           return x ** y
    +       return raiser
    +
    +When the short form is used, the code block may contain a docstring as the
    +first, and possibly only, :const:`small_stmt` element.  The extraction of such a
    +docstring is slightly different and requires only a portion of the complete
    +pattern used in the more common case.  As implemented, the docstring will only
    +be found if there is only one :const:`small_stmt` node in the
    +:const:`simple_stmt` node. Since most functions and methods which use the short
    +form do not provide a docstring, this may be considered sufficient.  The
    +extraction of the docstring proceeds using the :func:`match` function as
    +described above, and the value of the docstring is stored as an attribute of the
    +:class:`SuiteInfoBase` object.
    +
    +After docstring extraction, a simple definition discovery algorithm operates on
    +the :const:`stmt` nodes of the :const:`suite` node.  The special case of the
    +short form is not tested; since there are no :const:`stmt` nodes in the short
    +form, the algorithm will silently skip the single :const:`simple_stmt` node and
    +correctly not discover any nested definitions.
    +
    +Each statement in the code block is categorized as a class definition, function
    +or method definition, or something else.  For the definition statements, the
    +name of the element defined is extracted and a representation object appropriate
    +to the definition is created with the defining subtree passed as an argument to
    +the constructor.  The representation objects are stored in instance variables
    +and may be retrieved by name using the appropriate accessor methods.
    +
    +The public classes provide any accessors required which are more specific than
    +those provided by the :class:`SuiteInfoBase` class, but the real extraction
    +algorithm remains common to all forms of code blocks.  A high-level function can
    +be used to extract the complete set of information from a source file.  (See
    +file :file:`example.py`.) ::
    +
    +   def get_docs(fileName):
    +       import os
    +       import parser
    +
    +       source = open(fileName).read()
    +       basename = os.path.basename(os.path.splitext(fileName)[0])
    +       ast = parser.suite(source)
    +       return ModuleInfo(ast.totuple(), basename)
    +
    +This provides an easy-to-use interface to the documentation of a module.  If
    +information is required which is not extracted by the code of this example, the
    +code may be extended at clearly defined points to provide additional
    +capabilities.
    +
    diff --git a/Doc/library/pdb.rst b/Doc/library/pdb.rst
    new file mode 100644
    index 0000000..804dd23
    --- /dev/null
    +++ b/Doc/library/pdb.rst
    @@ -0,0 +1,409 @@
    +
    +.. _debugger:
    +
    +*******************
    +The Python Debugger
    +*******************
    +
    +.. module:: pdb
    +   :synopsis: The Python debugger for interactive interpreters.
    +
    +
    +.. index:: single: debugging
    +
    +The module :mod:`pdb` defines an interactive source code debugger for Python
    +programs.  It supports setting (conditional) breakpoints and single stepping at
    +the source line level, inspection of stack frames, source code listing, and
    +evaluation of arbitrary Python code in the context of any stack frame.  It also
    +supports post-mortem debugging and can be called under program control.
    +
    +.. index::
    +   single: Pdb (class in pdb)
    +   module: bdb
    +   module: cmd
    +
    +The debugger is extensible --- it is actually defined as the class :class:`Pdb`.
    +This is currently undocumented but easily understood by reading the source.  The
    +extension interface uses the modules :mod:`bdb` (undocumented) and :mod:`cmd`.
    +
    +The debugger's prompt is ``(Pdb)``. Typical usage to run a program under control
    +of the debugger is::
    +
    +   >>> import pdb
    +   >>> import mymodule
    +   >>> pdb.run('mymodule.test()')
    +   > <string>(0)?()
    +   (Pdb) continue
    +   > <string>(1)?()
    +   (Pdb) continue
    +   NameError: 'spam'
    +   > <string>(1)?()
    +   (Pdb) 
    +
    +:file:`pdb.py` can also be invoked as a script to debug other scripts.  For
    +example::
    +
    +   python -m pdb myscript.py
    +
    +When invoked as a script, pdb will automatically enter post-mortem debugging if
    +the program being debugged exits abnormally. After post-mortem debugging (or
    +after normal exit of the program), pdb will restart the program. Automatic
    +restarting preserves pdb's state (such as breakpoints) and in most cases is more
    +useful than quitting the debugger upon the program's exit.
    +
    +.. versionadded:: 2.4
    +   Restarting post-mortem behavior added.
    +
    +Typical usage to inspect a crashed program is::
    +
    +   >>> import pdb
    +   >>> import mymodule
    +   >>> mymodule.test()
    +   Traceback (most recent call last):
    +     File "<stdin>", line 1, in ?
    +     File "./mymodule.py", line 4, in test
    +       test2()
    +     File "./mymodule.py", line 3, in test2
    +       print spam
    +   NameError: spam
    +   >>> pdb.pm()
    +   > ./mymodule.py(3)test2()
    +   -> print spam
    +   (Pdb) 
    +
    +The module defines the following functions; each enters the debugger in a
    +slightly different way:
    +
    +
    +.. function:: run(statement[, globals[, locals]])
    +
    +   Execute the *statement* (given as a string) under debugger control.  The
    +   debugger prompt appears before any code is executed; you can set breakpoints and
    +   type ``continue``, or you can step through the statement using ``step`` or
    +   ``next`` (all these commands are explained below).  The optional *globals* and
    +   *locals* arguments specify the environment in which the code is executed; by
    +   default the dictionary of the module :mod:`__main__` is used.  (See the
    +   explanation of the built-in :func:`exec` or :func:`eval` functions.)
    +
    +
    +.. function:: runeval(expression[, globals[, locals]])
    +
    +   Evaluate the *expression* (given as a string) under debugger control.  When
    +   :func:`runeval` returns, it returns the value of the expression.  Otherwise this
    +   function is similar to :func:`run`.
    +
    +
    +.. function:: runcall(function[, argument, ...])
    +
    +   Call the *function* (a function or method object, not a string) with the given
    +   arguments.  When :func:`runcall` returns, it returns whatever the function call
    +   returned.  The debugger prompt appears as soon as the function is entered.
    +
    +
    +.. function:: set_trace()
    +
    +   Enter the debugger at the calling stack frame.  This is useful to hard-code a
    +   breakpoint at a given point in a program, even if the code is not otherwise
    +   being debugged (e.g. when an assertion fails).
    +
    +
    +.. function:: post_mortem(traceback)
    +
    +   Enter post-mortem debugging of the given *traceback* object.
    +
    +
    +.. function:: pm()
    +
    +   Enter post-mortem debugging of the traceback found in ``sys.last_traceback``.
    +
    +
    +.. _debugger-commands:
    +
    +Debugger Commands
    +=================
    +
    +The debugger recognizes the following commands.  Most commands can be
    +abbreviated to one or two letters; e.g. ``h(elp)`` means that either ``h`` or
    +``help`` can be used to enter the help command (but not ``he`` or ``hel``, nor
    +``H`` or ``Help`` or ``HELP``).  Arguments to commands must be separated by
    +whitespace (spaces or tabs).  Optional arguments are enclosed in square brackets
    +(``[]``) in the command syntax; the square brackets must not be typed.
    +Alternatives in the command syntax are separated by a vertical bar (``|``).
    +
    +Entering a blank line repeats the last command entered.  Exception: if the last
    +command was a ``list`` command, the next 11 lines are listed.
    +
    +Commands that the debugger doesn't recognize are assumed to be Python statements
    +and are executed in the context of the program being debugged.  Python
    +statements can also be prefixed with an exclamation point (``!``).  This is a
    +powerful way to inspect the program being debugged; it is even possible to
    +change a variable or call a function.  When an exception occurs in such a
    +statement, the exception name is printed but the debugger's state is not
    +changed.
    +
    +Multiple commands may be entered on a single line, separated by ``;;``.  (A
    +single ``;`` is not used as it is the separator for multiple commands in a line
    +that is passed to the Python parser.) No intelligence is applied to separating
    +the commands; the input is split at the first ``;;`` pair, even if it is in the
    +middle of a quoted string.
    +
    +The debugger supports aliases.  Aliases can have parameters, which allow a
    +certain level of adaptability to the context under examination.
    +
    +.. index::
    +   pair: .pdbrc; file
    +   triple: debugger; configuration; file
    +
    +If a file :file:`.pdbrc`  exists in the user's home directory or in the current
    +directory, it is read in and executed as if it had been typed at the debugger
    +prompt. This is particularly useful for aliases.  If both files exist, the one
    +in the home directory is read first and aliases defined there can be overridden
    +by the local file.
    +
    +h(elp) [*command*]
    +   Without argument, print the list of available commands.  With a *command* as
    +   argument, print help about that command.  ``help pdb`` displays the full
    +   documentation file; if the environment variable :envvar:`PAGER` is defined, the
    +   file is piped through that command instead.  Since the *command* argument must
    +   be an identifier, ``help exec`` must be entered to get help on the ``!``
    +   command.
    +
    +w(here)
    +   Print a stack trace, with the most recent frame at the bottom.  An arrow
    +   indicates the current frame, which determines the context of most commands.
    +
    +d(own)
    +   Move the current frame one level down in the stack trace (to a newer frame).
    +
    +u(p)
    +   Move the current frame one level up in the stack trace (to an older frame).
    +
    +b(reak) [[*filename*:]*lineno* | *function*[, *condition*]]
    +   With a *lineno* argument, set a break there in the current file.  With a
    +   *function* argument, set a break at the first executable statement within that
    +   function. The line number may be prefixed with a filename and a colon, to
    +   specify a breakpoint in another file (probably one that hasn't been loaded yet).
    +   The file is searched on ``sys.path``. Note that each breakpoint is assigned a
    +   number to which all the other breakpoint commands refer.
    +
    +   If a second argument is present, it is an expression which must evaluate to true
    +   before the breakpoint is honored.
    +
    +   Without argument, list all breaks, including for each breakpoint, the number of
    +   times that breakpoint has been hit, the current ignore count, and the associated
    +   condition if any.
    +
    +tbreak [[*filename*:]*lineno* | *function*[, *condition*]]
    +   Temporary breakpoint, which is removed automatically when it is first hit.  The
    +   arguments are the same as break.
    +
    +cl(ear) [*bpnumber* [*bpnumber ...*]]
    +   With a space separated list of breakpoint numbers, clear those breakpoints.
    +   Without argument, clear all breaks (but first ask confirmation).
    +
    +disable [*bpnumber* [*bpnumber ...*]]
    +   Disables the breakpoints given as a space separated list of breakpoint numbers.
    +   Disabling a breakpoint means it cannot cause the program to stop execution, but
    +   unlike clearing a breakpoint, it remains in the list of breakpoints and can be
    +   (re-)enabled.
    +
    +enable [*bpnumber* [*bpnumber ...*]]
    +   Enables the breakpoints specified.
    +
    +ignore *bpnumber* [*count*]
    +   Sets the ignore count for the given breakpoint number.  If count is omitted, the
    +   ignore count is set to 0.  A breakpoint becomes active when the ignore count is
    +   zero.  When non-zero, the count is decremented each time the breakpoint is
    +   reached and the breakpoint is not disabled and any associated condition
    +   evaluates to true.
    +
    +condition *bpnumber* [*condition*]
    +   Condition is an expression which must evaluate to true before the breakpoint is
    +   honored.  If condition is absent, any existing condition is removed; i.e., the
    +   breakpoint is made unconditional.
    +
    +commands [*bpnumber*]
    +   Specify a list of commands for breakpoint number *bpnumber*.  The commands
    +   themselves appear on the following lines.  Type a line containing just 'end' to
    +   terminate the commands. An example::
    +
    +      (Pdb) commands 1
    +      (com) print some_variable
    +      (com) end
    +      (Pdb)
    +
    +   To remove all commands from a breakpoint, type commands and follow it
    +   immediately with  end; that is, give no commands.
    +
    +   With no *bpnumber* argument, commands refers to the last breakpoint set.
    +
    +   You can use breakpoint commands to start your program up again. Simply use the
    +   continue command, or step, or any other command that resumes execution.
    +
    +   Specifying any command resuming execution (currently continue, step, next,
    +   return, jump, quit and their abbreviations) terminates the command list (as if
    +   that command was immediately followed by end). This is because any time you
    +   resume execution (even with a simple next or step), you may encounter another
    +   breakpoint--which could have its own command list, leading to ambiguities about
    +   which list to execute.
    +
    +   If you use the 'silent' command in the command list, the usual message about
    +   stopping at a breakpoint is not printed.  This may be desirable for breakpoints
    +   that are to print a specific message and then continue.  If none of the other
    +   commands print anything, you see no sign that the breakpoint was reached.
    +
    +   .. versionadded:: 2.5
    +
    +s(tep)
    +   Execute the current line, stop at the first possible occasion (either in a
    +   function that is called or on the next line in the current function).
    +
    +n(ext)
    +   Continue execution until the next line in the current function is reached or it
    +   returns.  (The difference between ``next`` and ``step`` is that ``step`` stops
    +   inside a called function, while ``next`` executes called functions at (nearly)
    +   full speed, only stopping at the next line in the current function.)
    +
    +r(eturn)
    +   Continue execution until the current function returns.
    +
    +c(ont(inue))
    +   Continue execution, only stop when a breakpoint is encountered.
    +
    +j(ump) *lineno*
    +   Set the next line that will be executed.  Only available in the bottom-most
    +   frame.  This lets you jump back and execute code again, or jump forward to skip
    +   code that you don't want to run.
    +
    +   It should be noted that not all jumps are allowed --- for instance it is not
    +   possible to jump into the middle of a :keyword:`for` loop or out of a
    +   :keyword:`finally` clause.
    +
    +l(ist) [*first*[, *last*]]
    +   List source code for the current file.  Without arguments, list 11 lines around
    +   the current line or continue the previous listing.  With one argument, list 11
    +   lines around that line.  With two arguments, list the given range; if the
    +   second argument is less than the first, it is interpreted as a count.
    +
    +a(rgs)
    +   Print the argument list of the current function.
    +
    +p *expression*
    +   Evaluate the *expression* in the current context and print its value.
    +
    +   .. note::
    +
    +      ``print`` can also be used, but is not a debugger command --- this executes the
    +      Python :keyword:`print` statement.
    +
    +pp *expression*
    +   Like the ``p`` command, except the value of the expression is pretty-printed
    +   using the :mod:`pprint` module.
    +
    +alias [*name* [command]]
    +   Creates an alias called *name* that executes *command*.  The command must *not*
    +   be enclosed in quotes.  Replaceable parameters can be indicated by ``%1``,
    +   ``%2``, and so on, while ``%*`` is replaced by all the parameters.  If no
    +   command is given, the current alias for *name* is shown. If no arguments are
    +   given, all aliases are listed.
    +
    +   Aliases may be nested and can contain anything that can be legally typed at the
    +   pdb prompt.  Note that internal pdb commands *can* be overridden by aliases.
    +   Such a command is then hidden until the alias is removed.  Aliasing is
    +   recursively applied to the first word of the command line; all other words in
    +   the line are left alone.
    +
    +   As an example, here are two useful aliases (especially when placed in the
    +   :file:`.pdbrc` file)::
    +
    +      #Print instance variables (usage "pi classInst")
    +      alias pi for k in %1.__dict__.keys(): print "%1.",k,"=",%1.__dict__[k]
    +      #Print instance variables in self
    +      alias ps pi self
    +
    +unalias *name*
    +   Deletes the specified alias.
    +
    +[!]*statement*
    +   Execute the (one-line) *statement* in the context of the current stack frame.
    +   The exclamation point can be omitted unless the first word of the statement
    +   resembles a debugger command. To set a global variable, you can prefix the
    +   assignment command with a ``global`` command on the same line, e.g.::
    +
    +      (Pdb) global list_options; list_options = ['-l']
    +      (Pdb)
    +
    +run [*args* ...]
    +   Restart the debugged Python program. If an argument is supplied, it is split
    +   with "shlex" and the result is used as the new sys.argv. History, breakpoints,
    +   actions and debugger options are preserved. "restart" is an alias for "run".
    +
    +   .. versionadded:: 2.6
    +
    +q(uit)
    +   Quit from the debugger. The program being executed is aborted.
    +
    +
    +.. _debugger-hooks:
    +
    +How It Works
    +============
    +
    +Some changes were made to the interpreter:
    +
    +* ``sys.settrace(func)`` sets the global trace function
    +
    +* there can also be a local trace function (see later)
    +
    +Trace functions have three arguments: *frame*, *event*, and *arg*. *frame* is
    +the current stack frame.  *event* is a string: ``'call'``, ``'line'``,
    +``'return'``, ``'exception'``, ``'c_call'``, ``'c_return'``, or
    +``'c_exception'``. *arg* depends on the event type.
    +
    +The global trace function is invoked (with *event* set to ``'call'``) whenever a
    +new local scope is entered; it should return a reference to the local trace
    +function to be used for that scope, or ``None`` if the scope shouldn't be traced.
    +
    +The local trace function should return a reference to itself (or to another
    +function for further tracing in that scope), or ``None`` to turn off tracing in
    +that scope.
    +
    +Instance methods are accepted (and very useful!) as trace functions.
    +
    +The events have the following meaning:
    +
    +``'call'``
    +   A function is called (or some other code block entered).  The global trace
    +   function is called; *arg* is ``None``; the return value specifies the local
    +   trace function.
    +
    +``'line'``
    +   The interpreter is about to execute a new line of code (sometimes multiple line
    +   events on one line exist).  The local trace function is called; *arg* is
    +   ``None``; the return value specifies the new local trace function.
    +
    +``'return'``
    +   A function (or other code block) is about to return.  The local trace function
    +   is called; *arg* is the value that will be returned.  The trace function's
    +   return value is ignored.
    +
    +``'exception'``
    +   An exception has occurred.  The local trace function is called; *arg* is a
    +   triple ``(exception, value, traceback)``; the return value specifies the new
    +   local trace function.
    +
    +``'c_call'``
    +   A C function is about to be called.  This may be an extension function or a
    +   builtin.  *arg* is the C function object.
    +
    +``'c_return'``
    +   A C function has returned. *arg* is ``None``.
    +
    +``'c_exception'``
    +   A C function has thrown an exception.  *arg* is ``None``.
    +
    +Note that as an exception is propagated down the chain of callers, an
    +``'exception'`` event is generated at each level.
    +
    +For more information on code and frame objects, refer to :ref:`types`.
    +
    diff --git a/Doc/library/persistence.rst b/Doc/library/persistence.rst
    new file mode 100644
    index 0000000..78e40f6
    --- /dev/null
    +++ b/Doc/library/persistence.rst
    @@ -0,0 +1,32 @@
    +
    +.. _persistence:
    +
    +****************
    +Data Persistence
    +****************
    +
    +The modules described in this chapter support storing Python data in a
    +persistent form on disk.  The :mod:`pickle` and :mod:`marshal` modules can turn
    +many Python data types into a stream of bytes and then recreate the objects from
    +the bytes.  The various DBM-related modules support a family of hash-based file
    +formats that store a mapping of strings to other strings.  The :mod:`bsddb`
    +module also provides such disk-based string-to-string mappings based on hashing,
    +and also supports B-Tree and record-based formats.
    +
    +The list of modules described in this chapter is:
    +
    +
    +.. toctree::
    +
    +   pickle.rst
    +   copy_reg.rst
    +   shelve.rst
    +   marshal.rst
    +   anydbm.rst
    +   whichdb.rst
    +   dbm.rst
    +   gdbm.rst
    +   dbhash.rst
    +   bsddb.rst
    +   dumbdbm.rst
    +   sqlite3.rst
    diff --git a/Doc/library/pickle.rst b/Doc/library/pickle.rst
    new file mode 100644
    index 0000000..ab19ff8
    --- /dev/null
    +++ b/Doc/library/pickle.rst
    @@ -0,0 +1,868 @@
    +
    +:mod:`pickle` --- Python object serialization
    +=============================================
    +
    +.. index::
    +   single: persistence
    +   pair: persistent; objects
    +   pair: serializing; objects
    +   pair: marshalling; objects
    +   pair: flattening; objects
    +   pair: pickling; objects
    +
    +.. module:: pickle
    +   :synopsis: Convert Python objects to streams of bytes and back.
    +
    +
    +.. % Substantial improvements by Jim Kerr .
    +.. % Rewritten by Barry Warsaw 
    +
    +The :mod:`pickle` module implements a fundamental, but powerful algorithm for
    +serializing and de-serializing a Python object structure.  "Pickling" is the
    +process whereby a Python object hierarchy is converted into a byte stream, and
    +"unpickling" is the inverse operation, whereby a byte stream is converted back
    +into an object hierarchy.  Pickling (and unpickling) is alternatively known as
    +"serialization", "marshalling," [#]_ or "flattening"; however, to avoid
    +confusion, the terms used here are "pickling" and "unpickling".
    +
    +This documentation describes both the :mod:`pickle` module and the
    +:mod:`cPickle` module.
    +
    +
    +Relationship to other Python modules
    +------------------------------------
    +
    +The :mod:`pickle` module has an optimized cousin called the :mod:`cPickle`
    +module.  As its name implies, :mod:`cPickle` is written in C, so it can be up to
    +1000 times faster than :mod:`pickle`.  However it does not support subclassing
    +of the :func:`Pickler` and :func:`Unpickler` classes, because in :mod:`cPickle`
    +these are functions, not classes.  Most applications have no need for this
    +functionality, and can benefit from the improved performance of :mod:`cPickle`.
    +Other than that, the interfaces of the two modules are nearly identical; the
    +common interface is described in this manual and differences are pointed out
    +where necessary.  In the following discussions, we use the term "pickle" to
    +collectively describe the :mod:`pickle` and :mod:`cPickle` modules.
    +
    +The data streams the two modules produce are guaranteed to be interchangeable.
    +
    +Python has a more primitive serialization module called :mod:`marshal`, but in
    +general :mod:`pickle` should always be the preferred way to serialize Python
    +objects.  :mod:`marshal` exists primarily to support Python's :file:`.pyc`
    +files.
    +
    +The :mod:`pickle` module differs from :mod:`marshal` in several significant ways:
    +
    +* The :mod:`pickle` module keeps track of the objects it has already serialized,
    +  so that later references to the same object won't be serialized again.
    +  :mod:`marshal` doesn't do this.
    +
    +  This has implications both for recursive objects and object sharing.  Recursive
    +  objects are objects that contain references to themselves.  These are not
    +  handled by marshal, and in fact, attempting to marshal recursive objects will
    +  crash your Python interpreter.  Object sharing happens when there are multiple
    +  references to the same object in different places in the object hierarchy being
    +  serialized.  :mod:`pickle` stores such objects only once, and ensures that all
    +  other references point to the master copy.  Shared objects remain shared, which
    +  can be very important for mutable objects.
    +
    +* :mod:`marshal` cannot be used to serialize user-defined classes and their
    +  instances.  :mod:`pickle` can save and restore class instances transparently,
    +  however the class definition must be importable and live in the same module as
    +  when the object was stored.
    +
    +* The :mod:`marshal` serialization format is not guaranteed to be portable
    +  across Python versions.  Because its primary job in life is to support
    +  :file:`.pyc` files, the Python implementers reserve the right to change the
    +  serialization format in non-backwards compatible ways should the need arise.
    +  The :mod:`pickle` serialization format is guaranteed to be backwards compatible
    +  across Python releases.
    +
    +.. warning::
    +
    +   The :mod:`pickle` module is not intended to be secure against erroneous or
    +   maliciously constructed data.  Never unpickle data received from an untrusted or
    +   unauthenticated source.
    +
    +Note that serialization is a more primitive notion than persistence; although
    +:mod:`pickle` reads and writes file objects, it does not handle the issue of
    +naming persistent objects, nor the (even more complicated) issue of concurrent
    +access to persistent objects.  The :mod:`pickle` module can transform a complex
    +object into a byte stream and it can transform the byte stream into an object
    +with the same internal structure.  Perhaps the most obvious thing to do with
    +these byte streams is to write them onto a file, but it is also conceivable to
    +send them across a network or store them in a database.  The module
    +:mod:`shelve` provides a simple interface to pickle and unpickle objects on
    +DBM-style database files.
    +
    +
    +Data stream format
    +------------------
    +
    +.. index::
    +   single: XDR
    +   single: External Data Representation
    +
    +The data format used by :mod:`pickle` is Python-specific.  This has the
    +advantage that there are no restrictions imposed by external standards such as
    +XDR (which can't represent pointer sharing); however it means that non-Python
    +programs may not be able to reconstruct pickled Python objects.
    +
    +By default, the :mod:`pickle` data format uses a printable ASCII representation.
    +This is slightly more voluminous than a binary representation.  The big
    +advantage of using printable ASCII (and of some other characteristics of
    +:mod:`pickle`'s representation) is that for debugging or recovery purposes it is
    +possible for a human to read the pickled file with a standard text editor.
    +
    +There are currently 3 different protocols which can be used for pickling.
    +
    +* Protocol version 0 is the original ASCII protocol and is backwards compatible
    +  with earlier versions of Python.
    +
    +* Protocol version 1 is the old binary format which is also compatible with
    +  earlier versions of Python.
    +
    +* Protocol version 2 was introduced in Python 2.3.  It provides much more
    +  efficient pickling of new-style classes.
    +
    +Refer to :pep:`307` for more information.
    +
    +If a *protocol* is not specified, protocol 0 is used. If *protocol* is specified
    +as a negative value or :const:`HIGHEST_PROTOCOL`, the highest protocol version
    +available will be used.
    +
    +.. versionchanged:: 2.3
    +   Introduced the *protocol* parameter.
    +
    +A binary format, which is slightly more efficient, can be chosen by specifying a
    +*protocol* version >= 1.
    +
    +
    +Usage
    +-----
    +
    +To serialize an object hierarchy, you first create a pickler, then you call the
    +pickler's :meth:`dump` method.  To de-serialize a data stream, you first create
    +an unpickler, then you call the unpickler's :meth:`load` method.  The
    +:mod:`pickle` module provides the following constant:
    +
    +
    +.. data:: HIGHEST_PROTOCOL
    +
    +   The highest protocol version available.  This value can be passed as a
    +   *protocol* value.
    +
    +   .. versionadded:: 2.3
    +
    +.. note::
    +
    +   Be sure to always open pickle files created with protocols >= 1 in binary mode.
    +   For the old ASCII-based pickle protocol 0 you can use either text mode or binary
    +   mode as long as you stay consistent.
    +
    +   A pickle file written with protocol 0 in binary mode will contain lone linefeeds
    +   as line terminators and therefore will look "funny" when viewed in Notepad or
    +   other editors which do not support this format.
    +
    +The :mod:`pickle` module provides the following functions to make the pickling
    +process more convenient:
    +
    +
    +.. function:: dump(obj, file[, protocol])
    +
    +   Write a pickled representation of *obj* to the open file object *file*.  This is
    +   equivalent to ``Pickler(file, protocol).dump(obj)``.
    +
    +   If the *protocol* parameter is omitted, protocol 0 is used. If *protocol* is
    +   specified as a negative value or :const:`HIGHEST_PROTOCOL`, the highest protocol
    +   version will be used.
    +
    +   .. versionchanged:: 2.3
    +      Introduced the *protocol* parameter.
    +
    +   *file* must have a :meth:`write` method that accepts a single string argument.
    +   It can thus be a file object opened for writing, a :mod:`StringIO` object, or
    +   any other custom object that meets this interface.
    +
    +
    +.. function:: load(file)
    +
    +   Read a string from the open file object *file* and interpret it as a pickle data
    +   stream, reconstructing and returning the original object hierarchy.  This is
    +   equivalent to ``Unpickler(file).load()``.
    +
    +   *file* must have two methods, a :meth:`read` method that takes an integer
    +   argument, and a :meth:`readline` method that requires no arguments.  Both
    +   methods should return a string.  Thus *file* can be a file object opened for
    +   reading, a :mod:`StringIO` object, or any other custom object that meets this
    +   interface.
    +
    +   This function automatically determines whether the data stream was written in
    +   binary mode or not.
    +
    +
    +.. function:: dumps(obj[, protocol])
    +
    +   Return the pickled representation of the object as a string, instead of writing
    +   it to a file.
    +
    +   If the *protocol* parameter is omitted, protocol 0 is used. If *protocol* is
    +   specified as a negative value or :const:`HIGHEST_PROTOCOL`, the highest protocol
    +   version will be used.
    +
    +   .. versionchanged:: 2.3
    +      The *protocol* parameter was added.
    +
    +
    +.. function:: loads(string)
    +
    +   Read a pickled object hierarchy from a string.  Characters in the string past
    +   the pickled object's representation are ignored.
    +
    +The :mod:`pickle` module also defines three exceptions:
    +
    +
    +.. exception:: PickleError
    +
    +   A common base class for the other exceptions defined below.  This inherits from
    +   :exc:`Exception`.
    +
    +
    +.. exception:: PicklingError
    +
    +   This exception is raised when an unpicklable object is passed to the
    +   :meth:`dump` method.
    +
    +
    +.. exception:: UnpicklingError
    +
    +   This exception is raised when there is a problem unpickling an object. Note that
    +   other exceptions may also be raised during unpickling, including (but not
    +   necessarily limited to) :exc:`AttributeError`, :exc:`EOFError`,
    +   :exc:`ImportError`, and :exc:`IndexError`.
    +
    +The :mod:`pickle` module also exports two callables [#]_, :class:`Pickler` and
    +:class:`Unpickler`:
    +
    +
    +.. class:: Pickler(file[, protocol])
    +
    +   This takes a file-like object to which it will write a pickle data stream.
    +
    +   If the *protocol* parameter is omitted, protocol 0 is used. If *protocol* is
    +   specified as a negative value or :const:`HIGHEST_PROTOCOL`, the highest
    +   protocol version will be used.
    +
    +   .. versionchanged:: 2.3
    +      Introduced the *protocol* parameter.
    +
    +   *file* must have a :meth:`write` method that accepts a single string argument.
    +   It can thus be an open file object, a :mod:`StringIO` object, or any other
    +   custom object that meets this interface.
    +
    +:class:`Pickler` objects define one (or two) public methods:
    +
    +
    +.. method:: Pickler.dump(obj)
    +
    +   Write a pickled representation of *obj* to the open file object given in the
    +   constructor.  Either the binary or ASCII format will be used, depending on the
    +   value of the *protocol* argument passed to the constructor.
    +
    +
    +.. method:: Pickler.clear_memo()
    +
    +   Clears the pickler's "memo".  The memo is the data structure that remembers
    +   which objects the pickler has already seen, so that shared or recursive objects
    +   are pickled by reference and not by value.  This method is useful when re-using
    +   picklers.
    +
    +   .. note::
    +
    +      Prior to Python 2.3, :meth:`clear_memo` was only available on the picklers
    +      created by :mod:`cPickle`.  In the :mod:`pickle` module, picklers have an
    +      instance variable called :attr:`memo` which is a Python dictionary.  So to clear
    +      the memo for a :mod:`pickle` module pickler, you could do the following::
    +
    +         mypickler.memo.clear()
    +
    +      Code that does not need to support older versions of Python should simply use
    +      :meth:`clear_memo`.
    +
    +It is possible to make multiple calls to the :meth:`dump` method of the same
    +:class:`Pickler` instance.  These must then be matched to the same number of
    +calls to the :meth:`load` method of the corresponding :class:`Unpickler`
    +instance.  If the same object is pickled by multiple :meth:`dump` calls, the
    +:meth:`load` calls will all yield references to the same object. [#]_
    +
    +:class:`Unpickler` objects are defined as:
    +
    +
    +.. class:: Unpickler(file)
    +
    +   This takes a file-like object from which it will read a pickle data stream.
    +   This class automatically determines whether the data stream was written in
    +   binary mode or not, so it does not need a flag as in the :class:`Pickler`
    +   factory.
    +
    +   *file* must have two methods, a :meth:`read` method that takes an integer
    +   argument, and a :meth:`readline` method that requires no arguments.  Both
    +   methods should return a string.  Thus *file* can be a file object opened for
    +   reading, a :mod:`StringIO` object, or any other custom object that meets this
    +   interface.
    +
    +:class:`Unpickler` objects have one (or two) public methods:
    +
    +
    +.. method:: Unpickler.load()
    +
    +   Read a pickled object representation from the open file object given in the
    +   constructor, and return the reconstituted object hierarchy specified therein.
    +
    +   This method automatically determines whether the data stream was written in
    +   binary mode or not.
    +
    +
    +.. method:: Unpickler.noload()
    +
    +   This is just like :meth:`load` except that it doesn't actually create any
    +   objects.  This is useful primarily for finding what's called "persistent ids"
    +   that may be referenced in a pickle data stream.  See section
    +   :ref:`pickle-protocol` below for more details.
    +
    +   **Note:** the :meth:`noload` method is currently only available on
    +   :class:`Unpickler` objects created with the :mod:`cPickle` module.
    +   :mod:`pickle` module :class:`Unpickler`\ s do not have the :meth:`noload`
    +   method.
    +
    +
    +What can be pickled and unpickled?
    +----------------------------------
    +
    +The following types can be pickled:
    +
    +* ``None``, ``True``, and ``False``
    +
    +* integers, long integers, floating point numbers, complex numbers
    +
    +* normal and Unicode strings
    +
    +* tuples, lists, sets, and dictionaries containing only picklable objects
    +
    +* functions defined at the top level of a module
    +
    +* built-in functions defined at the top level of a module
    +
    +* classes that are defined at the top level of a module
    +
    +* instances of such classes whose :attr:`__dict__` or the result of calling
    +  :meth:`__getstate__` is picklable  (see section :ref:`pickle-protocol` for details)
    +
    +Attempts to pickle unpicklable objects will raise the :exc:`PicklingError`
    +exception; when this happens, an unspecified number of bytes may have already
    +been written to the underlying file. Trying to pickle a highly recursive data
    +structure may exceed the maximum recursion depth; a :exc:`RuntimeError` will be
    +raised in this case. You can carefully raise this limit with
    +:func:`sys.setrecursionlimit`.
    +
    +Note that functions (built-in and user-defined) are pickled by "fully qualified"
    +name reference, not by value.  This means that only the function name is
    +pickled, along with the name of the module the function is defined in.  Neither the
    +function's code, nor any of its function attributes are pickled.  Thus the
    +defining module must be importable in the unpickling environment, and the module
    +must contain the named object, otherwise an exception will be raised. [#]_
    +
    +Similarly, classes are pickled by named reference, so the same restrictions in
    +the unpickling environment apply.  Note that none of the class's code or data is
    +pickled, so in the following example the class attribute ``attr`` is not
    +restored in the unpickling environment::
    +
    +   class Foo:
    +       attr = 'a class attr'
    +
    +   picklestring = pickle.dumps(Foo)
    +
    +These restrictions are why picklable functions and classes must be defined in
    +the top level of a module.
    +
    +Similarly, when class instances are pickled, their class's code and data are not
    +pickled along with them.  Only the instance data are pickled.  This is done on
    +purpose, so you can fix bugs in a class or add methods to the class and still
    +load objects that were created with an earlier version of the class.  If you
    +plan to have long-lived objects that will see many versions of a class, it may
    +be worthwhile to put a version number in the objects so that suitable
    +conversions can be made by the class's :meth:`__setstate__` method.
    +
    +
    +.. _pickle-protocol:
    +
    +The pickle protocol
    +-------------------
    +
    +This section describes the "pickling protocol" that defines the interface
    +between the pickler/unpickler and the objects that are being serialized.  This
    +protocol provides a standard way for you to define, customize, and control how
    +your objects are serialized and de-serialized.  The description in this section
    +doesn't cover specific customizations that you can employ to make the unpickling
    +environment slightly safer from untrusted pickle data streams; see section
    +:ref:`pickle-sub` for more details.
    +
    +
    +.. _pickle-inst:
    +
    +Pickling and unpickling normal class instances
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +.. index::
    +   single: __getinitargs__() (copy protocol)
    +   single: __init__() (instance constructor)
    +
    +When a pickled class instance is unpickled, its :meth:`__init__` method is
    +normally *not* invoked.  If it is desirable that the :meth:`__init__` method be
    +called on unpickling, an old-style class can define a method
    +:meth:`__getinitargs__`, which should return a *tuple* containing the arguments
    +to be passed to the class constructor (:meth:`__init__` for example).  The
    +:meth:`__getinitargs__` method is called at pickle time; the tuple it returns is
    +incorporated in the pickle for the instance.
    +
    +.. index:: single: __getnewargs__() (copy protocol)
    +
    +New-style types can provide a :meth:`__getnewargs__` method that is used for
    +protocol 2.  Implementing this method is needed if the type establishes some
    +internal invariants when the instance is created, or if the memory allocation is
    +affected by the values passed to the :meth:`__new__` method for the type (as it
    +is for tuples and strings).  Instances of a new-style type :class:`C` are
    +created using ::
    +
    +   obj = C.__new__(C, *args)
    +
    +
    +where *args* is the result of calling :meth:`__getnewargs__` on the original
    +object; if there is no :meth:`__getnewargs__`, an empty tuple is assumed.
    +
    +.. index::
    +   single: __getstate__() (copy protocol)
    +   single: __setstate__() (copy protocol)
    +   single: __dict__ (instance attribute)
    +
    +Classes can further influence how their instances are pickled; if the class
    +defines the method :meth:`__getstate__`, it is called and the return state is
    +pickled as the contents for the instance, instead of the contents of the
    +instance's dictionary.  If there is no :meth:`__getstate__` method, the
    +instance's :attr:`__dict__` is pickled.
    +
    +Upon unpickling, if the class also defines the method :meth:`__setstate__`, it
    +is called with the unpickled state. [#]_  If there is no :meth:`__setstate__`
    +method, the pickled state must be a dictionary and its items are assigned to the
    +new instance's dictionary.  If a class defines both :meth:`__getstate__` and
    +:meth:`__setstate__`, the state object needn't be a dictionary and these methods
    +can do what they want. [#]_
    +
    +.. warning::
    +
    +   For new-style classes, if :meth:`__getstate__` returns a false value, the
    +   :meth:`__setstate__` method will not be called.
    +
    +
    +Pickling and unpickling extension types
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +When the :class:`Pickler` encounters an object of a type it knows nothing about
    +--- such as an extension type --- it looks in two places for a hint of how to
    +pickle it.  One alternative is for the object to implement a :meth:`__reduce__`
    +method.  If provided, at pickling time :meth:`__reduce__` will be called with no
    +arguments, and it must return either a string or a tuple.
    +
    +If a string is returned, it names a global variable whose contents are pickled
    +as normal.  The string returned by :meth:`__reduce__` should be the object's
    +local name relative to its module; the pickle module searches the module
    +namespace to determine the object's module.
    +
    +When a tuple is returned, it must be between two and five elements long.
    +Optional elements can either be omitted, or ``None`` can be provided  as their
    +value.  The semantics of each element are:
    +
    +* A callable object that will be called to create the initial version of the
    +  object.  The next element of the tuple will provide arguments for this callable,
    +  and later elements provide additional state information that will subsequently
    +  be used to fully reconstruct the pickled data.
    +
    +  In the unpickling environment this object must be either a class, a callable
    +  registered as a "safe constructor" (see below), or it must have an attribute
    +  :attr:`__safe_for_unpickling__` with a true value. Otherwise, an
    +  :exc:`UnpicklingError` will be raised in the unpickling environment.  Note that
    +  as usual, the callable itself is pickled by name.
    +
    +* A tuple of arguments for the callable object.
    +
    +  .. versionchanged:: 2.5
    +     Formerly, this argument could also be ``None``.
    +
    +* Optionally, the object's state, which will be passed to the object's
    +  :meth:`__setstate__` method as described in section :ref:`pickle-inst`.  If the
    +  object has no :meth:`__setstate__` method, then, as above, the value must be a
    +  dictionary and it will be added to the object's :attr:`__dict__`.
    +
    +* Optionally, an iterator (and not a sequence) yielding successive list items.
    +  These list items will be pickled, and appended to the object using either
    +  ``obj.append(item)`` or ``obj.extend(list_of_items)``.  This is primarily used
    +  for list subclasses, but may be used by other classes as long as they have
    +  :meth:`append` and :meth:`extend` methods with the appropriate signature.
    +  (Whether :meth:`append` or :meth:`extend` is used depends on which pickle
    +  protocol version is used as well as the number of items to append, so both must
    +  be supported.)
    +
    +* Optionally, an iterator (not a sequence) yielding successive dictionary items,
    +  which should be tuples of the form ``(key, value)``.  These items will be
    +  pickled and stored to the object using ``obj[key] = value``. This is primarily
    +  used for dictionary subclasses, but may be used by other classes as long as they
    +  implement :meth:`__setitem__`.
    +
    +It is sometimes useful to know the protocol version when implementing
    +:meth:`__reduce__`.  This can be done by implementing a method named
    +:meth:`__reduce_ex__` instead of :meth:`__reduce__`. :meth:`__reduce_ex__`, when
    +it exists, is called in preference over :meth:`__reduce__` (you may still
    +provide :meth:`__reduce__` for backwards compatibility).  The
    +:meth:`__reduce_ex__` method will be called with a single integer argument, the
    +protocol version.
    +
    +The :class:`object` class implements both :meth:`__reduce__` and
    +:meth:`__reduce_ex__`; however, if a subclass overrides :meth:`__reduce__` but
    +not :meth:`__reduce_ex__`, the :meth:`__reduce_ex__` implementation detects this
    +and calls :meth:`__reduce__`.
    +
    +An alternative to implementing a :meth:`__reduce__` method on the object to be
    +pickled, is to register the callable with the :mod:`copy_reg` module.  This
    +module provides a way for programs to register "reduction functions" and
    +constructors for user-defined types.   Reduction functions have the same
    +semantics and interface as the :meth:`__reduce__` method described above, except
    +that they are called with a single argument, the object to be pickled.
    +
    +The registered constructor is deemed a "safe constructor" for purposes of
    +unpickling as described above.
    +
    +
    +Pickling and unpickling external objects
    +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    +
    +For the benefit of object persistence, the :mod:`pickle` module supports the
    +notion of a reference to an object outside the pickled data stream.  Such
    +objects are referenced by a "persistent id", which is just an arbitrary string
    +of printable ASCII characters. The resolution of such names is not defined by
    +the :mod:`pickle` module; it will delegate this resolution to user defined
    +functions on the pickler and unpickler. [#]_
    +
    +To define external persistent id resolution, you need to set the
    +:attr:`persistent_id` attribute of the pickler object and the
    +:attr:`persistent_load` attribute of the unpickler object.
    +
    +To pickle objects that have an external persistent id, the pickler must have a
    +custom :func:`persistent_id` method that takes an object as an argument and
    +returns either ``None`` or the persistent id for that object.  When ``None`` is
    +returned, the pickler simply pickles the object as normal.  When a persistent id
    +string is returned, the pickler will pickle that string, along with a marker so
    +that the unpickler will recognize the string as a persistent id.
    +
    +To unpickle external objects, the unpickler must have a custom
    +:func:`persistent_load` function that takes a persistent id string and returns
    +the referenced object.
    +
    +Here's a silly example that *might* shed more light::
    +
    +   import pickle
    +   from cStringIO import StringIO
    +
    +   src = StringIO()
    +   p = pickle.Pickler(src)
    +
    +   def persistent_id(obj):
    +       if hasattr(obj, 'x'):
    +           return 'the value %d' % obj.x
    +       else:
    +           return None
    +
    +   p.persistent_id = persistent_id
    +
    +   class Integer:
    +       def __init__(self, x):
    +           self.x = x
    +       def __str__(self):
    +           return 'My name is integer %d' % self.x
    +
    +   i = Integer(7)
    +   print i
    +   p.dump(i)
    +
    +   datastream = src.getvalue()
    +   print repr(datastream)
    +   dst = StringIO(datastream)
    +
    +   up = pickle.Unpickler(dst)
    +
    +   class FancyInteger(Integer):
    +       def __str__(self):
    +           return 'I am the integer %d' % self.x
    +
    +   def persistent_load(persid):
    +       if persid.startswith('the value '):
    +           value = int(persid.split()[2])
    +           return FancyInteger(value)
    +       else:
    +           raise pickle.UnpicklingError, 'Invalid persistent id'
    +
    +   up.persistent_load = persistent_load
    +
    +   j = up.load()
    +   print j
    +
    +In the :mod:`cPickle` module, the unpickler's :attr:`persistent_load` attribute
    +can also be set to a Python list, in which case, when the unpickler reaches a
    +persistent id, the persistent id string will simply be appended to this list.
    +This functionality exists so that a pickle data stream can be "sniffed" for
    +object references without actually instantiating all the objects in a pickle.
    +[#]_  Setting :attr:`persistent_load` to a list is usually used in conjunction
    +with the :meth:`noload` method on the Unpickler.
    +
    +.. % BAW: Both pickle and cPickle support something called
    +.. % inst_persistent_id() which appears to give unknown types a second
    +.. % shot at producing a persistent id.  Since Jim Fulton can't remember
    +.. % why it was added or what it's for, I'm leaving it undocumented.
    +
    +
    +.. _pickle-sub:
    +
    +Subclassing Unpicklers
    +----------------------
    +
    +By default, unpickling will import any class that it finds in the pickle data.
    +You can control exactly what gets unpickled and what gets called by customizing
    +your unpickler.  Unfortunately, exactly how you do this is different depending
    +on whether you're using :mod:`pickle` or :mod:`cPickle`. [#]_
    +
    +In the :mod:`pickle` module, you need to derive a subclass from
    +:class:`Unpickler`, overriding the :meth:`load_global` method.
    +:meth:`load_global` should read two lines from the pickle data stream where the
    +first line will be the name of the module containing the class and the second line
    +will be the name of the instance's class.  It then looks up the class, possibly
    +importing the module and digging out the attribute, then it appends what it
    +finds to the unpickler's stack.  Later on, this class will be assigned to the
    +:attr:`__class__` attribute of an empty class, as a way of magically creating an
    +instance without calling its class's :meth:`__init__`. Your job (should you
    +choose to accept it), would be to have :meth:`load_global` push onto the
    +unpickler's stack, a known safe version of any class you deem safe to unpickle.
    +It is up to you to produce such a class.  Or you could raise an error if you
    +want to disallow all unpickling of instances.  If this sounds like a hack,
    +you're right.  Refer to the source code to make this work.
    +
    +Things are a little cleaner with :mod:`cPickle`, but not by much. To control
    +what gets unpickled, you can set the unpickler's :attr:`find_global` attribute
    +to a function or ``None``.  If it is ``None`` then any attempts to unpickle
    +instances will raise an :exc:`UnpicklingError`.  If it is a function, then it
    +should accept a module name and a class name, and return the corresponding class
    +object.  It is responsible for looking up the class and performing any necessary
    +imports, and it may raise an error to prevent instances of the class from being
    +unpickled.
    +
    +The moral of the story is that you should be really careful about the source of
    +the strings your application unpickles.
    +
    +
    +.. _pickle-example:
    +
    +Example
    +-------
    +
    +For the simplest code, use the :func:`dump` and :func:`load` functions.  Note
    +that a self-referencing list is pickled and restored correctly. ::
    +
    +   import pickle
    +
    +   data1 = {'a': [1, 2.0, 3, 4+6j],
    +            'b': ('string', u'Unicode string'),
    +            'c': None}
    +
    +   selfref_list = [1, 2, 3]
    +   selfref_list.append(selfref_list)
    +
    +   output = open('data.pkl', 'wb')
    +
    +   # Pickle dictionary using protocol 0.
    +   pickle.dump(data1, output)
    +
    +   # Pickle the list using the highest protocol available.
    +   pickle.dump(selfref_list, output, -1)
    +
    +   output.close()
    +
    +The following example reads the resulting pickled data.  When reading a
    +pickle-containing file, you should open the file in binary mode because you
    +can't be sure if the ASCII or binary format was used. ::
    +
    +   import pprint, pickle
    +
    +   pkl_file = open('data.pkl', 'rb')
    +
    +   data1 = pickle.load(pkl_file)
    +   pprint.pprint(data1)
    +
    +   data2 = pickle.load(pkl_file)
    +   pprint.pprint(data2)
    +
    +   pkl_file.close()
    +
    +Here's a larger example that shows how to modify pickling behavior for a class.
    +The :class:`TextReader` class opens a text file, and returns the line number and
    +line contents each time its :meth:`readline` method is called. If a
    +:class:`TextReader` instance is pickled, all attributes *except* the file object
    +member are saved. When the instance is unpickled, the file is reopened, and
    +reading resumes from the last location. The :meth:`__setstate__` and
    +:meth:`__getstate__` methods are used to implement this behavior. ::
    +
    +   #!/usr/local/bin/python
    +
    +   class TextReader:
    +       """Print and number lines in a text file."""
    +       def __init__(self, file):
    +           self.file = file
    +           self.fh = open(file)
    +           self.lineno = 0
    +
    +       def readline(self):
    +           self.lineno = self.lineno + 1
    +           line = self.fh.readline()
    +           if not line:
    +               return None
    +           if line.endswith("\n"):
    +               line = line[:-1]
    +           return "%d: %s" % (self.lineno, line)
    +
    +       def __getstate__(self):
    +           odict = self.__dict__.copy() # copy the dict since we change it
    +           del odict['fh']              # remove filehandle entry
    +           return odict
    +
    +       def __setstate__(self, dict):
    +           fh = open(dict['file'])      # reopen file
    +           count = dict['lineno']       # read from file...
    +           while count:                 # until line count is restored
    +               fh.readline()
    +               count = count - 1
    +           self.__dict__.update(dict)   # update attributes
    +           self.fh = fh                 # save the file object
    +
    +A sample usage might be something like this::
    +
    +   >>> import TextReader
    +   >>> obj = TextReader.TextReader("TextReader.py")
    +   >>> obj.readline()
    +   '1: #!/usr/local/bin/python'
    +   >>> obj.readline()
    +   '2: '
    +   >>> obj.readline()
    +   '3: class TextReader:'
    +   >>> import pickle
    +   >>> pickle.dump(obj, open('save.p', 'wb'))
    +
    +If you want to see that :mod:`pickle` works across Python processes, start
    +another Python session, before continuing.  What follows can happen from either
    +the same process or a new process. ::
    +
    +   >>> import pickle
    +   >>> reader = pickle.load(open('save.p', 'rb'))
    +   >>> reader.readline()
    +   '4:     """Print and number lines in a text file."""'
    +
    +
    +.. seealso::
    +
    +   Module :mod:`copy_reg`
    +      Pickle interface constructor registration for extension types.
    +
    +   Module :mod:`shelve`
    +      Indexed databases of objects; uses :mod:`pickle`.
    +
    +   Module :mod:`copy`
    +      Shallow and deep object copying.
    +
    +   Module :mod:`marshal`
    +      High-performance serialization of built-in types.
    +
    +
    +:mod:`cPickle` --- A faster :mod:`pickle`
    +=========================================
    +
    +.. module:: cPickle
    +   :synopsis: Faster version of pickle, but not subclassable.
    +.. moduleauthor:: Jim Fulton 
    +.. sectionauthor:: Fred L. Drake, Jr. 
    +
    +
    +.. index:: module: pickle
    +
    +The :mod:`cPickle` module supports serialization and de-serialization of Python
    +objects, providing an interface and functionality nearly identical to the
    +:mod:`pickle` module.  There are several differences, the most important being
    +performance and subclassability.
    +
    +First, :mod:`cPickle` can be up to 1000 times faster than :mod:`pickle` because
    +the former is implemented in C.  Second, in the :mod:`cPickle` module the
    +callables :func:`Pickler` and :func:`Unpickler` are functions, not classes.
    +This means that you cannot use them to derive custom pickling and unpickling
    +subclasses.  Most applications have no need for this functionality and should
    +benefit from the greatly improved performance of the :mod:`cPickle` module.
    +
    +The pickle data streams produced by :mod:`pickle` and :mod:`cPickle` are
    +identical, so it is possible to use :mod:`pickle` and :mod:`cPickle`
    +interchangeably with existing pickles. [#]_
    +
    +There are additional minor differences in API between :mod:`cPickle` and
    +:mod:`pickle`; however, for most applications they are interchangeable.  More
    +documentation is provided in the :mod:`pickle` module documentation, which
    +includes a list of the documented differences.
    +
    +.. rubric:: Footnotes
    +
    +.. [#] Don't confuse this with the :mod:`marshal` module.
    +
    +.. [#] In the :mod:`pickle` module these callables are classes, which you could
    +   subclass to customize the behavior.  However, in the :mod:`cPickle` module these
    +   callables are factory functions and so cannot be subclassed.  One common reason
    +   to subclass is to control what objects can actually be unpickled.  See section
    +   :ref:`pickle-sub` for more details.
    +
    +.. [#] *Warning*: this is intended for pickling multiple objects without intervening
    +   modifications to the objects or their parts.  If you modify an object and then
    +   pickle it again using the same :class:`Pickler` instance, the object is not
    +   pickled again --- a reference to it is pickled and the :class:`Unpickler` will
    +   return the old value, not the modified one. There are two problems here: (1)
    +   detecting changes, and (2) marshalling a minimal set of changes.  Garbage
    +   Collection may also become a problem here.
    +
    +.. [#] The exception raised will likely be an :exc:`ImportError` or an
    +   :exc:`AttributeError` but it could be something else.
    +
    +.. [#] These methods can also be used to implement copying class instances.
    +
    +.. [#] This protocol is also used by the shallow and deep copying operations defined in
    +   the :mod:`copy` module.
    +
    +.. [#] The actual mechanism for associating these user defined functions is slightly
    +   different for :mod:`pickle` and :mod:`cPickle`.  The description given here
    +   works the same for both implementations.  Users of the :mod:`pickle` module
    +   could also use subclassing to effect the same results, overriding the
    +   :meth:`persistent_id` and :meth:`persistent_load` methods in the derived
    +   classes.
    +
    +.. [#] We'll leave you with the image of Guido and Jim sitting around sniffing pickles
    +   in their living rooms.
    +
    +.. [#] A word of caution: the mechanisms described here use internal attributes and
    +   methods, which are subject to change in future versions of Python.  We intend to
    +   someday provide a common interface for controlling this behavior, which will
    +   work in either :mod:`pickle` or :mod:`cPickle`.
    +
    +.. [#] Since the pickle data format is actually a tiny stack-oriented programming
    +   language, and some freedom is taken in the encodings of certain objects, it is
    +   possible that the two modules produce different data streams for the same input
    +   objects.  However it is guaranteed that they will always be able to read each
    +   other's data streams.
    +
    diff --git a/Doc/library/pickletools.rst b/Doc/library/pickletools.rst
    new file mode 100644
    index 0000000..ec220d9
    --- /dev/null
    +++ b/Doc/library/pickletools.rst
    @@ -0,0 +1,37 @@
    +
    +:mod:`pickletools` --- Tools for pickle developers.
    +===================================================
    +
    +.. module:: pickletools
    +   :synopsis: Contains extensive comments about the pickle protocols and pickle-machine
    +              opcodes, as well as some useful functions.
    +
    +
    +.. versionadded:: 2.3
    +
    +This module contains various constants relating to the intimate details of the
    +:mod:`pickle` module, some lengthy comments about the implementation, and a few
    +useful functions for analyzing pickled data.  The contents of this module are
    +useful for Python core developers who are working on the :mod:`pickle` and
    +:mod:`cPickle` implementations; ordinary users of the :mod:`pickle` module
    +probably won't find the :mod:`pickletools` module relevant.
    +
    +
    +.. function:: dis(pickle[, out=None, memo=None, indentlevel=4])
    +
    +   Outputs a symbolic disassembly of the pickle to the file-like object *out*,
    +   defaulting to ``sys.stdout``.  *pickle* can be a string or a file-like object.
    +   *memo* can be a Python dictionary that will be used as the pickle's memo; it can
    +   be used to perform disassemblies across multiple pickles created by the same
    +   pickler. Successive levels, indicated by ``MARK`` opcodes in the stream, are
    +   indented by *indentlevel* spaces.
    +
    +
    +.. function:: genops(pickle)
    +
    +   Provides an iterator over all of the opcodes in a pickle, returning a sequence
    +   of ``(opcode, arg, pos)`` triples. *opcode* is an instance of an
    +   :class:`OpcodeInfo` class; *arg*  is the decoded value, as a Python object, of
    +   the opcode's argument;  *pos* is the position at which this opcode is located.
    +   *pickle* can be a string or a file-like object.
    +
    diff --git a/Doc/library/pipes.rst b/Doc/library/pipes.rst
    new file mode 100644
    index 0000000..1f2b2ff
    --- /dev/null
    +++ b/Doc/library/pipes.rst
    @@ -0,0 +1,92 @@
    +
    +:mod:`pipes` --- Interface to shell pipelines
    +=============================================
    +
    +.. module:: pipes
    +   :platform: Unix
    +   :synopsis: A Python interface to Unix shell pipelines.
    +.. sectionauthor:: Moshe Zadka 
    +
    +
    +The :mod:`pipes` module defines a class to abstract the concept of a *pipeline*
    +--- a sequence of converters from one file to  another.
    +
    +Because the module uses :program:`/bin/sh` command lines, a POSIX or compatible
    +shell for :func:`os.system` and :func:`os.popen` is required.
    +
    +The :mod:`pipes` module defines the following class:
    +
    +
    +.. class:: Template()
    +
    +   An abstraction of a pipeline.
    +
    +Example::
    +
    +   >>> import pipes
    +   >>> t=pipes.Template()
    +   >>> t.append('tr a-z A-Z', '--')
    +   >>> f=t.open('/tmp/1', 'w')
    +   >>> f.write('hello world')
    +   >>> f.close()
    +   >>> open('/tmp/1').read()
    +   'HELLO WORLD'
    +
    +
    +.. _template-objects:
    +
    +Template Objects
    +----------------
    +
    +Template objects have the following methods:
    +
    +
    +.. method:: Template.reset()
    +
    +   Restore a pipeline template to its initial state.
    +
    +
    +.. method:: Template.clone()
    +
    +   Return a new, equivalent, pipeline template.
    +
    +
    +.. method:: Template.debug(flag)
    +
    +   If *flag* is true, turn debugging on. Otherwise, turn debugging off. When
    +   debugging is on, commands to be executed are printed, and the shell is given
    +   the ``set -x`` command to be more verbose.
    +
    +
    +.. method:: Template.append(cmd, kind)
    +
    +   Append a new action at the end. The *cmd* variable must be a valid bourne shell
    +   command. The *kind* variable consists of two letters.
    +
    +   The first letter can be either of ``'-'`` (which means the command reads its
    +   standard input), ``'f'`` (which means the command reads a given file on the
    +   command line) or ``'.'`` (which means the command reads no input, and hence
    +   must be first.)
    +
    +   Similarly, the second letter can be either of ``'-'`` (which means  the command
    +   writes to standard output), ``'f'`` (which means the  command writes a file on
    +   the command line) or ``'.'`` (which means the command does not write anything,
    +   and hence must be last.)
    +
    +
    +.. method:: Template.prepend(cmd, kind)
    +
    +   Add a new action at the beginning. See :meth:`append` for explanations of the
    +   arguments.
    +
    +
    +.. method:: Template.open(file, mode)
    +
    +   Return a file-like object, open to *file*, but read from or written to by the
    +   pipeline.  Note that only one of ``'r'``, ``'w'`` may be given.
    +
    +
    +.. method:: Template.copy(infile, outfile)
    +
    +   Copy *infile* to *outfile* through the pipe.
    +
    diff --git a/Doc/library/pkgutil.rst b/Doc/library/pkgutil.rst
    new file mode 100644
    index 0000000..1fbfb04
    --- /dev/null
    +++ b/Doc/library/pkgutil.rst
    @@ -0,0 +1,43 @@
    +
    +:mod:`pkgutil` --- Package extension utility
    +============================================
    +
    +.. module:: pkgutil
    +   :synopsis: Utilities to support extension of packages.
    +
    +
    +.. versionadded:: 2.3
    +
    +This module provides a single function:
    +
    +
    +.. function:: extend_path(path, name)
    +
    +   Extend the search path for the modules which comprise a package. Intended use is
    +   to place the following code in a package's :file:`__init__.py`::
    +
    +      from pkgutil import extend_path
    +      __path__ = extend_path(__path__, __name__)
    +
    +   This will add to the package's ``__path__`` all subdirectories of directories on
    +   ``sys.path`` named after the package.  This is useful if one wants to distribute
    +   different parts of a single logical package as multiple directories.
    +
    +   It also looks for :file:`\*.pkg` files beginning where ``*`` matches the *name*
    +   argument.  This feature is similar to :file:`\*.pth` files (see the :mod:`site`
    +   module for more information), except that it doesn't special-case lines starting
    +   with ``import``.  A :file:`\*.pkg` file is trusted at face value: apart from
    +   checking for duplicates, all entries found in a :file:`\*.pkg` file are added to
    +   the path, regardless of whether they exist on the filesystem.  (This is a
    +   feature.)
    +
    +   If the input path is not a list (as is the case for frozen packages) it is
    +   returned unchanged.  The input path is not modified; an extended copy is
    +   returned.  Items are only appended to the copy at the end.
    +
    +   It is assumed that ``sys.path`` is a sequence.  Items of ``sys.path`` that are
    +   not (Unicode or 8-bit) strings referring to existing directories are ignored.
    +   Unicode items on ``sys.path`` that cause errors when used as filenames may cause
    +   this function to raise an exception (in line with :func:`os.path.isdir`
    +   behavior).
    +
    diff --git a/Doc/library/platform.rst b/Doc/library/platform.rst
    new file mode 100644
    index 0000000..a4570d2
    --- /dev/null
    +++ b/Doc/library/platform.rst
    @@ -0,0 +1,256 @@
    +
    +:mod:`platform` ---  Access to underlying platform's identifying data.
    +======================================================================
    +
    +.. module:: platform
    +   :synopsis: Retrieves as much platform identifying data as possible.
    +.. moduleauthor:: Marc-Andre Lemburg 
    +.. sectionauthor:: Bjorn Pettersen 
    +
    +
    +.. versionadded:: 2.3
    +
    +.. note::
    +
    +   Specific platforms listed alphabetically, with Linux included in the Unix
    +   section.
    +
    +
    +Cross Platform
    +--------------
    +
    +
    +.. function:: architecture(executable=sys.executable, bits='', linkage='')
    +
    +   Queries the given executable (defaults to the Python interpreter binary) for
    +   various architecture information.
    +
    +   Returns a tuple ``(bits, linkage)`` which contains information about the bit
    +   architecture and the linkage format used for the executable. Both values are
    +   returned as strings.
    +
    +   Values that cannot be determined are returned as given by the parameter presets.
    +   If bits is given as ``''``, the :cfunc:`sizeof(pointer)` (or
    +   :cfunc:`sizeof(long)` on Python version < 1.5.2) is used as indicator for the
    +   supported pointer size.
    +
    +   The function relies on the system's :file:`file` command to do the actual work.
    +   This is available on most if not all Unix  platforms and some non-Unix platforms
    +   and then only if the executable points to the Python interpreter.  Reasonable
    +   defaults are used when the above needs are not met.
    +
    +
    +.. function:: machine()
    +
    +   Returns the machine type, e.g. ``'i386'``. An empty string is returned if the
    +   value cannot be determined.
    +
    +
    +.. function:: node()
    +
    +   Returns the computer's network name (may not be fully qualified!). An empty
    +   string is returned if the value cannot be determined.
    +
    +
    +.. function:: platform(aliased=0, terse=0)
    +
    +   Returns a single string identifying the underlying platform with as much useful
    +   information as possible.
    +
    +   The output is intended to be *human readable* rather than machine parseable. It
    +   may look different on different platforms and this is intended.
    +
    +   If *aliased* is true, the function will use aliases for various platforms that
    +   report system names which differ from their common names, for example SunOS will
    +   be reported as Solaris.  The :func:`system_alias` function is used to implement
    +   this.
    +
    +   Setting *terse* to true causes the function to return only the absolute minimum
    +   information needed to identify the platform.
    +
    +
    +.. function:: processor()
    +
    +   Returns the (real) processor name, e.g. ``'amdk6'``.
    +
    +   An empty string is returned if the value cannot be determined. Note that many
    +   platforms do not provide this information or simply return the same value as for
    +   :func:`machine`.  NetBSD does this.
    +
    +
    +.. function:: python_build()
    +
    +   Returns a tuple ``(buildno, builddate)`` stating the Python build number and
    +   date as strings.
    +
    +
    +.. function:: python_compiler()
    +
    +   Returns a string identifying the compiler used for compiling Python.
    +
    +
    +.. function:: python_branch()
    +
    +   Returns a string identifying the Python implementation SCM branch.
    +
    +   .. versionadded:: 2.6
    +
    +
    +.. function:: python_implementation()
    +
    +   Returns a string identifying the Python implementation. Possible return values
    +   are: ``'CPython'``, ``'IronPython'``, ``'Jython'``.
    +
    +   .. versionadded:: 2.6
    +
    +
    +.. function:: python_revision()
    +
    +   Returns a string identifying the Python implementation SCM revision.
    +
    +   .. versionadded:: 2.6
    +
    +
    +.. function:: python_version()
    +
    +   Returns the Python version as string ``'major.minor.patchlevel'``.
    +
    +   Note that unlike the Python ``sys.version``, the returned value will always
    +   include the patchlevel (it defaults to 0).
    +
    +
    +.. function:: python_version_tuple()
    +
    +   Returns the Python version as tuple ``(major, minor, patchlevel)`` of strings.
    +
    +   Note that unlike the Python ``sys.version``, the returned value will always
    +   include the patchlevel (it defaults to ``'0'``).
    +
    +
    +.. function:: release()
    +
    +   Returns the system's release, e.g. ``'2.2.0'`` or ``'NT'``. An empty string is
    +   returned if the value cannot be determined.
    +
    +
    +.. function:: system()
    +
    +   Returns the system/OS name, e.g. ``'Linux'``, ``'Windows'``, or ``'Java'``. An
    +   empty string is returned if the value cannot be determined.
    +
    +
    +.. function:: system_alias(system, release, version)
    +
    +   Returns ``(system, release, version)`` aliased to common marketing names used
    +   for some systems.  It also does some reordering of the information in some cases
    +   where it would otherwise cause confusion.
    +
    +
    +.. function:: version()
    +
    +   Returns the system's release version, e.g. ``'#3 on degas'``. An empty string is
    +   returned if the value cannot be determined.
    +
    +
    +.. function:: uname()
    +
    +   Fairly portable uname interface. Returns a tuple of strings ``(system, node,
    +   release, version, machine, processor)`` identifying the underlying platform.
    +
    +   Note that unlike the :func:`os.uname` function this also returns possible
    +   processor information as additional tuple entry.
    +
    +   Entries which cannot be determined are set to ``''``.
    +
    +
    +Java Platform
    +-------------
    +
    +
    +.. function:: java_ver(release='', vendor='', vminfo=('','',''), osinfo=('','',''))
    +
    +   Version interface for Jython (formerly JPython).
    +
    +   Returns a tuple ``(release, vendor, vminfo, osinfo)`` with *vminfo* being a
    +   tuple ``(vm_name, vm_release, vm_vendor)`` and *osinfo* being a tuple
    +   ``(os_name, os_version, os_arch)``. Values which cannot be determined are set to
    +   the defaults given as parameters (which all default to ``''``).
    +
    +
    +Windows Platform
    +----------------
    +
    +
    +.. function:: win32_ver(release='', version='', csd='', ptype='')
    +
    +   Get additional version information from the Windows Registry and return a tuple
    +   ``(version, csd, ptype)`` referring to version number, CSD level and OS type
    +   (multi/single processor).
    +
    +   As a hint: *ptype* is ``'Uniprocessor Free'`` on single processor NT machines
    +   and ``'Multiprocessor Free'`` on multi processor machines. The *'Free'* refers
    +   to the OS version being free of debugging code. It could also state *'Checked'*
    +   which means the OS version uses debugging code, i.e. code that checks arguments,
    +   ranges, etc.
    +
    +   .. note::
    +
    +      This function only works if Mark Hammond's :mod:`win32all` package is installed
    +      and (obviously) only runs on Win32 compatible platforms.
    +
    +
    +Win95/98 specific
    +^^^^^^^^^^^^^^^^^
    +
    +
    +.. function:: popen(cmd, mode='r', bufsize=None)
    +
    +   Portable :func:`popen` interface.  Find a working popen implementation
    +   preferring :func:`win32pipe.popen`.  On Windows NT, :func:`win32pipe.popen`
    +   should work; on Windows 9x it hangs due to bugs in the MS C library.
    +
    +   .. % This KnowledgeBase article appears to be missing...
    +   .. % See also \ulink{MS KnowledgeBase article Q150956}{}.
    +
    +
    +Mac OS Platform
    +---------------
    +
    +
    +.. function:: mac_ver(release='', versioninfo=('','',''), machine='')
    +
    +   Get Mac OS version information and return it as tuple ``(release, versioninfo,
    +   machine)`` with *versioninfo* being a tuple ``(version, dev_stage,
    +   non_release_version)``.
    +
    +   Entries which cannot be determined are set to ``''``.  All tuple entries are
    +   strings.
    +
    +   Documentation for the underlying :cfunc:`gestalt` API is available online at
    +   http://www.rgaros.nl/gestalt/.
    +
    +
    +Unix Platforms
    +--------------
    +
    +
    +.. function:: dist(distname='', version='', id='', supported_dists=('SuSE','debian','redhat','mandrake'))
    +
    +   Tries to determine the name of the OS distribution.  Returns a tuple
    +   ``(distname, version, id)`` which defaults to the args given as parameters.
    +
    +.. % Document linux_distribution()?
    +
    +
    +.. function:: libc_ver(executable=sys.executable, lib='', version='', chunksize=2048)
    +
    +   Tries to determine the libc version against which the file executable (defaults
    +   to the Python interpreter) is linked.  Returns a tuple of strings ``(lib,
    +   version)`` which default to the given parameters in case the lookup fails.
    +
    +   Note that this function has intimate knowledge of how different libc versions
    +   add symbols to the executable and is probably only usable for executables
    +   compiled using :program:`gcc`.
    +
    +   The file is read and scanned in chunks of *chunksize* bytes.
    +
    diff --git a/Doc/library/poplib.rst b/Doc/library/poplib.rst
    new file mode 100644
    index 0000000..5716204
    --- /dev/null
    +++ b/Doc/library/poplib.rst
    @@ -0,0 +1,202 @@
    +
    +:mod:`poplib` --- POP3 protocol client
    +======================================
    +
    +.. module:: poplib
    +   :synopsis: POP3 protocol client (requires sockets).
    +
    +
    +.. index:: pair: POP3; protocol
    +
    +.. % By Andrew T. Csillag
    +.. % Even though I put it into LaTeX, I cannot really claim that I wrote
    +.. % it since I just stole most of it from the poplib.py source code and
    +.. % the imaplib ``chapter''.
    +.. % Revised by ESR, January 2000
    +
    +This module defines a class, :class:`POP3`, which encapsulates a connection to a
    +POP3 server and implements the protocol as defined in :rfc:`1725`.  The
    +:class:`POP3` class supports both the minimal and optional command sets.
    +Additionally, this module provides a class :class:`POP3_SSL`, which provides
    +support for connecting to POP3 servers that use SSL as an underlying protocol
    +layer.
    +
    +Note that POP3, though widely supported, is obsolescent.  The implementation
    +quality of POP3 servers varies widely, and too many are quite poor. If your
    +mailserver supports IMAP, you would be better off using the
    +:class:`imaplib.IMAP4` class, as IMAP servers tend to be better implemented.
    +
    +The :mod:`poplib` module provides two classes:
    +
    +
    +.. class:: POP3(host[, port[, timeout]])
    +
    +   This class implements the actual POP3 protocol.  The connection is created when
    +   the instance is initialized. If *port* is omitted, the standard POP3 port (110)
    +   is used. The optional *timeout* parameter specifies a timeout in seconds for the
    +   connection attempt (if not specified, or passed as ``None``, the global default
    +   timeout setting will be used).
    +
    +   .. versionchanged:: 2.6
    +      *timeout* was added.
    +
    +
    +.. class:: POP3_SSL(host[, port[, keyfile[, certfile]]])
    +
    +   This is a subclass of :class:`POP3` that connects to the server over an SSL
    +   encrypted socket.  If *port* is not specified, 995, the standard POP3-over-SSL
    +   port is used.  *keyfile* and *certfile* are also optional - they can contain a
    +   PEM formatted private key and certificate chain file for the SSL connection.
    +
    +   .. versionadded:: 2.4
    +
    +One exception is defined as an attribute of the :mod:`poplib` module:
    +
    +
    +.. exception:: error_proto
    +
    +   Exception raised on any errors from this module (errors from :mod:`socket`
    +   module are not caught). The reason for the exception is passed to the
    +   constructor as a string.
    +
    +
    +.. seealso::
    +
    +   Module :mod:`imaplib`
    +      The standard Python IMAP module.
    +
    +   `Frequently Asked Questions About Fetchmail <http://www.catb.org/~esr/fetchmail/fetchmail-FAQ.html>`_
    +      The FAQ for the :program:`fetchmail` POP/IMAP client collects information on
    +      POP3 server variations and RFC noncompliance that may be useful if you need to
    +      write an application based on the POP protocol.
    +
    +
    +.. _pop3-objects:
    +
    +POP3 Objects
    +------------
    +
    +All POP3 commands are represented by methods of the same name, in lower-case;
    +most return the response text sent by the server.
    +
    +A :class:`POP3` instance has the following methods:
    +
    +
    +.. method:: POP3.set_debuglevel(level)
    +
    +   Set the instance's debugging level.  This controls the amount of debugging
    +   output printed.  The default, ``0``, produces no debugging output.  A value of
    +   ``1`` produces a moderate amount of debugging output, generally a single line
    +   per request.  A value of ``2`` or higher produces the maximum amount of
    +   debugging output, logging each line sent and received on the control connection.
    +
    +
    +.. method:: POP3.getwelcome()
    +
    +   Returns the greeting string sent by the POP3 server.
    +
    +
    +.. method:: POP3.user(username)
    +
    +   Send user command, response should indicate that a password is required.
    +
    +
    +.. method:: POP3.pass_(password)
    +
    +   Send password, response includes message count and mailbox size. Note: the
    +   mailbox on the server is locked until :meth:`quit` is called.
    +
    +
    +.. method:: POP3.apop(user, secret)
    +
    +   Use the more secure APOP authentication to log into the POP3 server.
    +
    +
    +.. method:: POP3.rpop(user)
    +
    +   Use RPOP authentication (similar to UNIX r-commands) to log into POP3 server.
    +
    +
    +.. method:: POP3.stat()
    +
    +   Get mailbox status.  The result is a tuple of 2 integers: ``(message count,
    +   mailbox size)``.
    +
    +
    +.. method:: POP3.list([which])
    +
    +   Request message list, result is in the form ``(response, ['mesg_num octets',
    +   ...], octets)``. If *which* is set, it is the message to list.
    +
    +
    +.. method:: POP3.retr(which)
    +
    +   Retrieve whole message number *which*, and set its seen flag. Result is in form
    +   ``(response, ['line', ...], octets)``.
    +
    +
    +.. method:: POP3.dele(which)
    +
    +   Flag message number *which* for deletion.  On most servers deletions are not
    +   actually performed until QUIT (the major exception is Eudora QPOP, which
    +   deliberately violates the RFCs by doing pending deletes on any disconnect).
    +
    +
    +.. method:: POP3.rset()
    +
    +   Remove any deletion marks for the mailbox.
    +
    +
    +.. method:: POP3.noop()
    +
    +   Do nothing.  Might be used as a keep-alive.
    +
    +
    +.. method:: POP3.quit()
    +
    +   Signoff:  commit changes, unlock mailbox, drop connection.
    +
    +
    +.. method:: POP3.top(which, howmuch)
    +
    +   Retrieves the message header plus *howmuch* lines of the message after the
    +   header of message number *which*. Result is in form ``(response, ['line', ...],
    +   octets)``.
    +
    +   The POP3 TOP command this method uses, unlike the RETR command, doesn't set the
    +   message's seen flag; unfortunately, TOP is poorly specified in the RFCs and is
    +   frequently broken in off-brand servers. Test this method by hand against the
    +   POP3 servers you will use before trusting it.
    +
    +
    +.. method:: POP3.uidl([which])
    +
    +   Return message digest (unique id) list. If *which* is specified, result contains
    +   the unique id for that message in the form ``'response mesgnum uid'``, otherwise
    +   result is list ``(response, ['mesgnum uid', ...], octets)``.
    +
    +Instances of :class:`POP3_SSL` have no additional methods. The interface of this
    +subclass is identical to its parent.
    +
    +
    +.. _pop3-example:
    +
    +POP3 Example
    +------------
    +
    +Here is a minimal example (without error checking) that opens a mailbox and
    +retrieves and prints all messages::
    +
    +   import getpass, poplib
    +
    +   M = poplib.POP3('localhost')
    +   M.user(getpass.getuser())
    +   M.pass_(getpass.getpass())
    +   numMessages = len(M.list()[1])
    +   for i in range(numMessages):
    +       for j in M.retr(i+1)[1]:
    +           print j
    +
    +At the end of the module, there is a test section that contains a more extensive
    +example of usage.
    +
    diff --git a/Doc/library/posix.rst b/Doc/library/posix.rst
    new file mode 100644
    index 0000000..07ecb48
    --- /dev/null
    +++ b/Doc/library/posix.rst
    @@ -0,0 +1,103 @@
    +
    +:mod:`posix` --- The most common POSIX system calls
    +===================================================
    +
    +.. module:: posix
    +   :platform: Unix
    +   :synopsis: The most common POSIX system calls (normally used via module os).
    +
    +
    +This module provides access to operating system functionality that is
    +standardized by the C Standard and the POSIX standard (a thinly disguised Unix
    +interface).
    +
    +.. index:: module: os
    +
    +**Do not import this module directly.**  Instead, import the module :mod:`os`,
    +which provides a *portable* version of this interface.  On Unix, the :mod:`os`
    +module provides a superset of the :mod:`posix` interface.  On non-Unix operating
    +systems the :mod:`posix` module is not available, but a subset is always
    +available through the :mod:`os` interface.  Once :mod:`os` is imported, there is
    +*no* performance penalty in using it instead of :mod:`posix`.  In addition,
    +:mod:`os` provides some additional functionality, such as automatically calling
    +:func:`putenv` when an entry in ``os.environ`` is changed.
    +
    +The descriptions below are very terse; refer to the corresponding Unix manual
    +(or POSIX documentation) entry for more information. Arguments called *path*
    +refer to a pathname given as a string.
    +
    +Errors are reported as exceptions; the usual exceptions are given for type
    +errors, while errors reported by the system calls raise :exc:`error` (a synonym
    +for the standard exception :exc:`OSError`), described below.
    +
    +
    +.. _posix-large-files:
    +
    +Large File Support
    +------------------
    +
    +.. index::
    +   single: large files
    +   single: file; large files
    +
    +.. sectionauthor:: Steve Clift 
    +
    +
    +Several operating systems (including AIX, HPUX, Irix and Solaris) provide
    +support for files that are larger than 2 GB from a C programming model where
    +:ctype:`int` and :ctype:`long` are 32-bit values. This is typically accomplished
    +by defining the relevant size and offset types as 64-bit values. Such files are
    +sometimes referred to as :dfn:`large files`.
    +
    +Large file support is enabled in Python when the size of an :ctype:`off_t` is
    +larger than a :ctype:`long` and the :ctype:`long long` type is available and is
    +at least as large as an :ctype:`off_t`. Python longs are then used to represent
    +file sizes, offsets and other values that can exceed the range of a Python int.
    +It may be necessary to configure and compile Python with certain compiler flags
    +to enable this mode. For example, it is enabled by default with recent versions
    +of Irix, but with Solaris 2.6 and 2.7 you need to do something like::
    +
    +   CFLAGS="`getconf LFS_CFLAGS`" OPT="-g -O2 $CFLAGS" \
    +           ./configure
    +
    +On large-file-capable Linux systems, this might work:
    +
    +.. % $ <-- bow to font-lock
    +
    +::
    +
    +   CFLAGS='-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64' OPT="-g -O2 $CFLAGS" \
    +           ./configure
    +
    +.. % $ <-- bow to font-lock
    +
    +
    +.. _posix-contents:
    +
    +Module Contents
    +---------------
    +
    +Module :mod:`posix` defines the following data item:
    +
    +
    +.. data:: environ
    +
    +   A dictionary representing the string environment at the time the interpreter was
    +   started. For example, ``environ['HOME']`` is the pathname of your home
    +   directory, equivalent to ``getenv("HOME")`` in C.
    +
    +   Modifying this dictionary does not affect the string environment passed on by
    +   :func:`execv`, :func:`popen` or :func:`system`; if you need to change the
    +   environment, pass ``environ`` to :func:`execve` or add variable assignments and
    +   export statements to the command string for :func:`system` or :func:`popen`.
    +
    +   .. note::
    +
    +      The :mod:`os` module provides an alternate implementation of ``environ`` which
    +      updates the environment on modification.  Note also that updating ``os.environ``
    +      will render this dictionary obsolete.  Use of the :mod:`os` module version of
    +      this is recommended over direct access to the :mod:`posix` module.
    +
    +Additional contents of this module should only be accessed via the :mod:`os`
    +module; refer to the documentation for that module for further information.
    +
    diff --git a/Doc/library/pprint.rst b/Doc/library/pprint.rst
    new file mode 100644
    index 0000000..3630176
    --- /dev/null
    +++ b/Doc/library/pprint.rst
    @@ -0,0 +1,213 @@
    +
    +:mod:`pprint` --- Data pretty printer
    +=====================================
    +
    +.. module:: pprint
    +   :synopsis: Data pretty printer.
    +.. moduleauthor:: Fred L. Drake, Jr. 
    +.. sectionauthor:: Fred L. Drake, Jr. 
    +
    +
    +The :mod:`pprint` module provides a capability to "pretty-print" arbitrary
    +Python data structures in a form which can be used as input to the interpreter.
    +If the formatted structures include objects which are not fundamental Python
    +types, the representation may not be loadable.  This may be the case if objects
    +such as files, sockets, classes, or instances are included, as well as many
    +other builtin objects which are not representable as Python constants.
    +
    +The formatted representation keeps objects on a single line if it can, and
    +breaks them onto multiple lines if they don't fit within the allowed width.
    +Construct :class:`PrettyPrinter` objects explicitly if you need to adjust the
    +width constraint.
    +
    +.. versionchanged:: 2.5
    +   Dictionaries are sorted by key before the display is computed; before 2.5, a
    +   dictionary was sorted only if its display required more than one line, although
    +   that wasn't documented.
    +
    +The :mod:`pprint` module defines one class:
    +
    +.. % First the implementation class:
    +
    +
    +.. class:: PrettyPrinter(...)
    +
    +   Construct a :class:`PrettyPrinter` instance.  This constructor understands
    +   several keyword parameters.  An output stream may be set using the *stream*
    +   keyword; the only method used on the stream object is the file protocol's
    +   :meth:`write` method.  If not specified, the :class:`PrettyPrinter` adopts
    +   ``sys.stdout``.  Three additional parameters may be used to control the
    +   formatted representation.  The keywords are *indent*, *depth*, and *width*.  The
    +   amount of indentation added for each recursive level is specified by *indent*;
    +   the default is one.  Other values can cause output to look a little odd, but can
    +   make nesting easier to spot.  The number of levels which may be printed is
    +   controlled by *depth*; if the data structure being printed is too deep, the next
    +   contained level is replaced by ``...``.  By default, there is no constraint on
    +   the depth of the objects being formatted.  The desired output width is
    +   constrained using the *width* parameter; the default is 80 characters.  If a
    +   structure cannot be formatted within the constrained width, a best effort will
    +   be made. ::
    +
    +      >>> import pprint, sys
    +      >>> stuff = sys.path[:]
    +      >>> stuff.insert(0, stuff[:])
    +      >>> pp = pprint.PrettyPrinter(indent=4)
    +      >>> pp.pprint(stuff)
    +      [   [   '',
    +              '/usr/local/lib/python1.5',
    +              '/usr/local/lib/python1.5/test',
    +              '/usr/local/lib/python1.5/sunos5',
    +              '/usr/local/lib/python1.5/sharedmodules',
    +              '/usr/local/lib/python1.5/tkinter'],
    +          '',
    +          '/usr/local/lib/python1.5',
    +          '/usr/local/lib/python1.5/test',
    +          '/usr/local/lib/python1.5/sunos5',
    +          '/usr/local/lib/python1.5/sharedmodules',
    +          '/usr/local/lib/python1.5/tkinter']
    +      >>>
    +      >>> import parser
    +      >>> tup = parser.ast2tuple(
    +      ...     parser.suite(open('pprint.py').read()))[1][1][1]
    +      >>> pp = pprint.PrettyPrinter(depth=6)
    +      >>> pp.pprint(tup)
    +      (266, (267, (307, (287, (288, (...))))))
    +
    +The :class:`PrettyPrinter` class supports several derivative functions:
    +
    +.. % Now the derivative functions:
    +
    +
    +.. function:: pformat(object[, indent[, width[, depth]]])
    +
    +   Return the formatted representation of *object* as a string.  *indent*, *width*
    +   and *depth* will be passed to the :class:`PrettyPrinter` constructor as
    +   formatting parameters.
    +
    +   .. versionchanged:: 2.4
    +      The parameters *indent*, *width* and *depth* were added.
    +
    +
    +.. function:: pprint(object[, stream[, indent[, width[, depth]]]])
    +
    +   Prints the formatted representation of *object* on *stream*, followed by a
    +   newline.  If *stream* is omitted, ``sys.stdout`` is used.  This may be used in
    +   the interactive interpreter instead of a :keyword:`print` statement for
    +   inspecting values.    *indent*, *width* and *depth* will be passed to the
    +   :class:`PrettyPrinter` constructor as formatting parameters. ::
    +
    +      >>> stuff = sys.path[:]
    +      >>> stuff.insert(0, stuff)
    +      >>> pprint.pprint(stuff)
    +      [<Recursion on list with id=869440>,
    +       '',
    +       '/usr/local/lib/python1.5',
    +       '/usr/local/lib/python1.5/test',
    +       '/usr/local/lib/python1.5/sunos5',
    +       '/usr/local/lib/python1.5/sharedmodules',
    +       '/usr/local/lib/python1.5/tkinter']
    +
    +   .. versionchanged:: 2.4
    +      The parameters *indent*, *width* and *depth* were added.
    +
    +
    +.. function:: isreadable(object)
    +
    +   .. index:: builtin: eval
    +
    +   Determine if the formatted representation of *object* is "readable," or can be
    +   used to reconstruct the value using :func:`eval`.  This always returns ``False``
    +   for recursive objects. ::
    +
    +      >>> pprint.isreadable(stuff)
    +      False
    +
    +
    +.. function:: isrecursive(object)
    +
    +   Determine if *object* requires a recursive representation.
    +
    +One more support function is also defined:
    +
    +
    +.. function:: saferepr(object)
    +
    +   Return a string representation of *object*, protected against recursive data
    +   structures.  If the representation of *object* exposes a recursive entry, the
    +   recursive reference will be represented as ``<Recursion on typename with
    +   id=number>``.  The representation is not otherwise formatted.
    +
    +.. % This example is outside the {funcdesc} to keep it from running over
    +.. % the right margin.
    +
    +::
    +
    +   >>> pprint.saferepr(stuff)
    +   "[<Recursion on list with id=682968>, '', '/usr/local/lib/python1.5', '/usr/loca
    +   l/lib/python1.5/test', '/usr/local/lib/python1.5/sunos5', '/usr/local/lib/python
    +   1.5/sharedmodules', '/usr/local/lib/python1.5/tkinter']"
    +
    +
    +.. _prettyprinter-objects:
    +
    +PrettyPrinter Objects
    +---------------------
    +
    +:class:`PrettyPrinter` instances have the following methods:
    +
    +
    +.. method:: PrettyPrinter.pformat(object)
    +
    +   Return the formatted representation of *object*.  This takes into account the
    +   options passed to the :class:`PrettyPrinter` constructor.
    +
    +
    +.. method:: PrettyPrinter.pprint(object)
    +
    +   Print the formatted representation of *object* on the configured stream,
    +   followed by a newline.
    +
    +The following methods provide the implementations for the corresponding
    +functions of the same names.  Using these methods on an instance is slightly
    +more efficient since new :class:`PrettyPrinter` objects don't need to be
    +created.
    +
    +
    +.. method:: PrettyPrinter.isreadable(object)
    +
    +   .. index:: builtin: eval
    +
    +   Determine if the formatted representation of the object is "readable," or can be
    +   used to reconstruct the value using :func:`eval`.  Note that this returns
    +   ``False`` for recursive objects.  If the *depth* parameter of the
    +   :class:`PrettyPrinter` is set and the object is deeper than allowed, this
    +   returns ``False``.
    +
    +
    +.. method:: PrettyPrinter.isrecursive(object)
    +
    +   Determine if the object requires a recursive representation.
    +
    +This method is provided as a hook to allow subclasses to modify the way objects
    +are converted to strings.  The default implementation uses the internals of the
    +:func:`saferepr` implementation.
    +
    +
    +.. method:: PrettyPrinter.format(object, context, maxlevels, level)
    +
    +   Returns three values: the formatted version of *object* as a string, a flag
    +   indicating whether the result is readable, and a flag indicating whether
    +   recursion was detected.  The first argument is the object to be presented.  The
    +   second is a dictionary which contains the :func:`id` of objects that are part of
    +   the current presentation context (direct and indirect containers for *object*
    +   that are affecting the presentation) as the keys; if an object needs to be
    +   presented which is already represented in *context*, the third return value
    +   should be ``True``.  Recursive calls to the :meth:`format` method should add
    +   additional entries for containers to this dictionary.  The third argument,
    +   *maxlevels*, gives the requested limit to recursion; this will be ``0`` if there
    +   is no requested limit.  This argument should be passed unmodified to recursive
    +   calls. The fourth argument, *level*, gives the current level; recursive calls
    +   should be passed a value less than that of the current call.
    +
    +   .. versionadded:: 2.3
    +
    diff --git a/Doc/library/profile.rst b/Doc/library/profile.rst
    new file mode 100644
    index 0000000..2ab24c5
    --- /dev/null
    +++ b/Doc/library/profile.rst
    @@ -0,0 +1,682 @@
    +
    +.. _profile:
    +
    +********************
    +The Python Profilers
    +********************
    +
    +.. sectionauthor:: James Roskind
    +
    +
    +.. index:: single: InfoSeek Corporation
    +
    +Copyright © 1994, by InfoSeek Corporation, all rights reserved.
    +
    +Written by James Roskind. [#]_
    +
    +Permission to use, copy, modify, and distribute this Python software and its
    +associated documentation for any purpose (subject to the restriction in the
    +following sentence) without fee is hereby granted, provided that the above
    +copyright notice appears in all copies, and that both that copyright notice and
    +this permission notice appear in supporting documentation, and that the name of
    +InfoSeek not be used in advertising or publicity pertaining to distribution of
    +the software without specific, written prior permission.  This permission is
    +explicitly restricted to the copying and modification of the software to remain
    +in Python, compiled Python, or other languages (such as C) wherein the modified
    +or derived code is exclusively imported into a Python module.
    +
    +INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
    +INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT
    +SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
    +DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
    +WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
    +OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
    +
    +The profiler was written after only programming in Python for 3 weeks. As a
    +result, it is probably clumsy code, but I don't know for sure yet 'cause I'm a
    +beginner :-).  I did work hard to make the code run fast, so that profiling
    +would be a reasonable thing to do.  I tried not to repeat code fragments, but
    +I'm sure I did some stuff in really awkward ways at times.  Please send
    +suggestions for improvements to: jar@netscape.com.  I won't promise *any*
    +support.  ...but I'd appreciate the feedback.
    +
    +
    +.. _profiler-introduction:
    +
    +Introduction to the profilers
    +=============================
    +
    +.. index::
    +   single: deterministic profiling
    +   single: profiling, deterministic
    +
    +A :dfn:`profiler` is a program that describes the run time performance of a
    +program, providing a variety of statistics.  This documentation describes the
    +profiler functionality provided in the modules :mod:`profile` and :mod:`pstats`.
    +This profiler provides :dfn:`deterministic profiling` of any Python programs.
    +It also provides a series of report generation tools to allow users to rapidly
    +examine the results of a profile operation.
    +
    +The Python standard library provides three different profilers:
    +
    +#. :mod:`profile`, a pure Python module, described in the sequel. Copyright ©
    +   1994, by InfoSeek Corporation.
    +
    +   .. versionchanged:: 2.4
    +      also reports the time spent in calls to built-in functions and methods.
    +
    +#. :mod:`cProfile`, a module written in C, with a reasonable overhead that makes
    +   it suitable for profiling long-running programs. Based on :mod:`lsprof`,
    +   contributed by Brett Rosen and Ted Czotter.
    +
    +   .. versionadded:: 2.5
    +
    +#. :mod:`hotshot`, a C module focusing on minimizing the overhead while
    +   profiling, at the expense of long data post-processing times.
    +
    +   .. versionchanged:: 2.5
    +      the results should be more meaningful than in the past: the timing core
    +      contained a critical bug.
    +
    +The :mod:`profile` and :mod:`cProfile` modules export the same interface, so
    +they are mostly interchangeable; :mod:`cProfile` has a much lower overhead but
    +has not yet been as well tested and might not be available on all systems.
    +:mod:`cProfile` is really a compatibility layer on top of the internal
    +:mod:`_lsprof` module.  The :mod:`hotshot` module is reserved for specialized
    +usage.
    +
    +.. % \section{How Is This Profiler Different From The Old Profiler?}
    +.. % \nodename{Profiler Changes}
    +.. % 
    +.. % (This section is of historical importance only; the old profiler
    +.. % discussed here was last seen in Python 1.1.)
    +.. % 
    +.. % The big changes from old profiling module are that you get more
    +.. % information, and you pay less CPU time.  It's not a trade-off, it's a
    +.. % trade-up.
    +.. % 
    +.. % To be specific:
    +.. % 
    +.. % \begin{description}
    +.. % 
    +.. % \item[Bugs removed:]
    +.. % Local stack frame is no longer molested, execution time is now charged
    +.. % to correct functions.
    +.. % 
    +.. % \item[Accuracy increased:]
    +.. % Profiler execution time is no longer charged to user's code,
    +.. % calibration for platform is supported, file reads are not done \emph{by}
    +.. % profiler \emph{during} profiling (and charged to user's code!).
    +.. % 
    +.. % \item[Speed increased:]
    +.. % Overhead CPU cost was reduced by more than a factor of two (perhaps a
    +.. % factor of five), lightweight profiler module is all that must be
    +.. % loaded, and the report generating module (\module{pstats}) is not needed
    +.. % during profiling.
    +.. % 
    +.. % \item[Recursive functions support:]
    +.. % Cumulative times in recursive functions are correctly calculated;
    +.. % recursive entries are counted.
    +.. % 
    +.. % \item[Large growth in report generating UI:]
    +.. % Distinct profiles runs can be added together forming a comprehensive
    +.. % report; functions that import statistics take arbitrary lists of
    +.. % files; sorting criteria is now based on keywords (instead of 4 integer
    +.. % options); reports shows what functions were profiled as well as what
    +.. % profile file was referenced; output format has been improved.
    +.. % 
    +.. % \end{description}
    +
    +
    +.. _profile-instant:
    +
    +Instant User's Manual
    +=====================
    +
    +This section is provided for users that "don't want to read the manual." It
    +provides a very brief overview, and allows a user to rapidly perform profiling
    +on an existing application.
    +
    +To profile an application with a main entry point of :func:`foo`, you would add
    +the following to your module::
    +
    +   import cProfile
    +   cProfile.run('foo()')
    +
    +(Use :mod:`profile` instead of :mod:`cProfile` if the latter is not available on
    +your system.)
    +
    +The above action would cause :func:`foo` to be run, and a series of informative
    +lines (the profile) to be printed.  The above approach is most useful when
    +working with the interpreter.  If you would like to save the results of a
    +profile into a file for later examination, you can supply a file name as the
    +second argument to the :func:`run` function::
    +
    +   import cProfile
    +   cProfile.run('foo()', 'fooprof')
    +
    +The file :file:`cProfile.py` can also be invoked as a script to profile another
    +script.  For example::
    +
    +   python -m cProfile myscript.py
    +
    +:file:`cProfile.py` accepts two optional arguments on the command line::
    +
    +   cProfile.py [-o output_file] [-s sort_order]
    +
    +:option:`-s` only applies to standard output (that is, when :option:`-o` is not
    +supplied).  Look in the :class:`Stats` documentation for valid sort values.
    +
    +When you wish to review the profile, you should use the methods in the
    +:mod:`pstats` module.  Typically you would load the statistics data as follows::
    +
    +   import pstats
    +   p = pstats.Stats('fooprof')
    +
    +The class :class:`Stats` (the above code just created an instance of this class)
    +has a variety of methods for manipulating and printing the data that was just
    +read into ``p``.  When you ran :func:`cProfile.run` above, what was printed was
    +the result of three method calls::
    +
    +   p.strip_dirs().sort_stats(-1).print_stats()
    +
    +The first method removed the extraneous path from all the module names. The
    +second method sorted all the entries according to the standard module/line/name
    +string that is printed. The third method printed out all the statistics.  You
    +might try the following sort calls:
    +
    +.. % (this is to comply with the semantics of the old profiler).
    +
    +::
    +
    +   p.sort_stats('name')
    +   p.print_stats()
    +
    +The first call will actually sort the list by function name, and the second call
    +will print out the statistics.  The following are some interesting calls to
    +experiment with::
    +
    +   p.sort_stats('cumulative').print_stats(10)
    +
    +This sorts the profile by cumulative time in a function, and then only prints
    +the ten most significant lines.  If you want to understand what algorithms are
    +taking time, the above line is what you would use.
    +
    +If you were looking to see what functions were looping a lot, and taking a lot
    +of time, you would do::
    +
    +   p.sort_stats('time').print_stats(10)
    +
    +to sort according to time spent within each function, and then print the
    +statistics for the top ten functions.
    +
    +You might also try::
    +
    +   p.sort_stats('file').print_stats('__init__')
    +
    +This will sort all the statistics by file name, and then print out statistics
    +for only the class init methods (since they are spelled with ``__init__`` in
    +them).  As one final example, you could try::
    +
    +   p.sort_stats('time', 'cum').print_stats(.5, 'init')
    +
    +This line sorts statistics with a primary key of time, and a secondary key of
    +cumulative time, and then prints out some of the statistics. To be specific, the
    +list is first culled down to 50% (re: ``.5``) of its original size, then only
    +lines containing ``init`` are maintained, and that sub-sub-list is printed.
    +
    +If you wondered what functions called the above functions, you could now (``p``
    +is still sorted according to the last criteria) do::
    +
    +   p.print_callers(.5, 'init')
    +
    +and you would get a list of callers for each of the listed functions.
    +
    +If you want more functionality, you're going to have to read the manual, or
    +guess what the following functions do::
    +
    +   p.print_callees()
    +   p.add('fooprof')
    +
    +Invoked as a script, the :mod:`pstats` module is a statistics browser for
    +reading and examining profile dumps.  It has a simple line-oriented interface
    +(implemented using :mod:`cmd`) and interactive help.
    +
    +
    +.. _deterministic-profiling:
    +
    +What Is Deterministic Profiling?
    +================================
    +
    +:dfn:`Deterministic profiling` is meant to reflect the fact that all *function
    +call*, *function return*, and *exception* events are monitored, and precise
    +timings are made for the intervals between these events (during which time the
    +user's code is executing).  In contrast, :dfn:`statistical profiling` (which is
    +not done by this module) randomly samples the effective instruction pointer, and
    +deduces where time is being spent.  The latter technique traditionally involves
    +less overhead (as the code does not need to be instrumented), but provides only
    +relative indications of where time is being spent.
    +
    +In Python, since there is an interpreter active during execution, the presence
    +of instrumented code is not required to do deterministic profiling.  Python
    +automatically provides a :dfn:`hook` (optional callback) for each event.  In
    +addition, the interpreted nature of Python tends to add so much overhead to
    +execution, that deterministic profiling tends to only add small processing
    +overhead in typical applications.  The result is that deterministic profiling is
    +not that expensive, yet provides extensive run time statistics about the
    +execution of a Python program.
    +
    +Call count statistics can be used to identify bugs in code (surprising counts),
    +and to identify possible inline-expansion points (high call counts).  Internal
    +time statistics can be used to identify "hot loops" that should be carefully
    +optimized.  Cumulative time statistics should be used to identify high level
    +errors in the selection of algorithms.  Note that the unusual handling of
    +cumulative times in this profiler allows statistics for recursive
    +implementations of algorithms to be directly compared to iterative
    +implementations.
    +
    +
    +Reference Manual -- :mod:`profile` and :mod:`cProfile`
    +======================================================
    +
    +.. module:: cProfile
    +   :synopsis: Python profiler
    +
    +
    +The primary entry point for the profiler is the global function
    +:func:`profile.run` (resp. :func:`cProfile.run`). It is typically used to create
    +any profile information.  The reports are formatted and printed using methods of
    +the class :class:`pstats.Stats`.  The following is a description of all of these
    +standard entry points and functions.  For a more in-depth view of some of the
    +code, consider reading the later section on Profiler Extensions, which includes
    +discussion of how to derive "better" profilers from the classes presented, or
    +reading the source code for these modules.
    +
    +
    +.. function:: run(command[, filename])
    +
    +   This function takes a single argument that can be passed to the :func:`exec`
    +   function, and an optional file name.  In all cases this routine attempts to
    +   :func:`exec` its first argument, and gather profiling statistics from the
    +   execution. If no file name is present, then this function automatically
    +   prints a simple profiling report, sorted by the standard name string
    +   (file/line/function-name) that is presented in each line.  The following is a
    +   typical output from such a call::
    +
    +            2706 function calls (2004 primitive calls) in 4.504 CPU seconds
    +
    +      Ordered by: standard name
    +
    +      ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    +           2    0.006    0.003    0.953    0.477 pobject.py:75(save_objects)
    +        43/3    0.533    0.012    0.749    0.250 pobject.py:99(evaluate)
    +       ...
    +
    +   The first line indicates that 2706 calls were monitored.  Of those calls, 2004
    +   were :dfn:`primitive`.  We define :dfn:`primitive` to mean that the call was not
    +   induced via recursion. The next line: ``Ordered by: standard name``, indicates
    +   that the text string in the far right column was used to sort the output. The
    +   column headings include:
    +
    +   ncalls 
    +      for the number of calls,
    +
    +   tottime 
    +      for the total time spent in the given function (and excluding time spent in
    +      calls to sub-functions),
    +
    +   percall 
    +      is the quotient of ``tottime`` divided by ``ncalls``
    +
    +   cumtime 
    +      is the total time spent in this and all subfunctions (from invocation till
    +      exit). This figure is accurate *even* for recursive functions.
    +
    +   percall 
    +      is the quotient of ``cumtime`` divided by primitive calls
    +
    +   filename:lineno(function) 
    +      provides the respective data of each function
    +
    +   When there are two numbers in the first column (for example, ``43/3``), then the
    +   latter is the number of primitive calls, and the former is the actual number of
    +   calls.  Note that when the function does not recurse, these two values are the
    +   same, and only the single figure is printed.
    +
    +
    +.. function:: runctx(command, globals, locals[, filename])
    +
    +   This function is similar to :func:`run`, with added arguments to supply the
    +   globals and locals dictionaries for the *command* string.
    +
    +Analysis of the profiler data is done using the :class:`Stats` class.
    +
    +.. note::
    +
    +   The :class:`Stats` class is defined in the :mod:`pstats` module.
    +
    +
    +.. module:: pstats
    +   :synopsis: Statistics object for use with the profiler.
    +
    +
    +.. class:: Stats(filename[, stream=sys.stdout[, ...]])
    +
    +   This class constructor creates an instance of a "statistics object" from a
    +   *filename* (or set of filenames).  :class:`Stats` objects are manipulated by
    +   methods, in order to print useful reports.  You may specify an alternate output
    +   stream by giving the keyword argument, ``stream``.
    +
    +   The file selected by the above constructor must have been created by the
    +   corresponding version of :mod:`profile` or :mod:`cProfile`.  To be specific,
    +   there is *no* file compatibility guaranteed with future versions of this
    +   profiler, and there is no compatibility with files produced by other profilers.
    +   If several files are provided, all the statistics for identical functions will
    +   be coalesced, so that an overall view of several processes can be considered in
    +   a single report.  If additional files need to be combined with data in an
    +   existing :class:`Stats` object, the :meth:`add` method can be used.
    +
    +   .. % (such as the old system profiler).
    +
    +   .. versionchanged:: 2.5
    +      The *stream* parameter was added.
    +
    +
    +.. _profile-stats:
    +
    +The :class:`Stats` Class
    +------------------------
    +
    +:class:`Stats` objects have the following methods:
    +
    +
    +.. method:: Stats.strip_dirs()
    +
    +   This method for the :class:`Stats` class removes all leading path information
    +   from file names.  It is very useful in reducing the size of the printout to fit
    +   within (close to) 80 columns.  This method modifies the object, and the stripped
    +   information is lost.  After performing a strip operation, the object is
    +   considered to have its entries in a "random" order, as it was just after object
    +   initialization and loading.  If :meth:`strip_dirs` causes two function names to
    +   be indistinguishable (they are on the same line of the same filename, and have
    +   the same function name), then the statistics for these two entries are
    +   accumulated into a single entry.
    +
    +
    +.. method:: Stats.add(filename[, ...])
    +
    +   This method of the :class:`Stats` class accumulates additional profiling
    +   information into the current profiling object.  Its arguments should refer to
    +   filenames created by the corresponding version of :func:`profile.run` or
    +   :func:`cProfile.run`. Statistics for identically named (re: file, line, name)
    +   functions are automatically accumulated into single function statistics.
    +
    +
    +.. method:: Stats.dump_stats(filename)
    +
    +   Save the data loaded into the :class:`Stats` object to a file named *filename*.
    +   The file is created if it does not exist, and is overwritten if it already
    +   exists.  This is equivalent to the method of the same name on the
    +   :class:`profile.Profile` and :class:`cProfile.Profile` classes.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. method:: Stats.sort_stats(key[, ...])
    +
    +   This method modifies the :class:`Stats` object by sorting it according to the
    +   supplied criteria.  The argument is typically a string identifying the basis of
    +   a sort (example: ``'time'`` or ``'name'``).
    +
    +   When more than one key is provided, then additional keys are used as secondary
    +   criteria when there is equality in all keys selected before them.  For example,
    +   ``sort_stats('name', 'file')`` will sort all the entries according to their
    +   function name, and resolve all ties (identical function names) by sorting by
    +   file name.
    +
    +   Abbreviations can be used for any key names, as long as the abbreviation is
    +   unambiguous.  The following are the keys currently defined:
    +
    +   +------------------+----------------------+
    +   | Valid Arg        | Meaning              |
    +   +==================+======================+
    +   | ``'calls'``      | call count           |
    +   +------------------+----------------------+
    +   | ``'cumulative'`` | cumulative time      |
    +   +------------------+----------------------+
    +   | ``'file'``       | file name            |
    +   +------------------+----------------------+
    +   | ``'module'``     | file name            |
    +   +------------------+----------------------+
    +   | ``'pcalls'``     | primitive call count |
    +   +------------------+----------------------+
    +   | ``'line'``       | line number          |
    +   +------------------+----------------------+
    +   | ``'name'``       | function name        |
    +   +------------------+----------------------+
    +   | ``'nfl'``        | name/file/line       |
    +   +------------------+----------------------+
    +   | ``'stdname'``    | standard name        |
    +   +------------------+----------------------+
    +   | ``'time'``       | internal time        |
    +   +------------------+----------------------+
    +
    +   Note that all sorts on statistics are in descending order (placing most time
    +   consuming items first), whereas name, file, and line number sorts are in
    +   ascending order (alphabetical). The subtle distinction between ``'nfl'`` and
    +   ``'stdname'`` is that the standard name is a sort of the name as printed, which
    +   means that the embedded line numbers get compared in an odd way.  For example,
    +   lines 3, 20, and 40 would (if the file names were the same) appear in the string
    +   order 20, 3 and 40.  In contrast, ``'nfl'`` does a numeric compare of the line
    +   numbers.  In fact, ``sort_stats('nfl')`` is the same as ``sort_stats('name',
    +   'file', 'line')``.
    +
    +   For backward-compatibility reasons, the numeric arguments ``-1``, ``0``, ``1``,
    +   and ``2`` are permitted.  They are interpreted as ``'stdname'``, ``'calls'``,
    +   ``'time'``, and ``'cumulative'`` respectively.  If this old style format
    +   (numeric) is used, only one sort key (the numeric key) will be used, and
    +   additional arguments will be silently ignored.
    +
    +   .. % For compatibility with the old profiler,
    +
    +
    +.. method:: Stats.reverse_order()
    +
    +   This method for the :class:`Stats` class reverses the ordering of the basic list
    +   within the object.  Note that by default ascending vs descending order is
    +   properly selected based on the sort key of choice.
    +
    +   .. % This method is provided primarily for
    +   .. % compatibility with the old profiler.
    +
    +
    +.. method:: Stats.print_stats([restriction, ...])
    +
    +   This method for the :class:`Stats` class prints out a report as described in the
    +   :func:`profile.run` definition.
    +
    +   The order of the printing is based on the last :meth:`sort_stats` operation done
    +   on the object (subject to caveats in :meth:`add` and :meth:`strip_dirs`).
    +
    +   The arguments provided (if any) can be used to limit the list down to the
    +   significant entries.  Initially, the list is taken to be the complete set of
    +   profiled functions.  Each restriction is either an integer (to select a count of
    +   lines), or a decimal fraction between 0.0 and 1.0 inclusive (to select a
    +   percentage of lines), or a regular expression (to pattern match the standard
    +   name that is printed; as of Python 1.5b1, this uses the Perl-style regular
    +   expression syntax defined by the :mod:`re` module).  If several restrictions are
    +   provided, then they are applied sequentially.  For example::
    +
    +      print_stats(.1, 'foo:')
    +
    +   would first limit the printing to first 10% of list, and then only print
    +   functions that were part of filename :file:`.\*foo:`.  In contrast, the
    +   command::
    +
    +      print_stats('foo:', .1)
    +
    +   would limit the list to all functions having file names :file:`.\*foo:`, and
    +   then proceed to only print the first 10% of them.
    +
    +
    +.. method:: Stats.print_callers([restriction, ...])
    +
    +   This method for the :class:`Stats` class prints a list of all functions that
    +   called each function in the profiled database.  The ordering is identical to
    +   that provided by :meth:`print_stats`, and the definition of the restricting
    +   argument is also identical.  Each caller is reported on its own line.  The
    +   format differs slightly depending on the profiler that produced the stats:
    +
    +   * With :mod:`profile`, a number is shown in parentheses after each caller to
    +     show how many times this specific call was made.  For convenience, a second
    +     non-parenthesized number repeats the cumulative time spent in the function
    +     at the right.
    +
    +   * With :mod:`cProfile`, each caller is preceded by three numbers: the number of
    +     times this specific call was made, and the total and cumulative times spent in
    +     the current function while it was invoked by this specific caller.
    +
    +
    +.. method:: Stats.print_callees([restriction, ...])
    +
    +   This method for the :class:`Stats` class prints a list of all functions that
    +   were called by the indicated function.  Aside from this reversal of direction
    +   of calls (re: called vs was called by), the arguments and ordering are
    +   identical to the :meth:`print_callers` method.
    +
    +
    +.. _profile-limits:
    +
    +Limitations
    +===========
    +
    +One limitation has to do with accuracy of timing information. There is a
    +fundamental problem with deterministic profilers involving accuracy.  The most
    +obvious restriction is that the underlying "clock" is only ticking at a rate
    +(typically) of about .001 seconds.  Hence no measurements will be more accurate
    +than the underlying clock.  If enough measurements are taken, then the "error"
    +will tend to average out. Unfortunately, removing this first error induces a
    +second source of error.
    +
    +The second problem is that it "takes a while" from when an event is dispatched
    +until the profiler's call to get the time actually *gets* the state of the
    +clock.  Similarly, there is a certain lag when exiting the profiler event
    +handler from the time that the clock's value was obtained (and then squirreled
    +away), until the user's code is once again executing.  As a result, functions
    +that are called many times, or call many functions, will typically accumulate
    +this error. The error that accumulates in this fashion is typically less than
    +the accuracy of the clock (less than one clock tick), but it *can* accumulate
    +and become very significant.
    +
    +The problem is more important with :mod:`profile` than with the lower-overhead
    +:mod:`cProfile`.  For this reason, :mod:`profile` provides a means of
    +calibrating itself for a given platform so that this error can be
    +probabilistically (on the average) removed. After the profiler is calibrated, it
    +will be more accurate (in a least square sense), but it will sometimes produce
    +negative numbers (when call counts are exceptionally low, and the gods of
    +probability work against you :-). )  Do *not* be alarmed by negative numbers in
    +the profile.  They should *only* appear if you have calibrated your profiler,
    +and the results are actually better than without calibration.
    +
    +
    +.. _profile-calibration:
    +
    +Calibration
    +===========
    +
    +The profiler of the :mod:`profile` module subtracts a constant from each event
    +handling time to compensate for the overhead of calling the time function, and
    +socking away the results.  By default, the constant is 0. The following
    +procedure can be used to obtain a better constant for a given platform (see
    +discussion in section Limitations above). ::
    +
    +   import profile
    +   pr = profile.Profile()
    +   for i in range(5):
    +       print(pr.calibrate(10000))
    +
    +The method executes the number of Python calls given by the argument, directly
    +and again under the profiler, measuring the time for both. It then computes the
    +hidden overhead per profiler event, and returns that as a float.  For example,
    +on an 800 MHz Pentium running Windows 2000, and using Python's time.clock() as
    +the timer, the magical number is about 12.5e-6.
    +
    +The object of this exercise is to get a fairly consistent result. If your
    +computer is *very* fast, or your timer function has poor resolution, you might
    +have to pass 100000, or even 1000000, to get consistent results.
    +
    +When you have a consistent answer, there are three ways you can use it: [#]_ ::
    +
    +   import profile
    +
    +   # 1. Apply computed bias to all Profile instances created hereafter.
    +   profile.Profile.bias = your_computed_bias
    +
    +   # 2. Apply computed bias to a specific Profile instance.
    +   pr = profile.Profile()
    +   pr.bias = your_computed_bias
    +
    +   # 3. Specify computed bias in instance constructor.
    +   pr = profile.Profile(bias=your_computed_bias)
    +
    +If you have a choice, you are better off choosing a smaller constant, and then
    +your results will "less often" show up as negative in profile statistics.
    +
    +
    +.. _profiler-extensions:
    +
    +Extensions --- Deriving Better Profilers
    +========================================
    +
    +The :class:`Profile` class of both modules, :mod:`profile` and :mod:`cProfile`,
    +was written so that derived classes could be developed to extend the profiler.
    +The details are not described here, as doing this successfully requires an
    +expert understanding of how the :class:`Profile` class works internally.  Study
    +the source code of the module carefully if you want to pursue this.
    +
    +If all you want to do is change how current time is determined (for example, to
    +force use of wall-clock time or elapsed process time), pass the timing function
    +you want to the :class:`Profile` class constructor::
    +
    +   pr = profile.Profile(your_time_func)
    +
    +The resulting profiler will then call :func:`your_time_func`.
    +
    +:class:`profile.Profile`
    +   :func:`your_time_func` should return a single number, or a list of numbers whose
    +   sum is the current time (like what :func:`os.times` returns).  If the function
    +   returns a single time number, or the list of returned numbers has length 2, then
    +   you will get an especially fast version of the dispatch routine.
    +
    +   Be warned that you should calibrate the profiler class for the timer function
    +   that you choose.  For most machines, a timer that returns a lone integer value
    +   will provide the best results in terms of low overhead during profiling.
    +   (:func:`os.times` is *pretty* bad, as it returns a tuple of floating point
    +   values).  If you want to substitute a better timer in the cleanest fashion,
    +   derive a class and hardwire a replacement dispatch method that best handles your
    +   timer call, along with the appropriate calibration constant.
    +
    +:class:`cProfile.Profile`
    +   :func:`your_time_func` should return a single number.  If it returns plain
    +   integers, you can also invoke the class constructor with a second argument
    +   specifying the real duration of one unit of time.  For example, if
    +   :func:`your_integer_time_func` returns times measured in thousandths of
    +   seconds, you would construct the :class:`Profile` instance as follows::
    +
    +      pr = cProfile.Profile(your_integer_time_func, 0.001)
    +
    +   As the :class:`cProfile.Profile` class cannot be calibrated, custom timer
    +   functions should be used with care and should be as fast as possible.  For the
    +   best results with a custom timer, it might be necessary to hard-code it in the C
    +   source of the internal :mod:`_lsprof` module.
    +
    +.. rubric:: Footnotes
    +
    +.. [#] Updated and converted to LaTeX by Guido van Rossum. Further updated by Armin
    +   Rigo to integrate the documentation for the new :mod:`cProfile` module of Python
    +   2.5.
    +
    +.. [#] Prior to Python 2.2, it was necessary to edit the profiler source code to embed
    +   the bias as a literal number.  You still can, but that method is no longer
    +   described, because it is no longer needed.
    +
    diff --git a/Doc/library/pty.rst b/Doc/library/pty.rst
    new file mode 100644
    index 0000000..5e1da22
    --- /dev/null
    +++ b/Doc/library/pty.rst
    @@ -0,0 +1,48 @@
    +
    +:mod:`pty` --- Pseudo-terminal utilities
    +========================================
    +
    +.. module:: pty
    +   :platform: IRIX, Linux
    +   :synopsis: Pseudo-Terminal Handling for SGI and Linux.
    +.. moduleauthor:: Steen Lumholt
    +.. sectionauthor:: Moshe Zadka 
    +
    +
    +The :mod:`pty` module defines operations for handling the pseudo-terminal
    +concept: starting another process and being able to write to and read from its
    +controlling terminal programmatically.
    +
    +Because pseudo-terminal handling is highly platform dependent, there is code to
    +do it only for SGI and Linux. (The Linux code is supposed to work on other
    +platforms, but hasn't been tested yet.)
    +
    +The :mod:`pty` module defines the following functions:
    +
    +
    +.. function:: fork()
    +
    +   Fork. Connect the child's controlling terminal to a pseudo-terminal. Return
    +   value is ``(pid, fd)``. Note that the child  gets *pid* 0, and the *fd* is
    +   *invalid*. The parent's return value is the *pid* of the child, and *fd* is a
    +   file descriptor connected to the child's controlling terminal (and also to the
    +   child's standard input and output).
    +
    +
    +.. function:: openpty()
    +
    +   Open a new pseudo-terminal pair, using :func:`os.openpty` if possible, or
    +   emulation code for SGI and generic Unix systems. Return a pair of file
    +   descriptors ``(master, slave)``, for the master and the slave end, respectively.
    +
    +
    +.. function:: spawn(argv[, master_read[, stdin_read]])
    +
    +   Spawn a process, and connect its controlling terminal with the current
    +   process's standard io. This is often used to baffle programs which insist on
    +   reading from the controlling terminal.
    +
    +   The functions *master_read* and *stdin_read* should be functions which read from
    +   a file-descriptor. The defaults try to read 1024 bytes each time they are
    +   called.
    +
    diff --git a/Doc/library/pwd.rst b/Doc/library/pwd.rst
    new file mode 100644
    index 0000000..562afd9
    --- /dev/null
    +++ b/Doc/library/pwd.rst
    @@ -0,0 +1,76 @@
    +
    +:mod:`pwd` --- The password database
    +====================================
    +
    +.. module:: pwd
    +   :platform: Unix
    +   :synopsis: The password database (getpwnam() and friends).
    +
    +
    +This module provides access to the Unix user account and password database.  It
    +is available on all Unix versions.
    +
    +Password database entries are reported as a tuple-like object, whose attributes
    +correspond to the members of the ``passwd`` structure (Attribute field below,
    +see ``<pwd.h>``):
    +
    ++-------+---------------+-----------------------------+
    +| Index | Attribute     | Meaning                     |
    ++=======+===============+=============================+
    +| 0     | ``pw_name``   | Login name                  |
    ++-------+---------------+-----------------------------+
    +| 1     | ``pw_passwd`` | Optional encrypted password |
    ++-------+---------------+-----------------------------+
    +| 2     | ``pw_uid``    | Numerical user ID           |
    ++-------+---------------+-----------------------------+
    +| 3     | ``pw_gid``    | Numerical group ID          |
    ++-------+---------------+-----------------------------+
    +| 4     | ``pw_gecos``  | User name or comment field  |
    ++-------+---------------+-----------------------------+
    +| 5     | ``pw_dir``    | User home directory         |
    ++-------+---------------+-----------------------------+
    +| 6     | ``pw_shell``  | User command interpreter    |
    ++-------+---------------+-----------------------------+
    +
    +The uid and gid items are integers, all others are strings. :exc:`KeyError` is
    +raised if the entry asked for cannot be found.
    +
    +.. note::
    +
    +   .. index:: module: crypt
    +
    +   In traditional Unix the field ``pw_passwd`` usually contains a password
    +   encrypted with a DES derived algorithm (see module :mod:`crypt`).  However most
    +   modern unices  use a so-called *shadow password* system.  On those unices the
    +   *pw_passwd* field only contains an asterisk (``'*'``) or the  letter ``'x'``
    +   where the encrypted password is stored in a file :file:`/etc/shadow` which is
    +   not world readable.  Whether the *pw_passwd* field contains anything useful is
    +   system-dependent.  If available, the :mod:`spwd` module should be used where
    +   access to the encrypted password is required.
    +
    +It defines the following items:
    +
    +
    +.. function:: getpwuid(uid)
    +
    +   Return the password database entry for the given numeric user ID.
    +
    +
    +.. function:: getpwnam(name)
    +
    +   Return the password database entry for the given user name.
    +
    +
    +.. function:: getpwall()
    +
    +   Return a list of all available password database entries, in arbitrary order.
    +
    +
    +.. seealso::
    +
    +   Module :mod:`grp`
    +      An interface to the group database, similar to this.
    +
    +   Module :mod:`spwd`
    +      An interface to the shadow password database, similar to this.
    +
    diff --git a/Doc/library/py_compile.rst b/Doc/library/py_compile.rst
    new file mode 100644
    index 0000000..c815846
    --- /dev/null
    +++ b/Doc/library/py_compile.rst
    @@ -0,0 +1,55 @@
    +:mod:`py_compile` --- Compile Python source files
    +=================================================
    +
    +.. module:: py_compile
    +   :synopsis: Generate byte-code files from Python source files.
    +
    +.. % Documentation based on module docstrings, by Fred L. Drake, Jr.
    +.. % 
    +
    +
    +
    +.. index:: pair: file; byte-code
    +
    +The :mod:`py_compile` module provides a function to generate a byte-code file
    +from a source file, and another function used when the module source file is
    +invoked as a script.
    +
    +Though not often needed, this function can be useful when installing modules for
    +shared use, especially if some of the users may not have permission to write the
    +byte-code cache files in the directory containing the source code.
    +
    +
    +.. exception:: PyCompileError
    +
    +   Exception raised when an error occurs while attempting to compile the file.
    +
    +
    +.. function:: compile(file[, cfile[, dfile[, doraise]]])
    +
    +   Compile a source file to byte-code and write out the byte-code cache  file.  The
    +   source code is loaded from the file name *file*.  The  byte-code is written to
    +   *cfile*, which defaults to *file* ``+`` ``'c'`` (``'o'`` if optimization is
    +   enabled in the current interpreter).  If *dfile* is specified, it is used as the
    +   name of the source file in error messages instead of *file*.  If *doraise* is
    +   true, a :exc:`PyCompileError` is raised when an error is encountered while
    +   compiling *file*. If *doraise* is false (the default), an error string is
    +   written to ``sys.stderr``, but no exception is raised.
    +
    +
    +.. function:: main([args])
    +
    +   Compile several source files.  The files named in *args* (or on the command
    +   line, if *args* is not specified) are compiled and the resulting bytecode is
    +   cached in the normal manner.  This function does not search a directory
    +   structure to locate source files; it only compiles files named explicitly.
    +
    +When this module is run as a script, the :func:`main` function is used to
    +compile all the files named on the command line.
    +
    +
    +.. seealso::
    +
    +   Module :mod:`compileall`
    +      Utilities to compile all Python source files in a directory tree.
    +
    diff --git a/Doc/library/pyclbr.rst b/Doc/library/pyclbr.rst
    new file mode 100644
    index 0000000..5a77b4e
    --- /dev/null
    +++ b/Doc/library/pyclbr.rst
    @@ -0,0 +1,112 @@
    +
    +:mod:`pyclbr` --- Python class browser support
    +==============================================
    +
    +.. module:: pyclbr
    +   :synopsis: Supports information extraction for a Python class browser.
    +.. sectionauthor:: Fred L. Drake, Jr. 
    +
    +
    +The :mod:`pyclbr` module can be used to determine some limited information about the
    +classes, methods and top-level functions defined in a module.  The information
    +provided is sufficient to implement a traditional three-pane class browser.  The
    +information is extracted from the source code rather than by importing the
    +module, so this module is safe to use with untrusted source code.  This
    +restriction makes it impossible to use this module with modules not implemented
    +in Python, including many standard and optional extension modules.
    +
    +
    +.. function:: readmodule(module[, path])
    +
    +   Read a module and return a dictionary mapping class names to class descriptor
    +   objects.  The parameter *module* should be the name of a module as a string; it
    +   may be the name of a module within a package.  The *path* parameter should be a
    +   sequence, and is used to augment the value of ``sys.path``, which is used to
    +   locate module source code.
    +
    +   .. % The 'inpackage' parameter appears to be for internal use only....
    +
    +
    +.. function:: readmodule_ex(module[, path])
    +
    +   Like :func:`readmodule`, but the returned dictionary, in addition to mapping
    +   class names to class descriptor objects, also maps top-level function names to
    +   function descriptor objects.  Moreover, if the module being read is a package,
    +   the key ``'__path__'`` in the returned dictionary has as its value a list which
    +   contains the package search path.
    +
    +   .. % The 'inpackage' parameter appears to be for internal use only....
    +
    +
    +.. _pyclbr-class-objects:
    +
    +Class Descriptor Objects
    +------------------------
    +
    +The class descriptor objects used as values in the dictionary returned by
    +:func:`readmodule` and :func:`readmodule_ex` provide the following data members:
    +
    +
    +.. attribute:: class_descriptor.module
    +
    +   The name of the module defining the class described by the class descriptor.
    +
    +
    +.. attribute:: class_descriptor.name
    +
    +   The name of the class.
    +
    +
    +.. attribute:: class_descriptor.super
    +
    +   A list of class descriptors which describe the immediate base classes of the
    +   class being described.  Classes which are named as superclasses but which are
    +   not discoverable by :func:`readmodule` are listed as a string with the class
    +   name instead of class descriptors.
    +
    +
    +.. attribute:: class_descriptor.methods
    +
    +   A dictionary mapping method names to line numbers.
    +
    +
    +.. attribute:: class_descriptor.file
    +
    +   Name of the file containing the ``class`` statement defining the class.
    +
    +
    +.. attribute:: class_descriptor.lineno
    +
    +   The line number of the ``class`` statement within the file named by
    +   :attr:`file`.
    +
    +
    +.. _pyclbr-function-objects:
    +
    +Function Descriptor Objects
    +---------------------------
    +
    +The function descriptor objects used as values in the dictionary returned by
    +:func:`readmodule_ex` provide the following data members:
    +
    +
    +.. attribute:: function_descriptor.module
    +
    +   The name of the module defining the function described by the function
    +   descriptor.
    +
    +
    +.. attribute:: function_descriptor.name
    +
    +   The name of the function.
    +
    +
    +.. attribute:: function_descriptor.file
    +
    +   Name of the file containing the ``def`` statement defining the function.
    +
    +
    +.. attribute:: function_descriptor.lineno
    +
    +   The line number of the ``def`` statement within the file named by :attr:`file`.
    +
    diff --git a/Doc/library/pydoc.rst b/Doc/library/pydoc.rst
    new file mode 100644
    index 0000000..2df127c
    --- /dev/null
    +++ b/Doc/library/pydoc.rst
    @@ -0,0 +1,65 @@
    +
    +:mod:`pydoc` --- Documentation generator and online help system
    +===============================================================
    +
    +.. module:: pydoc
    +   :synopsis: Documentation generator and online help system.
    +.. moduleauthor:: Ka-Ping Yee 
    +.. sectionauthor:: Ka-Ping Yee 
    +
    +
    +.. versionadded:: 2.1
    +
    +.. index::
    +   single: documentation; generation
    +   single: documentation; online
    +   single: help; online
    +
    +The :mod:`pydoc` module automatically generates documentation from Python
    +modules.  The documentation can be presented as pages of text on the console,
    +served to a Web browser, or saved to HTML files.
    +
    +The built-in function :func:`help` invokes the online help system in the
    +interactive interpreter, which uses :mod:`pydoc` to generate its documentation
    +as text on the console.  The same text documentation can also be viewed from
    +outside the Python interpreter by running :program:`pydoc` as a script at the
    +operating system's command prompt. For example, running ::
    +
    +   pydoc sys
    +
    +at a shell prompt will display documentation on the :mod:`sys` module, in a
    +style similar to the manual pages shown by the Unix :program:`man` command.  The
    +argument to :program:`pydoc` can be the name of a function, module, or package,
    +or a dotted reference to a class, method, or function within a module or module
    +in a package.  If the argument to :program:`pydoc` looks like a path (that is,
    +it contains the path separator for your operating system, such as a slash in
    +Unix), and refers to an existing Python source file, then documentation is
    +produced for that file.
    +
    +Specifying a :option:`-w` flag before the argument will cause HTML documentation
    +to be written out to a file in the current directory, instead of displaying text
    +on the console.
    +
    +Specifying a :option:`-k` flag before the argument will search the synopsis
    +lines of all available modules for the keyword given as the argument, again in a
    +manner similar to the Unix :program:`man` command.  The synopsis line of a
    +module is the first line of its documentation string.
    +
    +You can also use :program:`pydoc` to start an HTTP server on the local machine
    +that will serve documentation to visiting Web browsers. :program:`pydoc`
    +:option:`-p 1234` will start an HTTP server on port 1234, allowing you to browse
    +the documentation at ``http://localhost:1234/`` in your preferred Web browser.
    +:program:`pydoc` :option:`-g` will start the server and additionally bring up a
    +small :mod:`Tkinter`\ -based graphical interface to help you search for
    +documentation pages.
    +
    +When :program:`pydoc` generates documentation, it uses the current environment
    +and path to locate modules.  Thus, invoking :program:`pydoc` :option:`spam`
    +documents precisely the version of the module you would get if you started the
    +Python interpreter and typed ``import spam``.
    +
    +Module docs for core modules are assumed to reside in
    +http://www.python.org/doc/current/lib/.  This can be overridden by setting the
    +:envvar:`PYTHONDOCS` environment variable to a different URL or to a local
    +directory containing the Library Reference Manual pages.
    +
    diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst
    new file mode 100644
    index 0000000..87ed501
    --- /dev/null
    +++ b/Doc/library/pyexpat.rst
    @@ -0,0 +1,873 @@
    +
    +:mod:`xml.parsers.expat` --- Fast XML parsing using Expat
    +=========================================================
    +
    +.. module:: xml.parsers.expat
    +   :synopsis: An interface to the Expat non-validating XML parser.
    +.. moduleauthor:: Paul Prescod 
    +
    +
    +.. % Markup notes:
    +.. % 
    +.. % Many of the attributes of the XMLParser objects are callbacks.
    +.. % Since signature information must be presented, these are described
    +.. % using the methoddesc environment.  Since they are attributes which
    +.. % are set by client code, in-text references to these attributes
    +.. % should be marked using the \member macro and should not include the
    +.. % parentheses used when marking functions and methods.
    +
    +.. versionadded:: 2.0
    +
    +.. index:: single: Expat
    +
    +The :mod:`xml.parsers.expat` module is a Python interface to the Expat
    +non-validating XML parser. The module provides a single extension type,
    +:class:`xmlparser`, that represents the current state of an XML parser.  After
    +an :class:`xmlparser` object has been created, various attributes of the object
    +can be set to handler functions.  When an XML document is then fed to the
    +parser, the handler functions are called for the character data and markup in
    +the XML document.
    +
    +.. index:: module: pyexpat
    +
    +This module uses the :mod:`pyexpat` module to provide access to the Expat
    +parser.  Direct use of the :mod:`pyexpat` module is deprecated.
    +
    +This module provides one exception and one type object:
    +
    +
    +.. exception:: ExpatError
    +
    +   The exception raised when Expat reports an error.  See section
    +   :ref:`expaterror-objects` for more information on interpreting Expat errors.
    +
    +
    +.. exception:: error
    +
    +   Alias for :exc:`ExpatError`.
    +
    +
    +.. data:: XMLParserType
    +
    +   The type of the return values from the :func:`ParserCreate` function.
    +
    +The :mod:`xml.parsers.expat` module contains two functions:
    +
    +
    +.. function:: ErrorString(errno)
    +
    +   Returns an explanatory string for a given error number *errno*.
    +
    +
    +.. function:: ParserCreate([encoding[, namespace_separator]])
    +
    +   Creates and returns a new :class:`xmlparser` object.   *encoding*, if specified,
    +   must be a string naming the encoding  used by the XML data.  Expat doesn't
    +   support as many encodings as Python does, and its repertoire of encodings can't
    +   be extended; it supports UTF-8, UTF-16, ISO-8859-1 (Latin1), and ASCII.  If
    +   *encoding* is given it will override the implicit or explicit encoding of the
    +   document.
    +
    +   Expat can optionally do XML namespace processing for you, enabled by providing a
    +   value for *namespace_separator*.  The value must be a one-character string; a
    +   :exc:`ValueError` will be raised if the string has an illegal length (``None``
    +   is considered the same as omission).  When namespace processing is enabled,
    +   element type names and attribute names that belong to a namespace will be
    +   expanded.  The element name passed to the element handlers
    +   :attr:`StartElementHandler` and :attr:`EndElementHandler` will be the
    +   concatenation of the namespace URI, the namespace separator character, and the
    +   local part of the name.  If the namespace separator is a zero byte (``chr(0)``)
    +   then the namespace URI and the local part will be concatenated without any
    +   separator.
    +
    +   For example, if *namespace_separator* is set to a space character (``' '``) and
    +   the following document is parsed::
    +
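    +      <?xml version="1.0"?>
    +      <root xmlns="http://default-namespace.org/" xmlns:py="http://www.python.org/ns/">
    +        <py:elem1 />
    +        <elem2 xmlns="" />
    +      </root>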
    +
    +   :attr:`StartElementHandler` will receive the following strings for each
    +   element::
    +
    +      http://default-namespace.org/ root
    +      http://www.python.org/ns/ elem1
    +      elem2
    +
    +
    +.. seealso::
    +
    +   `The Expat XML Parser <http://www.libexpat.org/>`_
    +      Home page of the Expat project.
    +
    +
    +.. _xmlparser-objects:
    +
    +XMLParser Objects
    +-----------------
    +
    +:class:`xmlparser` objects have the following methods:
    +
    +
    +.. method:: xmlparser.Parse(data[, isfinal])
    +
    +   Parses the contents of the string *data*, calling the appropriate handler
    +   functions to process the parsed data.  *isfinal* must be true on the final call
    +   to this method.  *data* can be the empty string at any time.
    +
    +
    +.. method:: xmlparser.ParseFile(file)
    +
    +   Parse XML data reading from the object *file*.  *file* only needs to provide
    +   the ``read(nbytes)`` method, returning the empty string when there's no more
    +   data.
    +
    +
    +.. method:: xmlparser.SetBase(base)
    +
    +   Sets the base to be used for resolving relative URIs in system identifiers in
    +   declarations.  Resolving relative identifiers is left to the application: this
    +   value will be passed through as the *base* argument to the
    +   :func:`ExternalEntityRefHandler`, :func:`NotationDeclHandler`, and
    +   :func:`UnparsedEntityDeclHandler` functions.
    +
    +
    +.. method:: xmlparser.GetBase()
    +
    +   Returns a string containing the base set by a previous call to :meth:`SetBase`,
    +   or ``None`` if  :meth:`SetBase` hasn't been called.
    +
    +
    +.. method:: xmlparser.GetInputContext()
    +
    +   Returns the input data that generated the current event as a string. The data is
    +   in the encoding of the entity which contains the text. When called while an
    +   event handler is not active, the return value is ``None``.
    +
    +   .. versionadded:: 2.1
    +
    +
    +.. method:: xmlparser.ExternalEntityParserCreate(context[, encoding])
    +
    +   Create a "child" parser which can be used to parse an external parsed entity
    +   referred to by content parsed by the parent parser.  The *context* parameter
    +   should be the string passed to the :meth:`ExternalEntityRefHandler` handler
    +   function, described below. The child parser is created with the
    +   :attr:`ordered_attributes` and :attr:`specified_attributes` set to the values of
    +   this parser.
    +
    +
    +.. method:: xmlparser.UseForeignDTD([flag])
    +
    +   Calling this with a true value for *flag* (the default) will cause Expat to call
    +   the :attr:`ExternalEntityRefHandler` with :const:`None` for all arguments to
    +   allow an alternate DTD to be loaded.  If the document does not contain a
    +   document type declaration, the :attr:`ExternalEntityRefHandler` will still be
    +   called, but the :attr:`StartDoctypeDeclHandler` and
    +   :attr:`EndDoctypeDeclHandler` will not be called.
    +
    +   Passing a false value for *flag* will cancel a previous call that passed a true
    +   value, but otherwise has no effect.
    +
    +   This method can only be called before the :meth:`Parse` or :meth:`ParseFile`
    +   methods are called; calling it after either of those has been called causes
    +   :exc:`ExpatError` to be raised with the :attr:`code` attribute set to
    +   :const:`errors.XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING`.
    +
    +   .. versionadded:: 2.3
    +
    +:class:`xmlparser` objects have the following attributes:
    +
    +
    +.. attribute:: xmlparser.buffer_size
    +
    +   The size of the buffer used when :attr:`buffer_text` is true.  This value cannot
    +   be changed at this time.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. attribute:: xmlparser.buffer_text
    +
    +   Setting this to true causes the :class:`xmlparser` object to buffer textual
    +   content returned by Expat to avoid multiple calls to the
    +   :meth:`CharacterDataHandler` callback whenever possible.  This can improve
    +   performance substantially since Expat normally breaks character data into chunks
    +   at every line ending.  This attribute is false by default, and may be changed at
    +   any time.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. attribute:: xmlparser.buffer_used
    +
    +   If :attr:`buffer_text` is enabled, the number of bytes stored in the buffer.
    +   These bytes represent UTF-8 encoded text.  This attribute has no meaningful
    +   interpretation when :attr:`buffer_text` is false.
    +
    +   .. versionadded:: 2.3
    +
    +
    +.. attribute:: xmlparser.ordered_attributes
    +
    +   Setting this attribute to a non-zero integer causes the attributes to be
    +   reported as a list rather than a dictionary.  The attributes are presented in
    +   the order found in the document text.  For each attribute, two list entries are
    +   presented: the attribute name and the attribute value.  (Older versions of this
    +   module also used this format.)  By default, this attribute is false; it may be
    +   changed at any time.
    +
    +   .. versionadded:: 2.1
    +
    +
    +.. attribute:: xmlparser.specified_attributes
    +
    +   If set to a non-zero integer, the parser will report only those attributes which
    +   were specified in the document instance and not those which were derived from
    +   attribute declarations.  Applications which set this need to be especially
    +   careful to use what additional information is available from the declarations as
    +   needed to comply with the standards for the behavior of XML processors.  By
    +   default, this attribute is false; it may be changed at any time.
    +
    +   .. versionadded:: 2.1
    +
    +The following attributes contain values relating to the most recent error
    +encountered by an :class:`xmlparser` object, and will only have correct values
    +once a call to :meth:`Parse` or :meth:`ParseFile` has raised a
    +:exc:`xml.parsers.expat.ExpatError` exception.
    +
    +
    +.. attribute:: xmlparser.ErrorByteIndex
    +
    +   Byte index at which an error occurred.
    +
    +
    +.. attribute:: xmlparser.ErrorCode
    +
    +   Numeric code specifying the problem.  This value can be passed to the
    +   :func:`ErrorString` function, or compared to one of the constants defined in the
    +   ``errors`` object.
    +
    +
    +.. attribute:: xmlparser.ErrorColumnNumber
    +
    +   Column number at which an error occurred.
    +
    +
    +.. attribute:: xmlparser.ErrorLineNumber
    +
    +   Line number at which an error occurred.
    +
    +The following attributes contain values relating to the current parse location
    +in an :class:`xmlparser` object.  During a callback reporting a parse event they
    +indicate the location of the first of the sequence of characters that generated
    +the event.  When called outside of a callback, the position indicated will be
    +just past the last parse event (regardless of whether there was an associated
    +callback).
    +
    +.. versionadded:: 2.4
    +
    +
    +.. attribute:: xmlparser.CurrentByteIndex
    +
    +   Current byte index in the parser input.
    +
    +
    +.. attribute:: xmlparser.CurrentColumnNumber
    +
    +   Current column number in the parser input.
    +
    +
    +.. attribute:: xmlparser.CurrentLineNumber
    +
    +   Current line number in the parser input.
    +
    +Here is the list of handlers that can be set.  To set a handler on an
    +:class:`xmlparser` object *o*, use ``o.handlername = func``.  *handlername* must
    +be taken from the following list, and *func* must be a callable object accepting
    +the correct number of arguments.  The arguments are all strings, unless
    +otherwise stated.
    +
    +
    +.. method:: xmlparser.XmlDeclHandler(version, encoding, standalone)
    +
    +   Called when the XML declaration is parsed.  The XML declaration is the
    +   (optional) declaration of the applicable version of the XML recommendation, the
    +   encoding of the document text, and an optional "standalone" declaration.
    +   *version* and *encoding* will be strings, and *standalone* will be ``1`` if the
    +   document is declared standalone, ``0`` if it is declared not to be standalone,
    +   or ``-1`` if the standalone clause was omitted. This is only available with
    +   Expat version 1.95.0 or newer.
    +
    +   .. versionadded:: 2.1
    +
    +
    +.. method:: xmlparser.StartDoctypeDeclHandler(doctypeName, systemId, publicId, has_internal_subset)
    +
    +   Called when Expat begins parsing the document type declaration (``<!DOCTYPE ...``).
    +
    +
    +.. method:: xmlparser.StartCdataSectionHandler()
    +
    +   Called at the start of a CDATA section.  This and :attr:`EndCdataSectionHandler`
    +   are needed to be able to identify the syntactical start and end for CDATA
    +   sections.
    +
    +
    +.. method:: xmlparser.EndCdataSectionHandler()
    +
    +   Called at the end of a CDATA section.
    +
    +
    +.. method:: xmlparser.DefaultHandler(data)
    +
    +   Called for any characters in the XML document for which no applicable handler
    +   has been specified.  This means characters that are part of a construct which
    +   could be reported, but for which no handler has been supplied.
    +
    +
    +.. method:: xmlparser.DefaultHandlerExpand(data)
    +
    +   This is the same as the :func:`DefaultHandler`,  but doesn't inhibit expansion
    +   of internal entities. The entity reference will not be passed to the default
    +   handler.
    +
    +
    +.. method:: xmlparser.NotStandaloneHandler()
    +
    +   Called if the XML document hasn't been declared as being a standalone document.
    +   This happens when there is an external subset or a reference to a parameter
    +   entity, but the XML declaration does not set standalone to ``yes``.  If this
    +   handler returns ``0``, then the parser will throw an
    +   :const:`XML_ERROR_NOT_STANDALONE` error.  If this handler is not set, no
    +   exception is raised by the parser for this condition.
    +
    +
    +.. method:: xmlparser.ExternalEntityRefHandler(context, base, systemId, publicId)
    +
    +   Called for references to external entities.  *base* is the current base, as set
    +   by a previous call to :meth:`SetBase`.  The system and public identifiers,
    +   *systemId* and *publicId*, are strings if given; if the public identifier is not
    +   given, *publicId* will be ``None``.  The *context* value is opaque and should
    +   only be used as described below.
    +
    +   For external entities to be parsed, this handler must be implemented. It is
    +   responsible for creating the sub-parser using
    +   ``ExternalEntityParserCreate(context)``, initializing it with the appropriate
    +   callbacks, and parsing the entity.  This handler should return an integer; if it
    +   returns ``0``, the parser will throw an
    +   :const:`XML_ERROR_EXTERNAL_ENTITY_HANDLING` error, otherwise parsing will
    +   continue.
    +
    +   If this handler is not provided, external entities are reported by the
    +   :attr:`DefaultHandler` callback, if provided.
    +
    +
    +.. _expaterror-objects:
    +
    +ExpatError Exceptions
    +---------------------
    +
    +.. sectionauthor:: Fred L. Drake, Jr. 
    +
    +
    +:exc:`ExpatError` exceptions have a number of interesting attributes:
    +
    +
    +.. attribute:: ExpatError.code
    +
    +   Expat's internal error number for the specific error.  This will match one of
    +   the constants defined in the ``errors`` object from this module.
    +
    +   .. versionadded:: 2.1
    +
    +
    +.. attribute:: ExpatError.lineno
    +
    +   Line number on which the error was detected.  The first line is numbered ``1``.
    +
    +   .. versionadded:: 2.1
    +
    +
    +.. attribute:: ExpatError.offset
    +
    +   Character offset into the line where the error occurred.  The first column is
    +   numbered ``0``.
    +
    +   .. versionadded:: 2.1
    +
    +
    +.. _expat-example:
    +
    +Example
    +-------
    +
    +The following program defines three handlers that just print out their
    +arguments. ::
    +
    +   import xml.parsers.expat
    +
    +   # 3 handler functions
    +   def start_element(name, attrs):
    +       print 'Start element:', name, attrs
    +   def end_element(name):
    +       print 'End element:', name
    +   def char_data(data):
    +       print 'Character data:', repr(data)
    +
    +   p = xml.parsers.expat.ParserCreate()
    +
    +   p.StartElementHandler = start_element
    +   p.EndElementHandler = end_element
    +   p.CharacterDataHandler = char_data
    +
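    +   p.Parse("""<?xml version="1.0"?>
    +   <parent id="top"><child1 name="paul">Text goes here</child1>
    +   <child2 name="fred">More text</child2>
    +   </parent>""", 1)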
    +
    +The output from this program is::
    +
    +   Start element: parent {'id': 'top'}
    +   Start element: child1 {'name': 'paul'}
    +   Character data: 'Text goes here'
    +   End element: child1
    +   Character data: '\n'
    +   Start element: child2 {'name': 'fred'}
    +   Character data: 'More text'
    +   End element: child2
    +   Character data: '\n'
    +   End element: parent
    +
    +
    +.. _expat-content-models:
    +
    +Content Model Descriptions
    +--------------------------
    +
    +.. sectionauthor:: Fred L. Drake, Jr. 
    +
    +
    +Content models are described using nested tuples.  Each tuple contains four
    +values: the type, the quantifier, the name, and a tuple of children.  Children
    +are simply additional content model descriptions.
    +
    +The values of the first two fields are constants defined in the ``model`` object
    +of the :mod:`xml.parsers.expat` module.  These constants can be collected in two
    +groups: the model type group and the quantifier group.
    +
    +The constants in the model type group are:
    +
    +
    +.. data:: XML_CTYPE_ANY
    +   :noindex:
    +
    +   The element named by the model name was declared to have a content model of
    +   ``ANY``.
    +
    +
    +.. data:: XML_CTYPE_CHOICE
    +   :noindex:
    +
    +   The named element allows a choice from a number of options; this is used for
    +   content models such as ``(A | B | C)``.
    +
    +
    +.. data:: XML_CTYPE_EMPTY
    +   :noindex:
    +
    +   Elements which are declared to be ``EMPTY`` have this model type.
    +
    +
    +.. data:: XML_CTYPE_MIXED
    +   :noindex:
    +
    +
    +.. data:: XML_CTYPE_NAME
    +   :noindex:
    +
    +
    +.. data:: XML_CTYPE_SEQ
    +   :noindex:
    +
    +   Models which represent a series of models which follow one after the other are
    +   indicated with this model type.  This is used for models such as ``(A, B, C)``.
    +
    +The constants in the quantifier group are:
    +
    +
    +.. data:: XML_CQUANT_NONE
    +   :noindex:
    +
    +   No modifier is given, so it can appear exactly once, as for ``A``.
    +
    +
    +.. data:: XML_CQUANT_OPT
    +   :noindex:
    +
    +   The model is optional: it can appear once or not at all, as for ``A?``.
    +
    +
    +.. data:: XML_CQUANT_PLUS
    +   :noindex:
    +
    +   The model must occur one or more times (like ``A+``).
    +
    +
    +.. data:: XML_CQUANT_REP
    +   :noindex:
    +
    +   The model must occur zero or more times, as for ``A*``.
    +
    +
    +.. _expat-errors:
    +
    +Expat error constants
    +---------------------
    +
    +The following constants are provided in the ``errors`` object of the
    +:mod:`xml.parsers.expat` module.  These constants are useful in interpreting
    +some of the attributes of the :exc:`ExpatError` exception objects raised when an
    +error has occurred.
    +
    +The ``errors`` object has the following attributes:
    +
    +
    +.. data:: XML_ERROR_ASYNC_ENTITY
    +   :noindex:
    +
    +
    +.. data:: XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF
    +   :noindex:
    +
    +   An entity reference in an attribute value referred to an external entity instead
    +   of an internal entity.
    +
    +
    +.. data:: XML_ERROR_BAD_CHAR_REF
    +   :noindex:
    +
    +   A character reference referred to a character which is illegal in XML (for
    +   example, character ``0``, or '``&#0;``').
    +
    +
    +.. data:: XML_ERROR_BINARY_ENTITY_REF
    +   :noindex:
    +
    +   An entity reference referred to an entity which was declared with a notation, so
    +   cannot be parsed.
    +
    +
    +.. data:: XML_ERROR_DUPLICATE_ATTRIBUTE
    +   :noindex:
    +
    +   An attribute was used more than once in a start tag.
    +
    +
    +.. data:: XML_ERROR_INCORRECT_ENCODING
    +   :noindex:
    +
    +
    +.. data:: XML_ERROR_INVALID_TOKEN
    +   :noindex:
    +
    +   Raised when an input byte could not properly be assigned to a character; for
    +   example, a NUL byte (value ``0``) in a UTF-8 input stream.
    +
    +
    +.. data:: XML_ERROR_JUNK_AFTER_DOC_ELEMENT
    +   :noindex:
    +
    +   Something other than whitespace occurred after the document element.
    +
    +
    +.. data:: XML_ERROR_MISPLACED_XML_PI
    +   :noindex:
    +
    +   An XML declaration was found somewhere other than the start of the input data.
    +
    +
    +.. data:: XML_ERROR_NO_ELEMENTS
    +   :noindex:
    +
    +   The document contains no elements (XML requires all documents to contain exactly
    +   one top-level element).
    +
    +
    +.. data:: XML_ERROR_NO_MEMORY
    +   :noindex:
    +
    +   Expat was not able to allocate memory internally.
    +
    +
    +.. data:: XML_ERROR_PARAM_ENTITY_REF
    +   :noindex:
    +
    +   A parameter entity reference was found where it was not allowed.
    +
    +
    +.. data:: XML_ERROR_PARTIAL_CHAR
    +   :noindex:
    +
    +   An incomplete character was found in the input.
    +
    +
    +.. data:: XML_ERROR_RECURSIVE_ENTITY_REF
    +   :noindex:
    +
    +   An entity reference contained another reference to the same entity; possibly via
    +   a different name, and possibly indirectly.
    +
    +
    +.. data:: XML_ERROR_SYNTAX
    +   :noindex:
    +
    +   Some unspecified syntax error was encountered.
    +
    +
    +.. data:: XML_ERROR_TAG_MISMATCH
    +   :noindex:
    +
    +   An end tag did not match the innermost open start tag.
    +
    +
    +.. data:: XML_ERROR_UNCLOSED_TOKEN
    +   :noindex:
    +
    +   Some token (such as a start tag) was not closed before the end of the stream or
    +   the next token was encountered.
    +
    +
    +.. data:: XML_ERROR_UNDEFINED_ENTITY
    +   :noindex:
    +
    +   A reference was made to an entity which was not defined.
    +
    +
    +.. data:: XML_ERROR_UNKNOWN_ENCODING
    +   :noindex:
    +
    +   The document encoding is not supported by Expat.
    +
    +
    +.. data:: XML_ERROR_UNCLOSED_CDATA_SECTION
    +   :noindex:
    +
    +   A CDATA marked section was not closed.
    +
    +
    +.. data:: XML_ERROR_EXTERNAL_ENTITY_HANDLING
    +   :noindex:
    +
    +
    +.. data:: XML_ERROR_NOT_STANDALONE
    +   :noindex:
    +
    +   The parser determined that the document was not "standalone" though it declared
    +   itself to be in the XML declaration, and the :attr:`NotStandaloneHandler` was
    +   set and returned ``0``.
    +
    +
    +.. data:: XML_ERROR_UNEXPECTED_STATE
    +   :noindex:
    +
    +
    +.. data:: XML_ERROR_ENTITY_DECLARED_IN_PE
    +   :noindex:
    +
    +
    +.. data:: XML_ERROR_FEATURE_REQUIRES_XML_DTD
    +   :noindex:
    +
    +   An operation was requested that requires DTD support to be compiled in, but
    +   Expat was configured without DTD support.  This should never be reported by a
    +   standard build of the :mod:`xml.parsers.expat` module.
    +
    +
    +.. data:: XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING
    +   :noindex:
    +
    +   A behavioral change was requested after parsing started that can only be changed
    +   before parsing has started.  This is (currently) only raised by
    +   :meth:`UseForeignDTD`.
    +
    +
    +.. data:: XML_ERROR_UNBOUND_PREFIX
    +   :noindex:
    +
    +   An undeclared prefix was found when namespace processing was enabled.
    +
    +
    +.. data:: XML_ERROR_UNDECLARING_PREFIX
    +   :noindex:
    +
    +   The document attempted to remove the namespace declaration associated with a
    +   prefix.
    +
    +
    +.. data:: XML_ERROR_INCOMPLETE_PE
    +   :noindex:
    +
    +   A parameter entity contained incomplete markup.
    +
    +
    +.. data:: XML_ERROR_XML_DECL
    +   :noindex:
    +
    +   The document contained no document element at all.
    +
    +
    +.. data:: XML_ERROR_TEXT_DECL
    +   :noindex:
    +
    +   There was an error parsing a text declaration in an external entity.
    +
    +
    +.. data:: XML_ERROR_PUBLICID
    +   :noindex:
    +
    +   Characters were found in the public id that are not allowed.
    +
    +
    +.. data:: XML_ERROR_SUSPENDED
    +   :noindex:
    +
    +   The requested operation was made on a suspended parser, but isn't allowed.  This
    +   includes attempts to provide additional input or to stop the parser.
    +
    +
    +.. data:: XML_ERROR_NOT_SUSPENDED
    +   :noindex:
    +
    +   An attempt to resume the parser was made when the parser had not been suspended.
    +
    +
    +.. data:: XML_ERROR_ABORTED
    +   :noindex:
    +
    +   This should not be reported to Python applications.
    +
    +
    +.. data:: XML_ERROR_FINISHED
    +   :noindex:
    +
    +   The requested operation was made on a parser which was finished parsing input,
    +   but isn't allowed.  This includes attempts to provide additional input or to
    +   stop the parser.
    +
    +
    +.. data:: XML_ERROR_SUSPEND_PE
    +   :noindex:
    +
    diff --git a/Doc/library/python.rst b/Doc/library/python.rst
    new file mode 100644
    index 0000000..3b58eee
    --- /dev/null
    +++ b/Doc/library/python.rst
    @@ -0,0 +1,27 @@
    +
    +.. _python:
    +
    +***********************
    +Python Runtime Services
    +***********************
    +
    +The modules described in this chapter provide a wide range of services related
    +to the Python interpreter and its interaction with its environment.  Here's an
    +overview:
    +
    +
    +.. toctree::
    +
    +   sys.rst
    +   __builtin__.rst
    +   __main__.rst
    +   warnings.rst
    +   contextlib.rst
    +   atexit.rst
    +   traceback.rst
    +   __future__.rst
    +   gc.rst
    +   inspect.rst
    +   site.rst
    +   user.rst
    +   fpectl.rst
    diff --git a/Doc/library/queue.rst b/Doc/library/queue.rst
    new file mode 100644
    index 0000000..c7b65fd
    --- /dev/null
    +++ b/Doc/library/queue.rst
    @@ -0,0 +1,152 @@
    +
    +:mod:`Queue` --- A synchronized queue class
    +===========================================
    +
    +.. module:: Queue
    +   :synopsis: A synchronized queue class.
    +
    +
    +The :mod:`Queue` module implements a multi-producer, multi-consumer FIFO queue.
    +It is especially useful in threaded programming when information must be
    +exchanged safely between multiple threads.  The :class:`Queue` class in this
    +module implements all the required locking semantics.  It depends on the
    +availability of thread support in Python.
    +
    +The :mod:`Queue` module defines the following class and exception:
    +
    +
    +.. class:: Queue(maxsize)
    +
    +   Constructor for the class.  *maxsize* is an integer that sets the upper bound
    +   limit on the number of items that can be placed in the queue.  Insertion will
    +   block once this size has been reached, until queue items are consumed.  If
    +   *maxsize* is less than or equal to zero, the queue size is infinite.
    +
    +
    +.. exception:: Empty
    +
    +   Exception raised when non-blocking :meth:`get` (or :meth:`get_nowait`) is called
    +   on a :class:`Queue` object which is empty.
    +
    +
    +.. exception:: Full
    +
    +   Exception raised when non-blocking :meth:`put` (or :meth:`put_nowait`) is called
    +   on a :class:`Queue` object which is full.
    +
    +
    +.. _queueobjects:
    +
    +Queue Objects
    +-------------
    +
    +Class :class:`Queue` implements queue objects and has the methods described
    +below.  This class can be derived from in order to implement other queue
    +organizations (e.g. stack) but the inheritable interface is not described here.
    +See the source code for details.  The public methods are:
    +
    +
    +.. method:: Queue.qsize()
    +
    +   Return the approximate size of the queue.  Because of multithreading semantics,
    +   this number is not reliable.
    +
    +
    +.. method:: Queue.empty()
    +
    +   Return ``True`` if the queue is empty, ``False`` otherwise. Because of
    +   multithreading semantics, this is not reliable.
    +
    +
    +.. method:: Queue.full()
    +
    +   Return ``True`` if the queue is full, ``False`` otherwise. Because of
    +   multithreading semantics, this is not reliable.
    +
    +
    +.. method:: Queue.put(item[, block[, timeout]])
    +
    +   Put *item* into the queue. If optional args *block* is true and *timeout* is
    +   None (the default), block if necessary until a free slot is available. If
    +   *timeout* is a positive number, it blocks at most *timeout* seconds and raises
    +   the :exc:`Full` exception if no free slot was available within that time.
    +   Otherwise (*block* is false), put an item on the queue if a free slot is
    +   immediately available, else raise the :exc:`Full` exception (*timeout* is
    +   ignored in that case).
    +
    +   .. versionadded:: 2.3
    +      The *timeout* parameter.
    +
    +
    +.. method:: Queue.put_nowait(item)
    +
    +   Equivalent to ``put(item, False)``.
    +
    +
    +.. method:: Queue.get([block[, timeout]])
    +
    +   Remove and return an item from the queue. If optional args *block* is true and
    +   *timeout* is None (the default), block if necessary until an item is available.
    +   If *timeout* is a positive number, it blocks at most *timeout* seconds and
    +   raises the :exc:`Empty` exception if no item was available within that time.
    +   Otherwise (*block* is false), return an item if one is immediately available,
    +   else raise the :exc:`Empty` exception (*timeout* is ignored in that case).
    +
    +   .. versionadded:: 2.3
    +      The *timeout* parameter.
    +
    +
    +.. method:: Queue.get_nowait()
    +
    +   Equivalent to ``get(False)``.
    +
    +Two methods are offered to support tracking whether enqueued tasks have been
    +fully processed by daemon consumer threads.
    +
    +
    +.. method:: Queue.task_done()
    +
    +   Indicate that a formerly enqueued task is complete.  Used by queue consumer
    +   threads.  For each :meth:`get` used to fetch a task, a subsequent call to
    +   :meth:`task_done` tells the queue that the processing on the task is complete.
    +
    +   If a :meth:`join` is currently blocking, it will resume when all items have been
    +   processed (meaning that a :meth:`task_done` call was received for every item
    +   that had been :meth:`put` into the queue).
    +
    +   Raises a :exc:`ValueError` if called more times than there were items placed in
    +   the queue.
    +
    +   .. versionadded:: 2.5
    +
    +
    +.. method:: Queue.join()
    +
    +   Blocks until all items in the queue have been gotten and processed.
    +
    +   The count of unfinished tasks goes up whenever an item is added to the queue.
    +   The count goes down whenever a consumer thread calls :meth:`task_done` to
    +   indicate that the item was retrieved and all work on it is complete. When the
    +   count of unfinished tasks drops to zero, :meth:`join` unblocks.
    +
    +   .. versionadded:: 2.5
    +
    +Example of how to wait for enqueued tasks to be completed::
    +
    +   def worker(): 
    +       while True: 
    +           item = q.get() 
    +           do_work(item) 
    +           q.task_done() 
    +
    +   q = Queue() 
    +   for i in range(num_worker_threads): 
    +        t = Thread(target=worker)
    +        t.setDaemon(True)
    +        t.start() 
    +
    +   for item in source():
    +       q.put(item) 
    +
    +   q.join()       # block until all tasks are done
    +
    diff --git a/Doc/library/quopri.rst b/Doc/library/quopri.rst
    new file mode 100644
    index 0000000..8f525ef
    --- /dev/null
    +++ b/Doc/library/quopri.rst
    @@ -0,0 +1,61 @@
    +
    +:mod:`quopri` --- Encode and decode MIME quoted-printable data
    +==============================================================
    +
    +.. module:: quopri
    +   :synopsis: Encode and decode files using the MIME quoted-printable encoding.
    +
    +
    +.. index::
    +   pair: quoted-printable; encoding
    +   single: MIME; quoted-printable encoding
    +
    +This module performs quoted-printable transport encoding and decoding, as
    +defined in :rfc:`1521`: "MIME (Multipurpose Internet Mail Extensions) Part One:
    +Mechanisms for Specifying and Describing the Format of Internet Message Bodies".
    +The quoted-printable encoding is designed for data where there are relatively
    +few nonprintable characters; the base64 encoding scheme available via the
    +:mod:`base64` module is more compact if there are many such characters, as when
    +sending a graphics file.
    +
    +
    +.. function:: decode(input, output[,header])
    +
    +   Decode the contents of the *input* file and write the resulting decoded binary
    +   data to the *output* file. *input* and *output* must either be file objects or
    +   objects that mimic the file object interface. *input* will be read until
    +   ``input.readline()`` returns an empty string. If the optional argument *header*
    +   is present and true, underscore will be decoded as space. This is used to decode
    +   "Q"-encoded headers as described in :rfc:`1522`: "MIME (Multipurpose Internet
    +   Mail Extensions) Part Two: Message Header Extensions for Non-ASCII Text".
    +
    +
    +.. function:: encode(input, output, quotetabs)
    +
    +   Encode the contents of the *input* file and write the resulting quoted-printable
    +   data to the *output* file. *input* and *output* must either be file objects or
    +   objects that mimic the file object interface. *input* will be read until
    +   ``input.readline()`` returns an empty string. *quotetabs* is a flag which
    +   controls whether to encode embedded spaces and tabs; when true it encodes such
    +   embedded whitespace, and when false it leaves them unencoded.  Note that spaces
    +   and tabs appearing at the end of lines are always encoded, as per :rfc:`1521`.
    +
    +
    +.. function:: decodestring(s[,header])
    +
    +   Like :func:`decode`, except that it accepts a source string and returns the
    +   corresponding decoded string.
    +
    +
    +.. function:: encodestring(s[, quotetabs])
    +
    +   Like :func:`encode`, except that it accepts a source string and returns the
    +   corresponding encoded string.  *quotetabs* is optional (defaulting to 0), and is
    +   passed straight through to :func:`encode`.
    +
    +
    +.. seealso::
    +
    +   Module :mod:`base64`
    +      Encode and decode MIME base64 data
    +
    diff --git a/Doc/library/random.rst b/Doc/library/random.rst
    new file mode 100644
    index 0000000..c5d289c
    --- /dev/null
    +++ b/Doc/library/random.rst
    @@ -0,0 +1,315 @@
    +
    +:mod:`random` --- Generate pseudo-random numbers
    +================================================
    +
    +.. module:: random
    +   :synopsis: Generate pseudo-random numbers with various common distributions.
    +
    +
    +This module implements pseudo-random number generators for various
    +distributions.
    +
    +For integers, uniform selection from a range. For sequences, uniform selection
    +of a random element, a function to generate a random permutation of a list
    +in-place, and a function for random sampling without replacement.
    +
    +On the real line, there are functions to compute uniform, normal (Gaussian),
    +lognormal, negative exponential, gamma, and beta distributions. For generating
    +distributions of angles, the von Mises distribution is available.
    +
    +Almost all module functions depend on the basic function :func:`random`, which
    +generates a random float uniformly in the semi-open range [0.0, 1.0).  Python
    +uses the Mersenne Twister as the core generator.  It produces 53-bit precision
    +floats and has a period of 2\*\*19937-1.  The underlying implementation in C is
    +both fast and threadsafe.  The Mersenne Twister is one of the most extensively
    +tested random number generators in existence.  However, being completely
    +deterministic, it is not suitable for all purposes, and is completely unsuitable
    +for cryptographic purposes.
    +
    +The functions supplied by this module are actually bound methods of a hidden
    +instance of the :class:`random.Random` class.  You can instantiate your own
    +instances of :class:`Random` to get generators that don't share state.  This is
    +especially useful for multi-threaded programs, creating a different instance of
    +:class:`Random` for each thread, and using the :meth:`jumpahead` method to make
    +it likely that the generated sequences seen by each thread don't overlap.
    +
    +Class :class:`Random` can also be subclassed if you want to use a different
    +basic generator of your own devising: in that case, override the :meth:`random`,
    +:meth:`seed`, :meth:`getstate`, :meth:`setstate` and :meth:`jumpahead` methods.
    +Optionally, a new generator can supply a :meth:`getrandbits` method --- this
    +allows :meth:`randrange` to produce selections over an arbitrarily large range.
    +
    +.. versionadded:: 2.4
    +   the :meth:`getrandbits` method.
    +
    +As an example of subclassing, the :mod:`random` module provides the
    +:class:`WichmannHill` class that implements an alternative generator in pure
    +Python.  The class provides a backward compatible way to reproduce results from
    +earlier versions of Python, which used the Wichmann-Hill algorithm as the core
    +generator.  Note that this Wichmann-Hill generator can no longer be recommended:
    +its period is too short by contemporary standards, and the sequence generated is
    +known to fail some stringent randomness tests.  See the references below for a
    +recent variant that repairs these flaws.
    +
    +.. versionchanged:: 2.3
    +   Substituted MersenneTwister for Wichmann-Hill.
    +
    +Bookkeeping functions:
    +
    +
    +.. function:: seed([x])
    +
    +   Initialize the basic random number generator. Optional argument *x* can be any
    +   hashable object. If *x* is omitted or ``None``, current system time is used;
    +   current system time is also used to initialize the generator when the module is
    +   first imported.  If randomness sources are provided by the operating system,
    +   they are used instead of the system time (see the :func:`os.urandom` function
    +   for details on availability).
    +
    +   .. versionchanged:: 2.4
    +      formerly, operating system resources were not used.
    +
    +   If *x* is not ``None`` or an int or long, ``hash(x)`` is used instead. If *x* is
    +   an int or long, *x* is used directly.
    +
    +
    +.. function:: getstate()
    +
    +   Return an object capturing the current internal state of the generator.  This
    +   object can be passed to :func:`setstate` to restore the state.
    +
    +   .. versionadded:: 2.1
    +
    +
    +.. function:: setstate(state)
    +
    +   *state* should have been obtained from a previous call to :func:`getstate`, and
    +   :func:`setstate` restores the internal state of the generator to what it was at
    +   the time :func:`getstate` was called.
    +
    +   .. versionadded:: 2.1
    +
    +
    +.. function:: jumpahead(n)
    +
    +   Change the internal state to one different from and likely far away from the
    +   current state.  *n* is a non-negative integer which is used to scramble the
    +   current state vector.  This is most useful in multi-threaded programs, in
    +   conjunction with multiple instances of the :class:`Random` class:
    +   :meth:`setstate` or :meth:`seed` can be used to force all instances into the
    +   same internal state, and then :meth:`jumpahead` can be used to force the
    +   instances' states far apart.
    +
    +   .. versionadded:: 2.1
    +
    +   .. versionchanged:: 2.3
    +      Instead of jumping to a specific state, *n* steps ahead, ``jumpahead(n)``
    +      jumps to another state likely to be separated by many steps.
    +
    +
    +.. function:: getrandbits(k)
    +
    +   Returns a Python :class:`long` int with *k* random bits. This method is supplied
    +   with the MersenneTwister generator and some other generators may also provide it
    +   as an optional part of the API. When available, :meth:`getrandbits` enables
    +   :meth:`randrange` to handle arbitrarily large ranges.
    +
    +   .. versionadded:: 2.4
    +
    +Functions for integers:
    +
    +
    +.. function:: randrange([start,] stop[, step])
    +
    +   Return a randomly selected element from ``range(start, stop, step)``.  This is
    +   equivalent to ``choice(range(start, stop, step))``, but doesn't actually build a
    +   range object.
    +
    +   .. versionadded:: 1.5.2
    +
    +
    +.. function:: randint(a, b)
    +
    +   Return a random integer *N* such that ``a <= N <= b``.
    +
    +Functions for sequences:
    +
    +
    +.. function:: choice(seq)
    +
    +   Return a random element from the non-empty sequence *seq*. If *seq* is empty,
    +   raises :exc:`IndexError`.
    +
    +
    +.. function:: shuffle(x[, random])
    +
    +   Shuffle the sequence *x* in place. The optional argument *random* is a
    +   0-argument function returning a random float in [0.0, 1.0); by default, this is
    +   the function :func:`random`.
    +
    +   Note that for even rather small ``len(x)``, the total number of permutations of
    +   *x* is larger than the period of most random number generators; this implies
    +   that most permutations of a long sequence can never be generated.
    +
    +
    +.. function:: sample(population, k)
    +
    +   Return a *k* length list of unique elements chosen from the population sequence.
    +   Used for random sampling without replacement.
    +
    +   .. versionadded:: 2.3
    +
    +   Returns a new list containing elements from the population while leaving the
    +   original population unchanged.  The resulting list is in selection order so that
    +   all sub-slices will also be valid random samples.  This allows raffle winners
    +   (the sample) to be partitioned into grand prize and second place winners (the
    +   subslices).
    +
    +   Members of the population need not be hashable or unique.  If the population
    +   contains repeats, then each occurrence is a possible selection in the sample.
    +
    +   To choose a sample from a range of integers, use a :func:`range` object as an
    +   argument.  This is especially fast and space efficient for sampling from a large
    +   population:  ``sample(range(10000000), 60)``.
    +
    +The following functions generate specific real-valued distributions. Function
    +parameters are named after the corresponding variables in the distribution's
    +equation, as used in common mathematical practice; most of these equations can
    +be found in any statistics text.
    +
    +
    +.. function:: random()
    +
    +   Return the next random floating point number in the range [0.0, 1.0).
    +
    +
    +.. function:: uniform(a, b)
    +
    +   Return a random floating point number *N* such that ``a <= N < b``.
    +
    +
    +.. function:: betavariate(alpha, beta)
    +
    +   Beta distribution.  Conditions on the parameters are ``alpha > 0`` and ``beta >
    +   0``. Returned values range between 0 and 1.
    +
    +
    +.. function:: expovariate(lambd)
    +
    +   Exponential distribution.  *lambd* is 1.0 divided by the desired mean.  (The
    +   parameter would be called "lambda", but that is a reserved word in Python.)
    +   Returned values range from 0 to positive infinity.
    +
    +
    +.. function:: gammavariate(alpha, beta)
    +
    +   Gamma distribution.  (*Not* the gamma function!)  Conditions on the parameters
    +   are ``alpha > 0`` and ``beta > 0``.
    +
    +
    +.. function:: gauss(mu, sigma)
    +
    +   Gaussian distribution.  *mu* is the mean, and *sigma* is the standard deviation.
    +   This is slightly faster than the :func:`normalvariate` function defined below.
    +
    +
    +.. function:: lognormvariate(mu, sigma)
    +
    +   Log normal distribution.  If you take the natural logarithm of this
    +   distribution, you'll get a normal distribution with mean *mu* and standard
    +   deviation *sigma*.  *mu* can have any value, and *sigma* must be greater than
    +   zero.
    +
    +
    +.. function:: normalvariate(mu, sigma)
    +
    +   Normal distribution.  *mu* is the mean, and *sigma* is the standard deviation.
    +
    +
    +.. function:: vonmisesvariate(mu, kappa)
    +
    +   *mu* is the mean angle, expressed in radians between 0 and 2\*\ *pi*, and *kappa*
    +   is the concentration parameter, which must be greater than or equal to zero.  If
    +   *kappa* is equal to zero, this distribution reduces to a uniform random angle
    +   over the range 0 to 2\*\ *pi*.
    +
    +
    +.. function:: paretovariate(alpha)
    +
    +   Pareto distribution.  *alpha* is the shape parameter.
    +
    +
    +.. function:: weibullvariate(alpha, beta)
    +
    +   Weibull distribution.  *alpha* is the scale parameter and *beta* is the shape
    +   parameter.
    +
    +
    +Alternative Generators:
    +
    +.. class:: WichmannHill([seed])
    +
    +   Class that implements the Wichmann-Hill algorithm as the core generator. Has all
    +   of the same methods as :class:`Random` plus the :meth:`whseed` method described
    +   below.  Because this class is implemented in pure Python, it is not threadsafe
    +   and may require locks between calls.  The period of the generator is
    +   6,953,607,871,644 which is small enough to require care that two independent
    +   random sequences do not overlap.
    +
    +
    +.. function:: whseed([x])
    +
    +   This is obsolete, supplied for bit-level compatibility with versions of Python
    +   prior to 2.1. See :func:`seed` for details.  :func:`whseed` does not guarantee
    +   that distinct integer arguments yield distinct internal states, and can yield no
    +   more than about 2\*\*24 distinct internal states in all.
    +
    +
    +.. class:: SystemRandom([seed])
    +
    +   Class that uses the :func:`os.urandom` function for generating random numbers
    +   from sources provided by the operating system. Not available on all systems.
    +   Does not rely on software state and sequences are not reproducible. Accordingly,
    +   the :meth:`seed` and :meth:`jumpahead` methods have no effect and are ignored.
    +   The :meth:`getstate` and :meth:`setstate` methods raise
    +   :exc:`NotImplementedError` if called.
    +
    +   .. versionadded:: 2.4
    +
    +Examples of basic usage::
    +
    +   >>> random.random()        # Random float x, 0.0 <= x < 1.0
    +   0.37444887175646646
    +   >>> random.uniform(1, 10)  # Random float x, 1.0 <= x < 10.0
    +   1.1800146073117523
    +   >>> random.randint(1, 10)  # Integer from 1 to 10, endpoints included
    +   7
    +   >>> random.randrange(0, 101, 2)  # Even integer from 0 to 100
    +   26
    +   >>> random.choice('abcdefghij')  # Choose a random element
    +   'c'
    +
    +   >>> items = [1, 2, 3, 4, 5, 6, 7]
    +   >>> random.shuffle(items)
    +   >>> items
    +   [7, 3, 2, 5, 6, 4, 1]
    +
    +   >>> random.sample([1, 2, 3, 4, 5],  3)  # Choose 3 elements
    +   [4, 1, 5]
    +
    +
    +
    +.. seealso::
    +
    +   M. Matsumoto and T. Nishimura, "Mersenne Twister: A 623-dimensionally
    +   equidistributed uniform pseudorandom number generator", ACM Transactions on
    +   Modeling and Computer Simulation Vol. 8, No. 1, January 1998, pp. 3-30.
    +
    +   Wichmann, B. A. & Hill, I. D., "Algorithm AS 183: An efficient and portable
    +   pseudo-random number generator", Applied Statistics 31 (1982) 188-190.
    +
    +   http://www.npl.co.uk/ssfm/download/abstracts.html#196
    +      A modern variation of the Wichmann-Hill generator that greatly increases the
    +      period, and passes now-standard statistical tests that the original generator
    +      failed.
    +
    diff --git a/Doc/library/re.rst b/Doc/library/re.rst
    new file mode 100644
    index 0000000..027ff16
    --- /dev/null
    +++ b/Doc/library/re.rst
    @@ -0,0 +1,921 @@
    +
    +:mod:`re` --- Regular expression operations
    +===========================================
    +
    +.. module:: re
    +   :synopsis: Regular expression operations.
    +.. moduleauthor:: Fredrik Lundh 
    +.. sectionauthor:: Andrew M. Kuchling 
    +
    +
    +
    +
    +This module provides regular expression matching operations similar to
    +those found in Perl. Both patterns and strings to be searched can be
    +Unicode strings as well as 8-bit strings.  The :mod:`re` module is
    +always available.
    +
    +Regular expressions use the backslash character (``'\'``) to indicate
    +special forms or to allow special characters to be used without invoking
    +their special meaning.  This collides with Python's usage of the same
    +character for the same purpose in string literals; for example, to match
    +a literal backslash, one might have to write ``'\\\\'`` as the pattern
    +string, because the regular expression must be ``\\``, and each
    +backslash must be expressed as ``\\`` inside a regular Python string
    +literal.
    +
    +The solution is to use Python's raw string notation for regular expression
    +patterns; backslashes are not handled in any special way in a string literal
    +prefixed with ``'r'``.  So ``r"\n"`` is a two-character string containing
    +``'\'`` and ``'n'``, while ``"\n"`` is a one-character string containing a
    +newline. Usually patterns will be expressed in Python code using this raw string
    +notation.
    +
    +.. seealso::
    +
    +   Mastering Regular Expressions
    +      Book on regular expressions by Jeffrey Friedl, published by O'Reilly.  The
    +      second  edition of the book no longer covers Python at all,  but the first
    +      edition covered writing good regular expression patterns in great detail.
    +
    +
    +.. _re-syntax:
    +
    +Regular Expression Syntax
    +-------------------------
    +
    +A regular expression (or RE) specifies a set of strings that matches it; the
    +functions in this module let you check if a particular string matches a given
    +regular expression (or if a given regular expression matches a particular
    +string, which comes down to the same thing).
    +
    +Regular expressions can be concatenated to form new regular expressions; if *A*
    +and *B* are both regular expressions, then *AB* is also a regular expression.
    +In general, if a string *p* matches *A* and another string *q* matches *B*, the
    +string *pq* will match AB.  This holds unless *A* or *B* contain low precedence
    +operations; boundary conditions between *A* and *B*; or have numbered group
    +references.  Thus, complex expressions can easily be constructed from simpler
    +primitive expressions like the ones described here.  For details of the theory
    +and implementation of regular expressions, consult the Friedl book referenced
    +above, or almost any textbook about compiler construction.
    +
    +A brief explanation of the format of regular expressions follows.  For further
    +information and a gentler presentation, consult the Regular Expression HOWTO,
    +accessible from http://www.python.org/doc/howto/.
    +
    +Regular expressions can contain both special and ordinary characters. Most
    +ordinary characters, like ``'A'``, ``'a'``, or ``'0'``, are the simplest regular
    +expressions; they simply match themselves.  You can concatenate ordinary
    +characters, so ``last`` matches the string ``'last'``.  (In the rest of this
    +section, we'll write RE's in ``this special style``, usually without quotes, and
    +strings to be matched ``'in single quotes'``.)
    +
    +Some characters, like ``'|'`` or ``'('``, are special. Special
    +characters either stand for classes of ordinary characters, or affect
    +how the regular expressions around them are interpreted. Regular
    +expression pattern strings may not contain null bytes, but can specify
    +the null byte using the ``\number`` notation, e.g., ``'\x00'``.
    +
    +
    +The special characters are:
    +
    +.. % 
    +
    +``'.'``
    +   (Dot.)  In the default mode, this matches any character except a newline.  If
    +   the :const:`DOTALL` flag has been specified, this matches any character
    +   including a newline.
    +
    +``'^'``
    +   (Caret.)  Matches the start of the string, and in :const:`MULTILINE` mode also
    +   matches immediately after each newline.
    +
    +``'$'``
    +   Matches the end of the string or just before the newline at the end of the
    +   string, and in :const:`MULTILINE` mode also matches before a newline.  ``foo``
    +   matches both 'foo' and 'foobar', while the regular expression ``foo$`` matches
    +   only 'foo'.  More interestingly, searching for ``foo.$`` in ``'foo1\nfoo2\n'``
    +   matches 'foo2' normally, but 'foo1' in :const:`MULTILINE` mode.
    +
    +``'*'``
    +   Causes the resulting RE to match 0 or more repetitions of the preceding RE, as
    +   many repetitions as are possible.  ``ab*`` will match 'a', 'ab', or 'a' followed
    +   by any number of 'b's.
    +
    +``'+'``
    +   Causes the resulting RE to match 1 or more repetitions of the preceding RE.
    +   ``ab+`` will match 'a' followed by any non-zero number of 'b's; it will not
    +   match just 'a'.
    +
    +``'?'``
    +   Causes the resulting RE to match 0 or 1 repetitions of the preceding RE.
    +   ``ab?`` will match either 'a' or 'ab'.
    +
    +``*?``, ``+?``, ``??``
    +   The ``'*'``, ``'+'``, and ``'?'`` qualifiers are all :dfn:`greedy`; they match
    +   as much text as possible.  Sometimes this behaviour isn't desired; if the RE
    +   ``<.*>`` is matched against ``'<H1>title</H1>'``, it will match the entire + string, and not just ``'<H1>'``. Adding ``'?'`` after the qualifier makes it + perform the match in :dfn:`non-greedy` or :dfn:`minimal` fashion; as *few* + characters as possible will be matched. Using ``.*?`` in the previous + expression will match only ``'<H1>'``. + +``{m}`` + Specifies that exactly *m* copies of the previous RE should be matched; fewer + matches cause the entire RE not to match. For example, ``a{6}`` will match + exactly six ``'a'`` characters, but not five. + +``{m,n}`` + Causes the resulting RE to match from *m* to *n* repetitions of the preceding + RE, attempting to match as many repetitions as possible. For example, + ``a{3,5}`` will match from 3 to 5 ``'a'`` characters. Omitting *m* specifies a + lower bound of zero, and omitting *n* specifies an infinite upper bound. As an + example, ``a{4,}b`` will match ``aaaab`` or a thousand ``'a'`` characters + followed by a ``b``, but not ``aaab``. The comma may not be omitted or the + modifier would be confused with the previously described form. + +``{m,n}?`` + Causes the resulting RE to match from *m* to *n* repetitions of the preceding + RE, attempting to match as *few* repetitions as possible. This is the + non-greedy version of the previous qualifier. For example, on the + 6-character string ``'aaaaaa'``, ``a{3,5}`` will match 5 ``'a'`` characters, + while ``a{3,5}?`` will only match 3 characters. + +``'\'`` + Either escapes special characters (permitting you to match characters like + ``'*'``, ``'?'``, and so forth), or signals a special sequence; special + sequences are discussed below. + + If you're not using a raw string to express the pattern, remember that Python + also uses the backslash as an escape sequence in string literals; if the escape + sequence isn't recognized by Python's parser, the backslash and subsequent + character are included in the resulting string. However, if Python would + recognize the resulting sequence, the backslash should be repeated twice. This + is complicated and hard to understand, so it's highly recommended that you use + raw strings for all but the simplest expressions. + +``[]`` + Used to indicate a set of characters. 
Characters can be listed individually, or + a range of characters can be indicated by giving two characters and separating + them by a ``'-'``. Special characters are not active inside sets. For example, + ``[akm$]`` will match any of the characters ``'a'``, ``'k'``, + ``'m'``, or ``'$'``; ``[a-z]`` will match any lowercase letter, and + ``[a-zA-Z0-9]`` matches any letter or digit. Character classes such + as ``\w`` or ``\S`` (defined below) are also acceptable inside a + range, although the characters they match depends on whether :const:`LOCALE` + or :const:`UNICODE` mode is in force. If you want to include a + ``']'`` or a ``'-'`` inside a set, precede it with a backslash, or + place it as the first character. The pattern ``[]]`` will match + ``']'``, for example. + + You can match the characters not within a range by :dfn:`complementing` the set. + This is indicated by including a ``'^'`` as the first character of the set; + ``'^'`` elsewhere will simply match the ``'^'`` character. For example, + ``[^5]`` will match any character except ``'5'``, and ``[^^]`` will match any + character except ``'^'``. + +``'|'`` + ``A|B``, where A and B can be arbitrary REs, creates a regular expression that + will match either A or B. An arbitrary number of REs can be separated by the + ``'|'`` in this way. This can be used inside groups (see below) as well. As + the target string is scanned, REs separated by ``'|'`` are tried from left to + right. When one pattern completely matches, that branch is accepted. This means + that once ``A`` matches, ``B`` will not be tested further, even if it would + produce a longer overall match. In other words, the ``'|'`` operator is never + greedy. To match a literal ``'|'``, use ``\|``, or enclose it inside a + character class, as in ``[|]``. 
+ +``(...)`` + Matches whatever regular expression is inside the parentheses, and indicates the + start and end of a group; the contents of a group can be retrieved after a match + has been performed, and can be matched later in the string with the ``\number`` + special sequence, described below. To match the literals ``'('`` or ``')'``, + use ``\(`` or ``\)``, or enclose them inside a character class: ``[(] [)]``. + +``(?...)`` + This is an extension notation (a ``'?'`` following a ``'('`` is not meaningful + otherwise). The first character after the ``'?'`` determines what the meaning + and further syntax of the construct is. Extensions usually do not create a new + group; ``(?P<name>...)`` is the only exception to this rule. Following are the + currently supported extensions. + +``(?iLmsux)`` + (One or more letters from the set ``'i'``, ``'L'``, ``'m'``, ``'s'``, + ``'u'``, ``'x'``.) The group matches the empty string; the letters + set the corresponding flags: :const:`re.I` (ignore case), + :const:`re.L` (locale dependent), :const:`re.M` (multi-line), + :const:`re.S` (dot matches all), :const:`re.U` (Unicode dependent), + and :const:`re.X` (verbose), for the entire regular expression. (The + flags are described in :ref:`contents-of-module-re`.) This + is useful if you wish to include the flags as part of the regular + expression, instead of passing a *flag* argument to the + :func:`compile` function. + + Note that the ``(?x)`` flag changes how the expression is parsed. It should be + used first in the expression string, or after one or more whitespace characters. + If there are non-whitespace characters before the flag, the results are + undefined. + +``(?:...)`` + A non-grouping version of regular parentheses. Matches whatever regular + expression is inside the parentheses, but the substring matched by the group + *cannot* be retrieved after performing a match or referenced later in the + pattern. 
+ +``(?P<name>...)`` + Similar to regular parentheses, but the substring matched by the group is + accessible via the symbolic group name *name*. Group names must be valid Python + identifiers, and each group name must be defined only once within a regular + expression. A symbolic group is also a numbered group, just as if the group + were not named. So the group named 'id' in the example below can also be + referenced as the numbered group 1. + + For example, if the pattern is ``(?P<id>[a-zA-Z_]\w*)``, the group can be + referenced by its name in arguments to methods of match objects, such as + ``m.group('id')`` or ``m.end('id')``, and also by name in pattern text (for + example, ``(?P=id)``) and replacement text (such as ``\g<id>``). + +``(?P=name)`` + Matches whatever text was matched by the earlier group named *name*. + +``(?#...)`` + A comment; the contents of the parentheses are simply ignored. + +``(?=...)`` + Matches if ``...`` matches next, but doesn't consume any of the string. This is + called a lookahead assertion. For example, ``Isaac (?=Asimov)`` will match + ``'Isaac '`` only if it's followed by ``'Asimov'``. + +``(?!...)`` + Matches if ``...`` doesn't match next. This is a negative lookahead assertion. + For example, ``Isaac (?!Asimov)`` will match ``'Isaac '`` only if it's *not* + followed by ``'Asimov'``. + +``(?<=...)`` + Matches if the current position in the string is preceded by a match for ``...`` + that ends at the current position. This is called a :dfn:`positive lookbehind + assertion`. ``(?<=abc)def`` will find a match in ``abcdef``, since the + lookbehind will back up 3 characters and check if the contained pattern matches. + The contained pattern must only match strings of some fixed length, meaning that + ``abc`` or ``a|b`` are allowed, but ``a*`` and ``a{3,4}`` are not. 
Note that + patterns which start with positive lookbehind assertions will never match at the + beginning of the string being searched; you will most likely want to use the + :func:`search` function rather than the :func:`match` function:: + + >>> import re + >>> m = re.search('(?<=abc)def', 'abcdef') + >>> m.group(0) + 'def' + + This example looks for a word following a hyphen:: + + >>> m = re.search('(?<=-)\w+', 'spam-egg') + >>> m.group(0) + 'egg' + +``(?)`` is a poor email + matching pattern, which will match with ``''`` as well as + ``'user@host.com'``, but not with ``' + + +Python offers two different primitive operations based on regular expressions: +match and search. If you are accustomed to Perl's semantics, the search +operation is what you're looking for. See the :func:`search` function and +corresponding method of compiled regular expression objects. + +Note that match may differ from search using a regular expression beginning with +``'^'``: ``'^'`` matches only at the start of the string, or in +:const:`MULTILINE` mode also immediately following a newline. The "match" +operation succeeds only if the pattern matches at the start of the string +regardless of mode, or at the starting position given by the optional *pos* +argument regardless of whether a newline precedes it. + +.. % Examples from Tim Peters: + +:: + + re.compile("a").match("ba", 1) # succeeds + re.compile("^a").search("ba", 1) # fails; 'a' not at start + re.compile("^a").search("\na", 1) # fails; 'a' not at start + re.compile("^a", re.M).search("\na", 1) # succeeds + re.compile("^a", re.M).search("ba", 1) # fails; no preceding \n + + +.. _contents-of-module-re: + +Module Contents +--------------- + +The module defines several functions, constants, and an exception. Some of the +functions are simplified versions of the full featured methods for compiled +regular expressions. Most non-trivial applications always use the compiled +form. + + +.. 
function:: compile(pattern[, flags]) + + Compile a regular expression pattern into a regular expression object, which can + be used for matching using its :func:`match` and :func:`search` methods, + described below. + + The expression's behaviour can be modified by specifying a *flags* value. + Values can be any of the following variables, combined using bitwise OR (the + ``|`` operator). + + The sequence :: + + prog = re.compile(pat) + result = prog.match(str) + + is equivalent to :: + + result = re.match(pat, str) + + but the version using :func:`compile` is more efficient when the expression will + be used several times in a single program. + + .. % (The compiled version of the last pattern passed to + .. % \function{re.match()} or \function{re.search()} is cached, so + .. % programs that use only a single regular expression at a time needn't + .. % worry about compiling regular expressions.) + + +.. data:: I + IGNORECASE + + Perform case-insensitive matching; expressions like ``[A-Z]`` will match + lowercase letters, too. This is not affected by the current locale. + + +.. data:: L + LOCALE + + Make ``\w``, ``\W``, ``\b``, ``\B``, ``\s`` and ``\S`` dependent on the current + locale. + + +.. data:: M + MULTILINE + + When specified, the pattern character ``'^'`` matches at the beginning of the + string and at the beginning of each line (immediately following each newline); + and the pattern character ``'$'`` matches at the end of the string and at the + end of each line (immediately preceding each newline). By default, ``'^'`` + matches only at the beginning of the string, and ``'$'`` only at the end of the + string and immediately before the newline (if any) at the end of the string. + + +.. data:: S + DOTALL + + Make the ``'.'`` special character match any character at all, including a + newline; without this flag, ``'.'`` will match anything *except* a newline. + + +.. 
data:: U + UNICODE + + Make ``\w``, ``\W``, ``\b``, ``\B``, ``\d``, ``\D``, ``\s`` and ``\S`` dependent + on the Unicode character properties database. + + .. versionadded:: 2.0 + + +.. data:: X + VERBOSE + + This flag allows you to write regular expressions that look nicer. Whitespace + within the pattern is ignored, except when in a character class or preceded by + an unescaped backslash, and, when a line contains a ``'#'`` neither in a + character class or preceded by an unescaped backslash, all characters from the + leftmost such ``'#'`` through the end of the line are ignored. + + .. % XXX should add an example here + + +.. function:: search(pattern, string[, flags]) + + Scan through *string* looking for a location where the regular expression + *pattern* produces a match, and return a corresponding :class:`MatchObject` + instance. Return ``None`` if no position in the string matches the pattern; note + that this is different from finding a zero-length match at some point in the + string. + + +.. function:: match(pattern, string[, flags]) + + If zero or more characters at the beginning of *string* match the regular + expression *pattern*, return a corresponding :class:`MatchObject` instance. + Return ``None`` if the string does not match the pattern; note that this is + different from a zero-length match. + + .. note:: + + If you want to locate a match anywhere in *string*, use :meth:`search` instead. + + +.. function:: split(pattern, string[, maxsplit=0]) + + Split *string* by the occurrences of *pattern*. If capturing parentheses are + used in *pattern*, then the text of all groups in the pattern are also returned + as part of the resulting list. If *maxsplit* is nonzero, at most *maxsplit* + splits occur, and the remainder of the string is returned as the final element + of the list. (Incompatibility note: in the original Python 1.5 release, + *maxsplit* was ignored. This has been fixed in later releases.) 
:: + + >>> re.split('\W+', 'Words, words, words.') + ['Words', 'words', 'words', ''] + >>> re.split('(\W+)', 'Words, words, words.') + ['Words', ', ', 'words', ', ', 'words', '.', ''] + >>> re.split('\W+', 'Words, words, words.', 1) + ['Words', 'words, words.'] + + +.. function:: findall(pattern, string[, flags]) + + Return a list of all non-overlapping matches of *pattern* in *string*. If one + or more groups are present in the pattern, return a list of groups; this will be + a list of tuples if the pattern has more than one group. Empty matches are + included in the result unless they touch the beginning of another match. + + .. versionadded:: 1.5.2 + + .. versionchanged:: 2.4 + Added the optional flags argument. + + +.. function:: finditer(pattern, string[, flags]) + + Return an iterator over all non-overlapping matches for the RE *pattern* in + *string*. For each match, the iterator returns a match object. Empty matches + are included in the result unless they touch the beginning of another match. + + .. versionadded:: 2.2 + + .. versionchanged:: 2.4 + Added the optional flags argument. + + +.. function:: sub(pattern, repl, string[, count]) + + Return the string obtained by replacing the leftmost non-overlapping occurrences + of *pattern* in *string* by the replacement *repl*. If the pattern isn't found, + *string* is returned unchanged. *repl* can be a string or a function; if it is + a string, any backslash escapes in it are processed. That is, ``\n`` is + converted to a single newline character, ``\r`` is converted to a carriage return, and + so forth. Unknown escapes such as ``\j`` are left alone. Backreferences, such + as ``\6``, are replaced with the substring matched by group 6 in the pattern. + For example:: + + >>> re.sub(r'def\s+([a-zA-Z_][a-zA-Z_0-9]*)\s*\(\s*\):', + ... r'static PyObject*\npy_\1(void)\n{', + ... 
'def myfunc():') + 'static PyObject*\npy_myfunc(void)\n{' + + If *repl* is a function, it is called for every non-overlapping occurrence of + *pattern*. The function takes a single match object argument, and returns the + replacement string. For example:: + + >>> def dashrepl(matchobj): + ... if matchobj.group(0) == '-': return ' ' + ... else: return '-' + >>> re.sub('-{1,2}', dashrepl, 'pro----gram-files') + 'pro--gram files' + + The pattern may be a string or an RE object; if you need to specify regular + expression flags, you must use a RE object, or use embedded modifiers in a + pattern; for example, ``sub("(?i)b+", "x", "bbbb BBBB")`` returns ``'x x'``. + + The optional argument *count* is the maximum number of pattern occurrences to be + replaced; *count* must be a non-negative integer. If omitted or zero, all + occurrences will be replaced. Empty matches for the pattern are replaced only + when not adjacent to a previous match, so ``sub('x*', '-', 'abc')`` returns + ``'-a-b-c-'``. + + In addition to character escapes and backreferences as described above, + ``\g<name>`` will use the substring matched by the group named ``name``, as + defined by the ``(?P<name>...)`` syntax. ``\g<number>`` uses the corresponding + group number; ``\g<2>`` is therefore equivalent to ``\2``, but isn't ambiguous + in a replacement such as ``\g<2>0``. ``\20`` would be interpreted as a + reference to group 20, not a reference to group 2 followed by the literal + character ``'0'``. The backreference ``\g<0>`` substitutes in the entire + substring matched by the RE. + + +.. function:: subn(pattern, repl, string[, count]) + + Perform the same operation as :func:`sub`, but return a tuple ``(new_string, + number_of_subs_made)``. + + +.. function:: escape(string) + + Return *string* with all non-alphanumerics backslashed; this is useful if you + want to match an arbitrary literal string that may have regular expression + metacharacters in it. + + +.. 
exception:: error + + Exception raised when a string passed to one of the functions here is not a + valid regular expression (for example, it might contain unmatched parentheses) + or when some other error occurs during compilation or matching. It is never an + error if a string contains no match for a pattern. + + +.. _re-objects: + +Regular Expression Objects +-------------------------- + +Compiled regular expression objects support the following methods and +attributes: + + +.. method:: RegexObject.match(string[, pos[, endpos]]) + + If zero or more characters at the beginning of *string* match this regular + expression, return a corresponding :class:`MatchObject` instance. Return + ``None`` if the string does not match the pattern; note that this is different + from a zero-length match. + + .. note:: + + If you want to locate a match anywhere in *string*, use :meth:`search` instead. + + The optional second parameter *pos* gives an index in the string where the + search is to start; it defaults to ``0``. This is not completely equivalent to + slicing the string; the ``'^'`` pattern character matches at the real beginning + of the string and at positions just after a newline, but not necessarily at the + index where the search is to start. + + The optional parameter *endpos* limits how far the string will be searched; it + will be as if the string is *endpos* characters long, so only the characters + from *pos* to ``endpos - 1`` will be searched for a match. If *endpos* is less + than *pos*, no match will be found, otherwise, if *rx* is a compiled regular + expression object, ``rx.match(string, 0, 50)`` is equivalent to + ``rx.match(string[:50], 0)``. + + +.. method:: RegexObject.search(string[, pos[, endpos]]) + + Scan through *string* looking for a location where this regular expression + produces a match, and return a corresponding :class:`MatchObject` instance. 
+ Return ``None`` if no position in the string matches the pattern; note that this + is different from finding a zero-length match at some point in the string. + + The optional *pos* and *endpos* parameters have the same meaning as for the + :meth:`match` method. + + +.. method:: RegexObject.split(string[, maxsplit=0]) + + Identical to the :func:`split` function, using the compiled pattern. + + +.. method:: RegexObject.findall(string[, pos[, endpos]]) + + Identical to the :func:`findall` function, using the compiled pattern. + + +.. method:: RegexObject.finditer(string[, pos[, endpos]]) + + Identical to the :func:`finditer` function, using the compiled pattern. + + +.. method:: RegexObject.sub(repl, string[, count=0]) + + Identical to the :func:`sub` function, using the compiled pattern. + + +.. method:: RegexObject.subn(repl, string[, count=0]) + + Identical to the :func:`subn` function, using the compiled pattern. + + +.. attribute:: RegexObject.flags + + The flags argument used when the RE object was compiled, or ``0`` if no flags + were provided. + + +.. attribute:: RegexObject.groupindex + + A dictionary mapping any symbolic group names defined by ``(?P<id>)`` to group + numbers. The dictionary is empty if no symbolic groups were used in the + pattern. + + +.. attribute:: RegexObject.pattern + + The pattern string from which the RE object was compiled. + + +.. _match-objects: + +Match Objects +------------- + +:class:`MatchObject` instances support the following methods and attributes: + + +.. method:: MatchObject.expand(template) + + Return the string obtained by doing backslash substitution on the template + string *template*, as done by the :meth:`sub` method. Escapes such as ``\n`` are + converted to the appropriate characters, and numeric backreferences (``\1``, + ``\2``) and named backreferences (``\g<1>``, ``\g<name>``) are replaced by the + contents of the corresponding group. + + +.. 
method:: MatchObject.group([group1, ...]) + + Returns one or more subgroups of the match. If there is a single argument, the + result is a single string; if there are multiple arguments, the result is a + tuple with one item per argument. Without arguments, *group1* defaults to zero + (the whole match is returned). If a *groupN* argument is zero, the corresponding + return value is the entire matching string; if it is in the inclusive range + [1..99], it is the string matching the corresponding parenthesized group. If a + group number is negative or larger than the number of groups defined in the + pattern, an :exc:`IndexError` exception is raised. If a group is contained in a + part of the pattern that did not match, the corresponding result is ``None``. + If a group is contained in a part of the pattern that matched multiple times, + the last match is returned. + + If the regular expression uses the ``(?P<name>...)`` syntax, the *groupN* + arguments may also be strings identifying groups by their group name. If a + string argument is not used as a group name in the pattern, an :exc:`IndexError` + exception is raised. + + A moderately complicated example:: + + m = re.match(r"(?P<int>\d+)\.(\d*)", '3.14') + + After performing this match, ``m.group(1)`` is ``'3'``, as is + ``m.group('int')``, and ``m.group(2)`` is ``'14'``. + + +.. method:: MatchObject.groups([default]) + + Return a tuple containing all the subgroups of the match, from 1 up to however + many groups are in the pattern. The *default* argument is used for groups that + did not participate in the match; it defaults to ``None``. (Incompatibility + note: in the original Python 1.5 release, if the tuple was one element long, a + string would be returned instead. In later versions (from 1.5.1 on), a + singleton tuple is returned in such cases.) + + +.. method:: MatchObject.groupdict([default]) + + Return a dictionary containing all the *named* subgroups of the match, keyed by + the subgroup name. 
The *default* argument is used for groups that did not + participate in the match; it defaults to ``None``. + + +.. method:: MatchObject.start([group]) + MatchObject.end([group]) + + Return the indices of the start and end of the substring matched by *group*; + *group* defaults to zero (meaning the whole matched substring). Return ``-1`` if + *group* exists but did not contribute to the match. For a match object *m*, and + a group *g* that did contribute to the match, the substring matched by group *g* + (equivalent to ``m.group(g)``) is :: + + m.string[m.start(g):m.end(g)] + + Note that ``m.start(group)`` will equal ``m.end(group)`` if *group* matched a + null string. For example, after ``m = re.search('b(c?)', 'cba')``, + ``m.start(0)`` is 1, ``m.end(0)`` is 2, ``m.start(1)`` and ``m.end(1)`` are both + 2, and ``m.start(2)`` raises an :exc:`IndexError` exception. + + +.. method:: MatchObject.span([group]) + + For :class:`MatchObject` *m*, return the 2-tuple ``(m.start(group), + m.end(group))``. Note that if *group* did not contribute to the match, this is + ``(-1, -1)``. Again, *group* defaults to zero. + + +.. attribute:: MatchObject.pos + + The value of *pos* which was passed to the :func:`search` or :func:`match` + method of the :class:`RegexObject`. This is the index into the string at which + the RE engine started looking for a match. + + +.. attribute:: MatchObject.endpos + + The value of *endpos* which was passed to the :func:`search` or :func:`match` + method of the :class:`RegexObject`. This is the index into the string beyond + which the RE engine will not go. + + +.. attribute:: MatchObject.lastindex + + The integer index of the last matched capturing group, or ``None`` if no group + was matched at all. For example, the expressions ``(a)b``, ``((a)(b))``, and + ``((ab))`` will have ``lastindex == 1`` if applied to the string ``'ab'``, while + the expression ``(a)(b)`` will have ``lastindex == 2``, if applied to the same + string. + + +.. 
attribute:: MatchObject.lastgroup + + The name of the last matched capturing group, or ``None`` if the group didn't + have a name, or if no group was matched at all. + + +.. attribute:: MatchObject.re + + The regular expression object whose :meth:`match` or :meth:`search` method + produced this :class:`MatchObject` instance. + + +.. attribute:: MatchObject.string + + The string passed to :func:`match` or :func:`search`. + + +Examples +-------- + +**Simulating scanf()** + +.. index:: single: scanf() + +Python does not currently have an equivalent to :cfunc:`scanf`. Regular +expressions are generally more powerful, though also more verbose, than +:cfunc:`scanf` format strings. The table below offers some more-or-less +equivalent mappings between :cfunc:`scanf` format tokens and regular +expressions. + ++--------------------------------+---------------------------------------------+ +| :cfunc:`scanf` Token | Regular Expression | ++================================+=============================================+ +| ``%c`` | ``.`` | ++--------------------------------+---------------------------------------------+ +| ``%5c`` | ``.{5}`` | ++--------------------------------+---------------------------------------------+ +| ``%d`` | ``[-+]?\d+`` | ++--------------------------------+---------------------------------------------+ +| ``%e``, ``%E``, ``%f``, ``%g`` | ``[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?`` | ++--------------------------------+---------------------------------------------+ +| ``%i`` | ``[-+]?(0[xX][\dA-Fa-f]+|0[0-7]*|\d+)`` | ++--------------------------------+---------------------------------------------+ +| ``%o`` | ``0[0-7]*`` | ++--------------------------------+---------------------------------------------+ +| ``%s`` | ``\S+`` | ++--------------------------------+---------------------------------------------+ +| ``%u`` | ``\d+`` | ++--------------------------------+---------------------------------------------+ +| ``%x``, ``%X`` | ``0[xX][\dA-Fa-f]+`` | 
++--------------------------------+---------------------------------------------+ + +To extract the filename and numbers from a string like :: + + /usr/sbin/sendmail - 0 errors, 4 warnings + +you would use a :cfunc:`scanf` format like :: + + %s - %d errors, %d warnings + +The equivalent regular expression would be :: + + (\S+) - (\d+) errors, (\d+) warnings + +**Avoiding recursion** + +If you create regular expressions that require the engine to perform a lot of +recursion, you may encounter a :exc:`RuntimeError` exception with the message +``maximum recursion limit`` exceeded. For example, :: + + >>> import re + >>> s = 'Begin ' + 1000*'a very long string ' + 'end' + >>> re.match('Begin (\w| )*? end', s).end() + Traceback (most recent call last): + File "", line 1, in ? + File "/usr/local/lib/python2.5/re.py", line 132, in match + return _compile(pattern, flags).match(string) + RuntimeError: maximum recursion limit exceeded + +You can often restructure your regular expression to avoid recursion. + +Starting with Python 2.3, simple uses of the ``*?`` pattern are special-cased to +avoid recursion. Thus, the above regular expression can avoid recursion by +being recast as ``Begin [a-zA-Z0-9_ ]*?end``. As a further benefit, such +regular expressions will run faster than their recursive equivalents. + diff --git a/Doc/library/readline.rst b/Doc/library/readline.rst new file mode 100644 index 0000000..9a40747 --- /dev/null +++ b/Doc/library/readline.rst @@ -0,0 +1,222 @@ + +:mod:`readline` --- GNU readline interface +========================================== + +.. module:: readline + :platform: Unix + :synopsis: GNU readline support for Python. +.. sectionauthor:: Skip Montanaro + + +The :mod:`readline` module defines a number of functions to facilitate +completion and reading/writing of history files from the Python interpreter. +This module can be used directly or via the :mod:`rlcompleter` module. 
Settings +made using this module affect the behaviour of both the interpreter's +interactive prompt and the prompts offered by the :func:`raw_input` and +:func:`input` built-in functions. + +The :mod:`readline` module defines the following functions: + + +.. function:: parse_and_bind(string) + + Parse and execute single line of a readline init file. + + +.. function:: get_line_buffer() + + Return the current contents of the line buffer. + + +.. function:: insert_text(string) + + Insert text into the command line. + + +.. function:: read_init_file([filename]) + + Parse a readline initialization file. The default filename is the last filename + used. + + +.. function:: read_history_file([filename]) + + Load a readline history file. The default filename is :file:`~/.history`. + + +.. function:: write_history_file([filename]) + + Save a readline history file. The default filename is :file:`~/.history`. + + +.. function:: clear_history() + + Clear the current history. (Note: this function is not available if the + installed version of GNU readline doesn't support it.) + + .. versionadded:: 2.4 + + +.. function:: get_history_length() + + Return the desired length of the history file. Negative values imply unlimited + history file size. + + +.. function:: set_history_length(length) + + Set the number of lines to save in the history file. :func:`write_history_file` + uses this value to truncate the history file when saving. Negative values imply + unlimited history file size. + + +.. function:: get_current_history_length() + + Return the number of lines currently in the history. (This is different from + :func:`get_history_length`, which returns the maximum number of lines that will + be written to a history file.) + + .. versionadded:: 2.3 + + +.. function:: get_history_item(index) + + Return the current contents of history item at *index*. + + .. versionadded:: 2.3 + + +.. 
function:: remove_history_item(pos) + + Remove history item specified by its position from the history. + + .. versionadded:: 2.4 + + +.. function:: replace_history_item(pos, line) + + Replace history item specified by its position with the given line. + + .. versionadded:: 2.4 + + +.. function:: redisplay() + + Change what's displayed on the screen to reflect the current contents of the + line buffer. + + .. versionadded:: 2.3 + + +.. function:: set_startup_hook([function]) + + Set or remove the startup_hook function. If *function* is specified, it will be + used as the new startup_hook function; if omitted or ``None``, any hook function + already installed is removed. The startup_hook function is called with no + arguments just before readline prints the first prompt. + + +.. function:: set_pre_input_hook([function]) + + Set or remove the pre_input_hook function. If *function* is specified, it will + be used as the new pre_input_hook function; if omitted or ``None``, any hook + function already installed is removed. The pre_input_hook function is called + with no arguments after the first prompt has been printed and just before + readline starts reading input characters. + + +.. function:: set_completer([function]) + + Set or remove the completer function. If *function* is specified, it will be + used as the new completer function; if omitted or ``None``, any completer + function already installed is removed. The completer function is called as + ``function(text, state)``, for *state* in ``0``, ``1``, ``2``, ..., until it + returns a non-string value. It should return the next possible completion + starting with *text*. + + +.. function:: get_completer() + + Get the completer function, or ``None`` if no completer function has been set. + + .. versionadded:: 2.3 + + +.. function:: get_begidx() + + Get the beginning index of the readline tab-completion scope. + + +.. function:: get_endidx() + + Get the ending index of the readline tab-completion scope. + + +.. 
function:: set_completer_delims(string) + + Set the readline word delimiters for tab-completion. + + +.. function:: get_completer_delims() + + Get the readline word delimiters for tab-completion. + + +.. function:: add_history(line) + + Append a line to the history buffer, as if it was the last line typed. + + +.. seealso:: + + Module :mod:`rlcompleter` + Completion of Python identifiers at the interactive prompt. + + +.. _readline-example: + +Example +------- + +The following example demonstrates how to use the :mod:`readline` module's +history reading and writing functions to automatically load and save a history +file named :file:`.pyhist` from the user's home directory. The code below would +normally be executed automatically during interactive sessions from the user's +:envvar:`PYTHONSTARTUP` file. :: + + import os + histfile = os.path.join(os.environ["HOME"], ".pyhist") + try: + readline.read_history_file(histfile) + except IOError: + pass + import atexit + atexit.register(readline.write_history_file, histfile) + del os, histfile + +The following example extends the :class:`code.InteractiveConsole` class to +support history save/restore. 
:: + + import code + import readline + import atexit + import os + + class HistoryConsole(code.InteractiveConsole): + def __init__(self, locals=None, filename="<console>", + histfile=os.path.expanduser("~/.console-history")): + code.InteractiveConsole.__init__(self, locals, filename) + self.init_history(histfile) + + def init_history(self, histfile): + readline.parse_and_bind("tab: complete") + if hasattr(readline, "read_history_file"): + try: + readline.read_history_file(histfile) + except IOError: + pass + atexit.register(self.save_history, histfile) + + def save_history(self, histfile): + readline.write_history_file(histfile) + diff --git a/Doc/library/repr.rst b/Doc/library/repr.rst new file mode 100644 index 0000000..493e2b3 --- /dev/null +++ b/Doc/library/repr.rst @@ -0,0 +1,136 @@ + +:mod:`repr` --- Alternate :func:`repr` implementation +===================================================== + +.. module:: repr + :synopsis: Alternate repr() implementation with size limits. +.. sectionauthor:: Fred L. Drake, Jr. + + +The :mod:`repr` module provides a means for producing object representations +with limits on the size of the resulting strings. This is used in the Python +debugger and may be useful in other contexts as well. + +This module provides a class, an instance, and a function: + + +.. class:: Repr() + + Class which provides formatting services useful in implementing functions + similar to the built-in :func:`repr`; size limits for different object types + are added to avoid the generation of representations which are excessively long. + + +.. data:: aRepr + + This is an instance of :class:`Repr` which is used to provide the :func:`repr` + function described below. Changing the attributes of this object will affect + the size limits used by :func:`repr` and the Python debugger. + + +.. function:: repr(obj) + + This is the :meth:`repr` method of ``aRepr``. It returns a string similar to + that returned by the built-in function of the same name, but with limits on + most sizes.
+ + +.. _repr-objects: + +Repr Objects +------------ + +:class:`Repr` instances provide several members which can be used to provide +size limits for the representations of different object types, and methods +which format specific object types. + + +.. attribute:: Repr.maxlevel + + Depth limit on the creation of recursive representations. The default is ``6``. + + +.. attribute:: Repr.maxdict + Repr.maxlist + Repr.maxtuple + Repr.maxset + Repr.maxfrozenset + Repr.maxdeque + Repr.maxarray + + Limits on the number of entries represented for the named object type. The + default is ``4`` for :attr:`maxdict`, ``5`` for :attr:`maxarray`, and ``6`` for + the others. + + .. versionadded:: 2.4 + :attr:`maxset`, :attr:`maxfrozenset`, and :attr:`maxdeque`. + + +.. attribute:: Repr.maxlong + + Maximum number of characters in the representation for a long integer. Digits + are dropped from the middle. The default is ``40``. + + +.. attribute:: Repr.maxstring + + Limit on the number of characters in the representation of the string. Note + that the "normal" representation of the string is used as the character source: + if escape sequences are needed in the representation, these may be mangled when + the representation is shortened. The default is ``30``. + + +.. attribute:: Repr.maxother + + This limit is used to control the size of object types for which no specific + formatting method is available on the :class:`Repr` object. It is applied in a + similar manner as :attr:`maxstring`. The default is ``20``. + + +.. method:: Repr.repr(obj) + + The equivalent to the built-in :func:`repr` that uses the formatting imposed by + the instance. + + +.. method:: Repr.repr1(obj, level) + + Recursive implementation used by :meth:`repr`. This uses the type of *obj* to + determine which formatting method to call, passing it *obj* and *level*.
The + type-specific methods should call :meth:`repr1` to perform recursive formatting, + with ``level - 1`` for the value of *level* in the recursive call. + + +.. method:: Repr.repr_TYPE(obj, level) + :noindex: + + Formatting methods for specific types are implemented as methods with a name + based on the type name. In the method name, **TYPE** is replaced by + ``string.join(string.split(type(obj).__name__, '_'))``. Dispatch to these + methods is handled by :meth:`repr1`. Type-specific methods which need to + recursively format a value should call ``self.repr1(subobj, level - 1)``. + + +.. _subclassing-reprs: + +Subclassing Repr Objects +------------------------ + +The use of dynamic dispatching by :meth:`Repr.repr1` allows subclasses of +:class:`Repr` to add support for additional built-in object types or to modify +the handling of types already supported. This example shows how special support +for file objects could be added:: + + import repr + import sys + + class MyRepr(repr.Repr): + def repr_file(self, obj, level): + if obj.name in ['<stdin>', '<stdout>', '<stderr>']: + return obj.name + else: + return `obj` + + aRepr = MyRepr() + print aRepr.repr(sys.stdin) # prints '<stdin>' + diff --git a/Doc/library/resource.rst b/Doc/library/resource.rst new file mode 100644 index 0000000..834dace --- /dev/null +++ b/Doc/library/resource.rst @@ -0,0 +1,238 @@ + +:mod:`resource` --- Resource usage information +============================================== + +.. module:: resource + :platform: Unix + :synopsis: An interface to provide resource usage information on the current process. +.. moduleauthor:: Jeremy Hylton +.. sectionauthor:: Jeremy Hylton + + +This module provides basic mechanisms for measuring and controlling system +resources utilized by a program. + +Symbolic constants are used to specify particular system resources and to +request usage information about either the current process or its children. + +A single exception is defined for errors: + + +.. 
exception:: error + + The functions described below may raise this error if the underlying system call + fails unexpectedly. + + +Resource Limits +--------------- + +Resource usage can be limited using the :func:`setrlimit` function described +below. Each resource is controlled by a pair of limits: a soft limit and a hard +limit. The soft limit is the current limit, and may be lowered or raised by a +process over time. The soft limit can never exceed the hard limit. The hard +limit can be lowered to any value greater than the soft limit, but not raised. +(Only processes with the effective UID of the super-user can raise a hard +limit.) + +The specific resources that can be limited are system dependent. They are +described in the :manpage:`getrlimit(2)` man page. The resources listed below +are supported when the underlying operating system supports them; resources +which cannot be checked or controlled by the operating system are not defined in +this module for those platforms. + + +.. function:: getrlimit(resource) + + Returns a tuple ``(soft, hard)`` with the current soft and hard limits of + *resource*. Raises :exc:`ValueError` if an invalid resource is specified, or + :exc:`error` if the underlying system call fails unexpectedly. + + +.. function:: setrlimit(resource, limits) + + Sets new limits of consumption of *resource*. The *limits* argument must be a + tuple ``(soft, hard)`` of two integers describing the new limits. A value of + ``-1`` can be used to specify the maximum possible upper limit. + + Raises :exc:`ValueError` if an invalid resource is specified, if the new soft + limit exceeds the hard limit, or if a process tries to raise its hard limit + (unless the process has an effective UID of super-user). Can also raise + :exc:`error` if the underlying system call fails. + +These symbols define resources whose consumption can be controlled using the +:func:`setrlimit` and :func:`getrlimit` functions described above.
The values of +these symbols are exactly the constants used by C programs. + +The Unix man page for :manpage:`getrlimit(2)` lists the available resources. +Note that not all systems use the same symbol or same value to denote the same +resource. This module does not attempt to mask platform differences --- symbols +not defined for a platform will not be available from this module on that +platform. + + +.. data:: RLIMIT_CORE + + The maximum size (in bytes) of a core file that the current process can create. + This may result in the creation of a partial core file if a larger core would be + required to contain the entire process image. + + +.. data:: RLIMIT_CPU + + The maximum amount of processor time (in seconds) that a process can use. If + this limit is exceeded, a :const:`SIGXCPU` signal is sent to the process. (See + the :mod:`signal` module documentation for information about how to catch this + signal and do something useful, e.g. flush open files to disk.) + + +.. data:: RLIMIT_FSIZE + + The maximum size of a file which the process may create. + + +.. data:: RLIMIT_DATA + + The maximum size (in bytes) of the process's heap. + + +.. data:: RLIMIT_STACK + + The maximum size (in bytes) of the call stack for the current process. This only + affects the stack of the main thread in a multi-threaded process. + + +.. data:: RLIMIT_RSS + + The maximum resident set size that should be made available to the process. + + +.. data:: RLIMIT_NPROC + + The maximum number of processes the current process may create. + + +.. data:: RLIMIT_NOFILE + + The maximum number of open file descriptors for the current process. + + +.. data:: RLIMIT_OFILE + + The BSD name for :const:`RLIMIT_NOFILE`. + + +.. data:: RLIMIT_MEMLOCK + + The maximum address space which may be locked in memory. + + +.. data:: RLIMIT_VMEM + + The largest area of mapped memory which the process may occupy. + + +.. 
data:: RLIMIT_AS + + The maximum area (in bytes) of address space which may be taken by the process. + + +Resource Usage +-------------- + +These functions are used to retrieve resource usage information: + + +.. function:: getrusage(who) + + This function returns an object that describes the resources consumed by either + the current process or its children, as specified by the *who* parameter. The + *who* parameter should be specified using one of the :const:`RUSAGE_\*` + constants described below. + + The fields of the return value each describe how a particular system resource + has been used, e.g. amount of time spent running in user mode or number of times + the process was swapped out of main memory. Some values are dependent on the + clock tick interval, e.g. the amount of memory the process is using. + + For backward compatibility, the return value is also accessible as a tuple of 16 + elements. + + The fields :attr:`ru_utime` and :attr:`ru_stime` of the return value are + floating point values representing the amount of time spent executing in user + mode and the amount of time spent executing in system mode, respectively. The + remaining values are integers. Consult the :manpage:`getrusage(2)` man page for + detailed information about these values.
A brief summary is presented here: + + +--------+---------------------+-------------------------------+ + | Index | Field | Resource | + +========+=====================+===============================+ + | ``0`` | :attr:`ru_utime` | time in user mode (float) | + +--------+---------------------+-------------------------------+ + | ``1`` | :attr:`ru_stime` | time in system mode (float) | + +--------+---------------------+-------------------------------+ + | ``2`` | :attr:`ru_maxrss` | maximum resident set size | + +--------+---------------------+-------------------------------+ + | ``3`` | :attr:`ru_ixrss` | shared memory size | + +--------+---------------------+-------------------------------+ + | ``4`` | :attr:`ru_idrss` | unshared memory size | + +--------+---------------------+-------------------------------+ + | ``5`` | :attr:`ru_isrss` | unshared stack size | + +--------+---------------------+-------------------------------+ + | ``6`` | :attr:`ru_minflt` | page faults not requiring I/O | + +--------+---------------------+-------------------------------+ + | ``7`` | :attr:`ru_majflt` | page faults requiring I/O | + +--------+---------------------+-------------------------------+ + | ``8`` | :attr:`ru_nswap` | number of swap outs | + +--------+---------------------+-------------------------------+ + | ``9`` | :attr:`ru_inblock` | block input operations | + +--------+---------------------+-------------------------------+ + | ``10`` | :attr:`ru_oublock` | block output operations | + +--------+---------------------+-------------------------------+ + | ``11`` | :attr:`ru_msgsnd` | messages sent | + +--------+---------------------+-------------------------------+ + | ``12`` | :attr:`ru_msgrcv` | messages received | + +--------+---------------------+-------------------------------+ + | ``13`` | :attr:`ru_nsignals` | signals received | + +--------+---------------------+-------------------------------+ + | ``14`` | :attr:`ru_nvcsw` | voluntary context switches | + 
+--------+---------------------+-------------------------------+ + | ``15`` | :attr:`ru_nivcsw` | involuntary context switches | + +--------+---------------------+-------------------------------+ + + This function will raise a :exc:`ValueError` if an invalid *who* parameter is + specified. It may also raise :exc:`error` exception in unusual circumstances. + + .. versionchanged:: 2.3 + Added access to values as attributes of the returned object. + + +.. function:: getpagesize() + + Returns the number of bytes in a system page. (This need not be the same as the + hardware page size.) This function is useful for determining the number of bytes + of memory a process is using. The third element of the tuple returned by + :func:`getrusage` describes memory usage in pages; multiplying by page size + produces number of bytes. + +The following :const:`RUSAGE_\*` symbols are passed to the :func:`getrusage` +function to specify which processes information should be provided for. + + +.. data:: RUSAGE_SELF + + :const:`RUSAGE_SELF` should be used to request information pertaining only to + the process itself. + + +.. data:: RUSAGE_CHILDREN + + Pass to :func:`getrusage` to request resource information for child processes of + the calling process. + + +.. data:: RUSAGE_BOTH + + Pass to :func:`getrusage` to request resources consumed by both the current + process and child processes. May not be available on all systems. + diff --git a/Doc/library/rfc822.rst b/Doc/library/rfc822.rst new file mode 100644 index 0000000..fa25ba5 --- /dev/null +++ b/Doc/library/rfc822.rst @@ -0,0 +1,351 @@ + +:mod:`rfc822` --- Parse RFC 2822 mail headers +============================================= + +.. module:: rfc822 + :synopsis: Parse 2822 style mail messages. + + +.. deprecated:: 2.3 + The :mod:`email` package should be used in preference to the :mod:`rfc822` + module. This module is present only to maintain backward compatibility. 
+ +This module defines a class, :class:`Message`, which represents an "email +message" as defined by the Internet standard :rfc:`2822`. [#]_ Such messages +consist of a collection of message headers, and a message body. This module +also defines a helper class :class:`AddressList` for parsing :rfc:`2822` +addresses. Please refer to the RFC for information on the specific syntax of +:rfc:`2822` messages. + +.. index:: module: mailbox + +The :mod:`mailbox` module provides classes to read mailboxes produced by +various end-user mail programs. + + +.. class:: Message(file[, seekable]) + + A :class:`Message` instance is instantiated with an input object as parameter. + Message relies only on the input object having a :meth:`readline` method; in + particular, ordinary file objects qualify. Instantiation reads headers from the + input object up to a delimiter line (normally a blank line) and stores them in + the instance. The message body, following the headers, is not consumed. + + This class can work with any input object that supports a :meth:`readline` + method. If the input object has seek and tell capability, the + :meth:`rewindbody` method will work; also, illegal lines will be pushed back + onto the input stream. If the input object lacks seek but has an :meth:`unread` + method that can push back a line of input, :class:`Message` will use that to + push back illegal lines. Thus this class can be used to parse messages coming + from a buffered stream. + + The optional *seekable* argument is provided as a workaround for certain stdio + libraries in which :cfunc:`tell` discards buffered data before discovering that + the :cfunc:`lseek` system call doesn't work. For maximum portability, you + should set the seekable argument to zero to prevent that initial :meth:`tell` + when passing in an unseekable object such as a file object created from a socket + object. 
+ + Input lines as read from the file may either be terminated by CR-LF or by a + single linefeed; a terminating CR-LF is replaced by a single linefeed before the + line is stored. + + All header matching is done independent of upper or lower case; e.g. + ``m['From']``, ``m['from']`` and ``m['FROM']`` all yield the same result. + + +.. class:: AddressList(field) + + You may instantiate the :class:`AddressList` helper class using a single string + parameter, a comma-separated list of :rfc:`2822` addresses to be parsed. (The + parameter ``None`` yields an empty list.) + + +.. function:: quote(str) + + Return a new string with backslashes in *str* replaced by two backslashes and + double quotes replaced by backslash-double quote. + + +.. function:: unquote(str) + + Return a new string which is an *unquoted* version of *str*. If *str* ends and + begins with double quotes, they are stripped off. Likewise if *str* ends and + begins with angle brackets, they are stripped off. + + +.. function:: parseaddr(address) + + Parse *address*, which should be the value of some address-containing field such + as :mailheader:`To` or :mailheader:`Cc`, into its constituent "realname" and + "email address" parts. Returns a tuple of that information, unless the parse + fails, in which case a 2-tuple ``(None, None)`` is returned. + + +.. function:: dump_address_pair(pair) + + The inverse of :func:`parseaddr`, this takes a 2-tuple of the form ``(realname, + email_address)`` and returns the string value suitable for a :mailheader:`To` or + :mailheader:`Cc` header. If the first element of *pair* is false, then the + second element is returned unmodified. + + +.. function:: parsedate(date) + + Attempts to parse a date according to the rules in :rfc:`2822`. However, some + mailers don't follow that format as specified, so :func:`parsedate` tries to + guess correctly in such cases. *date* is a string containing an :rfc:`2822` + date, such as ``'Mon, 20 Nov 1995 19:12:08 -0500'``.
If it succeeds in parsing + the date, :func:`parsedate` returns a 9-tuple that can be passed directly to + :func:`time.mktime`; otherwise ``None`` will be returned. Note that indexes 6, + 7, and 8 of the result tuple are not usable. + + +.. function:: parsedate_tz(date) + + Performs the same function as :func:`parsedate`, but returns either ``None`` or + a 10-tuple; the first 9 elements make up a tuple that can be passed directly to + :func:`time.mktime`, and the tenth is the offset of the date's timezone from UTC + (which is the official term for Greenwich Mean Time). (Note that the sign of + the timezone offset is the opposite of the sign of the ``time.timezone`` + variable for the same timezone; the latter variable follows the POSIX standard + while this module follows :rfc:`2822`.) If the input string has no timezone, + the last element of the tuple returned is ``None``. Note that indexes 6, 7, and + 8 of the result tuple are not usable. + + +.. function:: mktime_tz(tuple) + + Turn a 10-tuple as returned by :func:`parsedate_tz` into a UTC timestamp. If + the timezone item in the tuple is ``None``, assume local time. Minor + deficiency: this first interprets the first 8 elements as a local time and then + compensates for the timezone difference; this may yield a slight error around + daylight savings time switch dates. Not enough to worry about for common use. + + +.. seealso:: + + Module :mod:`email` + Comprehensive email handling package; supersedes the :mod:`rfc822` module. + + Module :mod:`mailbox` + Classes to read various mailbox formats produced by end-user mail programs. + + Module :mod:`mimetools` + Subclass of :class:`rfc822.Message` that handles MIME encoded messages. + + +.. _message-objects: + +Message Objects +--------------- + +A :class:`Message` instance has the following methods: + + +.. method:: Message.rewindbody() + + Seek to the start of the message body. This only works if the file object is + seekable. + + +.. 
method:: Message.isheader(line) + + Returns a line's canonicalized fieldname (the dictionary key that will be used + to index it) if the line is a legal :rfc:`2822` header; otherwise returns + ``None`` (implying that parsing should stop here and the line be pushed back on + the input stream). It is sometimes useful to override this method in a + subclass. + + +.. method:: Message.islast(line) + + Return true if the given line is a delimiter on which Message should stop. The + delimiter line is consumed, and the file object's read location positioned + immediately after it. By default this method just checks that the line is + blank, but you can override it in a subclass. + + +.. method:: Message.iscomment(line) + + Return ``True`` if the given line should be ignored entirely, just skipped. By + default this is a stub that always returns ``False``, but you can override it in + a subclass. + + +.. method:: Message.getallmatchingheaders(name) + + Return a list of lines consisting of all headers matching *name*, if any. Each + physical line, whether it is a continuation line or not, is a separate list + item. Return the empty list if no header matches *name*. + + +.. method:: Message.getfirstmatchingheader(name) + + Return a list of lines comprising the first header matching *name*, and its + continuation line(s), if any. Return ``None`` if there is no header matching + *name*. + + +.. method:: Message.getrawheader(name) + + Return a single string consisting of the text after the colon in the first + header matching *name*. This includes leading whitespace, the trailing + linefeed, and internal linefeeds and whitespace if any continuation + line(s) were present. Return ``None`` if there is no header matching *name*. + + +.. method:: Message.getheader(name[, default]) + + Like ``getrawheader(name)``, but strip leading and trailing whitespace. + Internal whitespace is not stripped.
The optional *default* argument can be + used to specify a different default to be returned when there is no header + matching *name*. + + +.. method:: Message.get(name[, default]) + + An alias for :meth:`getheader`, to make the interface more compatible with + regular dictionaries. + + +.. method:: Message.getaddr(name) + + Return a pair ``(full name, email address)`` parsed from the string returned by + ``getheader(name)``. If no header matching *name* exists, return ``(None, + None)``; otherwise both the full name and the address are (possibly empty) + strings. + + Example: If *m*'s first :mailheader:`From` header contains the string + ``'jack@cwi.nl (Jack Jansen)'``, then ``m.getaddr('From')`` will yield the pair + ``('Jack Jansen', 'jack@cwi.nl')``. If the header contained ``'Jack Jansen + <jack@cwi.nl>'`` instead, it would yield the exact same result. + + +.. method:: Message.getaddrlist(name) + + This is similar to ``getaddr(list)``, but parses a header containing a list of + email addresses (e.g. a :mailheader:`To` header) and returns a list of ``(full + name, email address)`` pairs (even if there was only one address in the header). + If there is no header matching *name*, return an empty list. + + If multiple headers exist that match the named header (e.g. if there are several + :mailheader:`Cc` headers), all are parsed for addresses. Any continuation lines + the named headers contain are also parsed. + + +.. method:: Message.getdate(name) + + Retrieve a header using :meth:`getheader` and parse it into a 9-tuple compatible + with :func:`time.mktime`; note that fields 6, 7, and 8 are not usable. If + there is no header matching *name*, or it is unparsable, return ``None``. + + Date parsing appears to be a black art, and not all mailers adhere to the + standard. While it has been tested and found correct on a large collection of + email from many sources, it is still possible that this function may + occasionally yield an incorrect result. + + +.. 
method:: Message.getdate_tz(name) + + Retrieve a header using :meth:`getheader` and parse it into a 10-tuple; the + first 9 elements will make a tuple compatible with :func:`time.mktime`, and the + 10th is a number giving the offset of the date's timezone from UTC. Note that + fields 6, 7, and 8 are not usable. Similarly to :meth:`getdate`, if there is + no header matching *name*, or it is unparsable, return ``None``. + +:class:`Message` instances also support a limited mapping interface. In +particular: ``m[name]`` is like ``m.getheader(name)`` but raises :exc:`KeyError` +if there is no matching header; and ``len(m)``, ``m.get(name[, default])``, +``m.has_key(name)``, ``m.keys()``, ``m.values()``, ``m.items()``, and +``m.setdefault(name[, default])`` act as expected, with the one difference +that :meth:`setdefault` uses an empty string as the default value. +:class:`Message` instances also support the mapping writable interface ``m[name] += value`` and ``del m[name]``. :class:`Message` objects do not support the +:meth:`clear`, :meth:`copy`, :meth:`popitem`, or :meth:`update` methods of the +mapping interface. (Support for :meth:`get` and :meth:`setdefault` was only +added in Python 2.2.) + +Finally, :class:`Message` instances have some public instance variables: + + +.. attribute:: Message.headers + + A list containing the entire set of header lines, in the order in which they + were read (except that setitem calls may disturb this order). Each line contains + a trailing newline. The blank line terminating the headers is not contained in + the list. + + +.. attribute:: Message.fp + + The file or file-like object passed at instantiation time. This can be used to + read the message content. + + +.. attribute:: Message.unixfrom + + The Unix ``From`` line, if the message had one, or an empty string. This is + needed to regenerate the message in some contexts, such as an ``mbox``\ -style + mailbox file. + + +.. 
_addresslist-objects: + +AddressList Objects +------------------- + +An :class:`AddressList` instance has the following methods: + + +.. method:: AddressList.__len__() + + Return the number of addresses in the address list. + + +.. method:: AddressList.__str__() + + Return a canonicalized string representation of the address list. Addresses are + rendered in "name" <host@domain> form, comma-separated. + + +.. method:: AddressList.__add__(alist) + + Return a new :class:`AddressList` instance that contains all addresses in both + :class:`AddressList` operands, with duplicates removed (set union). + + +.. method:: AddressList.__iadd__(alist) + + In-place version of :meth:`__add__`; turns this :class:`AddressList` instance + into the union of itself and the right-hand instance, *alist*. + + +.. method:: AddressList.__sub__(alist) + + Return a new :class:`AddressList` instance that contains every address in the + left-hand :class:`AddressList` operand that is not present in the right-hand + address operand (set difference). + + +.. method:: AddressList.__isub__(alist) + + In-place version of :meth:`__sub__`, removing addresses in this list which are + also in *alist*. + +Finally, :class:`AddressList` instances have one public instance variable: + + +.. attribute:: AddressList.addresslist + + A list of tuple string pairs, one per address. In each member, the first is the + canonicalized name part, the second is the actual route-address (``'@'``\ + -separated username-host.domain pair). + +.. rubric:: Footnotes + +.. [#] This module originally conformed to :rfc:`822`, hence the name. Since then, + :rfc:`2822` has been released as an update to :rfc:`822`. This module should be + considered :rfc:`2822`\ -conformant, especially in cases where the syntax or + semantics have changed since :rfc:`822`.
+ diff --git a/Doc/library/rlcompleter.rst b/Doc/library/rlcompleter.rst new file mode 100644 index 0000000..b882cb0 --- /dev/null +++ b/Doc/library/rlcompleter.rst @@ -0,0 +1,65 @@ + +:mod:`rlcompleter` --- Completion function for GNU readline +=========================================================== + +.. module:: rlcompleter + :synopsis: Python identifier completion, suitable for the GNU readline library. +.. sectionauthor:: Moshe Zadka + + +The :mod:`rlcompleter` module defines a completion function suitable for the +:mod:`readline` module by completing valid Python identifiers and keywords. + +When this module is imported on a Unix platform with the :mod:`readline` module +available, an instance of the :class:`Completer` class is automatically created +and its :meth:`complete` method is set as the :mod:`readline` completer. + +Example:: + + >>> import rlcompleter + >>> import readline + >>> readline.parse_and_bind("tab: complete") + >>> readline.<TAB PRESSED> + readline.__doc__ readline.get_line_buffer readline.read_init_file + readline.__file__ readline.insert_text readline.set_completer + readline.__name__ readline.parse_and_bind + >>> readline. + +The :mod:`rlcompleter` module is designed for use with Python's interactive +mode. A user can add the following lines to his or her initialization file +(identified by the :envvar:`PYTHONSTARTUP` environment variable) to get +automatic :kbd:`Tab` completion:: + + try: + import readline + except ImportError: + print "Module readline not available." + else: + import rlcompleter + readline.parse_and_bind("tab: complete") + +On platforms without :mod:`readline`, the :class:`Completer` class defined by +this module can still be used for custom purposes. + + +.. _completer-objects: + +Completer Objects +----------------- + +Completer objects have the following method: + + +.. method:: Completer.complete(text, state) + + Return the *state*\ th completion for *text*.
+ + If called for *text* that doesn't include a period character (``'.'``), it will + complete from names currently defined in :mod:`__main__`, :mod:`__builtin__` and + keywords (as defined by the :mod:`keyword` module). + + If called for a dotted name, it will try to evaluate anything without obvious + side-effects (functions will not be evaluated, but it can generate calls to + :meth:`__getattr__`) up to the last part, and find matches for the rest via the + :func:`dir` function. + diff --git a/Doc/library/robotparser.rst b/Doc/library/robotparser.rst new file mode 100644 index 0000000..1a66955 --- /dev/null +++ b/Doc/library/robotparser.rst @@ -0,0 +1,71 @@ + +:mod:`robotparser` --- Parser for robots.txt +============================================= + +.. module:: robotparser + :synopsis: Loads a robots.txt file and answers questions about fetchability of other URLs. +.. sectionauthor:: Skip Montanaro + + +.. index:: + single: WWW + single: World Wide Web + single: URL + single: robots.txt + +This module provides a single class, :class:`RobotFileParser`, which answers +questions about whether or not a particular user agent can fetch a URL on the +Web site that published the :file:`robots.txt` file. For more details on the +structure of :file:`robots.txt` files, see +http://www.robotstxt.org/wc/norobots.html. + + +.. class:: RobotFileParser() + + This class provides a set of methods to read, parse and answer questions about a + single :file:`robots.txt` file. + + + .. method:: RobotFileParser.set_url(url) + + Sets the URL referring to a :file:`robots.txt` file. + + + .. method:: RobotFileParser.read() + + Reads the :file:`robots.txt` URL and feeds it to the parser. + + + .. method:: RobotFileParser.parse(lines) + + Parses the lines argument. + + + .. method:: RobotFileParser.can_fetch(useragent, url) + + Returns ``True`` if the *useragent* is allowed to fetch the *url* according to + the rules contained in the parsed :file:`robots.txt` file. + + + .. 
method:: RobotFileParser.mtime() + + Returns the time the ``robots.txt`` file was last fetched. This is useful for + long-running web spiders that need to check for new ``robots.txt`` files + periodically. + + + .. method:: RobotFileParser.modified() + + Sets the time the ``robots.txt`` file was last fetched to the current time. + +The following example demonstrates basic use of the RobotFileParser class. :: + + >>> import robotparser + >>> rp = robotparser.RobotFileParser() + >>> rp.set_url("http://www.musi-cal.com/robots.txt") + >>> rp.read() + >>> rp.can_fetch("*", "http://www.musi-cal.com/cgi-bin/search?city=San+Francisco") + False + >>> rp.can_fetch("*", "http://www.musi-cal.com/") + True + diff --git a/Doc/library/runpy.rst b/Doc/library/runpy.rst new file mode 100644 index 0000000..8846973 --- /dev/null +++ b/Doc/library/runpy.rst @@ -0,0 +1,71 @@ +:mod:`runpy` --- Locating and executing Python modules +====================================================== + +.. module:: runpy + :synopsis: Locate and run Python modules without importing them first. +.. moduleauthor:: Nick Coghlan + + +.. versionadded:: 2.5 + +The :mod:`runpy` module is used to locate and run Python modules without +importing them first. Its main use is to implement the :option:`-m` command line +switch that allows scripts to be located using the Python module namespace +rather than the filesystem. + +When executed as a script, the module effectively operates as follows:: + + del sys.argv[0] # Remove the runpy module from the arguments + run_module(sys.argv[0], run_name="__main__", alter_sys=True) + +The :mod:`runpy` module provides a single function: + + +.. function:: run_module(mod_name[, init_globals] [, run_name][, alter_sys]) + + Execute the code of the specified module and return the resulting module globals + dictionary. The module's code is first located using the standard import + mechanism (refer to PEP 302 for details) and then executed in a fresh module + namespace. 
+ + The optional dictionary argument *init_globals* may be used to pre-populate the + globals dictionary before the code is executed. The supplied dictionary will not + be modified. If any of the special global variables below are defined in the + supplied dictionary, those definitions are overridden by the ``run_module`` + function. + + The special global variables ``__name__``, ``__file__``, ``__loader__`` and + ``__builtins__`` are set in the globals dictionary before the module code is + executed. + + ``__name__`` is set to *run_name* if this optional argument is supplied, and the + *mod_name* argument otherwise. + + ``__loader__`` is set to the PEP 302 module loader used to retrieve the code for + the module (This loader may be a wrapper around the standard import mechanism). + + ``__file__`` is set to the name provided by the module loader. If the loader + does not make filename information available, this variable is set to ``None``. + + ``__builtins__`` is automatically initialised with a reference to the top level + namespace of the :mod:`__builtin__` module. + + If the argument *alter_sys* is supplied and evaluates to ``True``, then + ``sys.argv[0]`` is updated with the value of ``__file__`` and + ``sys.modules[__name__]`` is updated with a new module object for the module + being executed. Note that neither ``sys.argv[0]`` nor ``sys.modules[__name__]`` + are restored to their original values before the function returns -- if client + code needs these values preserved, it must either save them explicitly or + else avoid enabling the automatic alterations to :mod:`sys`. + + Note that this manipulation of :mod:`sys` is not thread-safe. Other threads may + see the partially initialised module, as well as the altered list of arguments. + It is recommended that the :mod:`sys` module be left alone when invoking this + function from threaded code. + + +.. seealso:: + + :pep:`338` - Executing modules as scripts + PEP written and implemented by Nick Coghlan. 
+ diff --git a/Doc/library/sched.rst b/Doc/library/sched.rst new file mode 100644 index 0000000..bf3efbf --- /dev/null +++ b/Doc/library/sched.rst @@ -0,0 +1,104 @@ + +:mod:`sched` --- Event scheduler +================================ + +.. module:: sched + :synopsis: General purpose event scheduler. +.. sectionauthor:: Moshe Zadka + + +.. % LaTeXed and enhanced from comments in file + +.. index:: single: event scheduling + +The :mod:`sched` module defines a class which implements a general purpose event +scheduler: + + +.. class:: scheduler(timefunc, delayfunc) + + The :class:`scheduler` class defines a generic interface to scheduling events. + It needs two functions to actually deal with the "outside world" --- *timefunc* + should be callable without arguments, and return a number (the "time", in any + units whatsoever). The *delayfunc* function should be callable with one + argument, compatible with the output of *timefunc*, and should delay that many + time units. *delayfunc* will also be called with the argument ``0`` after each + event is run to allow other threads an opportunity to run in multi-threaded + applications. + +Example:: + + >>> import sched, time + >>> s=sched.scheduler(time.time, time.sleep) + >>> def print_time(): print "From print_time", time.time() + ... + >>> def print_some_times(): + ... print time.time() + ... s.enter(5, 1, print_time, ()) + ... s.enter(10, 1, print_time, ()) + ... s.run() + ... print time.time() + ... + >>> print_some_times() + 930343690.257 + From print_time 930343695.274 + From print_time 930343700.273 + 930343700.276 + + +.. _scheduler-objects: + +Scheduler Objects +----------------- + +:class:`scheduler` instances have the following methods: + + +.. method:: scheduler.enterabs(time, priority, action, argument) + + Schedule a new event. The *time* argument should be a numeric type compatible + with the return value of the *timefunc* function passed to the constructor. 
+ Events scheduled for the same *time* will be executed in the order of their + *priority*. + + Executing the event means executing ``action(*argument)``. *argument* must be a + sequence holding the parameters for *action*. + + Return value is an event which may be used for later cancellation of the event + (see :meth:`cancel`). + + +.. method:: scheduler.enter(delay, priority, action, argument) + + Schedule an event for *delay* more time units. Other than the relative time, the + other arguments, the effect and the return value are the same as those for + :meth:`enterabs`. + + +.. method:: scheduler.cancel(event) + + Remove the event from the queue. If *event* is not an event currently in the + queue, this method will raise a :exc:`RuntimeError`. + + +.. method:: scheduler.empty() + + Return true if the event queue is empty. + + +.. method:: scheduler.run() + + Run all scheduled events. This function will wait (using the :func:`delayfunc` + function passed to the constructor) for the next event, then execute it and so + on until there are no more scheduled events. + + Either *action* or *delayfunc* can raise an exception. In either case, the + scheduler will maintain a consistent state and propagate the exception. If an + exception is raised by *action*, the event will not be attempted in future calls + to :meth:`run`. + + If a sequence of events takes longer to run than the time available before the + next event, the scheduler will simply fall behind. No events will be dropped; + the calling code is responsible for canceling events which are no longer + pertinent. + diff --git a/Doc/library/scrolledtext.rst b/Doc/library/scrolledtext.rst new file mode 100644 index 0000000..85456b9 --- /dev/null +++ b/Doc/library/scrolledtext.rst @@ -0,0 +1,32 @@ +:mod:`ScrolledText` --- Scrolled Text Widget +============================================ + +.. module:: ScrolledText + :platform: Tk + :synopsis: Text widget with a vertical scroll bar. +.. sectionauthor:: Fred L.
Drake, Jr. + + +The :mod:`ScrolledText` module provides a class of the same name which +implements a basic text widget which has a vertical scroll bar configured to do +the "right thing." Using the :class:`ScrolledText` class is a lot easier than +setting up a text widget and scroll bar directly. The constructor is the same +as that of the :class:`Tkinter.Text` class. + +The text widget and scrollbar are packed together in a :class:`Frame`, and the +methods of the :class:`Grid` and :class:`Pack` geometry managers are acquired +from the :class:`Frame` object. This allows the :class:`ScrolledText` widget to +be used directly to achieve most normal geometry management behavior. + +Should more specific control be necessary, the following attributes are +available: + + +.. attribute:: ScrolledText.frame + + The frame which surrounds the text and scroll bar widgets. + + +.. attribute:: ScrolledText.vbar + + The scroll bar widget. diff --git a/Doc/library/select.rst b/Doc/library/select.rst new file mode 100644 index 0000000..f68a0da --- /dev/null +++ b/Doc/library/select.rst @@ -0,0 +1,141 @@ + +:mod:`select` --- Waiting for I/O completion +============================================ + +.. module:: select + :synopsis: Wait for I/O completion on multiple streams. + + +This module provides access to the :cfunc:`select` and :cfunc:`poll` functions +available in most operating systems. Note that on Windows, it only works for +sockets; on other operating systems, it also works for other file types (in +particular, on Unix, it works on pipes). It cannot be used on regular files to +determine whether a file has grown since it was last read. + +The module defines the following: + + +.. exception:: error + + The exception raised when an error occurs. The accompanying value is a pair + containing the numeric error code from :cdata:`errno` and the corresponding + string, as would be printed by the C function :cfunc:`perror`. + + +.. 
function:: poll() + + (Not supported by all operating systems.) Returns a polling object, which + supports registering and unregistering file descriptors, and then polling them + for I/O events; see section :ref:`poll-objects` below for the methods supported + by polling objects. + + +.. function:: select(iwtd, owtd, ewtd[, timeout]) + + This is a straightforward interface to the Unix :cfunc:`select` system call. + The first three arguments are sequences of 'waitable objects': either + integers representing file descriptors or objects with a parameterless method + named :meth:`fileno` returning such an integer. The three sequences of + waitable objects are for input, output and 'exceptional conditions', + respectively. Empty sequences are allowed, but acceptance of three empty + sequences is platform-dependent. (It is known to work on Unix but not on + Windows.) The optional *timeout* argument specifies a time-out as a floating + point number in seconds. When the *timeout* argument is omitted the function + blocks until at least one file descriptor is ready. A time-out value of zero + specifies a poll and never blocks. + + The return value is a triple of lists of objects that are ready: subsets of the + first three arguments. When the time-out is reached without a file descriptor + becoming ready, three empty lists are returned. + + .. index:: + single: socket() (in module socket) + single: popen() (in module os) + + Among the acceptable object types in the sequences are Python file objects (e.g. + ``sys.stdin``, or objects returned by :func:`open` or :func:`os.popen`), and socket + objects returned by :func:`socket.socket`. You may also define a :dfn:`wrapper` + class yourself, as long as it has an appropriate :meth:`fileno` method (that + really returns a file descriptor, not just a random integer). + + .. % + + .. note:: + + .. index:: single: WinSock + + File objects on Windows are not acceptable, but sockets are.
On Windows, the + underlying :cfunc:`select` function is provided by the WinSock library, and does + not handle file descriptors that don't originate from WinSock. + + +.. _poll-objects: + +Polling Objects +--------------- + +The :cfunc:`poll` system call, supported on most Unix systems, provides better +scalability for network servers that service many, many clients at the same +time. :cfunc:`poll` scales better because the system call only requires listing +the file descriptors of interest, while :cfunc:`select` builds a bitmap, turns +on bits for the fds of interest, and then afterward the whole bitmap has to be +linearly scanned again. :cfunc:`select` is O(highest file descriptor), while +:cfunc:`poll` is O(number of file descriptors). + + +.. method:: poll.register(fd[, eventmask]) + + Register a file descriptor with the polling object. Future calls to the + :meth:`poll` method will then check whether the file descriptor has any pending + I/O events. *fd* can be either an integer, or an object with a :meth:`fileno` + method that returns an integer. File objects implement :meth:`fileno`, so they + can also be used as the argument. + + *eventmask* is an optional bitmask describing the type of events you want to + check for, and can be a combination of the constants :const:`POLLIN`, + :const:`POLLPRI`, and :const:`POLLOUT`, described in the table below. If not + specified, the default value used will check for all 3 types of events. 
+ + +-------------------+------------------------------------------+ + | Constant | Meaning | + +===================+==========================================+ + | :const:`POLLIN` | There is data to read | + +-------------------+------------------------------------------+ + | :const:`POLLPRI` | There is urgent data to read | + +-------------------+------------------------------------------+ + | :const:`POLLOUT` | Ready for output: writing will not block | + +-------------------+------------------------------------------+ + | :const:`POLLERR` | Error condition of some sort | + +-------------------+------------------------------------------+ + | :const:`POLLHUP` | Hung up | + +-------------------+------------------------------------------+ + | :const:`POLLNVAL` | Invalid request: descriptor not open | + +-------------------+------------------------------------------+ + + Registering a file descriptor that's already registered is not an error, and has + the same effect as registering the descriptor exactly once. + + +.. method:: poll.unregister(fd) + + Remove a file descriptor being tracked by a polling object. Just like the + :meth:`register` method, *fd* can be an integer or an object with a + :meth:`fileno` method that returns an integer. + + Attempting to remove a file descriptor that was never registered causes a + :exc:`KeyError` exception to be raised. + + +.. method:: poll.poll([timeout]) + + Polls the set of registered file descriptors, and returns a possibly-empty list + containing ``(fd, event)`` 2-tuples for the descriptors that have events or + errors to report. *fd* is the file descriptor, and *event* is a bitmask with + bits set for the reported events for that descriptor --- :const:`POLLIN` for + waiting input, :const:`POLLOUT` to indicate that the descriptor can be written + to, and so forth. An empty list indicates that the call timed out and no file + descriptors had any events to report. 
If *timeout* is given, it specifies the + length of time in milliseconds which the system will wait for events before + returning. If *timeout* is omitted, negative, or :const:`None`, the call will + block until there is an event for this poll object. + diff --git a/Doc/library/sgmllib.rst b/Doc/library/sgmllib.rst new file mode 100644 index 0000000..c0ef1a2 --- /dev/null +++ b/Doc/library/sgmllib.rst @@ -0,0 +1,270 @@ + +:mod:`sgmllib` --- Simple SGML parser +===================================== + +.. module:: sgmllib + :synopsis: Only as much of an SGML parser as needed to parse HTML. + + +.. index:: single: SGML + +This module defines a class :class:`SGMLParser` which serves as the basis for +parsing text files formatted in SGML (Standard Generalized Mark-up Language). +In fact, it does not provide a full SGML parser --- it only parses SGML insofar +as it is used by HTML, and the module only exists as a base for the +:mod:`htmllib` module. Another HTML parser which supports XHTML and offers a +somewhat different interface is available in the :mod:`HTMLParser` module. + + +.. class:: SGMLParser() + + The :class:`SGMLParser` class is instantiated without arguments. The parser is + hardcoded to recognize the following constructs: + + * Opening and closing tags of the form ``<tag attr="value" ...>`` and + ``</tag>``, respectively. + + * Numeric character references of the form ``&#name;``. + + * Entity references of the form ``&name;``. + + * SGML comments of the form ``<!--text-->``. Note that spaces, tabs, and + newlines are allowed between the trailing ``>`` and the immediately preceding + ``--``. + +A single exception is defined as well: + + +.. exception:: SGMLParseError + + Exception raised by the :class:`SGMLParser` class when it encounters an error + while parsing. + + .. versionadded:: 2.1 + +:class:`SGMLParser` instances have the following methods: + + +.. method:: SGMLParser.reset() + + Reset the instance. Loses all unprocessed data. This is called implicitly at + instantiation time. + + +..
method:: SGMLParser.setnomoretags() + + Stop processing tags. Treat all following input as literal input (CDATA). + (This is only provided so the HTML tag ``<PLAINTEXT>`` can be implemented.) + + +.. method:: SGMLParser.setliteral() + + Enter literal mode (CDATA mode). + + +.. method:: SGMLParser.feed(data) + + Feed some text to the parser. It is processed insofar as it consists of + complete elements; incomplete data is buffered until more data is fed or + :meth:`close` is called. + + +.. method:: SGMLParser.close() + + Force processing of all buffered data as if it were followed by an end-of-file + mark. This method may be redefined by a derived class to define additional + processing at the end of the input, but the redefined version should always call + :meth:`close`. + + +.. method:: SGMLParser.get_starttag_text() + + Return the text of the most recently opened start tag. This should not normally + be needed for structured processing, but may be useful in dealing with HTML "as + deployed" or for re-generating input with minimal changes (whitespace between + attributes can be preserved, etc.). + + +.. method:: SGMLParser.handle_starttag(tag, method, attributes) + + This method is called to handle start tags for which either a :meth:`start_tag` + or :meth:`do_tag` method has been defined. The *tag* argument is the name of + the tag converted to lower case, and the *method* argument is the bound method + which should be used to support semantic interpretation of the start tag. The + *attributes* argument is a list of ``(name, value)`` pairs containing the + attributes found inside the tag's ``<>`` brackets. + + The *name* has been translated to lower case.
Double quotes and backslashes in + the *value* have been interpreted, as well as known character references and + known entity references terminated by a semicolon (normally, entity references + can be terminated by any non-alphanumerical character, but this would break the + very common case of ``<A HREF="url?spam=1&eggs=2">`` when ``eggs`` is a valid + entity name). + + For instance, for the tag ``<A HREF="http://www.cwi.nl/">``, this method would + be called as ``unknown_starttag('a', [('href', 'http://www.cwi.nl/')])``. The + base implementation simply calls *method* with *attributes* as the only + argument. + + .. versionadded:: 2.5 + Handling of entity and character references within attribute values. + + +.. method:: SGMLParser.handle_endtag(tag, method) + + This method is called to handle endtags for which an :meth:`end_tag` method has + been defined. The *tag* argument is the name of the tag converted to lower + case, and the *method* argument is the bound method which should be used to + support semantic interpretation of the end tag. If no :meth:`end_tag` method is + defined for the closing element, this handler is not called. The base + implementation simply calls *method*. + + +.. method:: SGMLParser.handle_data(data) + + This method is called to process arbitrary data. It is intended to be + overridden by a derived class; the base class implementation does nothing. + + +.. method:: SGMLParser.handle_charref(ref) + + This method is called to process a character reference of the form ``&#ref;``. + The base implementation uses :meth:`convert_charref` to convert the reference to + a string. If that method returns a string, it is passed to :meth:`handle_data`, + otherwise ``unknown_charref(ref)`` is called to handle the error. + + .. versionchanged:: 2.5 + Use :meth:`convert_charref` instead of hard-coding the conversion. + + +.. method:: SGMLParser.convert_charref(ref) + + Convert a character reference to a string, or ``None``. 
*ref* is the reference + passed in as a string. In the base implementation, *ref* must be a decimal + number in the range 0-255. It converts the code point found using the + :meth:`convert_codepoint` method. If *ref* is invalid or out of range, this + method returns ``None``. This method is called by the default + :meth:`handle_charref` implementation and by the attribute value parser. + + .. versionadded:: 2.5 + + +.. method:: SGMLParser.convert_codepoint(codepoint) + + Convert a codepoint to a :class:`str` value. Encodings can be handled here if + appropriate, though the rest of :mod:`sgmllib` is oblivious on this matter. + + .. versionadded:: 2.5 + + +.. method:: SGMLParser.handle_entityref(ref) + + This method is called to process a general entity reference of the form + ``&ref;`` where *ref* is a general entity reference. It converts *ref* by + passing it to :meth:`convert_entityref`. If a translation is returned, it calls + the method :meth:`handle_data` with the translation; otherwise, it calls the + method ``unknown_entityref(ref)``. The default :attr:`entitydefs` defines + translations for ``&amp;``, ``&apos;``, ``&gt;``, ``&lt;``, and ``&quot;``. + + .. versionchanged:: 2.5 + Use :meth:`convert_entityref` instead of hard-coding the conversion. + + +.. method:: SGMLParser.convert_entityref(ref) + + Convert a named entity reference to a :class:`str` value, or ``None``. The + resulting value will not be parsed. *ref* will be only the name of the entity. + The default implementation looks for *ref* in the instance (or class) variable + :attr:`entitydefs` which should be a mapping from entity names to corresponding + translations. If no translation is available for *ref*, this method returns + ``None``. This method is called by the default :meth:`handle_entityref` + implementation and by the attribute value parser. + + .. versionadded:: 2.5 + + +.. method:: SGMLParser.handle_comment(comment) + + This method is called when a comment is encountered.
The *comment* argument is + a string containing the text between the ``<!--`` and ``-->`` delimiters, but + not the delimiters themselves. For example, the comment ``<!--text-->`` will + cause this method to be called with the argument ``'text'``. The default method + does nothing. + + +.. method:: SGMLParser.handle_decl(data) + + Method called when an SGML declaration is read by the parser. In practice, the + ``DOCTYPE`` declaration is the only thing observed in HTML, but the parser does + not discriminate among different (or broken) declarations. Internal subsets in + a ``DOCTYPE`` declaration are not supported. The *data* parameter will be the + entire contents of the declaration inside the ``<!``...\ ``>`` markup. The + default implementation does nothing. + + +.. method:: SGMLParser.report_unbalanced(tag) + + This method is called when an end tag is found which does not correspond to any + open element. + + +.. method:: SGMLParser.unknown_starttag(tag, attributes) + + This method is called to process an unknown start tag. It is intended to be + overridden by a derived class; the base class implementation does nothing. + + +.. method:: SGMLParser.unknown_endtag(tag) + + This method is called to process an unknown end tag. It is intended to be + overridden by a derived class; the base class implementation does nothing. + + +.. method:: SGMLParser.unknown_charref(ref) + + This method is called to process unresolvable numeric character references. + Refer to :meth:`handle_charref` to determine what is handled by default. It is + intended to be overridden by a derived class; the base class implementation does + nothing. + + +.. method:: SGMLParser.unknown_entityref(ref) + + This method is called to process an unknown entity reference. It is intended to + be overridden by a derived class; the base class implementation does nothing. 
+ +Apart from overriding or extending the methods listed above, derived classes may +also define methods of the following form to define processing of specific tags. +Tag names in the input stream are case independent; the *tag* occurring in +method names must be in lower case: + + +.. method:: SGMLParser.start_tag(attributes) + :noindex: + + This method is called to process an opening tag *tag*. It has preference over + :meth:`do_tag`. The *attributes* argument has the same meaning as described for + :meth:`handle_starttag` above. + + +.. method:: SGMLParser.do_tag(attributes) + :noindex: + + This method is called to process an opening tag *tag* for which no + :meth:`start_tag` method is defined. The *attributes* argument has the same + meaning as described for :meth:`handle_starttag` above. + + +.. method:: SGMLParser.end_tag() + :noindex: + + This method is called to process a closing tag *tag*. + +Note that the parser maintains a stack of open elements for which no end tag has +been found yet. Only tags processed by :meth:`start_tag` are pushed on this +stack. Definition of an :meth:`end_tag` method is optional for these tags. For +tags processed by :meth:`do_tag` or by :meth:`unknown_tag`, no :meth:`end_tag` +method must be defined; if defined, it will not be used. If both +:meth:`start_tag` and :meth:`do_tag` methods exist for a tag, the +:meth:`start_tag` method takes precedence. + diff --git a/Doc/library/shelve.rst b/Doc/library/shelve.rst new file mode 100644 index 0000000..1776b7d --- /dev/null +++ b/Doc/library/shelve.rst @@ -0,0 +1,185 @@ + +:mod:`shelve` --- Python object persistence +=========================================== + +.. module:: shelve + :synopsis: Python object persistence. + + +.. index:: module: pickle + +A "shelf" is a persistent, dictionary-like object. The difference with "dbm" +databases is that the values (not the keys!) in a shelf can be essentially +arbitrary Python objects --- anything that the :mod:`pickle` module can handle. 
+This includes most class instances, recursive data types, and objects containing +lots of shared sub-objects. The keys are ordinary strings. + + +.. function:: open(filename[, flag='c'[, protocol=None[, writeback=False]]]) + + Open a persistent dictionary. The filename specified is the base filename for + the underlying database. As a side-effect, an extension may be added to the + filename and more than one file may be created. By default, the underlying + database file is opened for reading and writing. The optional *flag* parameter + has the same interpretation as the *flag* parameter of :func:`anydbm.open`. + + By default, version 0 pickles are used to serialize values. The version of the + pickle protocol can be specified with the *protocol* parameter. + + .. versionchanged:: 2.3 + The *protocol* parameter was added. + + By default, mutations to persistent-dictionary mutable entries are not + automatically written back. If the optional *writeback* parameter is set to + *True*, all entries accessed are cached in memory, and written back at close + time; this can make it handier to mutate mutable entries in the persistent + dictionary, but, if many entries are accessed, it can consume vast amounts of + memory for the cache, and it can make the close operation very slow since all + accessed entries are written back (there is no way to determine which accessed + entries are mutable, nor which ones were actually mutated). + +Shelve objects support all methods supported by dictionaries. This eases the +transition from dictionary based scripts to those requiring persistent storage. + +One additional method is supported: + + +.. method:: Shelf.sync() + + Write back all entries in the cache if the shelf was opened with *writeback* set + to *True*. Also empty the cache and synchronize the persistent dictionary on + disk, if feasible. This is called automatically when the shelf is closed with + :meth:`close`. + + +Restrictions +------------ + + .. 
index:: + module: dbm + module: gdbm + module: bsddb + +* The choice of which database package will be used (such as :mod:`dbm`, + :mod:`gdbm` or :mod:`bsddb`) depends on which interface is available. Therefore + it is not safe to open the database directly using :mod:`dbm`. The database is + also (unfortunately) subject to the limitations of :mod:`dbm`, if it is used --- + this means that (the pickled representation of) the objects stored in the + database should be fairly small, and in rare cases key collisions may cause the + database to refuse updates. + +* Depending on the implementation, closing a persistent dictionary may or may + not be necessary to flush changes to disk. The :meth:`__del__` method of the + :class:`Shelf` class calls the :meth:`close` method, so the programmer generally + need not do this explicitly. + +* The :mod:`shelve` module does not support *concurrent* read/write access to + shelved objects. (Multiple simultaneous read accesses are safe.) When a + program has a shelf open for writing, no other program should have it open for + reading or writing. Unix file locking can be used to solve this, but this + differs across Unix versions and requires knowledge about the database + implementation used. + + +.. class:: Shelf(dict[, protocol=None[, writeback=False]]) + + A subclass of :class:`UserDict.DictMixin` which stores pickled values in the + *dict* object. + + By default, version 0 pickles are used to serialize values. The version of the + pickle protocol can be specified with the *protocol* parameter. See the + :mod:`pickle` documentation for a discussion of the pickle protocols. + + .. versionchanged:: 2.3 + The *protocol* parameter was added. + + If the *writeback* parameter is ``True``, the object will hold a cache of all + entries accessed and write them back to the *dict* at sync and close times. + This allows natural operations on mutable entries, but can consume much more + memory and make sync and close take a long time. + + +.. 
class:: BsdDbShelf(dict[, protocol=None[, writeback=False]]) + + A subclass of :class:`Shelf` which exposes :meth:`first`, :meth:`next`, + :meth:`previous`, :meth:`last` and :meth:`set_location` which are available in + the :mod:`bsddb` module but not in other database modules. The *dict* object + passed to the constructor must support those methods. This is generally + accomplished by calling one of :func:`bsddb.hashopen`, :func:`bsddb.btopen` or + :func:`bsddb.rnopen`. The optional *protocol* and *writeback* parameters have + the same interpretation as for the :class:`Shelf` class. + + +.. class:: DbfilenameShelf(filename[, flag='c'[, protocol=None[, writeback=False]]]) + + A subclass of :class:`Shelf` which accepts a *filename* instead of a dict-like + object. The underlying file will be opened using :func:`anydbm.open`. By + default, the file will be created and opened for both read and write. The + optional *flag* parameter has the same interpretation as for the :func:`open` + function. The optional *protocol* and *writeback* parameters have the same + interpretation as for the :class:`Shelf` class. + + +Example +------- + +To summarize the interface (``key`` is a string, ``data`` is an arbitrary +object):: + + import shelve + + d = shelve.open(filename) # open -- file may get suffix added by low-level + # library + + d[key] = data # store data at key (overwrites old data if + # using an existing key) + data = d[key] # retrieve a COPY of data at key (raise KeyError if no + # such key) + del d[key] # delete data stored at key (raises KeyError + # if no such key) + flag = d.has_key(key) # true if the key exists + klist = d.keys() # a list of all existing keys (slow!) + + # as d was opened WITHOUT writeback=True, beware: + d['xx'] = range(4) # this works as expected, but... + d['xx'].append(5) # *this doesn't!* -- d['xx'] is STILL range(4)!!! 
+ + # having opened d without writeback=True, you need to code carefully: + temp = d['xx'] # extracts the copy + temp.append(5) # mutates the copy + d['xx'] = temp # stores the copy right back, to persist it + + # or, d=shelve.open(filename,writeback=True) would let you just code + # d['xx'].append(5) and have it work as expected, BUT it would also + # consume more memory and make the d.close() operation slower. + + d.close() # close it + + +.. seealso:: + + Module :mod:`anydbm` + Generic interface to ``dbm``\ -style databases. + + Module :mod:`bsddb` + BSD ``db`` database interface. + + Module :mod:`dbhash` + Thin layer around the :mod:`bsddb` which provides an :func:`open` function like + the other database modules. + + Module :mod:`dbm` + Standard Unix database interface. + + Module :mod:`dumbdbm` + Portable implementation of the ``dbm`` interface. + + Module :mod:`gdbm` + GNU database interface, based on the ``dbm`` interface. + + Module :mod:`pickle` + Object serialization used by :mod:`shelve`. + + Module :mod:`cPickle` + High-performance version of :mod:`pickle`. + diff --git a/Doc/library/shlex.rst b/Doc/library/shlex.rst new file mode 100644 index 0000000..0ae77c1 --- /dev/null +++ b/Doc/library/shlex.rst @@ -0,0 +1,307 @@ + +:mod:`shlex` --- Simple lexical analysis +======================================== + +.. module:: shlex + :synopsis: Simple lexical analysis for Unix shell-like languages. +.. moduleauthor:: Eric S. Raymond <esr@snark.thyrsus.com> +.. moduleauthor:: Gustavo Niemeyer <niemeyer@conectiva.com> +.. sectionauthor:: Eric S. Raymond <esr@snark.thyrsus.com> +.. sectionauthor:: Gustavo Niemeyer <niemeyer@conectiva.com> + + +.. versionadded:: 1.5.2 + +The :class:`shlex` class makes it easy to write lexical analyzers for simple +syntaxes resembling that of the Unix shell. This will often be useful for +writing minilanguages, (for example, in run control files for Python +applications) or for parsing quoted strings. + +.. 
note:: + + The :mod:`shlex` module currently does not support Unicode input. + +The :mod:`shlex` module defines the following functions: + + +.. function:: split(s[, comments[, posix]]) + + Split the string *s* using shell-like syntax. If *comments* is :const:`False` + (the default), the parsing of comments in the given string will be disabled + (setting the :attr:`commenters` member of the :class:`shlex` instance to the + empty string). This function operates in POSIX mode by default, but uses + non-POSIX mode if the *posix* argument is false. + + .. versionadded:: 2.3 + + .. versionchanged:: 2.6 + Added the *posix* parameter. + + .. note:: + + Since the :func:`split` function instantiates a :class:`shlex` instance, passing + ``None`` for *s* will read the string to split from standard input. + +The :mod:`shlex` module defines the following class: + + +.. class:: shlex([instream[, infile[, posix]]]) + + A :class:`shlex` instance or subclass instance is a lexical analyzer object. + The initialization argument, if present, specifies where to read characters + from. It must be a file-/stream-like object with :meth:`read` and + :meth:`readline` methods, or a string (strings are accepted since Python 2.3). + If no argument is given, input will be taken from ``sys.stdin``. The second + optional argument is a filename string, which sets the initial value of the + :attr:`infile` member. If the *instream* argument is omitted or equal to + ``sys.stdin``, this second argument defaults to "stdin". The *posix* argument + was introduced in Python 2.3, and defines the operational mode. When *posix* is + not true (default), the :class:`shlex` instance will operate in compatibility + mode. When operating in POSIX mode, :class:`shlex` will try to be as close as + possible to the POSIX shell parsing rules. + + +.. seealso:: + + Module :mod:`ConfigParser` + Parser for configuration files similar to the Windows :file:`.ini` files. + + +.. 
_shlex-objects: + +shlex Objects +------------- + +A :class:`shlex` instance has the following methods: + + +.. method:: shlex.get_token() + + Return a token. If tokens have been stacked using :meth:`push_token`, pop a + token off the stack. Otherwise, read one from the input stream. If reading + encounters an immediate end-of-file, :attr:`self.eof` is returned (the empty + string (``''``) in non-POSIX mode, and ``None`` in POSIX mode). + + +.. method:: shlex.push_token(str) + + Push the argument onto the token stack. + + +.. method:: shlex.read_token() + + Read a raw token. Ignore the pushback stack, and do not interpret source + requests. (This is not ordinarily a useful entry point, and is documented here + only for the sake of completeness.) + + +.. method:: shlex.sourcehook(filename) + + When :class:`shlex` detects a source request (see :attr:`source` below) this + method is given the following token as argument, and expected to return a tuple + consisting of a filename and an open file-like object. + + Normally, this method first strips any quotes off the argument. If the result + is an absolute pathname, or there was no previous source request in effect, or + the previous source was a stream (such as ``sys.stdin``), the result is left + alone. Otherwise, if the result is a relative pathname, the directory part of + the name of the file immediately before it on the source inclusion stack is + prepended (this behavior is like the way the C preprocessor handles ``#include + "file.h"``). + + The result of the manipulations is treated as a filename, and returned as the + first component of the tuple, with :func:`open` called on it to yield the second + component. (Note: this is the reverse of the order of arguments in instance + initialization!) + + This hook is exposed so that you can use it to implement directory search paths, + addition of file extensions, and other namespace hacks. 
There is no + corresponding 'close' hook, but a shlex instance will call the :meth:`close` + method of the sourced input stream when it returns EOF. + + For more explicit control of source stacking, use the :meth:`push_source` and + :meth:`pop_source` methods. + + +.. method:: shlex.push_source(stream[, filename]) + + Push an input source stream onto the input stack. If the filename argument is + specified it will later be available for use in error messages. This is the + same method used internally by the :meth:`sourcehook` method. + + .. versionadded:: 2.1 + + +.. method:: shlex.pop_source() + + Pop the last-pushed input source from the input stack. This is the same method + used internally when the lexer reaches EOF on a stacked input stream. + + .. versionadded:: 2.1 + + +.. method:: shlex.error_leader([file[, line]]) + + This method generates an error message leader in the format of a Unix C compiler + error label; the format is ``'"%s", line %d: '``, where the ``%s`` is replaced + with the name of the current source file and the ``%d`` with the current input + line number (the optional arguments can be used to override these). + + This convenience is provided to encourage :mod:`shlex` users to generate error + messages in the standard, parseable format understood by Emacs and other Unix + tools. + +Instances of :class:`shlex` subclasses have some public instance variables which +either control lexical analysis or can be used for debugging: + + +.. attribute:: shlex.commenters + + The string of characters that are recognized as comment beginners. All + characters from the comment beginner to end of line are ignored. Includes just + ``'#'`` by default. + + +.. attribute:: shlex.wordchars + + The string of characters that will accumulate into multi-character tokens. By + default, includes all ASCII alphanumerics and underscore. + + +.. attribute:: shlex.whitespace + + Characters that will be considered whitespace and skipped. Whitespace bounds + tokens. 
By default, includes space, tab, linefeed and carriage-return. + + +.. attribute:: shlex.escape + + Characters that will be considered as escape. This is only used in POSIX + mode, and includes just ``'\'`` by default. + + .. versionadded:: 2.3 + + +.. attribute:: shlex.quotes + + Characters that will be considered string quotes. The token accumulates until + the same quote is encountered again (thus, different quote types protect each + other as in the shell). By default, includes ASCII single and double quotes. + + +.. attribute:: shlex.escapedquotes + + Characters in :attr:`quotes` that will interpret escape characters defined in + :attr:`escape`. This is only used in POSIX mode, and includes just ``'"'`` by + default. + + .. versionadded:: 2.3 + + +.. attribute:: shlex.whitespace_split + + If ``True``, tokens will only be split on whitespace. This is useful, for + example, for parsing command lines with :class:`shlex`, getting tokens in a + similar way to shell arguments. + + .. versionadded:: 2.3 + + +.. attribute:: shlex.infile + + The name of the current input file, as initially set at class instantiation time + or stacked by later source requests. It may be useful to examine this when + constructing error messages. + + +.. attribute:: shlex.instream + + The input stream from which this :class:`shlex` instance is reading characters. + + +.. attribute:: shlex.source + + This member is ``None`` by default. If you assign a string to it, that string + will be recognized as a lexical-level inclusion request similar to the + ``source`` keyword in various shells. That is, the immediately following token + will be opened as a filename and input taken from that stream until EOF, at which + point the :meth:`close` method of that stream will be called and the input + source will again become the original input stream. Source requests may be + stacked any number of levels deep. + + +.. 
attribute:: shlex.debug + + If this member is numeric and ``1`` or more, a :class:`shlex` instance will + print verbose progress output on its behavior. If you need to use this, you can + read the module source code to learn the details. + + +.. attribute:: shlex.lineno + + Source line number (count of newlines seen so far plus one). + + +.. attribute:: shlex.token + + The token buffer. It may be useful to examine this when catching exceptions. + + +.. attribute:: shlex.eof + + Token used to determine end of file. This will be set to the empty string + (``''``) in non-POSIX mode, and to ``None`` in POSIX mode. + + .. versionadded:: 2.3 + + +.. _shlex-parsing-rules: + +Parsing Rules +------------- + +When operating in non-POSIX mode, :class:`shlex` will try to obey the +following rules. + +* Quote characters are not recognized within words (``Do"Not"Separate`` is + parsed as the single word ``Do"Not"Separate``); + +* Escape characters are not recognized; + +* Enclosing characters in quotes preserve the literal value of all characters + within the quotes; + +* Closing quotes separate words (``"Do"Separate`` is parsed as ``"Do"`` and + ``Separate``); + +* If :attr:`whitespace_split` is ``False``, any character not declared to be a + word character, whitespace, or a quote will be returned as a single-character + token. If it is ``True``, :class:`shlex` will only split words on whitespace; + +* EOF is signaled with an empty string (``''``); + +* It's not possible to parse empty strings, even if quoted. + +When operating in POSIX mode, :class:`shlex` will try to obey the following +parsing rules. + +* Quotes are stripped out, and do not separate words (``"Do"Not"Separate"`` is + parsed as the single word ``DoNotSeparate``); + +* Non-quoted escape characters (e.g. ``'\'``) preserve the literal value of the + next character that follows; + +* Enclosing characters in quotes which are not part of :attr:`escapedquotes` + (e.g. 
``"'"``) preserve the literal value of all characters within the quotes; + +* Enclosing characters in quotes which are part of :attr:`escapedquotes` (e.g. + ``'"'``) preserve the literal value of all characters within the quotes, with + the exception of the characters mentioned in :attr:`escape`. The escape + characters retain their special meaning only when followed by the quote in use, or + the escape character itself. Otherwise the escape character will be considered a + normal character. + +* EOF is signaled with a :const:`None` value; + +* Quoted empty strings (``''``) are allowed; + diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst new file mode 100644 index 0000000..ef0758d --- /dev/null +++ b/Doc/library/shutil.rst @@ -0,0 +1,171 @@ + +:mod:`shutil` --- High-level file operations +============================================ + +.. module:: shutil + :synopsis: High-level file operations, including copying. +.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org> + + +.. % partly based on the docstrings + +.. index:: + single: file; copying + single: copying files + +The :mod:`shutil` module offers a number of high-level operations on files and +collections of files. In particular, functions are provided which support file +copying and removal. + +**Caveat:** On MacOS, the resource fork and other metadata are not used. For +file copies, this means that resources will be lost and file type and creator +codes will not be correct. + + +.. function:: copyfile(src, dst) + + Copy the contents of the file named *src* to a file named *dst*. The + destination location must be writable; otherwise, an :exc:`IOError` exception + will be raised. If *dst* already exists, it will be replaced. Special files + such as character or block devices and pipes cannot be copied with this + function. *src* and *dst* are path names given as strings. + + +.. 
function:: copyfileobj(fsrc, fdst[, length]) + + Copy the contents of the file-like object *fsrc* to the file-like object *fdst*. + The integer *length*, if given, is the buffer size. In particular, a negative + *length* value means to copy the data without looping over the source data in + chunks; by default the data is read in chunks to avoid uncontrolled memory + consumption. Note that if the current file position of the *fsrc* object is not + 0, only the contents from the current file position to the end of the file will + be copied. + + +.. function:: copymode(src, dst) + + Copy the permission bits from *src* to *dst*. The file contents, owner, and + group are unaffected. *src* and *dst* are path names given as strings. + + +.. function:: copystat(src, dst) + + Copy the permission bits, last access time, last modification time, and flags + from *src* to *dst*. The file contents, owner, and group are unaffected. *src* + and *dst* are path names given as strings. + + +.. function:: copy(src, dst) + + Copy the file *src* to the file or directory *dst*. If *dst* is a directory, a + file with the same basename as *src* is created (or overwritten) in the + directory specified. Permission bits are copied. *src* and *dst* are path + names given as strings. + + +.. function:: copy2(src, dst) + + Similar to :func:`copy`, but last access time and last modification time are + copied as well. This is similar to the Unix command :program:`cp -p`. + + +.. function:: copytree(src, dst[, symlinks]) + + Recursively copy an entire directory tree rooted at *src*. The destination + directory, named by *dst*, must not already exist; it will be created as well as + missing parent directories. Permissions and times of directories are copied with + :func:`copystat`, individual files are copied using :func:`copy2`. 
If + *symlinks* is true, symbolic links in the source tree are represented as + symbolic links in the new tree; if false or omitted, the contents of the linked + files are copied to the new tree. If exception(s) occur, an :exc:`Error` is + raised with a list of reasons. + + The source code for this should be considered an example rather than a tool. + + .. versionchanged:: 2.3 + :exc:`Error` is raised if any exceptions occur during copying, rather than + printing a message. + + .. versionchanged:: 2.5 + Create intermediate directories needed to create *dst*, rather than raising an + error. Copy permissions and times of directories using :func:`copystat`. + + +.. function:: rmtree(path[, ignore_errors[, onerror]]) + + .. index:: single: directory; deleting + + Delete an entire directory tree (*path* must point to a directory). If + *ignore_errors* is true, errors resulting from failed removals will be ignored; + if false or omitted, such errors are handled by calling a handler specified by + *onerror* or, if that is omitted, they raise an exception. + + If *onerror* is provided, it must be a callable that accepts three parameters: + *function*, *path*, and *excinfo*. The first parameter, *function*, is the + function which raised the exception; it will be :func:`os.listdir`, + :func:`os.remove` or :func:`os.rmdir`. The second parameter, *path*, will be + the path name passed to *function*. The third parameter, *excinfo*, will be the + exception information returned by :func:`sys.exc_info`. Exceptions raised by + *onerror* will not be caught. + + +.. function:: move(src, dst) + + Recursively move a file or directory to another location. + + If the destination is on our current filesystem, then simply use rename. + Otherwise, copy *src* to *dst* and then remove *src*. + + .. versionadded:: 2.3 + + +.. exception:: Error + + This exception collects exceptions that are raised during a multi-file operation. 
For + :func:`copytree`, the exception argument is a list of 3-tuples (*srcname*, + *dstname*, *exception*). + + .. versionadded:: 2.3 + + +.. _shutil-example: + +Example +------- + +This example is the implementation of the :func:`copytree` function, described +above, with the docstring omitted. It demonstrates many of the other functions +provided by this module. :: + + def copytree(src, dst, symlinks=False): + names = os.listdir(src) + os.makedirs(dst) + errors = [] + for name in names: + srcname = os.path.join(src, name) + dstname = os.path.join(dst, name) + try: + if symlinks and os.path.islink(srcname): + linkto = os.readlink(srcname) + os.symlink(linkto, dstname) + elif os.path.isdir(srcname): + copytree(srcname, dstname, symlinks) + else: + copy2(srcname, dstname) + # XXX What about devices, sockets etc.? + except (IOError, os.error) as why: + errors.append((srcname, dstname, str(why))) + # catch the Error from the recursive copytree so that we can + # continue with other files + except Error as err: + errors.extend(err.args[0]) + try: + copystat(src, dst) + except WindowsError: + # can't copy file access times on Windows + pass + except OSError as why: + errors.extend((src, dst, str(why))) + if errors: + raise Error, errors + diff --git a/Doc/library/signal.rst b/Doc/library/signal.rst new file mode 100644 index 0000000..54cce53 --- /dev/null +++ b/Doc/library/signal.rst @@ -0,0 +1,157 @@ + +:mod:`signal` --- Set handlers for asynchronous events +====================================================== + +.. module:: signal + :synopsis: Set handlers for asynchronous events. + + +This module provides mechanisms to use signal handlers in Python. 
Some general +rules for working with signals and their handlers: + +* A handler for a particular signal, once set, remains installed until it is + explicitly reset (Python emulates the BSD style interface regardless of the + underlying implementation), with the exception of the handler for + :const:`SIGCHLD`, which follows the underlying implementation. + +* There is no way to "block" signals temporarily from critical sections (since + this is not supported by all Unix flavors). + +* Although Python signal handlers are called asynchronously as far as the Python + user is concerned, they can only occur between the "atomic" instructions of the + Python interpreter. This means that signals arriving during long calculations + implemented purely in C (such as regular expression matches on large bodies of + text) may be delayed for an arbitrary amount of time. + +* When a signal arrives during an I/O operation, it is possible that the I/O + operation raises an exception after the signal handler returns. This is + dependent on the underlying Unix system's semantics regarding interrupted system + calls. + +* Because the C signal handler always returns, it makes little sense to catch + synchronous errors like :const:`SIGFPE` or :const:`SIGSEGV`. + +* Python installs a small number of signal handlers by default: :const:`SIGPIPE` + is ignored (so write errors on pipes and sockets can be reported as ordinary + Python exceptions) and :const:`SIGINT` is translated into a + :exc:`KeyboardInterrupt` exception. All of these can be overridden. + +* Some care must be taken if both signals and threads are used in the same + program. The fundamental thing to remember in using signals and threads + simultaneously is: always perform :func:`signal` operations in the main thread + of execution. 
Any thread can perform an :func:`alarm`, :func:`getsignal`, or + :func:`pause`; only the main thread can set a new signal handler, and the main + thread will be the only one to receive signals (this is enforced by the Python + :mod:`signal` module, even if the underlying thread implementation supports + sending signals to individual threads). This means that signals can't be used + as a means of inter-thread communication. Use locks instead. + +The variables defined in the :mod:`signal` module are: + + +.. data:: SIG_DFL + + This is one of two standard signal handling options; it will simply perform the + default function for the signal. For example, on most systems the default + action for :const:`SIGQUIT` is to dump core and exit, while the default action + for :const:`SIGCLD` is to simply ignore it. + + +.. data:: SIG_IGN + + This is another standard signal handler, which will simply ignore the given + signal. + + +.. data:: SIG* + + All the signal numbers are defined symbolically. For example, the hangup signal + is defined as :const:`signal.SIGHUP`; the variable names are identical to the + names used in C programs, as found in ``<signal.h>``. The Unix man page for + ':cfunc:`signal`' lists the existing signals (on some systems this is + :manpage:`signal(2)`, on others the list is in :manpage:`signal(7)`). Note that + not all systems define the same set of signal names; only those names defined by + the system are defined by this module. + + +.. data:: NSIG + + One more than the number of the highest signal number. + +The :mod:`signal` module defines the following functions: + + +.. function:: alarm(time) + + If *time* is non-zero, this function requests that a :const:`SIGALRM` signal be + sent to the process in *time* seconds. Any previously scheduled alarm is + canceled (only one alarm can be scheduled at any time). The returned value is + then the number of seconds before any previously set alarm was to have been + delivered. 
If *time* is zero, no alarm is scheduled, and any scheduled alarm is + canceled. If the return value is zero, no alarm is currently scheduled. (See + the Unix man page :manpage:`alarm(2)`.) Availability: Unix. + + +.. function:: getsignal(signalnum) + + Return the current signal handler for the signal *signalnum*. The returned value + may be a callable Python object, or one of the special values + :const:`signal.SIG_IGN`, :const:`signal.SIG_DFL` or :const:`None`. Here, + :const:`signal.SIG_IGN` means that the signal was previously ignored, + :const:`signal.SIG_DFL` means that the default way of handling the signal was + previously in use, and ``None`` means that the previous signal handler was not + installed from Python. + + +.. function:: pause() + + Cause the process to sleep until a signal is received; the appropriate handler + will then be called. Returns nothing. Not on Windows. (See the Unix man page + :manpage:`signal(2)`.) + + +.. function:: signal(signalnum, handler) + + Set the handler for signal *signalnum* to the function *handler*. *handler* can + be a callable Python object taking two arguments (see below), or one of the + special values :const:`signal.SIG_IGN` or :const:`signal.SIG_DFL`. The previous + signal handler will be returned (see the description of :func:`getsignal` + above). (See the Unix man page :manpage:`signal(2)`.) + + When threads are enabled, this function can only be called from the main thread; + attempting to call it from other threads will cause a :exc:`ValueError` + exception to be raised. + + The *handler* is called with two arguments: the signal number and the current + stack frame (``None`` or a frame object; for a description of frame objects, see + the reference manual section on the standard type hierarchy or see the attribute + descriptions in the :mod:`inspect` module). + + +.. _signal-example: + +Example +------- + +Here is a minimal example program. 
It uses the :func:`alarm` function to limit +the time spent waiting to open a file; this is useful if the file is for a +serial device that may not be turned on, which would normally cause the +:func:`os.open` to hang indefinitely. The solution is to set a 5-second alarm +before opening the file; if the operation takes too long, the alarm signal will +be sent, and the handler raises an exception. :: + + import signal, os + + def handler(signum, frame): + print 'Signal handler called with signal', signum + raise IOError, "Couldn't open device!" + + # Set the signal handler and a 5-second alarm + signal.signal(signal.SIGALRM, handler) + signal.alarm(5) + + # This open() may hang indefinitely + fd = os.open('/dev/ttyS0', os.O_RDWR) + + signal.alarm(0) # Disable the alarm + diff --git a/Doc/library/simplehttpserver.rst b/Doc/library/simplehttpserver.rst new file mode 100644 index 0000000..766253e --- /dev/null +++ b/Doc/library/simplehttpserver.rst @@ -0,0 +1,86 @@ + +:mod:`SimpleHTTPServer` --- Simple HTTP request handler +======================================================= + +.. module:: SimpleHTTPServer + :synopsis: This module provides a basic request handler for HTTP servers. +.. sectionauthor:: Moshe Zadka <moshez@zadka.site.co.il> + + +The :mod:`SimpleHTTPServer` module defines a request-handler class, +interface-compatible with :class:`BaseHTTPServer.BaseHTTPRequestHandler`, that +serves files only from a base directory. + +The :mod:`SimpleHTTPServer` module defines the following class: + + +.. class:: SimpleHTTPRequestHandler(request, client_address, server) + + This class is used to serve files from the current directory and below, directly + mapping the directory structure to HTTP requests. + + A lot of the work, such as parsing the request, is done by the base class + :class:`BaseHTTPServer.BaseHTTPRequestHandler`. This class implements the + :func:`do_GET` and :func:`do_HEAD` functions. 
+ +The :class:`SimpleHTTPRequestHandler` defines the following member variables: + + +.. attribute:: SimpleHTTPRequestHandler.server_version + + This will be ``"SimpleHTTP/" + __version__``, where ``__version__`` is defined + in the module. + + +.. attribute:: SimpleHTTPRequestHandler.extensions_map + + A dictionary mapping suffixes into MIME types. The default is signified by an + empty string, and is considered to be ``application/octet-stream``. The mapping + is used case-insensitively, and so should contain only lower-cased keys. + +The :class:`SimpleHTTPRequestHandler` defines the following methods: + + +.. method:: SimpleHTTPRequestHandler.do_HEAD() + + This method serves the ``'HEAD'`` request type: it sends the headers it would + send for the equivalent ``GET`` request. See the :meth:`do_GET` method for a + more complete explanation of the possible headers. + + +.. method:: SimpleHTTPRequestHandler.do_GET() + + The request is mapped to a local file by interpreting the request as a path + relative to the current working directory. + + If the request was mapped to a directory, the directory is checked for a file + named ``index.html`` or ``index.htm`` (in that order). If found, the file's + contents are returned; otherwise a directory listing is generated by calling the + :meth:`list_directory` method. This method uses :func:`os.listdir` to scan the + directory, and returns a ``404`` error response if the :func:`listdir` fails. + + If the request was mapped to a file, it is opened and the contents are returned. + Any :exc:`IOError` exception in opening the requested file is mapped to a + ``404``, ``'File not found'`` error. Otherwise, the content type is guessed by + calling the :meth:`guess_type` method, which in turn uses the *extensions_map* + variable. 
+ + A ``'Content-type:'`` header with the guessed content type is output, followed + by a ``'Content-Length:'`` header with the file's size and a + ``'Last-Modified:'`` header with the file's modification time. + + Then follows a blank line signifying the end of the headers, and then the + contents of the file are output. If the file's MIME type starts with ``text/`` + the file is opened in text mode; otherwise binary mode is used. + + For example usage, see the implementation of the :func:`test` function. + + .. versionadded:: 2.5 + The ``'Last-Modified'`` header. + + +.. seealso:: + + Module :mod:`BaseHTTPServer` + Base class implementation for Web server and request handler. + diff --git a/Doc/library/simplexmlrpcserver.rst b/Doc/library/simplexmlrpcserver.rst new file mode 100644 index 0000000..51ce8d8 --- /dev/null +++ b/Doc/library/simplexmlrpcserver.rst @@ -0,0 +1,232 @@ + +:mod:`SimpleXMLRPCServer` --- Basic XML-RPC server +================================================== + +.. module:: SimpleXMLRPCServer + :synopsis: Basic XML-RPC server implementation. +.. moduleauthor:: Brian Quinlan <brianq@activestate.com> +.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org> + + +.. versionadded:: 2.2 + +The :mod:`SimpleXMLRPCServer` module provides a basic server framework for +XML-RPC servers written in Python. Servers can either be free standing, using +:class:`SimpleXMLRPCServer`, or embedded in a CGI environment, using +:class:`CGIXMLRPCRequestHandler`. + + +.. class:: SimpleXMLRPCServer(addr[, requestHandler[, logRequests[, allow_none[, encoding[, bind_and_activate]]]]]) + + Create a new server instance. This class provides methods for registration of + functions that can be called by the XML-RPC protocol. The *requestHandler* + parameter should be a factory for request handler instances; it defaults to + :class:`SimpleXMLRPCRequestHandler`. The *addr* and *requestHandler* parameters + are passed to the :class:`SocketServer.TCPServer` constructor. 
If *logRequests* + is true (the default), requests will be logged; setting this parameter to false + will turn off logging. The *allow_none* and *encoding* parameters are passed + on to :mod:`xmlrpclib` and control the XML-RPC responses that will be returned + from the server. The *bind_and_activate* parameter controls whether + :meth:`server_bind` and :meth:`server_activate` are called immediately by the + constructor; it defaults to true. Setting it to false allows code to manipulate + the *allow_reuse_address* class variable before the address is bound. + + .. versionchanged:: 2.5 + The *allow_none* and *encoding* parameters were added. + + .. versionchanged:: 2.6 + The *bind_and_activate* parameter was added. + + +.. class:: CGIXMLRPCRequestHandler([allow_none[, encoding]]) + + Create a new instance to handle XML-RPC requests in a CGI environment. The + *allow_none* and *encoding* parameters are passed on to :mod:`xmlrpclib` and + control the XML-RPC responses that will be returned from the server. + + .. versionadded:: 2.3 + + .. versionchanged:: 2.5 + The *allow_none* and *encoding* parameters were added. + + +.. class:: SimpleXMLRPCRequestHandler() + + Create a new request handler instance. This request handler supports ``POST`` + requests and modifies logging so that the *logRequests* parameter to the + :class:`SimpleXMLRPCServer` constructor is honored. + + +.. _simple-xmlrpc-servers: + +SimpleXMLRPCServer Objects +-------------------------- + +The :class:`SimpleXMLRPCServer` class is based on +:class:`SocketServer.TCPServer` and provides a means of creating simple, stand +alone XML-RPC servers. + + +.. method:: SimpleXMLRPCServer.register_function(function[, name]) + + Register a function that can respond to XML-RPC requests. If *name* is given, + it will be the method name associated with *function*, otherwise + ``function.__name__`` will be used.
*name* can be either a normal or Unicode + string, and may contain characters not legal in Python identifiers, including + the period character. + + +.. method:: SimpleXMLRPCServer.register_instance(instance[, allow_dotted_names]) + + Register an object which is used to expose method names which have not been + registered using :meth:`register_function`. If *instance* contains a + :meth:`_dispatch` method, it is called with the requested method name and the + parameters from the request. Its API is ``def _dispatch(self, method, params)`` + (note that *params* does not represent a variable argument list). If it calls + an underlying function to perform its task, that function is called as + ``func(*params)``, expanding the parameter list. The return value from + :meth:`_dispatch` is returned to the client as the result. If *instance* does + not have a :meth:`_dispatch` method, it is searched for an attribute matching + the name of the requested method. + + If the optional *allow_dotted_names* argument is true and the instance does not + have a :meth:`_dispatch` method, then if the requested method name contains + periods, each component of the method name is searched for individually, with + the effect that a simple hierarchical search is performed. The value found from + this search is then called with the parameters from the request, and the return + value is passed back to the client. + + .. warning:: + + Enabling the *allow_dotted_names* option allows intruders to access your + module's global variables and may allow intruders to execute arbitrary code on + your machine. Only use this option on a secure, closed network. + + .. versionchanged:: 2.3.5, 2.4.1 + *allow_dotted_names* was added to plug a security hole; prior versions are + insecure. + + +.. method:: SimpleXMLRPCServer.register_introspection_functions() + + Registers the XML-RPC introspection functions ``system.listMethods``, + ``system.methodHelp`` and ``system.methodSignature``. + + .. 
versionadded:: 2.3 + + +.. method:: SimpleXMLRPCServer.register_multicall_functions() + + Registers the XML-RPC multicall function system.multicall. + + +.. attribute:: SimpleXMLRPCServer.rpc_paths + + An attribute value that must be a tuple listing valid path portions of the URL + for receiving XML-RPC requests. Requests posted to other paths will result in a + 404 "no such page" HTTP error. If this tuple is empty, all paths will be + considered valid. The default value is ``('/', '/RPC2')``. + + .. versionadded:: 2.5 + +Example:: + + from SimpleXMLRPCServer import SimpleXMLRPCServer + + # Create server + server = SimpleXMLRPCServer(("localhost", 8000)) + server.register_introspection_functions() + + # Register pow() function; this will use the value of + # pow.__name__ as the name, which is just 'pow'. + server.register_function(pow) + + # Register a function under a different name + def adder_function(x,y): + return x + y + server.register_function(adder_function, 'add') + + # Register an instance; all the methods of the instance are + # published as XML-RPC methods (in this case, just 'div'). + class MyFuncs: + def div(self, x, y): + return x // y + + server.register_instance(MyFuncs()) + + # Run the server's main loop + server.serve_forever() + +The following client code will call the methods made available by the preceding +server:: + + import xmlrpclib + + s = xmlrpclib.Server('http://localhost:8000') + print s.pow(2,3) # Returns 2**3 = 8 + print s.add(2,3) # Returns 5 + print s.div(5,2) # Returns 5//2 = 2 + + # Print list of available methods + print s.system.listMethods() + + +CGIXMLRPCRequestHandler +----------------------- + +The :class:`CGIXMLRPCRequestHandler` class can be used to handle XML-RPC +requests sent to Python CGI scripts. + + +.. method:: CGIXMLRPCRequestHandler.register_function(function[, name]) + + Register a function that can respond to XML-RPC requests. 
If *name* is given, + it will be the method name associated with *function*, otherwise + ``function.__name__`` will be used. *name* can be either a normal or Unicode + string, and may contain characters not legal in Python identifiers, including + the period character. + + +.. method:: CGIXMLRPCRequestHandler.register_instance(instance) + + Register an object which is used to expose method names which have not been + registered using :meth:`register_function`. If *instance* contains a + :meth:`_dispatch` method, it is called with the requested method name and the + parameters from the request; the return value is returned to the client as the + result. If *instance* does not have a :meth:`_dispatch` method, it is searched + for an attribute matching the name of the requested method; if the requested + method name contains periods, each component of the method name is searched for + individually, with the effect that a simple hierarchical search is performed. + The value found from this search is then called with the parameters from the + request, and the return value is passed back to the client. + + +.. method:: CGIXMLRPCRequestHandler.register_introspection_functions() + + Register the XML-RPC introspection functions ``system.listMethods``, + ``system.methodHelp`` and ``system.methodSignature``. + + +.. method:: CGIXMLRPCRequestHandler.register_multicall_functions() + + Register the XML-RPC multicall function ``system.multicall``. + + +.. method:: CGIXMLRPCRequestHandler.handle_request([request_text = None]) + + Handle an XML-RPC request. If *request_text* is given, it should be the POST + data provided by the HTTP server, otherwise the contents of stdin will be used.
+ +Example:: + + class MyFuncs: + def div(self, x, y) : return x // y + + + handler = CGIXMLRPCRequestHandler() + handler.register_function(pow) + handler.register_function(lambda x,y: x+y, 'add') + handler.register_introspection_functions() + handler.register_instance(MyFuncs()) + handler.handle_request() + diff --git a/Doc/library/site.rst b/Doc/library/site.rst new file mode 100644 index 0000000..4e54900 --- /dev/null +++ b/Doc/library/site.rst @@ -0,0 +1,87 @@ + +:mod:`site` --- Site-specific configuration hook +================================================ + +.. module:: site + :synopsis: A standard way to reference site-specific modules. + + +**This module is automatically imported during initialization.** The automatic +import can be suppressed using the interpreter's :option:`-S` option. + +.. index:: triple: module; search; path + +Importing this module will append site-specific paths to the module search path. + +.. index:: + pair: site-python; directory + pair: site-packages; directory + +It starts by constructing up to four directories from a head and a tail part. +For the head part, it uses ``sys.prefix`` and ``sys.exec_prefix``; empty heads +are skipped. For the tail part, it uses the empty string and then +:file:`lib/site-packages` (on Windows) or +:file:`lib/python|version|/site-packages` and then :file:`lib/site-python` (on +Unix and Macintosh). For each of the distinct head-tail combinations, it sees +if it refers to an existing directory, and if so, adds it to ``sys.path`` and +also inspects the newly added path for configuration files. + +A path configuration file is a file whose name has the form :file:`package.pth` +and exists in one of the four directories mentioned above; its contents are +additional items (one per line) to be added to ``sys.path``. Non-existing items +are never added to ``sys.path``, but no check is made that the item refers to a +directory (rather than a file). No item is added to ``sys.path`` more than +once. 
Blank lines and lines beginning with ``#`` are skipped. Lines starting +with ``import`` (followed by space or tab) are executed. + +.. versionchanged:: 2.6 + A space or tab is now required after the import keyword. + +.. index:: + single: package + triple: path; configuration; file + +For example, suppose ``sys.prefix`` and ``sys.exec_prefix`` are set to +:file:`/usr/local`. The Python X.Y library is then installed in +:file:`/usr/local/lib/python{X.Y}` (where only the first three characters of +``sys.version`` are used to form the installation path name). Suppose this has +a subdirectory :file:`/usr/local/lib/python{X.Y}/site-packages` with three +subsubdirectories, :file:`foo`, :file:`bar` and :file:`spam`, and two path +configuration files, :file:`foo.pth` and :file:`bar.pth`. Assume +:file:`foo.pth` contains the following:: + + # foo package configuration + + foo + bar + bletch + +and :file:`bar.pth` contains:: + + # bar package configuration + + bar + +Then the following directories are added to ``sys.path``, in this order:: + + /usr/local/lib/python2.3/site-packages/bar + /usr/local/lib/python2.3/site-packages/foo + +Note that :file:`bletch` is omitted because it doesn't exist; the :file:`bar` +directory precedes the :file:`foo` directory because :file:`bar.pth` comes +alphabetically before :file:`foo.pth`; and :file:`spam` is omitted because it is +not mentioned in either path configuration file. + +.. index:: module: sitecustomize + +After these path manipulations, an attempt is made to import a module named +:mod:`sitecustomize`, which can perform arbitrary site-specific customizations. +If this import fails with an :exc:`ImportError` exception, it is silently +ignored. + +.. index:: module: sitecustomize + +Note that for some non-Unix systems, ``sys.prefix`` and ``sys.exec_prefix`` are +empty, and the path manipulations are skipped; however the import of +:mod:`sitecustomize` is still attempted. 
+ diff --git a/Doc/library/smtpd.rst b/Doc/library/smtpd.rst new file mode 100644 index 0000000..8927a64 --- /dev/null +++ b/Doc/library/smtpd.rst @@ -0,0 +1,72 @@ +:mod:`smtpd` --- SMTP Server +============================ + +.. module:: smtpd + :synopsis: A SMTP server implementation in Python. + +.. moduleauthor:: Barry Warsaw <barry@zope.com> +.. sectionauthor:: Moshe Zadka <moshez@moshez.org> + + + + +This module offers several classes to implement SMTP servers. One is a generic +do-nothing implementation, which can be overridden, while the other two offer +specific mail-sending strategies. + + +SMTPServer Objects +------------------ + + +.. class:: SMTPServer(localaddr, remoteaddr) + + Create a new :class:`SMTPServer` object, which binds to local address + *localaddr*. It will treat *remoteaddr* as an upstream SMTP relayer. It + inherits from :class:`asyncore.dispatcher`, and so will insert itself into + :mod:`asyncore`'s event loop on instantiation. + + +.. method:: SMTPServer.process_message(peer, mailfrom, rcpttos, data) + + Raise :exc:`NotImplementedError` exception. Override this in subclasses to do + something useful with this message. Whatever was passed in the constructor as + *remoteaddr* will be available as the :attr:`_remoteaddr` attribute. *peer* is + the remote host's address, *mailfrom* is the envelope originator, *rcpttos* are + the envelope recipients and *data* is a string containing the contents of the + e-mail (which should be in :rfc:`2822` format). + + +DebuggingServer Objects +----------------------- + + +.. class:: DebuggingServer(localaddr, remoteaddr) + + Create a new debugging server. Arguments are as per :class:`SMTPServer`. + Messages will be discarded, and printed on stdout. + + +PureProxy Objects +----------------- + + +.. class:: PureProxy(localaddr, remoteaddr) + + Create a new pure proxy server. Arguments are as per :class:`SMTPServer`. + Everything will be relayed to *remoteaddr*. 
Note that running this has a good + chance to make you into an open relay, so please be careful. + + +MailmanProxy Objects +-------------------- + + +.. class:: MailmanProxy(localaddr, remoteaddr) + + Create a new pure proxy server. Arguments are as per :class:`SMTPServer`. + Everything will be relayed to *remoteaddr*, unless the local mailman configuration + knows about an address, in which case it will be handled via mailman. Note that + running this has a good chance to make you into an open relay, so please be + careful. + diff --git a/Doc/library/smtplib.rst b/Doc/library/smtplib.rst new file mode 100644 index 0000000..fd898ca --- /dev/null +++ b/Doc/library/smtplib.rst @@ -0,0 +1,347 @@ + +:mod:`smtplib` --- SMTP protocol client +======================================= + +.. module:: smtplib + :synopsis: SMTP protocol client (requires sockets). +.. sectionauthor:: Eric S. Raymond <esr@snark.thyrsus.com> + + +.. index:: + pair: SMTP; protocol + single: Simple Mail Transfer Protocol + +The :mod:`smtplib` module defines an SMTP client session object that can be used +to send mail to any Internet machine with an SMTP or ESMTP listener daemon. For +details of SMTP and ESMTP operation, consult :rfc:`821` (Simple Mail Transfer +Protocol) and :rfc:`1869` (SMTP Service Extensions). + + +.. class:: SMTP([host[, port[, local_hostname[, timeout]]]]) + + A :class:`SMTP` instance encapsulates an SMTP connection. It has methods that + support a full repertoire of SMTP and ESMTP operations. If the optional host and + port parameters are given, the SMTP :meth:`connect` method is called with those + parameters during initialization. An :exc:`SMTPConnectError` is raised if the + specified host doesn't respond correctly. The optional *timeout* parameter + specifies a timeout in seconds for the connection attempt (if not specified, or + passed as ``None``, the global default timeout setting will be used).
+ + For normal use, you should only require the initialization/connect, + :meth:`sendmail`, and :meth:`quit` methods. An example is included below. + + .. versionchanged:: 2.6 + *timeout* was added. + + +.. class:: SMTP_SSL([host[, port[, local_hostname[, keyfile[, certfile[, timeout]]]]]]) + + A :class:`SMTP_SSL` instance behaves exactly the same as instances of + :class:`SMTP`. :class:`SMTP_SSL` should be used for situations where SSL is + required from the beginning of the connection and using :meth:`starttls` is not + appropriate. If *host* is not specified, the local host is used. If *port* is + omitted, the standard SMTP-over-SSL port (465) is used. *keyfile* and *certfile* + are also optional, and can contain a PEM formatted private key and certificate + chain file for the SSL connection. The optional *timeout* parameter specifies a + timeout in seconds for the connection attempt (if not specified, or passed as + None, the global default timeout setting will be used). + + .. versionchanged:: 2.6 + *timeout* was added. + + +.. class:: LMTP([host[, port[, local_hostname]]]) + + The LMTP protocol, which is very similar to ESMTP, is heavily based on the + standard SMTP client. It's common to use Unix sockets for LMTP, so our connect() + method must support that as well as a regular host:port server. To specify a + Unix socket, you must use an absolute path for *host*, starting with a '/'. + + Authentication is supported, using the regular SMTP mechanism. When using a Unix + socket, LMTP servers generally don't support or require any authentication, but your + mileage might vary. + + .. versionadded:: 2.6 + +A nice selection of exceptions is defined as well: + + +.. exception:: SMTPException + + Base exception class for all exceptions raised by this module. + + +.. exception:: SMTPServerDisconnected + + This exception is raised when the server unexpectedly disconnects, or when an + attempt is made to use the :class:`SMTP` instance before connecting it to a + server.
+ + +.. exception:: SMTPResponseException + + Base class for all exceptions that include an SMTP error code. These exceptions + are generated in some instances when the SMTP server returns an error code. The + error code is stored in the :attr:`smtp_code` attribute of the error, and the + :attr:`smtp_error` attribute is set to the error message. + + +.. exception:: SMTPSenderRefused + + Sender address refused. In addition to the attributes set on all + :exc:`SMTPResponseException` exceptions, this sets :attr:`sender` to the string that + the SMTP server refused. + + +.. exception:: SMTPRecipientsRefused + + All recipient addresses refused. The errors for each recipient are accessible + through the attribute :attr:`recipients`, which is a dictionary of exactly the + same sort as :meth:`SMTP.sendmail` returns. + + +.. exception:: SMTPDataError + + The SMTP server refused to accept the message data. + + +.. exception:: SMTPConnectError + + Error occurred during establishment of a connection with the server. + + +.. exception:: SMTPHeloError + + The server refused our ``HELO`` message. + + +.. exception:: SMTPAuthenticationError + + SMTP authentication went wrong. Most probably the server didn't accept the + username/password combination provided. + + +.. seealso:: + + :rfc:`821` - Simple Mail Transfer Protocol + Protocol definition for SMTP. This document covers the model, operating + procedure, and protocol details for SMTP. + + :rfc:`1869` - SMTP Service Extensions + Definition of the ESMTP extensions for SMTP. This describes a framework for + extending SMTP with new commands, supporting dynamic discovery of the commands + provided by the server, and defines a few additional commands. + + +.. _smtp-objects: + +SMTP Objects +------------ + +An :class:`SMTP` instance has the following methods: + + +.. method:: SMTP.set_debuglevel(level) + + Set the debug output level.
A true value for *level* results in debug messages + for connection and for all messages sent to and received from the server. + + +.. method:: SMTP.connect([host[, port]]) + + Connect to a host on a given port. The defaults are to connect to the local + host at the standard SMTP port (25). If the hostname ends with a colon (``':'``) + followed by a number, that suffix will be stripped off and the number + interpreted as the port number to use. This method is automatically invoked by + the constructor if a host is specified during instantiation. + + +.. method:: SMTP.docmd(cmd[, argstring]) + + Send a command *cmd* to the server. The optional argument *argstring* is simply + concatenated to the command, separated by a space. + + This returns a 2-tuple composed of a numeric response code and the actual + response line (multiline responses are joined into one long line). + + In normal operation it should not be necessary to call this method explicitly. + It is used to implement other methods and may be useful for testing private + extensions. + + If the connection to the server is lost while waiting for the reply, + :exc:`SMTPServerDisconnected` will be raised. + + +.. method:: SMTP.helo([hostname]) + + Identify yourself to the SMTP server using ``HELO``. The hostname argument + defaults to the fully qualified domain name of the local host. + + In normal operation it should not be necessary to call this method explicitly. + It will be implicitly called by :meth:`sendmail` when necessary. + + +.. method:: SMTP.ehlo([hostname]) + + Identify yourself to an ESMTP server using ``EHLO``. The hostname argument + defaults to the fully qualified domain name of the local host. Examine the + response for ESMTP options and store them for use by :meth:`has_extn`. + + Unless you wish to use :meth:`has_extn` before sending mail, it should not be + necessary to call this method explicitly. It will be implicitly called by + :meth:`sendmail` when necessary. + + +..
method:: SMTP.has_extn(name) + + Return :const:`True` if *name* is in the set of SMTP service extensions returned + by the server, :const:`False` otherwise. Case is ignored. + + +.. method:: SMTP.verify(address) + + Check the validity of an address on this server using SMTP ``VRFY``. Returns a + tuple consisting of code 250 and a full :rfc:`822` address (including human + name) if the user address is valid. Otherwise returns an SMTP error code of 400 + or greater and an error string. + + .. note:: + + Many sites disable SMTP ``VRFY`` in order to foil spammers. + + +.. method:: SMTP.login(user, password) + + Log in on an SMTP server that requires authentication. The arguments are the + username and the password to authenticate with. If there has been no previous + ``EHLO`` or ``HELO`` command this session, this method tries ESMTP ``EHLO`` + first. This method will return normally if the authentication was successful, or + may raise the following exceptions: + + :exc:`SMTPHeloError` + The server didn't reply properly to the ``HELO`` greeting. + + :exc:`SMTPAuthenticationError` + The server didn't accept the username/password combination. + + :exc:`SMTPException` + No suitable authentication method was found. + + +.. method:: SMTP.starttls([keyfile[, certfile]]) + + Put the SMTP connection in TLS (Transport Layer Security) mode. All SMTP + commands that follow will be encrypted. You should then call :meth:`ehlo` + again. + + If *keyfile* and *certfile* are provided, these are passed to the :mod:`socket` + module's :func:`ssl` function. + + +.. method:: SMTP.sendmail(from_addr, to_addrs, msg[, mail_options, rcpt_options]) + + Send mail. The required arguments are an :rfc:`822` from-address string, a list + of :rfc:`822` to-address strings (a bare string will be treated as a list with 1 + address), and a message string. The caller may pass a list of ESMTP options + (such as ``8bitmime``) to be used in ``MAIL FROM`` commands as *mail_options*. 
+ ESMTP options (such as ``DSN`` commands) that should be used with all ``RCPT`` + commands can be passed as *rcpt_options*. (If you need to use different ESMTP + options to different recipients you have to use the low-level methods such as + :meth:`mail`, :meth:`rcpt` and :meth:`data` to send the message.) + + .. note:: + + The *from_addr* and *to_addrs* parameters are used to construct the message + envelope used by the transport agents. The :class:`SMTP` does not modify the + message headers in any way. + + If there has been no previous ``EHLO`` or ``HELO`` command this session, this + method tries ESMTP ``EHLO`` first. If the server does ESMTP, message size and + each of the specified options will be passed to it (if the option is in the + feature set the server advertises). If ``EHLO`` fails, ``HELO`` will be tried + and ESMTP options suppressed. + + This method will return normally if the mail is accepted for at least one + recipient. Otherwise it will throw an exception. That is, if this method does + not throw an exception, then someone should get your mail. If this method does + not throw an exception, it returns a dictionary, with one entry for each + recipient that was refused. Each entry contains a tuple of the SMTP error code + and the accompanying error message sent by the server. + + This method may raise the following exceptions: + + :exc:`SMTPRecipientsRefused` + All recipients were refused. Nobody got the mail. The :attr:`recipients` + attribute of the exception object is a dictionary with information about the + refused recipients (like the one returned when at least one recipient was + accepted). + + :exc:`SMTPHeloError` + The server didn't reply properly to the ``HELO`` greeting. + + :exc:`SMTPSenderRefused` + The server didn't accept the *from_addr*. + + :exc:`SMTPDataError` + The server replied with an unexpected error code (other than a refusal of a + recipient). 
+ + Unless otherwise noted, the connection will be open even after an exception is + raised. + + +.. method:: SMTP.quit() + + Terminate the SMTP session and close the connection. + +Low-level methods corresponding to the standard SMTP/ESMTP commands ``HELP``, +``RSET``, ``NOOP``, ``MAIL``, ``RCPT``, and ``DATA`` are also supported. +Normally these do not need to be called directly, so they are not documented +here. For details, consult the module code. + + +.. _smtp-example: + +SMTP Example +------------ + +This example prompts the user for addresses needed in the message envelope ('To' +and 'From' addresses), and the message to be delivered. Note that the headers +to be included with the message must be included in the message as entered; this +example doesn't do any processing of the :rfc:`822` headers. In particular, the +'To' and 'From' addresses must be included in the message headers explicitly. :: + + import smtplib + + def raw_input(prompt): + import sys + sys.stdout.write(prompt) + sys.stdout.flush() + return sys.stdin.readline() + + def prompt(prompt): + return raw_input(prompt).strip() + + fromaddr = prompt("From: ") + toaddrs = prompt("To: ").split() + print "Enter message, end with ^D (Unix) or ^Z (Windows):" + + # Add the From: and To: headers at the start! + msg = ("From: %s\r\nTo: %s\r\n\r\n" + % (fromaddr, ", ".join(toaddrs))) + while 1: + try: + line = raw_input() + except EOFError: + break + if not line: + break + msg = msg + line + + print "Message length is " + repr(len(msg)) + + server = smtplib.SMTP('localhost') + server.set_debuglevel(1) + server.sendmail(fromaddr, toaddrs, msg) + server.quit() + diff --git a/Doc/library/sndhdr.rst b/Doc/library/sndhdr.rst new file mode 100644 index 0000000..90d71a9 --- /dev/null +++ b/Doc/library/sndhdr.rst @@ -0,0 +1,42 @@ + +:mod:`sndhdr` --- Determine type of sound file +============================================== + +.. module:: sndhdr + :synopsis: Determine type of a sound file. +.. 
sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org> + + +.. % Based on comments in the module source file. + +.. index:: + single: A-LAW + single: u-LAW + +The :mod:`sndhdr` module provides utility functions which attempt to determine the type +of sound data which is in a file. When these functions are able to determine +what type of sound data is stored in a file, they return a tuple ``(type, +sampling_rate, channels, frames, bits_per_sample)``. The value for *type* +indicates the data type and will be one of the strings ``'aifc'``, ``'aiff'``, +``'au'``, ``'hcom'``, ``'sndr'``, ``'sndt'``, ``'voc'``, ``'wav'``, ``'8svx'``, +``'sb'``, ``'ub'``, or ``'ul'``. The *sampling_rate* will be either the actual +value or ``0`` if unknown or difficult to decode. Similarly, *channels* will be +either the number of channels or ``0`` if it cannot be determined or if the +value is difficult to decode. The value for *frames* will be either the number +of frames or ``-1``. The last item in the tuple, *bits_per_sample*, will either +be the sample size in bits or ``'A'`` for A-LAW or ``'U'`` for u-LAW. + + +.. function:: what(filename) + + Determines the type of sound data stored in the file *filename* using + :func:`whathdr`. If it succeeds, returns a tuple as described above, otherwise + ``None`` is returned. + + +.. function:: whathdr(filename) + + Determines the type of sound data stored in a file based on the file header. + The name of the file is given by *filename*. This function returns a tuple as + described above on success, or ``None``. + diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst new file mode 100644 index 0000000..0ec4461 --- /dev/null +++ b/Doc/library/socket.rst @@ -0,0 +1,941 @@ + +:mod:`socket` --- Low-level networking interface +================================================ + +.. module:: socket + :synopsis: Low-level networking interface. + + +This module provides access to the BSD *socket* interface.
It is available on +all modern Unix systems, Windows, MacOS, BeOS, OS/2, and probably additional +platforms. + +.. note:: + + Some behavior may be platform dependent, since calls are made to the operating + system socket APIs. + +For an introduction to socket programming (in C), see the following papers: An +Introductory 4.3BSD Interprocess Communication Tutorial, by Stuart Sechrest and +An Advanced 4.3BSD Interprocess Communication Tutorial, by Samuel J. Leffler et +al, both in the UNIX Programmer's Manual, Supplementary Documents 1 (sections +PS1:7 and PS1:8). The platform-specific reference material for the various +socket-related system calls are also a valuable source of information on the +details of socket semantics. For Unix, refer to the manual pages; for Windows, +see the WinSock (or Winsock 2) specification. For IPv6-ready APIs, readers may +want to refer to :rfc:`2553` titled Basic Socket Interface Extensions for IPv6. + +.. index:: object: socket + +The Python interface is a straightforward transliteration of the Unix system +call and library interface for sockets to Python's object-oriented style: the +:func:`socket` function returns a :dfn:`socket object` whose methods implement +the various socket system calls. Parameter types are somewhat higher-level than +in the C interface: as with :meth:`read` and :meth:`write` operations on Python +files, buffer allocation on receive operations is automatic, and buffer length +is implicit on send operations. + +Socket addresses are represented as follows: A single string is used for the +:const:`AF_UNIX` address family. A pair ``(host, port)`` is used for the +:const:`AF_INET` address family, where *host* is a string representing either a +hostname in Internet domain notation like ``'daring.cwi.nl'`` or an IPv4 address +like ``'100.50.200.5'``, and *port* is an integral port number. 
For the +:const:`AF_INET6` address family, a four-tuple ``(host, port, flowinfo, +scopeid)`` is used, where *flowinfo* and *scopeid* represent the ``sin6_flowinfo`` +and ``sin6_scope_id`` members in :const:`struct sockaddr_in6` in C. For +:mod:`socket` module methods, *flowinfo* and *scopeid* can be omitted just for +backward compatibility. Note, however, omission of *scopeid* can cause problems +in manipulating scoped IPv6 addresses. Other address families are currently not +supported. The address format required by a particular socket object is +automatically selected based on the address family specified when the socket +object was created. + +For IPv4 addresses, two special forms are accepted instead of a host address: +the empty string represents :const:`INADDR_ANY`, and the string +``'<broadcast>'`` represents :const:`INADDR_BROADCAST`. The behavior is not +available for IPv6 for backward compatibility, therefore, you may want to avoid +these if you intend to support IPv6 with your Python programs. + +If you use a hostname in the *host* portion of an IPv4/v6 socket address, the +program may show nondeterministic behavior, as Python uses the first address +returned from the DNS resolution. The socket address will be resolved +differently into an actual IPv4/v6 address, depending on the results from DNS +resolution and/or the host configuration. For deterministic behavior use a +numeric address in the *host* portion. + +.. versionadded:: 2.5 + AF_NETLINK sockets are represented as pairs ``pid, groups``. + +All errors raise exceptions. The normal exceptions for invalid argument types +and out-of-memory conditions can be raised; errors related to socket or address +semantics raise the error :exc:`socket.error`. + +Non-blocking mode is supported through :meth:`setblocking`. A generalization of +this based on timeouts is supported through :meth:`settimeout`. + +The module :mod:`socket` exports the following constants and functions: + + +.. exception:: error + + ..
index:: module: errno + + This exception is raised for socket-related errors. The accompanying value is + either a string telling what went wrong or a pair ``(errno, string)`` + representing an error returned by a system call, similar to the value + accompanying :exc:`os.error`. See the module :mod:`errno`, which contains names + for the error codes defined by the underlying operating system. + + +.. exception:: herror + + This exception is raised for address-related errors, i.e. for functions that use + *h_errno* in the C API, including :func:`gethostbyname_ex` and + :func:`gethostbyaddr`. + + The accompanying value is a pair ``(h_errno, string)`` representing an error + returned by a library call. *string* represents the description of *h_errno*, as + returned by the :cfunc:`hstrerror` C function. + + +.. exception:: gaierror + + This exception is raised for address-related errors, for :func:`getaddrinfo` and + :func:`getnameinfo`. The accompanying value is a pair ``(error, string)`` + representing an error returned by a library call. *string* represents the + description of *error*, as returned by the :cfunc:`gai_strerror` C function. The + *error* value will match one of the :const:`EAI_\*` constants defined in this + module. + + +.. exception:: timeout + + This exception is raised when a timeout occurs on a socket which has had + timeouts enabled via a prior call to :meth:`settimeout`. The accompanying value + is a string whose value is currently always "timed out". + + .. versionadded:: 2.3 + + +.. data:: AF_UNIX + AF_INET + AF_INET6 + + These constants represent the address (and protocol) families, used for the + first argument to :func:`socket`. If the :const:`AF_UNIX` constant is not + defined then this protocol is unsupported. + + +.. data:: SOCK_STREAM + SOCK_DGRAM + SOCK_RAW + SOCK_RDM + SOCK_SEQPACKET + + These constants represent the socket types, used for the second argument to + :func:`socket`. 
(Only :const:`SOCK_STREAM` and :const:`SOCK_DGRAM` appear to be + generally useful.) + + +.. data:: SO_* + SOMAXCONN + MSG_* + SOL_* + IPPROTO_* + IPPORT_* + INADDR_* + IP_* + IPV6_* + EAI_* + AI_* + NI_* + TCP_* + + Many constants of these forms, documented in the Unix documentation on sockets + and/or the IP protocol, are also defined in the socket module. They are + generally used in arguments to the :meth:`setsockopt` and :meth:`getsockopt` + methods of socket objects. In most cases, only those symbols that are defined + in the Unix header files are defined; for a few symbols, default values are + provided. + + +.. data:: has_ipv6 + + This constant contains a boolean value which indicates if IPv6 is supported on + this platform. + + .. versionadded:: 2.3 + + +.. function:: create_connection(address[, timeout]) + + Connects to the *address* received (as usual, a ``(host, port)`` pair), with an + optional timeout for the connection. Specially useful for higher-level + protocols, it is not normally used directly from application-level code. + Passing the optional *timeout* parameter will set the timeout on the socket + instance (if it is not given or ``None``, the global default timeout setting is + used). + + .. versionadded:: 2.6 + + +.. function:: getaddrinfo(host, port[, family[, socktype[, proto[, flags]]]]) + + Resolves the *host*/*port* argument, into a sequence of 5-tuples that contain + all the necessary argument for the sockets manipulation. *host* is a domain + name, a string representation of IPv4/v6 address or ``None``. *port* is a string + service name (like ``'http'``), a numeric port number or ``None``. + + The rest of the arguments are optional and must be numeric if specified. For + *host* and *port*, by passing either an empty string or ``None``, you can pass + ``NULL`` to the C API. 
The :func:`getaddrinfo` function returns a list of + 5-tuples with the following structure: + + ``(family, socktype, proto, canonname, sockaddr)`` + + *family*, *socktype*, *proto* are all integer and are meant to be passed to the + :func:`socket` function. *canonname* is a string representing the canonical name + of the *host*. It can be a numeric IPv4/v6 address when :const:`AI_CANONNAME` is + specified for a numeric *host*. *sockaddr* is a tuple describing a socket + address, as described above. See the source for the :mod:`httplib` and other + library modules for a typical usage of the function. + + .. versionadded:: 2.2 + + +.. function:: getfqdn([name]) + + Return a fully qualified domain name for *name*. If *name* is omitted or empty, + it is interpreted as the local host. To find the fully qualified name, the + hostname returned by :func:`gethostbyaddr` is checked, then aliases for the + host, if available. The first name which includes a period is selected. In + case no fully qualified domain name is available, the hostname as returned by + :func:`gethostname` is returned. + + .. versionadded:: 2.0 + + +.. function:: gethostbyname(hostname) + + Translate a host name to IPv4 address format. The IPv4 address is returned as a + string, such as ``'100.50.200.5'``. If the host name is an IPv4 address itself + it is returned unchanged. See :func:`gethostbyname_ex` for a more complete + interface. :func:`gethostbyname` does not support IPv6 name resolution, and + :func:`getaddrinfo` should be used instead for IPv4/v6 dual stack support. + + +.. function:: gethostbyname_ex(hostname) + + Translate a host name to IPv4 address format, extended interface. 
Return a + triple ``(hostname, aliaslist, ipaddrlist)`` where *hostname* is the host's primary + host name, *aliaslist* is a (possibly + empty) list of alternative host names for the same address, and *ipaddrlist* is + a list of IPv4 addresses for the same interface on the same host (often but not + always a single address). :func:`gethostbyname_ex` does not support IPv6 name + resolution, and :func:`getaddrinfo` should be used instead for IPv4/v6 dual + stack support. + + +.. function:: gethostname() + + Return a string containing the hostname of the machine where the Python + interpreter is currently executing. If you want to know the current machine's IP + address, you may want to use ``gethostbyname(gethostname())``. This operation + assumes that there is a valid address-to-host mapping for the host, and the + assumption does not always hold. Note: :func:`gethostname` doesn't always return + the fully qualified domain name; use ``getfqdn()`` (see above). + + +.. function:: gethostbyaddr(ip_address) + + Return a triple ``(hostname, aliaslist, ipaddrlist)`` where *hostname* is the + primary host name responding to the given *ip_address*, *aliaslist* is a + (possibly empty) list of alternative host names for the same address, and + *ipaddrlist* is a list of IPv4/v6 addresses for the same interface on the same + host (most likely containing only a single address). To find the fully qualified + domain name, use the function :func:`getfqdn`. :func:`gethostbyaddr` supports + both IPv4 and IPv6. + + +.. function:: getnameinfo(sockaddr, flags) + + Translate a socket address *sockaddr* into a 2-tuple ``(host, port)``. Depending + on the settings of *flags*, the result can contain a fully-qualified domain name + or numeric address representation in *host*. Similarly, *port* can contain a + string port name or a numeric port number. + + .. versionadded:: 2.2 + + +..
function:: getprotobyname(protocolname) + + Translate an Internet protocol name (for example, ``'icmp'``) to a constant + suitable for passing as the (optional) third argument to the :func:`socket` + function. This is usually only needed for sockets opened in "raw" mode + (:const:`SOCK_RAW`); for the normal socket modes, the correct protocol is chosen + automatically if the protocol is omitted or zero. + + +.. function:: getservbyname(servicename[, protocolname]) + + Translate an Internet service name and protocol name to a port number for that + service. The optional protocol name, if given, should be ``'tcp'`` or + ``'udp'``, otherwise any protocol will match. + + +.. function:: getservbyport(port[, protocolname]) + + Translate an Internet port number and protocol name to a service name for that + service. The optional protocol name, if given, should be ``'tcp'`` or + ``'udp'``, otherwise any protocol will match. + + +.. function:: socket([family[, type[, proto]]]) + + Create a new socket using the given address family, socket type and protocol + number. The address family should be :const:`AF_INET` (the default), + :const:`AF_INET6` or :const:`AF_UNIX`. The socket type should be + :const:`SOCK_STREAM` (the default), :const:`SOCK_DGRAM` or perhaps one of the + other ``SOCK_`` constants. The protocol number is usually zero and may be + omitted in that case. + + +.. function:: ssl(sock[, keyfile, certfile]) + + Initiate a SSL connection over the socket *sock*. *keyfile* is the name of a PEM + formatted file that contains your private key. *certfile* is a PEM formatted + certificate chain file. On success, a new :class:`SSLObject` is returned. + + .. warning:: + + This does not do any certificate verification! + + +.. function:: socketpair([family[, type[, proto]]]) + + Build a pair of connected socket objects using the given address family, socket + type, and protocol number. 
Address family, socket type, and protocol number are + as for the :func:`socket` function above. The default family is :const:`AF_UNIX` + if defined on the platform; otherwise, the default is :const:`AF_INET`. + Availability: Unix. + + .. versionadded:: 2.4 + + +.. function:: fromfd(fd, family, type[, proto]) + + Duplicate the file descriptor *fd* (an integer as returned by a file object's + :meth:`fileno` method) and build a socket object from the result. Address + family, socket type and protocol number are as for the :func:`socket` function + above. The file descriptor should refer to a socket, but this is not checked --- + subsequent operations on the object may fail if the file descriptor is invalid. + This function is rarely needed, but can be used to get or set socket options on + a socket passed to a program as standard input or output (such as a server + started by the Unix inet daemon). The socket is assumed to be in blocking mode. + Availability: Unix. + + +.. function:: ntohl(x) + + Convert 32-bit positive integers from network to host byte order. On machines + where the host byte order is the same as network byte order, this is a no-op; + otherwise, it performs a 4-byte swap operation. + + +.. function:: ntohs(x) + + Convert 16-bit positive integers from network to host byte order. On machines + where the host byte order is the same as network byte order, this is a no-op; + otherwise, it performs a 2-byte swap operation. + + +.. function:: htonl(x) + + Convert 32-bit positive integers from host to network byte order. On machines + where the host byte order is the same as network byte order, this is a no-op; + otherwise, it performs a 4-byte swap operation. + + +.. function:: htons(x) + + Convert 16-bit positive integers from host to network byte order. On machines + where the host byte order is the same as network byte order, this is a no-op; + otherwise, it performs a 2-byte swap operation. + + +.. 
function:: inet_aton(ip_string) + + Convert an IPv4 address from dotted-quad string format (for example, + ``'123.45.67.89'``) to 32-bit packed binary format, as a string four characters in + length. This is useful when conversing with a program that uses the standard C + library and needs objects of type :ctype:`struct in_addr`, which is the C type + for the 32-bit packed binary data this function returns. + + If the IPv4 address string passed to this function is invalid, + :exc:`socket.error` will be raised. Note that exactly what is valid depends on + the underlying C implementation of :cfunc:`inet_aton`. + + :func:`inet_aton` does not support IPv6, and :func:`inet_pton` should be used + instead for IPv4/v6 dual stack support. + + +.. function:: inet_ntoa(packed_ip) + + Convert a 32-bit packed IPv4 address (a string four characters in length) to its + standard dotted-quad string representation (for example, ``'123.45.67.89'``). This + is useful when conversing with a program that uses the standard C library and + needs objects of type :ctype:`struct in_addr`, which is the C type for the + 32-bit packed binary data this function takes as an argument. + + If the string passed to this function is not exactly 4 bytes in length, + :exc:`socket.error` will be raised. :func:`inet_ntoa` does not support IPv6, and + :func:`inet_ntop` should be used instead for IPv4/v6 dual stack support. + + +.. function:: inet_pton(address_family, ip_string) + + Convert an IP address from its family-specific string format to a packed, binary + format. :func:`inet_pton` is useful when a library or network protocol calls for + an object of type :ctype:`struct in_addr` (similar to :func:`inet_aton`) or + :ctype:`struct in6_addr`. + + Supported values for *address_family* are currently :const:`AF_INET` and + :const:`AF_INET6`. If the IP address string *ip_string* is invalid, + :exc:`socket.error` will be raised.
Note that exactly what is valid depends on + both the value of *address_family* and the underlying implementation of + :cfunc:`inet_pton`. + + Availability: Unix (maybe not all platforms). + + .. versionadded:: 2.3 + + +.. function:: inet_ntop(address_family, packed_ip) + + Convert a packed IP address (a string of some number of characters) to its + standard, family-specific string representation (for example, ``'7.10.0.5'`` or + ``'5aef:2b::8'``) :func:`inet_ntop` is useful when a library or network protocol + returns an object of type :ctype:`struct in_addr` (similar to :func:`inet_ntoa`) + or :ctype:`struct in6_addr`. + + Supported values for *address_family* are currently :const:`AF_INET` and + :const:`AF_INET6`. If the string *packed_ip* is not the correct length for the + specified address family, :exc:`ValueError` will be raised. A + :exc:`socket.error` is raised for errors from the call to :func:`inet_ntop`. + + Availability: Unix (maybe not all platforms). + + .. versionadded:: 2.3 + + +.. function:: getdefaulttimeout() + + Return the default timeout in floating seconds for new socket objects. A value + of ``None`` indicates that new socket objects have no timeout. When the socket + module is first imported, the default is ``None``. + + .. versionadded:: 2.3 + + +.. function:: setdefaulttimeout(timeout) + + Set the default timeout in floating seconds for new socket objects. A value of + ``None`` indicates that new socket objects have no timeout. When the socket + module is first imported, the default is ``None``. + + .. versionadded:: 2.3 + + +.. data:: SocketType + + This is a Python type object that represents the socket object type. It is the + same as ``type(socket(...))``. + + +.. seealso:: + + Module :mod:`SocketServer` + Classes that simplify writing network servers. + + +.. _socket-objects: + +Socket Objects +-------------- + +Socket objects have the following methods. 
Except for :meth:`makefile` these +correspond to Unix system calls applicable to sockets. + + +.. method:: socket.accept() + + Accept a connection. The socket must be bound to an address and listening for + connections. The return value is a pair ``(conn, address)`` where *conn* is a + *new* socket object usable to send and receive data on the connection, and + *address* is the address bound to the socket on the other end of the connection. + + +.. method:: socket.bind(address) + + Bind the socket to *address*. The socket must not already be bound. (The format + of *address* depends on the address family --- see above.) + + .. note:: + + This method has historically accepted a pair of parameters for :const:`AF_INET` + addresses instead of only a tuple. This was never intentional and is no longer + available in Python 2.0 and later. + + +.. method:: socket.close() + + Close the socket. All future operations on the socket object will fail. The + remote end will receive no more data (after queued data is flushed). Sockets are + automatically closed when they are garbage-collected. + + +.. method:: socket.connect(address) + + Connect to a remote socket at *address*. (The format of *address* depends on the + address family --- see above.) + + .. note:: + + This method has historically accepted a pair of parameters for :const:`AF_INET` + addresses instead of only a tuple. This was never intentional and is no longer + available in Python 2.0 and later. + + +.. method:: socket.connect_ex(address) + + Like ``connect(address)``, but return an error indicator instead of raising an + exception for errors returned by the C-level :cfunc:`connect` call (other + problems, such as "host not found," can still raise exceptions). The error + indicator is ``0`` if the operation succeeded, otherwise the value of the + :cdata:`errno` variable. This is useful to support, for example, asynchronous + connects. + + .. 
note:: + + This method has historically accepted a pair of parameters for :const:`AF_INET` + addresses instead of only a tuple. This was never intentional and is no longer + available in Python 2.0 and later. + + +.. method:: socket.fileno() + + Return the socket's file descriptor (a small integer). This is useful with + :func:`select.select`. + + Under Windows the small integer returned by this method cannot be used where a + file descriptor can be used (such as :func:`os.fdopen`). Unix does not have + this limitation. + + +.. method:: socket.getpeername() + + Return the remote address to which the socket is connected. This is useful to + find out the port number of a remote IPv4/v6 socket, for instance. (The format + of the address returned depends on the address family --- see above.) On some + systems this function is not supported. + + +.. method:: socket.getsockname() + + Return the socket's own address. This is useful to find out the port number of + an IPv4/v6 socket, for instance. (The format of the address returned depends on + the address family --- see above.) + + +.. method:: socket.getsockopt(level, optname[, buflen]) + + Return the value of the given socket option (see the Unix man page + :manpage:`getsockopt(2)`). The needed symbolic constants (:const:`SO_\*` etc.) + are defined in this module. If *buflen* is absent, an integer option is assumed + and its integer value is returned by the function. If *buflen* is present, it + specifies the maximum length of the buffer used to receive the option in, and + this buffer is returned as a string. It is up to the caller to decode the + contents of the buffer (see the optional built-in module :mod:`struct` for a way + to decode C structures encoded as strings). + + +.. method:: socket.listen(backlog) + + Listen for connections made to the socket. The *backlog* argument specifies the + maximum number of queued connections and should be at least 1; the maximum value + is system-dependent (usually 5). + + +.. 
method:: socket.makefile([mode[, bufsize]]) + + .. index:: single: I/O control; buffering + + Return a :dfn:`file object` associated with the socket. (File objects are + described in :ref:`bltin-file-objects`.) The file object + references a :cfunc:`dup`\ ped version of the socket file descriptor, so the + file object and socket object may be closed or garbage-collected independently. + The socket must be in blocking mode (it can not have a timeout). The optional + *mode* and *bufsize* arguments are interpreted the same way as by the built-in + :func:`file` function; see :ref:`built-in-funcs` for more information. + + +.. method:: socket.recv(bufsize[, flags]) + + Receive data from the socket. The return value is a string representing the + data received. The maximum amount of data to be received at once is specified + by *bufsize*. See the Unix manual page :manpage:`recv(2)` for the meaning of + the optional argument *flags*; it defaults to zero. + + .. note:: + + For best match with hardware and network realities, the value of *bufsize* + should be a relatively small power of 2, for example, 4096. + + +.. method:: socket.recvfrom(bufsize[, flags]) + + Receive data from the socket. The return value is a pair ``(string, address)`` + where *string* is a string representing the data received and *address* is the + address of the socket sending the data. See the Unix manual page + :manpage:`recv(2)` for the meaning of the optional argument *flags*; it defaults + to zero. (The format of *address* depends on the address family --- see above.) + + +.. method:: socket.recvfrom_into(buffer[, nbytes[, flags]]) + + Receive data from the socket, writing it into *buffer* instead of creating a + new string. The return value is a pair ``(nbytes, address)`` where *nbytes* is + the number of bytes received and *address* is the address of the socket sending + the data. 
See the Unix manual page :manpage:`recv(2)` for the meaning of the + optional argument *flags*; it defaults to zero. (The format of *address* + depends on the address family --- see above.) + + .. versionadded:: 2.5 + + +.. method:: socket.recv_into(buffer[, nbytes[, flags]]) + + Receive up to *nbytes* bytes from the socket, storing the data into a buffer + rather than creating a new string. If *nbytes* is not specified (or 0), + receive up to the size available in the given buffer. See the Unix manual page + :manpage:`recv(2)` for the meaning of the optional argument *flags*; it defaults + to zero. + + .. versionadded:: 2.5 + + +.. method:: socket.send(string[, flags]) + + Send data to the socket. The socket must be connected to a remote socket. The + optional *flags* argument has the same meaning as for :meth:`recv` above. + Returns the number of bytes sent. Applications are responsible for checking that + all data has been sent; if only some of the data was transmitted, the + application needs to attempt delivery of the remaining data. + + +.. method:: socket.sendall(string[, flags]) + + Send data to the socket. The socket must be connected to a remote socket. The + optional *flags* argument has the same meaning as for :meth:`recv` above. + Unlike :meth:`send`, this method continues to send data from *string* until + either all data has been sent or an error occurs. ``None`` is returned on + success. On error, an exception is raised, and there is no way to determine how + much data, if any, was successfully sent. + + +.. method:: socket.sendto(string[, flags], address) + + Send data to the socket. The socket should not be connected to a remote socket, + since the destination socket is specified by *address*. The optional *flags* + argument has the same meaning as for :meth:`recv` above. Return the number of + bytes sent. (The format of *address* depends on the address family --- see + above.) + + +.. 
method:: socket.setblocking(flag) + + Set blocking or non-blocking mode of the socket: if *flag* is 0, the socket is + set to non-blocking, else to blocking mode. Initially all sockets are in + blocking mode. In non-blocking mode, if a :meth:`recv` call doesn't find any + data, or if a :meth:`send` call can't immediately dispose of the data, an + :exc:`error` exception is raised; in blocking mode, the calls block until they + can proceed. ``s.setblocking(0)`` is equivalent to ``s.settimeout(0)``; + ``s.setblocking(1)`` is equivalent to ``s.settimeout(None)``. + + +.. method:: socket.settimeout(value) + + Set a timeout on blocking socket operations. The *value* argument can be a + nonnegative float expressing seconds, or ``None``. If a float is given, + subsequent socket operations will raise a :exc:`timeout` exception if the + timeout period *value* has elapsed before the operation has completed. Setting + a timeout of ``None`` disables timeouts on socket operations. + ``s.settimeout(0.0)`` is equivalent to ``s.setblocking(0)``; + ``s.settimeout(None)`` is equivalent to ``s.setblocking(1)``. + + .. versionadded:: 2.3 + + +.. method:: socket.gettimeout() + + Return the timeout in floating-point seconds associated with socket operations, or + ``None`` if no timeout is set. This reflects the last call to + :meth:`setblocking` or :meth:`settimeout`. + + .. versionadded:: 2.3 + +Some notes on socket blocking and timeouts: A socket object can be in one of +three modes: blocking, non-blocking, or timeout. Sockets are always created in +blocking mode. In blocking mode, operations block until complete. In +non-blocking mode, operations fail (with an error that is unfortunately +system-dependent) if they cannot be completed immediately. In timeout mode, +operations fail if they cannot be completed within the timeout specified for the +socket. The :meth:`setblocking` method is simply a shorthand for certain +:meth:`settimeout` calls.
+ +Timeout mode internally sets the socket in non-blocking mode. The blocking and +timeout modes are shared between file descriptors and socket objects that refer +to the same network endpoint. A consequence of this is that file objects +returned by the :meth:`makefile` method must only be used when the socket is in +blocking mode; in timeout or non-blocking mode file operations that cannot be +completed immediately will fail. + +Note that the :meth:`connect` operation is subject to the timeout setting, and +in general it is recommended to call :meth:`settimeout` before calling +:meth:`connect`. + + +.. method:: socket.setsockopt(level, optname, value) + + .. index:: module: struct + + Set the value of the given socket option (see the Unix manual page + :manpage:`setsockopt(2)`). The needed symbolic constants are defined in the + :mod:`socket` module (:const:`SO_\*` etc.). The value can be an integer or a + string representing a buffer. In the latter case it is up to the caller to + ensure that the string contains the proper bits (see the optional built-in + module :mod:`struct` for a way to encode C structures as strings). + + +.. method:: socket.shutdown(how) + + Shut down one or both halves of the connection. If *how* is :const:`SHUT_RD`, + further receives are disallowed. If *how* is :const:`SHUT_WR`, further sends + are disallowed. If *how* is :const:`SHUT_RDWR`, further sends and receives are + disallowed. + +Note that there are no methods :meth:`read` or :meth:`write`; use :meth:`recv` +and :meth:`send` without *flags* argument instead. + +Socket objects also have these (read-only) attributes that correspond to the +values given to the :class:`socket` constructor. + + +.. attribute:: socket.family + + The socket family. + + .. versionadded:: 2.5 + + +.. attribute:: socket.type + + The socket type. + + .. versionadded:: 2.5 + + +.. attribute:: socket.proto + + The socket protocol. + + .. versionadded:: 2.5 + + +.. 
_ssl-objects: + +SSL Objects +----------- + +SSL objects have the following methods. + + +.. method:: SSL.write(s) + + Writes the string *s* to the object's SSL connection. The return value is + the number of bytes written. + + +.. method:: SSL.read([n]) + + If *n* is provided, read *n* bytes from the SSL connection, otherwise read until + EOF. The return value is a string of the bytes read. + + +.. method:: SSL.server() + + Returns a string describing the server's certificate. Useful for debugging + purposes; do not parse the content of this string because its format can't be + parsed unambiguously. + + +.. method:: SSL.issuer() + + Returns a string describing the issuer of the server's certificate. Useful for + debugging purposes; do not parse the content of this string because its format + can't be parsed unambiguously. + + +.. _socket-example: + +Example +------- + +Here are four minimal example programs using the TCP/IP protocol: a server that +echoes all data that it receives back (servicing only one client), and a client +using it. Note that a server must perform the sequence :func:`socket`, +:meth:`bind`, :meth:`listen`, :meth:`accept` (possibly repeating the +:meth:`accept` to service more than one client), while a client only needs the +sequence :func:`socket`, :meth:`connect`. Also note that the server does not +:meth:`send`/:meth:`recv` on the socket it is listening on but on the new +socket returned by :meth:`accept`. + +The first two examples support IPv4 only.
:: + + # Echo server program + import socket + + HOST = '' # Symbolic name meaning the local host + PORT = 50007 # Arbitrary non-privileged port + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.bind((HOST, PORT)) + s.listen(1) + conn, addr = s.accept() + print 'Connected by', addr + while 1: + data = conn.recv(1024) + if not data: break + conn.send(data) + conn.close() + +:: + + # Echo client program + import socket + + HOST = 'daring.cwi.nl' # The remote host + PORT = 50007 # The same port as used by the server + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.connect((HOST, PORT)) + s.send('Hello, world') + data = s.recv(1024) + s.close() + print 'Received', repr(data) + +The next two examples are identical to the above two, but support both IPv4 and +IPv6. The server side will listen to the first address family available (it +should listen to both instead). On most IPv6-ready systems, IPv6 will take +precedence and the server may not accept IPv4 traffic. The client side will try +to connect to all the addresses returned as a result of the name resolution, and +send traffic to the first one connected successfully.
:: + + # Echo server program + import socket + import sys + + HOST = '' # Symbolic name meaning the local host + PORT = 50007 # Arbitrary non-privileged port + s = None + for res in socket.getaddrinfo(HOST, PORT, socket.AF_UNSPEC, socket.SOCK_STREAM, 0, socket.AI_PASSIVE): + af, socktype, proto, canonname, sa = res + try: + s = socket.socket(af, socktype, proto) + except socket.error as msg: + s = None + continue + try: + s.bind(sa) + s.listen(1) + except socket.error as msg: + s.close() + s = None + continue + break + if s is None: + print 'could not open socket' + sys.exit(1) + conn, addr = s.accept() + print 'Connected by', addr + while 1: + data = conn.recv(1024) + if not data: break + conn.send(data) + conn.close() + +:: + + # Echo client program + import socket + import sys + + HOST = 'daring.cwi.nl' # The remote host + PORT = 50007 # The same port as used by the server + s = None + for res in socket.getaddrinfo(HOST, PORT, socket.AF_UNSPEC, socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + try: + s = socket.socket(af, socktype, proto) + except socket.error as msg: + s = None + continue + try: + s.connect(sa) + except socket.error as msg: + s.close() + s = None + continue + break + if s is None: + print 'could not open socket' + sys.exit(1) + s.send('Hello, world') + data = s.recv(1024) + s.close() + print 'Received', repr(data) + +This example connects to an SSL server, prints the server and issuer's +distinguished names, sends some bytes, and reads part of the response:: + + import socket + + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.connect(('www.verisign.com', 443)) + + ssl_sock = socket.ssl(s) + + print repr(ssl_sock.server()) + print repr(ssl_sock.issuer()) + + # Set a simple HTTP request -- use httplib in actual code. + ssl_sock.write("""GET / HTTP/1.0\r + Host: www.verisign.com\r\n\r\n""") + + # Read a chunk of data. Will not necessarily + # read all the data returned by the server. 
+ data = ssl_sock.read() + + # Note that you need to close the underlying socket, not the SSL object. + del ssl_sock + s.close() + +At this writing, this SSL example prints the following output (line breaks +inserted for readability):: + + '/C=US/ST=California/L=Mountain View/ + O=VeriSign, Inc./OU=Production Services/ + OU=Terms of use at www.verisign.com/rpa (c)00/ + CN=www.verisign.com' + '/O=VeriSign Trust Network/OU=VeriSign, Inc./ + OU=VeriSign International Server CA - Class 3/ + OU=www.verisign.com/CPS Incorp.by Ref. LIABILITY LTD.(c)97 VeriSign' + diff --git a/Doc/library/socketserver.rst b/Doc/library/socketserver.rst new file mode 100644 index 0000000..96fae6b --- /dev/null +++ b/Doc/library/socketserver.rst @@ -0,0 +1,295 @@ + +:mod:`SocketServer` --- A framework for network servers +======================================================= + +.. module:: SocketServer + :synopsis: A framework for network servers. + + +The :mod:`SocketServer` module simplifies the task of writing network servers. + +There are four basic server classes: :class:`TCPServer` uses the Internet TCP +protocol, which provides for continuous streams of data between the client and +server. :class:`UDPServer` uses datagrams, which are discrete packets of +information that may arrive out of order or be lost while in transit. The more +infrequently used :class:`UnixStreamServer` and :class:`UnixDatagramServer` +classes are similar, but use Unix domain sockets; they're not available on +non-Unix platforms. For more details on network programming, consult a book +such as +W. Richard Stevens's UNIX Network Programming or Ralph Davis's Win32 Network +Programming. + +These four classes process requests :dfn:`synchronously`; each request must be +completed before the next request can be started. This isn't suitable if each +request takes a long time to complete, because it requires a lot of computation, +or because it returns a lot of data which the client is slow to process.
The +solution is to create a separate process or thread to handle each request; the +:class:`ForkingMixIn` and :class:`ThreadingMixIn` mix-in classes can be used to +support asynchronous behaviour. + +Creating a server requires several steps. First, you must create a request +handler class by subclassing the :class:`BaseRequestHandler` class and +overriding its :meth:`handle` method; this method will process incoming +requests. Second, you must instantiate one of the server classes, passing it +the server's address and the request handler class. Finally, call the +:meth:`handle_request` or :meth:`serve_forever` method of the server object to +process one or many requests. + +When inheriting from :class:`ThreadingMixIn` for threaded connection behavior, +you should explicitly declare how you want your threads to behave on an abrupt +shutdown. The :class:`ThreadingMixIn` class defines an attribute +*daemon_threads*, which indicates whether or not the server should wait for +thread termination. You should set the flag explicitly if you would like threads +to behave autonomously; the default is :const:`False`, meaning that Python will +not exit until all threads created by :class:`ThreadingMixIn` have exited. 
+ +Server classes have the same external methods and attributes, no matter what +network protocol they use: + + +Server Creation Notes +--------------------- + +There are five classes in an inheritance diagram, four of which represent +synchronous servers of four types:: + + +------------+ + | BaseServer | + +------------+ + | + v + +-----------+ +------------------+ + | TCPServer |------->| UnixStreamServer | + +-----------+ +------------------+ + | + v + +-----------+ +--------------------+ + | UDPServer |------->| UnixDatagramServer | + +-----------+ +--------------------+ + +Note that :class:`UnixDatagramServer` derives from :class:`UDPServer`, not from +:class:`UnixStreamServer` --- the only difference between an IP and a Unix +stream server is the address family, which is simply repeated in both Unix +server classes. + +Forking and threading versions of each type of server can be created using the +:class:`ForkingMixIn` and :class:`ThreadingMixIn` mix-in classes. For instance, +a threading UDP server class is created as follows:: + + class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass + +The mix-in class must come first, since it overrides a method defined in +:class:`UDPServer`. Setting the various member variables also changes the +behavior of the underlying server mechanism. + +To implement a service, you must derive a class from :class:`BaseRequestHandler` +and redefine its :meth:`handle` method. You can then run various versions of +the service by combining one of the server classes with your request handler +class. The request handler class must be different for datagram or stream +services. This can be hidden by using the handler subclasses +:class:`StreamRequestHandler` or :class:`DatagramRequestHandler`. + +Of course, you still have to use your head! 
For instance, it makes no sense to +use a forking server if the service contains state in memory that can be +modified by different requests, since the modifications in the child process +would never reach the initial state kept in the parent process and passed to +each child. In this case, you can use a threading server, but you will probably +have to use locks to protect the integrity of the shared data. + +On the other hand, if you are building an HTTP server where all data is stored +externally (for instance, in the file system), a synchronous class will +essentially render the service "deaf" while one request is being handled -- +which may be for a very long time if a client is slow to receive all the data it +has requested. Here a threading or forking server is appropriate. + +In some cases, it may be appropriate to process part of a request synchronously, +but to finish processing in a forked child depending on the request data. This +can be implemented by using a synchronous server and doing an explicit fork in +the request handler class :meth:`handle` method. + +Another approach to handling multiple simultaneous requests in an environment +that supports neither threads nor :func:`fork` (or where these are too expensive +or inappropriate for the service) is to maintain an explicit table of partially +finished requests and to use :func:`select` to decide which request to work on +next (or whether to handle a new incoming request). This is particularly +important for stream services where each client can potentially be connected for +a long time (if threads or subprocesses cannot be used). + +.. % XXX should data and methods be intermingled, or separate? +.. % how should the distinction between class and instance variables be +.. % drawn? + + +Server Objects +-------------- + + +.. function:: fileno() + + Return an integer file descriptor for the socket on which the server is + listening. 
This function is most commonly passed to :func:`select.select`, to + allow monitoring multiple servers in the same process. + + +.. function:: handle_request() + + Process a single request. This function calls the following methods in order: + :meth:`get_request`, :meth:`verify_request`, and :meth:`process_request`. If + the user-provided :meth:`handle` method of the handler class raises an + exception, the server's :meth:`handle_error` method will be called. + + +.. function:: serve_forever() + + Handle an infinite number of requests. This simply calls :meth:`handle_request` + inside an infinite loop. + + +.. data:: address_family + + The family of protocols to which the server's socket belongs. + :const:`socket.AF_INET` and :const:`socket.AF_UNIX` are two possible values. + + +.. data:: RequestHandlerClass + + The user-provided request handler class; an instance of this class is created + for each request. + + +.. data:: server_address + + The address on which the server is listening. The format of addresses varies + depending on the protocol family; see the documentation for the socket module + for details. For Internet protocols, this is a tuple containing a string giving + the address, and an integer port number: ``('127.0.0.1', 80)``, for example. + + +.. data:: socket + + The socket object on which the server will listen for incoming requests. + +The server classes support the following class variables: + +.. % XXX should class variables be covered before instance variables, or +.. % vice versa? + + +.. data:: allow_reuse_address + + Whether the server will allow the reuse of an address. This defaults to + :const:`False`, and can be set in subclasses to change the policy. + + +.. data:: request_queue_size + + The size of the request queue. If it takes a long time to process a single + request, any requests that arrive while the server is busy are placed into a + queue, up to :attr:`request_queue_size` requests. 
Once the queue is full, + further requests from clients will get a "Connection denied" error. The default + value is usually 5, but this can be overridden by subclasses. + + +.. data:: socket_type + + The type of socket used by the server; :const:`socket.SOCK_STREAM` and + :const:`socket.SOCK_DGRAM` are two possible values. + +There are various server methods that can be overridden by subclasses of base +server classes like :class:`TCPServer`; these methods aren't useful to external +users of the server object. + +.. % should the default implementations of these be documented, or should +.. % it be assumed that the user will look at SocketServer.py? + + +.. function:: finish_request() + + Actually processes the request by instantiating :attr:`RequestHandlerClass` and + calling its :meth:`handle` method. + + +.. function:: get_request() + + Must accept a request from the socket, and return a 2-tuple containing the *new* + socket object to be used to communicate with the client, and the client's + address. + + +.. function:: handle_error(request, client_address) + + This function is called if the :attr:`RequestHandlerClass`'s :meth:`handle` + method raises an exception. The default action is to print the traceback to + standard output and continue handling further requests. + + +.. function:: process_request(request, client_address) + + Calls :meth:`finish_request` to create an instance of the + :attr:`RequestHandlerClass`. If desired, this function can create a new process + or thread to handle the request; the :class:`ForkingMixIn` and + :class:`ThreadingMixIn` classes do this. + +.. % Is there any point in documenting the following two functions? +.. % What would the purpose of overriding them be: initializing server +.. % instance variables, adding new network families? + + +.. function:: server_activate() + + Called by the server's constructor to activate the server. The default behavior + just :meth:`listen`\ s to the server's socket. May be overridden. + + +.. 
function:: server_bind() + + Called by the server's constructor to bind the socket to the desired address. + May be overridden. + + +.. function:: verify_request(request, client_address) + + Must return a Boolean value; if the value is :const:`True`, the request will be + processed, and if it's :const:`False`, the request will be denied. This function + can be overridden to implement access controls for a server. The default + implementation always returns :const:`True`. + + +RequestHandler Objects +---------------------- + +The request handler class must define a new :meth:`handle` method, and can +override any of the following methods. A new instance is created for each +request. + + +.. function:: finish() + + Called after the :meth:`handle` method to perform any clean-up actions required. + The default implementation does nothing. If :meth:`setup` or :meth:`handle` + raise an exception, this function will not be called. + + +.. function:: handle() + + This function must do all the work required to service a request. The default + implementation does nothing. Several instance attributes are available to it; + the request is available as :attr:`self.request`; the client address as + :attr:`self.client_address`; and the server instance as :attr:`self.server`, in + case it needs access to per-server information. + + The type of :attr:`self.request` is different for datagram or stream services. + For stream services, :attr:`self.request` is a socket object; for datagram + services, :attr:`self.request` is a string. However, this can be hidden by using + the request handler subclasses :class:`StreamRequestHandler` or + :class:`DatagramRequestHandler`, which override the :meth:`setup` and + :meth:`finish` methods, and provide :attr:`self.rfile` and :attr:`self.wfile` + attributes. :attr:`self.rfile` and :attr:`self.wfile` can be read or written, + respectively, to get the request data or return data to the client. + + +.. 
function:: setup() + + Called before the :meth:`handle` method to perform any initialization actions + required. The default implementation does nothing. + diff --git a/Doc/library/someos.rst b/Doc/library/someos.rst new file mode 100644 index 0000000..5ee96bc --- /dev/null +++ b/Doc/library/someos.rst @@ -0,0 +1,23 @@ + +.. _someos: + +********************************** +Optional Operating System Services +********************************** + +The modules described in this chapter provide interfaces to operating system +features that are available on selected operating systems only. The interfaces +are generally modeled after the Unix or C interfaces but they are available on +some other systems as well (e.g. Windows or NT). Here's an overview: + + +.. toctree:: + + select.rst + thread.rst + threading.rst + dummy_thread.rst + dummy_threading.rst + mmap.rst + readline.rst + rlcompleter.rst diff --git a/Doc/library/spwd.rst b/Doc/library/spwd.rst new file mode 100644 index 0000000..6cbe925 --- /dev/null +++ b/Doc/library/spwd.rst @@ -0,0 +1,74 @@ + +:mod:`spwd` --- The shadow password database +============================================ + +.. module:: spwd + :platform: Unix + :synopsis: The shadow password database (getspnam() and friends). + + +.. versionadded:: 2.5 + +This module provides access to the Unix shadow password database. It is +available on various Unix versions. + +You must have enough privileges to access the shadow password database (this +usually means you have to be root). 
+ +Shadow password database entries are reported as a tuple-like object, whose +attributes correspond to the members of the ``spwd`` structure (Attribute field +below, see ``<shadow.h>``): + ++-------+---------------+---------------------------------+ +| Index | Attribute | Meaning | ++=======+===============+=================================+ +| 0 | ``sp_nam`` | Login name | ++-------+---------------+---------------------------------+ +| 1 | ``sp_pwd`` | Encrypted password | ++-------+---------------+---------------------------------+ +| 2 | ``sp_lstchg`` | Date of last change | ++-------+---------------+---------------------------------+ +| 3 | ``sp_min`` | Minimal number of days between | +| | | changes | ++-------+---------------+---------------------------------+ +| 4 | ``sp_max`` | Maximum number of days between | +| | | changes | ++-------+---------------+---------------------------------+ +| 5 | ``sp_warn`` | Number of days before password | +| | | expires to warn user about it | ++-------+---------------+---------------------------------+ +| 6 | ``sp_inact`` | Number of days after password | +| | | expires until account is | +| | | blocked | ++-------+---------------+---------------------------------+ +| 7 | ``sp_expire`` | Number of days since 1970-01-01 | +| | | until account is disabled | ++-------+---------------+---------------------------------+ +| 8 | ``sp_flag`` | Reserved | ++-------+---------------+---------------------------------+ + +The sp_nam and sp_pwd items are strings, all others are integers. +:exc:`KeyError` is raised if the entry asked for cannot be found. + +It defines the following items: + + +.. function:: getspnam(name) + + Return the shadow password database entry for the given user name. + + +.. function:: getspall() + + Return a list of all available shadow password database entries, in arbitrary + order. + + +.. seealso:: + + Module :mod:`grp` + An interface to the group database, similar to this. 
+ + Module :mod:`pwd` + An interface to the normal password database, similar to this. + diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst new file mode 100644 index 0000000..707092b --- /dev/null +++ b/Doc/library/sqlite3.rst @@ -0,0 +1,689 @@ + +:mod:`sqlite3` --- DB-API 2.0 interface for SQLite databases +============================================================ + +.. module:: sqlite3 + :synopsis: A DB-API 2.0 implementation using SQLite 3.x. +.. sectionauthor:: Gerhard Häring <gh@ghaering.de> + + +.. versionadded:: 2.5 + +SQLite is a C library that provides a lightweight disk-based database that +doesn't require a separate server process and allows accessing the database +using a nonstandard variant of the SQL query language. Some applications can use +SQLite for internal data storage. It's also possible to prototype an +application using SQLite and then port the code to a larger database such as +PostgreSQL or Oracle. + +pysqlite was written by Gerhard Häring and provides a SQL interface compliant +with the DB-API 2.0 specification described by :pep:`249`. + +To use the module, you must first create a :class:`Connection` object that +represents the database. Here the data will be stored in the +:file:`/tmp/example` file:: + + conn = sqlite3.connect('/tmp/example') + +You can also supply the special name ``:memory:`` to create a database in RAM. + +Once you have a :class:`Connection`, you can create a :class:`Cursor` object +and call its :meth:`execute` method to perform SQL commands:: + + c = conn.cursor() + + # Create table + c.execute('''create table stocks + (date text, trans text, symbol text, + qty real, price real)''') + + # Insert a row of data + c.execute("""insert into stocks + values ('2006-01-05','BUY','RHAT',100,35.14)""") + + # Save (commit) the changes + conn.commit() + + # We can also close the cursor if we are done with it + c.close() + +Usually your SQL operations will need to use values from Python variables. 
You +shouldn't assemble your query using Python's string operations because doing so +is insecure; it makes your program vulnerable to an SQL injection attack. + +Instead, use the DB-API's parameter substitution. Put ``?`` as a placeholder +wherever you want to use a value, and then provide a tuple of values as the +second argument to the cursor's :meth:`execute` method. (Other database modules +may use a different placeholder, such as ``%s`` or ``:1``.) For example:: + + # Never do this -- insecure! + symbol = 'IBM' + c.execute("... where symbol = '%s'" % symbol) + + # Do this instead + t = (symbol,) + c.execute('select * from stocks where symbol=?', t) + + # Larger example + for t in (('2006-03-28', 'BUY', 'IBM', 1000, 45.00), + ('2006-04-05', 'BUY', 'MSOFT', 1000, 72.00), + ('2006-04-06', 'SELL', 'IBM', 500, 53.00), + ): + c.execute('insert into stocks values (?,?,?,?,?)', t) + +To retrieve data after executing a SELECT statement, you can either treat the +cursor as an iterator, call the cursor's :meth:`fetchone` method to retrieve a +single matching row, or call :meth:`fetchall` to get a list of the matching +rows. + +This example uses the iterator form:: + + >>> c = conn.cursor() + >>> c.execute('select * from stocks order by price') + >>> for row in c: + ... print row + ... + (u'2006-01-05', u'BUY', u'RHAT', 100, 35.140000000000001) + (u'2006-03-28', u'BUY', u'IBM', 1000, 45.0) + (u'2006-04-06', u'SELL', u'IBM', 500, 53.0) + (u'2006-04-05', u'BUY', u'MSOFT', 1000, 72.0) + >>> + + +.. seealso:: + + http://www.pysqlite.org + The pysqlite web page. + + http://www.sqlite.org + The SQLite web page; the documentation describes the syntax and the available + data types for the supported SQL dialect. + + :pep:`249` - Database API Specification 2.0 + PEP written by Marc-André Lemburg. + + +.. _sqlite3-module-contents: + +Module functions and constants +------------------------------ + + +.. 
data:: PARSE_DECLTYPES + + This constant is meant to be used with the *detect_types* parameter of the + :func:`connect` function. + + Setting it makes the :mod:`sqlite3` module parse the declared type for each + column it returns. It will parse out the first word of the declared type, i. e. + for "integer primary key", it will parse out "integer". Then for that column, it + will look into the converters dictionary and use the converter function + registered for that type there. Converter names are case-sensitive! + + +.. data:: PARSE_COLNAMES + + This constant is meant to be used with the *detect_types* parameter of the + :func:`connect` function. + + Setting this makes the SQLite interface parse the column name for each column it + returns. It will look for a string formed [mytype] in there, and then decide + that 'mytype' is the type of the column. It will try to find an entry of + 'mytype' in the converters dictionary and then use the converter function found + there to return the value. The column name found in :attr:`cursor.description` + is only the first word of the column name, i. e. if you use something like + ``'as "x [datetime]"'`` in your SQL, then we will parse out everything until the + first blank for the column name: the column name would simply be "x". + + +.. function:: connect(database[, timeout, isolation_level, detect_types, factory]) + + Opens a connection to the SQLite database file *database*. You can use + ``":memory:"`` to open a database connection to a database that resides in RAM + instead of on disk. + + When a database is accessed by multiple connections, and one of the processes + modifies the database, the SQLite database is locked until that transaction is + committed. The *timeout* parameter specifies how long the connection should wait + for the lock to go away until raising an exception. The default for the timeout + parameter is 5.0 (five seconds). 
+ + For the *isolation_level* parameter, please see the + :attr:`Connection.isolation_level` property of :class:`Connection` objects. + + SQLite natively supports only the types TEXT, INTEGER, REAL, BLOB and NULL. If + you want to use other types you must add support for them yourself. The + *detect_types* parameter and the use of custom **converters** registered with the + module-level :func:`register_converter` function allow you to easily do that. + + *detect_types* defaults to 0 (i. e. off, no type detection), you can set it to + any combination of :const:`PARSE_DECLTYPES` and :const:`PARSE_COLNAMES` to turn + type detection on. + + By default, the :mod:`sqlite3` module uses its :class:`Connection` class for the + connect call. You can, however, subclass the :class:`Connection` class and make + :func:`connect` use your class instead by providing your class for the *factory* + parameter. + + Consult the section :ref:`sqlite3-types` of this manual for details. + + The :mod:`sqlite3` module internally uses a statement cache to avoid SQL parsing + overhead. If you want to explicitly set the number of statements that are cached + for the connection, you can set the *cached_statements* parameter. The currently + implemented default is to cache 100 statements. + + +.. function:: register_converter(typename, callable) + + Registers a callable to convert a bytestring from the database into a custom + Python type. The callable will be invoked for all database values that are of + the type *typename*. Confer the parameter *detect_types* of the :func:`connect` + function for how the type detection works. Note that the case of *typename* and + the name of the type in your query must match! + + +.. function:: register_adapter(type, callable) + + Registers a callable to convert the custom Python type *type* into one of + SQLite's supported types.
The callable *callable* accepts as a single parameter + the Python value, and must return a value of the following types: int, long, + float, str (UTF-8 encoded), unicode or buffer. + + +.. function:: complete_statement(sql) + + Returns :const:`True` if the string *sql* contains one or more complete SQL + statements terminated by semicolons. It does not verify that the SQL is + syntactically correct, only that there are no unclosed string literals and the + statement is terminated by a semicolon. + + This can be used to build a shell for SQLite, as in the following example: + + + .. literalinclude:: ../includes/sqlite3/complete_statement.py + + +.. function:: enable_callback_tracebacks(flag) + + By default you will not get any tracebacks in user-defined functions, + aggregates, converters, authorizer callbacks etc. If you want to debug them, you + can call this function with *flag* as True. Afterwards, you will get tracebacks + from callbacks on ``sys.stderr``. Use :const:`False` to disable the feature + again. + + +.. _sqlite3-connection-objects: + +Connection Objects +------------------ + +A :class:`Connection` instance has the following attributes and methods: + +.. attribute:: Connection.isolation_level + + Get or set the current isolation level. None for autocommit mode or one of + "DEFERRED", "IMMEDIATE" or "EXCLUSIVE". See section + :ref:`sqlite3-controlling-transactions` for a more detailed explanation. + + +.. method:: Connection.cursor([cursorClass]) + + The cursor method accepts a single optional parameter *cursorClass*. If + supplied, this must be a custom cursor class that extends + :class:`sqlite3.Cursor`. + + +.. method:: Connection.execute(sql, [parameters]) + + This is a nonstandard shortcut that creates an intermediate cursor object by + calling the cursor method, then calls the cursor's :meth:`execute` method with + the parameters given. + + +..
method:: Connection.executemany(sql, [parameters]) + + This is a nonstandard shortcut that creates an intermediate cursor object by + calling the cursor method, then calls the cursor's :meth:`executemany` method + with the parameters given. + + +.. method:: Connection.executescript(sql_script) + + This is a nonstandard shortcut that creates an intermediate cursor object by + calling the cursor method, then calls the cursor's :meth:`executescript` method + with the parameters given. + + +.. method:: Connection.create_function(name, num_params, func) + + Creates a user-defined function that you can later use from within SQL + statements under the function name *name*. *num_params* is the number of + parameters the function accepts, and *func* is a Python callable that is called + as the SQL function. + + The function can return any of the types supported by SQLite: unicode, str, int, + long, float, buffer and None. + + Example: + + .. literalinclude:: ../includes/sqlite3/md5func.py + + +.. method:: Connection.create_aggregate(name, num_params, aggregate_class) + + Creates a user-defined aggregate function. + + The aggregate class must implement a ``step`` method, which accepts the number + of parameters *num_params*, and a ``finalize`` method which will return the + final result of the aggregate. + + The ``finalize`` method can return any of the types supported by SQLite: + unicode, str, int, long, float, buffer and None. + + Example: + + .. literalinclude:: ../includes/sqlite3/mysumaggr.py + + +.. method:: Connection.create_collation(name, callable) + + Creates a collation with the specified *name* and *callable*. The callable will + be passed two string arguments. It should return -1 if the first is ordered + lower than the second, 0 if they are ordered equal and 1 if the first is ordered + higher than the second. Note that this controls sorting (ORDER BY in SQL) so + your comparisons don't affect other SQL operations. 
+ + Note that the callable will get its parameters as Python bytestrings, which will + normally be encoded in UTF-8. + + The following example shows a custom collation that sorts "the wrong way": + + .. literalinclude:: ../includes/sqlite3/collation_reverse.py + + To remove a collation, call ``create_collation`` with None as callable:: + + con.create_collation("reverse", None) + + +.. method:: Connection.interrupt() + + You can call this method from a different thread to abort any queries that might + be executing on the connection. The query will then abort and the caller will + get an exception. + + +.. method:: Connection.set_authorizer(authorizer_callback) + + This routine registers a callback. The callback is invoked for each attempt to + access a column of a table in the database. The callback should return + :const:`SQLITE_OK` if access is allowed, :const:`SQLITE_DENY` if the entire SQL + statement should be aborted with an error and :const:`SQLITE_IGNORE` if the + column should be treated as a NULL value. These constants are available in the + :mod:`sqlite3` module. + + The first argument to the callback signifies what kind of operation is to be + authorized. The second and third argument will be arguments or :const:`None` + depending on the first argument. The 4th argument is the name of the database + ("main", "temp", etc.) if applicable. The 5th argument is the name of the + inner-most trigger or view that is responsible for the access attempt or + :const:`None` if this access attempt is directly from input SQL code. + + Please consult the SQLite documentation about the possible values for the first + argument and the meaning of the second and third argument depending on the first + one. All necessary constants are available in the :mod:`sqlite3` module. + + +.. attribute:: Connection.row_factory + + You can change this attribute to a callable that accepts the cursor and the + original row as a tuple and will return the real result row. 
This way, you can + implement more advanced ways of returning results, such as returning an object + that can also access columns by name. + + Example: + + .. literalinclude:: ../includes/sqlite3/row_factory.py + + If returning a tuple doesn't suffice and you want name-based access to + columns, you should consider setting :attr:`row_factory` to the + highly-optimized :class:`sqlite3.Row` type. :class:`Row` provides both + index-based and case-insensitive name-based access to columns with almost no + memory overhead. It will probably be better than your own custom + dictionary-based approach or even a db_row based solution. + + .. % XXX what's a db_row-based solution? + + +.. attribute:: Connection.text_factory + + Using this attribute you can control what objects are returned for the TEXT data + type. By default, this attribute is set to :class:`unicode` and the + :mod:`sqlite3` module will return Unicode objects for TEXT. If you want to + return bytestrings instead, you can set it to :class:`str`. + + For efficiency reasons, there's also a way to return Unicode objects only for + non-ASCII data, and bytestrings otherwise. To activate it, set this attribute to + :const:`sqlite3.OptimizedUnicode`. + + You can also set it to any other callable that accepts a single bytestring + parameter and returns the resulting object. + + See the following example code for illustration: + + .. literalinclude:: ../includes/sqlite3/text_factory.py + + +.. attribute:: Connection.total_changes + + Returns the total number of database rows that have been modified, inserted, or + deleted since the database connection was opened. + + +.. _sqlite3-cursor-objects: + +Cursor Objects +-------------- + +A :class:`Cursor` instance has the following attributes and methods: + + +.. method:: Cursor.execute(sql, [parameters]) + + Executes a SQL statement. The SQL statement may be parametrized (i. e. + placeholders instead of SQL literals). 
The :mod:`sqlite3` module supports two + kinds of placeholders: question marks (qmark style) and named placeholders + (named style). + + This example shows how to use parameters with qmark style: + + .. literalinclude:: ../includes/sqlite3/execute_1.py + + This example shows how to use the named style: + + .. literalinclude:: ../includes/sqlite3/execute_2.py + + :meth:`execute` will only execute a single SQL statement. If you try to execute + more than one statement with it, it will raise a Warning. Use + :meth:`executescript` if you want to execute multiple SQL statements with one + call. + + +.. method:: Cursor.executemany(sql, seq_of_parameters) + + Executes a SQL command against all parameter sequences or mappings found in the + sequence *seq_of_parameters*. The :mod:`sqlite3` module also allows using an iterator yielding + parameters instead of a sequence. + + .. literalinclude:: ../includes/sqlite3/executemany_1.py + + Here's a shorter example using a generator: + + .. literalinclude:: ../includes/sqlite3/executemany_2.py + + +.. method:: Cursor.executescript(sql_script) + + This is a nonstandard convenience method for executing multiple SQL statements + at once. It issues a COMMIT statement first, then executes the SQL script it + gets as a parameter. + + *sql_script* can be a bytestring or a Unicode string. + + Example: + + .. literalinclude:: ../includes/sqlite3/executescript.py + + +.. attribute:: Cursor.rowcount + + Although the :class:`Cursor` class of the :mod:`sqlite3` module implements this + attribute, the database engine's own support for the determination of "rows + affected"/"rows selected" is quirky. + + For ``SELECT`` statements, :attr:`rowcount` is always None because we cannot + determine the number of rows a query produced until all rows were fetched. + + For ``DELETE`` statements, SQLite reports :attr:`rowcount` as 0 if you make a + ``DELETE FROM table`` without any condition.
+ + For :meth:`executemany` statements, the number of modifications are summed up + into :attr:`rowcount`. + + As required by the Python DB API Spec, the :attr:`rowcount` attribute "is -1 in + case no executeXX() has been performed on the cursor or the rowcount of the last + operation is not determinable by the interface". + + +.. _sqlite3-types: + +SQLite and Python types +----------------------- + + +Introduction +^^^^^^^^^^^^ + +SQLite natively supports the following types: NULL, INTEGER, REAL, TEXT, BLOB. + +The following Python types can thus be sent to SQLite without any problem: + ++------------------------+-------------+ +| Python type | SQLite type | ++========================+=============+ +| ``None`` | NULL | ++------------------------+-------------+ +| ``int`` | INTEGER | ++------------------------+-------------+ +| ``long`` | INTEGER | ++------------------------+-------------+ +| ``float`` | REAL | ++------------------------+-------------+ +| ``str (UTF8-encoded)`` | TEXT | ++------------------------+-------------+ +| ``unicode`` | TEXT | ++------------------------+-------------+ +| ``buffer`` | BLOB | ++------------------------+-------------+ + +This is how SQLite types are converted to Python types by default: + ++-------------+---------------------------------------------+ +| SQLite type | Python type | ++=============+=============================================+ +| ``NULL`` | None | ++-------------+---------------------------------------------+ +| ``INTEGER`` | int or long, depending on size | ++-------------+---------------------------------------------+ +| ``REAL`` | float | ++-------------+---------------------------------------------+ +| ``TEXT`` | depends on text_factory, unicode by default | ++-------------+---------------------------------------------+ +| ``BLOB`` | buffer | ++-------------+---------------------------------------------+ + +The type system of the :mod:`sqlite3` module is extensible in two ways: you can +store additional 
Python types in a SQLite database via object adaptation, and +you can let the :mod:`sqlite3` module convert SQLite types to different Python +types via converters. + + +Using adapters to store additional Python types in SQLite databases +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As described before, SQLite supports only a limited set of types natively. To +use other Python types with SQLite, you must **adapt** them to one of the +sqlite3 module's supported types for SQLite: one of NoneType, int, long, float, +str, unicode, buffer. + +The :mod:`sqlite3` module uses Python object adaptation, as described in +:pep:`246` for this. The protocol to use is :class:`PrepareProtocol`. + +There are two ways to enable the :mod:`sqlite3` module to adapt a custom Python +type to one of the supported ones. + + +Letting your object adapt itself +"""""""""""""""""""""""""""""""" + +This is a good approach if you write the class yourself. Let's suppose you have +a class like this:: + + class Point(object): + def __init__(self, x, y): + self.x, self.y = x, y + +Now you want to store the point in a single SQLite column. First you'll have to +choose one of the supported types to be used for representing the point. +Let's just use str and separate the coordinates using a semicolon. Then you need +to give your class a method ``__conform__(self, protocol)`` which must return +the converted value. The parameter *protocol* will be :class:`PrepareProtocol`. + +.. literalinclude:: ../includes/sqlite3/adapter_point_1.py + + +Registering an adapter callable +""""""""""""""""""""""""""""""" + +The other possibility is to create a function that converts the type to the +string representation and register the function with :meth:`register_adapter`. + +.. note:: + + The type/class to adapt must be a new-style class, i. e. it must have + :class:`object` as one of its bases. + +.. 
literalinclude:: ../includes/sqlite3/adapter_point_2.py + +The :mod:`sqlite3` module has two default adapters for Python's built-in +:class:`datetime.date` and :class:`datetime.datetime` types. Now let's suppose +we want to store :class:`datetime.datetime` objects not in ISO representation, +but as a Unix timestamp. + +.. literalinclude:: ../includes/sqlite3/adapter_datetime.py + + +Converting SQLite values to custom Python types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Writing an adapter lets you send custom Python types to SQLite. But to make it +really useful we need to make the Python to SQLite to Python roundtrip work. + +Enter converters. + +Let's go back to the :class:`Point` class. We stored the x and y coordinates +separated via semicolons as strings in SQLite. + +First, we'll define a converter function that accepts the string as a parameter +and constructs a :class:`Point` object from it. + +.. note:: + + Converter functions **always** get called with a string, no matter under which + data type you sent the value to SQLite. + +.. note:: + + Converter names are looked up in a case-sensitive manner. + +:: + + def convert_point(s): + x, y = map(float, s.split(";")) + return Point(x, y) + +Now you need to make the :mod:`sqlite3` module know that what you select from +the database is actually a point. There are two ways of doing this: + +* Implicitly via the declared type + +* Explicitly via the column name + +Both ways are described in section :ref:`sqlite3-module-contents`, in the entries +for the constants :const:`PARSE_DECLTYPES` and :const:`PARSE_COLNAMES`. + +The following example illustrates both approaches. + +.. literalinclude:: ../includes/sqlite3/converter_point.py + + +Default adapters and converters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +There are default adapters for the date and datetime types in the datetime +module. They will be sent as ISO dates/ISO timestamps to SQLite. 
+ +The default converters are registered under the name "date" for +:class:`datetime.date` and under the name "timestamp" for +:class:`datetime.datetime`. + +This way, you can use date/timestamps from Python without any additional +fiddling in most cases. The format of the adapters is also compatible with the +experimental SQLite date/time functions. + +The following example demonstrates this. + +.. literalinclude:: ../includes/sqlite3/pysqlite_datetime.py + + +.. _sqlite3-controlling-transactions: + +Controlling Transactions +------------------------ + +By default, the :mod:`sqlite3` module opens transactions implicitly before a +Data Modification Language (DML) statement (i.e. INSERT/UPDATE/DELETE/REPLACE), +and commits transactions implicitly before a non-DML, non-query statement (i. e. +anything other than SELECT/INSERT/UPDATE/DELETE/REPLACE). + +So if you are within a transaction and issue a command like ``CREATE TABLE +...``, ``VACUUM``, ``PRAGMA``, the :mod:`sqlite3` module will commit implicitly +before executing that command. There are two reasons for doing that. The first +is that some of these commands don't work within transactions. The other reason +is that pysqlite needs to keep track of the transaction state (if a transaction +is active or not). + +You can control which kind of "BEGIN" statements pysqlite implicitly executes +(or none at all) via the *isolation_level* parameter to the :func:`connect` +call, or via the :attr:`isolation_level` property of connections. + +If you want **autocommit mode**, then set :attr:`isolation_level` to None. + +Otherwise leave it at its default, which will result in a plain "BEGIN" +statement, or set it to one of SQLite's supported isolation levels: DEFERRED, +IMMEDIATE or EXCLUSIVE. + +As the :mod:`sqlite3` module needs to keep track of the transaction state, you +should not use ``OR ROLLBACK`` or ``ON CONFLICT ROLLBACK`` in your SQL. 
Instead, +catch the :exc:`IntegrityError` and call the :meth:`rollback` method of the +connection yourself. + + +Using pysqlite efficiently +-------------------------- + + +Using shortcut methods +^^^^^^^^^^^^^^^^^^^^^^ + +Using the nonstandard :meth:`execute`, :meth:`executemany` and +:meth:`executescript` methods of the :class:`Connection` object, your code can +be written more concisely because you don't have to create the (often +superfluous) :class:`Cursor` objects explicitly. Instead, the :class:`Cursor` +objects are created implicitly and these shortcut methods return the cursor +objects. This way, you can execute a SELECT statement and iterate over it +directly using only a single call on the :class:`Connection` object. + +.. literalinclude:: ../includes/sqlite3/shortcut_methods.py + + +Accessing columns by name instead of by index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +One useful feature of the :mod:`sqlite3` module is the builtin +:class:`sqlite3.Row` class designed to be used as a row factory. + +Rows wrapped with this class can be accessed both by index (like tuples) and +case-insensitively by name: + +.. literalinclude:: ../includes/sqlite3/rowclass.py + diff --git a/Doc/library/stat.rst b/Doc/library/stat.rst new file mode 100644 index 0000000..430bb23 --- /dev/null +++ b/Doc/library/stat.rst @@ -0,0 +1,167 @@ + +:mod:`stat` --- Interpreting :func:`stat` results +================================================= + +.. module:: stat + :synopsis: Utilities for interpreting the results of os.stat(), os.lstat() and os.fstat(). +.. sectionauthor:: Skip Montanaro <skip@automatrix.com> + + +The :mod:`stat` module defines constants and functions for interpreting the +results of :func:`os.stat`, :func:`os.fstat` and :func:`os.lstat` (if they +exist). For complete details about the :cfunc:`stat`, :cfunc:`fstat` and +:cfunc:`lstat` calls, consult the documentation for your system. 
+ +The :mod:`stat` module defines the following functions to test for specific file +types: + + +.. function:: S_ISDIR(mode) + + Return non-zero if the mode is from a directory. + + +.. function:: S_ISCHR(mode) + + Return non-zero if the mode is from a character special device file. + + +.. function:: S_ISBLK(mode) + + Return non-zero if the mode is from a block special device file. + + +.. function:: S_ISREG(mode) + + Return non-zero if the mode is from a regular file. + + +.. function:: S_ISFIFO(mode) + + Return non-zero if the mode is from a FIFO (named pipe). + + +.. function:: S_ISLNK(mode) + + Return non-zero if the mode is from a symbolic link. + + +.. function:: S_ISSOCK(mode) + + Return non-zero if the mode is from a socket. + +Two additional functions are defined for more general manipulation of the file's +mode: + + +.. function:: S_IMODE(mode) + + Return the portion of the file's mode that can be set by :func:`os.chmod`\ + ---that is, the file's permission bits, plus the sticky bit, set-group-id, and + set-user-id bits (on systems that support them). + + +.. function:: S_IFMT(mode) + + Return the portion of the file's mode that describes the file type (used by the + :func:`S_IS\*` functions above). + +Normally, you would use the :func:`os.path.is\*` functions for testing the type +of a file; the functions here are useful when you are doing multiple tests of +the same file and wish to avoid the overhead of the :cfunc:`stat` system call +for each test. These are also useful when checking for information about a file +that isn't handled by :mod:`os.path`, like the tests for block and character +devices. + +All the variables below are simply symbolic indexes into the 10-tuple returned +by :func:`os.stat`, :func:`os.fstat` or :func:`os.lstat`. + + +.. data:: ST_MODE + + Inode protection mode. + + +.. data:: ST_INO + + Inode number. + + +.. data:: ST_DEV + + Device inode resides on. + + +.. data:: ST_NLINK + + Number of links to the inode. + + +.. 
data:: ST_UID + + User id of the owner. + + +.. data:: ST_GID + + Group id of the owner. + + +.. data:: ST_SIZE + + Size in bytes of a plain file; amount of data waiting on some special files. + + +.. data:: ST_ATIME + + Time of last access. + + +.. data:: ST_MTIME + + Time of last modification. + + +.. data:: ST_CTIME + + The "ctime" as reported by the operating system. On some systems (like Unix) it is + the time of the last metadata change, and, on others (like Windows), it is the + creation time (see platform documentation for details). + +The interpretation of "file size" changes according to the file type. For plain +files this is the size of the file in bytes. For FIFOs and sockets under most +flavors of Unix (including Linux in particular), the "size" is the number of +bytes waiting to be read at the time of the call to :func:`os.stat`, +:func:`os.fstat`, or :func:`os.lstat`; this can sometimes be useful, especially +for polling one of these special files after a non-blocking open. The meaning +of the size field for other character and block devices varies more, depending +on the implementation of the underlying system call. 
+ +Example:: + + import os, sys + from stat import * + + def walktree(top, callback): + '''recursively descend the directory tree rooted at top, + calling the callback function for each regular file''' + + for f in os.listdir(top): + pathname = os.path.join(top, f) + mode = os.stat(pathname)[ST_MODE] + if S_ISDIR(mode): + # It's a directory, recurse into it + walktree(pathname, callback) + elif S_ISREG(mode): + # It's a file, call the callback function + callback(pathname) + else: + # Unknown file type, print a message + print 'Skipping %s' % pathname + + def visitfile(file): + print 'visiting', file + + if __name__ == '__main__': + walktree(sys.argv[1], visitfile) + diff --git a/Doc/library/statvfs.rst b/Doc/library/statvfs.rst new file mode 100644 index 0000000..6ec7c38 --- /dev/null +++ b/Doc/library/statvfs.rst @@ -0,0 +1,67 @@ + +:mod:`statvfs` --- Constants used with :func:`os.statvfs` +========================================================= + +.. module:: statvfs + :synopsis: Constants for interpreting the result of os.statvfs(). +.. sectionauthor:: Moshe Zadka <moshez@zadka.site.co.il> + + +.. % LaTeX'ed from comments in module + +The :mod:`statvfs` module defines constants so interpreting the result of +:func:`os.statvfs`, which returns a tuple, can be made without remembering +"magic numbers." Each of the constants defined in this module is the *index* of +the entry in the tuple returned by :func:`os.statvfs` that contains the +specified information. + + +.. data:: F_BSIZE + + Preferred file system block size. + + +.. data:: F_FRSIZE + + Fundamental file system block size. + + +.. data:: F_BLOCKS + + Total number of blocks in the filesystem. + + +.. data:: F_BFREE + + Total number of free blocks. + + +.. data:: F_BAVAIL + + Free blocks available to non-super user. + + +.. data:: F_FILES + + Total number of file nodes. + + +.. data:: F_FFREE + + Total number of free file nodes. + + +.. data:: F_FAVAIL + + Free nodes available to non-super user. + + +.. 
data:: F_FLAG + + Flags. System dependent: see :cfunc:`statvfs` man page. + + +.. data:: F_NAMEMAX + + Maximum file name length. + diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst new file mode 100644 index 0000000..34c943c --- /dev/null +++ b/Doc/library/stdtypes.rst @@ -0,0 +1,2409 @@ +.. XXX: reference/datamodel and this have quite a few overlaps! + + +.. _bltin-types: + +************** +Built-in Types +************** + +The following sections describe the standard types that are built into the +interpreter. + +.. note:: + + Historically (until release 2.2), Python's built-in types have differed from + user-defined types because it was not possible to use the built-in types as the + basis for object-oriented inheritance. This limitation no longer + exists. + +.. index:: pair: built-in; types + +The principal built-in types are numerics, sequences, mappings, files, classes, +instances and exceptions. + +.. index:: statement: print + +Some operations are supported by several object types; in particular, +practically all objects can be compared, tested for truth value, and converted +to a string (with the :func:`repr` function or the slightly different +:func:`str` function). The latter function is implicitly used when an object is +written by the :func:`print` function. + + +.. _truth: + +Truth Value Testing +=================== + +.. index:: + statement: if + statement: while + pair: truth; value + pair: Boolean; operations + single: false + +Any object can be tested for truth value, for use in an :keyword:`if` or +:keyword:`while` condition or as operand of the Boolean operations below. The +following values are considered false: + + .. index:: single: None (Built-in object) + +* ``None`` + + .. index:: single: False (Built-in object) + +* ``False`` + +* zero of any numeric type, for example, ``0``, ``0L``, ``0.0``, ``0j``. + +* any empty sequence, for example, ``''``, ``()``, ``[]``. + +* any empty mapping, for example, ``{}``. 
+ +* instances of user-defined classes, if the class defines a :meth:`__bool__` or + :meth:`__len__` method, when that method returns the integer zero or + :class:`bool` value ``False``. [#]_ + +.. index:: single: true + +All other values are considered true --- so objects of many types are always +true. + +.. index:: + operator: or + operator: and + single: False + single: True + +Operations and built-in functions that have a Boolean result always return ``0`` +or ``False`` for false and ``1`` or ``True`` for true, unless otherwise stated. +(Important exception: the Boolean operations ``or`` and ``and`` always return +one of their operands.) + + +.. _boolean: + +Boolean Operations --- :keyword:`and`, :keyword:`or`, :keyword:`not` +==================================================================== + +.. index:: pair: Boolean; operations + +These are the Boolean operations, ordered by ascending priority: + ++-------------+---------------------------------+-------+ +| Operation | Result | Notes | ++=============+=================================+=======+ +| ``x or y`` | if *x* is false, then *y*, else | \(1) | +| | *x* | | ++-------------+---------------------------------+-------+ +| ``x and y`` | if *x* is false, then *x*, else | \(2) | +| | *y* | | ++-------------+---------------------------------+-------+ +| ``not x`` | if *x* is false, then ``True``, | \(3) | +| | else ``False`` | | ++-------------+---------------------------------+-------+ + +.. index:: + operator: and + operator: or + operator: not + +Notes: + +(1) + This is a short-circuit operator, so it only evaluates the second + argument if the first one is :const:`False`. + +(2) + This is a short-circuit operator, so it only evaluates the second + argument if the first one is :const:`True`. + +(3) + ``not`` has a lower priority than non-Boolean operators, so ``not a == b`` is + interpreted as ``not (a == b)``, and ``a == not b`` is a syntax error. + + +.. 
_stdcomparisons: + +Comparisons +=========== + +.. index:: pair: chaining; comparisons + +Comparison operations are supported by all objects. They all have the same +priority (which is higher than that of the Boolean operations). Comparisons can +be chained arbitrarily; for example, ``x < y <= z`` is equivalent to ``x < y and +y <= z``, except that *y* is evaluated only once (but in both cases *z* is not +evaluated at all when ``x < y`` is found to be false). + +This table summarizes the comparison operations: + ++------------+-------------------------+-------+ +| Operation | Meaning | Notes | ++============+=========================+=======+ +| ``<`` | strictly less than | | ++------------+-------------------------+-------+ +| ``<=`` | less than or equal | | ++------------+-------------------------+-------+ +| ``>`` | strictly greater than | | ++------------+-------------------------+-------+ +| ``>=`` | greater than or equal | | ++------------+-------------------------+-------+ +| ``==`` | equal | | ++------------+-------------------------+-------+ +| ``!=`` | not equal | | ++------------+-------------------------+-------+ +| ``is`` | object identity | | ++------------+-------------------------+-------+ +| ``is not`` | negated object identity | | ++------------+-------------------------+-------+ + +.. index:: + pair: operator; comparison + operator: == + operator: is + operator: is not + +.. % XXX *All* others have funny characters < ! > + +.. index:: + pair: object; numeric + pair: objects; comparing + +Objects of different types, except different numeric types and different string +types, never compare equal; such objects are ordered consistently but +arbitrarily (so that sorting a heterogeneous array yields a consistent result). +Furthermore, some types (for example, file objects) support only a degenerate +notion of comparison where any two objects of that type are unequal. Again, +such objects are ordered arbitrarily but consistently. 
The ``<``, ``<=``, ``>`` +and ``>=`` operators will raise a :exc:`TypeError` exception when any operand is +a complex number. + +.. index:: single: __cmp__() (instance method) + +Instances of a class normally compare as non-equal unless the class defines the +:meth:`__cmp__` method. Refer to :ref:`customization` for information on the +use of this method to effect object comparisons. + +**Implementation note:** Objects of different types except numbers are ordered +by their type names; objects of the same types that don't support proper +comparison are ordered by their address. + +.. index:: + operator: in + operator: not in + +Two more operations with the same syntactic priority, ``in`` and ``not in``, are +supported only by sequence types (below). + + +.. _typesnumeric: + +Numeric Types --- :class:`int`, :class:`float`, :class:`long`, :class:`complex` +=============================================================================== + +.. index:: + object: numeric + object: Boolean + object: integer + object: long integer + object: floating point + object: complex number + pair: C; language + +There are four distinct numeric types: :dfn:`plain integers`, :dfn:`long +integers`, :dfn:`floating point numbers`, and :dfn:`complex numbers`. In +addition, Booleans are a subtype of plain integers. Plain integers (also just +called :dfn:`integers`) are implemented using :ctype:`long` in C, which gives +them at least 32 bits of precision (``sys.maxint`` is always set to the maximum +plain integer value for the current platform, the minimum value is +``-sys.maxint - 1``). Long integers have unlimited precision. Floating point +numbers are implemented using :ctype:`double` in C. All bets on their precision +are off unless you happen to know the machine you are working with. + +Complex numbers have a real and imaginary part, which are each implemented using +:ctype:`double` in C. To extract these parts from a complex number *z*, use +``z.real`` and ``z.imag``. + +.. 
index:: + pair: numeric; literals + pair: integer; literals + triple: long; integer; literals + pair: floating point; literals + pair: complex number; literals + pair: hexadecimal; literals + pair: octal; literals + +Numbers are created by numeric literals or as the result of built-in functions +and operators. Unadorned integer literals (including hex and octal numbers) +yield plain integers unless the value they denote is too large to be represented +as a plain integer, in which case they yield a long integer. Integer literals +with an ``'L'`` or ``'l'`` suffix yield long integers (``'L'`` is preferred +because ``1l`` looks too much like eleven!). Numeric literals containing a +decimal point or an exponent sign yield floating point numbers. Appending +``'j'`` or ``'J'`` to a numeric literal yields a complex number with a zero real +part. A complex numeric literal is the sum of a real and an imaginary part. + +.. index:: + single: arithmetic + builtin: int + builtin: long + builtin: float + builtin: complex + +Python fully supports mixed arithmetic: when a binary arithmetic operator has +operands of different numeric types, the operand with the "narrower" type is +widened to that of the other, where plain integer is narrower than long integer +is narrower than floating point is narrower than complex. Comparisons between +numbers of mixed type use the same rule. [#]_ The constructors :func:`int`, +:func:`long`, :func:`float`, and :func:`complex` can be used to produce numbers +of a specific type. 
+ +All numeric types (except complex) support the following operations, sorted by +ascending priority (operations in the same box have the same priority; all +numeric operations have a higher priority than comparison operations): + ++--------------------+---------------------------------+--------+ +| Operation | Result | Notes | ++====================+=================================+========+ +| ``x + y`` | sum of *x* and *y* | | ++--------------------+---------------------------------+--------+ +| ``x - y`` | difference of *x* and *y* | | ++--------------------+---------------------------------+--------+ +| ``x * y`` | product of *x* and *y* | | ++--------------------+---------------------------------+--------+ +| ``x / y`` | quotient of *x* and *y* | \(1) | ++--------------------+---------------------------------+--------+ +| ``x // y`` | (floored) quotient of *x* and | \(5) | +| | *y* | | ++--------------------+---------------------------------+--------+ +| ``x % y`` | remainder of ``x / y`` | \(4) | ++--------------------+---------------------------------+--------+ +| ``-x`` | *x* negated | | ++--------------------+---------------------------------+--------+ +| ``+x`` | *x* unchanged | | ++--------------------+---------------------------------+--------+ +| ``abs(x)`` | absolute value or magnitude of | | +| | *x* | | ++--------------------+---------------------------------+--------+ +| ``int(x)`` | *x* converted to integer | \(2) | ++--------------------+---------------------------------+--------+ +| ``long(x)`` | *x* converted to long integer | \(2) | ++--------------------+---------------------------------+--------+ +| ``float(x)`` | *x* converted to floating point | | ++--------------------+---------------------------------+--------+ +| ``complex(re,im)`` | a complex number with real part | | +| | *re*, imaginary part *im*. | | +| | *im* defaults to zero. 
| | ++--------------------+---------------------------------+--------+ +| ``c.conjugate()`` | conjugate of the complex number | | +| | *c* | | ++--------------------+---------------------------------+--------+ +| ``divmod(x, y)`` | the pair ``(x // y, x % y)`` | (3)(4) | ++--------------------+---------------------------------+--------+ +| ``pow(x, y)`` | *x* to the power *y* | | ++--------------------+---------------------------------+--------+ +| ``x ** y`` | *x* to the power *y* | | ++--------------------+---------------------------------+--------+ + +.. index:: + triple: operations on; numeric; types + single: conjugate() (complex number method) + +Notes: + +(1) + .. index:: + pair: integer; division + triple: long; integer; division + + For (plain or long) integer division, the result is an integer. The result is + always rounded towards minus infinity: 1/2 is 0, (-1)/2 is -1, 1/(-2) is -1, and + (-1)/(-2) is 0. Note that the result is a long integer if either operand is a + long integer, regardless of the numeric value. + +(2) + .. index:: + module: math + single: floor() (in module math) + single: ceil() (in module math) + pair: numeric; conversions + pair: C; language + + Conversion from floating point to (long or plain) integer may round or truncate + as in C; see functions :func:`floor` and :func:`ceil` in the :mod:`math` module + for well-defined conversions. + +(3) + See :ref:`built-in-funcs` for a full description. + +(4) + Complex floor division operator, modulo operator, and :func:`divmod`. + + .. deprecated:: 2.3 + Instead convert to float using :func:`abs` if appropriate. + +(5) + Also referred to as integer division. The resultant value is a whole integer, + though the result's type is not necessarily int. + +.. % XXXJH exceptions: overflow (when? what operations?) zerodivision + + +.. _bitstring-ops: + +Bit-string Operations on Integer Types +-------------------------------------- + +.. 
_bit-string-operations: + +Plain and long integer types support additional operations that make sense only +for bit-strings. Negative numbers are treated as their 2's complement value +(for long integers, this assumes a sufficiently large number of bits that no +overflow occurs during the operation). + +The priorities of the binary bit-wise operations are all lower than the numeric +operations and higher than the comparisons; the unary operation ``~`` has the +same priority as the other unary numeric operations (``+`` and ``-``). + +This table lists the bit-string operations sorted in ascending priority +(operations in the same box have the same priority): + ++------------+--------------------------------+----------+ +| Operation | Result | Notes | ++============+================================+==========+ +| ``x | y`` | bitwise :dfn:`or` of *x* and | | +| | *y* | | ++------------+--------------------------------+----------+ +| ``x ^ y`` | bitwise :dfn:`exclusive or` of | | +| | *x* and *y* | | ++------------+--------------------------------+----------+ +| ``x & y`` | bitwise :dfn:`and` of *x* and | | +| | *y* | | ++------------+--------------------------------+----------+ +| ``x << n`` | *x* shifted left by *n* bits | (1), (2) | ++------------+--------------------------------+----------+ +| ``x >> n`` | *x* shifted right by *n* bits | (1), (3) | ++------------+--------------------------------+----------+ +| ``~x`` | the bits of *x* inverted | | ++------------+--------------------------------+----------+ + +.. index:: + triple: operations on; integer; types + pair: bit-string; operations + pair: shifting; operations + pair: masking; operations + +Notes: + +(1) + Negative shift counts are illegal and cause a :exc:`ValueError` to be raised. + +(2) + A left shift by *n* bits is equivalent to multiplication by ``pow(2, n)`` + without overflow check. + +(3) + A right shift by *n* bits is equivalent to division by ``pow(2, n)`` without + overflow check. + + +.. 
_typeiter: + +Iterator Types +============== + +.. versionadded:: 2.2 + +.. index:: + single: iterator protocol + single: protocol; iterator + single: sequence; iteration + single: container; iteration over + +Python supports a concept of iteration over containers. This is implemented +using two distinct methods; these are used to allow user-defined classes to +support iteration. Sequences, described below in more detail, always support +the iteration methods. + +One method needs to be defined for container objects to provide iteration +support: + + +.. method:: container.__iter__() + + Return an iterator object. The object is required to support the iterator + protocol described below. If a container supports different types of + iteration, additional methods can be provided to specifically request + iterators for those iteration types. (An example of an object supporting + multiple forms of iteration would be a tree structure which supports both + breadth-first and depth-first traversal.) This method corresponds to the + :attr:`tp_iter` slot of the type structure for Python objects in the Python/C + API. + +The iterator objects themselves are required to support the following two +methods, which together form the :dfn:`iterator protocol`: + + +.. method:: iterator.__iter__() + + Return the iterator object itself. This is required to allow both containers + and iterators to be used with the :keyword:`for` and :keyword:`in` statements. + This method corresponds to the :attr:`tp_iter` slot of the type structure for + Python objects in the Python/C API. + + +.. method:: iterator.next() + + Return the next item from the container. If there are no further items, raise + the :exc:`StopIteration` exception. This method corresponds to the + :attr:`tp_iternext` slot of the type structure for Python objects in the + Python/C API. 
+ +Python defines several iterator objects to support iteration over general and +specific sequence types, dictionaries, and other more specialized forms. The +specific types are not important beyond their implementation of the iterator +protocol. + +The intention of the protocol is that once an iterator's :meth:`__next__` method +raises :exc:`StopIteration`, it will continue to do so on subsequent calls. +Implementations that do not obey this property are deemed broken. (This +constraint was added in Python 2.3; in Python 2.2, various iterators are broken +according to this rule.) + +Python's generators provide a convenient way to implement the iterator protocol. +If a container object's :meth:`__iter__` method is implemented as a generator, +it will automatically return an iterator object (technically, a generator +object) supplying the :meth:`__iter__` and :meth:`__next__` methods. + + +.. _typesseq: + +Sequence Types --- :class:`str`, :class:`unicode`, :class:`list`, :class:`tuple`, :class:`buffer`, :class:`range` +================================================================================================================= + +There are six sequence types: strings, Unicode strings, lists, tuples, buffers, +and range objects. +(For other containers see the built in :class:`dict`, :class:`list`, +:class:`set`, and :class:`tuple` classes, and the :mod:`collections` +module.) + + +.. index:: + object: sequence + object: string + object: tuple + object: list + object: buffer + object: range + +String literals are written in single or double quotes: ``'xyzzy'``, +``"frobozz"``. See :ref:`strings` for more about string literals. In addition +to the functionality described here, there are also string-specific methods +described in the :ref:`string-methods` section. Lists are constructed with +square brackets, separating items with commas: ``[a, b, c]``. 
Tuples are +constructed by the comma operator (not within square brackets), with or without +enclosing parentheses, but an empty tuple must have the enclosing parentheses, +such as ``a, b, c`` or ``()``. A single item tuple must have a trailing comma, +such as ``(d,)``. + +Buffer objects are not directly supported by Python syntax, but can be created +by calling the builtin function :func:`buffer`. They don't support +concatenation or repetition. + +Objects of type range are similar to buffers in that there is no specific syntax to +create them, but they are created using the :func:`range` function. They don't +support slicing, concatenation or repetition, and using ``in``, ``not in``, +:func:`min` or :func:`max` on them is inefficient. + +Most sequence types support the following operations. The ``in`` and ``not in`` +operations have the same priorities as the comparison operations. The ``+`` and +``*`` operations have the same priority as the corresponding numeric operations. +[#]_ + +This table lists the sequence operations sorted in ascending priority +(operations in the same box have the same priority). 
In the table, *s* and *t* +are sequences of the same type; *n*, *i* and *j* are integers: + ++------------------+--------------------------------+----------+ +| Operation | Result | Notes | ++==================+================================+==========+ +| ``x in s`` | ``True`` if an item of *s* is | \(1) | +| | equal to *x*, else ``False`` | | ++------------------+--------------------------------+----------+ +| ``x not in s`` | ``False`` if an item of *s* is | \(1) | +| | equal to *x*, else ``True`` | | ++------------------+--------------------------------+----------+ +| ``s + t`` | the concatenation of *s* and | \(6) | +| | *t* | | ++------------------+--------------------------------+----------+ +| ``s * n, n * s`` | *n* shallow copies of *s* | \(2) | +| | concatenated | | ++------------------+--------------------------------+----------+ +| ``s[i]`` | *i*'th item of *s*, origin 0 | \(3) | ++------------------+--------------------------------+----------+ +| ``s[i:j]`` | slice of *s* from *i* to *j* | (3), (4) | ++------------------+--------------------------------+----------+ +| ``s[i:j:k]`` | slice of *s* from *i* to *j* | (3), (5) | +| | with step *k* | | ++------------------+--------------------------------+----------+ +| ``len(s)`` | length of *s* | | ++------------------+--------------------------------+----------+ +| ``min(s)`` | smallest item of *s* | | ++------------------+--------------------------------+----------+ +| ``max(s)`` | largest item of *s* | | ++------------------+--------------------------------+----------+ + +Sequence types also support comparisons. In particular, tuples and lists +are compared lexicographically by comparing corresponding +elements. This means that to compare equal, every element must compare +equal and the two sequences must be of the same type and have the same +length. (For full details see :ref:`comparisons` in the language +reference.) + +.. 
index:: + triple: operations on; sequence; types + builtin: len + builtin: min + builtin: max + pair: concatenation; operation + pair: repetition; operation + pair: subscript; operation + pair: slice; operation + pair: extended slice; operation + operator: in + operator: not in + +Notes: + +(1) + When *s* is a string or Unicode string object the ``in`` and ``not in`` + operations act like a substring test. In Python versions before 2.3, *x* had to + be a string of length 1. In Python 2.3 and beyond, *x* may be a string of any + length. + +(2) + Values of *n* less than ``0`` are treated as ``0`` (which yields an empty + sequence of the same type as *s*). Note also that the copies are shallow; + nested structures are not copied. This often haunts new Python programmers; + consider:: + + >>> lists = [[]] * 3 + >>> lists + [[], [], []] + >>> lists[0].append(3) + >>> lists + [[3], [3], [3]] + + What has happened is that ``[[]]`` is a one-element list containing an empty + list, so all three elements of ``[[]] * 3`` are (pointers to) this single empty + list. Modifying any of the elements of ``lists`` modifies this single list. + You can create a list of different lists this way:: + + >>> lists = [[] for i in range(3)] + >>> lists[0].append(3) + >>> lists[1].append(5) + >>> lists[2].append(7) + >>> lists + [[3], [5], [7]] + +(3) + If *i* or *j* is negative, the index is relative to the end of the string: + ``len(s) + i`` or ``len(s) + j`` is substituted. But note that ``-0`` is still + ``0``. + +(4) + The slice of *s* from *i* to *j* is defined as the sequence of items with index + *k* such that ``i <= k < j``. If *i* or *j* is greater than ``len(s)``, use + ``len(s)``. If *i* is omitted or ``None``, use ``0``. If *j* is omitted or + ``None``, use ``len(s)``. If *i* is greater than or equal to *j*, the slice is + empty. + +(5) + The slice of *s* from *i* to *j* with step *k* is defined as the sequence of + items with index ``x = i + n*k`` such that ``0 <= n < (j-i)/k``.
In other words, + the indices are ``i``, ``i+k``, ``i+2*k``, ``i+3*k`` and so on, stopping when + *j* is reached (but never including *j*). If *i* or *j* is greater than + ``len(s)``, use ``len(s)``. If *i* or *j* are omitted or ``None``, they become + "end" values (which end depends on the sign of *k*). Note, *k* cannot be zero. + If *k* is ``None``, it is treated like ``1``. + +(6) + If *s* and *t* are both strings, some Python implementations such as CPython can + usually perform an in-place optimization for assignments of the form ``s=s+t`` + or ``s+=t``. When applicable, this optimization makes quadratic run-time much + less likely. This optimization is both version and implementation dependent. + For performance sensitive code, it is preferable to use the :meth:`str.join` + method which assures consistent linear concatenation performance across versions + and implementations. + + .. versionchanged:: 2.4 + Formerly, string concatenation never occurred in-place. + + +.. _string-methods: + +String Methods +-------------- + +.. index:: pair: string; methods + +Below are listed the string methods which both 8-bit strings and Unicode objects +support. In addition, Python's strings support the sequence type methods +described in the :ref:`typesseq` section. To output formatted strings +use template strings or the ``%`` operator described in the +:ref:`string-formatting` section. Also, see the :mod:`re` module for +string functions based on regular expressions. + +.. method:: str.capitalize() + + Return a copy of the string with only its first character capitalized. + + For 8-bit strings, this method is locale-dependent. + + +.. method:: str.center(width[, fillchar]) + + Return the string centered in a string of length *width*. Padding is done using the + specified *fillchar* (default is a space). + + .. versionchanged:: 2.4 + Support for the *fillchar* argument. + + +..
method:: str.count(sub[, start[, end]]) + + Return the number of occurrences of substring *sub* in string S\ + ``[start:end]``. Optional arguments *start* and *end* are interpreted as in + slice notation. + + +.. method:: str.decode([encoding[, errors]]) + + Decodes the string using the codec registered for *encoding*. *encoding* + defaults to the default string encoding. *errors* may be given to set a + different error handling scheme. The default is ``'strict'``, meaning that + encoding errors raise :exc:`UnicodeError`. Other possible values are + ``'ignore'``, ``'replace'`` and any other name registered via + :func:`codecs.register_error`, see section :ref:`codec-base-classes`. + + .. versionadded:: 2.2 + + .. versionchanged:: 2.3 + Support for other error handling schemes added. + + +.. method:: str.encode([encoding[,errors]]) + + Return an encoded version of the string. Default encoding is the current + default string encoding. *errors* may be given to set a different error + handling scheme. The default for *errors* is ``'strict'``, meaning that + encoding errors raise a :exc:`UnicodeError`. Other possible values are + ``'ignore'``, ``'replace'``, ``'xmlcharrefreplace'``, ``'backslashreplace'`` and + any other name registered via :func:`codecs.register_error`, see section + :ref:`codec-base-classes`. For a list of possible encodings, see section + :ref:`standard-encodings`. + + .. versionadded:: 2.0 + + .. versionchanged:: 2.3 + Support for ``'xmlcharrefreplace'`` and ``'backslashreplace'`` and other error + handling schemes added. + + +.. method:: str.endswith(suffix[, start[, end]]) + + Return ``True`` if the string ends with the specified *suffix*, otherwise return + ``False``. *suffix* can also be a tuple of suffixes to look for. With optional + *start*, test beginning at that position. With optional *end*, stop comparing + at that position. + + .. versionchanged:: 2.5 + Accept tuples as *suffix*. + + +.. 
method:: str.expandtabs([tabsize]) + + Return a copy of the string where all tab characters are expanded using spaces. + If *tabsize* is not given, a tab size of ``8`` characters is assumed. + + +.. method:: str.find(sub[, start[, end]]) + + Return the lowest index in the string where substring *sub* is found, such that + *sub* is contained in the range [*start*, *end*]. Optional arguments *start* + and *end* are interpreted as in slice notation. Return ``-1`` if *sub* is not + found. + + +.. method:: str.index(sub[, start[, end]]) + + Like :meth:`find`, but raise :exc:`ValueError` when the substring is not found. + + +.. method:: str.isalnum() + + Return true if all characters in the string are alphanumeric and there is at + least one character, false otherwise. + + For 8-bit strings, this method is locale-dependent. + + +.. method:: str.isalpha() + + Return true if all characters in the string are alphabetic and there is at least + one character, false otherwise. + + For 8-bit strings, this method is locale-dependent. + + +.. method:: str.isdigit() + + Return true if all characters in the string are digits and there is at least one + character, false otherwise. + + For 8-bit strings, this method is locale-dependent. + + +.. method:: str.isidentifier() + + Return true if the string is a valid identifier according to the language + definition. + + .. XXX link to the definition? + + +.. method:: str.islower() + + Return true if all cased characters in the string are lowercase and there is at + least one cased character, false otherwise. + + For 8-bit strings, this method is locale-dependent. + + +.. method:: str.isspace() + + Return true if there are only whitespace characters in the string and there is + at least one character, false otherwise. + + For 8-bit strings, this method is locale-dependent. + + +.. 
method:: str.istitle() + + Return true if the string is a titlecased string and there is at least one + character, for example uppercase characters may only follow uncased characters + and lowercase characters only cased ones. Return false otherwise. + + For 8-bit strings, this method is locale-dependent. + + +.. method:: str.isupper() + + Return true if all cased characters in the string are uppercase and there is at + least one cased character, false otherwise. + + For 8-bit strings, this method is locale-dependent. + + +.. method:: str.join(seq) + + Return a string which is the concatenation of the strings in the sequence *seq*. + The separator between elements is the string providing this method. + + +.. method:: str.ljust(width[, fillchar]) + + Return the string left justified in a string of length *width*. Padding is done + using the specified *fillchar* (default is a space). The original string is + returned if *width* is less than ``len(s)``. + + .. versionchanged:: 2.4 + Support for the *fillchar* argument. + + +.. method:: str.lower() + + Return a copy of the string converted to lowercase. + + For 8-bit strings, this method is locale-dependent. + + +.. method:: str.lstrip([chars]) + + Return a copy of the string with leading characters removed. The *chars* + argument is a string specifying the set of characters to be removed. If omitted + or ``None``, the *chars* argument defaults to removing whitespace. The *chars* + argument is not a prefix; rather, all combinations of its values are stripped:: + + >>> ' spacious '.lstrip() + 'spacious ' + >>> 'www.example.com'.lstrip('cmowz.') + 'example.com' + + .. versionchanged:: 2.2.2 + Support for the *chars* argument. + + +.. method:: str.partition(sep) + + Split the string at the first occurrence of *sep*, and return a 3-tuple + containing the part before the separator, the separator itself, and the part + after the separator. 
If the separator is not found, return a 3-tuple containing + the string itself, followed by two empty strings. + + .. versionadded:: 2.5 + + +.. method:: str.replace(old, new[, count]) + + Return a copy of the string with all occurrences of substring *old* replaced by + *new*. If the optional argument *count* is given, only the first *count* + occurrences are replaced. + + +.. method:: str.rfind(sub [,start [,end]]) + + Return the highest index in the string where substring *sub* is found, such that + *sub* is contained within ``s[start:end]``. Optional arguments *start* and *end* + are interpreted as in slice notation. Return ``-1`` on failure. + + +.. method:: str.rindex(sub[, start[, end]]) + + Like :meth:`rfind` but raises :exc:`ValueError` when the substring *sub* is not + found. + + +.. method:: str.rjust(width[, fillchar]) + + Return the string right justified in a string of length *width*. Padding is done + using the specified *fillchar* (default is a space). The original string is + returned if *width* is less than ``len(s)``. + + .. versionchanged:: 2.4 + Support for the *fillchar* argument. + + +.. method:: str.rpartition(sep) + + Split the string at the last occurrence of *sep*, and return a 3-tuple + containing the part before the separator, the separator itself, and the part + after the separator. If the separator is not found, return a 3-tuple containing + two empty strings, followed by the string itself. + + .. versionadded:: 2.5 + + +.. method:: str.rsplit([sep [,maxsplit]]) + + Return a list of the words in the string, using *sep* as the delimiter string. + If *maxsplit* is given, at most *maxsplit* splits are done, the *rightmost* + ones. If *sep* is not specified or ``None``, any whitespace string is a + separator. Except for splitting from the right, :meth:`rsplit` behaves like + :meth:`split` which is described in detail below. + + .. versionadded:: 2.4 + + +..
method:: str.rstrip([chars]) + + Return a copy of the string with trailing characters removed. The *chars* + argument is a string specifying the set of characters to be removed. If omitted + or ``None``, the *chars* argument defaults to removing whitespace. The *chars* + argument is not a suffix; rather, all combinations of its values are stripped:: + + >>> ' spacious '.rstrip() + ' spacious' + >>> 'mississippi'.rstrip('ipz') + 'mississ' + + .. versionchanged:: 2.2.2 + Support for the *chars* argument. + + +.. method:: str.split([sep [,maxsplit]]) + + Return a list of the words in the string, using *sep* as the delimiter string. + If *maxsplit* is given, at most *maxsplit* splits are done. (thus, the list will + have at most ``maxsplit+1`` elements). If *maxsplit* is not specified, then + there is no limit on the number of splits (all possible splits are made). + Consecutive delimiters are not grouped together and are deemed to delimit empty + strings (for example, ``'1,,2'.split(',')`` returns ``['1', '', '2']``). The + *sep* argument may consist of multiple characters (for example, ``'1, 2, + 3'.split(', ')`` returns ``['1', '2', '3']``). Splitting an empty string with a + specified separator returns ``['']``. + + If *sep* is not specified or is ``None``, a different splitting algorithm is + applied. First, whitespace characters (spaces, tabs, newlines, returns, and + formfeeds) are stripped from both ends. Then, words are separated by arbitrary + length strings of whitespace characters. Consecutive whitespace delimiters are + treated as a single delimiter (``'1 2 3'.split()`` returns ``['1', '2', + '3']``). Splitting an empty string or a string consisting of just whitespace + returns an empty list. + + +.. method:: str.splitlines([keepends]) + + Return a list of the lines in the string, breaking at line boundaries. Line + breaks are not included in the resulting list unless *keepends* is given and + true. + + +.. 
method:: str.startswith(prefix[, start[, end]]) + + Return ``True`` if string starts with the *prefix*, otherwise return ``False``. + *prefix* can also be a tuple of prefixes to look for. With optional *start*, + test string beginning at that position. With optional *end*, stop comparing + string at that position. + + .. versionchanged:: 2.5 + Accept tuples as *prefix*. + + +.. method:: str.strip([chars]) + + Return a copy of the string with the leading and trailing characters removed. + The *chars* argument is a string specifying the set of characters to be removed. + If omitted or ``None``, the *chars* argument defaults to removing whitespace. + The *chars* argument is not a prefix or suffix; rather, all combinations of its + values are stripped:: + + >>> ' spacious '.strip() + 'spacious' + >>> 'www.example.com'.strip('cmowz.') + 'example' + + .. versionchanged:: 2.2.2 + Support for the *chars* argument. + + +.. method:: str.swapcase() + + Return a copy of the string with uppercase characters converted to lowercase and + vice versa. + + For 8-bit strings, this method is locale-dependent. + + +.. method:: str.title() + + Return a titlecased version of the string: words start with uppercase + characters, all remaining cased characters are lowercase. + + For 8-bit strings, this method is locale-dependent. + + +.. method:: str.translate(table[, deletechars]) + + Return a copy of the string where all characters occurring in the optional + argument *deletechars* are removed, and the remaining characters have been + mapped through the given translation table, which must be a string of length + 256. + + You can use the :func:`maketrans` helper function in the :mod:`string` module to + create a translation table. For string objects, set the *table* argument to + ``None`` for translations that only delete characters:: + + >>> 'read this short text'.translate(None, 'aeiou') + 'rd ths shrt txt' + + .. versionadded:: 2.6 + Support for a ``None`` *table* argument. 
+ + For Unicode objects, the :meth:`translate` method does not accept the optional + *deletechars* argument. Instead, it returns a copy of *s* where all + characters have been mapped through the given translation table which must be a + mapping of Unicode ordinals to Unicode ordinals, Unicode strings or ``None``. + Unmapped characters are left untouched. Characters mapped to ``None`` are + deleted. Note, a more flexible approach is to create a custom character mapping + codec using the :mod:`codecs` module (see :mod:`encodings.cp1251` for an + example). + + +.. method:: str.upper() + + Return a copy of the string converted to uppercase. + + For 8-bit strings, this method is locale-dependent. + + +.. method:: str.zfill(width) + + Return the numeric string left filled with zeros in a string of length *width*. + The original string is returned if *width* is less than ``len(s)``. + + .. versionadded:: 2.2.2 + + +.. _string-formatting: + +String Formatting Operations +---------------------------- + +.. index:: + single: formatting, string (%) + single: interpolation, string (%) + single: string; formatting + single: string; interpolation + single: printf-style formatting + single: sprintf-style formatting + single: % formatting + single: % interpolation + +String and Unicode objects have one unique built-in operation: the ``%`` +operator (modulo). This is also known as the string *formatting* or +*interpolation* operator. Given ``format % values`` (where *format* is a string +or Unicode object), ``%`` conversion specifications in *format* are replaced +with zero or more elements of *values*. The effect is similar to using +:cfunc:`sprintf` in the C language. If *format* is a Unicode object, or if any +of the objects being converted using the ``%s`` conversion are Unicode objects, +the result will also be a Unicode object. + +If *format* requires a single argument, *values* may be a single non-tuple +object.
[#]_ Otherwise, *values* must be a tuple with exactly the number of +items specified by the format string, or a single mapping object (for example, a +dictionary). + +A conversion specifier contains two or more characters and has the following +components, which must occur in this order: + +#. The ``'%'`` character, which marks the start of the specifier. + +#. Mapping key (optional), consisting of a parenthesised sequence of characters + (for example, ``(somename)``). + +#. Conversion flags (optional), which affect the result of some conversion + types. + +#. Minimum field width (optional). If specified as an ``'*'`` (asterisk), the + actual width is read from the next element of the tuple in *values*, and the + object to convert comes after the minimum field width and optional precision. + +#. Precision (optional), given as a ``'.'`` (dot) followed by the precision. If + specified as ``'*'`` (an asterisk), the actual precision is read from the next + element of the tuple in *values*, and the value to convert comes after the + precision. + +#. Length modifier (optional). + +#. Conversion type. + +When the right argument is a dictionary (or other mapping type), then the +formats in the string *must* include a parenthesised mapping key into that +dictionary inserted immediately after the ``'%'`` character. The mapping key +selects the value to be formatted from the mapping. For example:: + + >>> print '%(language)s has %(#)03d quote types.' % \ + {'language': "Python", "#": 2} + Python has 002 quote types. + +In this case no ``*`` specifiers may occur in a format (since they require a +sequential parameter list). + +The conversion flag characters are: + ++---------+---------------------------------------------------------------------+ +| Flag | Meaning | ++=========+=====================================================================+ +| ``'#'`` | The value conversion will use the "alternate form" (where defined | +| | below).
| ++---------+---------------------------------------------------------------------+ +| ``'0'`` | The conversion will be zero padded for numeric values. | ++---------+---------------------------------------------------------------------+ +| ``'-'`` | The converted value is left adjusted (overrides the ``'0'`` | +| | conversion if both are given). | ++---------+---------------------------------------------------------------------+ +| ``' '`` | (a space) A blank should be left before a positive number (or empty | +| | string) produced by a signed conversion. | ++---------+---------------------------------------------------------------------+ +| ``'+'`` | A sign character (``'+'`` or ``'-'``) will precede the conversion | +| | (overrides a "space" flag). | ++---------+---------------------------------------------------------------------+ + +A length modifier (``h``, ``l``, or ``L``) may be present, but is ignored as it +is not necessary for Python. + +The conversion types are: + ++------------+-----------------------------------------------------+-------+ +| Conversion | Meaning | Notes | ++============+=====================================================+=======+ +| ``'d'`` | Signed integer decimal. | | ++------------+-----------------------------------------------------+-------+ +| ``'i'`` | Signed integer decimal. | | ++------------+-----------------------------------------------------+-------+ +| ``'o'`` | Unsigned octal. | \(1) | ++------------+-----------------------------------------------------+-------+ +| ``'u'`` | Unsigned decimal. | | ++------------+-----------------------------------------------------+-------+ +| ``'x'`` | Unsigned hexadecimal (lowercase). | \(2) | ++------------+-----------------------------------------------------+-------+ +| ``'X'`` | Unsigned hexadecimal (uppercase). | \(2) | ++------------+-----------------------------------------------------+-------+ +| ``'e'`` | Floating point exponential format (lowercase). 
| \(3) | ++------------+-----------------------------------------------------+-------+ +| ``'E'`` | Floating point exponential format (uppercase). | \(3) | ++------------+-----------------------------------------------------+-------+ +| ``'f'`` | Floating point decimal format. | \(3) | ++------------+-----------------------------------------------------+-------+ +| ``'F'`` | Floating point decimal format. | \(3) | ++------------+-----------------------------------------------------+-------+ +| ``'g'`` | Floating point format. Uses exponential format if | \(4) | +| | exponent is greater than -4 or less than precision, | | +| | decimal format otherwise. | | ++------------+-----------------------------------------------------+-------+ +| ``'G'`` | Floating point format. Uses exponential format if | \(4) | +| | exponent is greater than -4 or less than precision, | | +| | decimal format otherwise. | | ++------------+-----------------------------------------------------+-------+ +| ``'c'`` | Single character (accepts integer or single | | +| | character string). | | ++------------+-----------------------------------------------------+-------+ +| ``'r'`` | String (converts any python object using | \(5) | +| | :func:`repr`). | | ++------------+-----------------------------------------------------+-------+ +| ``'s'`` | String (converts any python object using | \(6) | +| | :func:`str`). | | ++------------+-----------------------------------------------------+-------+ +| ``'%'`` | No argument is converted, results in a ``'%'`` | | +| | character in the result. | | ++------------+-----------------------------------------------------+-------+ + +Notes: + +(1) + The alternate form causes a leading zero (``'0'``) to be inserted between + left-hand padding and the formatting of the number if the leading character + of the result is not already a zero. 
+ +(2) + The alternate form causes a leading ``'0x'`` or ``'0X'`` (depending on whether + the ``'x'`` or ``'X'`` format was used) to be inserted between left-hand padding + and the formatting of the number if the leading character of the result is not + already a zero. + +(3) + The alternate form causes the result to always contain a decimal point, even if + no digits follow it. + + The precision determines the number of digits after the decimal point and + defaults to 6. + +(4) + The alternate form causes the result to always contain a decimal point, and + trailing zeroes are not removed as they would otherwise be. + + The precision determines the number of significant digits before and after the + decimal point and defaults to 6. + +(5) + The ``%r`` conversion was added in Python 2.0. + + The precision determines the maximal number of characters used. + +(6) + If the object or format provided is a :class:`unicode` string, the resulting + string will also be :class:`unicode`. + + The precision determines the maximal number of characters used. + +Since Python strings have an explicit length, ``%s`` conversions do not assume +that ``'\0'`` is the end of the string. + +.. % XXX Examples? + +For safety reasons, floating point precisions are clipped to 50; ``%f`` +conversions for numbers whose absolute value is over 1e25 are replaced by ``%g`` +conversions. [#]_ All other errors raise exceptions. + +.. index:: + module: string + module: re + +Additional string operations are defined in standard modules :mod:`string` and +:mod:`re`. + + +.. _typesseq-range: + +XRange Type +----------- + +.. index:: object: range + +The :class:`range` type is an immutable sequence which is commonly used for +looping. The advantage of the :class:`range` type is that a :class:`range` +object will always take the same amount of memory, no matter the size of the +range it represents. There are no consistent performance advantages.
+ +XRange objects have very little behavior: they only support indexing, iteration, +and the :func:`len` function. + + +.. _typesseq-mutable: + +Mutable Sequence Types +---------------------- + +.. index:: + triple: mutable; sequence; types + object: list + +List objects support additional operations that allow in-place modification of +the object. Other mutable sequence types (when added to the language) should +also support these operations. Strings and tuples are immutable sequence types: +such objects cannot be modified once created. The following operations are +defined on mutable sequence types (where *x* is an arbitrary object): + ++------------------------------+--------------------------------+---------------------+ +| Operation | Result | Notes | ++==============================+================================+=====================+ +| ``s[i] = x`` | item *i* of *s* is replaced by | | +| | *x* | | ++------------------------------+--------------------------------+---------------------+ +| ``s[i:j] = t`` | slice of *s* from *i* to *j* | | +| | is replaced by the contents of | | +| | the iterable *t* | | ++------------------------------+--------------------------------+---------------------+ +| ``del s[i:j]`` | same as ``s[i:j] = []`` | | ++------------------------------+--------------------------------+---------------------+ +| ``s[i:j:k] = t`` | the elements of ``s[i:j:k]`` | \(1) | +| | are replaced by those of *t* | | ++------------------------------+--------------------------------+---------------------+ +| ``del s[i:j:k]`` | removes the elements of | | +| | ``s[i:j:k]`` from the list | | ++------------------------------+--------------------------------+---------------------+ +| ``s.append(x)`` | same as ``s[len(s):len(s)] = | \(2) | +| | [x]`` | | ++------------------------------+--------------------------------+---------------------+ +| ``s.extend(x)`` | same as ``s[len(s):len(s)] = | \(3) | +| | x`` | | 
++------------------------------+--------------------------------+---------------------+ +| ``s.count(x)`` | return number of *i*'s for | | +| | which ``s[i] == x`` | | ++------------------------------+--------------------------------+---------------------+ +| ``s.index(x[, i[, j]])`` | return smallest *k* such that | \(4) | +| | ``s[k] == x`` and ``i <= k < | | +| | j`` | | ++------------------------------+--------------------------------+---------------------+ +| ``s.insert(i, x)`` | same as ``s[i:i] = [x]`` | \(5) | ++------------------------------+--------------------------------+---------------------+ +| ``s.pop([i])`` | same as ``x = s[i]; del s[i]; | \(6) | +| | return x`` | | ++------------------------------+--------------------------------+---------------------+ +| ``s.remove(x)`` | same as ``del s[s.index(x)]`` | \(4) | ++------------------------------+--------------------------------+---------------------+ +| ``s.reverse()`` | reverses the items of *s* in | \(7) | +| | place | | ++------------------------------+--------------------------------+---------------------+ +| ``s.sort([cmp[, key[, | sort the items of *s* in place | (7), (8), (9), (10) | +| reverse]]])`` | | | ++------------------------------+--------------------------------+---------------------+ + +.. index:: + triple: operations on; sequence; types + triple: operations on; list; type + pair: subscript; assignment + pair: slice; assignment + pair: extended slice; assignment + statement: del + single: append() (list method) + single: extend() (list method) + single: count() (list method) + single: index() (list method) + single: insert() (list method) + single: pop() (list method) + single: remove() (list method) + single: reverse() (list method) + single: sort() (list method) + +Notes: + +(1) + *t* must have the same length as the slice it is replacing. 
+ +(2) + The C implementation of Python has historically accepted multiple parameters and + implicitly joined them into a tuple; this no longer works in Python 2.0. Use of + this misfeature has been deprecated since Python 1.4. + +(3) + *x* can be any iterable object. + +(4) + Raises :exc:`ValueError` when *x* is not found in *s*. When a negative index is + passed as the second or third parameter to the :meth:`index` method, the list + length is added, as for slice indices. If it is still negative, it is truncated + to zero, as for slice indices. + + .. versionchanged:: 2.3 + Previously, :meth:`index` didn't have arguments for specifying start and stop + positions. + +(5) + When a negative index is passed as the first parameter to the :meth:`insert` + method, the list length is added, as for slice indices. If it is still + negative, it is truncated to zero, as for slice indices. + + .. versionchanged:: 2.3 + Previously, all negative indices were truncated to zero. + +(6) + The :meth:`pop` method is only supported by the list and array types. The + optional argument *i* defaults to ``-1``, so that by default the last item is + removed and returned. + +(7) + The :meth:`sort` and :meth:`reverse` methods modify the list in place for + economy of space when sorting or reversing a large list. To remind you that + they operate by side effect, they don't return the sorted or reversed list. + +(8) + The :meth:`sort` method takes optional arguments for controlling the + comparisons. + + *cmp* specifies a custom comparison function of two arguments (list items) which + should return a negative, zero or positive number depending on whether the first + argument is considered smaller than, equal to, or larger than the second + argument: ``cmp=lambda x,y: cmp(x.lower(), y.lower())`` + + *key* specifies a function of one argument that is used to extract a comparison + key from each list element: ``key=str.lower`` + + *reverse* is a boolean value. 
If set to ``True``, then the list elements are + sorted as if each comparison were reversed. + + In general, the *key* and *reverse* conversion processes are much faster than + specifying an equivalent *cmp* function. This is because *cmp* is called + multiple times for each list element while *key* and *reverse* touch each + element only once. + + .. versionchanged:: 2.3 + Support for ``None`` as an equivalent to omitting *cmp* was added. + + .. versionchanged:: 2.4 + Support for *key* and *reverse* was added. + +(9) + Starting with Python 2.3, the :meth:`sort` method is guaranteed to be stable. A + sort is stable if it guarantees not to change the relative order of elements + that compare equal --- this is helpful for sorting in multiple passes (for + example, sort by department, then by salary grade). + +(10) + While a list is being sorted, the effect of attempting to mutate, or even + inspect, the list is undefined. The C implementation of Python 2.3 and newer + makes the list appear empty for the duration, and raises :exc:`ValueError` if it + can detect that the list has been mutated during a sort. + + +.. _types-set: + +Set Types --- :class:`set`, :class:`frozenset` +============================================== + +.. index:: object: set + +A :dfn:`set` object is an unordered collection of distinct hashable objects. +Common uses include membership testing, removing duplicates from a sequence, and +computing mathematical operations such as intersection, union, difference, and +symmetric difference. +(For other containers see the built in :class:`dict`, :class:`list`, +and :class:`tuple` classes, and the :mod:`collections` module.) + + +.. versionadded:: 2.4 + +Like other collections, sets support ``x in set``, ``len(set)``, and ``for x in +set``. Being an unordered collection, sets do not record element position or +order of insertion. Accordingly, sets do not support indexing, slicing, or +other sequence-like behavior. 
+ +There are currently two builtin set types, :class:`set` and :class:`frozenset`. +The :class:`set` type is mutable --- the contents can be changed using methods +like :meth:`add` and :meth:`remove`. Since it is mutable, it has no hash value +and cannot be used as either a dictionary key or as an element of another set. +The :class:`frozenset` type is immutable and hashable --- its contents cannot be +altered after it is created; it can therefore be used as a dictionary key or as +an element of another set. + +The constructors for both classes work the same: + +.. class:: set([iterable]) + frozenset([iterable]) + + Return a new set or frozenset object whose elements are taken from + *iterable*. The elements of a set must be hashable. To represent sets of + sets, the inner sets must be :class:`frozenset` objects. If *iterable* is + not specified, a new empty set is returned. + +Instances of :class:`set` and :class:`frozenset` provide the following +operations: + +.. describe:: len(s) + + Return the cardinality of set *s*. + +.. describe:: x in s + + Test *x* for membership in *s*. + +.. describe:: x not in s + + Test *x* for non-membership in *s*. + +.. method:: set.issubset(other) + set <= other + + Test whether every element in the set is in *other*. + +.. method:: set.issuperset(other) + set >= other + + Test whether every element in *other* is in the set. + +.. method:: set.union(other) + set | other + + Return a new set with elements from both sets. + +.. method:: set.intersection(other) + set & other + + Return a new set with elements common to both sets. + +.. method:: set.difference(other) + set - other + + Return a new set with elements in the set that are not in *other*. + +.. method:: set.symmetric_difference(other) + set ^ other + + Return a new set with elements in either the set or *other* but not both. + +.. method:: set.copy() + + Return a new set with a shallow copy of *s*. 
+ + +Note, the non-operator versions of the :meth:`union`, :meth:`intersection`, +:meth:`difference`, :meth:`symmetric_difference`, :meth:`issubset`, and +:meth:`issuperset` methods will accept any iterable as an argument. In +contrast, their operator-based counterparts require their arguments to be sets. +This precludes error-prone constructions like ``set('abc') & 'cbs'`` in favor of +the more readable ``set('abc').intersection('cbs')``. + +Both :class:`set` and :class:`frozenset` support set-to-set comparisons. Two +sets are equal if and only if every element of each set is contained in the +other (each is a subset of the other). A set is less than another set if and +only if the first set is a proper subset of the second set (is a subset, but is +not equal). A set is greater than another set if and only if the first set is a +proper superset of the second set (is a superset, but is not equal). + +Instances of :class:`set` are compared to instances of :class:`frozenset` based +on their members. For example, ``set('abc') == frozenset('abc')`` returns +``True``. + +The subset and equality comparisons do not generalize to a complete ordering +function. For example, any two non-empty disjoint sets are not equal and are not subsets +of each other, so *all* of the following return ``False``: ``a<b``, ``a==b``, +or ``a>b``. Accordingly, sets do not implement the :meth:`__cmp__` method. + +Since sets only define partial ordering (subset relationships), the output of +the :meth:`list.sort` method is undefined for lists of sets. + +Set elements are like dictionary keys; they need to define both :meth:`__hash__` +and :meth:`__eq__` methods. + +Binary operations that mix :class:`set` instances with :class:`frozenset` return +the type of the first operand. For example: ``frozenset('ab') | set('bc')`` +returns an instance of :class:`frozenset`. + +The following operations are available for :class:`set` but do not +apply to immutable instances of :class:`frozenset`: + +.. 
method:: set.update(other) + set |= other + + Update the set, adding elements from *other*. + +.. method:: set.intersection_update(other) + set &= other + + Update the set, keeping only elements found in it and *other*. + +.. method:: set.difference_update(other) + set -= other + + Update the set, removing elements found in *other*. + +.. method:: set.symmetric_difference_update(other) + set ^= other + + Update the set, keeping only elements found in either set, but not in both. + +.. method:: set.add(el) + + Add element *el* to the set. + +.. method:: set.remove(el) + + Remove element *el* from the set. Raises :exc:`KeyError` if *el* is not + contained in the set. + +.. method:: set.discard(el) + + Remove element *el* from the set if it is present. + +.. method:: set.pop() + + Remove and return an arbitrary element from the set. Raises :exc:`KeyError` + if the set is empty. + +.. method:: set.clear() + + Remove all elements from the set. + + +Note, the non-operator versions of the :meth:`update`, +:meth:`intersection_update`, :meth:`difference_update`, and +:meth:`symmetric_difference_update` methods will accept any iterable as an +argument. + + +.. _typesmapping: + +Mapping Types --- :class:`dict` +=============================== + +.. index:: + object: mapping + object: dictionary + triple: operations on; mapping; types + triple: operations on; dictionary; type + statement: del + builtin: len + +A :dfn:`mapping` object maps immutable values to arbitrary objects. Mappings +are mutable objects. There is currently only one standard mapping type, the +:dfn:`dictionary`. +(For other containers see the built in :class:`list`, +:class:`set`, and :class:`tuple` classes, and the :mod:`collections` +module.) + +A dictionary's keys are *almost* arbitrary values. Only +values containing lists, dictionaries or other mutable types (that are compared +by value rather than by object identity) may not be used as keys. 
Numeric types +used for keys obey the normal rules for numeric comparison: if two numbers +compare equal (such as ``1`` and ``1.0``) then they can be used interchangeably +to index the same dictionary entry. (Note however, that since computers +store floating-point numbers as approximations it is usually unwise to +use them as dictionary keys.) + +Dictionaries can be created by placing a comma-separated list of ``key: value`` +pairs within braces, for example: ``{'jack': 4098, 'sjoerd': 4127}`` or ``{4098: +'jack', 4127: 'sjoerd'}``, or by the :class:`dict` constructor. + +.. class:: dict([arg]) + + Return a new dictionary initialized from an optional positional argument or from + a set of keyword arguments. If no arguments are given, return a new empty + dictionary. If the positional argument *arg* is a mapping object, return a + dictionary mapping the same keys to the same values as does the mapping object. + Otherwise the positional argument must be a sequence, a container that supports + iteration, or an iterator object. The elements of the argument must each also + be of one of those kinds, and each must in turn contain exactly two objects. + The first is used as a key in the new dictionary, and the second as the key's + value. If a given key is seen more than once, the last value associated with it + is retained in the new dictionary. + + If keyword arguments are given, the keywords themselves with their associated + values are added as items to the dictionary. If a key is specified both in the + positional argument and as a keyword argument, the value associated with the + keyword is retained in the dictionary. For example, these all return a + dictionary equal to ``{"one": 2, "two": 3}``: + + * ``dict(one=2, two=3)`` + + * ``dict({'one': 2, 'two': 3})`` + + * ``dict(zip(('one', 'two'), (2, 3)))`` + + * ``dict([['two', 3], ['one', 2]])`` + + The first example only works for keys that are valid Python + identifiers; the others work with any valid keys. + + .. 
versionadded:: 2.2 + + .. versionchanged:: 2.3 + Support for building a dictionary from keyword arguments added. + + +These are the operations that dictionaries support (and therefore, custom mapping +types should support too): + +.. describe:: len(d) + + Return the number of items in the dictionary *d*. + +.. describe:: d[key] + + Return the item of *d* with key *key*. Raises a :exc:`KeyError` if *key* is + not in the map. + + .. versionadded:: 2.5 + If a subclass of dict defines a method :meth:`__missing__`, if the key + *key* is not present, the ``d[key]`` operation calls that method with the + key *key* as argument. The ``d[key]`` operation then returns or raises + whatever is returned or raised by the ``__missing__(key)`` call if the key + is not present. No other operations or methods invoke + :meth:`__missing__`. If :meth:`__missing__` is not defined, + :exc:`KeyError` is raised. :meth:`__missing__` must be a method; it + cannot be an instance variable. For an example, see + :class:`collections.defaultdict`. + +.. describe:: d[key] = value + + Set ``d[key]`` to *value*. + +.. describe:: del d[key] + + Remove ``d[key]`` from *d*. Raises a :exc:`KeyError` if *key* is not in the + map. + +.. describe:: key in d + + Return ``True`` if *d* has a key *key*, else ``False``. + + .. versionadded:: 2.2 + +.. describe:: key not in d + + Equivalent to ``not key in d``. + + .. versionadded:: 2.2 + +.. method:: dict.clear() + + Remove all items from the dictionary. + +.. method:: dict.copy() + + Return a shallow copy of the dictionary. + +.. method:: dict.fromkeys(seq[, value]) + + Create a new dictionary with keys from *seq* and values set to *value*. + + :func:`fromkeys` is a class method that returns a new dictionary. *value* + defaults to ``None``. + + .. versionadded:: 2.3 + +.. method:: dict.get(key[, default]) + + Return the value for *key* if *key* is in the dictionary, else *default*. 
If + *default* is not given, it defaults to ``None``, so that this method never + raises a :exc:`KeyError`. + +.. method:: dict.has_key(key) + + ``d.has_key(key)`` is equivalent to ``key in d``, but deprecated. + +.. method:: dict.items() + + Return a copy of the dictionary's list of ``(key, value)`` pairs. + + .. note:: + + Keys and values are listed in an arbitrary order which is non-random, varies + across Python implementations, and depends on the dictionary's history of + insertions and deletions. If :meth:`items`, :meth:`keys`, :meth:`values`, + :meth:`iteritems`, :meth:`iterkeys`, and :meth:`itervalues` are called with no + intervening modifications to the dictionary, the lists will directly correspond. + This allows the creation of ``(value, key)`` pairs using :func:`zip`: ``pairs = + zip(d.values(), d.keys())``. The same relationship holds for the + :meth:`iterkeys` and :meth:`itervalues` methods: ``pairs = zip(d.itervalues(), + d.iterkeys())`` provides the same value for ``pairs``. Another way to create the + same list is ``pairs = [(v, k) for (k, v) in d.iteritems()]``. + +.. method:: dict.iteritems() + + Return an iterator over the dictionary's ``(key, value)`` pairs. + See the note for :meth:`dict.items`. + + .. versionadded:: 2.2 + +.. method:: dict.iterkeys() + + Return an iterator over the dictionary's keys. See the note for + :meth:`dict.items`. + + .. versionadded:: 2.2 + +.. method:: dict.itervalues() + + Return an iterator over the dictionary's values. See the note for + :meth:`dict.items`. + + .. versionadded:: 2.2 + +.. method:: dict.keys() + + Return a copy of the dictionary's list of keys. See the note for + :meth:`dict.items`. + +.. method:: dict.pop(key[, default]) + + If *key* is in the dictionary, remove it and return its value, else return + *default*. If *default* is not given and *key* is not in the dictionary, a + :exc:`KeyError` is raised. + + .. versionadded:: 2.3 + +.. 
method:: dict.popitem() + + Remove and return an arbitrary ``(key, value)`` pair from the dictionary. + + :func:`popitem` is useful to destructively iterate over a dictionary, as + often used in set algorithms. If the dictionary is empty, calling + :func:`popitem` raises a :exc:`KeyError`. + +.. method:: dict.setdefault(key[, default]) + + If *key* is in the dictionary, return its value. If not, insert *key* with a + value of *default* and return *default*. *default* defaults to ``None``. + +.. method:: dict.update([other]) + + Update the dictionary with the key/value pairs from *other*, overwriting existing + keys. Return ``None``. + + :func:`update` accepts either another dictionary object or an iterable of + key/value pairs (as a tuple or other iterable of length two). If keyword + arguments are specified, the dictionary is then updated with those + key/value pairs: ``d.update(red=1, blue=2)``. + + .. versionchanged:: 2.4 + Allowed the argument to be an iterable of key/value pairs and allowed + keyword arguments. + +.. method:: dict.values() + + Return a copy of the dictionary's list of values. See the note for + :meth:`dict.items`. + + +.. _bltin-file-objects: + +File Objects +============ + +.. index:: + object: file + builtin: file + module: os + module: socket + +File objects are implemented using C's ``stdio`` package and can be +created with the built-in :func:`file` and (more usually) :func:`open` +constructors described in the :ref:`built-in-funcs` section. [#]_ File +objects are also returned by some other built-in functions and methods, +such as :func:`os.popen` and :func:`os.fdopen` and the :meth:`makefile` +method of socket objects. + +When a file operation fails for an I/O-related reason, the exception +:exc:`IOError` is raised. This includes situations where the operation is not +defined for some reason, like :meth:`seek` on a tty device or writing a file +opened for reading. + +Files have the following methods: + + +.. 
method:: file.close() + + Close the file. A closed file cannot be read or written any more. Any operation + which requires that the file be open will raise a :exc:`ValueError` after the + file has been closed. Calling :meth:`close` more than once is allowed. + + As of Python 2.5, you can avoid having to call this method explicitly if you use + the :keyword:`with` statement. For example, the following code will + automatically close ``f`` when the :keyword:`with` block is exited:: + + from __future__ import with_statement + + with open("hello.txt") as f: + for line in f: + print line + + In older versions of Python, you would have needed to do this to get the same + effect:: + + f = open("hello.txt") + try: + for line in f: + print line + finally: + f.close() + + .. note:: + + Not all "file-like" types in Python support use as a context manager for the + :keyword:`with` statement. If your code is intended to work with any file-like + object, you can use the function :func:`contextlib.closing` instead of using + the object directly. + + +.. method:: file.flush() + + Flush the internal buffer, like ``stdio``'s :cfunc:`fflush`. This may be a + no-op on some file-like objects. + + +.. method:: file.fileno() + + .. index:: + single: file descriptor + single: descriptor, file + module: fcntl + + Return the integer "file descriptor" that is used by the underlying + implementation to request I/O operations from the operating system. This can be + useful for other, lower level interfaces that use file descriptors, such as the + :mod:`fcntl` module or :func:`os.read` and friends. + + .. note:: + + File-like objects which do not have a real file descriptor should *not* provide + this method! + + +.. method:: file.isatty() + + Return ``True`` if the file is connected to a tty(-like) device, else ``False``. + + .. note:: + + If a file-like object is not associated with a real file, this method should + *not* be implemented. + + +.. 
method:: file.__next__() + + A file object is its own iterator, for example ``iter(f)`` returns *f* (unless + *f* is closed). When a file is used as an iterator, typically in a + :keyword:`for` loop (for example, ``for line in f: print line``), the + :meth:`__next__` method is called repeatedly. This method returns the next + input line, or raises :exc:`StopIteration` when EOF is hit when the file is open + for reading (behavior is undefined when the file is open for writing). In order + to make a :keyword:`for` loop the most efficient way of looping over the lines + of a file (a very common operation), the :meth:`__next__` method uses a hidden + read-ahead buffer. As a consequence of using a read-ahead buffer, combining + :meth:`__next__` with other file methods (like :meth:`readline`) does not work + right. However, using :meth:`seek` to reposition the file to an absolute + position will flush the read-ahead buffer. + + .. versionadded:: 2.3 + + +.. method:: file.read([size]) + + Read at most *size* bytes from the file (less if the read hits EOF before + obtaining *size* bytes). If the *size* argument is negative or omitted, read + all data until EOF is reached. The bytes are returned as a string object. An + empty string is returned when EOF is encountered immediately. (For certain + files, like ttys, it makes sense to continue reading after an EOF is hit.) Note + that this method may call the underlying C function :cfunc:`fread` more than + once in an effort to acquire as close to *size* bytes as possible. Also note + that when in non-blocking mode, less data than what was requested may be + returned, even if no *size* parameter was given. + + +.. method:: file.readline([size]) + + Read one entire line from the file. A trailing newline character is kept in the + string (but may be absent when a file ends with an incomplete line). 
[#]_ If + the *size* argument is present and non-negative, it is a maximum byte count + (including the trailing newline) and an incomplete line may be returned. An + empty string is returned *only* when EOF is encountered immediately. + + .. note:: + + Unlike ``stdio``'s :cfunc:`fgets`, the returned string contains null characters + (``'\0'``) if they occurred in the input. + + +.. method:: file.readlines([sizehint]) + + Read until EOF using :meth:`readline` and return a list containing the lines + thus read. If the optional *sizehint* argument is present, instead of + reading up to EOF, whole lines totalling approximately *sizehint* bytes + (possibly after rounding up to an internal buffer size) are read. Objects + implementing a file-like interface may choose to ignore *sizehint* if it + cannot be implemented, or cannot be implemented efficiently. + + +.. method:: file.seek(offset[, whence]) + + Set the file's current position, like ``stdio``'s :cfunc:`fseek`. The *whence* + argument is optional and defaults to ``os.SEEK_SET`` or ``0`` (absolute file + positioning); other values are ``os.SEEK_CUR`` or ``1`` (seek relative to the + current position) and ``os.SEEK_END`` or ``2`` (seek relative to the file's + end). There is no return value. Note that if the file is opened for appending + (mode ``'a'`` or ``'a+'``), any :meth:`seek` operations will be undone at the + next write. If the file is only opened for writing in append mode (mode + ``'a'``), this method is essentially a no-op, but it remains useful for files + opened in append mode with reading enabled (mode ``'a+'``). If the file is + opened in text mode (without ``'b'``), only offsets returned by :meth:`tell` are + legal. Use of other offsets causes undefined behavior. + + Note that not all file objects are seekable. + + .. versionchanged:: 2.6 + Passing float values as offset has been deprecated + + +.. method:: file.tell() + + Return the file's current position, like ``stdio``'s :cfunc:`ftell`. + + .. 
note:: + + On Windows, :meth:`tell` can return illegal values (after an :cfunc:`fgets`) + when reading files with Unix-style line-endings. Use binary mode (``'rb'``) to + circumvent this problem. + + +.. method:: file.truncate([size]) + + Truncate the file's size. If the optional *size* argument is present, the file + is truncated to (at most) that size. The size defaults to the current position. + The current file position is not changed. Note that if a specified size exceeds + the file's current size, the result is platform-dependent: possibilities + include that the file may remain unchanged, increase to the specified size as if + zero-filled, or increase to the specified size with undefined new content. + Availability: Windows, many Unix variants. + + +.. method:: file.write(str) + + Write a string to the file. There is no return value. Due to buffering, the + string may not actually show up in the file until the :meth:`flush` or + :meth:`close` method is called. + + +.. method:: file.writelines(sequence) + + Write a sequence of strings to the file. The sequence can be any iterable + object producing strings, typically a list of strings. There is no return value. + (The name is intended to match :meth:`readlines`; :meth:`writelines` does not + add line separators.) + +Files support the iterator protocol. Each iteration returns the same result as +``file.readline()``, and iteration ends when the :meth:`readline` method returns +an empty string. + +File objects also offer a number of other interesting attributes. These are not +required for file-like objects, but should be implemented if they make sense for +the particular object. + + +.. attribute:: file.closed + + bool indicating the current state of the file object. This is a read-only + attribute; the :meth:`close` method changes the value. It may not be available + on all file-like objects. + + +.. attribute:: file.encoding + + The encoding that this file uses. 
When Unicode strings are written to a file, + they will be converted to byte strings using this encoding. In addition, when + the file is connected to a terminal, the attribute gives the encoding that the + terminal is likely to use (that information might be incorrect if the user has + misconfigured the terminal). The attribute is read-only and may not be present + on all file-like objects. It may also be ``None``, in which case the file uses + the system default encoding for converting Unicode strings. + + .. versionadded:: 2.3 + + +.. attribute:: file.mode + + The I/O mode for the file. If the file was created using the :func:`open` + built-in function, this will be the value of the *mode* parameter. This is a + read-only attribute and may not be present on all file-like objects. + + +.. attribute:: file.name + + If the file object was created using :func:`open`, the name of the file. + Otherwise, some string that indicates the source of the file object, of the + form ``<...>``. This is a read-only attribute and may not be present on all + file-like objects. + + +.. attribute:: file.newlines + + If Python was built with the :option:`--with-universal-newlines` option to + :program:`configure` (the default) this read-only attribute exists, and for + files opened in universal newline read mode it keeps track of the types of + newlines encountered while reading the file. The values it can take are + ``'\r'``, ``'\n'``, ``'\r\n'``, ``None`` (unknown, no newlines read yet) or a + tuple containing all the newline types seen, to indicate that multiple newline + conventions were encountered. For files not opened in universal newline read + mode the value of this attribute will be ``None``. + + +.. attribute:: file.softspace + + Boolean that indicates whether a space character needs to be printed before + another value when using the :keyword:`print` statement. 
Classes that are trying + to simulate a file object should also have a writable :attr:`softspace` + attribute, which should be initialized to zero. This will be automatic for most + classes implemented in Python (care may be needed for objects that override + attribute access); types implemented in C will have to provide a writable + :attr:`softspace` attribute. + + .. note:: + + This attribute is not used to control the :keyword:`print` statement, but to + allow the implementation of :keyword:`print` to keep track of its internal + state. + + +.. _typecontextmanager: + +Context Manager Types +===================== + +.. versionadded:: 2.5 + +.. index:: + single: context manager + single: context management protocol + single: protocol; context management + +Python's :keyword:`with` statement supports the concept of a runtime context +defined by a context manager. This is implemented using two separate methods +that allow user-defined classes to define a runtime context that is entered +before the statement body is executed and exited when the statement ends. + +The :dfn:`context management protocol` consists of a pair of methods that need +to be provided for a context manager object to define a runtime context: + + +.. method:: contextmanager.__enter__() + + Enter the runtime context and return either this object or another object + related to the runtime context. The value returned by this method is bound to + the identifier in the :keyword:`as` clause of :keyword:`with` statements using + this context manager. + + An example of a context manager that returns itself is a file object. File + objects return themselves from __enter__() to allow :func:`open` to be used as + the context expression in a :keyword:`with` statement. + + An example of a context manager that returns a related object is the one + returned by ``decimal.Context.get_manager()``. These managers set the active + decimal context to a copy of the original decimal context and then return the + copy. 
This allows changes to be made to the current decimal context in the body + of the :keyword:`with` statement without affecting code outside the + :keyword:`with` statement. + + +.. method:: contextmanager.__exit__(exc_type, exc_val, exc_tb) + + Exit the runtime context and return a Boolean flag indicating if any exception + that occurred should be suppressed. If an exception occurred while executing the + body of the :keyword:`with` statement, the arguments contain the exception type, + value and traceback information. Otherwise, all three arguments are ``None``. + + Returning a true value from this method will cause the :keyword:`with` statement + to suppress the exception and continue execution with the statement immediately + following the :keyword:`with` statement. Otherwise the exception continues + propagating after this method has finished executing. Exceptions that occur + during execution of this method will replace any exception that occurred in the + body of the :keyword:`with` statement. + + The exception passed in should never be reraised explicitly - instead, this + method should return a false value to indicate that the method completed + successfully and does not want to suppress the raised exception. This allows + context management code (such as ``contextlib.nested``) to easily detect whether + or not an :meth:`__exit__` method has actually failed. + +Python defines several context managers to support easy thread synchronisation, +prompt closure of files or other objects, and simpler manipulation of the active +decimal arithmetic context. The specific types are not treated specially beyond +their implementation of the context management protocol. See the +:mod:`contextlib` module for some examples. + +Python's generators and the ``contextlib.contextmanager`` decorator provide a +convenient way to implement these protocols. 
If a generator function is +decorated with the ``contextlib.contextmanager`` decorator, it will return a +context manager implementing the necessary :meth:`__enter__` and +:meth:`__exit__` methods, rather than the iterator produced by an undecorated +generator function. + +Note that there is no specific slot for any of these methods in the type +structure for Python objects in the Python/C API. Extension types wanting to +define these methods must provide them as a normal Python accessible method. +Compared to the overhead of setting up the runtime context, the overhead of a +single class dictionary lookup is negligible. + + +.. _typesother: + +Other Built-in Types +==================== + +The interpreter supports several other kinds of objects. Most of these support +only one or two operations. + + +.. _typesmodules: + +Modules +------- + +The only special operation on a module is attribute access: ``m.name``, where +*m* is a module and *name* accesses a name defined in *m*'s symbol table. +Module attributes can be assigned to. (Note that the :keyword:`import` +statement is not, strictly speaking, an operation on a module object; ``import +foo`` does not require a module object named *foo* to exist, rather it requires +an (external) *definition* for a module named *foo* somewhere.) + +A special member of every module is :attr:`__dict__`. This is the dictionary +containing the module's symbol table. Modifying this dictionary will actually +change the module's symbol table, but direct assignment to the :attr:`__dict__` +attribute is not possible (you can write ``m.__dict__['a'] = 1``, which defines +``m.a`` to be ``1``, but you can't write ``m.__dict__ = {}``). Modifying +:attr:`__dict__` directly is not recommended. + +Modules built into the interpreter are written like this: ``<module 'sys' +(built-in)>``. If loaded from a file, they are written as ``<module 'os' from +'/usr/local/lib/pythonX.Y/os.pyc'>``. + + +.. 
_typesobjects: + +Classes and Class Instances +--------------------------- + +See :ref:`objects` and :ref:`class` for these. + + +.. _typesfunctions: + +Functions +--------- + +Function objects are created by function definitions. The only operation on a +function object is to call it: ``func(argument-list)``. + +There are really two flavors of function objects: built-in functions and +user-defined functions. Both support the same operation (to call the function), +but the implementation is different, hence the different object types. + +See :ref:`function` for more information. + + +.. _typesmethods: + +Methods +------- + +.. index:: object: method + +Methods are functions that are called using the attribute notation. There are +two flavors: built-in methods (such as :meth:`append` on lists) and class +instance methods. Built-in methods are described with the types that support +them. + +The implementation adds two special read-only attributes to class instance +methods: ``m.im_self`` is the object on which the method operates, and +``m.im_func`` is the function implementing the method. Calling ``m(arg-1, +arg-2, ..., arg-n)`` is completely equivalent to calling ``m.im_func(m.im_self, +arg-1, arg-2, ..., arg-n)``. + +Class instance methods are either *bound* or *unbound*, referring to whether the +method was accessed through an instance or a class, respectively. When a method +is unbound, its ``im_self`` attribute will be ``None`` and if called, an +explicit ``self`` object must be passed as the first argument. In this case, +``self`` must be an instance of the unbound method's class (or a subclass of +that class), otherwise a :exc:`TypeError` is raised. + +Like function objects, method objects support getting arbitrary attributes. +However, since method attributes are actually stored on the underlying function +object (``meth.im_func``), setting method attributes on either bound or unbound +methods is disallowed. 
Attempting to set a method attribute results in a +:exc:`TypeError` being raised. In order to set a method attribute, you need to +explicitly set it on the underlying function object:: + + class C: + def method(self): + pass + + c = C() + c.method.im_func.whoami = 'my name is c' + +See :ref:`types` for more information. + + +.. _bltin-code-objects: + +Code Objects +------------ + +.. index:: object: code + +.. index:: + builtin: compile + single: __code__ (function object attribute) + +Code objects are used by the implementation to represent "pseudo-compiled" +executable Python code such as a function body. They differ from function +objects because they don't contain a reference to their global execution +environment. Code objects are returned by the built-in :func:`compile` function +and can be extracted from function objects through their :attr:`__code__` +attribute. See also the :mod:`code` module. + +.. index:: + builtin: exec + builtin: eval + +A code object can be executed or evaluated by passing it (instead of a source +string) to the :func:`exec` or :func:`eval` built-in functions. + +See :ref:`types` for more information. + + +.. _bltin-type-objects: + +Type Objects +------------ + +.. index:: + builtin: type + module: types + +Type objects represent the various object types. An object's type is accessed +by the built-in function :func:`type`. There are no special operations on +types. The standard module :mod:`types` defines names for all standard built-in +types. + +Types are written like this: ``<type 'int'>``. + + +.. _bltin-null-object: + +The Null Object +--------------- + +This object is returned by functions that don't explicitly return a value. It +supports no special operations. There is exactly one null object, named +``None`` (a built-in name). + +It is written as ``None``. + + +.. _bltin-ellipsis-object: + +The Ellipsis Object +------------------- + +This object is mostly used by extended slice notation (see :ref:`slicings`). 
It +supports no special operations. There is exactly one ellipsis object, named +:const:`Ellipsis` (a built-in name). + +It is written as ``Ellipsis`` or ``...``. + + +Boolean Values +-------------- + +Boolean values are the two constant objects ``False`` and ``True``. They are +used to represent truth values (although other values can also be considered +false or true). In numeric contexts (for example when used as the argument to +an arithmetic operator), they behave like the integers 0 and 1, respectively. +The built-in function :func:`bool` can be used to cast any value to a Boolean, +if the value can be interpreted as a truth value (see section Truth Value +Testing above). + +.. index:: + single: False + single: True + pair: Boolean; values + +They are written as ``False`` and ``True``, respectively. + + +.. _typesinternal: + +Internal Objects +---------------- + +See :ref:`types` for this information. It describes stack frame objects, +traceback objects, and slice objects. + + +.. _specialattrs: + +Special Attributes +================== + +The implementation adds a few special read-only attributes to several object +types, where they are relevant. Some of these are not reported by the +:func:`dir` built-in function. + + +.. attribute:: object.__dict__ + + A dictionary or other mapping object used to store an object's (writable) + attributes. + + +.. attribute:: instance.__class__ + + The class to which a class instance belongs. + + +.. attribute:: class.__bases__ + + The tuple of base classes of a class object. If there are no base classes, this + will be an empty tuple. + + +.. attribute:: class.__name__ + + The name of the class or type. + +.. rubric:: Footnotes + +.. [#] Additional information on these special methods may be found in the Python + Reference Manual (:ref:`customization`). + +.. [#] As a consequence, the list ``[1, 2]`` is considered equal to ``[1.0, 2.0]``, and + similarly for tuples. + +.. 
[#] They must have since the parser can't tell the type of the operands. + +.. [#] To format only a tuple you should therefore provide a singleton tuple whose only + element is the tuple to be formatted. + +.. [#] These numbers are fairly arbitrary. They are intended to avoid printing endless + strings of meaningless digits without hampering correct use and without having + to know the exact precision of floating point values on a particular machine. + +.. [#] :func:`file` is new in Python 2.2. The older built-in :func:`open` is an alias + for :func:`file`. + +.. [#] The advantage of leaving the newline on is that returning an empty string is + then an unambiguous EOF indication. It is also possible (in cases where it + might matter, for example, if you want to make an exact copy of a file while + scanning its lines) to tell whether the last line of a file ended in a newline + or not (yes this happens!). diff --git a/Doc/library/string.rst b/Doc/library/string.rst new file mode 100644 index 0000000..aa2494b --- /dev/null +++ b/Doc/library/string.rst @@ -0,0 +1,468 @@ + +:mod:`string` --- Common string operations +========================================== + +.. module:: string + :synopsis: Common string operations. + + +.. index:: module: re + +The :mod:`string` module contains a number of useful constants and +classes, as well as some deprecated legacy functions that are also +available as methods on strings. In addition, Python's built-in string +classes support the sequence type methods described in the +:ref:`typesseq` section, and also the string-specific methods described +in the :ref:`string-methods` section. To output formatted strings use +template strings or the ``%`` operator described in the +:ref:`string-formatting` section. Also, see the :mod:`re` module for +string functions based on regular expressions. + + +String constants +---------------- + +The constants defined in this module are: + + +.. 
data:: ascii_letters + + The concatenation of the :const:`ascii_lowercase` and :const:`ascii_uppercase` + constants described below. This value is not locale-dependent. + + +.. data:: ascii_lowercase + + The lowercase letters ``'abcdefghijklmnopqrstuvwxyz'``. This value is not + locale-dependent and will not change. + + +.. data:: ascii_uppercase + + The uppercase letters ``'ABCDEFGHIJKLMNOPQRSTUVWXYZ'``. This value is not + locale-dependent and will not change. + + +.. data:: digits + + The string ``'0123456789'``. + + +.. data:: hexdigits + + The string ``'0123456789abcdefABCDEF'``. + + +.. data:: octdigits + + The string ``'01234567'``. + + +.. data:: punctuation + + String of ASCII characters which are considered punctuation characters + in the ``C`` locale. + + +.. data:: printable + + String of ASCII characters which are considered printable. This is a + combination of :const:`digits`, :const:`ascii_letters`, :const:`punctuation`, + and :const:`whitespace`. + + +.. data:: whitespace + + A string containing all characters that are considered whitespace. + This includes the characters space, tab, linefeed, return, formfeed, and + vertical tab. + + +Template strings +---------------- + +Templates provide simpler string substitutions as described in :pep:`292`. +Instead of the normal ``%``\ -based substitutions, Templates support ``$``\ +-based substitutions, using the following rules: + +* ``$$`` is an escape; it is replaced with a single ``$``. + +* ``$identifier`` names a substitution placeholder matching a mapping key of + ``"identifier"``. By default, ``"identifier"`` must spell a Python + identifier. The first non-identifier character after the ``$`` character + terminates this placeholder specification. + +* ``${identifier}`` is equivalent to ``$identifier``. It is required when valid + identifier characters follow the placeholder but are not part of the + placeholder, such as ``"${noun}ification"``. 
+ +Any other appearance of ``$`` in the string will result in a :exc:`ValueError` +being raised. + +.. versionadded:: 2.4 + +The :mod:`string` module provides a :class:`Template` class that implements +these rules. The methods of :class:`Template` are: + + +.. class:: Template(template) + + The constructor takes a single argument which is the template string. + + +.. method:: Template.substitute(mapping[, **kws]) + + Performs the template substitution, returning a new string. *mapping* is any + dictionary-like object with keys that match the placeholders in the template. + Alternatively, you can provide keyword arguments, where the keywords are the + placeholders. When both *mapping* and *kws* are given and there are duplicates, + the placeholders from *kws* take precedence. + + +.. method:: Template.safe_substitute(mapping[, **kws]) + + Like :meth:`substitute`, except that if placeholders are missing from *mapping* + and *kws*, instead of raising a :exc:`KeyError` exception, the original + placeholder will appear in the resulting string intact. Also, unlike with + :meth:`substitute`, any other appearances of the ``$`` will simply return ``$`` + instead of raising :exc:`ValueError`. + + While other exceptions may still occur, this method is called "safe" because + it always tries to return a usable string instead of raising an + exception. In another sense, :meth:`safe_substitute` may be anything other than + safe, since it will silently ignore malformed templates containing dangling + delimiters, unmatched braces, or placeholders that are not valid Python + identifiers. + +:class:`Template` instances also provide one public data attribute: + + +.. attribute:: string.template + + This is the object passed to the constructor's *template* argument. In general, + you shouldn't change it, but read-only access is not enforced. 
+ +Here is an example of how to use a Template:: + + >>> from string import Template + >>> s = Template('$who likes $what') + >>> s.substitute(who='tim', what='kung pao') + 'tim likes kung pao' + >>> d = dict(who='tim') + >>> Template('Give $who $100').substitute(d) + Traceback (most recent call last): + [...] + ValueError: Invalid placeholder in string: line 1, col 10 + >>> Template('$who likes $what').substitute(d) + Traceback (most recent call last): + [...] + KeyError: 'what' + >>> Template('$who likes $what').safe_substitute(d) + 'tim likes $what' + +Advanced usage: you can derive subclasses of :class:`Template` to customize the +placeholder syntax, delimiter character, or the entire regular expression used +to parse template strings. To do this, you can override these class attributes: + +* *delimiter* -- This is the literal string describing a placeholder introducing + delimiter. The default value is ``$``. Note that this should *not* be a regular + expression, as the implementation will call :meth:`re.escape` on this string as + needed. + +* *idpattern* -- This is the regular expression describing the pattern for + non-braced placeholders (the braces will be added automatically as + appropriate). The default value is the regular expression + ``[_a-z][_a-z0-9]*``. + +Alternatively, you can provide the entire regular expression pattern by +overriding the class attribute *pattern*. If you do this, the value must be a +regular expression object with four named capturing groups. The capturing +groups correspond to the rules given above, along with the invalid placeholder +rule: + +* *escaped* -- This group matches the escape sequence, e.g. ``$$``, in the + default pattern. + +* *named* -- This group matches the unbraced placeholder name; it should not + include the delimiter in the capturing group. + +* *braced* -- This group matches the brace enclosed placeholder name; it should + not include either the delimiter or braces in the capturing group. 
+ +* *invalid* -- This group matches any other delimiter pattern (usually a single + delimiter), and it should appear last in the regular expression. + + +String functions +---------------- + +The following functions are available to operate on string and Unicode objects. +They are not available as string methods. + + +.. function:: capwords(s) + + Split the argument into words using :func:`split`, capitalize each word using + :func:`capitalize`, and join the capitalized words using :func:`join`. Note + that this replaces runs of whitespace characters by a single space, and removes + leading and trailing whitespace. + + +.. function:: maketrans(from, to) + + Return a translation table suitable for passing to :func:`translate`, that will + map each character in *from* into the character at the same position in *to*; + *from* and *to* must have the same length. + + .. warning:: + + Don't use strings derived from :const:`lowercase` and :const:`uppercase` as + arguments; in some locales, these don't have the same length. For case + conversions, always use :func:`lower` and :func:`upper`. + + +Deprecated string functions +--------------------------- + +The following list of functions are also defined as methods of string and +Unicode objects; see section :ref:`string-methods` for more information on +those. You should consider these functions as deprecated, although they will +not be removed until Python 3.0. The functions defined in this module are: + + +.. function:: atof(s) + + .. deprecated:: 2.0 + Use the :func:`float` built-in function. + + .. index:: builtin: float + + Convert a string to a floating point number. The string must have the standard + syntax for a floating point literal in Python, optionally preceded by a sign + (``+`` or ``-``). Note that this behaves identical to the built-in function + :func:`float` when passed a string. + + .. note:: + + .. 
index:: + single: NaN + single: Infinity + + When passing in a string, values for NaN and Infinity may be returned, depending + on the underlying C library. The specific set of strings accepted which cause + these values to be returned depends entirely on the C library and is known to + vary. + + +.. function:: atoi(s[, base]) + + .. deprecated:: 2.0 + Use the :func:`int` built-in function. + + .. index:: builtin: eval + + Convert string *s* to an integer in the given *base*. The string must consist + of one or more digits, optionally preceded by a sign (``+`` or ``-``). The + *base* defaults to 10. If it is 0, a default base is chosen depending on the + leading characters of the string (after stripping the sign): ``0x`` or ``0X`` + means 16, ``0`` means 8, anything else means 10. If *base* is 16, a leading + ``0x`` or ``0X`` is always accepted, though not required. This behaves + identically to the built-in function :func:`int` when passed a string. (Also + note: for a more flexible interpretation of numeric literals, use the built-in + function :func:`eval`.) + + +.. function:: atol(s[, base]) + + .. deprecated:: 2.0 + Use the :func:`long` built-in function. + + .. index:: builtin: long + + Convert string *s* to a long integer in the given *base*. The string must + consist of one or more digits, optionally preceded by a sign (``+`` or ``-``). + The *base* argument has the same meaning as for :func:`atoi`. A trailing ``l`` + or ``L`` is not allowed, except if the base is 0. Note that when invoked + without *base* or with *base* set to 10, this behaves identical to the built-in + function :func:`long` when passed a string. + + +.. function:: capitalize(word) + + Return a copy of *word* with only its first character capitalized. + + +.. function:: expandtabs(s[, tabsize]) + + Expand tabs in a string replacing them by one or more spaces, depending on the + current column and the given tab size. 
The column number is reset to zero after + each newline occurring in the string. This doesn't understand other non-printing + characters or escape sequences. The tab size defaults to 8. + + +.. function:: find(s, sub[, start[,end]]) + + Return the lowest index in *s* where the substring *sub* is found such that + *sub* is wholly contained in ``s[start:end]``. Return ``-1`` on failure. + Defaults for *start* and *end* and interpretation of negative values are the same + as for slices. + + +.. function:: rfind(s, sub[, start[, end]]) + + Like :func:`find` but find the highest index. + + +.. function:: index(s, sub[, start[, end]]) + + Like :func:`find` but raise :exc:`ValueError` when the substring is not found. + + +.. function:: rindex(s, sub[, start[, end]]) + + Like :func:`rfind` but raise :exc:`ValueError` when the substring is not found. + + +.. function:: count(s, sub[, start[, end]]) + + Return the number of (non-overlapping) occurrences of substring *sub* in string + ``s[start:end]``. Defaults for *start* and *end* and interpretation of negative + values are the same as for slices. + + +.. function:: lower(s) + + Return a copy of *s*, but with upper case letters converted to lower case. + + +.. function:: split(s[, sep[, maxsplit]]) + + Return a list of the words of the string *s*. If the optional second argument + *sep* is absent or ``None``, the words are separated by arbitrary strings of + whitespace characters (space, tab, newline, return, formfeed). If the second + argument *sep* is present and not ``None``, it specifies a string to be used as + the word separator. The returned list will then have one more item than the + number of non-overlapping occurrences of the separator in the string. The + optional third argument *maxsplit* defaults to 0. If it is nonzero, at most + *maxsplit* number of splits occur, and the remainder of the string is returned + as the final element of the list (thus, the list will have at most + ``maxsplit+1`` elements). 
+ + The behavior of split on an empty string depends on the value of *sep*. If *sep* + is not specified, or specified as ``None``, the result will be an empty list. + If *sep* is specified as any string, the result will be a list containing one + element which is an empty string. + + +.. function:: rsplit(s[, sep[, maxsplit]]) + + Return a list of the words of the string *s*, scanning *s* from the end. To all + intents and purposes, the resulting list of words is the same as returned by + :func:`split`, except when the optional third argument *maxsplit* is explicitly + specified and nonzero. When *maxsplit* is nonzero, at most *maxsplit* number of + splits -- the *rightmost* ones -- occur, and the remainder of the string is + returned as the first element of the list (thus, the list will have at most + ``maxsplit+1`` elements). + + .. versionadded:: 2.4 + + +.. function:: splitfields(s[, sep[, maxsplit]]) + + This function behaves identically to :func:`split`. (In the past, :func:`split` + was only used with one argument, while :func:`splitfields` was only used with + two arguments.) + + +.. function:: join(words[, sep]) + + Concatenate a list or tuple of words with intervening occurrences of *sep*. + The default value for *sep* is a single space character. It is always true that + ``string.join(string.split(s, sep), sep)`` equals *s*. + + +.. function:: joinfields(words[, sep]) + + This function behaves identically to :func:`join`. (In the past, :func:`join` + was only used with one argument, while :func:`joinfields` was only used with two + arguments.) Note that there is no :meth:`joinfields` method on string objects; + use the :meth:`join` method instead. + + +.. function:: lstrip(s[, chars]) + + Return a copy of the string with leading characters removed. If *chars* is + omitted or ``None``, whitespace characters are removed. 
If given and not + ``None``, *chars* must be a string; the characters in the string will be + stripped from the beginning of the string this method is called on. + + .. versionchanged:: 2.2.3 + The *chars* parameter was added. The *chars* parameter cannot be passed in + earlier 2.2 versions. + + +.. function:: rstrip(s[, chars]) + + Return a copy of the string with trailing characters removed. If *chars* is + omitted or ``None``, whitespace characters are removed. If given and not + ``None``, *chars* must be a string; the characters in the string will be + stripped from the end of the string this method is called on. + + .. versionchanged:: 2.2.3 + The *chars* parameter was added. The *chars* parameter cannot be passed in + earlier 2.2 versions. + + +.. function:: strip(s[, chars]) + + Return a copy of the string with leading and trailing characters removed. If + *chars* is omitted or ``None``, whitespace characters are removed. If given and + not ``None``, *chars* must be a string; the characters in the string will be + stripped from both ends of the string this method is called on. + + .. versionchanged:: 2.2.3 + The *chars* parameter was added. The *chars* parameter cannot be passed in + earlier 2.2 versions. + + +.. function:: swapcase(s) + + Return a copy of *s*, but with lower case letters converted to upper case and + vice versa. + + +.. function:: translate(s, table[, deletechars]) + + Delete all characters from *s* that are in *deletechars* (if present), and then + translate the characters using *table*, which must be a 256-character string + giving the translation for each character value, indexed by its ordinal. If + *table* is ``None``, then only the character deletion step is performed. + + +.. function:: upper(s) + + Return a copy of *s*, but with lower case letters converted to upper case. + + +.. 
function:: ljust(s, width) + rjust(s, width) + center(s, width) + + These functions respectively left-justify, right-justify and center a string in + a field of given width. They return a string that is at least *width* + characters wide, created by padding the string *s* with spaces until the given + width on the right, left or both sides. The string is never truncated. + + +.. function:: zfill(s, width) + + Pad a numeric string on the left with zero digits until the given width is + reached. Strings starting with a sign are handled correctly. + + +.. function:: replace(str, old, new[, maxreplace]) + + Return a copy of string *str* with all occurrences of substring *old* replaced + by *new*. If the optional argument *maxreplace* is given, the first + *maxreplace* occurrences are replaced. + diff --git a/Doc/library/stringio.rst b/Doc/library/stringio.rst new file mode 100644 index 0000000..9e2f0da --- /dev/null +++ b/Doc/library/stringio.rst @@ -0,0 +1,122 @@ + +:mod:`StringIO` --- Read and write strings as files +=================================================== + +.. module:: StringIO + :synopsis: Read and write strings as if they were files. + + +This module implements a file-like class, :class:`StringIO`, that reads and +writes a string buffer (also known as *memory files*). See the description of +file objects for operations (section :ref:`bltin-file-objects`). + + +.. class:: StringIO([buffer]) + + When a :class:`StringIO` object is created, it can be initialized to an existing + string by passing the string to the constructor. If no string is given, the + :class:`StringIO` will start empty. In both cases, the initial file position + starts at zero. + + The :class:`StringIO` object can accept either Unicode or 8-bit strings, but + mixing the two may take some care. If both are used, 8-bit strings that cannot + be interpreted as 7-bit ASCII (that use the 8th bit) will cause a + :exc:`UnicodeError` to be raised when :meth:`getvalue` is called. 
+ +The following methods of :class:`StringIO` objects require special mention: + + +.. method:: StringIO.getvalue() + + Retrieve the entire contents of the "file" at any time before the + :class:`StringIO` object's :meth:`close` method is called. See the note above + for information about mixing Unicode and 8-bit strings; such mixing can cause + this method to raise :exc:`UnicodeError`. + + +.. method:: StringIO.close() + + Free the memory buffer. + +Example usage:: + + import StringIO + + output = StringIO.StringIO() + output.write('First line.\n') + print >>output, 'Second line.' + + # Retrieve file contents -- this will be + # 'First line.\nSecond line.\n' + contents = output.getvalue() + + # Close object and discard memory buffer -- + # .getvalue() will now raise an exception. + output.close() + + +:mod:`cStringIO` --- Faster version of :mod:`StringIO` +====================================================== + +.. module:: cStringIO + :synopsis: Faster version of StringIO, but not subclassable. +.. moduleauthor:: Jim Fulton <jim@zope.com> +.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org> + + +The module :mod:`cStringIO` provides an interface similar to that of the +:mod:`StringIO` module. Heavy use of :class:`StringIO.StringIO` objects can be +made more efficient by using the function :func:`StringIO` from this module +instead. + +Since this module provides a factory function which returns objects of built-in +types, there's no way to build your own version using subclassing. Use the +original :mod:`StringIO` module in that case. + +Unlike the memory files implemented by the :mod:`StringIO` module, those +provided by this module are not able to accept Unicode strings that cannot be +encoded as plain ASCII strings. + +Calling :func:`StringIO` with a Unicode string parameter populates +the object with the buffer representation of the Unicode string, instead of +encoding the string. 
+ +Another difference from the :mod:`StringIO` module is that calling +:func:`StringIO` with a string parameter creates a read-only object. Unlike an +object created without a string parameter, it does not have write methods. +These objects are not generally visible. They turn up in tracebacks as +:class:`StringI` and :class:`StringO`. + +The following data objects are provided as well: + + +.. data:: InputType + + The type object of the objects created by calling :func:`StringIO` with a string + parameter. + + +.. data:: OutputType + + The type object of the objects returned by calling :func:`StringIO` with no + parameters. + +There is a C API to the module as well; refer to the module source for more +information. + +Example usage:: + + import cStringIO + + output = cStringIO.StringIO() + output.write('First line.\n') + print >>output, 'Second line.' + + # Retrieve file contents -- this will be + # 'First line.\nSecond line.\n' + contents = output.getvalue() + + # Close object and discard memory buffer -- + # .getvalue() will now raise an exception. + output.close() + diff --git a/Doc/library/stringprep.rst b/Doc/library/stringprep.rst new file mode 100644 index 0000000..b0944e4 --- /dev/null +++ b/Doc/library/stringprep.rst @@ -0,0 +1,142 @@ + +:mod:`stringprep` --- Internet String Preparation +================================================= + +.. module:: stringprep + :synopsis: String preparation, as per RFC 3454 +.. moduleauthor:: Martin v. Löwis <martin@v.loewis.de> +.. sectionauthor:: Martin v. Löwis <martin@v.loewis.de> + + +.. versionadded:: 2.3 + +When identifying things (such as host names) in the internet, it is often +necessary to compare such identifications for "equality". Exactly how this +comparison is executed may depend on the application domain, e.g. whether it +should be case-insensitive or not. It may be also necessary to restrict the +possible identifications, to allow only identifications consisting of +"printable" characters. 
+ +:rfc:`3454` defines a procedure for "preparing" Unicode strings in internet +protocols. Before passing strings onto the wire, they are processed with the +preparation procedure, after which they have a certain normalized form. The RFC +defines a set of tables, which can be combined into profiles. Each profile must +define which tables it uses, and what other optional parts of the ``stringprep`` +procedure are part of the profile. One example of a ``stringprep`` profile is +``nameprep``, which is used for internationalized domain names. + +The module :mod:`stringprep` only exposes the tables from RFC 3454. As these +tables would be very large to represent them as dictionaries or lists, the +module uses the Unicode character database internally. The module source code +itself was generated using the ``mkstringprep.py`` utility. + +As a result, these tables are exposed as functions, not as data structures. +There are two kinds of tables in the RFC: sets and mappings. For a set, +:mod:`stringprep` provides the "characteristic function", i.e. a function that +returns true if the parameter is part of the set. For mappings, it provides the +mapping function: given the key, it returns the associated value. Below is a +list of all functions available in the module. + + +.. function:: in_table_a1(code) + + Determine whether *code* is in tableA.1 (Unassigned code points in Unicode 3.2). + + +.. function:: in_table_b1(code) + + Determine whether *code* is in tableB.1 (Commonly mapped to nothing). + + +.. function:: map_table_b2(code) + + Return the mapped value for *code* according to tableB.2 (Mapping for + case-folding used with NFKC). + + +.. function:: map_table_b3(code) + + Return the mapped value for *code* according to tableB.3 (Mapping for + case-folding used with no normalization). + + +.. function:: in_table_c11(code) + + Determine whether *code* is in tableC.1.1 (ASCII space characters). + + +.. 
function:: in_table_c12(code) + + Determine whether *code* is in tableC.1.2 (Non-ASCII space characters). + + +.. function:: in_table_c11_c12(code) + + Determine whether *code* is in tableC.1 (Space characters, union of C.1.1 and + C.1.2). + + +.. function:: in_table_c21(code) + + Determine whether *code* is in tableC.2.1 (ASCII control characters). + + +.. function:: in_table_c22(code) + + Determine whether *code* is in tableC.2.2 (Non-ASCII control characters). + + +.. function:: in_table_c21_c22(code) + + Determine whether *code* is in tableC.2 (Control characters, union of C.2.1 and + C.2.2). + + +.. function:: in_table_c3(code) + + Determine whether *code* is in tableC.3 (Private use). + + +.. function:: in_table_c4(code) + + Determine whether *code* is in tableC.4 (Non-character code points). + + +.. function:: in_table_c5(code) + + Determine whether *code* is in tableC.5 (Surrogate codes). + + +.. function:: in_table_c6(code) + + Determine whether *code* is in tableC.6 (Inappropriate for plain text). + + +.. function:: in_table_c7(code) + + Determine whether *code* is in tableC.7 (Inappropriate for canonical + representation). + + +.. function:: in_table_c8(code) + + Determine whether *code* is in tableC.8 (Change display properties or are + deprecated). + + +.. function:: in_table_c9(code) + + Determine whether *code* is in tableC.9 (Tagging characters). + + +.. function:: in_table_d1(code) + + Determine whether *code* is in tableD.1 (Characters with bidirectional property + "R" or "AL"). + + +.. function:: in_table_d2(code) + + Determine whether *code* is in tableD.2 (Characters with bidirectional property + "L"). + diff --git a/Doc/library/strings.rst b/Doc/library/strings.rst new file mode 100644 index 0000000..5c8ec4b --- /dev/null +++ b/Doc/library/strings.rst @@ -0,0 +1,31 @@ + +.. _stringservices: + +*************** +String Services +*************** + +The modules described in this chapter provide a wide range of string +manipulation operations. 
+ +In addition, Python's built-in string classes support the sequence type +methods described in the :ref:`typesseq` section, and also the +string-specific methods described in the :ref:`string-methods` section. +To output formatted strings use template strings or the ``%`` operator +described in the :ref:`string-formatting` section. Also, see the +:mod:`re` module for string functions based on regular expressions. + + +.. toctree:: + + string.rst + re.rst + struct.rst + difflib.rst + stringio.rst + textwrap.rst + codecs.rst + unicodedata.rst + stringprep.rst + fpformat.rst + diff --git a/Doc/library/struct.rst b/Doc/library/struct.rst new file mode 100644 index 0000000..2f27d13 --- /dev/null +++ b/Doc/library/struct.rst @@ -0,0 +1,292 @@ + +:mod:`struct` --- Interpret strings as packed binary data +========================================================= + +.. module:: struct + :synopsis: Interpret strings as packed binary data. + +.. index:: + pair: C; structures + triple: packing; binary; data + +This module performs conversions between Python values and C structs represented +as Python strings. It uses :dfn:`format strings` (explained below) as compact +descriptions of the lay-out of the C structs and the intended conversion to/from +Python values. This can be used in handling binary data stored in files or from +network connections, among other sources. + +The module defines the following exception and functions: + + +.. exception:: error + + Exception raised on various occasions; argument is a string describing what is + wrong. + + +.. function:: pack(fmt, v1, v2, ...) + + Return a string containing the values ``v1, v2, ...`` packed according to the + given format. The arguments must match the values required by the format + exactly. + + +.. function:: pack_into(fmt, buffer, offset, v1, v2, ...) + + Pack the values ``v1, v2, ...`` according to the given format, write the packed + bytes into the writable *buffer* starting at *offset*. 
Note that the offset is + a required argument. + + .. versionadded:: 2.5 + + +.. function:: unpack(fmt, string) + + Unpack the string (presumably packed by ``pack(fmt, ...)``) according to the + given format. The result is a tuple even if it contains exactly one item. The + string must contain exactly the amount of data required by the format + (``len(string)`` must equal ``calcsize(fmt)``). + + +.. function:: unpack_from(fmt, buffer[, offset=0]) + + Unpack the *buffer* according to the given format. The result is a tuple even + if it contains exactly one item. The *buffer* must contain at least the amount + of data required by the format (``len(buffer[offset:])`` must be at least + ``calcsize(fmt)``). + + .. versionadded:: 2.5 + + +.. function:: calcsize(fmt) + + Return the size of the struct (and hence of the string) corresponding to the + given format. + +Format characters have the following meaning; the conversion between C and +Python values should be obvious given their types: + ++--------+-------------------------+--------------------+-------+ +| Format | C Type | Python | Notes | ++========+=========================+====================+=======+ +| ``x`` | pad byte | no value | | ++--------+-------------------------+--------------------+-------+ +| ``c`` | :ctype:`char` | string of length 1 | | ++--------+-------------------------+--------------------+-------+ +| ``b`` | :ctype:`signed char` | integer | | ++--------+-------------------------+--------------------+-------+ +| ``B`` | :ctype:`unsigned char` | integer | | ++--------+-------------------------+--------------------+-------+ +| ``t`` | :ctype:`_Bool` | bool | \(1) | ++--------+-------------------------+--------------------+-------+ +| ``h`` | :ctype:`short` | integer | | ++--------+-------------------------+--------------------+-------+ +| ``H`` | :ctype:`unsigned short` | integer | | ++--------+-------------------------+--------------------+-------+ +| ``i`` | :ctype:`int` | integer | |
++--------+-------------------------+--------------------+-------+ +| ``I`` | :ctype:`unsigned int` | long | | ++--------+-------------------------+--------------------+-------+ +| ``l`` | :ctype:`long` | integer | | ++--------+-------------------------+--------------------+-------+ +| ``L`` | :ctype:`unsigned long` | long | | ++--------+-------------------------+--------------------+-------+ +| ``q`` | :ctype:`long long` | long | \(2) | ++--------+-------------------------+--------------------+-------+ +| ``Q`` | :ctype:`unsigned long | long | \(2) | +| | long` | | | ++--------+-------------------------+--------------------+-------+ +| ``f`` | :ctype:`float` | float | | ++--------+-------------------------+--------------------+-------+ +| ``d`` | :ctype:`double` | float | | ++--------+-------------------------+--------------------+-------+ +| ``s`` | :ctype:`char[]` | string | | ++--------+-------------------------+--------------------+-------+ +| ``p`` | :ctype:`char[]` | string | | ++--------+-------------------------+--------------------+-------+ +| ``P`` | :ctype:`void \*` | integer | | ++--------+-------------------------+--------------------+-------+ + +Notes: + +(1) + The ``'t'`` conversion code corresponds to the :ctype:`_Bool` type defined by + C99. If this type is not available, it is simulated using a :ctype:`char`. In + standard mode, it is always represented by one byte. + + .. versionadded:: 2.6 + +(2) + The ``'q'`` and ``'Q'`` conversion codes are available in native mode only if + the platform C compiler supports C :ctype:`long long`, or, on Windows, + :ctype:`__int64`. They are always available in standard modes. + + .. versionadded:: 2.2 + +A format character may be preceded by an integral repeat count. For example, +the format string ``'4h'`` means exactly the same as ``'hhhh'``. + +Whitespace characters between formats are ignored; a count and its format must +not contain whitespace though. 
+ +For the ``'s'`` format character, the count is interpreted as the size of the +string, not a repeat count like for the other format characters; for example, +``'10s'`` means a single 10-byte string, while ``'10c'`` means 10 characters. +For packing, the string is truncated or padded with null bytes as appropriate to +make it fit. For unpacking, the resulting string always has exactly the +specified number of bytes. As a special case, ``'0s'`` means a single, empty +string (while ``'0c'`` means 0 characters). + +The ``'p'`` format character encodes a "Pascal string", meaning a short +variable-length string stored in a fixed number of bytes. The count is the total +number of bytes stored. The first byte stored is the length of the string, or +255, whichever is smaller. The bytes of the string follow. If the string +passed in to :func:`pack` is too long (longer than the count minus 1), only the +leading count-1 bytes of the string are stored. If the string is shorter than +count-1, it is padded with null bytes so that exactly count bytes in all are +used. Note that for :func:`unpack`, the ``'p'`` format character consumes count +bytes, but that the string returned can never contain more than 255 characters. + +For the ``'I'``, ``'L'``, ``'q'`` and ``'Q'`` format characters, the return +value is a Python long integer. + +For the ``'P'`` format character, the return value is a Python integer or long +integer, depending on the size needed to hold a pointer when it has been cast to +an integer type. A *NULL* pointer will always be returned as the Python integer +``0``. When packing pointer-sized values, Python integer or long integer objects +may be used. For example, the Alpha and Merced processors use 64-bit pointer +values, meaning a Python long integer will be used to hold the pointer; other +platforms use 32-bit pointers and will use a Python integer. + +For the ``'t'`` format character, the return value is either :const:`True` or +:const:`False`. 
When packing, the truth value of the argument object is used. +Either 0 or 1 in the native or standard bool representation will be packed, and +any non-zero value will be True when unpacking. + +By default, C numbers are represented in the machine's native format and byte +order, and properly aligned by skipping pad bytes if necessary (according to the +rules used by the C compiler). + +Alternatively, the first character of the format string can be used to indicate +the byte order, size and alignment of the packed data, according to the +following table: + ++-----------+------------------------+--------------------+ +| Character | Byte order | Size and alignment | ++===========+========================+====================+ +| ``@`` | native | native | ++-----------+------------------------+--------------------+ +| ``=`` | native | standard | ++-----------+------------------------+--------------------+ +| ``<`` | little-endian | standard | ++-----------+------------------------+--------------------+ +| ``>`` | big-endian | standard | ++-----------+------------------------+--------------------+ +| ``!`` | network (= big-endian) | standard | ++-----------+------------------------+--------------------+ + +If the first character is not one of these, ``'@'`` is assumed. + +Native byte order is big-endian or little-endian, depending on the host system. +For example, Motorola and Sun processors are big-endian; Intel and DEC +processors are little-endian. + +Native size and alignment are determined using the C compiler's +:keyword:`sizeof` expression. This is always combined with native byte order. + +Standard size and alignment are as follows: no alignment is required for any +type (so you have to use pad bytes); :ctype:`short` is 2 bytes; :ctype:`int` and +:ctype:`long` are 4 bytes; :ctype:`long long` (:ctype:`__int64` on Windows) is 8 +bytes; :ctype:`float` and :ctype:`double` are 32-bit and 64-bit IEEE floating +point numbers, respectively. :ctype:`_Bool` is 1 byte. 
+ +Note the difference between ``'@'`` and ``'='``: both use native byte order, but +the size and alignment of the latter is standardized. + +The form ``'!'`` is available for those poor souls who claim they can't remember +whether network byte order is big-endian or little-endian. + +There is no way to indicate non-native byte order (force byte-swapping); use the +appropriate choice of ``'<'`` or ``'>'``. + +The ``'P'`` format character is only available for the native byte ordering +(selected as the default or with the ``'@'`` byte order character). The byte +order character ``'='`` chooses to use little- or big-endian ordering based on +the host system. The struct module does not interpret this as native ordering, +so the ``'P'`` format is not available. + +Examples (all using native byte order, size and alignment, on a big-endian +machine):: + + >>> from struct import * + >>> pack('hhl', 1, 2, 3) + '\x00\x01\x00\x02\x00\x00\x00\x03' + >>> unpack('hhl', '\x00\x01\x00\x02\x00\x00\x00\x03') + (1, 2, 3) + >>> calcsize('hhl') + 8 + +Hint: to align the end of a structure to the alignment requirement of a +particular type, end the format with the code for that type with a repeat count +of zero. For example, the format ``'llh0l'`` specifies two pad bytes at the +end, assuming longs are aligned on 4-byte boundaries. This only works when +native size and alignment are in effect; standard size and alignment does not +enforce any alignment. + + +.. seealso:: + + Module :mod:`array` + Packed binary storage of homogeneous data. + + Module :mod:`xdrlib` + Packing and unpacking of XDR data. + + +.. _struct-objects: + +Struct Objects +-------------- + +The :mod:`struct` module also defines the following type: + + +.. class:: Struct(format) + + Return a new Struct object which writes and reads binary data according to the + format string *format*. 
Creating a Struct object once and calling its methods + is more efficient than calling the :mod:`struct` functions with the same format + since the format string only needs to be compiled once. + + .. versionadded:: 2.5 + +Compiled Struct objects support the following methods and attributes: + + +.. method:: Struct.pack(v1, v2, ...) + + Identical to the :func:`pack` function, using the compiled format. + (``len(result)`` will equal :attr:`self.size`.) + + +.. method:: Struct.pack_into(buffer, offset, v1, v2, ...) + + Identical to the :func:`pack_into` function, using the compiled format. + + +.. method:: Struct.unpack(string) + + Identical to the :func:`unpack` function, using the compiled format. + (``len(string)`` must equal :attr:`self.size`). + + +.. method:: Struct.unpack_from(buffer[, offset=0]) + + Identical to the :func:`unpack_from` function, using the compiled format. + (``len(buffer[offset:])`` must be at least :attr:`self.size`). + + +.. attribute:: Struct.format + + The format string used to construct this Struct object. + diff --git a/Doc/library/subprocess.rst b/Doc/library/subprocess.rst new file mode 100644 index 0000000..a3bc2cb --- /dev/null +++ b/Doc/library/subprocess.rst @@ -0,0 +1,340 @@ + +:mod:`subprocess` --- Subprocess management +=========================================== + +.. module:: subprocess + :synopsis: Subprocess management. +.. moduleauthor:: Peter Åstrand <astrand@lysator.liu.se> +.. sectionauthor:: Peter Åstrand <astrand@lysator.liu.se> + + +.. versionadded:: 2.4 + +The :mod:`subprocess` module allows you to spawn new processes, connect to their +input/output/error pipes, and obtain their return codes. This module intends to +replace several other, older modules and functions, such as:: + + os.system + os.spawn* + commands.* + +Information about how the :mod:`subprocess` module can be used to replace these +modules and functions can be found in the following sections. 
+ + +Using the subprocess Module +--------------------------- + +This module defines one class called :class:`Popen`: + + +.. class:: Popen(args, bufsize=0, executable=None, stdin=None, stdout=None, stderr=None, preexec_fn=None, close_fds=False, shell=False, cwd=None, env=None, universal_newlines=False, startupinfo=None, creationflags=0) + + Arguments are: + + *args* should be a string, or a sequence of program arguments. The program to + execute is normally the first item in the args sequence or string, but can be + explicitly set by using the executable argument. + + On Unix, with *shell=False* (default): In this case, the Popen class uses + :meth:`os.execvp` to execute the child program. *args* should normally be a + sequence. A string will be treated as a sequence with the string as the only + item (the program to execute). + + On Unix, with *shell=True*: If args is a string, it specifies the command string + to execute through the shell. If *args* is a sequence, the first item specifies + the command string, and any additional items will be treated as additional shell + arguments. + + On Windows: the :class:`Popen` class uses CreateProcess() to execute the child + program, which operates on strings. If *args* is a sequence, it will be + converted to a string using the :meth:`list2cmdline` method. Please note that + not all MS Windows applications interpret the command line the same way: + :meth:`list2cmdline` is designed for applications using the same rules as the MS + C runtime. + + *bufsize*, if given, has the same meaning as the corresponding argument to the + built-in open() function: :const:`0` means unbuffered, :const:`1` means line + buffered, any other positive value means use a buffer of (approximately) that + size. A negative *bufsize* means to use the system default, which usually means + fully buffered. The default value for *bufsize* is :const:`0` (unbuffered). + + The *executable* argument specifies the program to execute. 
It is very seldom + needed: Usually, the program to execute is defined by the *args* argument. If + ``shell=True``, the *executable* argument specifies which shell to use. On Unix, + the default shell is :file:`/bin/sh`. On Windows, the default shell is + specified by the :envvar:`COMSPEC` environment variable. + + *stdin*, *stdout* and *stderr* specify the executed programs' standard input, + standard output and standard error file handles, respectively. Valid values are + ``PIPE``, an existing file descriptor (a positive integer), an existing file + object, and ``None``. ``PIPE`` indicates that a new pipe to the child should be + created. With ``None``, no redirection will occur; the child's file handles + will be inherited from the parent. Additionally, *stderr* can be ``STDOUT``, + which indicates that the stderr data from the applications should be captured + into the same file handle as for stdout. + + If *preexec_fn* is set to a callable object, this object will be called in the + child process just before the child is executed. (Unix only) + + If *close_fds* is true, all file descriptors except :const:`0`, :const:`1` and + :const:`2` will be closed before the child process is executed. (Unix only). + Or, on Windows, if *close_fds* is true then no handles will be inherited by the + child process. Note that on Windows, you cannot set *close_fds* to true and + also redirect the standard handles by setting *stdin*, *stdout* or *stderr*. + + If *shell* is :const:`True`, the specified command will be executed through the + shell. + + If *cwd* is not ``None``, the child's current directory will be changed to *cwd* + before it is executed. Note that this directory is not considered when + searching the executable, so you can't specify the program's path relative to + *cwd*. + + If *env* is not ``None``, it defines the environment variables for the new + process. 
+ + If *universal_newlines* is :const:`True`, the file objects stdout and stderr are + opened as text files, but lines may be terminated by any of ``'\n'``, the Unix + end-of-line convention, ``'\r'``, the Macintosh convention or ``'\r\n'``, the + Windows convention. All of these external representations are seen as ``'\n'`` + by the Python program. + + .. note:: + + This feature is only available if Python is built with universal newline support + (the default). Also, the newlines attribute of the file objects :attr:`stdout`, + :attr:`stdin` and :attr:`stderr` is not updated by the communicate() method. + + The *startupinfo* and *creationflags*, if given, will be passed to the + underlying CreateProcess() function. They can specify things such as appearance + of the main window and priority for the new process. (Windows only) + + +Convenience Functions +^^^^^^^^^^^^^^^^^^^^^ + +This module also defines two shortcut functions: + + +.. function:: call(*popenargs, **kwargs) + + Run command with arguments. Wait for command to complete, then return the + :attr:`returncode` attribute. + + The arguments are the same as for the Popen constructor. Example:: + + retcode = call(["ls", "-l"]) + + +.. function:: check_call(*popenargs, **kwargs) + + Run command with arguments. Wait for command to complete. If the exit code was + zero then return, otherwise raise :exc:`CalledProcessError`. The + :exc:`CalledProcessError` object will have the return code in the + :attr:`returncode` attribute. + + The arguments are the same as for the Popen constructor. Example:: + + check_call(["ls", "-l"]) + + .. versionadded:: 2.5 + + +Exceptions +^^^^^^^^^^ + +Exceptions raised in the child process, before the new program has started to +execute, will be re-raised in the parent. Additionally, the exception object +will have one extra attribute called :attr:`child_traceback`, which is a string +containing traceback information from the child's point of view.
+ +The most common exception raised is :exc:`OSError`. This occurs, for example, +when trying to execute a non-existent file. Applications should prepare for +:exc:`OSError` exceptions. + +A :exc:`ValueError` will be raised if :class:`Popen` is called with invalid +arguments. + +check_call() will raise :exc:`CalledProcessError`, if the called process returns +a non-zero return code. + + +Security +^^^^^^^^ + +Unlike some other popen functions, this implementation will never call /bin/sh +implicitly. This means that all characters, including shell metacharacters, can +safely be passed to child processes. + + +Popen Objects +------------- + +Instances of the :class:`Popen` class have the following methods: + + +.. method:: Popen.poll() + + Check if child process has terminated. Returns returncode attribute. + + +.. method:: Popen.wait() + + Wait for child process to terminate. Returns returncode attribute. + + +.. method:: Popen.communicate(input=None) + + Interact with process: Send data to stdin. Read data from stdout and stderr, + until end-of-file is reached. Wait for process to terminate. The optional + *input* argument should be a string to be sent to the child process, or + ``None``, if no data should be sent to the child. + + communicate() returns a tuple (stdout, stderr). + + .. note:: + + The data read is buffered in memory, so do not use this method if the data size + is large or unlimited. + +The following attributes are also available: + + +.. attribute:: Popen.stdin + + If the *stdin* argument is ``PIPE``, this attribute is a file object that + provides input to the child process. Otherwise, it is ``None``. + + +.. attribute:: Popen.stdout + + If the *stdout* argument is ``PIPE``, this attribute is a file object that + provides output from the child process. Otherwise, it is ``None``. + + +.. attribute:: Popen.stderr + + If the *stderr* argument is ``PIPE``, this attribute is a file object that + provides error output from the child process.
Otherwise, it is ``None``. + + +.. attribute:: Popen.pid + + The process ID of the child process. + + +.. attribute:: Popen.returncode + + The child return code. A ``None`` value indicates that the process hasn't + terminated yet. A negative value -N indicates that the child was terminated by + signal N (Unix only). + + +Replacing Older Functions with the subprocess Module +---------------------------------------------------- + +In this section, "a ==> b" means that b can be used as a replacement for a. + +.. note:: + + All functions in this section fail (more or less) silently if the executed + program cannot be found; this module raises an :exc:`OSError` exception. + +In the following examples, we assume that the subprocess module is imported with +"from subprocess import \*". + + +Replacing /bin/sh shell backquote +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + output=`mycmd myarg` + ==> + output = Popen(["mycmd", "myarg"], stdout=PIPE).communicate()[0] + + +Replacing shell pipe line +^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + output=`dmesg | grep hda` + ==> + p1 = Popen(["dmesg"], stdout=PIPE) + p2 = Popen(["grep", "hda"], stdin=p1.stdout, stdout=PIPE) + output = p2.communicate()[0] + + +Replacing os.system() +^^^^^^^^^^^^^^^^^^^^^ + +:: + + sts = os.system("mycmd" + " myarg") + ==> + p = Popen("mycmd" + " myarg", shell=True) + sts = os.waitpid(p.pid, 0)[1] + +Notes: + +* Calling the program through the shell is usually not required. + +* It's easier to look at the :attr:`returncode` attribute than the exit status.
+ +A more realistic example would look like this:: + + try: + retcode = call("mycmd" + " myarg", shell=True) + if retcode < 0: + print >>sys.stderr, "Child was terminated by signal", -retcode + else: + print >>sys.stderr, "Child returned", retcode + except OSError as e: + print >>sys.stderr, "Execution failed:", e + + +Replacing os.spawn\* +^^^^^^^^^^^^^^^^^^^^ + +P_NOWAIT example:: + + pid = os.spawnlp(os.P_NOWAIT, "/bin/mycmd", "mycmd", "myarg") + ==> + pid = Popen(["/bin/mycmd", "myarg"]).pid + +P_WAIT example:: + + retcode = os.spawnlp(os.P_WAIT, "/bin/mycmd", "mycmd", "myarg") + ==> + retcode = call(["/bin/mycmd", "myarg"]) + +Vector example:: + + os.spawnvp(os.P_NOWAIT, path, args) + ==> + Popen([path] + args[1:]) + +Environment example:: + + os.spawnlpe(os.P_NOWAIT, "/bin/mycmd", "mycmd", "myarg", env) + ==> + Popen(["/bin/mycmd", "myarg"], env={"PATH": "/usr/bin"}) + + +Replacing os.popen\* +^^^^^^^^^^^^^^^^^^^^ + +:: + + pipe = os.popen(cmd, 'r', bufsize) + ==> + pipe = Popen(cmd, shell=True, bufsize=bufsize, stdout=PIPE).stdout + +:: + + pipe = os.popen(cmd, 'w', bufsize) + ==> + pipe = Popen(cmd, shell=True, bufsize=bufsize, stdin=PIPE).stdin + diff --git a/Doc/library/sunau.rst b/Doc/library/sunau.rst new file mode 100644 index 0000000..9930133 --- /dev/null +++ b/Doc/library/sunau.rst @@ -0,0 +1,261 @@ + +:mod:`sunau` --- Read and write Sun AU files +============================================ + +.. module:: sunau + :synopsis: Provide an interface to the Sun AU sound format. +.. sectionauthor:: Moshe Zadka <moshez@zadka.site.co.il> + + +The :mod:`sunau` module provides a convenient interface to the Sun AU sound +format. Note that this module is interface-compatible with the modules +:mod:`aifc` and :mod:`wave`. + +An audio file consists of a header followed by the data.
The fields of the +header are: + ++---------------+-----------------------------------------------+ +| Field | Contents | ++===============+===============================================+ +| magic word | The four bytes ``.snd``. | ++---------------+-----------------------------------------------+ +| header size | Size of the header, including info, in bytes. | ++---------------+-----------------------------------------------+ +| data size | Physical size of the data, in bytes. | ++---------------+-----------------------------------------------+ +| encoding | Indicates how the audio samples are encoded. | ++---------------+-----------------------------------------------+ +| sample rate | The sampling rate. | ++---------------+-----------------------------------------------+ +| # of channels | The number of channels in the samples. | ++---------------+-----------------------------------------------+ +| info | ASCII string giving a description of the | +| | audio file (padded with null bytes). | ++---------------+-----------------------------------------------+ + +Apart from the info field, all header fields are 4 bytes in size. They are all +32-bit unsigned integers encoded in big-endian byte order. + +The :mod:`sunau` module defines the following functions: + + +.. function:: open(file, mode) + + If *file* is a string, open the file by that name, otherwise treat it as a + seekable file-like object. *mode* can be any of + + ``'r'`` + Read only mode. + + ``'w'`` + Write only mode. + + Note that it does not allow read/write files. + + A *mode* of ``'r'`` returns a :class:`AU_read` object, while a *mode* of ``'w'`` + or ``'wb'`` returns a :class:`AU_write` object. + + +.. function:: openfp(file, mode) + + A synonym for :func:`open`, maintained for backwards compatibility. + +The :mod:`sunau` module defines the following exception: + + +.. exception:: Error + + An error raised when something is impossible because of Sun AU specs or + implementation deficiency. 
+ +The :mod:`sunau` module defines the following data items: + + +.. data:: AUDIO_FILE_MAGIC + + An integer every valid Sun AU file begins with, stored in big-endian form. This + is the string ``.snd`` interpreted as an integer. + + +.. data:: AUDIO_FILE_ENCODING_MULAW_8 + AUDIO_FILE_ENCODING_LINEAR_8 + AUDIO_FILE_ENCODING_LINEAR_16 + AUDIO_FILE_ENCODING_LINEAR_24 + AUDIO_FILE_ENCODING_LINEAR_32 + AUDIO_FILE_ENCODING_ALAW_8 + + Values of the encoding field from the AU header which are supported by this + module. + + +.. data:: AUDIO_FILE_ENCODING_FLOAT + AUDIO_FILE_ENCODING_DOUBLE + AUDIO_FILE_ENCODING_ADPCM_G721 + AUDIO_FILE_ENCODING_ADPCM_G722 + AUDIO_FILE_ENCODING_ADPCM_G723_3 + AUDIO_FILE_ENCODING_ADPCM_G723_5 + + Additional known values of the encoding field from the AU header, but which are + not supported by this module. + + +.. _au-read-objects: + +AU_read Objects +--------------- + +AU_read objects, as returned by :func:`open` above, have the following methods: + + +.. method:: AU_read.close() + + Close the stream, and make the instance unusable. (This is called automatically + on deletion.) + + +.. method:: AU_read.getnchannels() + + Returns number of audio channels (1 for mono, 2 for stereo). + + +.. method:: AU_read.getsampwidth() + + Returns sample width in bytes. + + +.. method:: AU_read.getframerate() + + Returns sampling frequency. + + +.. method:: AU_read.getnframes() + + Returns number of audio frames. + + +.. method:: AU_read.getcomptype() + + Returns compression type. Supported compression types are ``'ULAW'``, ``'ALAW'`` + and ``'NONE'``. + + +.. method:: AU_read.getcompname() + + Human-readable version of :meth:`getcomptype`. The supported types have the + respective names ``'CCITT G.711 u-law'``, ``'CCITT G.711 A-law'`` and ``'not + compressed'``. + + +.. method:: AU_read.getparams() + + Returns a tuple ``(nchannels, sampwidth, framerate, nframes, comptype, + compname)``, equivalent to output of the :meth:`get\*` methods. + + +..
method:: AU_read.readframes(n) + + Reads and returns at most *n* frames of audio, as a string of bytes. The data + will be returned in linear format. If the original data is in u-LAW format, it + will be converted. + + +.. method:: AU_read.rewind() + + Rewind the file pointer to the beginning of the audio stream. + +The following two methods define a term "position" which is compatible between +them, and is otherwise implementation dependent. + + +.. method:: AU_read.setpos(pos) + + Set the file pointer to the specified position. Only values returned from + :meth:`tell` should be used for *pos*. + + +.. method:: AU_read.tell() + + Return current file pointer position. Note that the returned value has nothing + to do with the actual position in the file. + +The following two functions are defined for compatibility with the :mod:`aifc`, +and don't do anything interesting. + + +.. method:: AU_read.getmarkers() + + Returns ``None``. + + +.. method:: AU_read.getmark(id) + + Raise an error. + + +.. _au-write-objects: + +AU_write Objects +---------------- + +AU_write objects, as returned by :func:`open` above, have the following methods: + + +.. method:: AU_write.setnchannels(n) + + Set the number of channels. + + +.. method:: AU_write.setsampwidth(n) + + Set the sample width (in bytes.) + + +.. method:: AU_write.setframerate(n) + + Set the frame rate. + + +.. method:: AU_write.setnframes(n) + + Set the number of frames. This can be later changed, when and if more frames + are written. + + +.. method:: AU_write.setcomptype(type, name) + + Set the compression type and description. Only ``'NONE'`` and ``'ULAW'`` are + supported on output. + + +.. method:: AU_write.setparams(tuple) + + The *tuple* should be ``(nchannels, sampwidth, framerate, nframes, comptype, + compname)``, with values valid for the :meth:`set\*` methods. Set all + parameters. + + +.. 
method:: AU_write.tell() + + Return current position in the file, with the same disclaimer for the + :meth:`AU_read.tell` and :meth:`AU_read.setpos` methods. + + +.. method:: AU_write.writeframesraw(data) + + Write audio frames, without correcting *nframes*. + + +.. method:: AU_write.writeframes(data) + + Write audio frames and make sure *nframes* is correct. + + +.. method:: AU_write.close() + + Make sure *nframes* is correct, and close the file. + + This method is called upon deletion. + +Note that it is invalid to set any parameters after calling :meth:`writeframes` +or :meth:`writeframesraw`. + diff --git a/Doc/library/symbol.rst b/Doc/library/symbol.rst new file mode 100644 index 0000000..1735276 --- /dev/null +++ b/Doc/library/symbol.rst @@ -0,0 +1,32 @@ + +:mod:`symbol` --- Constants used with Python parse trees +======================================================== + +.. module:: symbol + :synopsis: Constants representing internal nodes of the parse tree. +.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org> + + +This module provides constants which represent the numeric values of internal +nodes of the parse tree. Unlike most Python constants, these use lower-case +names. Refer to the file :file:`Grammar/Grammar` in the Python distribution for +the definitions of the names in the context of the language grammar. The +specific numeric values which the names map to may change between Python +versions. + +This module also provides one additional data object: + + +.. data:: sym_name + + Dictionary mapping the numeric values of the constants defined in this module + back to name strings, allowing more human-readable representation of parse trees + to be generated. + + +.. seealso:: + + Module :mod:`parser` + The second example for the :mod:`parser` module shows how to use the + :mod:`symbol` module. 
+ diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst new file mode 100644 index 0000000..5184c25 --- /dev/null +++ b/Doc/library/sys.rst @@ -0,0 +1,606 @@ + +:mod:`sys` --- System-specific parameters and functions +======================================================= + +.. module:: sys + :synopsis: Access system-specific parameters and functions. + + +This module provides access to some variables used or maintained by the +interpreter and to functions that interact strongly with the interpreter. It is +always available. + + +.. data:: argv + + The list of command line arguments passed to a Python script. ``argv[0]`` is the + script name (it is operating system dependent whether this is a full pathname or + not). If the command was executed using the :option:`-c` command line option to + the interpreter, ``argv[0]`` is set to the string ``'-c'``. If no script name + was passed to the Python interpreter, ``argv[0]`` is the empty string. + + To loop over the standard input, or the list of files given on the + command line, see the :mod:`fileinput` module. + + +.. data:: byteorder + + An indicator of the native byte order. This will have the value ``'big'`` on + big-endian (most-significant byte first) platforms, and ``'little'`` on + little-endian (least-significant byte first) platforms. + + .. versionadded:: 2.0 + + +.. data:: subversion + + A triple (repo, branch, version) representing the Subversion information of the + Python interpreter. *repo* is the name of the repository, ``'CPython'``. + *branch* is a string of one of the forms ``'trunk'``, ``'branches/name'`` or + ``'tags/name'``. *version* is the output of ``svnversion``, if the interpreter + was built from a Subversion checkout; it contains the revision number (range) + and possibly a trailing 'M' if there were local modifications. If the tree was + exported (or svnversion was not available), it is the revision of + ``Include/patchlevel.h`` if the branch is a tag. Otherwise, it is ``None``. + + .. 
versionadded:: 2.5 + + +.. data:: builtin_module_names + + A tuple of strings giving the names of all modules that are compiled into this + Python interpreter. (This information is not available in any other way --- + ``modules.keys()`` only lists the imported modules.) + + +.. data:: copyright + + A string containing the copyright pertaining to the Python interpreter. + + +.. function:: _current_frames() + + Return a dictionary mapping each thread's identifier to the topmost stack frame + currently active in that thread at the time the function is called. Note that + functions in the :mod:`traceback` module can build the call stack given such a + frame. + + This is most useful for debugging deadlock: this function does not require the + deadlocked threads' cooperation, and such threads' call stacks are frozen for as + long as they remain deadlocked. The frame returned for a non-deadlocked thread + may bear no relationship to that thread's current activity by the time calling + code examines the frame. + + This function should be used for internal and specialized purposes only. + + .. versionadded:: 2.5 + + +.. data:: dllhandle + + Integer specifying the handle of the Python DLL. Availability: Windows. + + +.. function:: displayhook(value) + + If *value* is not ``None``, this function prints it to ``sys.stdout``, and saves + it in ``__builtin__._``. + + ``sys.displayhook`` is called on the result of evaluating an expression entered + in an interactive Python session. The display of these values can be customized + by assigning another one-argument function to ``sys.displayhook``. + + +.. function:: excepthook(type, value, traceback) + + This function prints out a given traceback and exception to ``sys.stderr``. + + When an exception is raised and uncaught, the interpreter calls + ``sys.excepthook`` with three arguments, the exception class, exception + instance, and a traceback object. 
In an interactive session this happens just + before control is returned to the prompt; in a Python program this happens just + before the program exits. The handling of such top-level exceptions can be + customized by assigning another three-argument function to ``sys.excepthook``. + + +.. data:: __displayhook__ + __excepthook__ + + These objects contain the original values of ``displayhook`` and ``excepthook`` + at the start of the program. They are saved so that ``displayhook`` and + ``excepthook`` can be restored in case they happen to get replaced with broken + objects. + + +.. function:: exc_info() + + This function returns a tuple of three values that give information about the + exception that is currently being handled. The information returned is specific + both to the current thread and to the current stack frame. If the current stack + frame is not handling an exception, the information is taken from the calling + stack frame, or its caller, and so on until a stack frame is found that is + handling an exception. Here, "handling an exception" is defined as "executing + or having executed an except clause." For any stack frame, only information + about the most recently handled exception is accessible. + + .. index:: object: traceback + + If no exception is being handled anywhere on the stack, a tuple containing three + ``None`` values is returned. Otherwise, the values returned are ``(type, value, + traceback)``. Their meaning is: *type* gets the exception type of the exception + being handled (a class object); *value* gets the exception parameter (its + :dfn:`associated value` or the second argument to :keyword:`raise`, which is + always a class instance if the exception type is a class object); *traceback* + gets a traceback object (see the Reference Manual) which encapsulates the call + stack at the point where the exception originally occurred. + + .. 
warning:: + + Assigning the *traceback* return value to a local variable in a function that is + handling an exception will cause a circular reference. This will prevent + anything referenced by a local variable in the same function or by the traceback + from being garbage collected. Since most functions don't need access to the + traceback, the best solution is to use something like ``exctype, value = + sys.exc_info()[:2]`` to extract only the exception type and value. If you do + need the traceback, make sure to delete it after use (best done with a + :keyword:`try` ... :keyword:`finally` statement) or to call :func:`exc_info` in + a function that does not itself handle an exception. + + .. note:: + + Beginning with Python 2.2, such cycles are automatically reclaimed when garbage + collection is enabled and they become unreachable, but it remains more efficient + to avoid creating cycles. + + +.. data:: exec_prefix + + A string giving the site-specific directory prefix where the platform-dependent + Python files are installed; by default, this is also ``'/usr/local'``. This can + be set at build time with the :option:`--exec-prefix` argument to the + :program:`configure` script. Specifically, all configuration files (e.g. the + :file:`pyconfig.h` header file) are installed in the directory ``exec_prefix + + '/lib/pythonversion/config'``, and shared library modules are installed in + ``exec_prefix + '/lib/pythonversion/lib-dynload'``, where *version* is equal to + ``version[:3]``. + + +.. data:: executable + + A string giving the name of the executable binary for the Python interpreter, on + systems where this makes sense. + + +.. function:: exit([arg]) + + Exit from Python. This is implemented by raising the :exc:`SystemExit` + exception, so cleanup actions specified by finally clauses of :keyword:`try` + statements are honored, and it is possible to intercept the exit attempt at an + outer level. 
The optional argument *arg* can be an integer giving the exit + status (defaulting to zero), or another type of object. If it is an integer, + zero is considered "successful termination" and any nonzero value is considered + "abnormal termination" by shells and the like. Most systems require it to be in + the range 0-127, and produce undefined results otherwise. Some systems have a + convention for assigning specific meanings to specific exit codes, but these are + generally underdeveloped; Unix programs generally use 2 for command line syntax + errors and 1 for all other kinds of errors. If another type of object is passed, + ``None`` is equivalent to passing zero, and any other object is printed to + ``sys.stderr`` and results in an exit code of 1. In particular, + ``sys.exit("some error message")`` is a quick way to exit a program when an + error occurs. + + +.. function:: getcheckinterval() + + Return the interpreter's "check interval"; see :func:`setcheckinterval`. + + .. versionadded:: 2.3 + + +.. function:: getdefaultencoding() + + Return the name of the current default string encoding used by the Unicode + implementation. + + .. versionadded:: 2.0 + + +.. function:: getdlopenflags() + + Return the current value of the flags that are used for :cfunc:`dlopen` calls. + The flag constants are defined in the :mod:`dl` and :mod:`DLFCN` modules. + Availability: Unix. + + .. versionadded:: 2.2 + + +.. function:: getfilesystemencoding() + + Return the name of the encoding used to convert Unicode filenames into system + file names, or ``None`` if the system default encoding is used. The result value + depends on the operating system: + + * On Windows 9x, the encoding is "mbcs". + + * On Mac OS X, the encoding is "utf-8". + + * On Unix, the encoding is the user's preference according to the result of + ``nl_langinfo(CODESET)``, or :const:`None` if ``nl_langinfo(CODESET)`` failed. + + * On Windows NT+, file names are Unicode natively, so no conversion is + performed.
:func:`getfilesystemencoding` still returns ``'mbcs'``, as this is + the encoding that applications should use when they explicitly want to convert + Unicode strings to byte strings that are equivalent when used as file names. + + .. versionadded:: 2.3 + + +.. function:: getrefcount(object) + + Return the reference count of the *object*. The count returned is generally one + higher than you might expect, because it includes the (temporary) reference as + an argument to :func:`getrefcount`. + + +.. function:: getrecursionlimit() + + Return the current value of the recursion limit, the maximum depth of the Python + interpreter stack. This limit prevents infinite recursion from causing an + overflow of the C stack and crashing Python. It can be set by + :func:`setrecursionlimit`. + + +.. function:: _getframe([depth]) + + Return a frame object from the call stack. If optional integer *depth* is + given, return the frame object that many calls below the top of the stack. If + that is deeper than the call stack, :exc:`ValueError` is raised. The default + for *depth* is zero, returning the frame at the top of the call stack. + + This function should be used for internal and specialized purposes only. + + +.. function:: getwindowsversion() + + Return a tuple containing five components, describing the Windows version + currently running. The elements are *major*, *minor*, *build*, *platform*, and + *text*. *text* contains a string while all other values are integers. 
+ + *platform* may be one of the following values: + + +-----------------------------------------+-----------------------+ + | Constant | Platform | + +=========================================+=======================+ + | :const:`0 (VER_PLATFORM_WIN32s)` | Win32s on Windows 3.1 | + +-----------------------------------------+-----------------------+ + | :const:`1 (VER_PLATFORM_WIN32_WINDOWS)` | Windows 95/98/ME | + +-----------------------------------------+-----------------------+ + | :const:`2 (VER_PLATFORM_WIN32_NT)` | Windows NT/2000/XP | + +-----------------------------------------+-----------------------+ + | :const:`3 (VER_PLATFORM_WIN32_CE)` | Windows CE | + +-----------------------------------------+-----------------------+ + + This function wraps the Win32 :cfunc:`GetVersionEx` function; see the Microsoft + documentation for more information about these fields. + + Availability: Windows. + + .. versionadded:: 2.3 + + +.. data:: hexversion + + The version number encoded as a single integer. This is guaranteed to increase + with each version, including proper support for non-production releases. For + example, to test that the Python interpreter is at least version 1.5.2, use:: + + if sys.hexversion >= 0x010502F0: + # use some advanced feature + ... + else: + # use an alternative implementation or warn the user + ... + + This is called ``hexversion`` since it only really looks meaningful when viewed + as the result of passing it to the built-in :func:`hex` function. The + ``version_info`` value may be used for a more human-friendly encoding of the + same information. + + .. versionadded:: 1.5.2 + + +.. function:: intern(string) + + Enter *string* in the table of "interned" strings and return the interned string + -- which is *string* itself or a copy. 
Interning strings is useful to gain a + little performance on dictionary lookup -- if the keys in a dictionary are + interned, and the lookup key is interned, the key comparisons (after hashing) + can be done by a pointer compare instead of a string compare. Normally, the + names used in Python programs are automatically interned, and the dictionaries + used to hold module, class or instance attributes have interned keys. + + .. versionchanged:: 2.3 + Interned strings are not immortal (like they used to be in Python 2.2 and + before); you must keep a reference to the return value of :func:`intern` around + to benefit from it. + + +.. data:: last_type + last_value + last_traceback + + These three variables are not always defined; they are set when an exception is + not handled and the interpreter prints an error message and a stack traceback. + Their intended use is to allow an interactive user to import a debugger module + and engage in post-mortem debugging without having to re-execute the command + that caused the error. (Typical use is ``import pdb; pdb.pm()`` to enter the + post-mortem debugger; see chapter :ref:`debugger` for + more information.) + + The meaning of the variables is the same as that of the return values from + :func:`exc_info` above. (Since there is only one interactive thread, + thread-safety is not a concern for these variables, unlike for ``exc_type`` + etc.) + + +.. data:: maxint + + The largest positive integer supported by Python's regular integer type. This + is at least 2\*\*31-1. The largest negative integer is ``-maxint-1`` --- the + asymmetry results from the use of 2's complement binary arithmetic. + + +.. data:: maxunicode + + An integer giving the largest supported code point for a Unicode character. The + value of this depends on the configuration option that specifies whether Unicode + characters are stored as UCS-2 or UCS-4. + + +.. 
data:: modules + + This is a dictionary that maps module names to modules which have already been + loaded. This can be manipulated to force reloading of modules and other tricks. + + +.. data:: path + + .. index:: triple: module; search; path + + A list of strings that specifies the search path for modules. Initialized from + the environment variable :envvar:`PYTHONPATH`, plus an installation-dependent + default. + + As initialized upon program startup, the first item of this list, ``path[0]``, + is the directory containing the script that was used to invoke the Python + interpreter. If the script directory is not available (e.g. if the interpreter + is invoked interactively or if the script is read from standard input), + ``path[0]`` is the empty string, which directs Python to search modules in the + current directory first. Notice that the script directory is inserted *before* + the entries inserted as a result of :envvar:`PYTHONPATH`. + + A program is free to modify this list for its own purposes. + + .. versionchanged:: 2.3 + Unicode strings are no longer ignored. + + +.. data:: platform + + This string contains a platform identifier, e.g. ``'sunos5'`` or ``'linux1'``. + This can be used to append platform-specific components to ``path``, for + instance. + + +.. data:: prefix + + A string giving the site-specific directory prefix where the platform + independent Python files are installed; by default, this is the string + ``'/usr/local'``. This can be set at build time with the :option:`--prefix` + argument to the :program:`configure` script. The main collection of Python + library modules is installed in the directory ``prefix + '/lib/pythonversion'`` + while the platform independent header files (all except :file:`pyconfig.h`) are + stored in ``prefix + '/include/pythonversion'``, where *version* is equal to + ``version[:3]``. + + +.. data:: ps1 + ps2 + + .. 
index:: + single: interpreter prompts + single: prompts, interpreter + + Strings specifying the primary and secondary prompt of the interpreter. These + are only defined if the interpreter is in interactive mode. Their initial + values in this case are ``'>>> '`` and ``'... '``. If a non-string object is + assigned to either variable, its :func:`str` is re-evaluated each time the + interpreter prepares to read a new interactive command; this can be used to + implement a dynamic prompt. + + +.. function:: setcheckinterval(interval) + + Set the interpreter's "check interval". This integer value determines how often + the interpreter checks for periodic things such as thread switches and signal + handlers. The default is ``100``, meaning the check is performed every 100 + Python virtual instructions. Setting it to a larger value may increase + performance for programs using threads. Setting it to a value ``<=`` 0 checks + every virtual instruction, maximizing responsiveness as well as overhead. + + +.. function:: setdefaultencoding(name) + + Set the current default string encoding used by the Unicode implementation. If + *name* does not match any available encoding, :exc:`LookupError` is raised. + This function is only intended to be used by the :mod:`site` module + implementation and, where needed, by :mod:`sitecustomize`. Once used by the + :mod:`site` module, it is removed from the :mod:`sys` module's namespace. + + .. % Note that \refmodule{site} is not imported if + .. % the \programopt{-S} option is passed to the interpreter, in which + .. % case this function will remain available. + + .. versionadded:: 2.0 + + +.. function:: setdlopenflags(n) + + Set the flags used by the interpreter for :cfunc:`dlopen` calls, such as when + the interpreter loads extension modules. Among other things, this will enable a + lazy resolving of symbols when importing a module, if called as + ``sys.setdlopenflags(0)``. 
To share symbols across extension modules, call as + ``sys.setdlopenflags(dl.RTLD_NOW | dl.RTLD_GLOBAL)``. Symbolic names for the + flag values can be found either in the :mod:`dl` module or in the :mod:`DLFCN` + module. If :mod:`DLFCN` is not available, it can be generated from + :file:`/usr/include/dlfcn.h` using the :program:`h2py` script. Availability: + Unix. + + .. versionadded:: 2.2 + + +.. function:: setprofile(profilefunc) + + .. index:: + single: profile function + single: profiler + + Set the system's profile function, which allows you to implement a Python source + code profiler in Python. See chapter :ref:`profile` for more information on the + Python profiler. The system's profile function is called similarly to the + system's trace function (see :func:`settrace`), but it isn't called for each + executed line of code (only on call and return, but the return event is reported + even when an exception has been set). The function is thread-specific, but + there is no way for the profiler to know about context switches between threads, + so it does not make sense to use this in the presence of multiple threads. Also, + its return value is not used, so it can simply return ``None``. + + +.. function:: setrecursionlimit(limit) + + Set the maximum depth of the Python interpreter stack to *limit*. This limit + prevents infinite recursion from causing an overflow of the C stack and crashing + Python. + + The highest possible limit is platform-dependent. A user may need to set the + limit higher when she has a program that requires deep recursion and a platform + that supports a higher limit. This should be done with care, because a too-high + limit can lead to a crash. + + +.. function:: settrace(tracefunc) + + .. index:: + single: trace function + single: debugger + + Set the system's trace function, which allows you to implement a Python + source code debugger in Python. See section :ref:`debugger-hooks` in the + chapter on the Python debugger.
The function is thread-specific; for a + debugger to support multiple threads, it must be registered using + :func:`settrace` for each thread being debugged. + + .. note:: + + The :func:`settrace` function is intended only for implementing debuggers, + profilers, coverage tools and the like. Its behavior is part of the + implementation platform, rather than part of the language definition, and thus + may not be available in all Python implementations. + + +.. function:: settscdump(on_flag) + + Activate dumping of VM measurements using the Pentium timestamp counter, if + *on_flag* is true. Deactivate these dumps if *on_flag* is off. The function is + available only if Python was compiled with :option:`--with-tsc`. To understand + the output of this dump, read :file:`Python/ceval.c` in the Python sources. + + .. versionadded:: 2.4 + + +.. data:: stdin + stdout + stderr + + File objects corresponding to the interpreter's standard input, output and error + streams. ``stdin`` is used for all interpreter input except for scripts. + ``stdout`` is used for the output of :keyword:`print` and expression statements. + The interpreter's own prompts and (almost all of) its error messages go to + ``stderr``. ``stdout`` and ``stderr`` needn't be built-in file objects: any + object is acceptable as long as it has a :meth:`write` method that takes a + string argument. (Changing these objects doesn't affect the standard I/O + streams of processes executed by :func:`os.popen`, :func:`os.system` or the + :func:`exec\*` family of functions in the :mod:`os` module.) + + +.. data:: __stdin__ + __stdout__ + __stderr__ + + These objects contain the original values of ``stdin``, ``stderr`` and + ``stdout`` at the start of the program. They are used during finalization, and + could be useful to restore the actual files to known working file objects in + case they have been overwritten with a broken object. + + +.. 
data:: tracebacklimit + + When this variable is set to an integer value, it determines the maximum number + of levels of traceback information printed when an unhandled exception occurs. + The default is ``1000``. When set to ``0`` or less, all traceback information + is suppressed and only the exception type and value are printed. + + +.. data:: version + + A string containing the version number of the Python interpreter plus additional + information on the build number and compiler used. It has a value of the form + ``'version (#build_number, build_date, build_time) [compiler]'``. The first + three characters are used to identify the version in the installation + directories (where appropriate on each platform). An example:: + + >>> import sys + >>> sys.version + '1.5.2 (#0 Apr 13 1999, 10:51:12) [MSC 32 bit (Intel)]' + + +.. data:: api_version + + The C API version for this interpreter. Programmers may find this useful when + debugging version conflicts between Python and extension modules. + + .. versionadded:: 2.3 + + +.. data:: version_info + + A tuple containing the five components of the version number: *major*, *minor*, + *micro*, *releaselevel*, and *serial*. All values except *releaselevel* are + integers; the release level is ``'alpha'``, ``'beta'``, ``'candidate'``, or + ``'final'``. The ``version_info`` value corresponding to the Python version 2.0 + is ``(2, 0, 0, 'final', 0)``. + + .. versionadded:: 2.0 + + +.. data:: warnoptions + + This is an implementation detail of the warnings framework; do not modify this + value. Refer to the :mod:`warnings` module for more information on the warnings + framework. + + +.. data:: winver + + The version number used to form registry keys on Windows platforms. This is + stored as string resource 1000 in the Python DLL. The value is normally the + first three characters of :const:`version`. 
It is provided in the :mod:`sys` + module for informational purposes; modifying this value has no effect on the + registry keys used by Python. Availability: Windows. + + +.. seealso:: + + Module :mod:`site` + This describes how to use .pth files to extend ``sys.path``. + diff --git a/Doc/library/syslog.rst b/Doc/library/syslog.rst new file mode 100644 index 0000000..549f26b --- /dev/null +++ b/Doc/library/syslog.rst @@ -0,0 +1,66 @@ + +:mod:`syslog` --- Unix syslog library routines +============================================== + +.. module:: syslog + :platform: Unix + :synopsis: An interface to the Unix syslog library routines. + + +This module provides an interface to the Unix ``syslog`` library routines. +Refer to the Unix manual pages for a detailed description of the ``syslog`` +facility. + +The module defines the following functions: + + +.. function:: syslog([priority,] message) + + Send the string *message* to the system logger. A trailing newline is added if + necessary. Each message is tagged with a priority composed of a *facility* and + a *level*. The optional *priority* argument, which defaults to + :const:`LOG_INFO`, determines the message priority. If the facility is not + encoded in *priority* using logical-or (``LOG_INFO | LOG_USER``), the value + given in the :func:`openlog` call is used. + + +.. function:: openlog(ident[, logopt[, facility]]) + + Logging options other than the defaults can be set by explicitly opening the log + file with :func:`openlog` prior to calling :func:`syslog`. The defaults are + (usually) *ident* = ``'syslog'``, *logopt* = ``0``, *facility* = + :const:`LOG_USER`. The *ident* argument is a string which is prepended to every + message. The optional *logopt* argument is a bit field - see below for possible + values to combine. The optional *facility* argument sets the default facility + for messages which do not have a facility explicitly encoded. + + +.. function:: closelog() + + Close the log file. + + +.. 
function:: setlogmask(maskpri) + + Set the priority mask to *maskpri* and return the previous mask value. Calls to + :func:`syslog` with a priority level not set in *maskpri* are ignored. The + default is to log all priorities. The function ``LOG_MASK(pri)`` calculates the + mask for the individual priority *pri*. The function ``LOG_UPTO(pri)`` + calculates the mask for all priorities up to and including *pri*. + +The module defines the following constants: + +Priority levels (high to low): + :const:`LOG_EMERG`, :const:`LOG_ALERT`, :const:`LOG_CRIT`, :const:`LOG_ERR`, + :const:`LOG_WARNING`, :const:`LOG_NOTICE`, :const:`LOG_INFO`, + :const:`LOG_DEBUG`. + +Facilities: + :const:`LOG_KERN`, :const:`LOG_USER`, :const:`LOG_MAIL`, :const:`LOG_DAEMON`, + :const:`LOG_AUTH`, :const:`LOG_LPR`, :const:`LOG_NEWS`, :const:`LOG_UUCP`, + :const:`LOG_CRON` and :const:`LOG_LOCAL0` to :const:`LOG_LOCAL7`. + +Log options: + :const:`LOG_PID`, :const:`LOG_CONS`, :const:`LOG_NDELAY`, :const:`LOG_NOWAIT` + and :const:`LOG_PERROR` if defined in ``<syslog.h>``. + diff --git a/Doc/library/tabnanny.rst b/Doc/library/tabnanny.rst new file mode 100644 index 0000000..8032655 --- /dev/null +++ b/Doc/library/tabnanny.rst @@ -0,0 +1,70 @@ + +:mod:`tabnanny` --- Detection of ambiguous indentation +====================================================== + +.. module:: tabnanny + :synopsis: Tool for detecting white space related problems in Python source files in a + directory tree. +.. moduleauthor:: Tim Peters <tim_one@users.sourceforge.net> +.. sectionauthor:: Peter Funk <pf@artcom-gmbh.de> + + +.. % rudimentary documentation based on module comments, by Peter Funk +.. % <pf@artcom-gmbh.de> + +For the time being this module is intended to be called as a script. However it +is possible to import it into an IDE and use the function :func:`check` +described below. + +.. warning:: + + The API provided by this module is likely to change in future releases; such + changes may not be backward compatible. 
+ + +.. function:: check(file_or_dir) + + If *file_or_dir* is a directory and not a symbolic link, then recursively + descend the directory tree named by *file_or_dir*, checking all :file:`.py` + files along the way. If *file_or_dir* is an ordinary Python source file, it is + checked for whitespace related problems. The diagnostic messages are written to + standard output using the print statement. + + +.. data:: verbose + + Flag indicating whether to print verbose messages. This is incremented by the + ``-v`` option if called as a script. + + +.. data:: filename_only + + Flag indicating whether to print only the filenames of files containing + whitespace related problems. This is set to true by the ``-q`` option if called + as a script. + + +.. exception:: NannyNag + + Raised by :func:`tokeneater` if detecting an ambiguous indent. Captured and + handled in :func:`check`. + + +.. function:: tokeneater(type, token, start, end, line) + + This function is used by :func:`check` as a callback parameter to the function + :func:`tokenize.tokenize`. + +.. % XXX FIXME: Document \function{errprint}, +.. % \function{format_witnesses} \class{Whitespace} +.. % check_equal, indents +.. % \function{reset_globals} + + +.. seealso:: + + Module :mod:`tokenize` + Lexical scanner for Python source code. + + .. % XXX may be add a reference to IDLE? + diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst new file mode 100644 index 0000000..a0cd673 --- /dev/null +++ b/Doc/library/tarfile.rst @@ -0,0 +1,738 @@ +.. _tarfile-mod: + +:mod:`tarfile` --- Read and write tar archive files +=================================================== + +.. module:: tarfile + :synopsis: Read and write tar-format archive files. + + +.. versionadded:: 2.3 + +.. moduleauthor:: Lars Gustäbel <lars@gustaebel.de> +.. sectionauthor:: Lars Gustäbel <lars@gustaebel.de> + + +The :mod:`tarfile` module makes it possible to read and create tar archives. 
+Some facts and figures: + +* reads and writes :mod:`gzip` and :mod:`bzip2` compressed archives. + +* read/write support for the POSIX.1-1988 (ustar) format. + +* read/write support for the GNU tar format including *longname* and *longlink* + extensions, read-only support for the *sparse* extension. + +* read/write support for the POSIX.1-2001 (pax) format. + + .. versionadded:: 2.6 + +* handles directories, regular files, hardlinks, symbolic links, fifos, + character devices and block devices and is able to acquire and restore file + information like timestamp, access permissions and owner. + +* can handle tape devices. + + +.. function:: open(name[, mode[, fileobj[, bufsize]]], **kwargs) + + Return a :class:`TarFile` object for the pathname *name*. For detailed + information on :class:`TarFile` objects and the keyword arguments that are + allowed, see :ref:`tarfile-objects`. + + *mode* has to be a string of the form ``'filemode[:compression]'``, it defaults + to ``'r'``. Here is a full list of mode combinations: + + +------------------+---------------------------------------------+ + | mode | action | + +==================+=============================================+ + | ``'r' or 'r:*'`` | Open for reading with transparent | + | | compression (recommended). | + +------------------+---------------------------------------------+ + | ``'r:'`` | Open for reading exclusively without | + | | compression. | + +------------------+---------------------------------------------+ + | ``'r:gz'`` | Open for reading with gzip compression. | + +------------------+---------------------------------------------+ + | ``'r:bz2'`` | Open for reading with bzip2 compression. | + +------------------+---------------------------------------------+ + | ``'a' or 'a:'`` | Open for appending with no compression. The | + | | file is created if it does not exist. | + +------------------+---------------------------------------------+ + | ``'w' or 'w:'`` | Open for uncompressed writing. 
| + +------------------+---------------------------------------------+ + | ``'w:gz'`` | Open for gzip compressed writing. | + +------------------+---------------------------------------------+ + | ``'w:bz2'`` | Open for bzip2 compressed writing. | + +------------------+---------------------------------------------+ + + Note that ``'a:gz'`` or ``'a:bz2'`` is not possible. If *mode* is not suitable + to open a certain (compressed) file for reading, :exc:`ReadError` is raised. Use + *mode* ``'r'`` to avoid this. If a compression method is not supported, + :exc:`CompressionError` is raised. + + If *fileobj* is specified, it is used as an alternative to a file object opened + for *name*. It is supposed to be at position 0. + + For special purposes, there is a second format for *mode*: + ``'filemode|[compression]'``. :func:`open` will return a :class:`TarFile` + object that processes its data as a stream of blocks. No random seeking will + be done on the file. If given, *fileobj* may be any object that has a + :meth:`read` or :meth:`write` method (depending on the *mode*). *bufsize* + specifies the blocksize and defaults to ``20 * 512`` bytes. Use this variant + in combination with e.g. ``sys.stdin``, a socket file object or a tape + device. However, such a :class:`TarFile` object is limited in that it does + not allow random access; see :ref:`tar-examples`. The currently + possible modes: + + +-------------+--------------------------------------------+ + | Mode | Action | + +=============+============================================+ + | ``'r|*'`` | Open a *stream* of tar blocks for reading | + | | with transparent compression. | + +-------------+--------------------------------------------+ + | ``'r|'`` | Open a *stream* of uncompressed tar blocks | + | | for reading. | + +-------------+--------------------------------------------+ + | ``'r|gz'`` | Open a gzip compressed *stream* for | + | | reading.
| + +-------------+--------------------------------------------+ + | ``'r|bz2'`` | Open a bzip2 compressed *stream* for | + | | reading. | + +-------------+--------------------------------------------+ + | ``'w|'`` | Open an uncompressed *stream* for writing. | + +-------------+--------------------------------------------+ + | ``'w|gz'`` | Open a gzip compressed *stream* for | + | | writing. | + +-------------+--------------------------------------------+ + | ``'w|bz2'`` | Open a bzip2 compressed *stream* for | + | | writing. | + +-------------+--------------------------------------------+ + + +.. class:: TarFile + + Class for reading and writing tar archives. Do not use this class directly, + better use :func:`open` instead. See :ref:`tarfile-objects`. + + +.. function:: is_tarfile(name) + + Return :const:`True` if *name* is a tar archive file that the :mod:`tarfile` + module can read. + + +.. class:: TarFileCompat(filename[, mode[, compression]]) + + Class for limited access to tar archives with a :mod:`zipfile`\ -like interface. + Please consult the documentation of the :mod:`zipfile` module for more details. + *compression* must be one of the following constants: + + + .. data:: TAR_PLAIN + + Constant for an uncompressed tar archive. + + + .. data:: TAR_GZIPPED + + Constant for a :mod:`gzip` compressed tar archive. + + +.. exception:: TarError + + Base class for all :mod:`tarfile` exceptions. + + +.. exception:: ReadError + + Is raised when a tar archive is opened that either cannot be handled by the + :mod:`tarfile` module or is somehow invalid. + + +.. exception:: CompressionError + + Is raised when a compression method is not supported or when the data cannot be + decoded properly. + + +.. exception:: StreamError + + Is raised for the limitations that are typical for stream-like :class:`TarFile` + objects. + + +.. exception:: ExtractError + + Is raised for *non-fatal* errors when using :meth:`extract`, but only if + :attr:`TarFile.errorlevel`\ ``== 2``. 
+ + +.. exception:: HeaderError + + Is raised by :meth:`frombuf` if the buffer it gets is invalid. + + .. versionadded:: 2.6 + +Each of the following constants defines a tar archive format that the +:mod:`tarfile` module is able to create. See section :ref:`tar-formats` for +details. + + +.. data:: USTAR_FORMAT + + POSIX.1-1988 (ustar) format. + + +.. data:: GNU_FORMAT + + GNU tar format. + + +.. data:: PAX_FORMAT + + POSIX.1-2001 (pax) format. + + +.. data:: DEFAULT_FORMAT + + The default format for creating archives. This is currently :const:`GNU_FORMAT`. + + +.. seealso:: + + Module :mod:`zipfile` + Documentation of the :mod:`zipfile` standard module. + + `GNU tar manual, Basic Tar Format <http://www.gnu.org/software/tar/manual/html_node/tar_134.html#SEC134>`_ + Documentation for tar archive files, including GNU tar extensions. + +.. % ----------------- +.. % TarFile Objects +.. % ----------------- + + +.. _tarfile-objects: + +TarFile Objects +--------------- + +The :class:`TarFile` object provides an interface to a tar archive. A tar +archive is a sequence of blocks. An archive member (a stored file) is made up of +a header block followed by data blocks. It is possible to store a file in a tar +archive several times. Each archive member is represented by a :class:`TarInfo` +object, see :ref:`tarinfo-objects` for details. + + +.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=None, errors=None, pax_headers=None, debug=0, errorlevel=0) + + All following arguments are optional and can be accessed as instance attributes + as well. + + *name* is the pathname of the archive. It can be omitted if *fileobj* is given. + In this case, the file object's :attr:`name` attribute is used if it exists. + + *mode* is either ``'r'`` to read from an existing archive, ``'a'`` to append + data to an existing file or ``'w'`` to create a new file overwriting an existing + one. 
+ + If *fileobj* is given, it is used for reading or writing data. If it can be + determined, *mode* is overridden by *fileobj*'s mode. *fileobj* will be used + from position 0. + + .. note:: + + *fileobj* is not closed, when :class:`TarFile` is closed. + + *format* controls the archive format. It must be one of the constants + :const:`USTAR_FORMAT`, :const:`GNU_FORMAT` or :const:`PAX_FORMAT` that are + defined at module level. + + .. versionadded:: 2.6 + + The *tarinfo* argument can be used to replace the default :class:`TarInfo` class + with a different one. + + .. versionadded:: 2.6 + + If *dereference* is ``False``, add symbolic and hard links to the archive. If it + is ``True``, add the content of the target files to the archive. This has no + effect on systems that do not support symbolic links. + + If *ignore_zeros* is ``False``, treat an empty block as the end of the archive. + If it is *True*, skip empty (and invalid) blocks and try to get as many members + as possible. This is only useful for reading concatenated or damaged archives. + + *debug* can be set from ``0`` (no debug messages) up to ``3`` (all debug + messages). The messages are written to ``sys.stderr``. + + If *errorlevel* is ``0``, all errors are ignored when using :meth:`extract`. + Nevertheless, they appear as error messages in the debug output, when debugging + is enabled. If ``1``, all *fatal* errors are raised as :exc:`OSError` or + :exc:`IOError` exceptions. If ``2``, all *non-fatal* errors are raised as + :exc:`TarError` exceptions as well. + + The *encoding* and *errors* arguments control the way strings are converted to + unicode objects and vice versa. The default settings will work for most users. + See section :ref:`tar-unicode` for in-depth information. + + .. versionadded:: 2.6 + + The *pax_headers* argument is an optional dictionary of unicode strings which + will be added as a pax global header if *format* is :const:`PAX_FORMAT`. + + .. versionadded:: 2.6 + + +.. 
method:: TarFile.open(...) + + Alternative constructor. The :func:`open` function on module level is actually a + shortcut to this classmethod. See section :ref:`tarfile-mod` for details. + + +.. method:: TarFile.getmember(name) + + Return a :class:`TarInfo` object for member *name*. If *name* can not be found + in the archive, :exc:`KeyError` is raised. + + .. note:: + + If a member occurs more than once in the archive, its last occurrence is assumed + to be the most up-to-date version. + + +.. method:: TarFile.getmembers() + + Return the members of the archive as a list of :class:`TarInfo` objects. The + list has the same order as the members in the archive. + + +.. method:: TarFile.getnames() + + Return the members as a list of their names. It has the same order as the list + returned by :meth:`getmembers`. + + +.. method:: TarFile.list(verbose=True) + + Print a table of contents to ``sys.stdout``. If *verbose* is :const:`False`, + only the names of the members are printed. If it is :const:`True`, output + similar to that of :program:`ls -l` is produced. + + +.. method:: TarFile.next() + + Return the next member of the archive as a :class:`TarInfo` object, when + :class:`TarFile` is opened for reading. Return ``None`` if there is no more + available. + + +.. method:: TarFile.extractall([path[, members]]) + + Extract all members from the archive to the current working directory or + directory *path*. If optional *members* is given, it must be a subset of the + list returned by :meth:`getmembers`. Directory information like owner, + modification time and permissions are set after all members have been extracted. + This is done to work around two problems: A directory's modification time is + reset each time a file is created in it. And, if a directory's permissions do + not allow writing, extracting files to it will fail. + + .. versionadded:: 2.5 + + +.. 
method:: TarFile.extract(member[, path]) + + Extract a member from the archive to the current working directory, using its + full name. Its file information is extracted as accurately as possible. *member* + may be a filename or a :class:`TarInfo` object. You can specify a different + directory using *path*. + + .. note:: + + Because the :meth:`extract` method allows random access to a tar archive there + are some issues you must take care of yourself. See the description for + :meth:`extractall` above. + + +.. method:: TarFile.extractfile(member) + + Extract a member from the archive as a file object. *member* may be a filename + or a :class:`TarInfo` object. If *member* is a regular file, a file-like object + is returned. If *member* is a link, a file-like object is constructed from the + link's target. If *member* is none of the above, ``None`` is returned. + + .. note:: + + The file-like object is read-only and provides the following methods: + :meth:`read`, :meth:`readline`, :meth:`readlines`, :meth:`seek`, :meth:`tell`. + + +.. method:: TarFile.add(name[, arcname[, recursive[, exclude]]]) + + Add the file *name* to the archive. *name* may be any type of file (directory, + fifo, symbolic link, etc.). If given, *arcname* specifies an alternative name + for the file in the archive. Directories are added recursively by default. This + can be avoided by setting *recursive* to :const:`False`. If *exclude* is given + it must be a function that takes one filename argument and returns a boolean + value. Depending on this value the respective file is either excluded + (:const:`True`) or added (:const:`False`). + + .. versionchanged:: 2.6 + Added the *exclude* parameter. + + +.. method:: TarFile.addfile(tarinfo[, fileobj]) + + Add the :class:`TarInfo` object *tarinfo* to the archive. If *fileobj* is given, + ``tarinfo.size`` bytes are read from it and added to the archive. You can + create :class:`TarInfo` objects using :meth:`gettarinfo`. + + .. 
note:: + + On Windows platforms, *fileobj* should always be opened with mode ``'rb'`` to + avoid irritation about the file size. + + +.. method:: TarFile.gettarinfo([name[, arcname[, fileobj]]]) + + Create a :class:`TarInfo` object for either the file *name* or the file object + *fileobj* (using :func:`os.fstat` on its file descriptor). You can modify some + of the :class:`TarInfo`'s attributes before you add it using :meth:`addfile`. + If given, *arcname* specifies an alternative name for the file in the archive. + + +.. method:: TarFile.close() + + Close the :class:`TarFile`. In write mode, two finishing zero blocks are + appended to the archive. + + +.. attribute:: TarFile.posix + + Setting this to :const:`True` is equivalent to setting the :attr:`format` + attribute to :const:`USTAR_FORMAT`, :const:`False` is equivalent to + :const:`GNU_FORMAT`. + + .. versionchanged:: 2.4 + *posix* defaults to :const:`False`. + + .. deprecated:: 2.6 + Use the :attr:`format` attribute instead. + + +.. attribute:: TarFile.pax_headers + + A dictionary containing key-value pairs of pax global headers. + + .. versionadded:: 2.6 + +.. % ----------------- +.. % TarInfo Objects +.. % ----------------- + + +.. _tarinfo-objects: + +TarInfo Objects +--------------- + +A :class:`TarInfo` object represents one member in a :class:`TarFile`. Aside +from storing all required attributes of a file (like file type, size, time, +permissions, owner etc.), it provides some useful methods to determine its type. +It does *not* contain the file's data itself. + +:class:`TarInfo` objects are returned by :class:`TarFile`'s methods +:meth:`getmember`, :meth:`getmembers` and :meth:`gettarinfo`. + + +.. class:: TarInfo([name]) + + Create a :class:`TarInfo` object. + + +.. method:: TarInfo.frombuf(buf) + + Create and return a :class:`TarInfo` object from string buffer *buf*. + + .. versionadded:: 2.6 + Raises :exc:`HeaderError` if the buffer is invalid. + + +.. 
method:: TarInfo.fromtarfile(tarfile) + + Read the next member from the :class:`TarFile` object *tarfile* and return it as + a :class:`TarInfo` object. + + .. versionadded:: 2.6 + + +.. method:: TarInfo.tobuf([format[, encoding[, errors]]]) + + Create a string buffer from a :class:`TarInfo` object. For information on the + arguments see the constructor of the :class:`TarFile` class. + + .. versionchanged:: 2.6 + The arguments were added. + +A ``TarInfo`` object has the following public data attributes: + + +.. attribute:: TarInfo.name + + Name of the archive member. + + +.. attribute:: TarInfo.size + + Size in bytes. + + +.. attribute:: TarInfo.mtime + + Time of last modification. + + +.. attribute:: TarInfo.mode + + Permission bits. + + +.. attribute:: TarInfo.type + + File type. *type* is usually one of these constants: :const:`REGTYPE`, + :const:`AREGTYPE`, :const:`LNKTYPE`, :const:`SYMTYPE`, :const:`DIRTYPE`, + :const:`FIFOTYPE`, :const:`CONTTYPE`, :const:`CHRTYPE`, :const:`BLKTYPE`, + :const:`GNUTYPE_SPARSE`. To determine the type of a :class:`TarInfo` object + more conveniently, use the ``is*()`` methods below. + + +.. attribute:: TarInfo.linkname + + Name of the target file, which is only present in :class:`TarInfo` objects + of type :const:`LNKTYPE` and :const:`SYMTYPE`. + + +.. attribute:: TarInfo.uid + + User ID of the user who originally stored this member. + + +.. attribute:: TarInfo.gid + + Group ID of the user who originally stored this member. + + +.. attribute:: TarInfo.uname + + User name. + + +.. attribute:: TarInfo.gname + + Group name. + + +.. attribute:: TarInfo.pax_headers + + A dictionary containing key-value pairs of an associated pax extended header. + + .. versionadded:: 2.6 + +A :class:`TarInfo` object also provides some convenient query methods: + + +.. method:: TarInfo.isfile() + + Return :const:`True` if the :class:`TarInfo` object is a regular file. + + +.. method:: TarInfo.isreg() + + Same as :meth:`isfile`. + + +.. 
method:: TarInfo.isdir() + + Return :const:`True` if it is a directory. + + +.. method:: TarInfo.issym() + + Return :const:`True` if it is a symbolic link. + + +.. method:: TarInfo.islnk() + + Return :const:`True` if it is a hard link. + + +.. method:: TarInfo.ischr() + + Return :const:`True` if it is a character device. + + +.. method:: TarInfo.isblk() + + Return :const:`True` if it is a block device. + + +.. method:: TarInfo.isfifo() + + Return :const:`True` if it is a FIFO. + + +.. method:: TarInfo.isdev() + + Return :const:`True` if it is one of character device, block device or FIFO. + +.. % ------------------------ +.. % Examples +.. % ------------------------ + + +.. _tar-examples: + +Examples +-------- + +How to extract an entire tar archive to the current working directory:: + + import tarfile + tar = tarfile.open("sample.tar.gz") + tar.extractall() + tar.close() + +How to create an uncompressed tar archive from a list of filenames:: + + import tarfile + tar = tarfile.open("sample.tar", "w") + for name in ["foo", "bar", "quux"]: + tar.add(name) + tar.close() + +How to read a gzip compressed tar archive and display some member information:: + + import tarfile + tar = tarfile.open("sample.tar.gz", "r:gz") + for tarinfo in tar: + print tarinfo.name, "is", tarinfo.size, "bytes in size and is", + if tarinfo.isreg(): + print "a regular file." + elif tarinfo.isdir(): + print "a directory." + else: + print "something else." 
+ tar.close() + +How to create a tar archive with faked information:: + + import tarfile + tar = tarfile.open("sample.tar.gz", "w:gz") + for name in namelist: + tarinfo = tar.gettarinfo(name, "fakeproj-1.0/" + name) + tarinfo.uid = 123 + tarinfo.gid = 456 + tarinfo.uname = "johndoe" + tarinfo.gname = "fake" + tar.addfile(tarinfo, file(name)) + tar.close() + +The *only* way to extract an uncompressed tar stream from ``sys.stdin``:: + + import sys + import tarfile + tar = tarfile.open(mode="r|", fileobj=sys.stdin) + for tarinfo in tar: + tar.extract(tarinfo) + tar.close() + +.. % ------------ +.. % Tar format +.. % ------------ + + +.. _tar-formats: + +Supported tar formats +--------------------- + +There are three tar formats that can be created with the :mod:`tarfile` module: + +* The POSIX.1-1988 ustar format (:const:`USTAR_FORMAT`). It supports filenames + up to a length of at best 256 characters and linknames up to 100 characters. The + maximum file size is 8 gigabytes. This is an old and limited but widely + supported format. + +* The GNU tar format (:const:`GNU_FORMAT`). It supports long filenames and + linknames, files bigger than 8 gigabytes and sparse files. It is the de facto + standard on GNU/Linux systems. :mod:`tarfile` fully supports the GNU tar + extensions for long names, sparse file support is read-only. + +* The POSIX.1-2001 pax format (:const:`PAX_FORMAT`). It is the most flexible + format with virtually no limits. It supports long filenames and linknames, large + files and stores pathnames in a portable way. However, not all tar + implementations today are able to handle pax archives properly. + + The *pax* format is an extension to the existing *ustar* format. It uses extra + headers for information that cannot be stored otherwise. There are two flavours + of pax headers: Extended headers only affect the subsequent file header, global + headers are valid for the complete archive and affect all following files. 
All + the data in a pax header is encoded in *UTF-8* for portability reasons. + +There are some more variants of the tar format which can be read, but not +created: + +* The ancient V7 format. This is the first tar format from Unix Seventh Edition, + storing only regular files and directories. Names must not be longer than 100 + characters, there is no user/group name information. Some archives have + miscalculated header checksums in case of fields with non-ASCII characters. + +* The SunOS tar extended format. This format is a variant of the POSIX.1-2001 + pax format, but is not compatible. + +.. % ---------------- +.. % Unicode issues +.. % ---------------- + + +.. _tar-unicode: + +Unicode issues +-------------- + +The tar format was originally conceived to make backups on tape drives with the +main focus on preserving file system information. Nowadays tar archives are +commonly used for file distribution and exchanging archives over networks. One +problem of the original format (that all other formats are merely variants of) +is that there is no concept of supporting different character encodings. For +example, an ordinary tar archive created on a *UTF-8* system cannot be read +correctly on a *Latin-1* system if it contains non-ASCII characters. Names (i.e. +filenames, linknames, user/group names) containing these characters will appear +damaged. Unfortunately, there is no way to autodetect the encoding of an +archive. + +The pax format was designed to solve this problem. It stores non-ASCII names +using the universal character encoding *UTF-8*. When a pax archive is read, +these *UTF-8* names are converted to the encoding of the local file system. + +The details of unicode conversion are controlled by the *encoding* and *errors* +keyword arguments of the :class:`TarFile` class. + +The default value for *encoding* is the local character encoding. It is deduced +from :func:`sys.getfilesystemencoding` and :func:`sys.getdefaultencoding`. 
In +read mode, *encoding* is used exclusively to convert unicode names from a pax +archive to strings in the local character encoding. In write mode, the use of +*encoding* depends on the chosen archive format. In case of :const:`PAX_FORMAT`, +input names that contain non-ASCII characters need to be decoded before being +stored as *UTF-8* strings. The other formats do not make use of *encoding* +unless unicode objects are used as input names. These are converted to 8-bit +character strings before they are added to the archive. + +The *errors* argument defines how characters are treated that cannot be +converted to or from *encoding*. Possible values are listed in section +:ref:`codec-base-classes`. In read mode, there is an additional scheme +``'utf-8'`` which means that bad characters are replaced by their *UTF-8* +representation. This is the default scheme. In write mode the default value for +*errors* is ``'strict'`` to ensure that name information is not altered +unnoticed. + diff --git a/Doc/library/telnetlib.rst b/Doc/library/telnetlib.rst new file mode 100644 index 0000000..f6ab852 --- /dev/null +++ b/Doc/library/telnetlib.rst @@ -0,0 +1,246 @@ + +:mod:`telnetlib` --- Telnet client +================================== + +.. module:: telnetlib + :synopsis: Telnet client class. +.. sectionauthor:: Skip Montanaro <skip@mojam.com> + + +.. index:: single: protocol; Telnet + +The :mod:`telnetlib` module provides a :class:`Telnet` class that implements the +Telnet protocol. See :rfc:`854` for details about the protocol. In addition, it +provides symbolic constants for the protocol characters (see below), and for the +telnet options. The symbolic names of the telnet options follow the definitions +in ``arpa/telnet.h``, with the leading ``TELOPT_`` removed. For symbolic names +of options which are traditionally not included in ``arpa/telnet.h``, see the +module source itself. 
+ +The symbolic constants for the telnet commands are: IAC, DONT, DO, WONT, WILL, +SE (Subnegotiation End), NOP (No Operation), DM (Data Mark), BRK (Break), IP +(Interrupt process), AO (Abort output), AYT (Are You There), EC (Erase +Character), EL (Erase Line), GA (Go Ahead), SB (Subnegotiation Begin). + + +.. class:: Telnet([host[, port[, timeout]]]) + + :class:`Telnet` represents a connection to a Telnet server. The instance is + initially not connected by default; the :meth:`open` method must be used to + establish a connection. Alternatively, the host name and optional port number + can be passed to the constructor too, in which case the connection to the server + will be established before the constructor returns. The optional *timeout* + parameter specifies a timeout in seconds for the connection attempt (if not + specified, or passed as ``None``, the global default timeout setting will be used). + + Do not reopen an already connected instance. + + This class has many :meth:`read_\*` methods. Note that some of them raise + :exc:`EOFError` when the end of the connection is read, because they can return + an empty string for other reasons. See the individual descriptions below. + + .. versionchanged:: 2.6 + *timeout* was added. + + +.. seealso:: + + :rfc:`854` - Telnet Protocol Specification + Definition of the Telnet protocol. + + +.. _telnet-objects: + +Telnet Objects +-------------- + +:class:`Telnet` instances have the following methods: + + +.. method:: Telnet.read_until(expected[, timeout]) + + Read until a given string, *expected*, is encountered or until *timeout* seconds + have passed. + + When no match is found, return whatever is available instead, possibly the empty + string. Raise :exc:`EOFError` if the connection is closed and no cooked data is + available. + + +.. method:: Telnet.read_all() + + Read all data until EOF; block until connection closed. + + +.. method:: Telnet.read_some() + + Read at least one byte of cooked data unless EOF is hit. 
Return ``''`` if EOF is + hit. Block if no data is immediately available. + + +.. method:: Telnet.read_very_eager() + + Read everything that can be without blocking in I/O (eager). + + Raise :exc:`EOFError` if connection closed and no cooked data available. Return + ``''`` if no cooked data available otherwise. Do not block unless in the midst + of an IAC sequence. + + +.. method:: Telnet.read_eager() + + Read readily available data. + + Raise :exc:`EOFError` if connection closed and no cooked data available. Return + ``''`` if no cooked data available otherwise. Do not block unless in the midst + of an IAC sequence. + + +.. method:: Telnet.read_lazy() + + Process and return data already in the queues (lazy). + + Raise :exc:`EOFError` if connection closed and no data available. Return ``''`` + if no cooked data available otherwise. Do not block unless in the midst of an + IAC sequence. + + +.. method:: Telnet.read_very_lazy() + + Return any data available in the cooked queue (very lazy). + + Raise :exc:`EOFError` if connection closed and no data available. Return ``''`` + if no cooked data available otherwise. This method never blocks. + + +.. method:: Telnet.read_sb_data() + + Return the data collected between a SB/SE pair (suboption begin/end). The + callback should access these data when it was invoked with a ``SE`` command. + This method never blocks. + + .. versionadded:: 2.3 + + +.. method:: Telnet.open(host[, port[, timeout]]) + + Connect to a host. The optional second argument is the port number, which + defaults to the standard Telnet port (23). The optional *timeout* parameter + specifies a timeout in seconds for the connection attempt (if not specified, or + passed as None, the global default timeout setting will be used). + + Do not try to reopen an already connected instance. + + .. versionchanged:: 2.6 + *timeout* was added. + + +.. method:: Telnet.msg(msg[, *args]) + + Print a debug message when the debug level is ``>`` 0. 
If extra arguments are + present, they are substituted in the message using the standard string + formatting operator. + + +.. method:: Telnet.set_debuglevel(debuglevel) + + Set the debug level. The higher the value of *debuglevel*, the more debug + output you get (on ``sys.stdout``). + + +.. method:: Telnet.close() + + Close the connection. + + +.. method:: Telnet.get_socket() + + Return the socket object used internally. + + +.. method:: Telnet.fileno() + + Return the file descriptor of the socket object used internally. + + +.. method:: Telnet.write(buffer) + + Write a string to the socket, doubling any IAC characters. This can block if the + connection is blocked. May raise :exc:`socket.error` if the connection is + closed. + + +.. method:: Telnet.interact() + + Interaction function, emulates a very dumb Telnet client. + + +.. method:: Telnet.mt_interact() + + Multithreaded version of :meth:`interact`. + + +.. method:: Telnet.expect(list[, timeout]) + + Read until one from a list of regular expressions matches. + + The first argument is a list of regular expressions, either compiled + (:class:`re.RegexObject` instances) or uncompiled (strings). The optional second + argument is a timeout, in seconds; the default is to block indefinitely. + + Return a tuple of three items: the index in the list of the first regular + expression that matches; the match object returned; and the text read up till + and including the match. + + If end of file is found and no text was read, raise :exc:`EOFError`. Otherwise, + when nothing matches, return ``(-1, None, text)`` where *text* is the text + received so far (may be the empty string if a timeout happened). + + If a regular expression ends with a greedy match (such as ``.*``) or if more + than one expression can match the same input, the results are non-deterministic, + and may depend on the I/O timing. + + +.. 
method:: Telnet.set_option_negotiation_callback(callback) + + Each time a telnet option is read on the input flow, this *callback* (if set) is + called with the following parameters : callback(telnet socket, command + (DO/DONT/WILL/WONT), option). No other action is done afterwards by telnetlib. + + +.. _telnet-example: + +Telnet Example +-------------- + +.. sectionauthor:: Peter Funk <pf@artcom-gmbh.de> + + +A simple example illustrating typical use:: + + import getpass + import sys + import telnetlib + + def raw_input(prompt): + sys.stdout.write(prompt) + sys.stdout.flush() + return sys.stdin.readline() + + HOST = "localhost" + user = raw_input("Enter your remote account: ") + password = getpass.getpass() + + tn = telnetlib.Telnet(HOST) + + tn.read_until("login: ") + tn.write(user + "\n") + if password: + tn.read_until("Password: ") + tn.write(password + "\n") + + tn.write("ls\n") + tn.write("exit\n") + + print tn.read_all() + diff --git a/Doc/library/tempfile.rst b/Doc/library/tempfile.rst new file mode 100644 index 0000000..cafdd05 --- /dev/null +++ b/Doc/library/tempfile.rst @@ -0,0 +1,216 @@ + +:mod:`tempfile` --- Generate temporary files and directories +============================================================ + +.. sectionauthor:: Zack Weinberg <zack@codesourcery.com> + + +.. module:: tempfile + :synopsis: Generate temporary files and directories. + + +.. index:: + pair: temporary; file name + pair: temporary; file + +This module generates temporary files and directories. It works on all +supported platforms. + +In version 2.3 of Python, this module was overhauled for enhanced security. It +now provides three new functions, :func:`NamedTemporaryFile`, :func:`mkstemp`, +and :func:`mkdtemp`, which should eliminate all remaining need to use the +insecure :func:`mktemp` function. Temporary file names created by this module +no longer contain the process ID; instead a string of six random characters is +used. 
+ +Also, all the user-callable functions now take additional arguments which allow +direct control over the location and name of temporary files. It is no longer +necessary to use the global *tempdir* and *template* variables. To maintain +backward compatibility, the argument order is somewhat odd; it is recommended to +use keyword arguments for clarity. + +The module defines the following user-callable functions: + + +.. function:: TemporaryFile([mode='w+b'[, bufsize=-1[, suffix[, prefix[, dir]]]]]) + + Return a file (or file-like) object that can be used as a temporary storage + area. The file is created using :func:`mkstemp`. It will be destroyed as soon + as it is closed (including an implicit close when the object is garbage + collected). Under Unix, the directory entry for the file is removed immediately + after the file is created. Other platforms do not support this; your code + should not rely on a temporary file created using this function having or not + having a visible name in the file system. + + The *mode* parameter defaults to ``'w+b'`` so that the file created can be read + and written without being closed. Binary mode is used so that it behaves + consistently on all platforms without regard for the data that is stored. + *bufsize* defaults to ``-1``, meaning that the operating system default is used. + + The *dir*, *prefix* and *suffix* parameters are passed to :func:`mkstemp`. + + +.. function:: NamedTemporaryFile([mode='w+b'[, bufsize=-1[, suffix[, prefix[, dir[, delete]]]]]]) + + This function operates exactly as :func:`TemporaryFile` does, except that the + file is guaranteed to have a visible name in the file system (on Unix, the + directory entry is not unlinked). That name can be retrieved from the + :attr:`name` member of the file object. Whether the name can be used to open + the file a second time, while the named temporary file is still open, varies + across platforms (it can be so used on Unix; it cannot on Windows NT or later). 
+ If *delete* is true (the default), the file is deleted as soon as it is closed. + + .. versionadded:: 2.3 + + .. versionchanged:: 2.6 + The *delete* parameter was added. + + +.. function:: SpooledTemporaryFile([max_size=0[, mode='w+b'[, bufsize=-1[, suffix[, prefix[, dir]]]]]]) + + This function operates exactly as :func:`TemporaryFile` does, except that data + is spooled in memory until the file size exceeds *max_size*, or until the file's + :meth:`fileno` method is called, at which point the contents are written to disk + and operation proceeds as with :func:`TemporaryFile`. + + The resulting file has one additional method, :meth:`rollover`, which causes the + file to roll over to an on-disk file regardless of its size. + + .. versionadded:: 2.6 + + +.. function:: mkstemp([suffix[, prefix[, dir[, text]]]]) + + Creates a temporary file in the most secure manner possible. There are no + race conditions in the file's creation, assuming that the platform properly + implements the :const:`os.O_EXCL` flag for :func:`os.open`. The file is + readable and writable only by the creating user ID. If the platform uses + permission bits to indicate whether a file is executable, the file is + executable by no one. The file descriptor is not inherited by child + processes. + + Unlike :func:`TemporaryFile`, the user of :func:`mkstemp` is responsible for + deleting the temporary file when done with it. + + If *suffix* is specified, the file name will end with that suffix, otherwise + there will be no suffix. :func:`mkstemp` does not put a dot between the file + name and the suffix; if you need one, put it at the beginning of *suffix*. + + If *prefix* is specified, the file name will begin with that prefix; otherwise, + a default prefix is used. + + If *dir* is specified, the file will be created in that directory; otherwise, + a default directory is used. 
The default directory is chosen from a + platform-dependent list, but the user of the application can control the + directory location by setting the *TMPDIR*, *TEMP* or *TMP* environment + variables. There is thus no guarantee that the generated filename will have + any nice properties, such as not requiring quoting when passed to external + commands via ``os.popen()``. + + If *text* is specified, it indicates whether to open the file in binary mode + (the default) or text mode. On some platforms, this makes no difference. + + :func:`mkstemp` returns a tuple containing an OS-level handle to an open file + (as would be returned by :func:`os.open`) and the absolute pathname of that + file, in that order. + + .. versionadded:: 2.3 + + +.. function:: mkdtemp([suffix[, prefix[, dir]]]) + + Creates a temporary directory in the most secure manner possible. There are no + race conditions in the directory's creation. The directory is readable, + writable, and searchable only by the creating user ID. + + The user of :func:`mkdtemp` is responsible for deleting the temporary directory + and its contents when done with it. + + The *prefix*, *suffix*, and *dir* arguments are the same as for :func:`mkstemp`. + + :func:`mkdtemp` returns the absolute pathname of the new directory. + + .. versionadded:: 2.3 + + +.. function:: mktemp([suffix[, prefix[, dir]]]) + + .. deprecated:: 2.3 + Use :func:`mkstemp` instead. + + Return an absolute pathname of a file that did not exist at the time the call is + made. The *prefix*, *suffix*, and *dir* arguments are the same as for + :func:`mkstemp`. + + .. warning:: + + Use of this function may introduce a security hole in your program. By the time + you get around to doing anything with the file name it returns, someone else may + have beaten you to the punch. + +The module uses two global variables that tell it how to construct a temporary +name. They are initialized at the first call to any of the functions above. 
+The caller may change them, but this is discouraged; use the appropriate +function arguments, instead. + + +.. data:: tempdir + + When set to a value other than ``None``, this variable defines the default value + for the *dir* argument to all the functions defined in this module. + + If ``tempdir`` is unset or ``None`` at any call to any of the above functions, + Python searches a standard list of directories and sets *tempdir* to the first + one which the calling user can create files in. The list is: + + #. The directory named by the :envvar:`TMPDIR` environment variable. + + #. The directory named by the :envvar:`TEMP` environment variable. + + #. The directory named by the :envvar:`TMP` environment variable. + + #. A platform-specific location: + + * On RiscOS, the directory named by the :envvar:`Wimp$ScrapDir` environment + variable. + + * On Windows, the directories :file:`C:\\TEMP`, :file:`C:\\TMP`, + :file:`\\TEMP`, and :file:`\\TMP`, in that order. + + * On all other platforms, the directories :file:`/tmp`, :file:`/var/tmp`, and + :file:`/usr/tmp`, in that order. + + #. As a last resort, the current working directory. + + +.. function:: gettempdir() + + Return the directory currently selected to create temporary files in. If + :data:`tempdir` is not ``None``, this simply returns its contents; otherwise, + the search described above is performed, and the result returned. + + +.. data:: template + + .. deprecated:: 2.0 + Use :func:`gettempprefix` instead. + + When set to a value other than ``None``, this variable defines the prefix of the + final component of the filenames returned by :func:`mktemp`. A string of six + random letters and digits is appended to the prefix to make the filename unique. + On Windows, the default prefix is :file:`~T`; on all other systems it is + :file:`tmp`. + + Older versions of this module used to require that ``template`` be set to + ``None`` after a call to :func:`os.fork`; this has not been necessary since + version 1.5.2. 
+ + +.. function:: gettempprefix() + + Return the filename prefix used to create temporary files. This does not + contain the directory component. Using this function is preferred over reading + the *template* variable directly. + + .. versionadded:: 1.5.2 + diff --git a/Doc/library/termios.rst b/Doc/library/termios.rst new file mode 100644 index 0000000..695faad --- /dev/null +++ b/Doc/library/termios.rst @@ -0,0 +1,111 @@ + +:mod:`termios` --- POSIX style tty control +========================================== + +.. module:: termios + :platform: Unix + :synopsis: POSIX style tty control. + + +.. index:: + pair: POSIX; I/O control + pair: tty; I/O control + +This module provides an interface to the POSIX calls for tty I/O control. For a +complete description of these calls, see the POSIX or Unix manual pages. It is +only available for those Unix versions that support POSIX *termios* style tty +I/O control (and then only if configured at installation time). + +All functions in this module take a file descriptor *fd* as their first +argument. This can be an integer file descriptor, such as returned by +``sys.stdin.fileno()``, or a file object, such as ``sys.stdin`` itself. + +This module also defines all the constants needed to work with the functions +provided here; these have the same name as their counterparts in C. Please +refer to your system documentation for more information on using these terminal +control interfaces. + +The module defines the following functions: + + +.. function:: tcgetattr(fd) + + Return a list containing the tty attributes for file descriptor *fd*, as + follows: ``[iflag, oflag, cflag, lflag, ispeed, ospeed, cc]`` where *cc* is a + list of the tty special characters (each a string of length 1, except the + items with indices :const:`VMIN` and :const:`VTIME`, which are integers when + these fields are defined). 
The interpretation of the flags and the speeds as + well as the indexing in the *cc* array must be done using the symbolic + constants defined in the :mod:`termios` module. + + +.. function:: tcsetattr(fd, when, attributes) + + Set the tty attributes for file descriptor *fd* from the *attributes*, which is + a list like the one returned by :func:`tcgetattr`. The *when* argument + determines when the attributes are changed: :const:`TCSANOW` to change + immediately, :const:`TCSADRAIN` to change after transmitting all queued output, + or :const:`TCSAFLUSH` to change after transmitting all queued output and + discarding all queued input. + + +.. function:: tcsendbreak(fd, duration) + + Send a break on file descriptor *fd*. A zero *duration* sends a break for + 0.25--0.5 seconds; a nonzero *duration* has a system-dependent meaning. + + +.. function:: tcdrain(fd) + + Wait until all output written to file descriptor *fd* has been transmitted. + + +.. function:: tcflush(fd, queue) + + Discard queued data on file descriptor *fd*. The *queue* selector specifies + which queue: :const:`TCIFLUSH` for the input queue, :const:`TCOFLUSH` for the + output queue, or :const:`TCIOFLUSH` for both queues. + + +.. function:: tcflow(fd, action) + + Suspend or resume input or output on file descriptor *fd*. The *action* + argument can be :const:`TCOOFF` to suspend output, :const:`TCOON` to restart + output, :const:`TCIOFF` to suspend input, or :const:`TCION` to restart input. + + +.. seealso:: + + Module :mod:`tty` + Convenience functions for common terminal control operations. + + +Example +------- + +.. _termios-example: + +Here's a function that prompts for a password with echoing turned off. Note the +technique using a separate :func:`tcgetattr` call and a :keyword:`try` ...
+:keyword:`finally` statement to ensure that the old tty attributes are restored +exactly no matter what happens:: + + def raw_input(prompt): + import sys + sys.stdout.write(prompt) + sys.stdout.flush() + return sys.stdin.readline() + + def getpass(prompt = "Password: "): + import termios, sys + fd = sys.stdin.fileno() + old = termios.tcgetattr(fd) + new = termios.tcgetattr(fd) + new[3] = new[3] & ~termios.ECHO # lflags + try: + termios.tcsetattr(fd, termios.TCSADRAIN, new) + passwd = raw_input(prompt) + finally: + termios.tcsetattr(fd, termios.TCSADRAIN, old) + return passwd + diff --git a/Doc/library/test.rst b/Doc/library/test.rst new file mode 100644 index 0000000..8972091 --- /dev/null +++ b/Doc/library/test.rst @@ -0,0 +1,317 @@ + +:mod:`test` --- Regression tests package for Python +=================================================== + +.. module:: test + :synopsis: Regression tests package containing the testing suite for Python. +.. sectionauthor:: Brett Cannon <brett@python.org> + + +The :mod:`test` package contains all regression tests for Python as well as the +modules :mod:`test.test_support` and :mod:`test.regrtest`. +:mod:`test.test_support` is used to enhance your tests while +:mod:`test.regrtest` drives the testing suite. + +Each module in the :mod:`test` package whose name starts with ``test_`` is a +testing suite for a specific module or feature. All new tests should be written +using the :mod:`unittest` or :mod:`doctest` module. Some older tests are +written using a "traditional" testing style that compares output printed to +``sys.stdout``; this style of test is considered deprecated. + + +.. seealso:: + + Module :mod:`unittest` + Writing PyUnit regression tests. + + Module :mod:`doctest` + Tests embedded in documentation strings. + + +.. _writing-tests: + +Writing Unit Tests for the :mod:`test` package +---------------------------------------------- + +.. 
% + +It is preferred that tests that use the :mod:`unittest` module follow a few +guidelines. One is to name the test module by starting it with ``test_`` and end +it with the name of the module being tested. The test methods in the test module +should start with ``test_`` and end with a description of what the method is +testing. This is needed so that the methods are recognized by the test driver as +test methods. Also, no documentation string for the method should be included. A +comment (such as ``# Tests function returns only True or False``) should be used +to provide documentation for test methods. This is done because documentation +strings get printed out if they exist and thus what test is being run is not +stated. + +A basic boilerplate is often used:: + + import unittest + from test import test_support + + class MyTestCase1(unittest.TestCase): + + # Only use setUp() and tearDown() if necessary + + def setUp(self): + ... code to execute in preparation for tests ... + + def tearDown(self): + ... code to execute to clean up after tests ... + + def test_feature_one(self): + # Test feature one. + ... testing code ... + + def test_feature_two(self): + # Test feature two. + ... testing code ... + + ... more test methods ... + + class MyTestCase2(unittest.TestCase): + ... same structure as MyTestCase1 ... + + ... more test classes ... + + def test_main(): + test_support.run_unittest(MyTestCase1, + MyTestCase2, + ... list other tests ... + ) + + if __name__ == '__main__': + test_main() + +This boilerplate code allows the testing suite to be run by :mod:`test.regrtest` +as well as on its own as a script. + +The goal for regression testing is to try to break code. This leads to a few +guidelines to be followed: + +* The testing suite should exercise all classes, functions, and constants. This + includes not just the external API that is to be presented to the outside world + but also "private" code. 
+ +* Whitebox testing (examining the code being tested when the tests are being + written) is preferred. Blackbox testing (testing only the published user + interface) is not complete enough to make sure all boundary and edge cases are + tested. + +* Make sure all possible values are tested including invalid ones. This makes + sure that not only all valid values are acceptable but also that improper values + are handled correctly. + +* Exhaust as many code paths as possible. Test where branching occurs and thus + tailor input to make sure as many different paths through the code are taken. + +* Add an explicit test for any bugs discovered for the tested code. This will + make sure that the error does not crop up again if the code is changed in the + future. + +* Make sure to clean up after your tests (such as close and remove all temporary + files). + +* If a test is dependent on a specific condition of the operating system then + verify the condition already exists before attempting the test. + +* Import as few modules as possible and do it as soon as possible. This + minimizes external dependencies of tests and also minimizes possible anomalous + behavior from side-effects of importing a module. + +* Try to maximize code reuse. On occasion, tests will vary by something as small + as what type of input is used. Minimize code duplication by subclassing a basic + test class with a class that specifies the input:: + + class TestFuncAcceptsSequences(unittest.TestCase): + + func = mySuperWhammyFunction + + def test_func(self): + self.func(self.arg) + + class AcceptLists(TestFuncAcceptsSequences): + arg = [1,2,3] + + class AcceptStrings(TestFuncAcceptsSequences): + arg = 'abc' + + class AcceptTuples(TestFuncAcceptsSequences): + arg = (1,2,3) + + +.. seealso:: + + Test Driven Development + A book by Kent Beck on writing tests before code. + + +.. 
_regrtest: + +Running tests using :mod:`test.regrtest` +---------------------------------------- + +:mod:`test.regrtest` can be used as a script to drive Python's regression test +suite. Running the script by itself automatically starts running all regression +tests in the :mod:`test` package. It does this by finding all modules in the +package whose name starts with ``test_``, importing them, and executing the +function :func:`test_main` if present. The names of tests to execute may also be +passed to the script. Specifying a single regression test (:program:`python +regrtest.py` :option:`test_spam.py`) will minimize output and only print whether +the test passed or failed. + +Running :mod:`test.regrtest` directly allows what resources are available for +tests to use to be set. You do this by using the :option:`-u` command-line +option. Run :program:`python regrtest.py` :option:`-uall` to turn on all +resources; specifying :option:`all` as an option for :option:`-u` enables all +possible resources. If all but one resource is desired (a more common case), a +comma-separated list of resources that are not desired may be listed after +:option:`all`. The command :program:`python regrtest.py` +:option:`-uall,-audio,-largefile` will run :mod:`test.regrtest` with all +resources except the :option:`audio` and :option:`largefile` resources. For a +list of all resources and more command-line options, run :program:`python +regrtest.py` :option:`-h`. + +Some other ways to execute the regression tests depend on what platform the +tests are being executed on. On Unix, you can run :program:`make` :option:`test` +at the top-level directory where Python was built. On Windows, executing +:program:`rt.bat` from your :file:`PCBuild` directory will run all regression +tests. + + +:mod:`test.test_support` --- Utility functions for tests +======================================================== + +..
module:: test.test_support + :synopsis: Support for Python regression tests. + + +The :mod:`test.test_support` module provides support for Python's regression +tests. + +This module defines the following exceptions: + + +.. exception:: TestFailed + + Exception to be raised when a test fails. This is deprecated in favor of + :mod:`unittest`\ -based tests and :class:`unittest.TestCase`'s assertion + methods. + + +.. exception:: TestSkipped + + Subclass of :exc:`TestFailed`. Raised when a test is skipped. This occurs when a + needed resource (such as a network connection) is not available at the time of + testing. + + +.. exception:: ResourceDenied + + Subclass of :exc:`TestSkipped`. Raised when a resource (such as a network + connection) is not available. Raised by the :func:`requires` function. + +The :mod:`test.test_support` module defines the following constants: + + +.. data:: verbose + + :const:`True` when verbose output is enabled. Should be checked when more + detailed information is desired about a running test. *verbose* is set by + :mod:`test.regrtest`. + + +.. data:: have_unicode + + :const:`True` when Unicode support is available. + + +.. data:: is_jython + + :const:`True` if the running interpreter is Jython. + + +.. data:: TESTFN + + Set to the path that a temporary file may be created at. Any temporary file that is + created should be closed and unlinked (removed). + +The :mod:`test.test_support` module defines the following functions: + + +.. function:: forget(module_name) + + Removes the module named *module_name* from ``sys.modules`` and deletes any + byte-compiled files of the module. + + +.. function:: is_resource_enabled(resource) + + Returns :const:`True` if *resource* is enabled and available. The list of + available resources is only set when :mod:`test.regrtest` is executing the + tests. + + +.. function:: requires(resource[, msg]) + + Raises :exc:`ResourceDenied` if *resource* is not available.
*msg* is the + argument to :exc:`ResourceDenied` if it is raised. Always returns true if called + by a function whose ``__name__`` is ``'__main__'``. Used when tests are executed + by :mod:`test.regrtest`. + + +.. function:: findfile(filename) + + Return the path to the file named *filename*. If no match is found *filename* is + returned. This does not equal a failure since it could be the path to the file. + + +.. function:: run_unittest(*classes) + + Execute :class:`unittest.TestCase` subclasses passed to the function. The + function scans the classes for methods starting with the prefix ``test_`` and + executes the tests individually. + + It is also legal to pass strings as parameters; these should be keys in + ``sys.modules``. Each associated module will be scanned by + ``unittest.TestLoader.loadTestsFromModule()``. This is usually seen in the + following :func:`test_main` function:: + + def test_main(): + test_support.run_unittest(__name__) + + This will run all tests defined in the named module. + +The :mod:`test.test_support` module defines the following classes: + + +.. class:: TransientResource(exc[, **kwargs]) + + Instances are a context manager that raises :exc:`ResourceDenied` if the + specified exception type is raised. Any keyword arguments are treated as + attribute/value pairs to be compared against any exception raised within the + :keyword:`with` statement. Only if all pairs match properly against + attributes on the exception is :exc:`ResourceDenied` raised. + + .. versionadded:: 2.6 + + +.. class:: EnvironmentVarGuard() + + Class used to temporarily set or unset environment variables. Instances can be + used as a context manager. + + .. versionadded:: 2.6 + + +.. method:: EnvironmentVarGuard.set(envvar, value) + + Temporarily set the environment variable ``envvar`` to the value of ``value``. + + +.. method:: EnvironmentVarGuard.unset(envvar) + + Temporarily unset the environment variable ``envvar``. 
+ diff --git a/Doc/library/textwrap.rst b/Doc/library/textwrap.rst new file mode 100644 index 0000000..f729a64 --- /dev/null +++ b/Doc/library/textwrap.rst @@ -0,0 +1,192 @@ + +:mod:`textwrap` --- Text wrapping and filling +============================================= + +.. module:: textwrap + :synopsis: Text wrapping and filling +.. moduleauthor:: Greg Ward <gward@python.net> +.. sectionauthor:: Greg Ward <gward@python.net> + + +.. versionadded:: 2.3 + +The :mod:`textwrap` module provides two convenience functions, :func:`wrap` and +:func:`fill`, as well as :class:`TextWrapper`, the class that does all the work, +and a utility function :func:`dedent`. If you're just wrapping or filling one +or two text strings, the convenience functions should be good enough; +otherwise, you should use an instance of :class:`TextWrapper` for efficiency. + + +.. function:: wrap(text[, width[, ...]]) + + Wraps the single paragraph in *text* (a string) so every line is at most *width* + characters long. Returns a list of output lines, without final newlines. + + Optional keyword arguments correspond to the instance attributes of + :class:`TextWrapper`, documented below. *width* defaults to ``70``. + + +.. function:: fill(text[, width[, ...]]) + + Wraps the single paragraph in *text*, and returns a single string containing the + wrapped paragraph. :func:`fill` is shorthand for :: + + "\n".join(wrap(text, ...)) + + In particular, :func:`fill` accepts exactly the same keyword arguments as + :func:`wrap`. + +Both :func:`wrap` and :func:`fill` work by creating a :class:`TextWrapper` +instance and calling a single method on it. That instance is not reused, so for +applications that wrap/fill many text strings, it will be more efficient for you +to create your own :class:`TextWrapper` object. + +An additional utility function, :func:`dedent`, is provided to remove +indentation from strings that have unwanted whitespace to the left of the text. + + +.. 
function:: dedent(text) + + Remove any common leading whitespace from every line in *text*. + + This can be used to make triple-quoted strings line up with the left edge of the + display, while still presenting them in the source code in indented form. + + Note that tabs and spaces are both treated as whitespace, but they are not + equal: the lines ``" hello"`` and ``"\thello"`` are considered to have no + common leading whitespace. (This behaviour is new in Python 2.5; older versions + of this module incorrectly expanded tabs before searching for common leading + whitespace.) + + For example:: + + def test(): + # end first line with \ to avoid the empty line! + s = '''\ + hello + world + ''' + print repr(s) # prints ' hello\n world\n ' + print repr(dedent(s)) # prints 'hello\n world\n' + + +.. class:: TextWrapper(...) + + The :class:`TextWrapper` constructor accepts a number of optional keyword + arguments. Each argument corresponds to one instance attribute, so for example + :: + + wrapper = TextWrapper(initial_indent="* ") + + is the same as :: + + wrapper = TextWrapper() + wrapper.initial_indent = "* " + + You can re-use the same :class:`TextWrapper` object many times, and you can + change any of its options through direct assignment to instance attributes + between uses. + +The :class:`TextWrapper` instance attributes (and keyword arguments to the +constructor) are as follows: + + +.. attribute:: TextWrapper.width + + (default: ``70``) The maximum length of wrapped lines. As long as there are no + individual words in the input text longer than :attr:`width`, + :class:`TextWrapper` guarantees that no output line will be longer than + :attr:`width` characters. + + +.. attribute:: TextWrapper.expand_tabs + + (default: ``True``) If true, then all tab characters in *text* will be expanded + to spaces using the :meth:`expandtabs` method of *text*. + + +.. 
attribute:: TextWrapper.replace_whitespace + + (default: ``True``) If true, each whitespace character (as defined by + ``string.whitespace``) remaining after tab expansion will be replaced by a + single space. + + .. note:: + + If :attr:`expand_tabs` is false and :attr:`replace_whitespace` is true, each tab + character will be replaced by a single space, which is *not* the same as tab + expansion. + + +.. attribute:: TextWrapper.drop_whitespace + + (default: ``True``) If true, whitespace that, after wrapping, happens to end up + at the beginning or end of a line is dropped (leading whitespace in the first + line is always preserved, though). + + .. versionadded:: 2.6 + Whitespace was always dropped in earlier versions. + + +.. attribute:: TextWrapper.initial_indent + + (default: ``''``) String that will be prepended to the first line of wrapped + output. Counts towards the length of the first line. + + +.. attribute:: TextWrapper.subsequent_indent + + (default: ``''``) String that will be prepended to all lines of wrapped output + except the first. Counts towards the length of each line except the first. + + +.. attribute:: TextWrapper.fix_sentence_endings + + (default: ``False``) If true, :class:`TextWrapper` attempts to detect sentence + endings and ensure that sentences are always separated by exactly two spaces. + This is generally desired for text in a monospaced font. However, the sentence + detection algorithm is imperfect: it assumes that a sentence ending consists of + a lowercase letter followed by one of ``'.'``, ``'!'``, or ``'?'``, possibly + followed by one of ``'"'`` or ``"'"``, followed by a space. One problem with + this algorithm is that it is unable to detect the difference between "Dr." in + :: + + [...] Dr. Frankenstein's monster [...] + + and "Spot." in :: + + [...] See Spot. See Spot run [...] + + :attr:`fix_sentence_endings` is false by default.
+ + Since the sentence detection algorithm relies on ``string.lowercase`` for the + definition of "lowercase letter," and a convention of using two spaces after + a period to separate sentences on the same line, it is specific to + English-language texts. + + +.. attribute:: TextWrapper.break_long_words + + (default: ``True``) If true, then words longer than :attr:`width` will be broken + in order to ensure that no lines are longer than :attr:`width`. If it is false, + long words will not be broken, and some lines may be longer than :attr:`width`. + (Long words will be put on a line by themselves, in order to minimize the amount + by which :attr:`width` is exceeded.) + +:class:`TextWrapper` also provides two public methods, analogous to the +module-level convenience functions: + + +.. method:: TextWrapper.wrap(text) + + Wraps the single paragraph in *text* (a string) so every line is at most + :attr:`width` characters long. All wrapping options are taken from instance + attributes of the :class:`TextWrapper` instance. Returns a list of output lines, + without final newlines. + + +.. method:: TextWrapper.fill(text) + + Wraps the single paragraph in *text*, and returns a single string containing the + wrapped paragraph. + diff --git a/Doc/library/thread.rst b/Doc/library/thread.rst new file mode 100644 index 0000000..c9be598 --- /dev/null +++ b/Doc/library/thread.rst @@ -0,0 +1,171 @@ + +:mod:`thread` --- Multiple threads of control +============================================= + +.. module:: thread + :synopsis: Create multiple threads of control within one interpreter. + + +.. index:: + single: light-weight processes + single: processes, light-weight + single: binary semaphores + single: semaphores, binary + +This module provides low-level primitives for working with multiple threads +(a.k.a. :dfn:`light-weight processes` or :dfn:`tasks`) --- multiple threads of +control sharing their global data space. For synchronization, simple locks +(a.k.a. 
:dfn:`mutexes` or :dfn:`binary semaphores`) are provided. + +.. index:: + single: pthreads + pair: threads; POSIX + +The module is optional. It is supported on Windows, Linux, SGI IRIX, Solaris +2.x, as well as on systems that have a POSIX thread (a.k.a. "pthread") +implementation. For systems lacking the :mod:`thread` module, the +:mod:`dummy_thread` module is available. It duplicates this module's interface +and can be used as a drop-in replacement. + +It defines the following constant and functions: + + +.. exception:: error + + Raised on thread-specific errors. + + +.. data:: LockType + + This is the type of lock objects. + + +.. function:: start_new_thread(function, args[, kwargs]) + + Start a new thread and return its identifier. The thread executes the function + *function* with the argument list *args* (which must be a tuple). The optional + *kwargs* argument specifies a dictionary of keyword arguments. When the function + returns, the thread silently exits. When the function terminates with an + unhandled exception, a stack trace is printed and then the thread exits (but + other threads continue to run). + + +.. function:: interrupt_main() + + Raise a :exc:`KeyboardInterrupt` exception in the main thread. A subthread can + use this function to interrupt the main thread. + + .. versionadded:: 2.3 + + +.. function:: exit() + + Raise the :exc:`SystemExit` exception. When not caught, this will cause the + thread to exit silently. + +.. % \begin{funcdesc}{exit_prog}{status} +.. % Exit all threads and report the value of the integer argument +.. % \var{status} as the exit status of the entire program. +.. % \strong{Caveat:} code in pending \keyword{finally} clauses, in this thread +.. % or in other threads, is not executed. +.. % \end{funcdesc} + + +.. function:: allocate_lock() + + Return a new lock object. Methods of locks are described below. The lock is + initially unlocked. + + +.. 
function:: get_ident() + + Return the 'thread identifier' of the current thread. This is a nonzero + integer. Its value has no direct meaning; it is intended as a magic cookie to + be used e.g. to index a dictionary of thread-specific data. Thread identifiers + may be recycled when a thread exits and another thread is created. + + +.. function:: stack_size([size]) + + Return the thread stack size used when creating new threads. The optional + *size* argument specifies the stack size to be used for subsequently created + threads, and must be 0 (use platform or configured default) or a positive + integer value of at least 32,768 (32kB). If changing the thread stack size is + unsupported, a :exc:`error` is raised. If the specified stack size is + invalid, a :exc:`ValueError` is raised and the stack size is unmodified. 32kB + is currently the minimum supported stack size value to guarantee sufficient + stack space for the interpreter itself. Note that some platforms may have + particular restrictions on values for the stack size, such as requiring a + minimum stack size > 32kB or requiring allocation in multiples of the system + memory page size - platform documentation should be referred to for more + information (4kB pages are common; using multiples of 4096 for the stack size is + the suggested approach in the absence of more specific information). + Availability: Windows, systems with POSIX threads. + + .. versionadded:: 2.5 + +Lock objects have the following methods: + + +.. method:: lock.acquire([waitflag]) + + Without the optional argument, this method acquires the lock unconditionally, if + necessary waiting until it is released by another thread (only one thread at a + time can acquire a lock --- that's their reason for existence).
If the integer + *waitflag* argument is present, the action depends on its value: if it is zero, + the lock is only acquired if it can be acquired immediately without waiting, + while if it is nonzero, the lock is acquired unconditionally as before. The + return value is ``True`` if the lock is acquired successfully, ``False`` if not. + + +.. method:: lock.release() + + Releases the lock. The lock must have been acquired earlier, but not + necessarily by the same thread. + + +.. method:: lock.locked() + + Return the status of the lock: ``True`` if it has been acquired by some thread, + ``False`` if not. + +In addition to these methods, lock objects can also be used via the +:keyword:`with` statement, e.g.:: + + from __future__ import with_statement + import thread + + a_lock = thread.allocate_lock() + + with a_lock: + print "a_lock is locked while this executes" + +**Caveats:** + + .. index:: module: signal + +* Threads interact strangely with interrupts: the :exc:`KeyboardInterrupt` + exception will be received by an arbitrary thread. (When the :mod:`signal` + module is available, interrupts always go to the main thread.) + +* Calling :func:`sys.exit` or raising the :exc:`SystemExit` exception is + equivalent to calling :func:`exit`. + +* Not all built-in functions that may block waiting for I/O allow other threads + to run. (The most popular ones (:func:`time.sleep`, :meth:`file.read`, + :func:`select.select`) work as expected.) + +* It is not possible to interrupt the :meth:`acquire` method on a lock --- the + :exc:`KeyboardInterrupt` exception will happen after the lock has been acquired. + + .. index:: pair: threads; IRIX + +* When the main thread exits, it is system defined whether the other threads + survive. On SGI IRIX using the native thread implementation, they survive. On + most other systems, they are killed without executing :keyword:`try` ... + :keyword:`finally` clauses or executing object destructors. 
+ +* When the main thread exits, it does not do any of its usual cleanup (except + that :keyword:`try` ... :keyword:`finally` clauses are honored), and the + standard I/O files are not flushed. + diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst new file mode 100644 index 0000000..92ce02a --- /dev/null +++ b/Doc/library/threading.rst @@ -0,0 +1,732 @@ + +:mod:`threading` --- Higher-level threading interface +===================================================== + +.. module:: threading + :synopsis: Higher-level threading interface. + + +This module constructs higher-level threading interfaces on top of the lower +level :mod:`thread` module. + +The :mod:`dummy_threading` module is provided for situations where +:mod:`threading` cannot be used because :mod:`thread` is missing. + +This module defines the following functions and objects: + + +.. function:: activeCount() + + Return the number of :class:`Thread` objects currently alive. The returned + count is equal to the length of the list returned by :func:`enumerate`. + + +.. function:: Condition() + :noindex: + + A factory function that returns a new condition variable object. A condition + variable allows one or more threads to wait until they are notified by another + thread. + + +.. function:: currentThread() + + Return the current :class:`Thread` object, corresponding to the caller's thread + of control. If the caller's thread of control was not created through the + :mod:`threading` module, a dummy thread object with limited functionality is + returned. + + +.. function:: enumerate() + + Return a list of all :class:`Thread` objects currently alive. The list includes + daemonic threads, dummy thread objects created by :func:`currentThread`, and the + main thread. It excludes terminated threads and threads that have not yet been + started. + + +.. function:: Event() + :noindex: + + A factory function that returns a new event object. 
An event manages a flag + that can be set to true with the :meth:`set` method and reset to false with the + :meth:`clear` method. The :meth:`wait` method blocks until the flag is true. + + +.. class:: local + + A class that represents thread-local data. Thread-local data are data whose + values are thread specific. To manage thread-local data, just create an + instance of :class:`local` (or a subclass) and store attributes on it:: + + mydata = threading.local() + mydata.x = 1 + + The instance's values will be different for separate threads. + + For more details and extensive examples, see the documentation string of the + :mod:`_threading_local` module. + + .. versionadded:: 2.4 + + +.. function:: Lock() + + A factory function that returns a new primitive lock object. Once a thread has + acquired it, subsequent attempts to acquire it block, until it is released; any + thread may release it. + + +.. function:: RLock() + + A factory function that returns a new reentrant lock object. A reentrant lock + must be released by the thread that acquired it. Once a thread has acquired a + reentrant lock, the same thread may acquire it again without blocking; the + thread must release it once for each time it has acquired it. + + +.. function:: Semaphore([value]) + :noindex: + + A factory function that returns a new semaphore object. A semaphore manages a + counter representing the number of :meth:`release` calls minus the number of + :meth:`acquire` calls, plus an initial value. The :meth:`acquire` method blocks + if necessary until it can return without making the counter negative. If not + given, *value* defaults to 1. + + +.. function:: BoundedSemaphore([value]) + + A factory function that returns a new bounded semaphore object. A bounded + semaphore checks to make sure its current value doesn't exceed its initial + value. If it does, :exc:`ValueError` is raised. In most situations semaphores + are used to guard resources with limited capacity. 
If the semaphore is released + too many times it's a sign of a bug. If not given, *value* defaults to 1. + + +.. class:: Thread + + A class that represents a thread of control. This class can be safely + subclassed in a limited fashion. + + +.. class:: Timer + + A thread that executes a function after a specified interval has passed. + + +.. function:: settrace(func) + + .. index:: single: trace function + + Set a trace function for all threads started from the :mod:`threading` module. + The *func* will be passed to :func:`sys.settrace` for each thread, before its + :meth:`run` method is called. + + .. versionadded:: 2.3 + + +.. function:: setprofile(func) + + .. index:: single: profile function + + Set a profile function for all threads started from the :mod:`threading` module. + The *func* will be passed to :func:`sys.setprofile` for each thread, before its + :meth:`run` method is called. + + .. versionadded:: 2.3 + + +.. function:: stack_size([size]) + + Return the thread stack size used when creating new threads. The optional + *size* argument specifies the stack size to be used for subsequently created + threads, and must be 0 (use platform or configured default) or a positive + integer value of at least 32,768 (32kB). If changing the thread stack size is + unsupported, a :exc:`ThreadError` is raised. If the specified stack size is + invalid, a :exc:`ValueError` is raised and the stack size is unmodified. 32kB + is currently the minimum supported stack size value to guarantee sufficient + stack space for the interpreter itself. Note that some platforms may have + particular restrictions on values for the stack size, such as requiring a + minimum stack size > 32kB or requiring allocation in multiples of the system + memory page size - platform documentation should be referred to for more + information (4kB pages are common; using multiples of 4096 for the stack size is + the suggested approach in the absence of more specific information). 
+ Availability: Windows, systems with POSIX threads. + + .. versionadded:: 2.5 + +Detailed interfaces for the objects are documented below. + +The design of this module is loosely based on Java's threading model. However, +where Java makes locks and condition variables basic behavior of every object, +they are separate objects in Python. Python's :class:`Thread` class supports a +subset of the behavior of Java's Thread class; currently, there are no +priorities, no thread groups, and threads cannot be destroyed, stopped, +suspended, resumed, or interrupted. The static methods of Java's Thread class, +when implemented, are mapped to module-level functions. + +All of the methods described below are executed atomically. + + +.. _lock-objects: + +Lock Objects +------------ + +A primitive lock is a synchronization primitive that is not owned by a +particular thread when locked. In Python, it is currently the lowest level +synchronization primitive available, implemented directly by the :mod:`thread` +extension module. + +A primitive lock is in one of two states, "locked" or "unlocked". It is created +in the unlocked state. It has two basic methods, :meth:`acquire` and +:meth:`release`. When the state is unlocked, :meth:`acquire` changes the state +to locked and returns immediately. When the state is locked, :meth:`acquire` +blocks until a call to :meth:`release` in another thread changes it to unlocked, +then the :meth:`acquire` call resets it to locked and returns. The +:meth:`release` method should only be called in the locked state; it changes the +state to unlocked and returns immediately. If an attempt is made to release an +unlocked lock, a :exc:`RuntimeError` will be raised. + +When more than one thread is blocked in :meth:`acquire` waiting for the state to +turn to unlocked, only one thread proceeds when a :meth:`release` call resets +the state to unlocked; which one of the waiting threads proceeds is not defined, +and may vary across implementations. 
+ +All methods are executed atomically. + + +.. method:: Lock.acquire([blocking=1]) + + Acquire a lock, blocking or non-blocking. + + When invoked without arguments, block until the lock is unlocked, then set it to + locked, and return true. + + When invoked with the *blocking* argument set to true, do the same thing as when + called without arguments, and return true. + + When invoked with the *blocking* argument set to false, do not block. If a call + without an argument would block, return false immediately; otherwise, do the + same thing as when called without arguments, and return true. + + +.. method:: Lock.release() + + Release a lock. + + When the lock is locked, reset it to unlocked, and return. If any other threads + are blocked waiting for the lock to become unlocked, allow exactly one of them + to proceed. + + Do not call this method when the lock is unlocked. + + There is no return value. + + +.. _rlock-objects: + +RLock Objects +------------- + +A reentrant lock is a synchronization primitive that may be acquired multiple +times by the same thread. Internally, it uses the concepts of "owning thread" +and "recursion level" in addition to the locked/unlocked state used by primitive +locks. In the locked state, some thread owns the lock; in the unlocked state, +no thread owns it. + +To lock the lock, a thread calls its :meth:`acquire` method; this returns once +the thread owns the lock. To unlock the lock, a thread calls its +:meth:`release` method. :meth:`acquire`/:meth:`release` call pairs may be +nested; only the final :meth:`release` (the :meth:`release` of the outermost +pair) resets the lock to unlocked and allows another thread blocked in +:meth:`acquire` to proceed. + + +.. method:: RLock.acquire([blocking=1]) + + Acquire a lock, blocking or non-blocking. + + When invoked without arguments: if this thread already owns the lock, increment + the recursion level by one, and return immediately. 
Otherwise, if another + thread owns the lock, block until the lock is unlocked. Once the lock is + unlocked (not owned by any thread), then grab ownership, set the recursion level + to one, and return. If more than one thread is blocked waiting until the lock + is unlocked, only one at a time will be able to grab ownership of the lock. + There is no return value in this case. + + When invoked with the *blocking* argument set to true, do the same thing as when + called without arguments, and return true. + + When invoked with the *blocking* argument set to false, do not block. If a call + without an argument would block, return false immediately; otherwise, do the + same thing as when called without arguments, and return true. + + +.. method:: RLock.release() + + Release a lock, decrementing the recursion level. If after the decrement it is + zero, reset the lock to unlocked (not owned by any thread), and if any other + threads are blocked waiting for the lock to become unlocked, allow exactly one + of them to proceed. If after the decrement the recursion level is still + nonzero, the lock remains locked and owned by the calling thread. + + Only call this method when the calling thread owns the lock. A + :exc:`RuntimeError` is raised if this method is called when the lock is + unlocked. + + There is no return value. + + +.. _condition-objects: + +Condition Objects +----------------- + +A condition variable is always associated with some kind of lock; this can be +passed in or one will be created by default. (Passing one in is useful when +several condition variables must share the same lock.) + +A condition variable has :meth:`acquire` and :meth:`release` methods that call +the corresponding methods of the associated lock. It also has a :meth:`wait` +method, and :meth:`notify` and :meth:`notifyAll` methods. These three must only +be called when the calling thread has acquired the lock, otherwise a +:exc:`RuntimeError` is raised. 
+ +The :meth:`wait` method releases the lock, and then blocks until it is awakened +by a :meth:`notify` or :meth:`notifyAll` call for the same condition variable in +another thread. Once awakened, it re-acquires the lock and returns. It is also +possible to specify a timeout. + +The :meth:`notify` method wakes up one of the threads waiting for the condition +variable, if any are waiting. The :meth:`notifyAll` method wakes up all threads +waiting for the condition variable. + +Note: the :meth:`notify` and :meth:`notifyAll` methods don't release the lock; +this means that the thread or threads awakened will not return from their +:meth:`wait` call immediately, but only when the thread that called +:meth:`notify` or :meth:`notifyAll` finally relinquishes ownership of the lock. + +Tip: the typical programming style using condition variables uses the lock to +synchronize access to some shared state; threads that are interested in a +particular change of state call :meth:`wait` repeatedly until they see the +desired state, while threads that modify the state call :meth:`notify` or +:meth:`notifyAll` when they change the state in such a way that it could +possibly be a desired state for one of the waiters. For example, the following +code is a generic producer-consumer situation with unlimited buffer capacity:: + + # Consume one item + cv.acquire() + while not an_item_is_available(): + cv.wait() + get_an_available_item() + cv.release() + + # Produce one item + cv.acquire() + make_an_item_available() + cv.notify() + cv.release() + +To choose between :meth:`notify` and :meth:`notifyAll`, consider whether one +state change can be interesting for only one or several waiting threads. E.g. +in a typical producer-consumer situation, adding one item to the buffer only +needs to wake up one consumer thread. + + +.. 
class:: Condition([lock]) + + If the *lock* argument is given and not ``None``, it must be a :class:`Lock` or + :class:`RLock` object, and it is used as the underlying lock. Otherwise, a new + :class:`RLock` object is created and used as the underlying lock. + + +.. method:: Condition.acquire(*args) + + Acquire the underlying lock. This method calls the corresponding method on the + underlying lock; the return value is whatever that method returns. + + +.. method:: Condition.release() + + Release the underlying lock. This method calls the corresponding method on the + underlying lock; there is no return value. + + +.. method:: Condition.wait([timeout]) + + Wait until notified or until a timeout occurs. If the calling thread has not + acquired the lock when this method is called, a :exc:`RuntimeError` is raised. + + This method releases the underlying lock, and then blocks until it is awakened + by a :meth:`notify` or :meth:`notifyAll` call for the same condition variable in + another thread, or until the optional timeout occurs. Once awakened or timed + out, it re-acquires the lock and returns. + + When the *timeout* argument is present and not ``None``, it should be a floating + point number specifying a timeout for the operation in seconds (or fractions + thereof). + + When the underlying lock is an :class:`RLock`, it is not released using its + :meth:`release` method, since this may not actually unlock the lock when it was + acquired multiple times recursively. Instead, an internal interface of the + :class:`RLock` class is used, which really unlocks it even when it has been + recursively acquired several times. Another internal interface is then used to + restore the recursion level when the lock is reacquired. + + +.. method:: Condition.notify() + + Wake up a thread waiting on this condition, if any. If the calling thread has + not acquired the lock when this method is called, a :exc:`RuntimeError` is + raised. 
+ + This method wakes up one of the threads waiting for the condition variable, if + any are waiting; it is a no-op if no threads are waiting. + + The current implementation wakes up exactly one thread, if any are waiting. + However, it's not safe to rely on this behavior. A future, optimized + implementation may occasionally wake up more than one thread. + + Note: the awakened thread does not actually return from its :meth:`wait` call + until it can reacquire the lock. Since :meth:`notify` does not release the + lock, its caller should. + + +.. method:: Condition.notifyAll() + + Wake up all threads waiting on this condition. This method acts like + :meth:`notify`, but wakes up all waiting threads instead of one. If the calling + thread has not acquired the lock when this method is called, a + :exc:`RuntimeError` is raised. + + +.. _semaphore-objects: + +Semaphore Objects +----------------- + +This is one of the oldest synchronization primitives in the history of computer +science, invented by the early Dutch computer scientist Edsger W. Dijkstra (he +used :meth:`P` and :meth:`V` instead of :meth:`acquire` and :meth:`release`). + +A semaphore manages an internal counter which is decremented by each +:meth:`acquire` call and incremented by each :meth:`release` call. The counter +can never go below zero; when :meth:`acquire` finds that it is zero, it blocks, +waiting until some other thread calls :meth:`release`. + + +.. class:: Semaphore([value]) + + The optional argument gives the initial *value* for the internal counter; it + defaults to ``1``. If the *value* given is less than 0, :exc:`ValueError` is + raised. + + +.. method:: Semaphore.acquire([blocking]) + + Acquire a semaphore. + + When invoked without arguments: if the internal counter is larger than zero on + entry, decrement it by one and return immediately. If it is zero on entry, + block, waiting until some other thread has called :meth:`release` to make it + larger than zero. 
This is done with proper interlocking so that if multiple + :meth:`acquire` calls are blocked, :meth:`release` will wake exactly one of them + up. The implementation may pick one at random, so the order in which blocked + threads are awakened should not be relied on. There is no return value in this + case. + + When invoked with *blocking* set to true, do the same thing as when called + without arguments, and return true. + + When invoked with *blocking* set to false, do not block. If a call without an + argument would block, return false immediately; otherwise, do the same thing as + when called without arguments, and return true. + + +.. method:: Semaphore.release() + + Release a semaphore, incrementing the internal counter by one. When it was zero + on entry and another thread is waiting for it to become larger than zero again, + wake up that thread. + + +.. _semaphore-examples: + +:class:`Semaphore` Example +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Semaphores are often used to guard resources with limited capacity, for example, +a database server. In any situation where the size of the resource is +fixed, you should use a bounded semaphore. Before spawning any worker threads, +your main thread would initialize the semaphore:: + + maxconnections = 5 + ... + pool_sema = BoundedSemaphore(value=maxconnections) + +Once spawned, worker threads call the semaphore's acquire and release methods +when they need to connect to the server:: + + pool_sema.acquire() + conn = connectdb() + ... use connection ... + conn.close() + pool_sema.release() + +The use of a bounded semaphore reduces the chance that a programming error which +causes the semaphore to be released more than it's acquired will go undetected. + + +.. _event-objects: + +Event Objects +------------- + +This is one of the simplest mechanisms for communication between threads: one +thread signals an event and other threads wait for it. 
+ +An event object manages an internal flag that can be set to true with the +:meth:`set` method and reset to false with the :meth:`clear` method. The +:meth:`wait` method blocks until the flag is true. + + +.. class:: Event() + + The internal flag is initially false. + + +.. method:: Event.isSet() + + Return true if and only if the internal flag is true. + + +.. method:: Event.set() + + Set the internal flag to true. All threads waiting for it to become true are + awakened. Threads that call :meth:`wait` once the flag is true will not block at + all. + + +.. method:: Event.clear() + + Reset the internal flag to false. Subsequently, threads calling :meth:`wait` + will block until :meth:`set` is called to set the internal flag to true again. + + +.. method:: Event.wait([timeout]) + + Block until the internal flag is true. If the internal flag is true on entry, + return immediately. Otherwise, block until another thread calls :meth:`set` to + set the flag to true, or until the optional timeout occurs. + + When the timeout argument is present and not ``None``, it should be a floating + point number specifying a timeout for the operation in seconds (or fractions + thereof). + + +.. _thread-objects: + +Thread Objects +-------------- + +This class represents an activity that is run in a separate thread of control. +There are two ways to specify the activity: by passing a callable object to the +constructor, or by overriding the :meth:`run` method in a subclass. No other +methods (except for the constructor) should be overridden in a subclass. In +other words, *only* override the :meth:`__init__` and :meth:`run` methods of +this class. + +Once a thread object is created, its activity must be started by calling the +thread's :meth:`start` method. This invokes the :meth:`run` method in a +separate thread of control. + +Once the thread's activity is started, the thread is considered 'alive'. 
It +stops being alive when its :meth:`run` method terminates -- either normally, or +by raising an unhandled exception. The :meth:`isAlive` method tests whether the +thread is alive. + +Other threads can call a thread's :meth:`join` method. This blocks the calling +thread until the thread whose :meth:`join` method is called is terminated. + +A thread has a name. The name can be passed to the constructor, set with the +:meth:`setName` method, and retrieved with the :meth:`getName` method. + +A thread can be flagged as a "daemon thread". The significance of this flag is +that the entire Python program exits when only daemon threads are left. The +initial value is inherited from the creating thread. The flag can be set with +the :meth:`setDaemon` method and retrieved with the :meth:`isDaemon` method. + +There is a "main thread" object; this corresponds to the initial thread of +control in the Python program. It is not a daemon thread. + +There is the possibility that "dummy thread objects" are created. These are +thread objects corresponding to "alien threads", which are threads of control +started outside the threading module, such as directly from C code. Dummy +thread objects have limited functionality; they are always considered alive and +daemonic, and cannot be :meth:`join`\ ed. They are never deleted, since it is +impossible to detect the termination of alien threads. + + +.. class:: Thread(group=None, target=None, name=None, args=(), kwargs={}) + + This constructor should always be called with keyword arguments. Arguments are: + + *group* should be ``None``; reserved for future extension when a + :class:`ThreadGroup` class is implemented. + + *target* is the callable object to be invoked by the :meth:`run` method. + Defaults to ``None``, meaning nothing is called. + + *name* is the thread name. By default, a unique name is constructed of the form + "Thread-*N*" where *N* is a small decimal number. + + *args* is the argument tuple for the target invocation. 
Defaults to ``()``. + + *kwargs* is a dictionary of keyword arguments for the target invocation. + Defaults to ``{}``. + + If the subclass overrides the constructor, it must make sure to invoke the base + class constructor (``Thread.__init__()``) before doing anything else to the + thread. + + +.. method:: Thread.start() + + Start the thread's activity. + + It must be called at most once per thread object. It arranges for the object's + :meth:`run` method to be invoked in a separate thread of control. + + This method will raise a :exc:`RuntimeError` if called more than once on the + same thread object. + + +.. method:: Thread.run() + + Method representing the thread's activity. + + You may override this method in a subclass. The standard :meth:`run` method + invokes the callable object passed to the object's constructor as the *target* + argument, if any, with sequential and keyword arguments taken from the *args* + and *kwargs* arguments, respectively. + + +.. method:: Thread.join([timeout]) + + Wait until the thread terminates. This blocks the calling thread until the + thread whose :meth:`join` method is called terminates -- either normally or + through an unhandled exception -- or until the optional timeout occurs. + + When the *timeout* argument is present and not ``None``, it should be a floating + point number specifying a timeout for the operation in seconds (or fractions + thereof). As :meth:`join` always returns ``None``, you must call + :meth:`isAlive` to decide whether a timeout happened. + + When the *timeout* argument is not present or ``None``, the operation will block + until the thread terminates. + + A thread can be :meth:`join`\ ed many times. + + :meth:`join` may raise a :exc:`RuntimeError` if an attempt is made to join the + current thread, as that would cause a deadlock. It is also an error to + :meth:`join` a thread before it has been started, and attempts to do so raise + the same exception. + + +.. 
method:: Thread.getName() + + Return the thread's name. + + +.. method:: Thread.setName(name) + + Set the thread's name. + + The name is a string used for identification purposes only. It has no semantics. + Multiple threads may be given the same name. The initial name is set by the + constructor. + + +.. method:: Thread.isAlive() + + Return whether the thread is alive. + + Roughly, a thread is alive from the moment the :meth:`start` method returns + until its :meth:`run` method terminates. The module function :func:`enumerate` + returns a list of all alive threads. + + +.. method:: Thread.isDaemon() + + Return the thread's daemon flag. + + +.. method:: Thread.setDaemon(daemonic) + + Set the thread's daemon flag to the Boolean value *daemonic*. This must be + called before :meth:`start` is called, otherwise :exc:`RuntimeError` is raised. + + The initial value is inherited from the creating thread. + + The entire Python program exits when no alive non-daemon threads are left. + + +.. _timer-objects: + +Timer Objects +------------- + +This class represents an action that should be run only after a certain amount +of time has passed --- a timer. :class:`Timer` is a subclass of :class:`Thread` +and as such also functions as an example of creating custom threads. + +Timers are started, as with threads, by calling their :meth:`start` method. The +timer can be stopped (before its action has begun) by calling the :meth:`cancel` +method. The interval the timer will wait before executing its action may not be +exactly the same as the interval specified by the user. + +For example:: + + def hello(): + print "hello, world" + + t = Timer(30.0, hello) + t.start() # after 30 seconds, "hello, world" will be printed + + +.. class:: Timer(interval, function, args=[], kwargs={}) + + Create a timer that will run *function* with arguments *args* and keyword + arguments *kwargs*, after *interval* seconds have passed. + + +.. 
method:: Timer.cancel() + + Stop the timer, and cancel the execution of the timer's action. This will only + work if the timer is still in its waiting stage. + + +.. _with-locks: + +Using locks, conditions, and semaphores in the :keyword:`with` statement +------------------------------------------------------------------------ + +All of the objects provided by this module that have :meth:`acquire` and +:meth:`release` methods can be used as context managers for a :keyword:`with` +statement. The :meth:`acquire` method will be called when the block is entered, +and :meth:`release` will be called when the block is exited. + +Currently, :class:`Lock`, :class:`RLock`, :class:`Condition`, +:class:`Semaphore`, and :class:`BoundedSemaphore` objects may be used as +:keyword:`with` statement context managers. For example:: + + from __future__ import with_statement + import threading + + some_rlock = threading.RLock() + + with some_rlock: + print "some_rlock is locked while this executes" + diff --git a/Doc/library/time.rst b/Doc/library/time.rst new file mode 100644 index 0000000..04c8f66 --- /dev/null +++ b/Doc/library/time.rst @@ -0,0 +1,540 @@ + +:mod:`time` --- Time access and conversions +=========================================== + +.. module:: time + :synopsis: Time access and conversions. + + +This module provides various time-related functions. For related +functionality, see also the :mod:`datetime` and :mod:`calendar` modules. + +Although this module is always available, +not all functions are available on all platforms. Most of the functions +defined in this module call platform C library functions with the same name. It +may sometimes be helpful to consult the platform documentation, because the +semantics of these functions varies among platforms. + +An explanation of some terminology and conventions is in order. + + .. index:: single: epoch + +* The :dfn:`epoch` is the point where the time starts. 
On January 1st of that + year, at 0 hours, the "time since the epoch" is zero. For Unix, the epoch is + 1970. To find out what the epoch is, look at ``gmtime(0)``. + + .. index:: single: Year 2038 + +* The functions in this module do not handle dates and times before the epoch or + far in the future. The cut-off point in the future is determined by the C + library; for Unix, it is typically in 2038. + + .. index:: + single: Year 2000 + single: Y2K + +* **Year 2000 (Y2K) issues**: Python depends on the platform's C library, which + generally doesn't have year 2000 issues, since all dates and times are + represented internally as seconds since the epoch. Functions accepting a + :class:`struct_time` (see below) generally require a 4-digit year. For backward + compatibility, 2-digit years are supported if the module variable + ``accept2dyear`` is a non-zero integer; this variable is initialized to ``1`` + unless the environment variable :envvar:`PYTHONY2K` is set to a non-empty + string, in which case it is initialized to ``0``. Thus, you can set + :envvar:`PYTHONY2K` to a non-empty string in the environment to require 4-digit + years for all year input. When 2-digit years are accepted, they are converted + according to the POSIX or X/Open standard: values 69--99 are mapped to 1969--1999, + and values 0--68 are mapped to 2000--2068. Values 100--1899 are always illegal. + Note that this is new as of Python 1.5.2(a2); earlier versions, up to Python + 1.5.1 and 1.5.2a1, would add 1900 to year values below 1900. + + .. index:: + single: UTC + single: Coordinated Universal Time + single: Greenwich Mean Time + +* UTC is Coordinated Universal Time (formerly known as Greenwich Mean Time, or + GMT). The acronym UTC is not a mistake but a compromise between English and + French. + + .. index:: single: Daylight Saving Time + +* DST is Daylight Saving Time, an adjustment of the timezone by (usually) one + hour during part of the year. 
DST rules are magic (determined by local law) and + can change from year to year. The C library has a table containing the local + rules (often it is read from a system file for flexibility) and is the only + source of True Wisdom in this respect. + +* The precision of the various real-time functions may be less than suggested by + the units in which their value or argument is expressed. E.g. on most Unix + systems, the clock "ticks" only 50 or 100 times a second, and on the Mac, times + are only accurate to whole seconds. + +* On the other hand, the precision of :func:`time` and :func:`sleep` is better + than their Unix equivalents: times are expressed as floating point numbers, + :func:`time` returns the most accurate time available (using Unix + :cfunc:`gettimeofday` where available), and :func:`sleep` will accept a time + with a nonzero fraction (Unix :cfunc:`select` is used to implement this, where + available). + +* The time value as returned by :func:`gmtime`, :func:`localtime`, and + :func:`strptime`, and accepted by :func:`asctime`, :func:`mktime` and + :func:`strftime`, is a sequence of 9 integers. The return values of + :func:`gmtime`, :func:`localtime`, and :func:`strptime` also offer attribute + names for individual fields. 
+ + +-------+------------------+------------------------------+ + | Index | Attribute | Values | + +=======+==================+==============================+ + | 0 | :attr:`tm_year` | (for example, 1993) | + +-------+------------------+------------------------------+ + | 1 | :attr:`tm_mon` | range [1,12] | + +-------+------------------+------------------------------+ + | 2 | :attr:`tm_mday` | range [1,31] | + +-------+------------------+------------------------------+ + | 3 | :attr:`tm_hour` | range [0,23] | + +-------+------------------+------------------------------+ + | 4 | :attr:`tm_min` | range [0,59] | + +-------+------------------+------------------------------+ + | 5 | :attr:`tm_sec` | range [0,61]; see **(2)** in | + | | | :func:`strftime` description | + +-------+------------------+------------------------------+ + | 6 | :attr:`tm_wday` | range [0,6], Monday is 0 | + +-------+------------------+------------------------------+ + | 7 | :attr:`tm_yday` | range [1,366] | + +-------+------------------+------------------------------+ + | 8 | :attr:`tm_isdst` | 0, 1 or -1; see below | + +-------+------------------+------------------------------+ + + Note that unlike the C structure, the month value is a range of 1-12, not 0-11. + A year value will be handled as described under "Year 2000 (Y2K) issues" above. + A ``-1`` argument as the daylight savings flag, passed to :func:`mktime` will + usually result in the correct daylight savings state to be filled in. + + When a tuple with an incorrect length is passed to a function expecting a + :class:`struct_time`, or having elements of the wrong type, a :exc:`TypeError` + is raised. + + .. versionchanged:: 2.2 + The time value sequence was changed from a tuple to a :class:`struct_time`, with + the addition of attribute names for the fields. + +The module defines the following functions and data items: + + +.. data:: accept2dyear + + Boolean value indicating whether two-digit year values will be accepted.
This + is true by default, but will be set to false if the environment variable + :envvar:`PYTHONY2K` has been set to a non-empty string. It may also be modified + at run time. + + +.. data:: altzone + + The offset of the local DST timezone, in seconds west of UTC, if one is defined. + This is negative if the local DST timezone is east of UTC (as in Western Europe, + including the UK). Only use this if ``daylight`` is nonzero. + + +.. function:: asctime([t]) + + Convert a tuple or :class:`struct_time` representing a time as returned by + :func:`gmtime` or :func:`localtime` to a 24-character string of the following + form: ``'Sun Jun 20 23:21:05 1993'``. If *t* is not provided, the current time + as returned by :func:`localtime` is used. Locale information is not used by + :func:`asctime`. + + .. note:: + + Unlike the C function of the same name, there is no trailing newline. + + .. versionchanged:: 2.1 + Allowed *t* to be omitted. + + +.. function:: clock() + + .. index:: + single: CPU time + single: processor time + single: benchmarking + + On Unix, return the current processor time as a floating point number expressed + in seconds. The precision, and in fact the very definition of the meaning of + "processor time", depends on that of the C function of the same name, but in any + case, this is the function to use for benchmarking Python or timing algorithms. + + On Windows, this function returns wall-clock seconds elapsed since the first + call to this function, as a floating point number, based on the Win32 function + :cfunc:`QueryPerformanceCounter`. The resolution is typically better than one + microsecond. + + +.. function:: ctime([secs]) + + Convert a time expressed in seconds since the epoch to a string representing + local time. If *secs* is not provided or :const:`None`, the current time as + returned by :func:`time` is used. ``ctime(secs)`` is equivalent to + ``asctime(localtime(secs))``. Locale information is not used by :func:`ctime`. + + .. 
versionchanged:: 2.1 + Allowed *secs* to be omitted. + + .. versionchanged:: 2.4 + If *secs* is :const:`None`, the current time is used. + + +.. data:: daylight + + Nonzero if a DST timezone is defined. + + +.. function:: gmtime([secs]) + + Convert a time expressed in seconds since the epoch to a :class:`struct_time` in + UTC in which the dst flag is always zero. If *secs* is not provided or + :const:`None`, the current time as returned by :func:`time` is used. Fractions + of a second are ignored. See above for a description of the + :class:`struct_time` object. See :func:`calendar.timegm` for the inverse of this + function. + + .. versionchanged:: 2.1 + Allowed *secs* to be omitted. + + .. versionchanged:: 2.4 + If *secs* is :const:`None`, the current time is used. + + +.. function:: localtime([secs]) + + Like :func:`gmtime` but converts to local time. If *secs* is not provided or + :const:`None`, the current time as returned by :func:`time` is used. The dst + flag is set to ``1`` when DST applies to the given time. + + .. versionchanged:: 2.1 + Allowed *secs* to be omitted. + + .. versionchanged:: 2.4 + If *secs* is :const:`None`, the current time is used. + + +.. function:: mktime(t) + + This is the inverse function of :func:`localtime`. Its argument is the + :class:`struct_time` or full 9-tuple (since the dst flag is needed; use ``-1`` + as the dst flag if it is unknown) which expresses the time in *local* time, not + UTC. It returns a floating point number, for compatibility with :func:`time`. + If the input value cannot be represented as a valid time, either + :exc:`OverflowError` or :exc:`ValueError` will be raised (which depends on + whether the invalid value is caught by Python or the underlying C libraries). + The earliest date for which it can generate a time is platform-dependent. + + +.. function:: sleep(secs) + + Suspend execution for the given number of seconds. The argument may be a + floating point number to indicate a more precise sleep time. 
The actual + suspension time may be less than that requested because any caught signal will + terminate the :func:`sleep` following execution of that signal's catching + routine. Also, the suspension time may be longer than requested by an arbitrary + amount because of the scheduling of other activity in the system. + + +.. function:: strftime(format[, t]) + + Convert a tuple or :class:`struct_time` representing a time as returned by + :func:`gmtime` or :func:`localtime` to a string as specified by the *format* + argument. If *t* is not provided, the current time as returned by + :func:`localtime` is used. *format* must be a string. :exc:`ValueError` is + raised if any field in *t* is outside of the allowed range. + + .. versionchanged:: 2.1 + Allowed *t* to be omitted. + + .. versionchanged:: 2.4 + :exc:`ValueError` raised if a field in *t* is out of range. + + .. versionchanged:: 2.5 + 0 is now a legal argument for any position in the time tuple; if it is normally + illegal the value is forced to a correct one. + + The following directives can be embedded in the *format* string. They are shown + without the optional field width and precision specification, and are replaced + by the indicated characters in the :func:`strftime` result: + + +-----------+--------------------------------+-------+ + | Directive | Meaning | Notes | + +===========+================================+=======+ + | ``%a`` | Locale's abbreviated weekday | | + | | name. | | + +-----------+--------------------------------+-------+ + | ``%A`` | Locale's full weekday name. | | + +-----------+--------------------------------+-------+ + | ``%b`` | Locale's abbreviated month | | + | | name. | | + +-----------+--------------------------------+-------+ + | ``%B`` | Locale's full month name. | | + +-----------+--------------------------------+-------+ + | ``%c`` | Locale's appropriate date and | | + | | time representation.
| | + +-----------+--------------------------------+-------+ + | ``%d`` | Day of the month as a decimal | | + | | number [01,31]. | | + +-----------+--------------------------------+-------+ + | ``%H`` | Hour (24-hour clock) as a | | + | | decimal number [00,23]. | | + +-----------+--------------------------------+-------+ + | ``%I`` | Hour (12-hour clock) as a | | + | | decimal number [01,12]. | | + +-----------+--------------------------------+-------+ + | ``%j`` | Day of the year as a decimal | | + | | number [001,366]. | | + +-----------+--------------------------------+-------+ + | ``%m`` | Month as a decimal number | | + | | [01,12]. | | + +-----------+--------------------------------+-------+ + | ``%M`` | Minute as a decimal number | | + | | [00,59]. | | + +-----------+--------------------------------+-------+ + | ``%p`` | Locale's equivalent of either | \(1) | + | | AM or PM. | | + +-----------+--------------------------------+-------+ + | ``%S`` | Second as a decimal number | \(2) | + | | [00,61]. | | + +-----------+--------------------------------+-------+ + | ``%U`` | Week number of the year | \(3) | + | | (Sunday as the first day of | | + | | the week) as a decimal number | | + | | [00,53]. All days in a new | | + | | year preceding the first | | + | | Sunday are considered to be in | | + | | week 0. | | + +-----------+--------------------------------+-------+ + | ``%w`` | Weekday as a decimal number | | + | | [0(Sunday),6]. | | + +-----------+--------------------------------+-------+ + | ``%W`` | Week number of the year | \(3) | + | | (Monday as the first day of | | + | | the week) as a decimal number | | + | | [00,53]. All days in a new | | + | | year preceding the first | | + | | Monday are considered to be in | | + | | week 0. | | + +-----------+--------------------------------+-------+ + | ``%x`` | Locale's appropriate date | | + | | representation. 
| | + +-----------+--------------------------------+-------+ + | ``%X`` | Locale's appropriate time | | + | | representation. | | + +-----------+--------------------------------+-------+ + | ``%y`` | Year without century as a | | + | | decimal number [00,99]. | | + +-----------+--------------------------------+-------+ + | ``%Y`` | Year with century as a decimal | | + | | number. | | + +-----------+--------------------------------+-------+ + | ``%Z`` | Time zone name (no characters | | + | | if no time zone exists). | | + +-----------+--------------------------------+-------+ + | ``%%`` | A literal ``'%'`` character. | | + +-----------+--------------------------------+-------+ + + Notes: + + (1) + When used with the :func:`strptime` function, the ``%p`` directive only affects + the output hour field if the ``%I`` directive is used to parse the hour. + + (2) + The range really is ``0`` to ``61``; this accounts for leap seconds and the + (very rare) double leap seconds. + + (3) + When used with the :func:`strptime` function, ``%U`` and ``%W`` are only used in + calculations when the day of the week and the year are specified. + + Here is an example, a format for dates compatible with that specified in the + :rfc:`2822` Internet email standard. [#]_ :: + + >>> from time import gmtime, strftime + >>> strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) + 'Thu, 28 Jun 2001 14:17:15 +0000' + + Additional directives may be supported on certain platforms, but only the ones + listed here have a meaning standardized by ANSI C. + + On some platforms, an optional field width and precision specification can + immediately follow the initial ``'%'`` of a directive in the following order; + this is also not portable. The field width is normally 2 except for ``%j`` where + it is 3. + + +.. function:: strptime(string[, format]) + + Parse a string representing a time according to a format. The return value is + a :class:`struct_time` as returned by :func:`gmtime` or :func:`localtime`. 
+ + The *format* parameter uses the same directives as those used by + :func:`strftime`; it defaults to ``"%a %b %d %H:%M:%S %Y"`` which matches the + formatting returned by :func:`ctime`. If *string* cannot be parsed according to + *format*, or if it has excess data after parsing, :exc:`ValueError` is raised. + The default values used to fill in any missing data when more accurate values + cannot be inferred are ``(1900, 1, 1, 0, 0, 0, 0, 1, -1)``. + + For example:: + + >>> import time + >>> time.strptime("30 Nov 00", "%d %b %y") + (2000, 11, 30, 0, 0, 0, 3, 335, -1) + + Support for the ``%Z`` directive is based on the values contained in ``tzname`` + and whether ``daylight`` is true. Because of this, it is platform-specific + except for recognizing UTC and GMT which are always known (and are considered to + be non-daylight savings timezones). + + Only the directives specified in the documentation are supported. Because + ``strftime()`` is implemented per platform it can sometimes offer more + directives than those listed. But ``strptime()`` is independent of any platform + and thus does not necessarily support all directives available that are not + documented as supported. + + +.. data:: struct_time + + The type of the time value sequence returned by :func:`gmtime`, + :func:`localtime`, and :func:`strptime`. + + .. versionadded:: 2.2 + + +.. function:: time() + + Return the time as a floating point number expressed in seconds since the epoch, + in UTC. Note that even though the time is always returned as a floating point + number, not all systems provide time with a better precision than 1 second. + While this function normally returns non-decreasing values, it can return a + lower value than a previous call if the system clock has been set back between + the two calls. + + +.. data:: timezone + + The offset of the local (non-DST) timezone, in seconds west of UTC (negative in + most of Western Europe, positive in the US, zero in the UK). + + +.. 
data:: tzname + + A tuple of two strings: the first is the name of the local non-DST timezone, the + second is the name of the local DST timezone. If no DST timezone is defined, + the second string should not be used. + + +.. function:: tzset() + + Resets the time conversion rules used by the library routines. The environment + variable :envvar:`TZ` specifies how this is done. + + .. versionadded:: 2.3 + + Availability: Unix. + + .. note:: + + Although in many cases, changing the :envvar:`TZ` environment variable may + affect the output of functions like :func:`localtime` without calling + :func:`tzset`, this behavior should not be relied on. + + The :envvar:`TZ` environment variable should contain no whitespace. + + The standard format of the :envvar:`TZ` environment variable is (whitespace + added for clarity):: + + std offset [dst [offset [,start[/time], end[/time]]]] + + Where the components are: + + ``std`` and ``dst`` + Three or more alphanumerics giving the timezone abbreviations. These will be + propagated into ``time.tzname``. + + ``offset`` + The offset has the form: ``± hh[:mm[:ss]]``. This indicates the value + added to the local time to arrive at UTC. If preceded by a '-', the timezone + is east of the Prime Meridian; otherwise, it is west. If no offset follows + dst, summer time is assumed to be one hour ahead of standard time. + + ``start[/time], end[/time]`` + Indicates when to change to and back from DST. The format of the + start and end dates is one of the following: + + :samp:`J{n}` + The Julian day *n* (1 <= *n* <= 365). Leap days are not counted, so in + all years February 28 is day 59 and March 1 is day 60. + + :samp:`{n}` + The zero-based Julian day (0 <= *n* <= 365). Leap days are counted, and + it is possible to refer to February 29.
+ + :samp:`M{m}.{n}.{d}` + The *d*'th day (0 <= *d* <= 6) of week *n* of month *m* of the year (1 + <= *n* <= 5, 1 <= *m* <= 12, where week 5 means "the last *d* day in + month *m*" which may occur in either the fourth or the fifth + week). Week 1 is the first week in which the *d*'th day occurs. Day + zero is Sunday. + + ``time`` has the same format as ``offset`` except that no leading sign + ('-' or '+') is allowed. The default, if time is not given, is 02:00:00. + + :: + + >>> os.environ['TZ'] = 'EST+05EDT,M4.1.0,M10.5.0' + >>> time.tzset() + >>> time.strftime('%X %x %Z') + '02:07:36 05/08/03 EDT' + >>> os.environ['TZ'] = 'AEST-10AEDT-11,M10.5.0,M3.5.0' + >>> time.tzset() + >>> time.strftime('%X %x %Z') + '16:08:12 05/08/03 AEST' + + On many Unix systems (including \*BSD, Linux, Solaris, and Darwin), it is more + convenient to use the system's zoneinfo (:manpage:`tzfile(5)`) database to + specify the timezone rules. To do this, set the :envvar:`TZ` environment + variable to the path of the required timezone datafile, relative to the root of + the system's 'zoneinfo' timezone database, usually located at + :file:`/usr/share/zoneinfo`. For example, ``'US/Eastern'``, + ``'Australia/Melbourne'``, ``'Egypt'`` or ``'Europe/Amsterdam'``. :: + + >>> os.environ['TZ'] = 'US/Eastern' + >>> time.tzset() + >>> time.tzname + ('EST', 'EDT') + >>> os.environ['TZ'] = 'Egypt' + >>> time.tzset() + >>> time.tzname + ('EET', 'EEST') + + +.. seealso:: + + Module :mod:`datetime` + More object-oriented interface to dates and times. + + Module :mod:`locale` + Internationalization services. The locale settings can affect the return values + for some of the functions in the :mod:`time` module. + + Module :mod:`calendar` + General calendar-related functions. :func:`timegm` is the inverse of + :func:`gmtime` from this module. + +.. rubric:: Footnotes + +..
[#] The use of ``%Z`` is now deprecated, but the ``%z`` escape that expands to the + preferred hour/minute offset is not supported by all ANSI C libraries. Also, a + strict reading of the original 1982 :rfc:`822` standard calls for a two-digit + year (%y rather than %Y), but practice moved to 4-digit years long before the + year 2000. The 4-digit year has been mandated by :rfc:`2822`, which obsoletes + :rfc:`822`. + diff --git a/Doc/library/timeit.rst b/Doc/library/timeit.rst new file mode 100644 index 0000000..8c0cda3 --- /dev/null +++ b/Doc/library/timeit.rst @@ -0,0 +1,243 @@ + +:mod:`timeit` --- Measure execution time of small code snippets +=============================================================== + +.. module:: timeit + :synopsis: Measure the execution time of small code snippets. + + +.. versionadded:: 2.3 + +.. index:: + single: Benchmarking + single: Performance + +This module provides a simple way to time small bits of Python code. It has both +command line as well as callable interfaces. It avoids a number of common traps +for measuring execution times. See also Tim Peters' introduction to the +"Algorithms" chapter in the Python Cookbook, published by O'Reilly. + +The module defines the following public class: + + +.. class:: Timer([stmt='pass' [, setup='pass' [, timer=<timer function>]]]) + + Class for timing execution speed of small code snippets. + + The constructor takes a statement to be timed, an additional statement used for + setup, and a timer function. Both statements default to ``'pass'``; the timer + function is platform-dependent (see the module doc string). The statements may + contain newlines, as long as they don't contain multi-line string literals. + + To measure the execution time of the first statement, use the :meth:`timeit` + method. The :meth:`repeat` method is a convenience to call :meth:`timeit` + multiple times and return a list of results. + + .. 
versionchanged:: 2.6 + The *stmt* and *setup* parameters can now also take objects that are callable + without arguments. This will embed calls to them in a timer function that will + then be executed by :meth:`timeit`. Note that the timing overhead is a little + larger in this case because of the extra function calls. + + +.. method:: Timer.print_exc([file=None]) + + Helper to print a traceback from the timed code. + + Typical use:: + + t = Timer(...) # outside the try/except + try: + t.timeit(...) # or t.repeat(...) + except: + t.print_exc() + + The advantage over the standard traceback is that source lines in the compiled + template will be displayed. The optional *file* argument directs where the + traceback is sent; it defaults to ``sys.stderr``. + + +.. method:: Timer.repeat([repeat=3 [, number=1000000]]) + + Call :meth:`timeit` a few times. + + This is a convenience function that calls the :meth:`timeit` repeatedly, + returning a list of results. The first argument specifies how many times to + call :meth:`timeit`. The second argument specifies the *number* argument for + :func:`timeit`. + + .. note:: + + It's tempting to calculate mean and standard deviation from the result vector + and report these. However, this is not very useful. In a typical case, the + lowest value gives a lower bound for how fast your machine can run the given + code snippet; higher values in the result vector are typically not caused by + variability in Python's speed, but by other processes interfering with your + timing accuracy. So the :func:`min` of the result is probably the only number + you should be interested in. After that, you should look at the entire vector + and apply common sense rather than statistics. + + +.. method:: Timer.timeit([number=1000000]) + + Time *number* executions of the main statement. This executes the setup + statement once, and then returns the time it takes to execute the main statement + a number of times, measured in seconds as a float. 
The argument is the number + of times through the loop, defaulting to one million. The main statement, the + setup statement and the timer function to be used are passed to the constructor. + + .. note:: + + By default, :meth:`timeit` temporarily turns off garbage collection during the + timing. The advantage of this approach is that it makes independent timings + more comparable. The disadvantage is that GC may be an important component of + the performance of the function being measured. If so, GC can be re-enabled as + the first statement in the *setup* string. For example:: + + timeit.Timer('for i in range(10): oct(i)', 'gc.enable()').timeit() + +Starting with version 2.6, the module also defines two convenience functions: + + +.. function:: repeat(stmt[, setup[, timer[, repeat=3 [, number=1000000]]]]) + + Create a :class:`Timer` instance with the given statement, setup code and timer + function and run its :meth:`repeat` method with the given repeat count and + *number* executions. + + .. versionadded:: 2.6 + + +.. function:: timeit(stmt[, setup[, timer[, number=1000000]]]) + + Create a :class:`Timer` instance with the given statement, setup code and timer + function and run its :meth:`timeit` method with *number* executions. + + .. versionadded:: 2.6 + + +Command Line Interface +---------------------- + +When called as a program from the command line, the following form is used:: + + python -m timeit [-n N] [-r N] [-s S] [-t] [-c] [-h] [statement ...]
+ +where the following options are understood: + +-n N/:option:`--number=N` + how many times to execute 'statement' + +-r N/:option:`--repeat=N` + how many times to repeat the timer (default 3) + +-s S/:option:`--setup=S` + statement to be executed once initially (default ``'pass'``) + +-t/:option:`--time` + use :func:`time.time` (default on all platforms but Windows) + +-c/:option:`--clock` + use :func:`time.clock` (default on Windows) + +-v/:option:`--verbose` + print raw timing results; repeat for more digits precision + +-h/:option:`--help` + print a short usage message and exit + +A multi-line statement may be given by specifying each line as a separate +statement argument; indented lines are possible by enclosing an argument in +quotes and using leading spaces. Multiple :option:`-s` options are treated +similarly. + +If :option:`-n` is not given, a suitable number of loops is calculated by trying +successive powers of 10 until the total time is at least 0.2 seconds. + +The default timer function is platform dependent. On Windows, +:func:`time.clock` has microsecond granularity but :func:`time.time`'s +granularity is 1/60th of a second; on Unix, :func:`time.clock` has 1/100th of a +second granularity and :func:`time.time` is much more precise. On either +platform, the default timer functions measure wall clock time, not the CPU time. +This means that other processes running on the same computer may interfere with +the timing. The best thing to do when accurate timing is necessary is to repeat +the timing a few times and use the best time. The :option:`-r` option is good +for this; the default of 3 repetitions is probably enough in most cases. On +Unix, you can use :func:`time.clock` to measure CPU time. + +.. note:: + + There is a certain baseline overhead associated with executing a pass statement. + The code here doesn't try to hide it, but you should be aware of it. The + baseline overhead can be measured by invoking the program without arguments. 
+ +The baseline overhead differs between Python versions! Also, to fairly compare +older Python versions to Python 2.3, you may want to use Python's :option:`-O` +option for the older versions to avoid timing ``SET_LINENO`` instructions. + + +Examples +-------- + +Here are two example sessions (one using the command line, one using the module +interface) that compare the cost of using :func:`hasattr` vs. +:keyword:`try`/:keyword:`except` to test for missing and present object +attributes. :: + + % timeit.py 'try:' ' str.__bool__' 'except AttributeError:' ' pass' + 100000 loops, best of 3: 15.7 usec per loop + % timeit.py 'if hasattr(str, "__bool__"): pass' + 100000 loops, best of 3: 4.26 usec per loop + % timeit.py 'try:' ' int.__bool__' 'except AttributeError:' ' pass' + 1000000 loops, best of 3: 1.43 usec per loop + % timeit.py 'if hasattr(int, "__bool__"): pass' + 100000 loops, best of 3: 2.23 usec per loop + +:: + + >>> import timeit + >>> s = """\ + ... try: + ... str.__bool__ + ... except AttributeError: + ... pass + ... """ + >>> t = timeit.Timer(stmt=s) + >>> print "%.2f usec/pass" % (1000000 * t.timeit(number=100000)/100000) + 17.09 usec/pass + >>> s = """\ + ... if hasattr(str, '__bool__'): pass + ... """ + >>> t = timeit.Timer(stmt=s) + >>> print "%.2f usec/pass" % (1000000 * t.timeit(number=100000)/100000) + 4.85 usec/pass + >>> s = """\ + ... try: + ... int.__bool__ + ... except AttributeError: + ... pass + ... """ + >>> t = timeit.Timer(stmt=s) + >>> print "%.2f usec/pass" % (1000000 * t.timeit(number=100000)/100000) + 1.97 usec/pass + >>> s = """\ + ... if hasattr(int, '__bool__'): pass + ... 
""" + >>> t = timeit.Timer(stmt=s) + >>> print "%.2f usec/pass" % (1000000 * t.timeit(number=100000)/100000) + 3.15 usec/pass + +To give the :mod:`timeit` module access to functions you define, you can pass a +``setup`` parameter which contains an import statement:: + + def test(): + "Stupid test function" + L = [] + for i in range(100): + L.append(i) + + if __name__=='__main__': + from timeit import Timer + t = Timer("test()", "from __main__ import test") + print t.timeit() + diff --git a/Doc/library/tix.rst b/Doc/library/tix.rst new file mode 100644 index 0000000..4701c15 --- /dev/null +++ b/Doc/library/tix.rst @@ -0,0 +1,602 @@ +:mod:`Tix` --- Extension widgets for Tk +======================================= + +.. module:: Tix + :synopsis: Tk Extension Widgets for Tkinter +.. sectionauthor:: Mike Clarkson <mikeclarkson@users.sourceforge.net> + + +.. index:: single: Tix + +The :mod:`Tix` (Tk Interface Extension) module provides an additional rich set +of widgets. Although the standard Tk library has many useful widgets, they are +far from complete. The :mod:`Tix` library provides most of the commonly needed +widgets that are missing from standard Tk: :class:`HList`, :class:`ComboBox`, +:class:`Control` (a.k.a. SpinBox) and an assortment of scrollable widgets. +:mod:`Tix` also includes many more widgets that are generally useful in a wide +range of applications: :class:`NoteBook`, :class:`FileEntry`, +:class:`PanedWindow`, etc; there are more than 40 of them. + +With all these new widgets, you can introduce new interaction techniques into +applications, creating more useful and more intuitive user interfaces. You can +design your application by choosing the most appropriate widgets to match the +special needs of your application and users. + + +.. seealso:: + + `Tix Homepage <http://tix.sourceforge.net/>`_ + The home page for :mod:`Tix`. This includes links to additional documentation + and downloads. 
+ + `Tix Man Pages <http://tix.sourceforge.net/dist/current/man/>`_ + On-line version of the man pages and reference material. + + `Tix Programming Guide <http://tix.sourceforge.net/dist/current/docs/tix-book/tix.book.html>`_ + On-line version of the programmer's reference material. + + `Tix Development Applications <http://tix.sourceforge.net/Tide/>`_ + Tix applications for development of Tix and Tkinter programs. Tide applications + work under Tk or Tkinter, and include :program:`TixInspect`, an inspector to + remotely modify and debug Tix/Tk/Tkinter applications. + + +Using Tix +--------- + + +.. class:: Tix(screenName[, baseName[, className]]) + + Toplevel widget of Tix which represents mostly the main window of an + application. It has an associated Tcl interpreter. + + Classes in the :mod:`Tix` module subclass the classes in the :mod:`Tkinter` + module. The former imports the latter, so to use :mod:`Tix` with Tkinter, all + you need to do is to import one module. In general, you can just import + :mod:`Tix`, and replace the toplevel call to :class:`Tkinter.Tk` with + :class:`Tix.Tk`:: + + import Tix + from Tkconstants import * + root = Tix.Tk() + +To use :mod:`Tix`, you must have the :mod:`Tix` widgets installed, usually +alongside your installation of the Tk widgets. To test your installation, try +the following:: + + import Tix + root = Tix.Tk() + root.tk.eval('package require Tix') + +If this fails, you have a Tk installation problem which must be resolved before +proceeding. Use the environment variable :envvar:`TIX_LIBRARY` to point to the +installed :mod:`Tix` library directory, and make sure you have the dynamic +object library (:file:`tix8183.dll` or :file:`libtix8183.so`) in the same +directory that contains your Tk dynamic object library (:file:`tk8183.dll` or +:file:`libtk8183.so`).
The directory with the dynamic object library should also +have a file called :file:`pkgIndex.tcl` (case sensitive), which contains the +line:: + + package ifneeded Tix 8.1 [list load "[file join $dir tix8183.dll]" Tix] + +.. % $ <-- bow to font-lock + + +Tix Widgets +----------- + +`Tix <http://tix.sourceforge.net/dist/current/man/html/TixCmd/TixIntro.htm>`_ +introduces over 40 widget classes to the :mod:`Tkinter` repertoire. There is a +demo of all the :mod:`Tix` widgets in the :file:`Demo/tix` directory of the +standard distribution. + +.. % The Python sample code is still being added to Python, hence commented out + + +Basic Widgets +^^^^^^^^^^^^^ + + +.. class:: Balloon() + + A `Balloon + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixBalloon.htm>`_ that + pops up over a widget to provide help. When the user moves the cursor inside a + widget to which a Balloon widget has been bound, a small pop-up window with a + descriptive message will be shown on the screen. + +.. % Python Demo of: +.. % \ulink{Balloon}{http://tix.sourceforge.net/dist/current/demos/samples/Balloon.tcl} + + +.. class:: ButtonBox() + + The `ButtonBox + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixButtonBox.htm>`_ + widget creates a box of buttons, such as is commonly used for ``Ok Cancel``. + +.. % Python Demo of: +.. % \ulink{ButtonBox}{http://tix.sourceforge.net/dist/current/demos/samples/BtnBox.tcl} + + +.. class:: ComboBox() + + The `ComboBox + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixComboBox.htm>`_ + widget is similar to the combo box control in MS Windows. The user can select a + choice by either typing in the entry subwidget or selecting from the listbox + subwidget. + +.. % Python Demo of: +.. % \ulink{ComboBox}{http://tix.sourceforge.net/dist/current/demos/samples/ComboBox.tcl} + + +..
class:: Control() + + The `Control + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixControl.htm>`_ + widget is also known as the :class:`SpinBox` widget. The user can adjust the + value by pressing the two arrow buttons or by entering the value directly into + the entry. The new value will be checked against the user-defined upper and + lower limits. + +.. % Python Demo of: +.. % \ulink{Control}{http://tix.sourceforge.net/dist/current/demos/samples/Control.tcl} + + +.. class:: LabelEntry() + + The `LabelEntry + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixLabelEntry.htm>`_ + widget packages an entry widget and a label into one mega widget. It can be + used to simplify the creation of "entry-form" type of interface. + +.. % Python Demo of: +.. % \ulink{LabelEntry}{http://tix.sourceforge.net/dist/current/demos/samples/LabEntry.tcl} + + +.. class:: LabelFrame() + + The `LabelFrame + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixLabelFrame.htm>`_ + widget packages a frame widget and a label into one mega widget. To create + widgets inside a LabelFrame widget, one creates the new widgets relative to the + :attr:`frame` subwidget and manages them inside the :attr:`frame` subwidget. + +.. % Python Demo of: +.. % \ulink{LabelFrame}{http://tix.sourceforge.net/dist/current/demos/samples/LabFrame.tcl} + + +.. class:: Meter() + + The `Meter + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixMeter.htm>`_ widget + can be used to show the progress of a background job which may take a long time + to execute. + +.. % Python Demo of: +.. % \ulink{Meter}{http://tix.sourceforge.net/dist/current/demos/samples/Meter.tcl} + + +.. class:: OptionMenu() + + The `OptionMenu + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixOptionMenu.htm>`_ + creates a menu button of options. + +.. % Python Demo of: +.. % \ulink{OptionMenu}{http://tix.sourceforge.net/dist/current/demos/samples/OptMenu.tcl} + + +..
class:: PopupMenu() + + The `PopupMenu + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixPopupMenu.htm>`_ + widget can be used as a replacement of the ``tk_popup`` command. The advantage + of the :mod:`Tix` :class:`PopupMenu` widget is it requires less application code + to manipulate. + +.. % Python Demo of: +.. % \ulink{PopupMenu}{http://tix.sourceforge.net/dist/current/demos/samples/PopMenu.tcl} + + +.. class:: Select() + + The `Select + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixSelect.htm>`_ widget + is a container of button subwidgets. It can be used to provide radio-box or + check-box style of selection options for the user. + +.. % Python Demo of: +.. % \ulink{Select}{http://tix.sourceforge.net/dist/current/demos/samples/Select.tcl} + + +.. class:: StdButtonBox() + + The `StdButtonBox + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixStdButtonBox.htm>`_ + widget is a group of standard buttons for Motif-like dialog boxes. + +.. % Python Demo of: +.. % \ulink{StdButtonBox}{http://tix.sourceforge.net/dist/current/demos/samples/StdBBox.tcl} + + +File Selectors +^^^^^^^^^^^^^^ + + +.. class:: DirList() + + The `DirList + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixDirList.htm>`_ + widget displays a list view of a directory, its previous directories and its + sub-directories. The user can choose one of the directories displayed in the + list or change to another directory. + +.. % Python Demo of: +.. % \ulink{DirList}{http://tix.sourceforge.net/dist/current/demos/samples/DirList.tcl} + + +.. class:: DirTree() + + The `DirTree + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixDirTree.htm>`_ + widget displays a tree view of a directory, its previous directories and its + sub-directories. The user can choose one of the directories displayed in the + list or change to another directory. + +.. % Python Demo of: +.. 
% \ulink{DirTree}{http://tix.sourceforge.net/dist/current/demos/samples/DirTree.tcl} + + +.. class:: DirSelectDialog() + + The `DirSelectDialog + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixDirSelectDialog.htm>`_ + widget presents the directories in the file system in a dialog window. The user + can use this dialog window to navigate through the file system to select the + desired directory. + +.. % Python Demo of: +.. % \ulink{DirSelectDialog}{http://tix.sourceforge.net/dist/current/demos/samples/DirDlg.tcl} + + +.. class:: DirSelectBox() + + The :class:`DirSelectBox` is similar to the standard Motif(TM) + directory-selection box. It is generally used for the user to choose a + directory. DirSelectBox stores the directories most recently selected into + a ComboBox widget so that they can be quickly selected again. + + +.. class:: ExFileSelectBox() + + The `ExFileSelectBox + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixExFileSelectBox.htm>`_ + widget is usually embedded in a tixExFileSelectDialog widget. It provides a + convenient method for the user to select files. The style of the + :class:`ExFileSelectBox` widget is very similar to the standard file dialog on + MS Windows 3.1. + +.. % Python Demo of: +.. % \ulink{ExFileSelectDialog}{http://tix.sourceforge.net/dist/current/demos/samples/EFileDlg.tcl} + + +.. class:: FileSelectBox() + + The `FileSelectBox + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixFileSelectBox.htm>`_ + is similar to the standard Motif(TM) file-selection box. It is generally used + for the user to choose a file. FileSelectBox stores the files most recently + selected into a :class:`ComboBox` widget so that they can be quickly selected + again. + +.. % Python Demo of: +.. % \ulink{FileSelectDialog}{http://tix.sourceforge.net/dist/current/demos/samples/FileDlg.tcl} + + +..
class:: FileEntry() + + The `FileEntry + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixFileEntry.htm>`_ + widget can be used to input a filename. The user can type in the filename + manually. Alternatively, the user can press the button widget that sits next to + the entry, which will bring up a file selection dialog. + +.. % Python Demo of: +.. % \ulink{FileEntry}{http://tix.sourceforge.net/dist/current/demos/samples/FileEnt.tcl} + + +Hierarchical ListBox +^^^^^^^^^^^^^^^^^^^^ + + +.. class:: HList() + + The `HList + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixHList.htm>`_ widget + can be used to display any data that have a hierarchical structure, for example, + file system directory trees. The list entries are indented and connected by + branch lines according to their places in the hierarchy. + +.. % Python Demo of: +.. % \ulink{HList}{http://tix.sourceforge.net/dist/current/demos/samples/HList1.tcl} + + +.. class:: CheckList() + + The `CheckList + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixCheckList.htm>`_ + widget displays a list of items to be selected by the user. CheckList acts + similarly to the Tk checkbutton or radiobutton widgets, except it is capable of + handling many more items than checkbuttons or radiobuttons. + +.. % Python Demo of: +.. % \ulink{ CheckList}{http://tix.sourceforge.net/dist/current/demos/samples/ChkList.tcl} +.. % Python Demo of: +.. % \ulink{ScrolledHList (1)}{http://tix.sourceforge.net/dist/current/demos/samples/SHList.tcl} +.. % Python Demo of: +.. % \ulink{ScrolledHList (2)}{http://tix.sourceforge.net/dist/current/demos/samples/SHList2.tcl} + + +.. class:: Tree() + + The `Tree + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixTree.htm>`_ widget + can be used to display hierarchical data in a tree form. The user can adjust the + view of the tree by opening or closing parts of the tree. + +.. % Python Demo of: +..
% \ulink{Tree}{http://tix.sourceforge.net/dist/current/demos/samples/Tree.tcl} +.. % Python Demo of: +.. % \ulink{Tree (Dynamic)}{http://tix.sourceforge.net/dist/current/demos/samples/DynTree.tcl} + + +Tabular ListBox +^^^^^^^^^^^^^^^ + + +.. class:: TList() + + The `TList + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixTList.htm>`_ widget + can be used to display data in a tabular format. The list entries of a + :class:`TList` widget are similar to the entries in the Tk listbox widget. The + main differences are (1) the :class:`TList` widget can display the list entries + in a two dimensional format and (2) you can use graphical images as well as + multiple colors and fonts for the list entries. + +.. % Python Demo of: +.. % \ulink{ScrolledTList (1)}{http://tix.sourceforge.net/dist/current/demos/samples/STList1.tcl} +.. % Python Demo of: +.. % \ulink{ScrolledTList (2)}{http://tix.sourceforge.net/dist/current/demos/samples/STList2.tcl} +.. % Grid has yet to be added to Python +.. % \subsubsection{Grid Widget} +.. % Python Demo of: +.. % \ulink{Simple Grid}{http://tix.sourceforge.net/dist/current/demos/samples/SGrid0.tcl} +.. % Python Demo of: +.. % \ulink{ScrolledGrid}{http://tix.sourceforge.net/dist/current/demos/samples/SGrid1.tcl} +.. % Python Demo of: +.. % \ulink{Editable Grid}{http://tix.sourceforge.net/dist/current/demos/samples/EditGrid.tcl} + + +Manager Widgets +^^^^^^^^^^^^^^^ + + +.. class:: PanedWindow() + + The `PanedWindow + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixPanedWindow.htm>`_ + widget allows the user to interactively manipulate the sizes of several panes. + The panes can be arranged either vertically or horizontally. The user changes + the sizes of the panes by dragging the resize handle between two panes. + +.. % Python Demo of: +.. % \ulink{PanedWindow}{http://tix.sourceforge.net/dist/current/demos/samples/PanedWin.tcl} + + +.. 
class:: ListNoteBook() + + The `ListNoteBook + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixListNoteBook.htm>`_ + widget is very similar to the :class:`TixNoteBook` widget: it can be used to + display many windows in a limited space using a notebook metaphor. The notebook + is divided into a stack of pages (windows). At one time only one of these pages + can be shown. The user can navigate through these pages by choosing the name of + the desired page in the :attr:`hlist` subwidget. + +.. % Python Demo of: +.. % \ulink{ListNoteBook}{http://tix.sourceforge.net/dist/current/demos/samples/ListNBK.tcl} + + +.. class:: NoteBook() + + The `NoteBook + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixNoteBook.htm>`_ + widget can be used to display many windows in a limited space using a notebook + metaphor. The notebook is divided into a stack of pages. At one time only one of + these pages can be shown. The user can navigate through these pages by choosing + the visual "tabs" at the top of the NoteBook widget. + +.. % Python Demo of: +.. % \ulink{NoteBook}{http://tix.sourceforge.net/dist/current/demos/samples/NoteBook.tcl} + +.. % \subsubsection{Scrolled Widgets} +.. % Python Demo of: +.. % \ulink{ScrolledListBox}{http://tix.sourceforge.net/dist/current/demos/samples/SListBox.tcl} +.. % Python Demo of: +.. % \ulink{ScrolledText}{http://tix.sourceforge.net/dist/current/demos/samples/SText.tcl} +.. % Python Demo of: +.. % \ulink{ScrolledWindow}{http://tix.sourceforge.net/dist/current/demos/samples/SWindow.tcl} +.. % Python Demo of: +.. % \ulink{Canvas Object View}{http://tix.sourceforge.net/dist/current/demos/samples/CObjView.tcl} + + +Image Types +^^^^^^^^^^^ + +The :mod:`Tix` module adds: + +* `pixmap <http://tix.sourceforge.net/dist/current/man/html/TixCmd/pixmap.htm>`_ + capabilities to all :mod:`Tix` and :mod:`Tkinter` widgets to create color images + from XPM files. + + .. % Python Demo of: + .. 
% \ulink{XPM Image In Button}{http://tix.sourceforge.net/dist/current/demos/samples/Xpm.tcl} + .. % Python Demo of: + .. % \ulink{XPM Image In Menu}{http://tix.sourceforge.net/dist/current/demos/samples/Xpm1.tcl} + +* `Compound + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/compound.htm>`_ image + types can be used to create images that consist of multiple horizontal lines; + each line is composed of a series of items (texts, bitmaps, images or spaces) + arranged from left to right. For example, a compound image can be used to + display a bitmap and a text string simultaneously in a Tk :class:`Button` + widget. + + .. % Python Demo of: + .. % \ulink{Compound Image In Buttons}{http://tix.sourceforge.net/dist/current/demos/samples/CmpImg.tcl} + .. % Python Demo of: + .. % \ulink{Compound Image In NoteBook}{http://tix.sourceforge.net/dist/current/demos/samples/CmpImg2.tcl} + .. % Python Demo of: + .. % \ulink{Compound Image Notebook Color Tabs}{http://tix.sourceforge.net/dist/current/demos/samples/CmpImg4.tcl} + .. % Python Demo of: + .. % \ulink{Compound Image Icons}{http://tix.sourceforge.net/dist/current/demos/samples/CmpImg3.tcl} + + +Miscellaneous Widgets +^^^^^^^^^^^^^^^^^^^^^ + + +.. class:: InputOnly() + + The `InputOnly + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixInputOnly.htm>`_ + widgets are to accept inputs from the user, which can be done with the ``bind`` + command (Unix only). + + +Form Geometry Manager +^^^^^^^^^^^^^^^^^^^^^ + +In addition, :mod:`Tix` augments :mod:`Tkinter` by providing: + + +.. class:: Form() + + The `Form + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tixForm.htm>`_ geometry + manager based on attachment rules for all Tk widgets. + +.. % begin{latexonly} +.. % \subsection{Tix Class Structure} +.. % +.. % \begin{figure}[hbtp] +.. % \centerline{\epsfig{file=hierarchy.png,width=.9\textwidth}} +.. % \vspace{.5cm} +.. % \caption{The Class Hierarchy of Tix Widgets} +.. % \end{figure} +..
% end{latexonly} + + +Tix Commands +------------ + + +.. class:: tixCommand() + + The `tix commands + <http://tix.sourceforge.net/dist/current/man/html/TixCmd/tix.htm>`_ provide + access to miscellaneous elements of :mod:`Tix`'s internal state and the + :mod:`Tix` application context. Most of the information manipulated by these + methods pertains to the application as a whole, or to a screen or display, + rather than to a particular window. + + To view the current settings, the common usage is:: + + import Tix + root = Tix.Tk() + print root.tix_configure() + + +.. method:: tixCommand.tix_configure([cnf,] **kw) + + Query or modify the configuration options of the Tix application context. If no + option is specified, returns a dictionary of all the available options. If + option is specified with no value, then the method returns a list describing the + one named option (this list will be identical to the corresponding sublist of + the value returned if no option is specified). If one or more option-value + pairs are specified, then the method modifies the given option(s) to have the + given value(s); in this case the method returns an empty string. Option may be + any of the configuration options. + + +.. method:: tixCommand.tix_cget(option) + + Returns the current value of the configuration option given by *option*. Option + may be any of the configuration options. + + +.. method:: tixCommand.tix_getbitmap(name) + + Locates a bitmap file of the name ``name.xpm`` or ``name`` in one of the bitmap + directories (see the :meth:`tix_addbitmapdir` method). By using + :meth:`tix_getbitmap`, you can avoid hard coding the pathnames of the bitmap + files in your application. When successful, it returns the complete pathname of + the bitmap file, prefixed with the character ``@``. The returned value can be + used to configure the ``bitmap`` option of the Tk and Tix widgets. + + +..
method:: tixCommand.tix_addbitmapdir(directory) + + Tix maintains a list of directories under which the :meth:`tix_getimage` and + :meth:`tix_getbitmap` methods will search for image files. The standard bitmap + directory is :file:`$TIX_LIBRARY/bitmaps`. The :meth:`tix_addbitmapdir` method + adds *directory* into this list. By using this method, the image files of an + application can also be located using the :meth:`tix_getimage` or + :meth:`tix_getbitmap` method. + + +.. method:: tixCommand.tix_filedialog([dlgclass]) + + Returns the file selection dialog that may be shared among different calls from + this application. This method will create a file selection dialog widget when + it is called the first time. This dialog will be returned by all subsequent + calls to :meth:`tix_filedialog`. An optional dlgclass parameter can be passed + as a string to specify what type of file selection dialog widget is desired. + Possible options are ``tix``, ``FileSelectDialog`` or ``tixExFileSelectDialog``. + + +.. method:: tixCommand.tix_getimage(self, name) + + Locates an image file of the name :file:`name.xpm`, :file:`name.xbm` or + :file:`name.ppm` in one of the bitmap directories (see the + :meth:`tix_addbitmapdir` method above). If more than one file with the same name + (but different extensions) exist, then the image type is chosen according to the + depth of the X display: xbm images are chosen on monochrome displays and color + images are chosen on color displays. By using :meth:`tix_getimage`, you can + avoid hard coding the pathnames of the image files in your application. When + successful, this method returns the name of the newly created image, which can + be used to configure the ``image`` option of the Tk and Tix widgets. + + +.. method:: tixCommand.tix_option_get(name) + + Gets the options maintained by the Tix scheme mechanism. + + +..
method:: tixCommand.tix_resetoptions(newScheme, newFontSet[, newScmPrio]) + + Resets the scheme and fontset of the Tix application to *newScheme* and + *newFontSet*, respectively. This affects only those widgets created after this + call. Therefore, it is best to call the resetoptions method before the creation + of any widgets in a Tix application. + + The optional parameter *newScmPrio* can be given to reset the priority level of + the Tk options set by the Tix schemes. + + Because of the way Tk handles the X option database, after Tix has been + imported and inited, it is not possible to reset the color schemes and font sets + using the :meth:`tix_config` method. Instead, the :meth:`tix_resetoptions` + method must be used. diff --git a/Doc/library/tk.rst b/Doc/library/tk.rst new file mode 100644 index 0000000..bb852d2 --- /dev/null +++ b/Doc/library/tk.rst @@ -0,0 +1,43 @@ +.. _tkinter: + +********************************* +Graphical User Interfaces with Tk +********************************* + +.. index:: + single: GUI + single: Graphical User Interface + single: Tkinter + single: Tk + +Tk/Tcl has long been an integral part of Python. It provides a robust and +platform independent windowing toolkit, that is available to Python programmers +using the :mod:`Tkinter` module, and its extension, the :mod:`Tix` module. + +The :mod:`Tkinter` module is a thin object-oriented layer on top of Tcl/Tk. To +use :mod:`Tkinter`, you don't need to write Tcl code, but you will need to +consult the Tk documentation, and occasionally the Tcl documentation. +:mod:`Tkinter` is a set of wrappers that implement the Tk widgets as Python +classes. In addition, the internal module :mod:`_tkinter` provides a threadsafe +mechanism which allows Python and Tcl to interact. + +:mod:`Tkinter`'s chief virtues are that it is fast, and that it usually comes +bundled with Python.
Although it has been used to create some very good +applications, including IDLE, it has weak documentation and an outdated look and +feel. For more modern, better documented, and much more extensive GUI +libraries, see the :ref:`other-gui-packages` section. + +.. toctree:: + + tkinter.rst + tix.rst + scrolledtext.rst + turtle.rst + idle.rst + othergui.rst + +.. % Other sections I have in mind are +.. % Tkinter internals +.. % Freezing Tkinter applications + + diff --git a/Doc/library/tkinter.rst b/Doc/library/tkinter.rst new file mode 100644 index 0000000..d52c1e0 --- /dev/null +++ b/Doc/library/tkinter.rst @@ -0,0 +1,840 @@ +:mod:`Tkinter` --- Python interface to Tcl/Tk +============================================= + +.. module:: Tkinter + :synopsis: Interface to Tcl/Tk for graphical user interfaces +.. moduleauthor:: Guido van Rossum <guido@Python.org> + + +The :mod:`Tkinter` module ("Tk interface") is the standard Python interface to +the Tk GUI toolkit. Both Tk and :mod:`Tkinter` are available on most Unix +platforms, as well as on Windows and Macintosh systems. (Tk itself is not part +of Python; it is maintained at ActiveState.) + + +.. seealso:: + + `Python Tkinter Resources <http://www.python.org/topics/tkinter/>`_ + The Python Tkinter Topic Guide provides a great deal of information on using Tk + from Python and links to other sources of information on Tk. + + `An Introduction to Tkinter <http://www.pythonware.com/library/an-introduction-to-tkinter.htm>`_ + Fredrik Lundh's on-line reference material. + + `Tkinter reference: a GUI for Python <http://www.nmt.edu/tcc/help/pubs/lang.html>`_ + On-line reference material. + + `Tkinter for JPython <http://jtkinter.sourceforge.net>`_ + The Jython interface to Tkinter. + + `Python and Tkinter Programming <http://www.amazon.com/exec/obidos/ASIN/1884777813>`_ + The book by John Grayson (ISBN 1-884777-81-3). 
+ + +Tkinter Modules +--------------- + +Most of the time, the :mod:`Tkinter` module is all you really need, but a number +of additional modules are available as well. The Tk interface is located in a +binary module named :mod:`_tkinter`. This module contains the low-level +interface to Tk, and should never be used directly by application programmers. +It is usually a shared library (or DLL), but might in some cases be statically +linked with the Python interpreter. + +In addition to the Tk interface module, :mod:`Tkinter` includes a number of +Python modules. The two most important modules are the :mod:`Tkinter` module +itself, and a module called :mod:`Tkconstants`. The former automatically imports +the latter, so to use Tkinter, all you need to do is to import one module:: + + import Tkinter + +Or, more often:: + + from Tkinter import * + + +.. class:: Tk(screenName=None, baseName=None, className='Tk', useTk=1) + + The :class:`Tk` class is instantiated without arguments. This creates a toplevel + widget of Tk which usually is the main window of an application. Each instance + has its own associated Tcl interpreter. + + .. % FIXME: The following keyword arguments are currently recognized: + + .. versionchanged:: 2.4 + The *useTk* parameter was added. + + +.. function:: Tcl(screenName=None, baseName=None, className='Tk', useTk=0) + + The :func:`Tcl` function is a factory function which creates an object much like + that created by the :class:`Tk` class, except that it does not initialize the Tk + subsystem. This is most often useful when driving the Tcl interpreter in an + environment where one doesn't want to create extraneous toplevel windows, or + where one cannot (such as Unix/Linux systems without an X server). An object + created by the :func:`Tcl` object can have a Toplevel window created (and the Tk + subsystem initialized) by calling its :meth:`loadtk` method. + + .. 
versionadded:: 2.4 + +Other modules that provide Tk support include: + +:mod:`ScrolledText` + Text widget with a vertical scroll bar built in. + +:mod:`tkColorChooser` + Dialog to let the user choose a color. + +:mod:`tkCommonDialog` + Base class for the dialogs defined in the other modules listed here. + +:mod:`tkFileDialog` + Common dialogs to allow the user to specify a file to open or save. + +:mod:`tkFont` + Utilities to help work with fonts. + +:mod:`tkMessageBox` + Access to standard Tk dialog boxes. + +:mod:`tkSimpleDialog` + Basic dialogs and convenience functions. + +:mod:`Tkdnd` + Drag-and-drop support for :mod:`Tkinter`. This is experimental and should become + deprecated when it is replaced with the Tk DND. + +:mod:`turtle` + Turtle graphics in a Tk window. + + +Tkinter Life Preserver +---------------------- + +.. sectionauthor:: Matt Conway + + +This section is not designed to be an exhaustive tutorial on either Tk or +Tkinter. Rather, it is intended as a stop gap, providing some introductory +orientation on the system. + +.. % Converted to LaTeX by Mike Clarkson. + +Credits: + +* Tkinter was written by Steen Lumholt and Guido van Rossum. + +* Tk was written by John Ousterhout while at Berkeley. + +* This Life Preserver was written by Matt Conway at the University of Virginia. + +* The html rendering, and some liberal editing, was produced from a FrameMaker + version by Ken Manheimer. + +* Fredrik Lundh elaborated and revised the class interface descriptions, to get + them current with Tk 4.2. + +* Mike Clarkson converted the documentation to LaTeX, and compiled the User + Interface chapter of the reference manual. + + +How To Use This Section +^^^^^^^^^^^^^^^^^^^^^^^ + +This section is designed in two parts: the first half (roughly) covers +background material, while the second half can be taken to the keyboard as a +handy reference. 
+ +When trying to answer questions of the form "how do I do blah", it is often best +to find out how to do "blah" in straight Tk, and then convert this back into the +corresponding :mod:`Tkinter` call. Python programmers can often guess at the +correct Python command by looking at the Tk documentation. This means that in +order to use Tkinter, you will have to know a little bit about Tk. This document +can't fulfill that role, so the best we can do is point you to the best +documentation that exists. Here are some hints: + +* The authors strongly suggest getting a copy of the Tk man pages. Specifically, + the man pages in the ``mann`` directory are most useful. The ``man3`` man pages + describe the C interface to the Tk library and thus are not especially helpful + for script writers. + +* Addison-Wesley publishes a book called Tcl and the Tk Toolkit by John + Ousterhout (ISBN 0-201-63337-X) which is a good introduction to Tcl and Tk for + the novice. The book is not exhaustive, and for many details it defers to the + man pages. + +* :file:`Tkinter.py` is a last resort for most, but can be a good place to go + when nothing else makes sense. + + +.. seealso:: + + `ActiveState Tcl Home Page <http://tcl.activestate.com/>`_ + The Tk/Tcl development is largely taking place at ActiveState. + + `Tcl and the Tk Toolkit <http://www.amazon.com/exec/obidos/ASIN/020163337X>`_ + The book by John Ousterhout, the inventor of Tcl. + + `Practical Programming in Tcl and Tk <http://www.amazon.com/exec/obidos/ASIN/0130220280>`_ + Brent Welch's encyclopedic book. + + +A Simple Hello World Program +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. % HelloWorld.html +.. % begin{latexonly} +.. % \begin{figure}[hbtp] +.. % \centerline{\epsfig{file=HelloWorld.gif,width=.9\textwidth}} +.. % \vspace{.5cm} +.. % \caption{HelloWorld gadget image} +.. % \end{figure} +.. % See also the hello-world \ulink{notes}{classes/HelloWorld-notes.html} and +.. % \ulink{summary}{classes/HelloWorld-summary.html}. +..
% end{latexonly} + +:: + + from Tkinter import * + + class Application(Frame): + def say_hi(self): + print "hi there, everyone!" + + def createWidgets(self): + self.QUIT = Button(self) + self.QUIT["text"] = "QUIT" + self.QUIT["fg"] = "red" + self.QUIT["command"] = self.quit + + self.QUIT.pack({"side": "left"}) + + self.hi_there = Button(self) + self.hi_there["text"] = "Hello", + self.hi_there["command"] = self.say_hi + + self.hi_there.pack({"side": "left"}) + + def __init__(self, master=None): + Frame.__init__(self, master) + self.pack() + self.createWidgets() + + root = Tk() + app = Application(master=root) + app.mainloop() + root.destroy() + + +A (Very) Quick Look at Tcl/Tk +----------------------------- + +The class hierarchy looks complicated, but in actual practice, application +programmers almost always refer to the classes at the very bottom of the +hierarchy. + +.. % BriefTclTk.html + +Notes: + +* These classes are provided for the purposes of organizing certain functions + under one namespace. They aren't meant to be instantiated independently. + +* The :class:`Tk` class is meant to be instantiated only once in an application. + Application programmers need not instantiate one explicitly, the system creates + one whenever any of the other classes are instantiated. + +* The :class:`Widget` class is not meant to be instantiated, it is meant only + for subclassing to make "real" widgets (in C++, this is called an 'abstract + class'). + +To make use of this reference material, there will be times when you will need +to know how to read short passages of Tk and how to identify the various parts +of a Tk command. (See section :ref:`tkinter-basic-mapping` for the +:mod:`Tkinter` equivalents of what's below.) + +Tk scripts are Tcl programs. Like all Tcl programs, Tk scripts are just lists +of tokens separated by spaces. A Tk widget is just its *class*, the *options* +that help configure it, and the *actions* that make it do useful things. 
+ +To make a widget in Tk, the command is always of the form:: + + classCommand newPathname options + +*classCommand* + denotes which kind of widget to make (a button, a label, a menu...) + +*newPathname* + is the new name for this widget. All names in Tk must be unique. To help + enforce this, widgets in Tk are named with *pathnames*, just like files in a + file system. The top level widget, the *root*, is called ``.`` (period) and + children are delimited by more periods. For example, + ``.myApp.controlPanel.okButton`` might be the name of a widget. + +*options* + configure the widget's appearance and in some cases, its behavior. The options + come in the form of a list of flags and values. Flags are preceded by a '-', + like Unix shell command flags, and values are put in quotes if they are more + than one word. + +For example:: + + button .fred -fg red -text "hi there" + ^ ^ \_____________________/ + | | | + class new options + command widget (-opt val -opt val ...) + +Once created, the pathname to the widget becomes a new command. This new +*widget command* is the programmer's handle for getting the new widget to +perform some *action*. In C, you'd express this as someAction(fred, +someOptions), in C++, you would express this as fred.someAction(someOptions), +and in Tk, you say:: + + .fred someAction someOptions + +Note that the object name, ``.fred``, starts with a dot. + +As you'd expect, the legal values for *someAction* will depend on the widget's +class: ``.fred disable`` works if fred is a button (fred gets greyed out), but +does not work if fred is a label (disabling of labels is not supported in Tk). + +The legal values of *someOptions* is action dependent. Some actions, like +``disable``, require no arguments, others, like a text-entry box's ``delete`` +command, would need arguments to specify what range of text to delete. + + +.. 
_tkinter-basic-mapping: + +Mapping Basic Tk into Tkinter +----------------------------- + +Class commands in Tk correspond to class constructors in Tkinter. :: + + button .fred =====> fred = Button() + +The master of an object is implicit in the new name given to it at creation +time. In Tkinter, masters are specified explicitly. :: + + button .panel.fred =====> fred = Button(panel) + +The configuration options in Tk are given in lists of hyphened tags followed by +values. In Tkinter, options are specified as keyword-arguments in the instance +constructor, and keyword-args for configure calls or as instance indices, in +dictionary style, for established instances. See section +:ref:`tkinter-setting-options` on setting options. :: + + button .fred -fg red =====> fred = Button(panel, fg = "red") + .fred configure -fg red =====> fred["fg"] = red + OR ==> fred.config(fg = "red") + +In Tk, to perform an action on a widget, use the widget name as a command, and +follow it with an action name, possibly with arguments (options). In Tkinter, +you call methods on the class instance to invoke actions on the widget. The +actions (methods) that a given widget can perform are listed in the Tkinter.py +module. :: + + .fred invoke =====> fred.invoke() + +To give a widget to the packer (geometry manager), you call pack with optional +arguments. In Tkinter, the Pack class holds all this functionality, and the +various forms of the pack command are implemented as methods. All widgets in +:mod:`Tkinter` are subclassed from the Packer, and so inherit all the packing +methods. See the :mod:`Tix` module documentation for additional information on +the Form geometry manager. :: + + pack .fred -side left =====> fred.pack(side = "left") + + +How Tk and Tkinter are Related +------------------------------ + +.. % Relationship.html + +.. note:: + + This was derived from a graphical image; the image will be used more directly in + a subsequent version of this document. 
+ +From the top down: + +Your App Here (Python) + A Python application makes a :mod:`Tkinter` call. + +Tkinter (Python Module) + This call (say, for example, creating a button widget), is implemented in the + *Tkinter* module, which is written in Python. This Python function will parse + the commands and the arguments and convert them into a form that makes them look + as if they had come from a Tk script instead of a Python script. + +tkinter (C) + These commands and their arguments will be passed to a C function in the + *tkinter* - note the lowercase - extension module. + +Tk Widgets (C and Tcl) + This C function is able to make calls into other C modules, including the C + functions that make up the Tk library. Tk is implemented in C and some Tcl. + The Tcl part of the Tk widgets is used to bind certain default behaviors to + widgets, and is executed once at the point where the Python :mod:`Tkinter` + module is imported. (The user never sees this stage). + +Tk (C) + The Tk part of the Tk Widgets implement the final mapping to ... + +Xlib (C) + the Xlib library to draw graphics on the screen. + + +Handy Reference +--------------- + + +.. _tkinter-setting-options: + +Setting Options +^^^^^^^^^^^^^^^ + +Options control things like the color and border width of a widget. Options can +be set in three ways: + +At object creation time, using keyword arguments + :: + + fred = Button(self, fg = "red", bg = "blue") + +After object creation, treating the option name like a dictionary index + :: + + fred["fg"] = "red" + fred["bg"] = "blue" + +Use the config() method to update multiple attrs subsequent to object creation + :: + + fred.config(fg = "red", bg = "blue") + +For a complete explanation of a given option and its behavior, see the Tk man +pages for the widget in question. + +Note that the man pages list "STANDARD OPTIONS" and "WIDGET SPECIFIC OPTIONS" +for each widget. 
The former is a list of options that are common to many +widgets, the latter are the options that are idiosyncratic to that particular +widget. The Standard Options are documented on the :manpage:`options(3)` man +page. + +No distinction between standard and widget-specific options is made in this +document. Some options don't apply to some kinds of widgets. Whether a given +widget responds to a particular option depends on the class of the widget; +buttons have a ``command`` option, labels do not. + +The options supported by a given widget are listed in that widget's man page, or +can be queried at runtime by calling the :meth:`config` method without +arguments, or by calling the :meth:`keys` method on that widget. The return +value of these calls is a dictionary whose key is the name of the option as a +string (for example, ``'relief'``) and whose values are 5-tuples. + +Some options, like ``bg`` are synonyms for common options with long names +(``bg`` is shorthand for "background"). Passing the ``config()`` method the name +of a shorthand option will return a 2-tuple, not 5-tuple. The 2-tuple passed +back will contain the name of the synonym and the "real" option (such as +``('bg', 'background')``). 
+ ++-------+---------------------------------+--------------+ +| Index | Meaning | Example | ++=======+=================================+==============+ +| 0 | option name | ``'relief'`` | ++-------+---------------------------------+--------------+ +| 1 | option name for database lookup | ``'relief'`` | ++-------+---------------------------------+--------------+ +| 2 | option class for database | ``'Relief'`` | +| | lookup | | ++-------+---------------------------------+--------------+ +| 3 | default value | ``'raised'`` | ++-------+---------------------------------+--------------+ +| 4 | current value | ``'groove'`` | ++-------+---------------------------------+--------------+ + +Example:: + + >>> print fred.config() + {'relief' : ('relief', 'relief', 'Relief', 'raised', 'groove')} + +Of course, the dictionary printed will include all the options available and +their values. This is meant only as an example. + + +The Packer +^^^^^^^^^^ + +.. index:: single: packing (widgets) + +.. % Packer.html + +The packer is one of Tk's geometry-management mechanisms. Geometry managers +are used to specify the relative positioning of widgets +within their container - their mutual *master*. In contrast to the more +cumbersome *placer* (which is used less commonly, and we do not cover here), the +packer takes qualitative relationship specification - *above*, *to the left of*, +*filling*, etc - and works everything out to determine the exact placement +coordinates for you. + +.. % See also \citetitle[classes/ClassPacker.html]{the Packer class interface}. + +The size of any *master* widget is determined by the size of the "slave widgets" +inside. The packer is used to control where slave widgets appear inside the +master into which they are packed. You can pack widgets into frames, and frames +into other frames, in order to achieve the kind of layout you desire. 
+Additionally, the arrangement is dynamically adjusted to accommodate incremental +changes to the configuration, once it is packed. + +Note that widgets do not appear until they have had their geometry specified +with a geometry manager. It's a common early mistake to leave out the geometry +specification, and then be surprised when the widget is created but nothing +appears. A widget will appear only after it has had, for example, the packer's +:meth:`pack` method applied to it. + +The pack() method can be called with keyword-option/value pairs that control +where the widget is to appear within its container, and how it is to behave when +the main application window is resized. Here are some examples:: + + fred.pack() # defaults to side = "top" + fred.pack(side = "left") + fred.pack(expand = 1) + + +Packer Options +^^^^^^^^^^^^^^ + +For more extensive information on the packer and the options that it can take, +see the man pages and page 183 of John Ousterhout's book. + +anchor + Anchor type. Denotes where the packer is to place each slave in its parcel. + +expand + Boolean, ``0`` or ``1``. + +fill + Legal values: ``'x'``, ``'y'``, ``'both'``, ``'none'``. + +ipadx and ipady + A distance - designating internal padding on each side of the slave widget. + +padx and pady + A distance - designating external padding on each side of the slave widget. + +side + Legal values are: ``'left'``, ``'right'``, ``'top'``, ``'bottom'``. + + +Coupling Widget Variables +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The current-value setting of some widgets (like text entry widgets) can be +connected directly to application variables by using special options. These +options are ``variable``, ``textvariable``, ``onvalue``, ``offvalue``, and +``value``. This connection works both ways: if the variable changes for any +reason, the widget it's connected to will be updated to reflect the new value. + +.. 
% VarCouplings.html + +Unfortunately, in the current implementation of :mod:`Tkinter` it is not +possible to hand over an arbitrary Python variable to a widget through a +``variable`` or ``textvariable`` option. The only kinds of variables for which +this works are variables that are subclassed from a class called Variable, +defined in the :mod:`Tkinter` module. + +There are many useful subclasses of Variable already defined: +:class:`StringVar`, :class:`IntVar`, :class:`DoubleVar`, and +:class:`BooleanVar`. To read the current value of such a variable, call the +:meth:`get` method on it, and to change its value you call the :meth:`set` +method. If you follow this protocol, the widget will always track the value of +the variable, with no further intervention on your part. + +For example:: + + class App(Frame): + def __init__(self, master=None): + Frame.__init__(self, master) + self.pack() + + self.entrythingy = Entry() + self.entrythingy.pack() + + # here is the application variable + self.contents = StringVar() + # set it to some value + self.contents.set("this is a variable") + # tell the entry widget to watch this variable + self.entrythingy["textvariable"] = self.contents + + # and here we get a callback when the user hits return. + # we will have the program print out the value of the + # application variable when the user hits return + self.entrythingy.bind('<Key-Return>', + self.print_contents) + + def print_contents(self, event): + print "hi. contents of entry is now ---->", \ + self.contents.get() + + +The Window Manager +^^^^^^^^^^^^^^^^^^ + +.. index:: single: window manager (widgets) + +.. % WindowMgr.html + +In Tk, there is a utility command, ``wm``, for interacting with the window +manager. Options to the ``wm`` command allow you to control things like titles, +placement, icon bitmaps, and the like. In :mod:`Tkinter`, these commands have +been implemented as methods on the :class:`Wm` class. 
Toplevel widgets are +subclassed from the :class:`Wm` class, and so can call the :class:`Wm` methods +directly. + +To get at the toplevel window that contains a given widget, you can often just +refer to the widget's master. Of course if the widget has been packed inside of +a frame, the master won't represent a toplevel window. To get at the toplevel +window that contains an arbitrary widget, you can call the :meth:`_root` method. +This method begins with an underscore to denote the fact that this function is +part of the implementation, and not an interface to Tk functionality. + +.. % See also \citetitle[classes/ClassWm.html]{the Wm class interface}. + +Here are some examples of typical usage:: + + from Tkinter import * + class App(Frame): + def __init__(self, master=None): + Frame.__init__(self, master) + self.pack() + + + # create the application + myapp = App() + + # + # here are method calls to the window manager class + # + myapp.master.title("My Do-Nothing Application") + myapp.master.maxsize(1000, 400) + + # start the program + myapp.mainloop() + + +Tk Option Data Types +^^^^^^^^^^^^^^^^^^^^ + +.. index:: single: Tk Option Data Types + +.. % OptionTypes.html + +anchor + Legal values are points of the compass: ``"n"``, ``"ne"``, ``"e"``, ``"se"``, + ``"s"``, ``"sw"``, ``"w"``, ``"nw"``, and also ``"center"``. + +bitmap + There are eight built-in, named bitmaps: ``'error'``, ``'gray25'``, + ``'gray50'``, ``'hourglass'``, ``'info'``, ``'questhead'``, ``'question'``, + ``'warning'``. To specify an X bitmap filename, give the full path to the file, + preceded with an ``@``, as in ``"@/usr/contrib/bitmap/gumby.bit"``. + +boolean + You can pass integers 0 or 1 or the strings ``"yes"`` or ``"no"`` . + +callback + This is any Python function that takes no arguments. 
For example:: + + def print_it(): + print "hi there" + fred["command"] = print_it + +color + Colors can be given as the names of X colors in the rgb.txt file, or as strings + representing RGB values in 4 bit: ``"#RGB"``, 8 bit: ``"#RRGGBB"``, 12 bit: + ``"#RRRGGGBBB"``, or 16 bit ``"#RRRRGGGGBBBB"`` ranges, where R,G,B here + represent any legal hex digit. See page 160 of Ousterhout's book for details. + +cursor + The standard X cursor names from :file:`cursorfont.h` can be used, without the + ``XC_`` prefix. For example to get a hand cursor (:const:`XC_hand2`), use the + string ``"hand2"``. You can also specify a bitmap and mask file of your own. + See page 179 of Ousterhout's book. + +distance + Screen distances can be specified in either pixels or absolute distances. + Pixels are given as numbers and absolute distances as strings, with the trailing + character denoting units: ``c`` for centimetres, ``i`` for inches, ``m`` for + millimetres, ``p`` for printer's points. For example, 3.5 inches is expressed + as ``"3.5i"``. + +font + Tk uses a list font name format, such as ``{courier 10 bold}``. Font sizes with + positive numbers are measured in points; sizes with negative numbers are + measured in pixels. + +geometry + This is a string of the form ``widthxheight``, where width and height are + measured in pixels for most widgets (in characters for widgets displaying text). + For example: ``fred["geometry"] = "200x100"``. + +justify + Legal values are the strings: ``"left"``, ``"center"``, ``"right"``, and + ``"fill"``. + +region + This is a string with four space-delimited elements, each of which is a legal + distance (see above). For example: ``"2 3 4 5"`` and ``"3i 2i 4.5i 2i"`` and + ``"3c 2c 4c 10.43c"`` are all legal regions. + +relief + Determines what the border style of a widget will be. Legal values are: + ``"raised"``, ``"sunken"``, ``"flat"``, ``"groove"``, and ``"ridge"``. 
+ +scrollcommand + This is almost always the :meth:`set` method of some scrollbar widget, but can + be any widget method that takes a single argument. Refer to the file + :file:`Demo/tkinter/matt/canvas-with-scrollbars.py` in the Python source + distribution for an example. + +wrap: + Must be one of: ``"none"``, ``"char"``, or ``"word"``. + + +Bindings and Events +^^^^^^^^^^^^^^^^^^^ + +.. index:: + single: bind (widgets) + single: events (widgets) + +.. % Bindings.html + +The bind method from the widget command allows you to watch for certain events +and to have a callback function trigger when that event type occurs. The form +of the bind method is:: + + def bind(self, sequence, func, add=''): + +where: + +sequence + is a string that denotes the target kind of event. (See the bind man page and + page 201 of John Ousterhout's book for details). + +func + is a Python function, taking one argument, to be invoked when the event occurs. + An Event instance will be passed as the argument. (Functions deployed this way + are commonly known as *callbacks*.) + +add + is optional, either ``''`` or ``'+'``. Passing an empty string denotes that + this binding is to replace any other bindings that this event is associated + with. Passing a ``'+'`` means that this function is to be added to the list + of functions bound to this event type. + +For example:: + + def turnRed(self, event): + event.widget["activeforeground"] = "red" + + self.button.bind("<Enter>", self.turnRed) + +Notice how the widget field of the event is being accessed in the +:meth:`turnRed` callback. This field contains the widget that caught the X +event. The following table lists the other event fields you can access, and how +they are denoted in Tk, which can be useful when referring to the Tk man pages. 
+:: + + Tk Tkinter Event Field Tk Tkinter Event Field + -- ------------------- -- ------------------- + %f focus %A char + %h height %E send_event + %k keycode %K keysym + %s state %N keysym_num + %t time %T type + %w width %W widget + %x x %X x_root + %y y %Y y_root + + +The index Parameter +^^^^^^^^^^^^^^^^^^^ + +A number of widgets require "index" parameters to be passed. These are used to +point at a specific place in a Text widget, or to particular characters in an +Entry widget, or to particular menu items in a Menu widget. + +.. % Index.html + +Entry widget indexes (index, view index, etc.) + Entry widgets have options that refer to character positions in the text being + displayed. You can use these :mod:`Tkinter` functions to access these special + points in text widgets: + + AtEnd() + refers to the last position in the text + + AtInsert() + refers to the point where the text cursor is + + AtSelFirst() + indicates the beginning point of the selected text + + AtSelLast() + denotes the last point of the selected text and finally + + At(x[, y]) + refers to the character at pixel location *x*, *y* (with *y* not used in the + case of a text entry widget, which contains a single line of text). + +Text widget indexes + The index notation for Text widgets is very rich and is best described in the Tk + man pages. + +Menu indexes (menu.invoke(), menu.entryconfig(), etc.) + Some options and methods for menus manipulate specific menu entries. 
Anytime a + menu index is needed for an option or a parameter, you may pass in: + + * an integer which refers to the numeric position of the entry in the widget, + counted from the top, starting with 0; + + * the string ``'active'``, which refers to the menu position that is currently + under the cursor; + + * the string ``"last"`` which refers to the last menu item; + + * An integer preceded by ``@``, as in ``@6``, where the integer is interpreted + as a y pixel coordinate in the menu's coordinate system; + + * the string ``"none"``, which indicates no menu entry at all, most often used + with menu.activate() to deactivate all entries, and finally, + + * a text string that is pattern matched against the label of the menu entry, as + scanned from the top of the menu to the bottom. Note that this index type is + considered after all the others, which means that matches for menu items + labelled ``last``, ``active``, or ``none`` may be interpreted as the above + literals, instead. + + +Images +^^^^^^ + +Bitmap/Pixelmap images can be created through the subclasses of +:class:`Tkinter.Image`: + +* :class:`BitmapImage` can be used for X11 bitmap data. + +* :class:`PhotoImage` can be used for GIF and PPM/PGM color bitmaps. + +Either type of image is created through either the ``file`` or the ``data`` +option (other options are available as well). + +The image object can then be used wherever an ``image`` option is supported by +some widget (e.g. labels, buttons, menus). In these cases, Tk will not keep a +reference to the image. When the last Python reference to the image object is +deleted, the image data is deleted as well, and Tk will display an empty box +wherever the image was used. + diff --git a/Doc/library/token.rst b/Doc/library/token.rst new file mode 100644 index 0000000..5bf0ea8 --- /dev/null +++ b/Doc/library/token.rst @@ -0,0 +1,47 @@ + +:mod:`token` --- Constants used with Python parse trees +======================================================= + +.. 
module:: token + :synopsis: Constants representing terminal nodes of the parse tree. +.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org> + + +This module provides constants which represent the numeric values of leaf nodes +of the parse tree (terminal tokens). Refer to the file :file:`Grammar/Grammar` +in the Python distribution for the definitions of the names in the context of +the language grammar. The specific numeric values which the names map to may +change between Python versions. + +This module also provides one data object and some functions. The functions +mirror definitions in the Python C header files. + + +.. data:: tok_name + + Dictionary mapping the numeric values of the constants defined in this module + back to name strings, allowing more human-readable representation of parse trees + to be generated. + + +.. function:: ISTERMINAL(x) + + Return true for terminal token values. + + +.. function:: ISNONTERMINAL(x) + + Return true for non-terminal token values. + + +.. function:: ISEOF(x) + + Return true if *x* is the marker indicating the end of input. + + +.. seealso:: + + Module :mod:`parser` + The second example for the :mod:`parser` module shows how to use the + :mod:`symbol` module. + diff --git a/Doc/library/tokenize.rst b/Doc/library/tokenize.rst new file mode 100644 index 0000000..61f2c4d --- /dev/null +++ b/Doc/library/tokenize.rst @@ -0,0 +1,122 @@ + +:mod:`tokenize` --- Tokenizer for Python source +=============================================== + +.. module:: tokenize + :synopsis: Lexical scanner for Python source code. +.. moduleauthor:: Ka Ping Yee +.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org> + + +The :mod:`tokenize` module provides a lexical scanner for Python source code, +implemented in Python. The scanner in this module returns comments as tokens as +well, making it useful for implementing "pretty-printers," including colorizers +for on-screen displays. + +The primary entry point is a generator: + + +.. 
function:: generate_tokens(readline) + + The :func:`generate_tokens` generator requires one argument, *readline*, which + must be a callable object which provides the same interface as the + :meth:`readline` method of built-in file objects (see section + :ref:`bltin-file-objects`). Each call to the function should return one line of + input as a string. + + The generator produces 5-tuples with these members: the token type; the token + string; a 2-tuple ``(srow, scol)`` of ints specifying the row and column where + the token begins in the source; a 2-tuple ``(erow, ecol)`` of ints specifying + the row and column where the token ends in the source; and the line on which the + token was found. The line passed is the *logical* line; continuation lines are + included. + + .. versionadded:: 2.2 + +An older entry point is retained for backward compatibility: + + +.. function:: tokenize(readline[, tokeneater]) + + The :func:`tokenize` function accepts two parameters: one representing the input + stream, and one providing an output mechanism for :func:`tokenize`. + + The first parameter, *readline*, must be a callable object which provides the + same interface as the :meth:`readline` method of built-in file objects (see + section :ref:`bltin-file-objects`). Each call to the function should return one + line of input as a string. Alternately, *readline* may be a callable object that + signals completion by raising :exc:`StopIteration`. + + .. versionchanged:: 2.5 + Added :exc:`StopIteration` support. + + The second parameter, *tokeneater*, must also be a callable object. It is + called once for each token, with five arguments, corresponding to the tuples + generated by :func:`generate_tokens`. + +All constants from the :mod:`token` module are also exported from +:mod:`tokenize`, as are two additional token type values that might be passed to +the *tokeneater* function by :func:`tokenize`: + + +.. data:: COMMENT + + Token value used to indicate a comment. + + +.. 
data:: NL + + Token value used to indicate a non-terminating newline. The NEWLINE token + indicates the end of a logical line of Python code; NL tokens are generated when + a logical line of code is continued over multiple physical lines. + +Another function is provided to reverse the tokenization process. This is useful +for creating tools that tokenize a script, modify the token stream, and write +back the modified script. + + +.. function:: untokenize(iterable) + + Converts tokens back into Python source code. The *iterable* must return + sequences with at least two elements, the token type and the token string. Any + additional sequence elements are ignored. + + The reconstructed script is returned as a single string. The result is + guaranteed to tokenize back to match the input so that the conversion is + lossless and round-trips are assured. The guarantee applies only to the token + type and token string as the spacing between tokens (column positions) may + change. + + .. versionadded:: 2.5 + +Example of a script re-writer that transforms float literals into Decimal +objects:: + + def decistmt(s): + """Substitute Decimals for floats in a string of statements. + + >>> from decimal import Decimal + >>> s = 'print +21.3e-5*-.1234/81.7' + >>> decistmt(s) + "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')" + + >>> exec(s) + -3.21716034272e-007 + >>> exec(decistmt(s)) + -3.217160342717258261933904529E-7 + + """ + result = [] + g = generate_tokens(StringIO(s).readline) # tokenize the string + for toknum, tokval, _, _, _ in g: + if toknum == NUMBER and '.' 
in tokval: # replace NUMBER tokens + result.extend([ + (NAME, 'Decimal'), + (OP, '('), + (STRING, repr(tokval)), + (OP, ')') + ]) + else: + result.append((toknum, tokval)) + return untokenize(result) + diff --git a/Doc/library/trace.rst b/Doc/library/trace.rst new file mode 100644 index 0000000..91cf1a4 --- /dev/null +++ b/Doc/library/trace.rst @@ -0,0 +1,128 @@ + +:mod:`trace` --- Trace or track Python statement execution +========================================================== + +.. module:: trace + :synopsis: Trace or track Python statement execution. + + +The :mod:`trace` module allows you to trace program execution, generate +annotated statement coverage listings, print caller/callee relationships and +list functions executed during a program run. It can be used in another program +or from the command line. + + +.. _trace-cli: + +Command Line Usage +------------------ + +The :mod:`trace` module can be invoked from the command line. It can be as +simple as :: + + python -m trace --count somefile.py ... + +The above will generate annotated listings of all Python modules imported during +the execution of :file:`somefile.py`. + +The following command-line arguments are supported: + +:option:`--trace`, :option:`-t` + Display lines as they are executed. + +:option:`--count`, :option:`-c` + Produce a set of annotated listing files upon program completion that shows how + many times each statement was executed. + +:option:`--report`, :option:`-r` + Produce an annotated list from an earlier program run that used the + :option:`--count` and :option:`--file` arguments. + +:option:`--no-report`, :option:`-R` + Do not generate annotated listings. This is useful if you intend to make + several runs with :option:`--count` then produce a single set of annotated + listings at the end. + +:option:`--listfuncs`, :option:`-l` + List the functions executed by running the program. 
+ +:option:`--trackcalls`, :option:`-T` + Generate calling relationships exposed by running the program. + +:option:`--file`, :option:`-f` + Name a file containing (or to contain) counts. + +:option:`--coverdir`, :option:`-C` + Name a directory in which to save annotated listing files. + +:option:`--missing`, :option:`-m` + When generating annotated listings, mark lines which were not executed with + '``>>>>>>``'. + +:option:`--summary`, :option:`-s` + When using :option:`--count` or :option:`--report`, write a brief summary to + stdout for each file processed. + +:option:`--ignore-module` + Ignore the named module and its submodules (if it is a package). May be given + multiple times. + +:option:`--ignore-dir` + Ignore all modules and packages in the named directory and subdirectories. May + be given multiple times. + + +.. _trace-api: + +Programming Interface +--------------------- + + +.. class:: Trace([count=1[, trace=1[, countfuncs=0[, countcallers=0[, ignoremods=()[, ignoredirs=()[, infile=None[, outfile=None]]]]]]]]) + + Create an object to trace execution of a single statement or expression. All + parameters are optional. *count* enables counting of line numbers. *trace* + enables line execution tracing. *countfuncs* enables listing of the functions + called during the run. *countcallers* enables call relationship tracking. + *ignoremods* is a list of modules or packages to ignore. *ignoredirs* is a list + of directories whose modules or packages should be ignored. *infile* is the + file from which to read stored count information. *outfile* is a file in which + to write updated count information. + + +.. method:: Trace.run(cmd) + + Run *cmd* under control of the Trace object with the current tracing parameters. + + +.. method:: Trace.runctx(cmd[, globals=None[, locals=None]]) + + Run *cmd* under control of the Trace object with the current tracing parameters + in the defined global and local environments. 
If not defined, *globals* and + *locals* default to empty dictionaries. + + +.. method:: Trace.runfunc(func, *args, **kwds) + + Call *func* with the given arguments under control of the :class:`Trace` object + with the current tracing parameters. + +This is a simple example showing the use of this module:: + + import sys + import trace + + # create a Trace object, telling it what to ignore, and whether to + # do tracing or line-counting or both. + tracer = trace.Trace( + ignoredirs=[sys.prefix, sys.exec_prefix], + trace=0, + count=1) + + # run the new command using the given tracer + tracer.run('main()') + + # make a report, placing output in /tmp + r = tracer.results() + r.write_results(show_missing=True, coverdir="/tmp") + diff --git a/Doc/library/traceback.rst b/Doc/library/traceback.rst new file mode 100644 index 0000000..ec8687f --- /dev/null +++ b/Doc/library/traceback.rst @@ -0,0 +1,160 @@ + +:mod:`traceback` --- Print or retrieve a stack traceback +======================================================== + +.. module:: traceback + :synopsis: Print or retrieve a stack traceback. + + +This module provides a standard interface to extract, format and print stack +traces of Python programs. It exactly mimics the behavior of the Python +interpreter when it prints a stack trace. This is useful when you want to print +stack traces under program control, such as in a "wrapper" around the +interpreter. + +.. index:: object: traceback + +The module uses traceback objects --- this is the object type that is stored in +the ``sys.last_traceback`` variable and returned as the third item from +:func:`sys.exc_info`. + +The module defines the following functions: + + +.. function:: print_tb(traceback[, limit[, file]]) + + Print up to *limit* stack trace entries from *traceback*. If *limit* is omitted + or ``None``, all entries are printed. 
If *file* is omitted or ``None``, the + output goes to ``sys.stderr``; otherwise it should be an open file or file-like + object to receive the output. + + +.. function:: print_exception(type, value, traceback[, limit[, file]]) + + Print exception information and up to *limit* stack trace entries from + *traceback* to *file*. This differs from :func:`print_tb` in the following ways: + (1) if *traceback* is not ``None``, it prints a header ``Traceback (most recent + call last):``; (2) it prints the exception *type* and *value* after the stack + trace; (3) if *type* is :exc:`SyntaxError` and *value* has the appropriate + format, it prints the line where the syntax error occurred with a caret + indicating the approximate position of the error. + + +.. function:: print_exc([limit[, file]]) + + This is a shorthand for ``print_exception(*sys.exc_info())``. + + +.. function:: format_exc([limit]) + + This is like ``print_exc(limit)`` but returns a string instead of printing to a + file. + + .. versionadded:: 2.4 + + +.. function:: print_last([limit[, file]]) + + This is a shorthand for ``print_exception(sys.last_type, sys.last_value, + sys.last_traceback, limit, file)``. + + +.. function:: print_stack([f[, limit[, file]]]) + + This function prints a stack trace from its invocation point. The optional *f* + argument can be used to specify an alternate stack frame to start. The optional + *limit* and *file* arguments have the same meaning as for + :func:`print_exception`. + + +.. function:: extract_tb(traceback[, limit]) + + Return a list of up to *limit* "pre-processed" stack trace entries extracted + from the traceback object *traceback*. It is useful for alternate formatting of + stack traces. If *limit* is omitted or ``None``, all entries are extracted. A + "pre-processed" stack trace entry is a quadruple (*filename*, *line number*, + *function name*, *text*) representing the information that is usually printed + for a stack trace. 
The *text* is a string with leading and trailing whitespace + stripped; if the source is not available it is ``None``. + + +.. function:: extract_stack([f[, limit]]) + + Extract the raw traceback from the current stack frame. The return value has + the same format as for :func:`extract_tb`. The optional *f* and *limit* + arguments have the same meaning as for :func:`print_stack`. + + +.. function:: format_list(list) + + Given a list of tuples as returned by :func:`extract_tb` or + :func:`extract_stack`, return a list of strings ready for printing. Each string + in the resulting list corresponds to the item with the same index in the + argument list. Each string ends in a newline; the strings may contain internal + newlines as well, for those items whose source text line is not ``None``. + + +.. function:: format_exception_only(type, value) + + Format the exception part of a traceback. The arguments are the exception type + and value such as given by ``sys.last_type`` and ``sys.last_value``. The return + value is a list of strings, each ending in a newline. Normally, the list + contains a single string; however, for :exc:`SyntaxError` exceptions, it + contains several lines that (when printed) display detailed information about + where the syntax error occurred. The message indicating which exception + occurred is always the last string in the list. + + +.. function:: format_exception(type, value, tb[, limit]) + + Format a stack trace and the exception information. The arguments have the + same meaning as the corresponding arguments to :func:`print_exception`. The + return value is a list of strings, each ending in a newline and some containing + internal newlines. When these lines are concatenated and printed, exactly the + same text is printed as does :func:`print_exception`. + + +.. function:: format_tb(tb[, limit]) + + A shorthand for ``format_list(extract_tb(tb, limit))``. + + +.. 
function:: format_stack([f[, limit]]) + + A shorthand for ``format_list(extract_stack(f, limit))``. + + +.. function:: tb_lineno(tb) + + This function returns the current line number set in the traceback object. This + function was necessary because in versions of Python prior to 2.3 when the + :option:`-O` flag was passed to Python the ``tb.tb_lineno`` was not updated + correctly. This function has no use in versions past 2.3. + + +.. _traceback-example: + +Traceback Example +----------------- + +This simple example implements a basic read-eval-print loop, similar to (but +less useful than) the standard Python interactive interpreter loop. For a more +complete implementation of the interpreter loop, refer to the :mod:`code` +module. :: + + import sys, traceback + + def run_user_code(envdir): + source = raw_input(">>> ") + try: + exec(source, envdir) + except: + print "Exception in user code:" + print '-'*60 + traceback.print_exc(file=sys.stdout) + print '-'*60 + + envdir = {} + while 1: + run_user_code(envdir) + diff --git a/Doc/library/tty.rst b/Doc/library/tty.rst new file mode 100644 index 0000000..688faee --- /dev/null +++ b/Doc/library/tty.rst @@ -0,0 +1,38 @@ + +:mod:`tty` --- Terminal control functions +========================================= + +.. module:: tty + :platform: Unix + :synopsis: Utility functions that perform common terminal control operations. +.. moduleauthor:: Steen Lumholt +.. sectionauthor:: Moshe Zadka <moshez@zadka.site.co.il> + + +The :mod:`tty` module defines functions for putting the tty into cbreak and raw +modes. + +Because it requires the :mod:`termios` module, it will work only on Unix. + +The :mod:`tty` module defines the following functions: + + +.. function:: setraw(fd[, when]) + + Change the mode of the file descriptor *fd* to raw. If *when* is omitted, it + defaults to :const:`termios.TCSAFLUSH`, and is passed to + :func:`termios.tcsetattr`. + + +.. 
function:: setcbreak(fd[, when]) + + Change the mode of file descriptor *fd* to cbreak. If *when* is omitted, it + defaults to :const:`termios.TCSAFLUSH`, and is passed to + :func:`termios.tcsetattr`. + + +.. seealso:: + + Module :mod:`termios` + Low-level terminal control interface. + diff --git a/Doc/library/turtle.rst b/Doc/library/turtle.rst new file mode 100644 index 0000000..354bb11 --- /dev/null +++ b/Doc/library/turtle.rst @@ -0,0 +1,312 @@ + +:mod:`turtle` --- Turtle graphics for Tk +======================================== + +.. module:: turtle + :platform: Tk + :synopsis: An environment for turtle graphics. +.. moduleauthor:: Guido van Rossum <guido@python.org> + + +.. sectionauthor:: Moshe Zadka <moshez@zadka.site.co.il> + + +The :mod:`turtle` module provides turtle graphics primitives, in both +object-oriented and procedure-oriented ways. Because it uses :mod:`Tkinter` for +the underlying graphics, it needs a version of Python installed with Tk support. + +The procedural interface uses a pen and a canvas which are automagically created +when any of the functions are called. + +The :mod:`turtle` module defines the following functions: + + +.. function:: degrees() + + Set angle measurement units to degrees. + + +.. function:: radians() + + Set angle measurement units to radians. + + +.. function:: setup(**kwargs) + + Sets the size and position of the main window. Keywords are: + + * ``width``: either a size in pixels or a fraction of the screen. The default is + 50% of the screen. + + * ``height``: either a size in pixels or a fraction of the screen. The default + is 50% of the screen. + + * ``startx``: starting position in pixels from the left edge of the screen. + ``None`` is the default value and centers the window horizontally on screen. + + * ``starty``: starting position in pixels from the top edge of the screen. + ``None`` is the default value and centers the window vertically on screen.
+ + Examples:: + + # Uses default geometry: 50% x 50% of screen, centered. + setup() + + # Sets window to 200x200 pixels, in upper left of screen + setup (width=200, height=200, startx=0, starty=0) + + # Sets window to 75% of screen by 50% of screen, and centers it. + setup(width=.75, height=0.5, startx=None, starty=None) + + +.. function:: title(title_str) + + Set the window's title to *title_str*. + + +.. function:: done() + + Enters the Tk main loop. The window will continue to be displayed until the + user closes it or the process is killed. + + +.. function:: reset() + + Clear the screen, re-center the pen, and set variables to the default values. + + +.. function:: clear() + + Clear the screen. + + +.. function:: tracer(flag) + + Set tracing on/off (according to whether flag is true or not). Tracing means + lines are drawn more slowly, with an animation of an arrow along the line. + + +.. function:: speed(speed) + + Set the speed of the turtle. Valid values for the parameter *speed* are + ``'fastest'`` (no delay), ``'fast'`` (delay 5ms), ``'normal'`` (delay 10ms), + ``'slow'`` (delay 15ms), and ``'slowest'`` (delay 20ms). + + .. versionadded:: 2.5 + + +.. function:: delay(delay) + + Set the speed of the turtle to *delay*, which is given in ms. + + .. versionadded:: 2.5 + + +.. function:: forward(distance) + + Go forward *distance* steps. + + +.. function:: backward(distance) + + Go backward *distance* steps. + + +.. function:: left(angle) + + Turn left *angle* units. Units are by default degrees, but can be set via the + :func:`degrees` and :func:`radians` functions. + + +.. function:: right(angle) + + Turn right *angle* units. Units are by default degrees, but can be set via the + :func:`degrees` and :func:`radians` functions. + + +.. function:: up() + + Move the pen up --- stop drawing. + + +.. function:: down() + + Move the pen down --- draw when moving. + + +.. function:: width(width) + + Set the line width to *width*. + + +..
function:: color(s) + color((r, g, b)) + color(r, g, b) + + Set the pen color. In the first form, the color is specified as a Tk color + specification as a string. The second form specifies the color as a tuple of + the RGB values, each in the range [0..1]. For the third form, the color is + specified giving the RGB values as three separate parameters (each in the range + [0..1]). + + +.. function:: write(text[, move]) + + Write *text* at the current pen position. If *move* is true, the pen is moved to + the bottom-right corner of the text. By default, *move* is false. + + +.. function:: fill(flag) + + The complete specifications are rather complex, but the recommended usage is: + call ``fill(1)`` before drawing a path you want to fill, and call ``fill(0)`` + when you finish drawing the path. + + +.. function:: begin_fill() + + Switch turtle into filling mode; must eventually be followed by a corresponding + end_fill() call. Otherwise it will be ignored. + + .. versionadded:: 2.5 + + +.. function:: end_fill() + + End filling mode, and fill the shape; equivalent to ``fill(0)``. + + .. versionadded:: 2.5 + + +.. function:: circle(radius[, extent]) + + Draw a circle with radius *radius* whose center-point is *radius* units left of + the turtle. *extent* determines which part of a circle is drawn: if not given it + defaults to a full circle. + + If *extent* is not a full circle, one endpoint of the arc is the current pen + position. The arc is drawn in a counter clockwise direction if *radius* is + positive, otherwise in a clockwise direction. In the process, the direction of + the turtle is changed by the amount of the *extent*. + + +.. function:: goto(x, y) + goto((x, y)) + + Go to co-ordinates *x*, *y*. The co-ordinates may be specified either as two + separate arguments or as a 2-tuple. + + +.. function:: towards(x, y) + + Return the angle of the line from the turtle's position to the point *x*, *y*.
+ The co-ordinates may be specified either as two separate arguments, as a + 2-tuple, or as another pen object. + + .. versionadded:: 2.5 + + +.. function:: heading() + + Return the current orientation of the turtle. + + .. versionadded:: 2.3 + + +.. function:: setheading(angle) + + Set the orientation of the turtle to *angle*. + + .. versionadded:: 2.3 + + +.. function:: position() + + Return the current location of the turtle as an ``(x,y)`` pair. + + .. versionadded:: 2.3 + + +.. function:: setx(x) + + Set the x coordinate of the turtle to *x*. + + .. versionadded:: 2.3 + + +.. function:: sety(y) + + Set the y coordinate of the turtle to *y*. + + .. versionadded:: 2.3 + + +.. function:: window_width() + + Return the width of the canvas window. + + .. versionadded:: 2.3 + + +.. function:: window_height() + + Return the height of the canvas window. + + .. versionadded:: 2.3 + +This module also does ``from math import *``, so see the documentation for the +:mod:`math` module for additional constants and functions useful for turtle +graphics. + + +.. function:: demo() + + Exercise the module a bit. + + +.. exception:: Error + + Exception raised on any error caught by this module. + +For examples, see the code of the :func:`demo` function. + +This module defines the following classes: + + +.. class:: Pen() + + Define a pen. All above functions can be called as methods on the given pen. + The constructor automatically creates a canvas to be drawn on. + + +.. class:: Turtle() + + Define a pen. This is essentially a synonym for ``Pen()``; :class:`Turtle` is an + empty subclass of :class:`Pen`. + + +.. class:: RawPen(canvas) + + Define a pen which draws on a canvas *canvas*. This is useful if you want to + use the module to create graphics in a "real" program. + + +..
_pen-rawpen-objects: + +Turtle, Pen and RawPen Objects +------------------------------ + +Most of the global functions available in the module are also available as +methods of the :class:`Turtle`, :class:`Pen` and :class:`RawPen` classes, +affecting only the state of the given pen. + +The only method which is more powerful as a method is :func:`degrees`, which +takes an optional argument letting you specify the number of units +corresponding to a full circle: + + +.. method:: Turtle.degrees([fullcircle]) + + *fullcircle* is by default 360. This can cause the pen to have any angular units + whatever: give *fullcircle* 2\*π for radians, or 400 for gradians. + diff --git a/Doc/library/types.rst b/Doc/library/types.rst new file mode 100644 index 0000000..c636a73 --- /dev/null +++ b/Doc/library/types.rst @@ -0,0 +1,257 @@ + +:mod:`types` --- Names for built-in types +========================================= + +.. module:: types + :synopsis: Names for built-in types. + + +This module defines names for some object types that are used by the standard +Python interpreter, but not for the types defined by various extension modules. +Also, it does not include some of the types that arise during processing such as +the ``listiterator`` type. It is safe to use ``from types import *`` --- the +module does not export any names besides the ones listed here. New names +exported by future versions of this module will all end in ``Type``. + +Typical use is for functions that do different things depending on their +argument types, like the following:: + + from types import * + def delete(mylist, item): + if type(item) is IntType: + del mylist[item] + else: + mylist.remove(item) + +Starting in Python 2.2, built-in factory functions such as :func:`int` and +:func:`str` are also names for the corresponding types. This is now the +preferred way to access the type instead of using the :mod:`types` module.
+Accordingly, the example above should be written as follows:: + + def delete(mylist, item): + if isinstance(item, int): + del mylist[item] + else: + mylist.remove(item) + +The module defines the following names: + + +.. data:: NoneType + + The type of ``None``. + + +.. data:: TypeType + + .. index:: builtin: type + + The type of type objects (such as returned by :func:`type`); alias of the + built-in :class:`type`. + + +.. data:: BooleanType + + The type of the :class:`bool` values ``True`` and ``False``; alias of the + built-in :class:`bool`. + + .. versionadded:: 2.3 + + +.. data:: IntType + + The type of integers (e.g. ``1``); alias of the built-in :class:`int`. + + +.. data:: LongType + + The type of long integers (e.g. ``1L``); alias of the built-in :class:`long`. + + +.. data:: FloatType + + The type of floating point numbers (e.g. ``1.0``); alias of the built-in + :class:`float`. + + +.. data:: ComplexType + + The type of complex numbers (e.g. ``1.0j``). This is not defined if Python was + built without complex number support. + + +.. data:: StringType + + The type of character strings (e.g. ``'Spam'``); alias of the built-in + :class:`str`. + + +.. data:: UnicodeType + + The type of Unicode character strings (e.g. ``u'Spam'``). This is not defined + if Python was built without Unicode support. It's an alias of the built-in + :class:`unicode`. + + +.. data:: TupleType + + The type of tuples (e.g. ``(1, 2, 3, 'Spam')``); alias of the built-in + :class:`tuple`. + + +.. data:: ListType + + The type of lists (e.g. ``[0, 1, 2, 3]``); alias of the built-in + :class:`list`. + + +.. data:: DictType + + The type of dictionaries (e.g. ``{'Bacon': 1, 'Ham': 0}``); alias of the + built-in :class:`dict`. + + +.. data:: DictionaryType + + An alternate name for ``DictType``. + + +.. data:: FunctionType + + The type of user-defined functions and lambdas. + + +.. data:: LambdaType + + An alternate name for ``FunctionType``. + + +.. 
data:: GeneratorType + + The type of generator-iterator objects, produced by calling a generator + function. + + .. versionadded:: 2.2 + + +.. data:: CodeType + + .. index:: builtin: compile + + The type for code objects such as returned by :func:`compile`. + + +.. data:: ClassType + + The type of user-defined classes. + + +.. data:: MethodType + + The type of methods of user-defined class instances. + + +.. data:: UnboundMethodType + + An alternate name for ``MethodType``. + + +.. data:: BuiltinFunctionType + + The type of built-in functions like :func:`len` or :func:`sys.exit`. + + +.. data:: BuiltinMethodType + + An alternate name for ``BuiltinFunctionType``. + + +.. data:: ModuleType + + The type of modules. + + +.. data:: FileType + + The type of open file objects such as ``sys.stdout``; alias of the built-in + :class:`file`. + + +.. data:: RangeType + + .. index:: builtin: range + + The type of range objects returned by :func:`range`; alias of the built-in + :class:`range`. + + +.. data:: SliceType + + .. index:: builtin: slice + + The type of objects returned by :func:`slice`; alias of the built-in + :class:`slice`. + + +.. data:: EllipsisType + + The type of ``Ellipsis``. + + +.. data:: TracebackType + + The type of traceback objects such as found in ``sys.exc_info()[2]``. + + +.. data:: FrameType + + The type of frame objects such as found in ``tb.tb_frame`` if ``tb`` is a + traceback object. + + +.. data:: BufferType + + .. index:: builtin: buffer + + The type of buffer objects created by the :func:`buffer` function. + + +.. data:: DictProxyType + + The type of dict proxies, such as ``TypeType.__dict__``. + + +.. data:: NotImplementedType + + The type of ``NotImplemented``. + + +.. data:: GetSetDescriptorType + + The type of objects defined in extension modules with ``PyGetSetDef``, such as + ``FrameType.f_locals`` or ``array.array.typecode``.
This constant is not + defined in implementations of Python that do not have such extension types, so + for portable code use ``hasattr(types, 'GetSetDescriptorType')``. + + .. versionadded:: 2.5 + + +.. data:: MemberDescriptorType + + The type of objects defined in extension modules with ``PyMemberDef``, such as + ``datetime.timedelta.days``. This constant is not defined in implementations of + Python that do not have such extension types, so for portable code use + ``hasattr(types, 'MemberDescriptorType')``. + + .. versionadded:: 2.5 + + +.. data:: StringTypes + + A sequence containing ``StringType`` and ``UnicodeType`` used to facilitate + easier checking for any string object. Using this is more portable than using a + sequence of the two string types constructed elsewhere since it only contains + ``UnicodeType`` if it has been built in the running version of Python. For + example: ``isinstance(s, types.StringTypes)``. + + .. versionadded:: 2.2 diff --git a/Doc/library/undoc.rst b/Doc/library/undoc.rst new file mode 100644 index 0000000..ad46fc8 --- /dev/null +++ b/Doc/library/undoc.rst @@ -0,0 +1,186 @@ + +.. _undoc: + +******************** +Undocumented Modules +******************** + +Here's a quick listing of modules that are currently undocumented, but that +should be documented. Feel free to contribute documentation for them! (Send +via email to docs@python.org.) + +The idea and original contents for this chapter were taken from a posting by +Fredrik Lundh; the specific contents of this chapter have been substantially +revised. + + +Miscellaneous useful utilities +============================== + +Some of these are very old and/or not very robust; marked with "hmm." + +:mod:`bdb` + --- A generic Python debugger base class (used by pdb). + +:mod:`ihooks` + --- Import hook support (for :mod:`rexec`; may become obsolete). 
+ + +Platform specific modules +========================= + +These modules are used to implement the :mod:`os.path` module, and are not +documented beyond this mention. There's little need to document these. + +:mod:`ntpath` + --- Implementation of :mod:`os.path` on Win32, Win64, WinCE, and OS/2 platforms. + +:mod:`posixpath` + --- Implementation of :mod:`os.path` on POSIX. + + +Multimedia +========== + +:mod:`linuxaudiodev` + --- Play audio data on the Linux audio device. Replaced in Python 2.3 by the + :mod:`ossaudiodev` module. + +:mod:`sunaudio` + --- Interpret Sun audio headers (may become obsolete or a tool/demo). + + +.. _undoc-mac-modules: + +Undocumented Mac OS modules +=========================== + + +:mod:`applesingle` --- AppleSingle decoder +------------------------------------------ + +.. module:: applesingle + :platform: Mac + :synopsis: Rudimentary decoder for AppleSingle format files. + + + +:mod:`buildtools` --- Helper module for BuildApplet and Friends +--------------------------------------------------------------- + +.. module:: buildtools + :platform: Mac + :synopsis: Helper module for BuildApplet, BuildApplication and macfreeze. + + +.. deprecated:: 2.4 + + +:mod:`icopen` --- Internet Config replacement for :meth:`open` +-------------------------------------------------------------- + +.. module:: icopen + :platform: Mac + :synopsis: Internet Config replacement for open(). + + +Importing :mod:`icopen` will replace the builtin :meth:`open` with a version +that uses Internet Config to set file type and creator for new files. + + +:mod:`macerrors` --- Mac OS Errors +---------------------------------- + +.. module:: macerrors + :platform: Mac + :synopsis: Constant definitions for many Mac OS error codes. + + +:mod:`macerrors` contains constant definitions for many Mac OS error codes. + + +:mod:`macresource` --- Locate script resources +---------------------------------------------- + +.. 
module:: macresource + :platform: Mac + :synopsis: Locate script resources. + + +:mod:`macresource` helps scripts find their resources, such as dialogs and +menus, without requiring special case code for when the script is run under +MacPython, as a MacPython applet or under OSX Python. + + +:mod:`Nav` --- NavServices calls +-------------------------------- + +.. module:: Nav + :platform: Mac + :synopsis: Interface to Navigation Services. + + +A low-level interface to Navigation Services. + + +:mod:`PixMapWrapper` --- Wrapper for PixMap objects +--------------------------------------------------- + +.. module:: PixMapWrapper + :platform: Mac + :synopsis: Wrapper for PixMap objects. + + +:mod:`PixMapWrapper` wraps a PixMap object with a Python object that allows +access to the fields by name. It also has methods to convert to and from +:mod:`PIL` images. + + +:mod:`videoreader` --- Read QuickTime movies +-------------------------------------------- + +.. module:: videoreader + :platform: Mac + :synopsis: Read QuickTime movies frame by frame for further processing. + + +:mod:`videoreader` reads and decodes QuickTime movies and passes a stream of +images to your program. It also provides some support for audio tracks. + + +:mod:`W` --- Widgets built on :mod:`FrameWork` +---------------------------------------------- + +.. module:: W + :platform: Mac + :synopsis: Widgets for the Mac, built on top of FrameWork. + + +The :mod:`W` widgets are used extensively in the :program:`IDE`. + + +.. _obsolete-modules: + +Obsolete +======== + +These modules are not normally available for import; additional work must be +done to make them available. + +These extension modules written in C are not built by default. Under Unix, these +must be enabled by uncommenting the appropriate lines in :file:`Modules/Setup` +in the build tree and either rebuilding Python if the modules are statically +linked, or building and installing the shared object if using dynamically-loaded +extensions.
+ +.. % %% lib-old is empty as of Python 2.5 +.. % Those which are written in Python will be installed into the directory +.. % \file{lib-old/} installed as part of the standard library. To use +.. % these, the directory must be added to \code{sys.path}, possibly using +.. % \envvar{PYTHONPATH}. + +.. % XXX need Windows instructions! + + + --- This section should be empty for Python 3.0. + diff --git a/Doc/library/unicodedata.rst b/Doc/library/unicodedata.rst new file mode 100644 index 0000000..017d4ee --- /dev/null +++ b/Doc/library/unicodedata.rst @@ -0,0 +1,165 @@ + +:mod:`unicodedata` --- Unicode Database +======================================= + +.. module:: unicodedata + :synopsis: Access the Unicode Database. +.. moduleauthor:: Marc-Andre Lemburg <mal@lemburg.com> +.. sectionauthor:: Marc-Andre Lemburg <mal@lemburg.com> +.. sectionauthor:: Martin v. Löwis <martin@v.loewis.de> + + +.. index:: + single: Unicode + single: character + pair: Unicode; database + +This module provides access to the Unicode Character Database which defines +character properties for all Unicode characters. The data in this database is +based on the :file:`UnicodeData.txt` file version 4.1.0 which is publicly +available from ftp://ftp.unicode.org/. + +The module uses the same names and symbols as defined by the UnicodeData File +Format 4.1.0 (see http://www.unicode.org/Public/4.1.0/ucd/UCD.html). It defines +the following functions: + + +.. function:: lookup(name) + + Look up character by name. If a character with the given name is found, return + the corresponding Unicode character. If not found, :exc:`KeyError` is raised. + + +.. function:: name(unichr[, default]) + + Returns the name assigned to the Unicode character *unichr* as a string. If no + name is defined, *default* is returned, or, if not given, :exc:`ValueError` is + raised. + + +.. function:: decimal(unichr[, default]) + + Returns the decimal value assigned to the Unicode character *unichr* as integer. 
+ If no such value is defined, *default* is returned, or, if not given, + :exc:`ValueError` is raised. + + +.. function:: digit(unichr[, default]) + + Returns the digit value assigned to the Unicode character *unichr* as integer. + If no such value is defined, *default* is returned, or, if not given, + :exc:`ValueError` is raised. + + +.. function:: numeric(unichr[, default]) + + Returns the numeric value assigned to the Unicode character *unichr* as float. + If no such value is defined, *default* is returned, or, if not given, + :exc:`ValueError` is raised. + + +.. function:: category(unichr) + + Returns the general category assigned to the Unicode character *unichr* as + string. + + +.. function:: bidirectional(unichr) + + Returns the bidirectional category assigned to the Unicode character *unichr* as + string. If no such value is defined, an empty string is returned. + + +.. function:: combining(unichr) + + Returns the canonical combining class assigned to the Unicode character *unichr* + as integer. Returns ``0`` if no combining class is defined. + + +.. function:: east_asian_width(unichr) + + Returns the east asian width assigned to the Unicode character *unichr* as + string. + + .. versionadded:: 2.4 + + +.. function:: mirrored(unichr) + + Returns the mirrored property assigned to the Unicode character *unichr* as + integer. Returns ``1`` if the character has been identified as a "mirrored" + character in bidirectional text, ``0`` otherwise. + + +.. function:: decomposition(unichr) + + Returns the character decomposition mapping assigned to the Unicode character + *unichr* as string. An empty string is returned in case no such mapping is + defined. + + +.. function:: normalize(form, unistr) + + Return the normal form *form* for the Unicode string *unistr*. Valid values for + *form* are 'NFC', 'NFKC', 'NFD', and 'NFKD'. 
+ + The Unicode standard defines various normalization forms of a Unicode string, + based on the definition of canonical equivalence and compatibility equivalence. + In Unicode, several characters can be expressed in various ways. For example, the + character U+00C7 (LATIN CAPITAL LETTER C WITH CEDILLA) can also be expressed as + the sequence U+0043 (LATIN CAPITAL LETTER C) U+0327 (COMBINING CEDILLA). + + For each character, there are two normal forms: normal form C and normal form D. + Normal form D (NFD) is also known as canonical decomposition, and translates + each character into its decomposed form. Normal form C (NFC) first applies a + canonical decomposition, then composes pre-combined characters again. + + In addition to these two forms, there are two additional normal forms based on + compatibility equivalence. In Unicode, certain characters are supported which + normally would be unified with other characters. For example, U+2160 (ROMAN + NUMERAL ONE) is really the same thing as U+0049 (LATIN CAPITAL LETTER I). + However, it is supported in Unicode for compatibility with existing character + sets (e.g. gb2312). + + The normal form KD (NFKD) will apply the compatibility decomposition, i.e. + replace all compatibility characters with their equivalents. The normal form KC + (NFKC) first applies the compatibility decomposition, followed by the canonical + composition. + + .. versionadded:: 2.3 + +In addition, the module exposes the following constant: + + +.. data:: unidata_version + + The version of the Unicode database used in this module. + + .. versionadded:: 2.3 + + +.. data:: ucd_3_2_0 + + This is an object that has the same methods as the entire module, but uses the + Unicode database version 3.2 instead, for applications that require this + specific version of the Unicode database (such as IDNA). + + ..
versionadded:: 2.5 + +Examples:: + + >>> unicodedata.lookup('LEFT CURLY BRACKET') + u'{' + >>> unicodedata.name(u'/') + 'SOLIDUS' + >>> unicodedata.decimal(u'9') + 9 + >>> unicodedata.decimal(u'a') + Traceback (most recent call last): + File "<stdin>", line 1, in ? + ValueError: not a decimal + >>> unicodedata.category(u'A') # 'L'etter, 'u'ppercase + 'Lu' + >>> unicodedata.bidirectional(u'\u0660') # 'A'rabic, 'N'umber + 'AN' + diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst new file mode 100644 index 0000000..3d3727f --- /dev/null +++ b/Doc/library/unittest.rst @@ -0,0 +1,936 @@ + +:mod:`unittest` --- Unit testing framework +========================================== + +.. module:: unittest + :synopsis: Unit testing framework for Python. +.. moduleauthor:: Steve Purcell <stephen_purcell@yahoo.com> +.. sectionauthor:: Steve Purcell <stephen_purcell@yahoo.com> +.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org> +.. sectionauthor:: Raymond Hettinger <python@rcn.com> + + +.. versionadded:: 2.1 + +The Python unit testing framework, sometimes referred to as "PyUnit," is a +Python language version of JUnit, by Kent Beck and Erich Gamma. JUnit is, in +turn, a Java version of Kent's Smalltalk testing framework. Each is the de +facto standard unit testing framework for its respective language. + +:mod:`unittest` supports test automation, sharing of setup and shutdown code for +tests, aggregation of tests into collections, and independence of the tests from +the reporting framework. The :mod:`unittest` module provides classes that make +it easy to support these qualities for a set of tests. + +To achieve this, :mod:`unittest` supports some important concepts: + +test fixture + A :dfn:`test fixture` represents the preparation needed to perform one or more + tests, and any associated cleanup actions. This may involve, for example, + creating temporary or proxy databases, directories, or starting a server + process.
+ +test case + A :dfn:`test case` is the smallest unit of testing. It checks for a specific + response to a particular set of inputs. :mod:`unittest` provides a base class, + :class:`TestCase`, which may be used to create new test cases. + +test suite + A :dfn:`test suite` is a collection of test cases, test suites, or both. It is + used to aggregate tests that should be executed together. + +test runner + A :dfn:`test runner` is a component which orchestrates the execution of tests + and provides the outcome to the user. The runner may use a graphical interface, + a textual interface, or return a special value to indicate the results of + executing the tests. + +The test case and test fixture concepts are supported through the +:class:`TestCase` and :class:`FunctionTestCase` classes; the former should be +used when creating new tests, and the latter can be used when integrating +existing test code with a :mod:`unittest`\ -driven framework. When building test +fixtures using :class:`TestCase`, the :meth:`setUp` and :meth:`tearDown` methods +can be overridden to provide initialization and cleanup for the fixture. With +:class:`FunctionTestCase`, existing functions can be passed to the constructor +for these purposes. When the test is run, the fixture initialization is run +first; if it succeeds, the cleanup method is run after the test has been +executed, regardless of the outcome of the test. Each instance of the +:class:`TestCase` will only be used to run a single test method, so a new +fixture is created for each test. + +Test suites are implemented by the :class:`TestSuite` class. This class allows +individual tests and test suites to be aggregated; when the suite is executed, +all tests added directly to the suite and in "child" test suites are run. + +A test runner is an object that provides a single method, :meth:`run`, which +accepts a :class:`TestCase` or :class:`TestSuite` object as a parameter, and +returns a result object. 
The class :class:`TestResult` is provided for use as +the result object. :mod:`unittest` provides the :class:`TextTestRunner` as an +example test runner which reports test results on the standard error stream by +default. Alternate runners can be implemented for other environments (such as +graphical environments) without any need to derive from a specific class. + + +.. seealso:: + + Module :mod:`doctest` + Another test-support module with a very different flavor. + + `Simple Smalltalk Testing: With Patterns <http://www.XProgramming.com/testfram.htm>`_ + Kent Beck's original paper on testing frameworks using the pattern shared by + :mod:`unittest`. + + +.. _unittest-minimal-example: + +Basic example +------------- + +The :mod:`unittest` module provides a rich set of tools for constructing and +running tests. This section demonstrates that a small subset of the tools +suffice to meet the needs of most users. + +Here is a short script to test three functions from the :mod:`random` module:: + + import random + import unittest + + class TestSequenceFunctions(unittest.TestCase): + + def setUp(self): + self.seq = range(10) + + def testshuffle(self): + # make sure the shuffled sequence does not lose any elements + random.shuffle(self.seq) + self.seq.sort() + self.assertEqual(self.seq, range(10)) + + def testchoice(self): + element = random.choice(self.seq) + self.assert_(element in self.seq) + + def testsample(self): + self.assertRaises(ValueError, random.sample, self.seq, 20) + for element in random.sample(self.seq, 5): + self.assert_(element in self.seq) + + if __name__ == '__main__': + unittest.main() + +A testcase is created by subclassing :class:`unittest.TestCase`. The three +individual tests are defined with methods whose names start with the letters +``test``. This naming convention informs the test runner about which methods +represent tests. 
+ +The crux of each test is a call to :meth:`assertEqual` to check for an expected +result; :meth:`assert_` to verify a condition; or :meth:`assertRaises` to verify +that an expected exception gets raised. These methods are used instead of the +:keyword:`assert` statement so the test runner can accumulate all test results +and produce a report. + +When a :meth:`setUp` method is defined, the test runner will run that method +prior to each test. Likewise, if a :meth:`tearDown` method is defined, the test +runner will invoke that method after each test. In the example, :meth:`setUp` +was used to create a fresh sequence for each test. + +The final block shows a simple way to run the tests. :func:`unittest.main` +provides a command line interface to the test script. When run from the command +line, the above script produces an output that looks like this:: + + ... + ---------------------------------------------------------------------- + Ran 3 tests in 0.000s + + OK + +Instead of :func:`unittest.main`, there are other ways to run the tests with a +finer level of control, less terse output, and no requirement to be run from the +command line. For example, the last two lines may be replaced with:: + + suite = unittest.TestLoader().loadTestsFromTestCase(TestSequenceFunctions) + unittest.TextTestRunner(verbosity=2).run(suite) + +Running the revised script from the interpreter or another script produces the +following output:: + + testchoice (__main__.TestSequenceFunctions) ... ok + testsample (__main__.TestSequenceFunctions) ... ok + testshuffle (__main__.TestSequenceFunctions) ... ok + + ---------------------------------------------------------------------- + Ran 3 tests in 0.110s + + OK + +The above examples show the most commonly used :mod:`unittest` features which +are sufficient to meet many everyday testing needs. The remainder of the +documentation explores the full feature set from first principles. + + +.. 
_organizing-tests: + +Organizing test code +-------------------- + +The basic building blocks of unit testing are :dfn:`test cases` --- single +scenarios that must be set up and checked for correctness. In :mod:`unittest`, +test cases are represented by instances of :mod:`unittest`'s :class:`TestCase` +class. To make your own test cases you must write subclasses of +:class:`TestCase`, or use :class:`FunctionTestCase`. + +An instance of a :class:`TestCase`\ -derived class is an object that can +completely run a single test method, together with optional set-up and tidy-up +code. + +The testing code of a :class:`TestCase` instance should be entirely self +contained, such that it can be run either in isolation or in arbitrary +combination with any number of other test cases. + +The simplest :class:`TestCase` subclass will simply override the :meth:`runTest` +method in order to perform specific testing code:: + + import unittest + + class DefaultWidgetSizeTestCase(unittest.TestCase): + def runTest(self): + widget = Widget('The widget') + self.assertEqual(widget.size(), (50, 50), 'incorrect default size') + +Note that in order to test something, we use one of the :meth:`assert\*` or +:meth:`fail\*` methods provided by the :class:`TestCase` base class. If the +test fails, an exception will be raised, and :mod:`unittest` will identify the +test case as a :dfn:`failure`. Any other exceptions will be treated as +:dfn:`errors`. This helps you identify where the problem is: :dfn:`failures` are +caused by incorrect results - a 5 where you expected a 6. :dfn:`Errors` are +caused by incorrect code - e.g., a :exc:`TypeError` caused by an incorrect +function call. + +The way to run a test case will be described later. For now, note that to +construct an instance of such a test case, we call its constructor without +arguments:: + + testCase = DefaultWidgetSizeTestCase() + +Now, such test cases can be numerous, and their set-up can be repetitive. 
In +the above case, constructing a :class:`Widget` in each of 100 Widget test case +subclasses would mean unsightly duplication. + +Luckily, we can factor out such set-up code by implementing a method called +:meth:`setUp`, which the testing framework will automatically call for us when +we run the test:: + + import unittest + + class SimpleWidgetTestCase(unittest.TestCase): + def setUp(self): + self.widget = Widget('The widget') + + class DefaultWidgetSizeTestCase(SimpleWidgetTestCase): + def runTest(self): + self.failUnless(self.widget.size() == (50,50), + 'incorrect default size') + + class WidgetResizeTestCase(SimpleWidgetTestCase): + def runTest(self): + self.widget.resize(100,150) + self.failUnless(self.widget.size() == (100,150), + 'wrong size after resize') + +If the :meth:`setUp` method raises an exception while the test is running, the +framework will consider the test to have suffered an error, and the +:meth:`runTest` method will not be executed. + +Similarly, we can provide a :meth:`tearDown` method that tidies up after the +:meth:`runTest` method has been run:: + + import unittest + + class SimpleWidgetTestCase(unittest.TestCase): + def setUp(self): + self.widget = Widget('The widget') + + def tearDown(self): + self.widget.dispose() + self.widget = None + +If :meth:`setUp` succeeded, the :meth:`tearDown` method will be run whether +:meth:`runTest` succeeded or not. + +Such a working environment for the testing code is called a :dfn:`fixture`. + +Often, many small test cases will use the same fixture. In this case, we would +end up subclassing :class:`SimpleWidgetTestCase` into many small one-method +classes such as :class:`DefaultWidgetSizeTestCase`. 
This is time-consuming and + +discouraging, so in the same vein as JUnit, :mod:`unittest` provides a simpler +mechanism:: + + import unittest + + class WidgetTestCase(unittest.TestCase): + def setUp(self): + self.widget = Widget('The widget') + + def tearDown(self): + self.widget.dispose() + self.widget = None + + def testDefaultSize(self): + self.failUnless(self.widget.size() == (50,50), + 'incorrect default size') + + def testResize(self): + self.widget.resize(100,150) + self.failUnless(self.widget.size() == (100,150), + 'wrong size after resize') + +Here we have not provided a :meth:`runTest` method, but have instead provided +two different test methods. Class instances will now each run one of the +:meth:`test\*` methods, with ``self.widget`` created and destroyed separately +for each instance. When creating an instance we must specify the test method it +is to run. We do this by passing the method name in the constructor:: + + defaultSizeTestCase = WidgetTestCase('testDefaultSize') + resizeTestCase = WidgetTestCase('testResize') + +Test case instances are grouped together according to the features they test. 
+:mod:`unittest` provides a mechanism for this: the :dfn:`test suite`, +represented by :mod:`unittest`'s :class:`TestSuite` class:: + + widgetTestSuite = unittest.TestSuite() + widgetTestSuite.addTest(WidgetTestCase('testDefaultSize')) + widgetTestSuite.addTest(WidgetTestCase('testResize')) + +For the ease of running tests, as we will see later, it is a good idea to +provide in each test module a callable object that returns a pre-built test +suite:: + + def suite(): + suite = unittest.TestSuite() + suite.addTest(WidgetTestCase('testDefaultSize')) + suite.addTest(WidgetTestCase('testResize')) + return suite + +or even:: + + def suite(): + tests = ['testDefaultSize', 'testResize'] + + return unittest.TestSuite(map(WidgetTestCase, tests)) + +Since it is a common pattern to create a :class:`TestCase` subclass with many +similarly named test functions, :mod:`unittest` provides a :class:`TestLoader` +class that can be used to automate the process of creating a test suite and +populating it with individual tests. For example, :: + + suite = unittest.TestLoader().loadTestsFromTestCase(WidgetTestCase) + +will create a test suite that will run ``WidgetTestCase.testDefaultSize()`` and +``WidgetTestCase.testResize``. :class:`TestLoader` uses the ``'test'`` method +name prefix to identify test methods automatically. + +Note that the order in which the various test cases will be run is determined by +sorting the test function names with the built-in :func:`cmp` function. + +Often it is desirable to group suites of test cases together, so as to run tests +for the whole system at once. 
This is easy, since :class:`TestSuite` instances +can be added to a :class:`TestSuite` just as :class:`TestCase` instances can be +added to a :class:`TestSuite`:: + + suite1 = module1.TheTestSuite() + suite2 = module2.TheTestSuite() + alltests = unittest.TestSuite([suite1, suite2]) + +You can place the definitions of test cases and test suites in the same modules +as the code they are to test (such as :file:`widget.py`), but there are several +advantages to placing the test code in a separate module, such as +:file:`test_widget.py`: + +* The test module can be run standalone from the command line. + +* The test code can more easily be separated from shipped code. + +* There is less temptation to change test code to fit the code it tests without + a good reason. + +* Test code should be modified much less frequently than the code it tests. + +* Tested code can be refactored more easily. + +* Tests for modules written in C must be in separate modules anyway, so why not + be consistent? + +* If the testing strategy changes, there is no need to change the source code. + + +.. _legacy-unit-tests: + +Re-using old test code +---------------------- + +Some users will find that they have existing test code that they would like to +run from :mod:`unittest`, without converting every old test function to a +:class:`TestCase` subclass. + +For this reason, :mod:`unittest` provides a :class:`FunctionTestCase` class. +This subclass of :class:`TestCase` can be used to wrap an existing test +function. Set-up and tear-down functions can also be provided. + +Given the following test function:: + + def testSomething(): + something = makeSomething() + assert something.name is not None + # ... 
+ +one can create an equivalent test case instance as follows:: + + testcase = unittest.FunctionTestCase(testSomething) + +If there are additional set-up and tear-down methods that should be called as +part of the test case's operation, they can also be provided like so:: + + testcase = unittest.FunctionTestCase(testSomething, + setUp=makeSomethingDB, + tearDown=deleteSomethingDB) + +To make migrating existing test suites easier, :mod:`unittest` supports tests +raising :exc:`AssertionError` to indicate test failure. However, it is +recommended that you use the explicit :meth:`TestCase.fail\*` and +:meth:`TestCase.assert\*` methods instead, as future versions of :mod:`unittest` +may treat :exc:`AssertionError` differently. + +.. note:: + + Even though :class:`FunctionTestCase` can be used to quickly convert an existing + test base over to a :mod:`unittest`\ -based system, this approach is not + recommended. Taking the time to set up proper :class:`TestCase` subclasses will + make future test refactorings infinitely easier. + + +.. _unittest-contents: + +Classes and functions +--------------------- + + +.. class:: TestCase([methodName]) + + Instances of the :class:`TestCase` class represent the smallest testable units + in the :mod:`unittest` universe. This class is intended to be used as a base + class, with specific tests being implemented by concrete subclasses. This class + implements the interface needed by the test runner to allow it to drive the + test, and methods that the test code can use to check for and report various + kinds of failure. + + Each instance of :class:`TestCase` will run a single test method: the method + named *methodName*. 
If you remember, we had an earlier example that went + something like this:: + + def suite(): + suite = unittest.TestSuite() + suite.addTest(WidgetTestCase('testDefaultSize')) + suite.addTest(WidgetTestCase('testResize')) + return suite + + Here, we create two instances of :class:`WidgetTestCase`, each of which runs a + single test. + + *methodName* defaults to ``'runTest'``. + + +.. class:: FunctionTestCase(testFunc[, setUp[, tearDown[, description]]]) + + This class implements the portion of the :class:`TestCase` interface which + allows the test runner to drive the test, but does not provide the methods which + test code can use to check and report errors. This is used to create test cases + using legacy test code, allowing it to be integrated into a :mod:`unittest`\ + -based test framework. + + +.. class:: TestSuite([tests]) + + This class represents an aggregation of individual test cases and test suites. + The class presents the interface needed by the test runner to allow it to be run + as any other test case. Running a :class:`TestSuite` instance is the same as + iterating over the suite, running each test individually. + + If *tests* is given, it must be an iterable of individual test cases or other + test suites that will be used to build the suite initially. Additional methods + are provided to add test cases and suites to the collection later on. + + +.. class:: TestLoader() + + This class is responsible for loading tests according to various criteria and + returning them wrapped in a :class:`TestSuite`. It can load all tests within a + given module or :class:`TestCase` subclass. + + +.. class:: TestResult() + + This class is used to compile information about which tests have succeeded and + which have failed. + + +.. data:: defaultTestLoader + + Instance of the :class:`TestLoader` class intended to be shared. If no + customization of the :class:`TestLoader` is needed, this instance can be used + instead of repeatedly creating new instances. + + +.. 
class:: TextTestRunner([stream[, descriptions[, verbosity]]]) + + A basic test runner implementation which prints results on standard error. It + has a few configurable parameters, but is essentially very simple. Graphical + applications which run test suites should provide alternate implementations. + + +.. function:: main([module[, defaultTest[, argv[, testRunner[, testLoader]]]]]) + + A command-line program that runs a set of tests; this is primarily for making + test modules conveniently executable. The simplest use for this function is to + include the following line at the end of a test script:: + + if __name__ == '__main__': + unittest.main() + + The *testRunner* argument can either be a test runner class or an already + created instance of it. + +In some cases, the existing tests may have been written using the :mod:`doctest` +module. If so, that module provides a :class:`DocTestSuite` class that can +automatically build :class:`unittest.TestSuite` instances from the existing +:mod:`doctest`\ -based tests. + +.. versionadded:: 2.3 + + +.. _testcase-objects: + +TestCase Objects +---------------- + +Each :class:`TestCase` instance represents a single test, but each concrete +subclass may be used to define multiple tests --- the concrete class represents +a single test fixture. The fixture is created and cleaned up for each test +case. + +:class:`TestCase` instances provide three groups of methods: one group used to +run the test, another used by the test implementation to check conditions and +report failures, and some inquiry methods allowing information about the test +itself to be gathered. + +Methods in the first group (running the test) are: + + +.. method:: TestCase.setUp() + + Method called to prepare the test fixture. This is called immediately before + calling the test method; any exception raised by this method will be considered + an error rather than a test failure. The default implementation does nothing. + + +.. 
method:: TestCase.tearDown() + + Method called immediately after the test method has been called and the result + recorded. This is called even if the test method raised an exception, so the + implementation in subclasses may need to be particularly careful about checking + internal state. Any exception raised by this method will be considered an error + rather than a test failure. This method will only be called if the + :meth:`setUp` succeeds, regardless of the outcome of the test method. The + default implementation does nothing. + + +.. method:: TestCase.run([result]) + + Run the test, collecting the result into the test result object passed as + *result*. If *result* is omitted or :const:`None`, a temporary result object is + created (by calling the :meth:`defaultTestResult` method) and used; this result + object is not returned to :meth:`run`'s caller. + + The same effect may be had by simply calling the :class:`TestCase` instance. + + +.. method:: TestCase.debug() + + Run the test without collecting the result. This allows exceptions raised by + the test to be propagated to the caller, and can be used to support running + tests under a debugger. + +The test code can use any of the following methods to check for and report +failures. + + +.. method:: TestCase.assert_(expr[, msg]) + TestCase.failUnless(expr[, msg]) + + Signal a test failure if *expr* is false; the explanation for the error will be + *msg* if given, otherwise it will be :const:`None`. + + +.. method:: TestCase.assertEqual(first, second[, msg]) + TestCase.failUnlessEqual(first, second[, msg]) + + Test that *first* and *second* are equal. If the values do not compare equal, + the test will fail with the explanation given by *msg*, or :const:`None`. Note + that using :meth:`failUnlessEqual` improves upon doing the comparison as the + first parameter to :meth:`failUnless`: the default value for *msg* can be + computed to include representations of both *first* and *second*. + + +.. 
method:: TestCase.assertNotEqual(first, second[, msg]) + TestCase.failIfEqual(first, second[, msg]) + + Test that *first* and *second* are not equal. If the values do compare equal, + the test will fail with the explanation given by *msg*, or :const:`None`. Note + that using :meth:`failIfEqual` improves upon doing the comparison as the first + parameter to :meth:`failUnless`: the default value for *msg* can be + computed to include representations of both *first* and *second*. + + +.. method:: TestCase.assertAlmostEqual(first, second[, places[, msg]]) + TestCase.failUnlessAlmostEqual(first, second[, places[, msg]]) + + Test that *first* and *second* are approximately equal by computing the + difference, rounding to the given number of *places*, and comparing to zero. + Note that comparing a given number of decimal places is not the same as + comparing a given number of significant digits. If the values do not compare + equal, the test will fail with the explanation given by *msg*, or :const:`None`. + + +.. method:: TestCase.assertNotAlmostEqual(first, second[, places[, msg]]) + TestCase.failIfAlmostEqual(first, second[, places[, msg]]) + + Test that *first* and *second* are not approximately equal by computing the + difference, rounding to the given number of *places*, and comparing to zero. + Note that comparing a given number of decimal places is not the same as + comparing a given number of significant digits. If the values do compare + equal, the test will fail with the explanation given by *msg*, or :const:`None`. + + +.. method:: TestCase.assertRaises(exception, callable, ...) + TestCase.failUnlessRaises(exception, callable, ...) + + Test that an exception is raised when *callable* is called with any positional + or keyword arguments that are also passed to :meth:`assertRaises`. The test + passes if *exception* is raised, is an error if another exception is raised, or + fails if no exception is raised. 
To catch any of a group of exceptions, a tuple + containing the exception classes may be passed as *exception*. + + +.. method:: TestCase.failIf(expr[, msg]) + + The inverse of the :meth:`failUnless` method is the :meth:`failIf` method. This + signals a test failure if *expr* is true, with *msg* or :const:`None` for the + error message. + + +.. method:: TestCase.fail([msg]) + + Signals a test failure unconditionally, with *msg* or :const:`None` for the + error message. + + +.. attribute:: TestCase.failureException + + This class attribute gives the exception raised by the :meth:`test` method. If + a test framework needs to use a specialized exception, possibly to carry + additional information, it must subclass this exception in order to "play fair" + with the framework. The initial value of this attribute is + :exc:`AssertionError`. + +Testing frameworks can use the following methods to collect information on the +test: + + +.. method:: TestCase.countTestCases() + + Return the number of tests represented by this test object. For + :class:`TestCase` instances, this will always be ``1``. + + +.. method:: TestCase.defaultTestResult() + + Return an instance of the test result class that should be used for this test + case class (if no other result instance is provided to the :meth:`run` method). + + For :class:`TestCase` instances, this will always be an instance of + :class:`TestResult`; subclasses of :class:`TestCase` should override this as + necessary. + + +.. method:: TestCase.id() + + Return a string identifying the specific test case. This is usually the full + name of the test method, including the module and class name. + + +.. method:: TestCase.shortDescription() + + Returns a one-line description of the test, or :const:`None` if no description + has been provided. The default implementation of this method returns the first + line of the test method's docstring, if available, or :const:`None`. + + +.. 
_testsuite-objects: + +TestSuite Objects +----------------- + +:class:`TestSuite` objects behave much like :class:`TestCase` objects, except +they do not actually implement a test. Instead, they are used to aggregate +tests into groups of tests that should be run together. Some additional methods +are available to add tests to :class:`TestSuite` instances: + + +.. method:: TestSuite.addTest(test) + + Add a :class:`TestCase` or :class:`TestSuite` to the suite. + + +.. method:: TestSuite.addTests(tests) + + Add all the tests from an iterable of :class:`TestCase` and :class:`TestSuite` + instances to this test suite. + + This is equivalent to iterating over *tests*, calling :meth:`addTest` for each + element. + +:class:`TestSuite` shares the following methods with :class:`TestCase`: + + +.. method:: TestSuite.run(result) + + Run the tests associated with this suite, collecting the result into the test + result object passed as *result*. Note that unlike :meth:`TestCase.run`, + :meth:`TestSuite.run` requires the result object to be passed in. + + +.. method:: TestSuite.debug() + + Run the tests associated with this suite without collecting the result. This + allows exceptions raised by the test to be propagated to the caller and can be + used to support running tests under a debugger. + + +.. method:: TestSuite.countTestCases() + + Return the number of tests represented by this test object, including all + individual tests and sub-suites. + +In the typical usage of a :class:`TestSuite` object, the :meth:`run` method is +invoked by a :class:`TestRunner` rather than by the end-user test harness. + + +.. _testresult-objects: + +TestResult Objects +------------------ + +A :class:`TestResult` object stores the results of a set of tests. The +:class:`TestCase` and :class:`TestSuite` classes ensure that results are +properly recorded; test authors do not need to worry about recording the outcome +of tests. 
+ +Testing frameworks built on top of :mod:`unittest` may want access to the +:class:`TestResult` object generated by running a set of tests for reporting +purposes; a :class:`TestResult` instance is returned by the +:meth:`TestRunner.run` method for this purpose. + +:class:`TestResult` instances have the following attributes that will be of +interest when inspecting the results of running a set of tests: + + +.. attribute:: TestResult.errors + + A list containing 2-tuples of :class:`TestCase` instances and strings holding + formatted tracebacks. Each tuple represents a test which raised an unexpected + exception. + + .. versionchanged:: 2.2 + Contains formatted tracebacks instead of :func:`sys.exc_info` results. + + +.. attribute:: TestResult.failures + + A list containing 2-tuples of :class:`TestCase` instances and strings holding + formatted tracebacks. Each tuple represents a test where a failure was + explicitly signalled using the :meth:`TestCase.fail\*` or + :meth:`TestCase.assert\*` methods. + + .. versionchanged:: 2.2 + Contains formatted tracebacks instead of :func:`sys.exc_info` results. + + +.. attribute:: TestResult.testsRun + + The total number of tests run so far. + + +.. method:: TestResult.wasSuccessful() + + Returns :const:`True` if all tests run so far have passed, otherwise returns + :const:`False`. + + +.. method:: TestResult.stop() + + This method can be called to signal that the set of tests being run should be + aborted by setting the :class:`TestResult`'s ``shouldStop`` attribute to + :const:`True`. :class:`TestRunner` objects should respect this flag and return + without running any additional tests. + + For example, this feature is used by the :class:`TextTestRunner` class to stop + the test framework when the user signals an interrupt from the keyboard. + Interactive tools which provide :class:`TestRunner` implementations can use this + in a similar manner. 
+ +The following methods of the :class:`TestResult` class are used to maintain the +internal data structures, and may be extended in subclasses to support +additional reporting requirements. This is particularly useful in building +tools which support interactive reporting while tests are being run. + + +.. method:: TestResult.startTest(test) + + Called when the test case *test* is about to be run. + + The default implementation simply increments the instance's ``testsRun`` + counter. + + +.. method:: TestResult.stopTest(test) + + Called after the test case *test* has been executed, regardless of the outcome. + + The default implementation does nothing. + + +.. method:: TestResult.addError(test, err) + + Called when the test case *test* raises an unexpected exception. *err* is a tuple + of the form returned by :func:`sys.exc_info`: ``(type, value, traceback)``. + + The default implementation appends ``(test, err)`` to the instance's ``errors`` + attribute. + + +.. method:: TestResult.addFailure(test, err) + + Called when the test case *test* signals a failure. *err* is a tuple of the form + returned by :func:`sys.exc_info`: ``(type, value, traceback)``. + + The default implementation appends ``(test, err)`` to the instance's + ``failures`` attribute. + + +.. method:: TestResult.addSuccess(test) + + Called when the test case *test* succeeds. + + The default implementation does nothing. + + +.. _testloader-objects: + +TestLoader Objects +------------------ + +The :class:`TestLoader` class is used to create test suites from classes and +modules. Normally, there is no need to create an instance of this class; the +:mod:`unittest` module provides an instance that can be shared as +``unittest.defaultTestLoader``. Using a subclass or instance, however, allows +customization of some configurable properties. + +:class:`TestLoader` objects have the following methods: + + +.. 
method:: TestLoader.loadTestsFromTestCase(testCaseClass) + + Return a suite of all test cases contained in the :class:`TestCase`\ -derived + *testCaseClass*. + + +.. method:: TestLoader.loadTestsFromModule(module) + + Return a suite of all test cases contained in the given module. This method + searches *module* for classes derived from :class:`TestCase` and creates an + instance of the class for each test method defined for the class. + + .. warning:: + + While using a hierarchy of :class:`TestCase`\ -derived classes can be convenient + in sharing fixtures and helper functions, defining test methods on base classes + that are not intended to be instantiated directly does not play well with this + method. Doing so, however, can be useful when the fixtures are different and + defined in subclasses. + + +.. method:: TestLoader.loadTestsFromName(name[, module]) + + Return a suite of all test cases given a string specifier. + + The specifier *name* is a "dotted name" that may resolve either to a module, a + test case class, a test method within a test case class, a :class:`TestSuite` + instance, or a callable object which returns a :class:`TestCase` or + :class:`TestSuite` instance. These checks are applied in the order listed here; + that is, a method on a possible test case class will be picked up as "a test + method within a test case class", rather than "a callable object". + + For example, if you have a module :mod:`SampleTests` containing a + :class:`TestCase`\ -derived class :class:`SampleTestCase` with three test + methods (:meth:`test_one`, :meth:`test_two`, and :meth:`test_three`), the + specifier ``'SampleTests.SampleTestCase'`` would cause this method to return a + suite which will run all three test methods. Using the specifier + ``'SampleTests.SampleTestCase.test_two'`` would cause it to return a test suite + which will run only the :meth:`test_two` test method. 
The specifier can refer + to modules and packages which have not been imported; they will be imported as a + side-effect. + + The method optionally resolves *name* relative to the given *module*. + + +.. method:: TestLoader.loadTestsFromNames(names[, module]) + + Similar to :meth:`loadTestsFromName`, but takes a sequence of names rather than + a single name. The return value is a test suite which supports all the tests + defined for each name. + + +.. method:: TestLoader.getTestCaseNames(testCaseClass) + + Return a sorted sequence of method names found within *testCaseClass*; this + should be a subclass of :class:`TestCase`. + +The following attributes of a :class:`TestLoader` can be configured either by +subclassing or assignment on an instance: + + +.. attribute:: TestLoader.testMethodPrefix + + String giving the prefix of method names which will be interpreted as test + methods. The default value is ``'test'``. + + This affects :meth:`getTestCaseNames` and all the :meth:`loadTestsFrom\*` + methods. + + +.. attribute:: TestLoader.sortTestMethodsUsing + + Function to be used to compare method names when sorting them in + :meth:`getTestCaseNames` and all the :meth:`loadTestsFrom\*` methods. The + default value is the built-in :func:`cmp` function; the attribute can also be + set to :const:`None` to disable the sort. + + +.. attribute:: TestLoader.suiteClass + + Callable object that constructs a test suite from a list of tests. No methods on + the resulting object are needed. The default value is the :class:`TestSuite` + class. + + This affects all the :meth:`loadTestsFrom\*` methods. + diff --git a/Doc/library/unix.rst b/Doc/library/unix.rst new file mode 100644 index 0000000..b60af0f --- /dev/null +++ b/Doc/library/unix.rst @@ -0,0 +1,29 @@ + +.. 
_unix: + +********************** +Unix Specific Services +********************** + +The modules described in this chapter provide interfaces to features that are +unique to the Unix operating system, or in some cases to some or many variants +of it. Here's an overview: + + +.. toctree:: + + posix.rst + pwd.rst + spwd.rst + grp.rst + crypt.rst + dl.rst + termios.rst + tty.rst + pty.rst + fcntl.rst + pipes.rst + resource.rst + nis.rst + syslog.rst + commands.rst diff --git a/Doc/library/urllib.rst b/Doc/library/urllib.rst new file mode 100644 index 0000000..ef8264f --- /dev/null +++ b/Doc/library/urllib.rst @@ -0,0 +1,471 @@ + +:mod:`urllib` --- Open arbitrary resources by URL +================================================= + +.. module:: urllib + :synopsis: Open an arbitrary network resource by URL (requires sockets). + + +.. index:: + single: WWW + single: World Wide Web + single: URL + +This module provides a high-level interface for fetching data across the World +Wide Web. In particular, the :func:`urlopen` function is similar to the +built-in function :func:`open`, but accepts Universal Resource Locators (URLs) +instead of filenames. Some restrictions apply --- it can only open URLs for +reading, and no seek operations are available. + +It defines the following public functions: + + +.. function:: urlopen(url[, data[, proxies]]) + + Open a network object denoted by a URL for reading. If the URL does not have a + scheme identifier, or if it has :file:`file:` as its scheme identifier, this + opens a local file (without universal newlines); otherwise it opens a socket to + a server somewhere on the network. If the connection cannot be made the + :exc:`IOError` exception is raised. If all went well, a file-like object is + returned. This supports the following methods: :meth:`read`, :meth:`readline`, + :meth:`readlines`, :meth:`fileno`, :meth:`close`, :meth:`info` and + :meth:`geturl`. It also has proper support for the iterator protocol. 
One + caveat: the :meth:`read` method, if the size argument is omitted or negative, + may not read until the end of the data stream; there is no good way to determine + that the entire stream from a socket has been read in the general case. + + Except for the :meth:`info` and :meth:`geturl` methods, these methods have the + same interface as for file objects --- see section :ref:`bltin-file-objects` in + this manual. (It is not a built-in file object, however, so it can't be used at + those few places where a true built-in file object is required.) + + .. index:: module: mimetools + + The :meth:`info` method returns an instance of the class + :class:`mimetools.Message` containing meta-information associated with the + URL. When the method is HTTP, these headers are those returned by the server + at the head of the retrieved HTML page (including Content-Length and + Content-Type). When the method is FTP, a Content-Length header will be + present if (as is now usual) the server passed back a file length in response + to the FTP retrieval request. A Content-Type header will be present if the + MIME type can be guessed. When the method is local-file, returned headers + will include a Date representing the file's last-modified time, a + Content-Length giving file size, and a Content-Type containing a guess at the + file's type. See also the description of the :mod:`mimetools` module. + + The :meth:`geturl` method returns the real URL of the page. In some cases, the + HTTP server redirects a client to another URL. The :func:`urlopen` function + handles this transparently, but in some cases the caller needs to know which URL + the client was redirected to. The :meth:`geturl` method can be used to get at + this redirected URL. + + If the *url* uses the :file:`http:` scheme identifier, the optional *data* + argument may be given to specify a ``POST`` request (normally the request type + is ``GET``). 
The *data* argument must be in standard + :mimetype:`application/x-www-form-urlencoded` format; see the :func:`urlencode` + function below. + + The :func:`urlopen` function works transparently with proxies which do not + require authentication. In a Unix or Windows environment, set the + :envvar:`http_proxy` or :envvar:`ftp_proxy` environment variable to a URL that + identifies the proxy server before starting the Python interpreter. For example + (the ``'%'`` is the command prompt):: + + % http_proxy="http://www.someproxy.com:3128" + % export http_proxy + % python + ... + + In a Windows environment, if no proxy environment variables are set, proxy + settings are obtained from the registry's Internet Settings section. + + .. index:: single: Internet Config + + In a Macintosh environment, :func:`urlopen` will retrieve proxy information from + Internet Config. + + Alternatively, the optional *proxies* argument may be used to explicitly specify + proxies. It must be a dictionary mapping scheme names to proxy URLs, where an + empty dictionary causes no proxies to be used, and ``None`` (the default value) + causes environmental proxy settings to be used as discussed above. For + example:: + + # Use http://www.someproxy.com:3128 for http proxying + proxies = {'http': 'http://www.someproxy.com:3128'} + filehandle = urllib.urlopen(some_url, proxies=proxies) + # Don't use any proxies + filehandle = urllib.urlopen(some_url, proxies={}) + # Use proxies from environment - both versions are equivalent + filehandle = urllib.urlopen(some_url, proxies=None) + filehandle = urllib.urlopen(some_url) + + In versions before 2.3, the :func:`urlopen` function did not support explicit + proxy specification. In those versions, to override environmental proxy settings, use :class:`URLopener`, or a + subclass such as :class:`FancyURLopener`. + + Proxies which require authentication for use are not currently supported; this + is considered an implementation limitation. + + ..
versionchanged:: 2.3 + Added the *proxies* support. + + +.. function:: urlretrieve(url[, filename[, reporthook[, data]]]) + + Copy a network object denoted by a URL to a local file, if necessary. If the URL + points to a local file, or a valid cached copy of the object exists, the object + is not copied. Return a tuple ``(filename, headers)`` where *filename* is the + local file name under which the object can be found, and *headers* is whatever + the :meth:`info` method of the object returned by :func:`urlopen` returned (for + a remote object, possibly cached). Exceptions are the same as for + :func:`urlopen`. + + The second argument, if present, specifies the file location to copy to (if + absent, the location will be a tempfile with a generated name). The third + argument, if present, is a hook function that will be called once on + establishment of the network connection and once after each block read + thereafter. The hook will be passed three arguments: a count of blocks + transferred so far, a block size in bytes, and the total size of the file. The + third argument passed to the hook may be ``-1`` on older FTP servers which do not return a file + size in response to a retrieval request. + + If the *url* uses the :file:`http:` scheme identifier, the optional *data* + argument may be given to specify a ``POST`` request (normally the request type + is ``GET``). The *data* argument must be in standard + :mimetype:`application/x-www-form-urlencoded` format; see the :func:`urlencode` + function below. + + .. versionchanged:: 2.5 + :func:`urlretrieve` will raise :exc:`ContentTooShortError` when it detects that + the amount of data available was less than the expected amount (which is the + size reported by a *Content-Length* header). This can occur, for example, when + the download is interrupted. + + The *Content-Length* is treated as a lower bound: if there's more data to read, + urlretrieve reads more data, but if less data is available, it raises the + exception.
+ + You can still retrieve the downloaded data in this case, it is stored in the + :attr:`content` attribute of the exception instance. + + If no *Content-Length* header was supplied, urlretrieve can not check the size + of the data it has downloaded, and just returns it. In this case you just have + to assume that the download was successful. + + +.. data:: _urlopener + + The public functions :func:`urlopen` and :func:`urlretrieve` create an instance + of the :class:`FancyURLopener` class and use it to perform their requested + actions. To override this functionality, programmers can create a subclass of + :class:`URLopener` or :class:`FancyURLopener`, then assign an instance of that + class to the ``urllib._urlopener`` variable before calling the desired function. + For example, applications may want to specify a different + :mailheader:`User-Agent` header than :class:`URLopener` defines. This can be + accomplished with the following code:: + + import urllib + + class AppURLopener(urllib.FancyURLopener): + version = "App/1.7" + + urllib._urlopener = AppURLopener() + + +.. function:: urlcleanup() + + Clear the cache that may have been built up by previous calls to + :func:`urlretrieve`. + + +.. function:: quote(string[, safe]) + + Replace special characters in *string* using the ``%xx`` escape. Letters, + digits, and the characters ``'_.-'`` are never quoted. The optional *safe* + parameter specifies additional characters that should not be quoted --- its + default value is ``'/'``. + + Example: ``quote('/~connolly/')`` yields ``'/%7econnolly/'``. + + +.. function:: quote_plus(string[, safe]) + + Like :func:`quote`, but also replaces spaces by plus signs, as required for + quoting HTML form values. Plus signs in the original string are escaped unless + they are included in *safe*. It also does not have *safe* default to ``'/'``. + + +.. function:: unquote(string) + + Replace ``%xx`` escapes by their single-character equivalent. 
+ + Example: ``unquote('/%7Econnolly/')`` yields ``'/~connolly/'``. + + +.. function:: unquote_plus(string) + + Like :func:`unquote`, but also replaces plus signs by spaces, as required for + unquoting HTML form values. + + +.. function:: urlencode(query[, doseq]) + + Convert a mapping object or a sequence of two-element tuples to a "url-encoded" + string, suitable to pass to :func:`urlopen` above as the optional *data* + argument. This is useful to pass a dictionary of form fields to a ``POST`` + request. The resulting string is a series of ``key=value`` pairs separated by + ``'&'`` characters, where both *key* and *value* are quoted using + :func:`quote_plus` above. If the optional parameter *doseq* is present and + evaluates to true, individual ``key=value`` pairs are generated for each element + of the sequence. When a sequence of two-element tuples is used as the *query* + argument, the first element of each tuple is a key and the second is a value. + The order of parameters in the encoded string will match the order of parameter + tuples in the sequence. The :mod:`cgi` module provides the functions + :func:`parse_qs` and :func:`parse_qsl` which are used to parse query strings + into Python data structures. + + +.. function:: pathname2url(path) + + Convert the pathname *path* from the local syntax for a path to the form used in + the path component of a URL. This does not produce a complete URL. The return + value will already be quoted using the :func:`quote` function. + + +.. function:: url2pathname(path) + + Convert the path component *path* from an encoded URL to the local syntax for a + path. This does not accept a complete URL. This function uses :func:`unquote` + to decode *path*. + + +.. class:: URLopener([proxies[, **x509]]) + + Base class for opening and reading URLs. Unless you need to support opening + objects using schemes other than :file:`http:`, :file:`ftp:`, or :file:`file:`, + you probably want to use :class:`FancyURLopener`. 
+ + By default, the :class:`URLopener` class sends a :mailheader:`User-Agent` header + of ``urllib/VVV``, where *VVV* is the :mod:`urllib` version number. + Applications can define their own :mailheader:`User-Agent` header by subclassing + :class:`URLopener` or :class:`FancyURLopener` and setting the class attribute + :attr:`version` to an appropriate string value in the subclass definition. + + The optional *proxies* parameter should be a dictionary mapping scheme names to + proxy URLs, where an empty dictionary turns proxies off completely. Its default + value is ``None``, in which case environmental proxy settings will be used if + present, as discussed in the definition of :func:`urlopen`, above. + + Additional keyword parameters, collected in *x509*, may be used for + authentication of the client when using the :file:`https:` scheme. The keywords + *key_file* and *cert_file* are supported to provide an SSL key and certificate; + both are needed to support client authentication. + + :class:`URLopener` objects will raise an :exc:`IOError` exception if the server + returns an error code. + + +.. class:: FancyURLopener(...) + + :class:`FancyURLopener` subclasses :class:`URLopener` providing default handling + for the following HTTP response codes: 301, 302, 303, 307 and 401. For the 30x + response codes listed above, the :mailheader:`Location` header is used to fetch + the actual URL. For 401 response codes (authentication required), basic HTTP + authentication is performed. For the 30x response codes, recursion is bounded + by the value of the *maxtries* attribute, which defaults to 10. + + For all other response codes, the method :meth:`http_error_default` is called + which you can override in subclasses to handle the error appropriately. + + .. note:: + + According to the letter of :rfc:`2616`, 301 and 302 responses to POST requests + must not be automatically redirected without confirmation by the user. 
In + reality, browsers do allow automatic redirection of these responses, changing + the POST to a GET, and :mod:`urllib` reproduces this behaviour. + + The parameters to the constructor are the same as those for :class:`URLopener`. + + .. note:: + + When performing basic authentication, a :class:`FancyURLopener` instance calls + its :meth:`prompt_user_passwd` method. The default implementation asks the + user for the required information on the controlling terminal. A subclass may + override this method to support more appropriate behavior if needed. + + +.. exception:: ContentTooShortError(msg[, content]) + + This exception is raised when the :func:`urlretrieve` function detects that the + amount of the downloaded data is less than the expected amount (given by the + *Content-Length* header). The :attr:`content` attribute stores the downloaded + (and presumably truncated) data. + + .. versionadded:: 2.5 + +Restrictions: + + .. index:: + pair: HTTP; protocol + pair: FTP; protocol + +* Currently, only the following protocols are supported: HTTP (versions 0.9 and + 1.0), FTP, and local files. + +* The caching feature of :func:`urlretrieve` has been disabled until proper + processing of Expiration time headers is implemented. + +* There should be a function to query whether a particular URL is in the cache. + +* For backward compatibility, if a URL appears to point to a local file but the + file can't be opened, the URL is re-interpreted using the FTP protocol. This + can sometimes cause confusing error messages. + +* The :func:`urlopen` and :func:`urlretrieve` functions can cause arbitrarily + long delays while waiting for a network connection to be set up. This means + that it is difficult to build an interactive Web client using these functions + without using threads. + + .. index:: + single: HTML + pair: HTTP; protocol + module: htmllib + +* The data returned by :func:`urlopen` or :func:`urlretrieve` is the raw data + returned by the server.
This may be binary data (such as an image), plain text + or (for example) HTML. The HTTP protocol provides type information in the reply + header, which can be inspected by looking at the :mailheader:`Content-Type` + header. If the returned data is HTML, you can use the module :mod:`htmllib` to + parse it. + + .. index:: single: FTP + +* The code handling the FTP protocol cannot differentiate between a file and a + directory. This can lead to unexpected behavior when attempting to read a URL + that points to a file that is not accessible. If the URL ends in a ``/``, it is + assumed to refer to a directory and will be handled accordingly. But if an + attempt to read a file leads to a 550 error (meaning the URL cannot be found or + is not accessible, often for permission reasons), then the path is treated as a + directory in order to handle the case when a directory is specified by a URL but + the trailing ``/`` has been left off. This can cause misleading results when + you try to fetch a file whose read permissions make it inaccessible; the FTP + code will try to read it, fail with a 550 error, and then perform a directory + listing for the unreadable file. If fine-grained control is needed, consider + using the :mod:`ftplib` module, subclassing :class:`FancyURLopener`, or changing + *_urlopener* to meet your needs. + +* This module does not support the use of proxies which require authentication. + This may be implemented in the future. + + .. index:: module: urlparse + +* Although the :mod:`urllib` module contains (undocumented) routines to parse + and unparse URL strings, the recommended interface for URL manipulation is in + module :mod:`urlparse`. + + +.. _urlopener-objs: + +URLopener Objects +----------------- + +.. sectionauthor:: Skip Montanaro <skip@mojam.com> + + +:class:`URLopener` and :class:`FancyURLopener` objects have the following +methods and attributes. + + +.. method:: URLopener.open(fullurl[, data]) + + Open *fullurl* using the appropriate protocol.
This method sets up cache and + proxy information, then calls the appropriate open method with its input + arguments. If the scheme is not recognized, :meth:`open_unknown` is called. + The *data* argument has the same meaning as the *data* argument of + :func:`urlopen`. + + +.. method:: URLopener.open_unknown(fullurl[, data]) + + Overridable interface to open unknown URL types. + + +.. method:: URLopener.retrieve(url[, filename[, reporthook[, data]]]) + + Retrieves the contents of *url* and places it in *filename*. The return value + is a tuple consisting of a local filename and either a + :class:`mimetools.Message` object containing the response headers (for remote + URLs) or ``None`` (for local URLs). The caller must then open and read the + contents of *filename*. If *filename* is not given and the URL refers to a + local file, the input filename is returned. If the URL is non-local and + *filename* is not given, the filename is the output of :func:`tempfile.mktemp` + with a suffix that matches the suffix of the last path component of the input + URL. If *reporthook* is given, it must be a function accepting three numeric + parameters. It will be called after each chunk of data is read from the + network. *reporthook* is ignored for local URLs. + + If the *url* uses the :file:`http:` scheme identifier, the optional *data* + argument may be given to specify a ``POST`` request (normally the request type + is ``GET``). The *data* argument must be in standard + :mimetype:`application/x-www-form-urlencoded` format; see the :func:`urlencode` + function above. + + +.. attribute:: URLopener.version + + Variable that specifies the user agent of the opener object. To get + :mod:`urllib` to tell servers that it is a particular user agent, set this in a + subclass as a class variable or in the constructor before calling the base + constructor.
+ +The :class:`FancyURLopener` class offers one additional method that should be +overloaded to provide the appropriate behavior: + + +.. method:: FancyURLopener.prompt_user_passwd(host, realm) + + Return information needed to authenticate the user at the given host in the + specified security realm. The return value should be a tuple, ``(user, + password)``, which can be used for basic authentication. + + The implementation prompts for this information on the terminal; an application + should override this method to use an appropriate interaction model in the local + environment. + + +.. _urllib-examples: + +Examples +-------- + +Here is an example session that uses the ``GET`` method to retrieve a URL +containing parameters:: + + >>> import urllib + >>> params = urllib.urlencode({'spam': 1, 'eggs': 2, 'bacon': 0}) + >>> f = urllib.urlopen("http://www.musi-cal.com/cgi-bin/query?%s" % params) + >>> print f.read() + +The following example uses the ``POST`` method instead:: + + >>> import urllib + >>> params = urllib.urlencode({'spam': 1, 'eggs': 2, 'bacon': 0}) + >>> f = urllib.urlopen("http://www.musi-cal.com/cgi-bin/query", params) + >>> print f.read() + +The following example uses an explicitly specified HTTP proxy, overriding +environment settings:: + + >>> import urllib + >>> proxies = {'http': 'http://proxy.example.com:8080/'} + >>> opener = urllib.FancyURLopener(proxies) + >>> f = opener.open("http://www.python.org") + >>> f.read() + +The following example uses no proxies at all, overriding environment settings:: + + >>> import urllib + >>> opener = urllib.FancyURLopener({}) + >>> f = opener.open("http://www.python.org/") + >>> f.read() + diff --git a/Doc/library/urllib2.rst b/Doc/library/urllib2.rst new file mode 100644 index 0000000..41bb033 --- /dev/null +++ b/Doc/library/urllib2.rst @@ -0,0 +1,927 @@ +:mod:`urllib2` --- extensible library for opening URLs +====================================================== + +.. 
module:: urllib2 + :synopsis: Next generation URL opening library. +.. moduleauthor:: Jeremy Hylton <jhylton@users.sourceforge.net> +.. sectionauthor:: Moshe Zadka <moshez@users.sourceforge.net> + + +The :mod:`urllib2` module defines functions and classes which help in opening +URLs (mostly HTTP) in a complex world --- basic and digest authentication, +redirections, cookies and more. + +The :mod:`urllib2` module defines the following functions: + + +.. function:: urlopen(url[, data][, timeout]) + + Open the URL *url*, which can be either a string or a :class:`Request` object. + + *data* may be a string specifying additional data to send to the server, or + ``None`` if no such data is needed. Currently HTTP requests are the only ones + that use *data*; the HTTP request will be a POST instead of a GET when the + *data* parameter is provided. *data* should be a buffer in the standard + :mimetype:`application/x-www-form-urlencoded` format. The + :func:`urllib.urlencode` function takes a mapping or sequence of 2-tuples and + returns a string in this format. + + The optional *timeout* parameter specifies a timeout in seconds for the + connection attempt (if not specified, or passed as ``None``, the global default + timeout setting will be used). This actually only works for HTTP, HTTPS, FTP and + FTPS connections. + + This function returns a file-like object with two additional methods: + + * :meth:`geturl` --- return the URL of the resource retrieved + + * :meth:`info` --- return the meta-information of the page, as a dictionary-like + object + + Raises :exc:`URLError` on errors. + + Note that ``None`` may be returned if no handler handles the request (though the + default installed global :class:`OpenerDirector` uses :class:`UnknownHandler` to + ensure this never happens). + + .. versionchanged:: 2.6 + *timeout* was added. + + +.. function:: install_opener(opener) + + Install an :class:`OpenerDirector` instance as the default global opener.
+ Installing an opener is only necessary if you want urlopen to use that opener; + otherwise, simply call :meth:`OpenerDirector.open` instead of :func:`urlopen`. + The code does not check for a real :class:`OpenerDirector`, and any class with + the appropriate interface will work. + + +.. function:: build_opener([handler, ...]) + + Return an :class:`OpenerDirector` instance, which chains the handlers in the + order given. *handler*\s can be either instances of :class:`BaseHandler`, or + subclasses of :class:`BaseHandler` (in which case it must be possible to call + the constructor without any parameters). Instances of the following classes + will be in front of the *handler*\s, unless the *handler*\s contain them, + instances of them or subclasses of them: :class:`ProxyHandler`, + :class:`UnknownHandler`, :class:`HTTPHandler`, :class:`HTTPDefaultErrorHandler`, + :class:`HTTPRedirectHandler`, :class:`FTPHandler`, :class:`FileHandler`, + :class:`HTTPErrorProcessor`. + + If the Python installation has SSL support (:func:`socket.ssl` exists), + :class:`HTTPSHandler` will also be added. + + Beginning in Python 2.3, a :class:`BaseHandler` subclass may also change its + :attr:`handler_order` member variable to modify its position in the handlers + list. + +The following exceptions are raised as appropriate: + + +.. exception:: URLError + + The handlers raise this exception (or derived exceptions) when they run into a + problem. It is a subclass of :exc:`IOError`. + + +.. exception:: HTTPError + + A subclass of :exc:`URLError`, it can also function as a non-exceptional + file-like return value (the same thing that :func:`urlopen` returns). This + is useful when handling exotic HTTP errors, such as requests for + authentication. + +The following classes are provided: + + +.. class:: Request(url[, data][, headers] [, origin_req_host][, unverifiable]) + + This class is an abstraction of a URL request. + + *url* should be a string containing a valid URL. 
+ + *data* may be a string specifying additional data to send to the server, or + ``None`` if no such data is needed. Currently HTTP requests are the only ones + that use *data*; the HTTP request will be a POST instead of a GET when the + *data* parameter is provided. *data* should be a buffer in the standard + :mimetype:`application/x-www-form-urlencoded` format. The + :func:`urllib.urlencode` function takes a mapping or sequence of 2-tuples and + returns a string in this format. + + *headers* should be a dictionary, and will be treated as if :meth:`add_header` + was called with each key and value as arguments. + + The final two arguments are only of interest for correct handling of third-party + HTTP cookies: + + *origin_req_host* should be the request-host of the origin transaction, as + defined by :rfc:`2965`. It defaults to ``cookielib.request_host(self)``. This + is the host name or IP address of the original request that was initiated by the + user. For example, if the request is for an image in an HTML document, this + should be the request-host of the request for the page containing the image. + + *unverifiable* should indicate whether the request is unverifiable, as defined + by RFC 2965. It defaults to False. An unverifiable request is one whose URL + the user did not have the option to approve. For example, if the request is for + an image in an HTML document, and the user had no option to approve the + automatic fetching of the image, this should be true. + + +.. class:: OpenerDirector() + + The :class:`OpenerDirector` class opens URLs via :class:`BaseHandler`\ s chained + together. It manages the chaining of handlers, and recovery from errors. + + +.. class:: BaseHandler() + + This is the base class for all registered handlers --- and handles only the + simple mechanics of registration. + + +.. 
class:: HTTPDefaultErrorHandler() + + A class which defines a default handler for HTTP error responses; all responses + are turned into :exc:`HTTPError` exceptions. + + +.. class:: HTTPRedirectHandler() + + A class to handle redirections. + + +.. class:: HTTPCookieProcessor([cookiejar]) + + A class to handle HTTP Cookies. + + +.. class:: ProxyHandler([proxies]) + + Cause requests to go through a proxy. If *proxies* is given, it must be a + dictionary mapping protocol names to URLs of proxies. The default is to read the + list of proxies from the environment variables :envvar:`<protocol>_proxy`. + + +.. class:: HTTPPasswordMgr() + + Keep a database of ``(realm, uri) -> (user, password)`` mappings. + + +.. class:: HTTPPasswordMgrWithDefaultRealm() + + Keep a database of ``(realm, uri) -> (user, password)`` mappings. A realm of + ``None`` is considered a catch-all realm, which is searched if no other realm + fits. + + +.. class:: AbstractBasicAuthHandler([password_mgr]) + + This is a mixin class that helps with HTTP authentication, both to the remote + host and to a proxy. *password_mgr*, if given, should be something that is + compatible with :class:`HTTPPasswordMgr`; refer to section + :ref:`http-password-mgr` for information on the interface that must be + supported. + + +.. class:: HTTPBasicAuthHandler([password_mgr]) + + Handle authentication with the remote host. *password_mgr*, if given, should be + something that is compatible with :class:`HTTPPasswordMgr`; refer to section + :ref:`http-password-mgr` for information on the interface that must be + supported. + + +.. class:: ProxyBasicAuthHandler([password_mgr]) + + Handle authentication with the proxy. *password_mgr*, if given, should be + something that is compatible with :class:`HTTPPasswordMgr`; refer to section + :ref:`http-password-mgr` for information on the interface that must be + supported. + + +.. 
class:: AbstractDigestAuthHandler([password_mgr]) + + This is a mixin class that helps with HTTP authentication, both to the remote + host and to a proxy. *password_mgr*, if given, should be something that is + compatible with :class:`HTTPPasswordMgr`; refer to section + :ref:`http-password-mgr` for information on the interface that must be + supported. + + +.. class:: HTTPDigestAuthHandler([password_mgr]) + + Handle authentication with the remote host. *password_mgr*, if given, should be + something that is compatible with :class:`HTTPPasswordMgr`; refer to section + :ref:`http-password-mgr` for information on the interface that must be + supported. + + +.. class:: ProxyDigestAuthHandler([password_mgr]) + + Handle authentication with the proxy. *password_mgr*, if given, should be + something that is compatible with :class:`HTTPPasswordMgr`; refer to section + :ref:`http-password-mgr` for information on the interface that must be + supported. + + +.. class:: HTTPHandler() + + A class to handle opening of HTTP URLs. + + +.. class:: HTTPSHandler() + + A class to handle opening of HTTPS URLs. + + +.. class:: FileHandler() + + Open local files. + + +.. class:: FTPHandler() + + Open FTP URLs. + + +.. class:: CacheFTPHandler() + + Open FTP URLs, keeping a cache of open FTP connections to minimize delays. + + +.. class:: UnknownHandler() + + A catch-all class to handle unknown URLs. + + +.. _request-objects: + +Request Objects +--------------- + +The following methods describe all of :class:`Request`'s public interface, and +so all must be overridden in subclasses. + + +.. method:: Request.add_data(data) + + Set the :class:`Request` data to *data*. This is ignored by all handlers except + HTTP handlers --- and there it should be a byte string, and will change the + request to be ``POST`` rather than ``GET``. + + +.. method:: Request.get_method() + + Return a string indicating the HTTP request method. 
This is only meaningful for + HTTP requests, and currently always returns ``'GET'`` or ``'POST'``. + + +.. method:: Request.has_data() + + Return whether the instance has a non-\ ``None`` data. + + +.. method:: Request.get_data() + + Return the instance's data. + + +.. method:: Request.add_header(key, val) + + Add another header to the request. Headers are currently ignored by all + handlers except HTTP handlers, where they are added to the list of headers sent + to the server. Note that there cannot be more than one header with the same + name, and later calls will overwrite previous calls in case the *key* collides. + Currently, this is no loss of HTTP functionality, since all headers which have + meaning when used more than once have a (header-specific) way of gaining the + same functionality using only one header. + + +.. method:: Request.add_unredirected_header(key, header) + + Add a header that will not be added to a redirected request. + + .. versionadded:: 2.4 + + +.. method:: Request.has_header(header) + + Return whether the instance has the named header (checks both regular and + unredirected). + + .. versionadded:: 2.4 + + +.. method:: Request.get_full_url() + + Return the URL given in the constructor. + + +.. method:: Request.get_type() + + Return the type of the URL --- also known as the scheme. + + +.. method:: Request.get_host() + + Return the host to which a connection will be made. + + +.. method:: Request.get_selector() + + Return the selector --- the part of the URL that is sent to the server. + + +.. method:: Request.set_proxy(host, type) + + Prepare the request by connecting to a proxy server. The *host* and *type* will + replace those of the instance, and the instance's selector will be the original + URL given in the constructor. + + +.. method:: Request.get_origin_req_host() + + Return the request-host of the origin transaction, as defined by :rfc:`2965`. + See the documentation for the :class:`Request` constructor. + + +.. 
method:: Request.is_unverifiable() + + Return whether the request is unverifiable, as defined by RFC 2965. See the + documentation for the :class:`Request` constructor. + + +.. _opener-director-objects: + +OpenerDirector Objects +---------------------- + +:class:`OpenerDirector` instances have the following methods: + + +.. method:: OpenerDirector.add_handler(handler) + + *handler* should be an instance of :class:`BaseHandler`. The following methods + are searched for, and added to the possible chains (note that HTTP errors are a + special case). + + * :meth:`protocol_open` --- signal that the handler knows how to open *protocol* + URLs. + + * :meth:`http_error_type` --- signal that the handler knows how to handle HTTP + errors with HTTP error code *type*. + + * :meth:`protocol_error` --- signal that the handler knows how to handle errors + from (non-\ ``http``) *protocol*. + + * :meth:`protocol_request` --- signal that the handler knows how to pre-process + *protocol* requests. + + * :meth:`protocol_response` --- signal that the handler knows how to + post-process *protocol* responses. + + +.. method:: OpenerDirector.open(url[, data][, timeout]) + + Open the given *url* (which can be a request object or a string), optionally + passing the given *data*. Arguments, return values and exceptions raised are the + same as those of :func:`urlopen` (which simply calls the :meth:`open` method on + the currently installed global :class:`OpenerDirector`). The optional *timeout* + parameter specifies a timeout in seconds for the connection attempt (if not + specified, or passed as ``None``, the global default timeout setting will be used; + this actually only works for HTTP, HTTPS, FTP and FTPS connections). + + .. versionchanged:: 2.6 + *timeout* was added. + + +.. method:: OpenerDirector.error(proto[, arg[, ...]]) + + Handle an error of the given protocol. This will call the registered error + handlers for the given protocol with the given arguments (which are protocol + specific).
The HTTP protocol is a special case which uses the HTTP response + code to determine the specific error handler; refer to the :meth:`http_error_\*` + methods of the handler classes. + + Return values and exceptions raised are the same as those of :func:`urlopen`. + +OpenerDirector objects open URLs in three stages: + +The order in which these methods are called within each stage is determined by +sorting the handler instances. + +#. Every handler with a method named like :meth:`protocol_request` has that + method called to pre-process the request. + +#. Handlers with a method named like :meth:`protocol_open` are called to handle + the request. This stage ends when a handler either returns a non-\ :const:`None` + value (ie. a response), or raises an exception (usually :exc:`URLError`). + Exceptions are allowed to propagate. + + In fact, the above algorithm is first tried for methods named + :meth:`default_open`. If all such methods return :const:`None`, the algorithm + is repeated for methods named like :meth:`protocol_open`. If all such methods + return :const:`None`, the algorithm is repeated for methods named + :meth:`unknown_open`. + + Note that the implementation of these methods may involve calls of the parent + :class:`OpenerDirector` instance's :meth:`.open` and :meth:`.error` methods. + +#. Every handler with a method named like :meth:`protocol_response` has that + method called to post-process the response. + + +.. _base-handler-objects: + +BaseHandler Objects +------------------- + +:class:`BaseHandler` objects provide a couple of methods that are directly +useful, and others that are meant to be used by derived classes. These are +intended for direct use: + + +.. method:: BaseHandler.add_parent(director) + + Add a director as parent. + + +.. method:: BaseHandler.close() + + Remove any parents. + +The following members and methods should only be used by classes derived from +:class:`BaseHandler`. + +.. 
note:: + + The convention has been adopted that subclasses defining + :meth:`protocol_request` or :meth:`protocol_response` methods are named + :class:`\*Processor`; all others are named :class:`\*Handler`. + + +.. attribute:: BaseHandler.parent + + A valid :class:`OpenerDirector`, which can be used to open using a different + protocol, or handle errors. + + +.. method:: BaseHandler.default_open(req) + + This method is *not* defined in :class:`BaseHandler`, but subclasses should + define it if they want to catch all URLs. + + This method, if implemented, will be called by the parent + :class:`OpenerDirector`. It should return a file-like object as described in + the return value of the :meth:`open` of :class:`OpenerDirector`, or ``None``. + It should raise :exc:`URLError`, unless a truly exceptional thing happens (for + example, :exc:`MemoryError` should not be mapped to :exc:`URLError`). + + This method will be called before any protocol-specific open method. + + +.. method:: BaseHandler.protocol_open(req) + :noindex: + + This method is *not* defined in :class:`BaseHandler`, but subclasses should + define it if they want to handle URLs with the given protocol. + + This method, if defined, will be called by the parent :class:`OpenerDirector`. + Return values should be the same as for :meth:`default_open`. + + +.. method:: BaseHandler.unknown_open(req) + + This method is *not* defined in :class:`BaseHandler`, but subclasses should + define it if they want to catch all URLs with no specific registered handler to + open it. + + This method, if implemented, will be called by the :attr:`parent` + :class:`OpenerDirector`. Return values should be the same as for + :meth:`default_open`. + + +.. method:: BaseHandler.http_error_default(req, fp, code, msg, hdrs) + + This method is *not* defined in :class:`BaseHandler`, but subclasses should + override it if they intend to provide a catch-all for otherwise unhandled HTTP + errors. 
It will be called automatically by the :class:`OpenerDirector` getting + the error, and should not normally be called in other circumstances. + + *req* will be a :class:`Request` object, *fp* will be a file-like object with + the HTTP error body, *code* will be the three-digit code of the error, *msg* + will be the user-visible explanation of the code and *hdrs* will be a mapping + object with the headers of the error. + + Return values and exceptions raised should be the same as those of + :func:`urlopen`. + + +.. method:: BaseHandler.http_error_nnn(req, fp, code, msg, hdrs) + + *nnn* should be a three-digit HTTP error code. This method is also not defined + in :class:`BaseHandler`, but will be called, if it exists, on an instance of a + subclass, when an HTTP error with code *nnn* occurs. + + Subclasses should override this method to handle specific HTTP errors. + + Arguments, return values and exceptions raised should be the same as for + :meth:`http_error_default`. + + +.. method:: BaseHandler.protocol_request(req) + :noindex: + + This method is *not* defined in :class:`BaseHandler`, but subclasses should + define it if they want to pre-process requests of the given protocol. + + This method, if defined, will be called by the parent :class:`OpenerDirector`. + *req* will be a :class:`Request` object. The return value should be a + :class:`Request` object. + + +.. method:: BaseHandler.protocol_response(req, response) + :noindex: + + This method is *not* defined in :class:`BaseHandler`, but subclasses should + define it if they want to post-process responses of the given protocol. + + This method, if defined, will be called by the parent :class:`OpenerDirector`. + *req* will be a :class:`Request` object. *response* will be an object + implementing the same interface as the return value of :func:`urlopen`. The + return value should implement the same interface as the return value of + :func:`urlopen`. + + +.. 
_http-redirect-handler: + +HTTPRedirectHandler Objects +--------------------------- + +.. note:: + + Some HTTP redirections require action from this module's client code. If this + is the case, :exc:`HTTPError` is raised. See :rfc:`2616` for details of the + precise meanings of the various redirection codes. + + +.. method:: HTTPRedirectHandler.redirect_request(req, fp, code, msg, hdrs) + + Return a :class:`Request` or ``None`` in response to a redirect. This is called + by the default implementations of the :meth:`http_error_30\*` methods when a + redirection is received from the server. If a redirection should take place, + return a new :class:`Request` to allow :meth:`http_error_30\*` to perform the + redirect. Otherwise, raise :exc:`HTTPError` if no other handler should try to + handle this URL, or return ``None`` if you can't but another handler might. + + .. note:: + + The default implementation of this method does not strictly follow :rfc:`2616`, + which says that 301 and 302 responses to ``POST`` requests must not be + automatically redirected without confirmation by the user. In reality, browsers + do allow automatic redirection of these responses, changing the POST to a + ``GET``, and the default implementation reproduces this behavior. + + +.. method:: HTTPRedirectHandler.http_error_301(req, fp, code, msg, hdrs) + + Redirect to the ``Location:`` URL. This method is called by the parent + :class:`OpenerDirector` when getting an HTTP 'moved permanently' response. + + +.. method:: HTTPRedirectHandler.http_error_302(req, fp, code, msg, hdrs) + + The same as :meth:`http_error_301`, but called for the 'found' response. + + +.. method:: HTTPRedirectHandler.http_error_303(req, fp, code, msg, hdrs) + + The same as :meth:`http_error_301`, but called for the 'see other' response. + + +.. method:: HTTPRedirectHandler.http_error_307(req, fp, code, msg, hdrs) + + The same as :meth:`http_error_301`, but called for the 'temporary redirect' + response. + + +.. 
_http-cookie-processor: + +HTTPCookieProcessor Objects +--------------------------- + +.. versionadded:: 2.4 + +:class:`HTTPCookieProcessor` instances have one attribute: + + +.. attribute:: HTTPCookieProcessor.cookiejar + + The :class:`cookielib.CookieJar` in which cookies are stored. + + +.. _proxy-handler: + +ProxyHandler Objects +-------------------- + + +.. method:: ProxyHandler.protocol_open(request) + :noindex: + + The :class:`ProxyHandler` will have a method :meth:`protocol_open` for every + *protocol* which has a proxy in the *proxies* dictionary given in the + constructor. The method will modify requests to go through the proxy, by + calling ``request.set_proxy()``, and call the next handler in the chain to + actually execute the protocol. + + +.. _http-password-mgr: + +HTTPPasswordMgr Objects +----------------------- + +These methods are available on :class:`HTTPPasswordMgr` and +:class:`HTTPPasswordMgrWithDefaultRealm` objects. + + +.. method:: HTTPPasswordMgr.add_password(realm, uri, user, passwd) + + *uri* can be either a single URI, or a sequence of URIs. *realm*, *user* and + *passwd* must be strings. This causes ``(user, passwd)`` to be used as + authentication tokens when authentication for *realm* and a super-URI of any of + the given URIs is given. + + +.. method:: HTTPPasswordMgr.find_user_password(realm, authuri) + + Get user/password for given realm and URI, if any. This method will return + ``(None, None)`` if there is no matching user/password. + + For :class:`HTTPPasswordMgrWithDefaultRealm` objects, the realm ``None`` will be + searched if the given *realm* has no matching user/password. + + +.. _abstract-basic-auth-handler: + +AbstractBasicAuthHandler Objects +-------------------------------- + + +.. method:: AbstractBasicAuthHandler.http_error_auth_reqed(authreq, host, req, headers) + + Handle an authentication request by getting a user/password pair, and re-trying + the request. 
*authreq* should be the name of the header where the information + about the realm is included in the request, *host* specifies the URL and path to + authenticate for, *req* should be the (failed) :class:`Request` object, and + *headers* should be the error headers. + + *host* is either an authority (e.g. ``"python.org"``) or a URL containing an + authority component (e.g. ``"http://python.org/"``). In either case, the + authority must not contain a userinfo component (so, ``"python.org"`` and + ``"python.org:80"`` are fine, ``"joe:password@python.org"`` is not). + + +.. _http-basic-auth-handler: + +HTTPBasicAuthHandler Objects +---------------------------- + + +.. method:: HTTPBasicAuthHandler.http_error_401(req, fp, code, msg, hdrs) + + Retry the request with authentication information, if available. + + +.. _proxy-basic-auth-handler: + +ProxyBasicAuthHandler Objects +----------------------------- + + +.. method:: ProxyBasicAuthHandler.http_error_407(req, fp, code, msg, hdrs) + + Retry the request with authentication information, if available. + + +.. _abstract-digest-auth-handler: + +AbstractDigestAuthHandler Objects +--------------------------------- + + +.. method:: AbstractDigestAuthHandler.http_error_auth_reqed(authreq, host, req, headers) + + *authreq* should be the name of the header where the information about the realm + is included in the request, *host* should be the host to authenticate to, *req* + should be the (failed) :class:`Request` object, and *headers* should be the + error headers. + + +.. _http-digest-auth-handler: + +HTTPDigestAuthHandler Objects +----------------------------- + + +.. method:: HTTPDigestAuthHandler.http_error_401(req, fp, code, msg, hdrs) + + Retry the request with authentication information, if available. + + +.. _proxy-digest-auth-handler: + +ProxyDigestAuthHandler Objects +------------------------------ + + +.. 
method:: ProxyDigestAuthHandler.http_error_407(req, fp, code, msg, hdrs) + + Retry the request with authentication information, if available. + + +.. _http-handler-objects: + +HTTPHandler Objects +------------------- + + +.. method:: HTTPHandler.http_open(req) + + Send an HTTP request, which can be either GET or POST, depending on + ``req.has_data()``. + + +.. _https-handler-objects: + +HTTPSHandler Objects +-------------------- + + +.. method:: HTTPSHandler.https_open(req) + + Send an HTTPS request, which can be either GET or POST, depending on + ``req.has_data()``. + + +.. _file-handler-objects: + +FileHandler Objects +------------------- + + +.. method:: FileHandler.file_open(req) + + Open the file locally, if there is no host name, or the host name is + ``'localhost'``. Change the protocol to ``ftp`` otherwise, and retry opening it + using :attr:`parent`. + + +.. _ftp-handler-objects: + +FTPHandler Objects +------------------ + + +.. method:: FTPHandler.ftp_open(req) + + Open the FTP file indicated by *req*. The login is always done with empty + username and password. + + +.. _cacheftp-handler-objects: + +CacheFTPHandler Objects +----------------------- + +:class:`CacheFTPHandler` objects are :class:`FTPHandler` objects with the +following additional methods: + + +.. method:: CacheFTPHandler.setTimeout(t) + + Set timeout of connections to *t* seconds. + + +.. method:: CacheFTPHandler.setMaxConns(m) + + Set maximum number of cached connections to *m*. + + +.. _unknown-handler-objects: + +UnknownHandler Objects +---------------------- + + +.. method:: UnknownHandler.unknown_open() + + Raise a :exc:`URLError` exception. + + +.. _http-error-processor-objects: + +HTTPErrorProcessor Objects +-------------------------- + +.. versionadded:: 2.4 + + +.. method:: HTTPErrorProcessor.unknown_open() + + Process HTTP error responses. + + For 200 error codes, the response object is returned immediately. 
+ + For non-200 error codes, this simply passes the job on to the + :meth:`protocol_error_code` handler methods, via :meth:`OpenerDirector.error`. + Eventually, :class:`urllib2.HTTPDefaultErrorHandler` will raise an + :exc:`HTTPError` if no other handler handles the error. + + +.. _urllib2-examples: + +Examples +-------- + +This example gets the python.org main page and displays the first 100 bytes of +it:: + + >>> import urllib2 + >>> f = urllib2.urlopen('http://www.python.org/') + >>> print f.read(100) + <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> + <?xml-stylesheet href="./css/ht2html + +Here we are sending a data-stream to the stdin of a CGI and reading the data it +returns to us. Note that this example will only work when the Python +installation supports SSL. :: + + >>> import urllib2 + >>> req = urllib2.Request(url='https://localhost/cgi-bin/test.cgi', + ... data='This data is passed to stdin of the CGI') + >>> f = urllib2.urlopen(req) + >>> print f.read() + Got Data: "This data is passed to stdin of the CGI" + +The code for the sample CGI used in the above example is:: + + #!/usr/bin/env python + import sys + data = sys.stdin.read() + print 'Content-type: text/plain\n\nGot Data: "%s"' % data + +Use of Basic HTTP Authentication:: + + import urllib2 + # Create an OpenerDirector with support for Basic HTTP Authentication... + auth_handler = urllib2.HTTPBasicAuthHandler() + auth_handler.add_password(realm='PDQ Application', + uri='https://mahler:8092/site-updates.py', + user='klem', + passwd='kadidd!ehopper') + opener = urllib2.build_opener(auth_handler) + # ...and install it globally so it can be used with urlopen. + urllib2.install_opener(opener) + urllib2.urlopen('http://www.example.com/login.html') + +:func:`build_opener` provides many handlers by default, including a +:class:`ProxyHandler`. By default, :class:`ProxyHandler` uses the environment +variables named ``<scheme>_proxy``, where ``<scheme>`` is the URL scheme +involved. 
For example, the :envvar:`http_proxy` environment variable is read to +obtain the HTTP proxy's URL. + +This example replaces the default :class:`ProxyHandler` with one that uses +programmatically-supplied proxy URLs, and adds proxy authorization support with +:class:`ProxyBasicAuthHandler`. :: + + proxy_handler = urllib2.ProxyHandler({'http': 'http://www.example.com:3128/'}) + proxy_auth_handler = urllib2.ProxyBasicAuthHandler() + proxy_auth_handler.add_password('realm', 'host', 'username', 'password') + + opener = urllib2.build_opener(proxy_handler, proxy_auth_handler) + # This time, rather than install the OpenerDirector, we use it directly: + opener.open('http://www.example.com/login.html') + +Adding HTTP headers: + +Use the *headers* argument to the :class:`Request` constructor, or:: + + import urllib2 + req = urllib2.Request('http://www.example.com/') + req.add_header('Referer', 'http://www.python.org/') + r = urllib2.urlopen(req) + +:class:`OpenerDirector` automatically adds a :mailheader:`User-Agent` header to +every :class:`Request`. To change this:: + + import urllib2 + opener = urllib2.build_opener() + opener.addheaders = [('User-agent', 'Mozilla/5.0')] + opener.open('http://www.example.com/') + +Also, remember that a few standard headers (:mailheader:`Content-Length`, +:mailheader:`Content-Type` and :mailheader:`Host`) are added when the +:class:`Request` is passed to :func:`urlopen` (or :meth:`OpenerDirector.open`). + diff --git a/Doc/library/urlparse.rst b/Doc/library/urlparse.rst new file mode 100644 index 0000000..c6bc82b --- /dev/null +++ b/Doc/library/urlparse.rst @@ -0,0 +1,268 @@ +:mod:`urlparse` --- Parse URLs into components +============================================== + +.. module:: urlparse + :synopsis: Parse URLs into or assemble them from components. + + +.. 
index:: + single: WWW + single: World Wide Web + single: URL + pair: URL; parsing + pair: relative; URL + +This module defines a standard interface to break Uniform Resource Locator (URL) +strings up in components (addressing scheme, network location, path etc.), to +combine the components back into a URL string, and to convert a "relative URL" +to an absolute URL given a "base URL." + +The module has been designed to match the Internet RFC on Relative Uniform +Resource Locators (and discovered a bug in an earlier draft!). It supports the +following URL schemes: ``file``, ``ftp``, ``gopher``, ``hdl``, ``http``, +``https``, ``imap``, ``mailto``, ``mms``, ``news``, ``nntp``, ``prospero``, +``rsync``, ``rtsp``, ``rtspu``, ``sftp``, ``shttp``, ``sip``, ``sips``, +``snews``, ``svn``, ``svn+ssh``, ``telnet``, ``wais``. + +.. versionadded:: 2.5 + Support for the ``sftp`` and ``sips`` schemes. + +The :mod:`urlparse` module defines the following functions: + + +.. function:: urlparse(urlstring[, default_scheme[, allow_fragments]]) + + Parse a URL into six components, returning a 6-tuple. This corresponds to the + general structure of a URL: ``scheme://netloc/path;parameters?query#fragment``. + Each tuple item is a string, possibly empty. The components are not broken up in + smaller parts (for example, the network location is a single string), and % + escapes are not expanded. The delimiters as shown above are not part of the + result, except for a leading slash in the *path* component, which is retained if + present. For example:: + + >>> from urlparse import urlparse + >>> o = urlparse('http://www.cwi.nl:80/%7Eguido/Python.html') + >>> o + ('http', 'www.cwi.nl:80', '/%7Eguido/Python.html', '', '', '') + >>> o.scheme + 'http' + >>> o.port + 80 + >>> o.geturl() + 'http://www.cwi.nl:80/%7Eguido/Python.html' + + If the *default_scheme* argument is specified, it gives the default addressing + scheme, to be used only if the URL does not specify one. 
The default value for + this argument is the empty string. + + If the *allow_fragments* argument is false, fragment identifiers are not + allowed, even if the URL's addressing scheme normally does support them. The + default value for this argument is :const:`True`. + + The return value is actually an instance of a subclass of :class:`tuple`. This + class has the following additional read-only convenience attributes: + + +------------------+-------+--------------------------+----------------------+ + | Attribute | Index | Value | Value if not present | + +==================+=======+==========================+======================+ + | :attr:`scheme` | 0 | URL scheme specifier | empty string | + +------------------+-------+--------------------------+----------------------+ + | :attr:`netloc` | 1 | Network location part | empty string | + +------------------+-------+--------------------------+----------------------+ + | :attr:`path` | 2 | Hierarchical path | empty string | + +------------------+-------+--------------------------+----------------------+ + | :attr:`params` | 3 | Parameters for last path | empty string | + | | | element | | + +------------------+-------+--------------------------+----------------------+ + | :attr:`query` | 4 | Query component | empty string | + +------------------+-------+--------------------------+----------------------+ + | :attr:`fragment` | 5 | Fragment identifier | empty string | + +------------------+-------+--------------------------+----------------------+ + | :attr:`username` | | User name | :const:`None` | + +------------------+-------+--------------------------+----------------------+ + | :attr:`password` | | Password | :const:`None` | + +------------------+-------+--------------------------+----------------------+ + | :attr:`hostname` | | Host name (lower case) | :const:`None` | + +------------------+-------+--------------------------+----------------------+ + | :attr:`port` | | Port number as integer, | :const:`None` | + | 
| | if present | | + +------------------+-------+--------------------------+----------------------+ + + See section :ref:`urlparse-result-object` for more information on the result + object. + + .. versionchanged:: 2.5 + Added attributes to return value. + + +.. function:: urlunparse(parts) + + Construct a URL from a tuple as returned by ``urlparse()``. The *parts* argument + can be any six-item iterable. This may result in a slightly different, but + equivalent URL, if the URL that was parsed originally had unnecessary delimiters + (for example, a ? with an empty query; the RFC states that these are + equivalent). + + +.. function:: urlsplit(urlstring[, default_scheme[, allow_fragments]]) + + This is similar to :func:`urlparse`, but does not split the params from the URL. + This should generally be used instead of :func:`urlparse` if the more recent URL + syntax allowing parameters to be applied to each segment of the *path* portion + of the URL (see :rfc:`2396`) is wanted. A separate function is needed to + separate the path segments and parameters. This function returns a 5-tuple: + (addressing scheme, network location, path, query, fragment identifier). + + The return value is actually an instance of a subclass of :class:`tuple`. 
This + class has the following additional read-only convenience attributes: + + +------------------+-------+-------------------------+----------------------+ + | Attribute | Index | Value | Value if not present | + +==================+=======+=========================+======================+ + | :attr:`scheme` | 0 | URL scheme specifier | empty string | + +------------------+-------+-------------------------+----------------------+ + | :attr:`netloc` | 1 | Network location part | empty string | + +------------------+-------+-------------------------+----------------------+ + | :attr:`path` | 2 | Hierarchical path | empty string | + +------------------+-------+-------------------------+----------------------+ + | :attr:`query` | 3 | Query component | empty string | + +------------------+-------+-------------------------+----------------------+ + | :attr:`fragment` | 4 | Fragment identifier | empty string | + +------------------+-------+-------------------------+----------------------+ + | :attr:`username` | | User name | :const:`None` | + +------------------+-------+-------------------------+----------------------+ + | :attr:`password` | | Password | :const:`None` | + +------------------+-------+-------------------------+----------------------+ + | :attr:`hostname` | | Host name (lower case) | :const:`None` | + +------------------+-------+-------------------------+----------------------+ + | :attr:`port` | | Port number as integer, | :const:`None` | + | | | if present | | + +------------------+-------+-------------------------+----------------------+ + + See section :ref:`urlparse-result-object` for more information on the result + object. + + .. versionadded:: 2.2 + + .. versionchanged:: 2.5 + Added attributes to return value. + + +.. function:: urlunsplit(parts) + + Combine the elements of a tuple as returned by :func:`urlsplit` into a complete + URL as a string. The *parts* argument can be any five-item iterable. 
This may + result in a slightly different, but equivalent URL, if the URL that was parsed + originally had unnecessary delimiters (for example, a ? with an empty query; the + RFC states that these are equivalent). + + .. versionadded:: 2.2 + + +.. function:: urljoin(base, url[, allow_fragments]) + + Construct a full ("absolute") URL by combining a "base URL" (*base*) with + another URL (*url*). Informally, this uses components of the base URL, in + particular the addressing scheme, the network location and (part of) the path, + to provide missing components in the relative URL. For example:: + + >>> from urlparse import urljoin + >>> urljoin('http://www.cwi.nl/%7Eguido/Python.html', 'FAQ.html') + 'http://www.cwi.nl/%7Eguido/FAQ.html' + + The *allow_fragments* argument has the same meaning and default as for + :func:`urlparse`. + + .. note:: + + If *url* is an absolute URL (that is, starting with ``//`` or ``scheme://``), + the *url*'s host name and/or scheme will be present in the result. For example: + + :: + + >>> urljoin('http://www.cwi.nl/%7Eguido/Python.html', + ... '//www.python.org/%7Eguido') + 'http://www.python.org/%7Eguido' + + If you do not want that behavior, preprocess the *url* with :func:`urlsplit` and + :func:`urlunsplit`, removing possible *scheme* and *netloc* parts. + + +.. function:: urldefrag(url) + + If *url* contains a fragment identifier, returns a modified version of *url* + with no fragment identifier, and the fragment identifier as a separate string. + If there is no fragment identifier in *url*, returns *url* unmodified and an + empty string. + + +.. seealso:: + + :rfc:`1738` - Uniform Resource Locators (URL) + This specifies the formal syntax and semantics of absolute URLs. + + :rfc:`1808` - Relative Uniform Resource Locators + This Request For Comments includes the rules for joining an absolute and a + relative URL, including a fair number of "Abnormal Examples" which govern the + treatment of border cases. 
+ + :rfc:`2396` - Uniform Resource Identifiers (URI): Generic Syntax + Document describing the generic syntactic requirements for both Uniform Resource + Names (URNs) and Uniform Resource Locators (URLs). + + +.. _urlparse-result-object: + +Results of :func:`urlparse` and :func:`urlsplit` +------------------------------------------------ + +The result objects from the :func:`urlparse` and :func:`urlsplit` functions are +subclasses of the :class:`tuple` type. These subclasses add the attributes +described in those functions, as well as provide an additional method: + + +.. method:: ParseResult.geturl() + + Return the re-combined version of the original URL as a string. This may differ + from the original URL in that the scheme will always be normalized to lower case + and empty components may be dropped. Specifically, empty parameters, queries, + and fragment identifiers will be removed. + + The result of this method is a fixpoint if passed back through the original + parsing function:: + + >>> import urlparse + >>> url = 'HTTP://www.Python.org/doc/#' + + >>> r1 = urlparse.urlsplit(url) + >>> r1.geturl() + 'http://www.Python.org/doc/' + + >>> r2 = urlparse.urlsplit(r1.geturl()) + >>> r2.geturl() + 'http://www.Python.org/doc/' + + .. versionadded:: 2.5 + +The following classes provide the implementations of the parse results: + + +.. class:: BaseResult + + Base class for the concrete result classes. This provides most of the attribute + definitions. It does not provide a :meth:`geturl` method. It is derived from + :class:`tuple`, but does not override the :meth:`__init__` or :meth:`__new__` + methods. + + +.. class:: ParseResult(scheme, netloc, path, params, query, fragment) + + Concrete class for :func:`urlparse` results. The :meth:`__new__` method is + overridden to support checking that the right number of arguments are passed. + + +.. class:: SplitResult(scheme, netloc, path, query, fragment) + + Concrete class for :func:`urlsplit` results. 
The :meth:`__new__` method is + overridden to support checking that the right number of arguments are passed. + diff --git a/Doc/library/user.rst b/Doc/library/user.rst new file mode 100644 index 0000000..ba94262 --- /dev/null +++ b/Doc/library/user.rst @@ -0,0 +1,69 @@ + +:mod:`user` --- User-specific configuration hook +================================================ + +.. module:: user + :synopsis: A standard way to reference user-specific modules. + + +.. index:: + pair: .pythonrc.py; file + triple: user; configuration; file + +As a policy, Python doesn't run user-specified code on startup of Python +programs. (Only interactive sessions execute the script specified in the +:envvar:`PYTHONSTARTUP` environment variable if it exists). + +However, some programs or sites may find it convenient to allow users to have a +standard customization file, which gets run when a program requests it. This +module implements such a mechanism. A program that wishes to use the mechanism +must execute the statement :: + + import user + +.. index:: builtin: exec + +The :mod:`user` module looks for a file :file:`.pythonrc.py` in the user's home +directory and if it can be opened, executes it (using :func:`exec`) in its +own (the module :mod:`user`'s) global namespace. Errors during this phase are +not caught; that's up to the program that imports the :mod:`user` module, if it +wishes. The home directory is assumed to be named by the :envvar:`HOME` +environment variable; if this is not set, the current directory is used. + +The user's :file:`.pythonrc.py` could conceivably test for ``sys.version`` if it +wishes to do different things depending on the Python version. + +A warning to users: be very conservative in what you place in your +:file:`.pythonrc.py` file. Since you don't know which programs will use it, +changing the behavior of standard modules or functions is generally not a good +idea. 
+ +A suggestion for programmers who wish to use this mechanism: a simple way to let +users specify options for your package is to have them define variables in their +:file:`.pythonrc.py` file that you test in your module. For example, a module +:mod:`spam` that has a verbosity level can look for a variable +``user.spam_verbose``, as follows:: + + import user + + verbose = bool(getattr(user, "spam_verbose", 0)) + +(The three-argument form of :func:`getattr` is used in case the user has not +defined ``spam_verbose`` in their :file:`.pythonrc.py` file.) + +Programs with extensive customization needs are better off reading a +program-specific customization file. + +Programs with security or privacy concerns should *not* import this module; a +user can easily break into a program by placing arbitrary code in the +:file:`.pythonrc.py` file. + +Modules for general use should *not* import this module; it may interfere with +the operation of the importing program. + + +.. seealso:: + + Module :mod:`site` + Site-wide customization mechanism. + diff --git a/Doc/library/userdict.rst b/Doc/library/userdict.rst new file mode 100644 index 0000000..11d46ed --- /dev/null +++ b/Doc/library/userdict.rst @@ -0,0 +1,188 @@ + +:mod:`UserDict` --- Class wrapper for dictionary objects +======================================================== + +.. module:: UserDict + :synopsis: Class wrapper for dictionary objects. + + +The module defines a mixin, :class:`DictMixin`, defining all dictionary methods +for classes that already have a minimum mapping interface. This greatly +simplifies writing classes that need to be substitutable for dictionaries (such +as the shelve module). + +This module also defines a class, :class:`UserDict`, that acts as a wrapper +around dictionary objects. The need for this class has been largely supplanted +by the ability to subclass directly from :class:`dict` (a feature that became +available starting with Python version 2.2). 
Prior to the introduction of +:class:`dict`, the :class:`UserDict` class was used to create dictionary-like +sub-classes that obtained new behaviors by overriding existing methods or adding +new ones. + +The :mod:`UserDict` module defines the :class:`UserDict` class and +:class:`DictMixin`: + + +.. class:: UserDict([initialdata]) + + Class that simulates a dictionary. The instance's contents are kept in a + regular dictionary, which is accessible via the :attr:`data` attribute of + :class:`UserDict` instances. If *initialdata* is provided, :attr:`data` is + initialized with its contents; note that a reference to *initialdata* will not + be kept, allowing it to be used for other purposes. + + .. note:: + + For backward compatibility, instances of :class:`UserDict` are not iterable. + + +.. class:: IterableUserDict([initialdata]) + + Subclass of :class:`UserDict` that supports direct iteration (e.g. ``for key in + myDict``). + +In addition to supporting the methods and operations of mappings (see section +:ref:`typesmapping`), :class:`UserDict` and :class:`IterableUserDict` instances +provide the following attribute: + + +.. attribute:: IterableUserDict.data + + A real dictionary used to store the contents of the :class:`UserDict` class. + + +.. class:: DictMixin() + + Mixin defining all dictionary methods for classes that already have a minimum + dictionary interface including :meth:`__getitem__`, :meth:`__setitem__`, + :meth:`__delitem__`, and :meth:`keys`. + + This mixin should be used as a superclass. Adding each of the above methods + adds progressively more functionality. For instance, defining all but + :meth:`__delitem__` will preclude only :meth:`pop` and :meth:`popitem` from the + full interface. + + In addition to the four base methods, progressively more efficiency comes with + defining :meth:`__contains__`, :meth:`__iter__`, and :meth:`iteritems`.
+ + Since the mixin has no knowledge of the subclass constructor, it does not define + :meth:`__init__` or :meth:`copy`. + + +:mod:`UserList` --- Class wrapper for list objects +================================================== + +.. module:: UserList + :synopsis: Class wrapper for list objects. + + +.. note:: + + This module is available for backward compatibility only. If you are writing + code that does not need to work with versions of Python earlier than Python 2.2, + please consider subclassing directly from the built-in :class:`list` type. + +This module defines a class that acts as a wrapper around list objects. It is a +useful base class for your own list-like classes, which can inherit from them +and override existing methods or add new ones. In this way one can add new +behaviors to lists. + +The :mod:`UserList` module defines the :class:`UserList` class: + + +.. class:: UserList([list]) + + Class that simulates a list. The instance's contents are kept in a regular + list, which is accessible via the :attr:`data` attribute of :class:`UserList` + instances. The instance's contents are initially set to a copy of *list*, + defaulting to the empty list ``[]``. *list* can be any iterable, e.g. a + real Python list or a :class:`UserList` object. + +In addition to supporting the methods and operations of mutable sequences (see +section :ref:`typesseq`), :class:`UserList` instances provide the following +attribute: + + +.. attribute:: UserList.data + + A real Python list object used to store the contents of the :class:`UserList` + class. + +**Subclassing requirements:** Subclasses of :class:`UserList` are expected to +offer a constructor which can be called with either no arguments or one +argument. List operations which return a new sequence attempt to create an +instance of the actual implementation class. To do so, it assumes that the +constructor can be called with a single parameter, which is a sequence object +used as a data source.
+ +If a derived class does not wish to comply with this requirement, all of the +special methods supported by this class will need to be overridden; please +consult the sources for information about the methods which need to be provided +in that case. + +.. versionchanged:: 2.0 + Python versions 1.5.2 and 1.6 also required that the constructor be callable + with no parameters, and offer a mutable :attr:`data` attribute. Earlier + versions of Python did not attempt to create instances of the derived class. + + +:mod:`UserString` --- Class wrapper for string objects +====================================================== + +.. module:: UserString + :synopsis: Class wrapper for string objects. +.. moduleauthor:: Peter Funk <pf@artcom-gmbh.de> +.. sectionauthor:: Peter Funk <pf@artcom-gmbh.de> + + +.. note:: + + The :class:`UserString` class from this module is available for backward + compatibility only. If you are writing code that does not need to work with + versions of Python earlier than Python 2.2, please consider subclassing directly + from the built-in :class:`str` type instead of using :class:`UserString` (there + is no built-in equivalent to :class:`MutableString`). + +This module defines a class that acts as a wrapper around string objects. It is +a useful base class for your own string-like classes, which can inherit from +them and override existing methods or add new ones. In this way one can add new +behaviors to strings. + +It should be noted that these classes are highly inefficient compared to real +string or Unicode objects; this is especially the case for +:class:`MutableString`. + +The :mod:`UserString` module defines the following classes: + + +.. class:: UserString([sequence]) + + Class that simulates a string or a Unicode string object. The instance's + content is kept in a regular string or Unicode string object, which is + accessible via the :attr:`data` attribute of :class:`UserString` instances.
The + instance's contents are initially set to a copy of *sequence*. *sequence* can + be either a regular Python string or Unicode string, an instance of + :class:`UserString` (or a subclass) or an arbitrary sequence which can be + converted into a string using the built-in :func:`str` function. + + +.. class:: MutableString([sequence]) + + This class is derived from the :class:`UserString` above and redefines strings + to be *mutable*. Mutable strings can't be used as dictionary keys, because + dictionaries require *immutable* objects as keys. The main intention of this + class is to serve as an educational example for inheritance and necessity to + remove (override) the :meth:`__hash__` method in order to trap attempts to use a + mutable object as dictionary key, which would be otherwise very error prone and + hard to track down. + +In addition to supporting the methods and operations of string and Unicode +objects (see section :ref:`string-methods`), :class:`UserString` instances +provide the following attribute: + + +.. attribute:: MutableString.data + + A real Python string or Unicode object used to store the content of the + :class:`UserString` class. + diff --git a/Doc/library/uu.rst b/Doc/library/uu.rst new file mode 100644 index 0000000..e2303c3 --- /dev/null +++ b/Doc/library/uu.rst @@ -0,0 +1,60 @@ + +:mod:`uu` --- Encode and decode uuencode files +============================================== + +.. module:: uu + :synopsis: Encode and decode files in uuencode format. +.. moduleauthor:: Lance Ellinghouse + + +This module encodes and decodes files in uuencode format, allowing arbitrary +binary data to be transferred over ASCII-only connections. Wherever a file +argument is expected, the methods accept a file-like object. For backwards +compatibility, a string containing a pathname is also accepted, and the +corresponding file will be opened for reading and writing; the pathname ``'-'`` +is understood to mean the standard input or output. 
However, this interface is +deprecated; it's better for the caller to open the file itself, and be sure +that, when required, the mode is ``'rb'`` or ``'wb'`` on Windows. + +.. index:: + single: Jansen, Jack + single: Ellinghouse, Lance + +This code was contributed by Lance Ellinghouse, and modified by Jack Jansen. + +The :mod:`uu` module defines the following functions: + + +.. function:: encode(in_file, out_file[, name[, mode]]) + + Uuencode file *in_file* into file *out_file*. The uuencoded file will have the + header specifying *name* and *mode* as the defaults for the results of decoding + the file. The default defaults are taken from *in_file*, or ``'-'`` and ``0666`` + respectively. + + +.. function:: decode(in_file[, out_file[, mode[, quiet]]]) + + This call decodes uuencoded file *in_file* placing the result on file + *out_file*. If *out_file* is a pathname, *mode* is used to set the permission + bits if the file must be created. Defaults for *out_file* and *mode* are taken + from the uuencode header. However, if the file specified in the header already + exists, a :exc:`uu.Error` is raised. + + :func:`decode` may print a warning to standard error if the input was produced + by an incorrect uuencoder and Python could recover from that error. Setting + *quiet* to a true value silences this warning. + + +.. exception:: Error() + + Subclass of :exc:`Exception`, this can be raised by :func:`uu.decode` under + various situations, such as described above, but also including a badly + formatted header, or truncated input file. + + +.. seealso:: + + Module :mod:`binascii` + Support module containing ASCII-to-binary and binary-to-ASCII conversions. + diff --git a/Doc/library/uuid.rst b/Doc/library/uuid.rst new file mode 100644 index 0000000..dd52638 --- /dev/null +++ b/Doc/library/uuid.rst @@ -0,0 +1,258 @@ + +:mod:`uuid` --- UUID objects according to RFC 4122 +================================================== + +.. 
module:: uuid + :synopsis: UUID objects (universally unique identifiers) according to RFC 4122 +.. moduleauthor:: Ka-Ping Yee <ping@zesty.ca> +.. sectionauthor:: George Yoshida <quiver@users.sourceforge.net> + + +.. versionadded:: 2.5 + +This module provides immutable :class:`UUID` objects (the :class:`UUID` class) +and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5` for +generating version 1, 3, 4, and 5 UUIDs as specified in :rfc:`4122`. + +If all you want is a unique ID, you should probably call :func:`uuid1` or +:func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates +a UUID containing the computer's network address. :func:`uuid4` creates a +random UUID. + + +.. class:: UUID([hex[, bytes[, bytes_le[, fields[, int[, version]]]]]]) + + Create a UUID from either a string of 32 hexadecimal digits, a string of 16 + bytes as the *bytes* argument, a string of 16 bytes in little-endian order as + the *bytes_le* argument, a tuple of six integers (32-bit *time_low*, 16-bit + *time_mid*, 16-bit *time_hi_version*, 8-bit *clock_seq_hi_variant*, 8-bit + *clock_seq_low*, 48-bit *node*) as the *fields* argument, or a single 128-bit + integer as the *int* argument. When a string of hex digits is given, curly + braces, hyphens, and a URN prefix are all optional. For example, these + expressions all yield the same UUID:: + + UUID('{12345678-1234-5678-1234-567812345678}') + UUID('12345678123456781234567812345678') + UUID('urn:uuid:12345678-1234-5678-1234-567812345678') + UUID(bytes='\x12\x34\x56\x78'*4) + UUID(bytes_le='\x78\x56\x34\x12\x34\x12\x78\x56' + + '\x12\x34\x56\x78\x12\x34\x56\x78') + UUID(fields=(0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678)) + UUID(int=0x12345678123456781234567812345678) + + Exactly one of *hex*, *bytes*, *bytes_le*, *fields*, or *int* must be given. 
+ The *version* argument is optional; if given, the resulting UUID will have its + variant and version number set according to RFC 4122, overriding bits in the + given *hex*, *bytes*, *bytes_le*, *fields*, or *int*. + +:class:`UUID` instances have these read-only attributes: + + +.. attribute:: UUID.bytes + + The UUID as a 16-byte string (containing the six integer fields in big-endian + byte order). + + +.. attribute:: UUID.bytes_le + + The UUID as a 16-byte string (with *time_low*, *time_mid*, and *time_hi_version* + in little-endian byte order). + + +.. attribute:: UUID.fields + + A tuple of the six integer fields of the UUID, which are also available as six + individual attributes and two derived attributes: + + +------------------------------+-------------------------------+ + | Field | Meaning | + +==============================+===============================+ + | :attr:`time_low` | the first 32 bits of the UUID | + +------------------------------+-------------------------------+ + | :attr:`time_mid` | the next 16 bits of the UUID | + +------------------------------+-------------------------------+ + | :attr:`time_hi_version` | the next 16 bits of the UUID | + +------------------------------+-------------------------------+ + | :attr:`clock_seq_hi_variant` | the next 8 bits of the UUID | + +------------------------------+-------------------------------+ + | :attr:`clock_seq_low` | the next 8 bits of the UUID | + +------------------------------+-------------------------------+ + | :attr:`node` | the last 48 bits of the UUID | + +------------------------------+-------------------------------+ + | :attr:`time` | the 60-bit timestamp | + +------------------------------+-------------------------------+ + | :attr:`clock_seq` | the 14-bit sequence number | + +------------------------------+-------------------------------+ + + +.. attribute:: UUID.hex + + The UUID as a 32-character hexadecimal string. + + +.. attribute:: UUID.int + + The UUID as a 128-bit integer. 
+ + +.. attribute:: UUID.urn + + The UUID as a URN as specified in RFC 4122. + + +.. attribute:: UUID.variant + + The UUID variant, which determines the internal layout of the UUID. This will be + one of the integer constants :const:`RESERVED_NCS`, :const:`RFC_4122`, + :const:`RESERVED_MICROSOFT`, or :const:`RESERVED_FUTURE`. + + +.. attribute:: UUID.version + + The UUID version number (1 through 5, meaningful only when the variant is + :const:`RFC_4122`). + +The :mod:`uuid` module defines the following functions: + + +.. function:: getnode() + + Get the hardware address as a 48-bit positive integer. The first time this + runs, it may launch a separate program, which could be quite slow. If all + attempts to obtain the hardware address fail, we choose a random 48-bit number + with its eighth bit set to 1 as recommended in RFC 4122. "Hardware address" + means the MAC address of a network interface, and on a machine with multiple + network interfaces the MAC address of any one of them may be returned. + +.. index:: single: getnode + + +.. function:: uuid1([node[, clock_seq]]) + + Generate a UUID from a host ID, sequence number, and the current time. If *node* + is not given, :func:`getnode` is used to obtain the hardware address. If + *clock_seq* is given, it is used as the sequence number; otherwise a random + 14-bit sequence number is chosen. + +.. index:: single: uuid1 + + +.. function:: uuid3(namespace, name) + + Generate a UUID based on the MD5 hash of a namespace identifier (which is a + UUID) and a name (which is a string). + +.. index:: single: uuid3 + + +.. function:: uuid4() + + Generate a random UUID. + +.. index:: single: uuid4 + + +.. function:: uuid5(namespace, name) + + Generate a UUID based on the SHA-1 hash of a namespace identifier (which is a + UUID) and a name (which is a string). + +.. index:: single: uuid5 + +The :mod:`uuid` module defines the following namespace identifiers for use with +:func:`uuid3` or :func:`uuid5`. + + +.. 
data:: NAMESPACE_DNS + + When this namespace is specified, the *name* string is a fully-qualified domain + name. + + +.. data:: NAMESPACE_URL + + When this namespace is specified, the *name* string is a URL. + + +.. data:: NAMESPACE_OID + + When this namespace is specified, the *name* string is an ISO OID. + + +.. data:: NAMESPACE_X500 + + When this namespace is specified, the *name* string is an X.500 DN in DER or a + text output format. + +The :mod:`uuid` module defines the following constants for the possible values +of the :attr:`variant` attribute: + + +.. data:: RESERVED_NCS + + Reserved for NCS compatibility. + + +.. data:: RFC_4122 + + Specifies the UUID layout given in :rfc:`4122`. + + +.. data:: RESERVED_MICROSOFT + + Reserved for Microsoft compatibility. + + +.. data:: RESERVED_FUTURE + + Reserved for future definition. + + +.. seealso:: + + :rfc:`4122` - A Universally Unique IDentifier (UUID) URN Namespace + This specification defines a Uniform Resource Name namespace for UUIDs, the + internal format of UUIDs, and methods of generating UUIDs. + + +.. 
_uuid-example: + +Example +------- + +Here are some examples of typical usage of the :mod:`uuid` module:: + + >>> import uuid + + # make a UUID based on the host ID and current time + >>> uuid.uuid1() + UUID('a8098c1a-f86e-11da-bd1a-00112444be1e') + + # make a UUID using an MD5 hash of a namespace UUID and a name + >>> uuid.uuid3(uuid.NAMESPACE_DNS, 'python.org') + UUID('6fa459ea-ee8a-3ca4-894e-db77e160355e') + + # make a random UUID + >>> uuid.uuid4() + UUID('16fd2706-8baf-433b-82eb-8c7fada847da') + + # make a UUID using a SHA-1 hash of a namespace UUID and a name + >>> uuid.uuid5(uuid.NAMESPACE_DNS, 'python.org') + UUID('886313e1-3b8a-5372-9b90-0c9aee199e5d') + + # make a UUID from a string of hex digits (braces and hyphens ignored) + >>> x = uuid.UUID('{00010203-0405-0607-0809-0a0b0c0d0e0f}') + + # convert a UUID to a string of hex digits in standard form + >>> str(x) + '00010203-0405-0607-0809-0a0b0c0d0e0f' + + # get the raw 16 bytes of the UUID + >>> x.bytes + '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f' + + # make a UUID from a 16-byte string + >>> uuid.UUID(bytes=x.bytes) + UUID('00010203-0405-0607-0809-0a0b0c0d0e0f') + diff --git a/Doc/library/warnings.rst b/Doc/library/warnings.rst new file mode 100644 index 0000000..35e9888 --- /dev/null +++ b/Doc/library/warnings.rst @@ -0,0 +1,242 @@ + +:mod:`warnings` --- Warning control +=================================== + +.. index:: single: warnings + +.. module:: warnings + :synopsis: Issue warning messages and control their disposition. + + +.. versionadded:: 2.1 + +Warning messages are typically issued in situations where it is useful to alert +the user of some condition in a program, where that condition (normally) doesn't +warrant raising an exception and terminating the program. For example, one +might want to issue a warning when a program uses an obsolete module. + +Python programmers issue warnings by calling the :func:`warn` function defined +in this module. 
(C programmers use :cfunc:`PyErr_WarnEx`; see +:ref:`exceptionhandling` for details). + +Warning messages are normally written to ``sys.stderr``, but their disposition +can be changed flexibly, from ignoring all warnings to turning them into +exceptions. The disposition of warnings can vary based on the warning category +(see below), the text of the warning message, and the source location where it +is issued. Repetitions of a particular warning for the same source location are +typically suppressed. + +There are two stages in warning control: first, each time a warning is issued, a +determination is made whether a message should be issued or not; next, if a +message is to be issued, it is formatted and printed using a user-settable hook. + +The determination whether to issue a warning message is controlled by the +warning filter, which is a sequence of matching rules and actions. Rules can be +added to the filter by calling :func:`filterwarnings` and reset to its default +state by calling :func:`resetwarnings`. + +The printing of warning messages is done by calling :func:`showwarning`, which +may be overridden; the default implementation of this function formats the +message by calling :func:`formatwarning`, which is also available for use by +custom implementations. + + +.. _warning-categories: + +Warning Categories +------------------ + +There are a number of built-in exceptions that represent warning categories. +This categorization is useful to be able to filter out groups of warnings. The +following warnings category classes are currently defined: + ++----------------------------------+-----------------------------------------------+ +| Class | Description | ++==================================+===============================================+ +| :exc:`Warning` | This is the base class of all warning | +| | category classes. It is a subclass of | +| | :exc:`Exception`. 
| ++----------------------------------+-----------------------------------------------+ +| :exc:`UserWarning` | The default category for :func:`warn`. | ++----------------------------------+-----------------------------------------------+ +| :exc:`DeprecationWarning` | Base category for warnings about deprecated | +| | features. | ++----------------------------------+-----------------------------------------------+ +| :exc:`SyntaxWarning` | Base category for warnings about dubious | +| | syntactic features. | ++----------------------------------+-----------------------------------------------+ +| :exc:`RuntimeWarning` | Base category for warnings about dubious | +| | runtime features. | ++----------------------------------+-----------------------------------------------+ +| :exc:`FutureWarning` | Base category for warnings about constructs | +| | that will change semantically in the future. | ++----------------------------------+-----------------------------------------------+ +| :exc:`PendingDeprecationWarning` | Base category for warnings about features | +| | that will be deprecated in the future | +| | (ignored by default). | ++----------------------------------+-----------------------------------------------+ +| :exc:`ImportWarning` | Base category for warnings triggered during | +| | the process of importing a module (ignored by | +| | default). | ++----------------------------------+-----------------------------------------------+ +| :exc:`UnicodeWarning` | Base category for warnings related to | +| | Unicode. | ++----------------------------------+-----------------------------------------------+ + +While these are technically built-in exceptions, they are documented here, +because conceptually they belong to the warnings mechanism. + +User code can define additional warning categories by subclassing one of the +standard warning categories. A warning category must always be a subclass of +the :exc:`Warning` class. + + +.. 
_warning-filter: + +The Warnings Filter +------------------- + +The warnings filter controls whether warnings are ignored, displayed, or turned +into errors (raising an exception). + +Conceptually, the warnings filter maintains an ordered list of filter +specifications; any specific warning is matched against each filter +specification in the list in turn until a match is found; the match determines +the disposition of the match. Each entry is a tuple of the form (*action*, +*message*, *category*, *module*, *lineno*), where: + +* *action* is one of the following strings: + + +---------------+----------------------------------------------+ + | Value | Disposition | + +===============+==============================================+ + | ``"error"`` | turn matching warnings into exceptions | + +---------------+----------------------------------------------+ + | ``"ignore"`` | never print matching warnings | + +---------------+----------------------------------------------+ + | ``"always"`` | always print matching warnings | + +---------------+----------------------------------------------+ + | ``"default"`` | print the first occurrence of matching | + | | warnings for each location where the warning | + | | is issued | + +---------------+----------------------------------------------+ + | ``"module"`` | print the first occurrence of matching | + | | warnings for each module where the warning | + | | is issued | + +---------------+----------------------------------------------+ + | ``"once"`` | print only the first occurrence of matching | + | | warnings, regardless of location | + +---------------+----------------------------------------------+ + +* *message* is a string containing a regular expression that the warning message + must match (the match is compiled to always be case-insensitive) + +* *category* is a class (a subclass of :exc:`Warning`) of which the warning + category must be a subclass in order to match + +* *module* is a string containing a regular 
expression that the module name must + match (the match is compiled to be case-sensitive) + +* *lineno* is an integer that the line number where the warning occurred must + match, or ``0`` to match all line numbers + +Since the :exc:`Warning` class is derived from the built-in :exc:`Exception` +class, to turn a warning into an error we simply raise ``category(message)``. + +The warnings filter is initialized by :option:`-W` options passed to the Python +interpreter command line. The interpreter saves the arguments for all +:option:`-W` options without interpretation in ``sys.warnoptions``; the +:mod:`warnings` module parses these when it is first imported (invalid options +are ignored, after printing a message to ``sys.stderr``). + +The warnings that are ignored by default may be enabled by passing :option:`-Wd` +to the interpreter. This enables default handling for all warnings, including +those that are normally ignored by default. This is particularly useful for +enabling ImportWarning when debugging problems importing a developed package. +ImportWarning can also be enabled explicitly in Python code using:: + + warnings.simplefilter('default', ImportWarning) + + +.. _warning-functions: + +Available Functions +------------------- + + +.. function:: warn(message[, category[, stacklevel]]) + + Issue a warning, or maybe ignore it or raise an exception. The *category* + argument, if given, must be a warning category class (see above); it defaults to + :exc:`UserWarning`. Alternatively *message* can be a :exc:`Warning` instance, + in which case *category* will be ignored and ``message.__class__`` will be used. + In this case the message text will be ``str(message)``. This function raises an + exception if the particular warning issued is changed into an error by the + warnings filter (see above).
The *stacklevel* argument can be used by wrapper + functions written in Python, like this:: + + def deprecation(message): + warnings.warn(message, DeprecationWarning, stacklevel=2) + + This makes the warning refer to :func:`deprecation`'s caller, rather than to the + source of :func:`deprecation` itself (since the latter would defeat the purpose + of the warning message). + + +.. function:: warn_explicit(message, category, filename, lineno[, module[, registry[, module_globals]]]) + + This is a low-level interface to the functionality of :func:`warn`, passing in + explicitly the message, category, filename and line number, and optionally the + module name and the registry (which should be the ``__warningregistry__`` + dictionary of the module). The module name defaults to the filename with + ``.py`` stripped; if no registry is passed, the warning is never suppressed. + *message* must be a string and *category* a subclass of :exc:`Warning` or + *message* may be a :exc:`Warning` instance, in which case *category* will be + ignored. + + *module_globals*, if supplied, should be the global namespace in use by the code + for which the warning is issued. (This argument is used to support displaying + source for modules found in zipfiles or other non-filesystem import sources, and + was added in Python 2.5.) + + +.. function:: showwarning(message, category, filename, lineno[, file]) + + Write a warning to a file. The default implementation calls + ``formatwarning(message, category, filename, lineno)`` and writes the resulting + string to *file*, which defaults to ``sys.stderr``. You may replace this + function with an alternative implementation by assigning to + ``warnings.showwarning``. + + +.. function:: formatwarning(message, category, filename, lineno) + + Format a warning the standard way. This returns a string which may contain + embedded newlines and ends in a newline. + + +.. 
function:: filterwarnings(action[, message[, category[, module[, lineno[, append]]]]]) + + Insert an entry into the list of warnings filters. The entry is inserted at the + front by default; if *append* is true, it is inserted at the end. This checks + the types of the arguments, compiles the message and module regular expressions, + and inserts them as a tuple in the list of warnings filters. Entries closer to + the front of the list override entries later in the list, if both match a + particular warning. Omitted arguments default to a value that matches + everything. + + +.. function:: simplefilter(action[, category[, lineno[, append]]]) + + Insert a simple entry into the list of warnings filters. The meaning of the + function parameters is as for :func:`filterwarnings`, but regular expressions + are not needed as the filter inserted always matches any message in any module + as long as the category and line number match. + + +.. function:: resetwarnings() + + Reset the warnings filter. This discards the effect of all previous calls to + :func:`filterwarnings`, including that of the :option:`-W` command line options + and calls to :func:`simplefilter`. + diff --git a/Doc/library/wave.rst b/Doc/library/wave.rst new file mode 100644 index 0000000..d03f091 --- /dev/null +++ b/Doc/library/wave.rst @@ -0,0 +1,201 @@ +.. % Documentations stolen and LaTeX'ed from comments in file. + + +:mod:`wave` --- Read and write WAV files +======================================== + +.. module:: wave + :synopsis: Provide an interface to the WAV sound format. +.. sectionauthor:: Moshe Zadka <moshez@zadka.site.co.il> + + +The :mod:`wave` module provides a convenient interface to the WAV sound format. +It does not support compression/decompression, but it does support mono/stereo. + +The :mod:`wave` module defines the following function and exception: + + +.. 
function:: open(file[, mode]) + + If *file* is a string, open the file by that name, otherwise treat it as a seekable + file-like object. *mode* can be any of + + ``'r'``, ``'rb'`` + Read only mode. + + ``'w'``, ``'wb'`` + Write only mode. + + Note that it does not allow read/write WAV files. + + A *mode* of ``'r'`` or ``'rb'`` returns a :class:`Wave_read` object, while a + *mode* of ``'w'`` or ``'wb'`` returns a :class:`Wave_write` object. If *mode* + is omitted and a file-like object is passed as *file*, ``file.mode`` is used as + the default value for *mode* (the ``'b'`` flag is still added if necessary). + + +.. function:: openfp(file, mode) + + A synonym for :func:`open`, maintained for backwards compatibility. + + +.. exception:: Error + + An error raised when something is impossible because it violates the WAV + specification or hits an implementation deficiency. + + +.. _wave-read-objects: + +Wave_read Objects +----------------- + +Wave_read objects, as returned by :func:`open`, have the following methods: + + +.. method:: Wave_read.close() + + Close the stream, and make the instance unusable. This is called automatically + on object collection. + + +.. method:: Wave_read.getnchannels() + + Returns number of audio channels (``1`` for mono, ``2`` for stereo). + + +.. method:: Wave_read.getsampwidth() + + Returns sample width in bytes. + + +.. method:: Wave_read.getframerate() + + Returns sampling frequency. + + +.. method:: Wave_read.getnframes() + + Returns number of audio frames. + + +.. method:: Wave_read.getcomptype() + + Returns compression type (``'NONE'`` is the only supported type). + + +.. method:: Wave_read.getcompname() + + Human-readable version of :meth:`getcomptype`. Usually ``'not compressed'`` + parallels ``'NONE'``. + + +.. method:: Wave_read.getparams() + + Returns a tuple ``(nchannels, sampwidth, framerate, nframes, comptype, + compname)``, equivalent to output of the :meth:`get\*` methods. + + +..
method:: Wave_read.readframes(n) + + Reads and returns at most *n* frames of audio, as a string of bytes. + + +.. method:: Wave_read.rewind() + + Rewind the file pointer to the beginning of the audio stream. + +The following two methods are defined for compatibility with the :mod:`aifc` +module, and don't do anything interesting. + + +.. method:: Wave_read.getmarkers() + + Returns ``None``. + + +.. method:: Wave_read.getmark(id) + + Raise an error. + +The following two methods define a term "position" which is compatible between +them, and is otherwise implementation dependent. + + +.. method:: Wave_read.setpos(pos) + + Set the file pointer to the specified position. + + +.. method:: Wave_read.tell() + + Return current file pointer position. + + +.. _wave-write-objects: + +Wave_write Objects +------------------ + +Wave_write objects, as returned by :func:`open`, have the following methods: + + +.. method:: Wave_write.close() + + Make sure *nframes* is correct, and close the file. This method is called upon + deletion. + + +.. method:: Wave_write.setnchannels(n) + + Set the number of channels. + + +.. method:: Wave_write.setsampwidth(n) + + Set the sample width to *n* bytes. + + +.. method:: Wave_write.setframerate(n) + + Set the frame rate to *n*. + + +.. method:: Wave_write.setnframes(n) + + Set the number of frames to *n*. This will be changed later if more frames are + written. + + +.. method:: Wave_write.setcomptype(type, name) + + Set the compression type and description. At the moment, only compression type + ``NONE`` is supported, meaning no compression. + + +.. method:: Wave_write.setparams(tuple) + + The *tuple* should be ``(nchannels, sampwidth, framerate, nframes, comptype, + compname)``, with values valid for the :meth:`set\*` methods. Sets all + parameters. + + +.. method:: Wave_write.tell() + + Return current position in the file, with the same disclaimer for the + :meth:`Wave_read.tell` and :meth:`Wave_read.setpos` methods. + + +.. 
method:: Wave_write.writeframesraw(data) + + Write audio frames, without correcting *nframes*. + + +.. method:: Wave_write.writeframes(data) + + Write audio frames and make sure *nframes* is correct. + +Note that it is invalid to set any parameters after calling :meth:`writeframes` +or :meth:`writeframesraw`, and any attempt to do so will raise +:exc:`wave.Error`. + diff --git a/Doc/library/weakref.rst b/Doc/library/weakref.rst new file mode 100644 index 0000000..c5857ba --- /dev/null +++ b/Doc/library/weakref.rst @@ -0,0 +1,330 @@ + +:mod:`weakref` --- Weak references +================================== + +.. module:: weakref + :synopsis: Support for weak references and weak dictionaries. +.. moduleauthor:: Fred L. Drake, Jr. <fdrake@acm.org> +.. moduleauthor:: Neil Schemenauer <nas@arctrix.com> +.. moduleauthor:: Martin von Löwis <martin@loewis.home.cs.tu-berlin.de> +.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org> + + +.. versionadded:: 2.1 + +The :mod:`weakref` module allows the Python programmer to create :dfn:`weak +references` to objects. + +.. % When making changes to the examples in this file, be sure to update +.. % Lib/test/test_weakref.py::libreftest too! + +In the following, the term :dfn:`referent` means the object which is referred to +by a weak reference. + +A weak reference to an object is not enough to keep the object alive: when the +only remaining references to a referent are weak references, garbage collection +is free to destroy the referent and reuse its memory for something else. A +primary use for weak references is to implement caches or mappings holding large +objects, where it's desired that a large object not be kept alive solely because +it appears in a cache or mapping. For example, if you have a number of large +binary image objects, you may wish to associate a name with each. 
If you used a +Python dictionary to map names to images, or images to names, the image objects +would remain alive just because they appeared as values or keys in the +dictionaries. The :class:`WeakKeyDictionary` and :class:`WeakValueDictionary` +classes supplied by the :mod:`weakref` module are an alternative, using weak +references to construct mappings that don't keep objects alive solely because +they appear in the mapping objects. If, for example, an image object is a value +in a :class:`WeakValueDictionary`, then when the last remaining references to +that image object are the weak references held by weak mappings, garbage +collection can reclaim the object, and its corresponding entries in weak +mappings are simply deleted. + +:class:`WeakKeyDictionary` and :class:`WeakValueDictionary` use weak references +in their implementation, setting up callback functions on the weak references +that notify the weak dictionaries when a key or value has been reclaimed by +garbage collection. Most programs should find that using one of these weak +dictionary types is all they need -- it's not usually necessary to create your +own weak references directly. The low-level machinery used by the weak +dictionary implementations is exposed by the :mod:`weakref` module for the +benefit of advanced uses. + +Not all objects can be weakly referenced; those objects which can include class +instances, functions written in Python (but not in C), methods (both bound and +unbound), sets, frozensets, file objects, generators, type objects, DBcursor +objects from the :mod:`bsddb` module, sockets, arrays, deques, and regular +expression pattern objects. + +.. versionchanged:: 2.4 + Added support for files, sockets, arrays, and patterns. 
+ +Several builtin types such as :class:`list` and :class:`dict` do not directly +support weak references but can add support through subclassing:: + + class Dict(dict): + pass + + obj = Dict(red=1, green=2, blue=3) # this object is weak referencable + +Extension types can easily be made to support weak references; see +:ref:`weakref-support`. + + +.. class:: ref(object[, callback]) + + Return a weak reference to *object*. The original object can be retrieved by + calling the reference object if the referent is still alive; if the referent is + no longer alive, calling the reference object will cause :const:`None` to be + returned. If *callback* is provided and not :const:`None`, and the returned + weakref object is still alive, the callback will be called when the object is + about to be finalized; the weak reference object will be passed as the only + parameter to the callback; the referent will no longer be available. + + It is allowable for many weak references to be constructed for the same object. + Callbacks registered for each weak reference will be called from the most + recently registered callback to the oldest registered callback. + + Exceptions raised by the callback will be noted on the standard error output, + but cannot be propagated; they are handled in exactly the same way as exceptions + raised from an object's :meth:`__del__` method. + + Weak references are hashable if the *object* is hashable. They will maintain + their hash value even after the *object* was deleted. If :func:`hash` is called + the first time only after the *object* was deleted, the call will raise + :exc:`TypeError`. + + Weak references support tests for equality, but not ordering. If the referents + are still alive, two references have the same equality relationship as their + referents (regardless of the *callback*). If either referent has been deleted, + the references are equal only if the reference objects are the same object. + + .. 
versionchanged:: 2.4 + This is now a subclassable type rather than a factory function; it derives from + :class:`object`. + + +.. function:: proxy(object[, callback]) + + Return a proxy to *object* which uses a weak reference. This supports use of + the proxy in most contexts instead of requiring the explicit dereferencing used + with weak reference objects. The returned object will have a type of either + ``ProxyType`` or ``CallableProxyType``, depending on whether *object* is + callable. Proxy objects are not hashable regardless of the referent; this + avoids a number of problems related to their fundamentally mutable nature, and + prevents their use as dictionary keys. *callback* is the same as the parameter + of the same name to the :func:`ref` function. + + +.. function:: getweakrefcount(object) + + Return the number of weak references and proxies which refer to *object*. + + +.. function:: getweakrefs(object) + + Return a list of all weak reference and proxy objects which refer to *object*. + + +.. class:: WeakKeyDictionary([dict]) + + Mapping class that references keys weakly. Entries in the dictionary will be + discarded when there is no longer a strong reference to the key. This can be + used to associate additional data with an object owned by other parts of an + application without adding attributes to those objects. This can be especially + useful with objects that override attribute accesses. + + .. note:: + + Caution: Because a :class:`WeakKeyDictionary` is built on top of a Python + dictionary, it must not change size when iterating over it. This can be + difficult to ensure for a :class:`WeakKeyDictionary` because actions performed + by the program during iteration may cause items in the dictionary to vanish "by + magic" (as a side effect of garbage collection). + +:class:`WeakKeyDictionary` objects have the following additional methods. These +expose the internal references directly. 
The references are not guaranteed to +be "live" at the time they are used, so the result of calling the references +needs to be checked before being used. This can be used to avoid creating +references that will cause the garbage collector to keep the keys around longer +than needed. + + +.. method:: WeakKeyDictionary.iterkeyrefs() + + Return an iterator that yields the weak references to the keys. + + .. versionadded:: 2.5 + + +.. method:: WeakKeyDictionary.keyrefs() + + Return a list of weak references to the keys. + + .. versionadded:: 2.5 + + +.. class:: WeakValueDictionary([dict]) + + Mapping class that references values weakly. Entries in the dictionary will be + discarded when no strong reference to the value exists any more. + + .. note:: + + Caution: Because a :class:`WeakValueDictionary` is built on top of a Python + dictionary, it must not change size when iterating over it. This can be + difficult to ensure for a :class:`WeakValueDictionary` because actions performed + by the program during iteration may cause items in the dictionary to vanish "by + magic" (as a side effect of garbage collection). + +:class:`WeakValueDictionary` objects have the following additional methods. +These methods have the same issues as the :meth:`iterkeyrefs` and :meth:`keyrefs` +methods of :class:`WeakKeyDictionary` objects. + + +.. method:: WeakValueDictionary.itervaluerefs() + + Return an iterator that yields the weak references to the values. + + .. versionadded:: 2.5 + + +.. method:: WeakValueDictionary.valuerefs() + + Return a list of weak references to the values. + + .. versionadded:: 2.5 + + +.. data:: ReferenceType + + The type object for weak reference objects. + + +.. data:: ProxyType + + The type object for proxies of objects which are not callable. + + +.. data:: CallableProxyType + + The type object for proxies of callable objects. + + +.. data:: ProxyTypes + + Sequence containing all the type objects for proxies. 
This can make it simpler + to test if an object is a proxy without being dependent on naming both proxy + types. + + +.. exception:: ReferenceError + + Exception raised when a proxy object is used but the underlying object has been + collected. This is the same as the standard :exc:`ReferenceError` exception. + + +.. seealso:: + + :pep:`0205` - Weak References + The proposal and rationale for this feature, including links to earlier + implementations and information about similar features in other languages. + + +.. _weakref-objects: + +Weak Reference Objects +---------------------- + +Weak reference objects have no attributes or methods, but do allow the referent +to be obtained, if it still exists, by calling it:: + + >>> import weakref + >>> class Object: + ... pass + ... + >>> o = Object() + >>> r = weakref.ref(o) + >>> o2 = r() + >>> o is o2 + True + +If the referent no longer exists, calling the reference object returns +:const:`None`:: + + >>> del o, o2 + >>> print r() + None + +Testing that a weak reference object is still live should be done using the +expression ``ref() is not None``. Normally, application code that needs to use +a reference object should follow this pattern:: + + # r is a weak reference object + o = r() + if o is None: + # referent has been garbage collected + print "Object has been deallocated; can't frobnicate." + else: + print "Object is still live!" + o.do_something_useful() + +Using a separate test for "liveness" creates race conditions in threaded +applications; another thread can cause a weak reference to become invalidated +before the weak reference is called; the idiom shown above is safe in threaded +applications as well as single-threaded applications. + +Specialized versions of :class:`ref` objects can be created through subclassing. +This is used in the implementation of the :class:`WeakValueDictionary` to reduce +the memory overhead for each entry in the mapping. 
This may be most useful to +associate additional information with a reference, but could also be used to +insert additional processing on calls to retrieve the referent. + +This example shows how a subclass of :class:`ref` can be used to store +additional information about an object and affect the value that's returned when +the referent is accessed:: + + import weakref + + class ExtendedRef(weakref.ref): + def __init__(self, ob, callback=None, **annotations): + super(ExtendedRef, self).__init__(ob, callback) + self.__counter = 0 + for k, v in annotations.iteritems(): + setattr(self, k, v) + + def __call__(self): + """Return a pair containing the referent and the number of + times the reference has been called. + """ + ob = super(ExtendedRef, self).__call__() + if ob is not None: + self.__counter += 1 + ob = (ob, self.__counter) + return ob + + +.. _weakref-example: + +Example +------- + +This simple example shows how an application can use object IDs to retrieve +objects that it has seen before. The IDs of the objects can then be used in +other data structures without forcing the objects to remain alive, but the +objects can still be retrieved by ID if they do. + +.. % Example contributed by Tim Peters. + +:: + + import weakref + + _id2obj_dict = weakref.WeakValueDictionary() + + def remember(obj): + oid = id(obj) + _id2obj_dict[oid] = obj + return oid + + def id2obj(oid): + return _id2obj_dict[oid] + diff --git a/Doc/library/webbrowser.rst b/Doc/library/webbrowser.rst new file mode 100644 index 0000000..c243f7c --- /dev/null +++ b/Doc/library/webbrowser.rst @@ -0,0 +1,199 @@ + +:mod:`webbrowser` --- Convenient Web-browser controller +======================================================= + +.. module:: webbrowser + :synopsis: Easy-to-use controller for Web browsers. +.. moduleauthor:: Fred L. Drake, Jr. <fdrake@acm.org> +.. sectionauthor:: Fred L. Drake, Jr. 
<fdrake@acm.org> + + +The :mod:`webbrowser` module provides a high-level interface to allow displaying +Web-based documents to users. Under most circumstances, simply calling the +:func:`open` function from this module will do the right thing. + +Under Unix, graphical browsers are preferred under X11, but text-mode browsers +will be used if graphical browsers are not available or an X11 display isn't +available. If text-mode browsers are used, the calling process will block until +the user exits the browser. + +If the environment variable :envvar:`BROWSER` exists, it is interpreted to +override the platform default list of browsers, as a os.pathsep-separated list +of browsers to try in order. When the value of a list part contains the string +``%s``, then it is interpreted as a literal browser command line to be used +with the argument URL substituted for ``%s``; if the part does not contain +``%s``, it is simply interpreted as the name of the browser to launch. + +For non-Unix platforms, or when a remote browser is available on Unix, the +controlling process will not wait for the user to finish with the browser, but +allow the remote browser to maintain its own windows on the display. If remote +browsers are not available on Unix, the controlling process will launch a new +browser and wait. + +The script :program:`webbrowser` can be used as a command-line interface for the +module. It accepts an URL as the argument. It accepts the following optional +parameters: :option:`-n` opens the URL in a new browser window, if possible; +:option:`-t` opens the URL in a new browser page ("tab"). The options are, +naturally, mutually exclusive. + +The following exception is defined: + + +.. exception:: Error + + Exception raised when a browser control error occurs. + +The following functions are defined: + + +.. function:: open(url[, new=0[, autoraise=1]]) + + Display *url* using the default browser. If *new* is 0, the *url* is opened in + the same browser window if possible. 
If *new* is 1, a new browser window is + opened if possible. If *new* is 2, a new browser page ("tab") is opened if + possible. If *autoraise* is true, the window is raised if possible (note that + under many window managers this will occur regardless of the setting of this + variable). + + .. versionchanged:: 2.5 + *new* can now be 2. + + +.. function:: open_new(url) + + Open *url* in a new window of the default browser, if possible, otherwise, open + *url* in the only browser window. + + +.. function:: open_new_tab(url) + + Open *url* in a new page ("tab") of the default browser, if possible, otherwise + equivalent to :func:`open_new`. + + .. versionadded:: 2.5 + + +.. function:: get([name]) + + Return a controller object for the browser type *name*. If *name* is empty, + return a controller for a default browser appropriate to the caller's + environment. + + +.. function:: register(name, constructor[, instance]) + + Register the browser type *name*. Once a browser type is registered, the + :func:`get` function can return a controller for that browser type. If + *instance* is not provided, or is ``None``, *constructor* will be called without + parameters to create an instance when needed. If *instance* is provided, + *constructor* will never be called, and may be ``None``. + + This entry point is only useful if you plan to either set the :envvar:`BROWSER` + variable or call :func:`get` with a nonempty argument matching the name of a + handler you declare. + +A number of browser types are predefined. This table gives the type names that +may be passed to the :func:`get` function and the corresponding instantiations +for the controller classes, all defined in this module. 
+ ++-----------------------+-----------------------------------------+-------+ +| Type Name | Class Name | Notes | ++=======================+=========================================+=======+ +| ``'mozilla'`` | :class:`Mozilla('mozilla')` | | ++-----------------------+-----------------------------------------+-------+ +| ``'firefox'`` | :class:`Mozilla('mozilla')` | | ++-----------------------+-----------------------------------------+-------+ +| ``'netscape'`` | :class:`Mozilla('netscape')` | | ++-----------------------+-----------------------------------------+-------+ +| ``'galeon'`` | :class:`Galeon('galeon')` | | ++-----------------------+-----------------------------------------+-------+ +| ``'epiphany'`` | :class:`Galeon('epiphany')` | | ++-----------------------+-----------------------------------------+-------+ +| ``'skipstone'`` | :class:`BackgroundBrowser('skipstone')` | | ++-----------------------+-----------------------------------------+-------+ +| ``'kfmclient'`` | :class:`Konqueror()` | \(1) | ++-----------------------+-----------------------------------------+-------+ +| ``'konqueror'`` | :class:`Konqueror()` | \(1) | ++-----------------------+-----------------------------------------+-------+ +| ``'kfm'`` | :class:`Konqueror()` | \(1) | ++-----------------------+-----------------------------------------+-------+ +| ``'mosaic'`` | :class:`BackgroundBrowser('mosaic')` | | ++-----------------------+-----------------------------------------+-------+ +| ``'opera'`` | :class:`Opera()` | | ++-----------------------+-----------------------------------------+-------+ +| ``'grail'`` | :class:`Grail()` | | ++-----------------------+-----------------------------------------+-------+ +| ``'links'`` | :class:`GenericBrowser('links')` | | ++-----------------------+-----------------------------------------+-------+ +| ``'elinks'`` | :class:`Elinks('elinks')` | | ++-----------------------+-----------------------------------------+-------+ +| ``'lynx'`` | 
:class:`GenericBrowser('lynx')` | | ++-----------------------+-----------------------------------------+-------+ +| ``'w3m'`` | :class:`GenericBrowser('w3m')` | | ++-----------------------+-----------------------------------------+-------+ +| ``'windows-default'`` | :class:`WindowsDefault` | \(2) | ++-----------------------+-----------------------------------------+-------+ +| ``'internet-config'`` | :class:`InternetConfig` | \(3) | ++-----------------------+-----------------------------------------+-------+ +| ``'macosx'`` | :class:`MacOSX('default')` | \(4) | ++-----------------------+-----------------------------------------+-------+ + +Notes: + +(1) + "Konqueror" is the file manager for the KDE desktop environment for Unix, and + only makes sense to use if KDE is running. Some way of reliably detecting KDE + would be nice; the :envvar:`KDEDIR` variable is not sufficient. Note also that + the name "kfm" is used even when using the :program:`konqueror` command with KDE + 2 --- the implementation selects the best strategy for running Konqueror. + +(2) + Only on Windows platforms. + +(3) + Only on MacOS platforms; requires the standard MacPython :mod:`ic` module. + +(4) + Only on MacOS X platform. + +Here are some simple examples:: + + url = 'http://www.python.org' + + # Open URL in a new tab, if a browser window is already open. + webbrowser.open_new_tab(url + '/doc') + + # Open URL in new window, raising the window if possible. + webbrowser.open_new(url) + + +.. _browser-controllers: + +Browser Controller Objects +-------------------------- + +Browser controllers provide two methods which parallel two of the module-level +convenience functions: + + +.. method:: controller.open(url[, new[, autoraise=1]]) + + Display *url* using the browser handled by this controller. If *new* is 1, a new + browser window is opened if possible. If *new* is 2, a new browser page ("tab") + is opened if possible. + + +.. 
method:: controller.open_new(url) + + Open *url* in a new window of the browser handled by this controller, if + possible, otherwise, open *url* in the only browser window. Alias + :func:`open_new`. + + +.. method:: controller.open_new_tab(url) + + Open *url* in a new page ("tab") of the browser handled by this controller, if + possible, otherwise equivalent to :func:`open_new`. + + .. versionadded:: 2.5 + diff --git a/Doc/library/whichdb.rst b/Doc/library/whichdb.rst new file mode 100644 index 0000000..5c69818 --- /dev/null +++ b/Doc/library/whichdb.rst @@ -0,0 +1,20 @@ + +:mod:`whichdb` --- Guess which DBM module created a database +============================================================ + +.. module:: whichdb + :synopsis: Guess which DBM-style module created a given database. + + +The single function in this module attempts to guess which of the several simple +database modules available--\ :mod:`dbm`, :mod:`gdbm`, or :mod:`dbhash`\ +--should be used to open a given file. + + +.. function:: whichdb(filename) + + Returns one of the following values: ``None`` if the file can't be opened + because it's unreadable or doesn't exist; the empty string (``''``) if the + file's format can't be guessed; or a string containing the required module name, + such as ``'dbm'`` or ``'gdbm'``. + diff --git a/Doc/library/windows.rst b/Doc/library/windows.rst new file mode 100644 index 0000000..a231bc2 --- /dev/null +++ b/Doc/library/windows.rst @@ -0,0 +1,14 @@ + +**************************** +MS Windows Specific Services +**************************** + +This chapter describes modules that are only available on MS Windows platforms. + + +.. 
toctree:: + + msilib.rst + msvcrt.rst + _winreg.rst + winsound.rst diff --git a/Doc/library/winsound.rst b/Doc/library/winsound.rst new file mode 100644 index 0000000..c4c04bd --- /dev/null +++ b/Doc/library/winsound.rst @@ -0,0 +1,162 @@ + +:mod:`winsound` --- Sound-playing interface for Windows +======================================================= + +.. module:: winsound + :platform: Windows + :synopsis: Access to the sound-playing machinery for Windows. +.. moduleauthor:: Toby Dickenson <htrd90@zepler.org> +.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org> + + +.. versionadded:: 1.5.2 + +The :mod:`winsound` module provides access to the basic sound-playing machinery +provided by Windows platforms. It includes functions and several constants. + + +.. function:: Beep(frequency, duration) + + Beep the PC's speaker. The *frequency* parameter specifies frequency, in hertz, + of the sound, and must be in the range 37 through 32,767. The *duration* + parameter specifies the number of milliseconds the sound should last. If the + system is not able to beep the speaker, :exc:`RuntimeError` is raised. + + .. note:: + + Under Windows 95 and 98, the Windows :cfunc:`Beep` function exists but is + useless (it ignores its arguments). In that case Python simulates it via direct + port manipulation (added in version 2.1). It's unknown whether that will work + on all systems. + + .. versionadded:: 1.6 + + +.. function:: PlaySound(sound, flags) + + Call the underlying :cfunc:`PlaySound` function from the Platform API. The + *sound* parameter may be a filename, audio data as a string, or ``None``. Its + interpretation depends on the value of *flags*, which can be a bit-wise ORed + combination of the constants described below. If the system indicates an error, + :exc:`RuntimeError` is raised. + + +.. function:: MessageBeep([type=MB_OK]) + + Call the underlying :cfunc:`MessageBeep` function from the Platform API. This + plays a sound as specified in the registry. 
The *type* argument specifies which + sound to play; possible values are ``-1``, ``MB_ICONASTERISK``, + ``MB_ICONEXCLAMATION``, ``MB_ICONHAND``, ``MB_ICONQUESTION``, and ``MB_OK``, all + described below. The value ``-1`` produces a "simple beep"; this is the final + fallback if a sound cannot be played otherwise. + + .. versionadded:: 2.3 + + +.. data:: SND_FILENAME + + The *sound* parameter is the name of a WAV file. Do not use with + :const:`SND_ALIAS`. + + +.. data:: SND_ALIAS + + The *sound* parameter is a sound association name from the registry. If the + registry contains no such name, play the system default sound unless + :const:`SND_NODEFAULT` is also specified. If no default sound is registered, + raise :exc:`RuntimeError`. Do not use with :const:`SND_FILENAME`. + + All Win32 systems support at least the following; most systems support many + more: + + +--------------------------+----------------------------------------+ + | :func:`PlaySound` *name* | Corresponding Control Panel Sound name | + +==========================+========================================+ + | ``'SystemAsterisk'`` | Asterisk | + +--------------------------+----------------------------------------+ + | ``'SystemExclamation'`` | Exclamation | + +--------------------------+----------------------------------------+ + | ``'SystemExit'`` | Exit Windows | + +--------------------------+----------------------------------------+ + | ``'SystemHand'`` | Critical Stop | + +--------------------------+----------------------------------------+ + | ``'SystemQuestion'`` | Question | + +--------------------------+----------------------------------------+ + + For example:: + + import winsound + # Play Windows exit sound. + winsound.PlaySound("SystemExit", winsound.SND_ALIAS) + + # Probably play Windows default sound, if any is registered (because + # "*" probably isn't the registered name of any sound). + winsound.PlaySound("*", winsound.SND_ALIAS) + + +.. data:: SND_LOOP + + Play the sound repeatedly. 
The :const:`SND_ASYNC` flag must also be used to + avoid blocking. Cannot be used with :const:`SND_MEMORY`. + + +.. data:: SND_MEMORY + + The *sound* parameter to :func:`PlaySound` is a memory image of a WAV file, as a + string. + + .. note:: + + This module does not support playing from a memory image asynchronously, so a + combination of this flag and :const:`SND_ASYNC` will raise :exc:`RuntimeError`. + + +.. data:: SND_PURGE + + Stop playing all instances of the specified sound. + + +.. data:: SND_ASYNC + + Return immediately, allowing sounds to play asynchronously. + + +.. data:: SND_NODEFAULT + + If the specified sound cannot be found, do not play the system default sound. + + +.. data:: SND_NOSTOP + + Do not interrupt sounds currently playing. + + +.. data:: SND_NOWAIT + + Return immediately if the sound driver is busy. + + +.. data:: MB_ICONASTERISK + + Play the ``SystemDefault`` sound. + + +.. data:: MB_ICONEXCLAMATION + + Play the ``SystemExclamation`` sound. + + +.. data:: MB_ICONHAND + + Play the ``SystemHand`` sound. + + +.. data:: MB_ICONQUESTION + + Play the ``SystemQuestion`` sound. + + +.. data:: MB_OK + + Play the ``SystemDefault`` sound. + diff --git a/Doc/library/wsgiref.rst b/Doc/library/wsgiref.rst new file mode 100644 index 0000000..ff68684 --- /dev/null +++ b/Doc/library/wsgiref.rst @@ -0,0 +1,641 @@ +:mod:`wsgiref` --- WSGI Utilities and Reference Implementation +============================================================== + +.. module:: wsgiref + :synopsis: WSGI Utilities and Reference Implementation. +.. moduleauthor:: Phillip J. Eby <pje@telecommunity.com> +.. sectionauthor:: Phillip J. Eby <pje@telecommunity.com> + + +.. versionadded:: 2.5 + +The Web Server Gateway Interface (WSGI) is a standard interface between web +server software and web applications written in Python. Having a standard +interface makes it easy to use an application that supports WSGI with a number +of different web servers. 
+ +Only authors of web servers and programming frameworks need to know every detail +and corner case of the WSGI design. You don't need to understand every detail +of WSGI just to install a WSGI application or to write a web application using +an existing framework. + +:mod:`wsgiref` is a reference implementation of the WSGI specification that can +be used to add WSGI support to a web server or framework. It provides utilities +for manipulating WSGI environment variables and response headers, base classes +for implementing WSGI servers, a demo HTTP server that serves WSGI applications, +and a validation tool that checks WSGI servers and applications for conformance +to the WSGI specification (:pep:`333`). + +See http://www.wsgi.org for more information about WSGI, and links to tutorials +and other resources. + +.. % XXX If you're just trying to write a web application... + + +:mod:`wsgiref.util` -- WSGI environment utilities +------------------------------------------------- + +.. module:: wsgiref.util + :synopsis: WSGI environment utilities. + + +This module provides a variety of utility functions for working with WSGI +environments. A WSGI environment is a dictionary containing HTTP request +variables as described in :pep:`333`. All of the functions taking an *environ* +parameter expect a WSGI-compliant dictionary to be supplied; please see +:pep:`333` for a detailed specification. + + +.. function:: guess_scheme(environ) + + Return a guess for whether ``wsgi.url_scheme`` should be "http" or "https", by + checking for a ``HTTPS`` environment variable in the *environ* dictionary. The + return value is a string. + + This function is useful when creating a gateway that wraps CGI or a CGI-like + protocol such as FastCGI. Typically, servers providing such protocols will + include a ``HTTPS`` variable with a value of "1", "yes", or "on" when a request + is received via SSL. So, this function returns "https" if such a value is + found, and "http" otherwise. + + +.. 
function:: request_uri(environ [, include_query=1]) + + Return the full request URI, optionally including the query string, using the + algorithm found in the "URL Reconstruction" section of :pep:`333`. If + *include_query* is false, the query string is not included in the resulting URI. + + +.. function:: application_uri(environ) + + Similar to :func:`request_uri`, except that the ``PATH_INFO`` and + ``QUERY_STRING`` variables are ignored. The result is the base URI of the + application object addressed by the request. + + +.. function:: shift_path_info(environ) + + Shift a single name from ``PATH_INFO`` to ``SCRIPT_NAME`` and return the name. + The *environ* dictionary is *modified* in-place; use a copy if you need to keep + the original ``PATH_INFO`` or ``SCRIPT_NAME`` intact. + + If there are no remaining path segments in ``PATH_INFO``, ``None`` is returned. + + Typically, this routine is used to process each portion of a request URI path, + for example to treat the path as a series of dictionary keys. This routine + modifies the passed-in environment to make it suitable for invoking another WSGI + application that is located at the target URI. For example, if there is a WSGI + application at ``/foo``, and the request URI path is ``/foo/bar/baz``, and the + WSGI application at ``/foo`` calls :func:`shift_path_info`, it will receive the + string "bar", and the environment will be updated to be suitable for passing to + a WSGI application at ``/foo/bar``. That is, ``SCRIPT_NAME`` will change from + ``/foo`` to ``/foo/bar``, and ``PATH_INFO`` will change from ``/bar/baz`` to + ``/baz``. + + When ``PATH_INFO`` is just a "/", this routine returns an empty string and + appends a trailing slash to ``SCRIPT_NAME``, even though empty path segments are + normally ignored, and ``SCRIPT_NAME`` doesn't normally end in a slash. 
This is + intentional behavior, to ensure that an application can tell the difference + between URIs ending in ``/x`` and ones ending in ``/x/`` when using this + routine to do object traversal. + + +.. function:: setup_testing_defaults(environ) + + Update *environ* with trivial defaults for testing purposes. + + This routine adds various parameters required for WSGI, including ``HTTP_HOST``, + ``SERVER_NAME``, ``SERVER_PORT``, ``REQUEST_METHOD``, ``SCRIPT_NAME``, + ``PATH_INFO``, and all of the :pep:`333`\ -defined ``wsgi.*`` variables. It + only supplies default values, and does not replace any existing settings for + these variables. + + This routine is intended to make it easier for unit tests of WSGI servers and + applications to set up dummy environments. It should NOT be used by actual WSGI + servers or applications, since the data is fake! + +In addition to the environment functions above, the :mod:`wsgiref.util` module +also provides these miscellaneous utilities: + + +.. function:: is_hop_by_hop(header_name) + + Return true if *header_name* is an HTTP/1.1 "Hop-by-Hop" header, as defined by + :rfc:`2616`. + + +.. class:: FileWrapper(filelike [, blksize=8192]) + + A wrapper to convert a file-like object to an iterator. The resulting objects + support both :meth:`__getitem__` and :meth:`__iter__` iteration styles, for + compatibility with Python 2.1 and Jython. As the object is iterated over, the + optional *blksize* parameter will be repeatedly passed to the *filelike* + object's :meth:`read` method to obtain strings to yield. When :meth:`read` + returns an empty string, iteration is ended and is not resumable. + + If *filelike* has a :meth:`close` method, the returned object will also have a + :meth:`close` method, and it will invoke the *filelike* object's :meth:`close` + method when called. + + +:mod:`wsgiref.headers` -- WSGI response header tools +---------------------------------------------------- + +.. 
module:: wsgiref.headers + :synopsis: WSGI response header tools. + + +This module provides a single class, :class:`Headers`, for convenient +manipulation of WSGI response headers using a mapping-like interface. + + +.. class:: Headers(headers) + + Create a mapping-like object wrapping *headers*, which must be a list of header + name/value tuples as described in :pep:`333`. Any changes made to the new + :class:`Headers` object will directly update the *headers* list it was created + with. + + :class:`Headers` objects support typical mapping operations including + :meth:`__getitem__`, :meth:`get`, :meth:`__setitem__`, :meth:`setdefault`, + :meth:`__delitem__`, :meth:`__contains__` and :meth:`has_key`. For each of + these methods, the key is the header name (treated case-insensitively), and the + value is the first value associated with that header name. Setting a header + deletes any existing values for that header, then adds a new value at the end of + the wrapped header list. Headers' existing order is generally maintained, with + new headers added to the end of the wrapped list. + + Unlike a dictionary, :class:`Headers` objects do not raise an error when you try + to get or delete a key that isn't in the wrapped header list. Getting a + nonexistent header just returns ``None``, and deleting a nonexistent header does + nothing. + + :class:`Headers` objects also support :meth:`keys`, :meth:`values`, and + :meth:`items` methods. The lists returned by :meth:`keys` and :meth:`items` can + include the same key more than once if there is a multi-valued header. The + ``len()`` of a :class:`Headers` object is the same as the length of its + :meth:`items`, which is the same as the length of the wrapped header list. In + fact, the :meth:`items` method just returns a copy of the wrapped header list. + + Calling ``str()`` on a :class:`Headers` object returns a formatted string + suitable for transmission as HTTP response headers. 
Each header is placed on a + line with its value, separated by a colon and a space. Each line is terminated + by a carriage return and line feed, and the string is terminated with a blank + line. + + In addition to their mapping interface and formatting features, :class:`Headers` + objects also have the following methods for querying and adding multi-valued + headers, and for adding headers with MIME parameters: + + + .. method:: Headers.get_all(name) + + Return a list of all the values for the named header. + + The returned list will be sorted in the order they appeared in the original + header list or were added to this instance, and may contain duplicates. Any + fields deleted and re-inserted are always appended to the header list. If no + fields exist with the given name, returns an empty list. + + + .. method:: Headers.add_header(name, value, **_params) + + Add a (possibly multi-valued) header, with optional MIME parameters specified + via keyword arguments. + + *name* is the header field to add. Keyword arguments can be used to set MIME + parameters for the header field. Each parameter must be a string or ``None``. + Underscores in parameter names are converted to dashes, since dashes are illegal + in Python identifiers, but many MIME parameter names include dashes. If the + parameter value is a string, it is added to the header value parameters in the + form ``name="value"``. If it is ``None``, only the parameter name is added. + (This is used for MIME parameters without a value.) Example usage:: + + h.add_header('content-disposition', 'attachment', filename='bud.gif') + + The above will add a header that looks like this:: + + Content-Disposition: attachment; filename="bud.gif" + + +:mod:`wsgiref.simple_server` -- a simple WSGI HTTP server +--------------------------------------------------------- + +.. module:: wsgiref.simple_server + :synopsis: A simple WSGI HTTP server. 
+ + +This module implements a simple HTTP server (based on :mod:`BaseHTTPServer`) +that serves WSGI applications. Each server instance serves a single WSGI +application on a given host and port. If you want to serve multiple +applications on a single host and port, you should create a WSGI application +that parses ``PATH_INFO`` to select which application to invoke for each +request. (E.g., using the :func:`shift_path_info` function from +:mod:`wsgiref.util`.) + + +.. function:: make_server(host, port, app [, server_class=WSGIServer [, handler_class=WSGIRequestHandler]]) + + Create a new WSGI server listening on *host* and *port*, accepting connections + for *app*. The return value is an instance of the supplied *server_class*, and + will process requests using the specified *handler_class*. *app* must be a WSGI + application object, as defined by :pep:`333`. + + Example usage:: + + from wsgiref.simple_server import make_server, demo_app + + httpd = make_server('', 8000, demo_app) + print "Serving HTTP on port 8000..." + + # Respond to requests until process is killed + httpd.serve_forever() + + # Alternative: serve one request, then exit + ##httpd.handle_request() + + +.. function:: demo_app(environ, start_response) + + This function is a small but complete WSGI application that returns a text page + containing the message "Hello world!" and a list of the key/value pairs provided + in the *environ* parameter. It's useful for verifying that a WSGI server (such + as :mod:`wsgiref.simple_server`) is able to run a simple WSGI application + correctly. + + +.. class:: WSGIServer(server_address, RequestHandlerClass) + + Create a :class:`WSGIServer` instance. *server_address* should be a + ``(host,port)`` tuple, and *RequestHandlerClass* should be the subclass of + :class:`BaseHTTPServer.BaseHTTPRequestHandler` that will be used to process + requests. 
+ + You do not normally need to call this constructor, as the :func:`make_server` + function can handle all the details for you. + + :class:`WSGIServer` is a subclass of :class:`BaseHTTPServer.HTTPServer`, so all + of its methods (such as :meth:`serve_forever` and :meth:`handle_request`) are + available. :class:`WSGIServer` also provides these WSGI-specific methods: + + + .. method:: WSGIServer.set_app(application) + + Sets the callable *application* as the WSGI application that will receive + requests. + + + .. method:: WSGIServer.get_app() + + Returns the currently-set application callable. + + Normally, however, you do not need to use these additional methods, as + :meth:`set_app` is normally called by :func:`make_server`, and the + :meth:`get_app` exists mainly for the benefit of request handler instances. + + +.. class:: WSGIRequestHandler(request, client_address, server) + + Create an HTTP handler for the given *request* (i.e. a socket), *client_address* + (a ``(host,port)`` tuple), and *server* (:class:`WSGIServer` instance). + + You do not need to create instances of this class directly; they are + automatically created as needed by :class:`WSGIServer` objects. You can, + however, subclass this class and supply it as a *handler_class* to the + :func:`make_server` function. Some possibly relevant methods for overriding in + subclasses: + + + .. method:: WSGIRequestHandler.get_environ() + + Returns a dictionary containing the WSGI environment for a request. The default + implementation copies the contents of the :class:`WSGIServer` object's + :attr:`base_environ` dictionary attribute and then adds various headers derived + from the HTTP request. Each call to this method should return a new dictionary + containing all of the relevant CGI environment variables as specified in + :pep:`333`. + + + .. method:: WSGIRequestHandler.get_stderr() + + Return the object that should be used as the ``wsgi.errors`` stream. 
The default + implementation just returns ``sys.stderr``. + + + .. method:: WSGIRequestHandler.handle() + + Process the HTTP request. The default implementation creates a handler instance + using a :mod:`wsgiref.handlers` class to implement the actual WSGI application + interface. + + +:mod:`wsgiref.validate` --- WSGI conformance checker +---------------------------------------------------- + +.. module:: wsgiref.validate + :synopsis: WSGI conformance checker. + + +When creating new WSGI application objects, frameworks, servers, or middleware, +it can be useful to validate the new code's conformance using +:mod:`wsgiref.validate`. This module provides a function that creates WSGI +application objects that validate communications between a WSGI server or +gateway and a WSGI application object, to check both sides for protocol +conformance. + +Note that this utility does not guarantee complete :pep:`333` compliance; an +absence of errors from this module does not necessarily mean that errors do not +exist. However, if this module does produce an error, then it is virtually +certain that either the server or application is not 100% compliant. + +This module is based on the :mod:`paste.lint` module from Ian Bicking's "Python +Paste" library. + + +.. function:: validator(application) + + Wrap *application* and return a new WSGI application object. The returned + application will forward all requests to the original *application*, and will + check that both the *application* and the server invoking it are conforming to + the WSGI specification and to RFC 2616. + + Any detected nonconformance results in an :exc:`AssertionError` being raised; + note, however, that how these errors are handled is server-dependent. 
For + example, :mod:`wsgiref.simple_server` and other servers based on + :mod:`wsgiref.handlers` (that don't override the error handling methods to do + something else) will simply output a message that an error has occurred, and + dump the traceback to ``sys.stderr`` or some other error stream. + + This wrapper may also generate output using the :mod:`warnings` module to + indicate behaviors that are questionable but which may not actually be + prohibited by :pep:`333`. Unless they are suppressed using Python command-line + options or the :mod:`warnings` API, any such warnings will be written to + ``sys.stderr`` (*not* ``wsgi.errors``, unless they happen to be the same + object). + + +:mod:`wsgiref.handlers` -- server/gateway base classes +------------------------------------------------------ + +.. module:: wsgiref.handlers + :synopsis: WSGI server/gateway base classes. + + +This module provides base handler classes for implementing WSGI servers and +gateways. These base classes handle most of the work of communicating with a +WSGI application, as long as they are given a CGI-like environment, along with +input, output, and error streams. + + +.. class:: CGIHandler() + + CGI-based invocation via ``sys.stdin``, ``sys.stdout``, ``sys.stderr`` and + ``os.environ``. This is useful when you have a WSGI application and want to run + it as a CGI script. Simply invoke ``CGIHandler().run(app)``, where ``app`` is + the WSGI application object you wish to invoke. + + This class is a subclass of :class:`BaseCGIHandler` that sets ``wsgi.run_once`` + to true, ``wsgi.multithread`` to false, and ``wsgi.multiprocess`` to true, and + always uses :mod:`sys` and :mod:`os` to obtain the necessary CGI streams and + environment. + + +.. 
class:: BaseCGIHandler(stdin, stdout, stderr, environ [, multithread=True [, multiprocess=False]]) + + Similar to :class:`CGIHandler`, but instead of using the :mod:`sys` and + :mod:`os` modules, the CGI environment and I/O streams are specified explicitly. + The *multithread* and *multiprocess* values are used to set the + ``wsgi.multithread`` and ``wsgi.multiprocess`` flags for any applications run by + the handler instance. + + This class is a subclass of :class:`SimpleHandler` intended for use with + software other than HTTP "origin servers". If you are writing a gateway + protocol implementation (such as CGI, FastCGI, SCGI, etc.) that uses a + ``Status:`` header to send an HTTP status, you probably want to subclass this + instead of :class:`SimpleHandler`. + + +.. class:: SimpleHandler(stdin, stdout, stderr, environ [, multithread=True [, multiprocess=False]]) + + Similar to :class:`BaseCGIHandler`, but designed for use with HTTP origin + servers. If you are writing an HTTP server implementation, you will probably + want to subclass this instead of :class:`BaseCGIHandler`. + + This class is a subclass of :class:`BaseHandler`. It overrides the + :meth:`__init__`, :meth:`get_stdin`, :meth:`get_stderr`, :meth:`add_cgi_vars`, + :meth:`_write`, and :meth:`_flush` methods to support explicitly setting the + environment and streams via the constructor. The supplied environment and + streams are stored in the :attr:`stdin`, :attr:`stdout`, :attr:`stderr`, and + :attr:`environ` attributes. + + +.. class:: BaseHandler() + + This is an abstract base class for running WSGI applications. Each instance + will handle a single HTTP request, although in principle you could create a + subclass that was reusable for multiple requests. + + :class:`BaseHandler` instances have only one method intended for external use: + + + .. method:: BaseHandler.run(app) + + Run the specified WSGI application, *app*. 
+ + All of the other :class:`BaseHandler` methods are invoked by this method in the + process of running the application, and thus exist primarily to allow + customizing the process. + + The following methods MUST be overridden in a subclass: + + + .. method:: BaseHandler._write(data) + + Buffer the string *data* for transmission to the client. It's okay if this + method actually transmits the data; :class:`BaseHandler` just separates write + and flush operations for greater efficiency when the underlying system actually + has such a distinction. + + + .. method:: BaseHandler._flush() + + Force buffered data to be transmitted to the client. It's okay if this method + is a no-op (i.e., if :meth:`_write` actually sends the data). + + + .. method:: BaseHandler.get_stdin() + + Return an input stream object suitable for use as the ``wsgi.input`` of the + request currently being processed. + + + .. method:: BaseHandler.get_stderr() + + Return an output stream object suitable for use as the ``wsgi.errors`` of the + request currently being processed. + + + .. method:: BaseHandler.add_cgi_vars() + + Insert CGI variables for the current request into the :attr:`environ` attribute. + + Here are some other methods and attributes you may wish to override. This list + is only a summary, however, and does not include every method that can be + overridden. You should consult the docstrings and source code for additional + information before attempting to create a customized :class:`BaseHandler` + subclass. + + Attributes and methods for customizing the WSGI environment: + + + .. attribute:: BaseHandler.wsgi_multithread + + The value to be used for the ``wsgi.multithread`` environment variable. It + defaults to true in :class:`BaseHandler`, but may have a different default (or + be set by the constructor) in the other subclasses. + + + .. attribute:: BaseHandler.wsgi_multiprocess + + The value to be used for the ``wsgi.multiprocess`` environment variable. 
It + defaults to true in :class:`BaseHandler`, but may have a different default (or + be set by the constructor) in the other subclasses. + + + .. attribute:: BaseHandler.wsgi_run_once + + The value to be used for the ``wsgi.run_once`` environment variable. It + defaults to false in :class:`BaseHandler`, but :class:`CGIHandler` sets it to + true by default. + + + .. attribute:: BaseHandler.os_environ + + The default environment variables to be included in every request's WSGI + environment. By default, this is a copy of ``os.environ`` at the time that + :mod:`wsgiref.handlers` was imported, but subclasses can create their own + at either the class or instance level. Note that the dictionary should be considered + read-only, since the default value is shared between multiple classes and + instances. + + + .. attribute:: BaseHandler.server_software + + If the :attr:`origin_server` attribute is set, this attribute's value is used to + set the default ``SERVER_SOFTWARE`` WSGI environment variable, and also to set a + default ``Server:`` header in HTTP responses. It is ignored for handlers (such + as :class:`BaseCGIHandler` and :class:`CGIHandler`) that are not HTTP origin + servers. + + + .. method:: BaseHandler.get_scheme() + + Return the URL scheme being used for the current request. The default + implementation uses the :func:`guess_scheme` function from :mod:`wsgiref.util` + to guess whether the scheme should be "http" or "https", based on the current + request's :attr:`environ` variables. + + + .. method:: BaseHandler.setup_environ() + + Set the :attr:`environ` attribute to a fully-populated WSGI environment. The + default implementation uses all of the above methods and attributes, plus the + :meth:`get_stdin`, :meth:`get_stderr`, and :meth:`add_cgi_vars` methods and the + :attr:`wsgi_file_wrapper` attribute. 
It also inserts a ``SERVER_SOFTWARE`` key + if not present, as long as the :attr:`origin_server` attribute is a true value + and the :attr:`server_software` attribute is set. + + Methods and attributes for customizing exception handling: + + + .. method:: BaseHandler.log_exception(exc_info) + + Log the *exc_info* tuple in the server log. *exc_info* is a ``(type, value, + traceback)`` tuple. The default implementation simply writes the traceback to + the request's ``wsgi.errors`` stream and flushes it. Subclasses can override + this method to change the format or retarget the output, mail the traceback to + an administrator, or whatever other action may be deemed suitable. + + + .. attribute:: BaseHandler.traceback_limit + + The maximum number of frames to include in tracebacks output by the default + :meth:`log_exception` method. If ``None``, all frames are included. + + + .. method:: BaseHandler.error_output(environ, start_response) + + This method is a WSGI application to generate an error page for the user. It is + only invoked if an error occurs before headers are sent to the client. + + This method can access the current error information using ``sys.exc_info()``, + and should pass that information to *start_response* when calling it (as + described in the "Error Handling" section of :pep:`333`). + + The default implementation just uses the :attr:`error_status`, + :attr:`error_headers`, and :attr:`error_body` attributes to generate an output + page. Subclasses can override this to produce more dynamic error output. + + Note, however, that it's not recommended from a security perspective to spit out + diagnostics to any old user; ideally, you should have to do something special to + enable diagnostic output, which is why the default implementation doesn't + include any. + + + .. attribute:: BaseHandler.error_status + + The HTTP status used for error responses. This should be a status string as + defined in :pep:`333`; it defaults to a 500 code and message. 
+ + + .. attribute:: BaseHandler.error_headers + + The HTTP headers used for error responses. This should be a list of WSGI + response headers (``(name, value)`` tuples), as described in :pep:`333`. The + default list just sets the content type to ``text/plain``. + + + .. attribute:: BaseHandler.error_body + + The error response body. This should be an HTTP response body string. It + defaults to the plain text, "A server error occurred. Please contact the + administrator." + + Methods and attributes for :pep:`333`'s "Optional Platform-Specific File + Handling" feature: + + + .. attribute:: BaseHandler.wsgi_file_wrapper + + A ``wsgi.file_wrapper`` factory, or ``None``. The default value of this + attribute is the :class:`FileWrapper` class from :mod:`wsgiref.util`. + + + .. method:: BaseHandler.sendfile() + + Override to implement platform-specific file transmission. This method is + called only if the application's return value is an instance of the class + specified by the :attr:`wsgi_file_wrapper` attribute. It should return a true + value if it was able to successfully transmit the file, so that the default + transmission code will not be executed. The default implementation of this + method just returns a false value. + + Miscellaneous methods and attributes: + + + .. attribute:: BaseHandler.origin_server + + This attribute should be set to a true value if the handler's :meth:`_write` and + :meth:`_flush` are being used to communicate directly to the client, rather than + via a CGI-like gateway protocol that wants the HTTP status in a special + ``Status:`` header. + + This attribute's default value is true in :class:`BaseHandler`, but false in + :class:`BaseCGIHandler` and :class:`CGIHandler`. + + + .. attribute:: BaseHandler.http_version + + If :attr:`origin_server` is true, this string attribute is used to set the HTTP + version of the response sent to the client. It defaults to ``"1.0"``. 
+ diff --git a/Doc/library/xdrlib.rst b/Doc/library/xdrlib.rst new file mode 100644 index 0000000..6339a7f --- /dev/null +++ b/Doc/library/xdrlib.rst @@ -0,0 +1,276 @@ + +:mod:`xdrlib` --- Encode and decode XDR data +============================================ + +.. module:: xdrlib + :synopsis: Encoders and decoders for the External Data Representation (XDR). + + +.. index:: + single: XDR + single: External Data Representation + +The :mod:`xdrlib` module supports the External Data Representation Standard as +described in :rfc:`1014`, written by Sun Microsystems, Inc. June 1987. It +supports most of the data types described in the RFC. + +The :mod:`xdrlib` module defines two classes, one for packing variables into XDR +representation, and another for unpacking from XDR representation. There are +also two exception classes. + + +.. class:: Packer() + + :class:`Packer` is the class for packing data into XDR representation. The + :class:`Packer` class is instantiated with no arguments. + + +.. class:: Unpacker(data) + + ``Unpacker`` is the complementary class which unpacks XDR data values from a + string buffer. The input buffer is given as *data*. + + +.. seealso:: + + :rfc:`1014` - XDR: External Data Representation Standard + This RFC defined the encoding of data which was XDR at the time this module was + originally written. It has apparently been obsoleted by :rfc:`1832`. + + :rfc:`1832` - XDR: External Data Representation Standard + Newer RFC that provides a revised definition of XDR. + + +.. _xdr-packer-objects: + +Packer Objects +-------------- + +:class:`Packer` instances have the following methods: + + +.. method:: Packer.get_buffer() + + Returns the current pack buffer as a string. + + +.. method:: Packer.reset() + + Resets the pack buffer to the empty string. + +In general, you can pack any of the most common XDR data types by calling the +appropriate ``pack_type()`` method. Each method takes a single argument, the +value to pack. 
The following simple data type packing methods are supported: +:meth:`pack_uint`, :meth:`pack_int`, :meth:`pack_enum`, :meth:`pack_bool`, +:meth:`pack_uhyper`, and :meth:`pack_hyper`. + + +.. method:: Packer.pack_float(value) + + Packs the single-precision floating point number *value*. + + +.. method:: Packer.pack_double(value) + + Packs the double-precision floating point number *value*. + +The following methods support packing strings, bytes, and opaque data: + + +.. method:: Packer.pack_fstring(n, s) + + Packs a fixed length string, *s*. *n* is the length of the string but it is + *not* packed into the data buffer. The string is padded with null bytes if + necessary to guarantee 4 byte alignment. + + +.. method:: Packer.pack_fopaque(n, data) + + Packs a fixed length opaque data stream, similarly to :meth:`pack_fstring`. + + +.. method:: Packer.pack_string(s) + + Packs a variable length string, *s*. The length of the string is first packed + as an unsigned integer, then the string data is packed with + :meth:`pack_fstring`. + + +.. method:: Packer.pack_opaque(data) + + Packs a variable length opaque data string, similarly to :meth:`pack_string`. + + +.. method:: Packer.pack_bytes(bytes) + + Packs a variable length byte stream, similarly to :meth:`pack_string`. + +The following methods support packing arrays and lists: + + +.. method:: Packer.pack_list(list, pack_item) + + Packs a *list* of homogeneous items. This method is useful for lists with an + indeterminate size; i.e. the size is not available until the entire list has + been walked. For each item in the list, an unsigned integer ``1`` is packed + first, followed by the data value from the list. *pack_item* is the function + that is called to pack the individual item. At the end of the list, an unsigned + integer ``0`` is packed. + + For example, to pack a list of integers, the code might appear like this:: + + import xdrlib + p = xdrlib.Packer() + p.pack_list([1, 2, 3], p.pack_int) + + +.. 
method:: Packer.pack_farray(n, array, pack_item) + + Packs a fixed length list (*array*) of homogeneous items. *n* is the length of + the list; it is *not* packed into the buffer, but a :exc:`ValueError` exception + is raised if ``len(array)`` is not equal to *n*. As above, *pack_item* is the + function used to pack each element. + + +.. method:: Packer.pack_array(list, pack_item) + + Packs a variable length *list* of homogeneous items. First, the length of the + list is packed as an unsigned integer, then each element is packed as in + :meth:`pack_farray` above. + + +.. _xdr-unpacker-objects: + +Unpacker Objects +---------------- + +The :class:`Unpacker` class offers the following methods: + + +.. method:: Unpacker.reset(data) + + Resets the string buffer with the given *data*. + + +.. method:: Unpacker.get_position() + + Returns the current unpack position in the data buffer. + + +.. method:: Unpacker.set_position(position) + + Sets the data buffer unpack position to *position*. You should be careful about + using :meth:`get_position` and :meth:`set_position`. + + +.. method:: Unpacker.get_buffer() + + Returns the current unpack data buffer as a string. + + +.. method:: Unpacker.done() + + Indicates unpack completion. Raises an :exc:`Error` exception if all of the + data has not been unpacked. + +In addition, every data type that can be packed with a :class:`Packer`, can be +unpacked with an :class:`Unpacker`. Unpacking methods are of the form +``unpack_type()``, and take no arguments. They return the unpacked object. + + +.. method:: Unpacker.unpack_float() + + Unpacks a single-precision floating point number. + + +.. method:: Unpacker.unpack_double() + + Unpacks a double-precision floating point number, similarly to + :meth:`unpack_float`. + +In addition, the following methods unpack strings, bytes, and opaque data: + + +.. method:: Unpacker.unpack_fstring(n) + + Unpacks and returns a fixed length string. *n* is the number of characters + expected. 
Padding with null bytes to guarantee 4 byte alignment is assumed. + + +.. method:: Unpacker.unpack_fopaque(n) + + Unpacks and returns a fixed length opaque data stream, similarly to + :meth:`unpack_fstring`. + + +.. method:: Unpacker.unpack_string() + + Unpacks and returns a variable length string. The length of the string is first + unpacked as an unsigned integer, then the string data is unpacked with + :meth:`unpack_fstring`. + + +.. method:: Unpacker.unpack_opaque() + + Unpacks and returns a variable length opaque data string, similarly to + :meth:`unpack_string`. + + +.. method:: Unpacker.unpack_bytes() + + Unpacks and returns a variable length byte stream, similarly to + :meth:`unpack_string`. + +The following methods support unpacking arrays and lists: + + +.. method:: Unpacker.unpack_list(unpack_item) + + Unpacks and returns a list of homogeneous items. The list is unpacked one + element at a time by first unpacking an unsigned integer flag. If the flag is + ``1``, then the item is unpacked and appended to the list. A flag of ``0`` + indicates the end of the list. *unpack_item* is the function that is called to + unpack the items. + + +.. method:: Unpacker.unpack_farray(n, unpack_item) + + Unpacks and returns (as a list) a fixed length array of homogeneous items. *n* + is the number of list elements to expect in the buffer. As above, *unpack_item* is + the function used to unpack each element. + + +.. method:: Unpacker.unpack_array(unpack_item) + + Unpacks and returns a variable length *list* of homogeneous items. First, the + length of the list is unpacked as an unsigned integer, then each element is + unpacked as in :meth:`unpack_farray` above. + + +.. _xdr-exceptions: + +Exceptions +---------- + +Exceptions in this module are coded as class instances: + + +.. exception:: Error + + The base exception class. :exc:`Error` has a single public data member + :attr:`msg` containing the description of the error. + + +.. 
exception:: ConversionError + + Class derived from :exc:`Error`. Contains no additional instance variables. + +Here is an example of how you would catch one of these exceptions:: + + import xdrlib + p = xdrlib.Packer() + try: + p.pack_double(8.01) + except xdrlib.ConversionError as instance: + print 'packing the double failed:', instance.msg + diff --git a/Doc/library/xml.dom.minidom.rst b/Doc/library/xml.dom.minidom.rst new file mode 100644 index 0000000..54c5f3d --- /dev/null +++ b/Doc/library/xml.dom.minidom.rst @@ -0,0 +1,267 @@ + +:mod:`xml.dom.minidom` --- Lightweight DOM implementation +========================================================= + +.. module:: xml.dom.minidom + :synopsis: Lightweight Document Object Model (DOM) implementation. +.. moduleauthor:: Paul Prescod <paul@prescod.net> +.. sectionauthor:: Paul Prescod <paul@prescod.net> +.. sectionauthor:: Martin v. Löwis <martin@v.loewis.de> + + +.. versionadded:: 2.0 + +:mod:`xml.dom.minidom` is a light-weight implementation of the Document Object +Model interface. It is intended to be simpler than the full DOM and also +significantly smaller. + +DOM applications typically start by parsing some XML into a DOM. With +:mod:`xml.dom.minidom`, this is done through the parse functions:: + + from xml.dom.minidom import parse, parseString + + dom1 = parse('c:\\temp\\mydata.xml') # parse an XML file by name + + datasource = open('c:\\temp\\mydata.xml') + dom2 = parse(datasource) # parse an open file + + dom3 = parseString('<myxml>Some data<empty/> some more data</myxml>') + +The :func:`parse` function can take either a filename or an open file object. + + +.. function:: parse(filename_or_file[, parser]) + + Return a :class:`Document` from the given input. *filename_or_file* may be + either a file name, or a file-like object. *parser*, if given, must be a SAX2 + parser object.
This function will change the document handler of the parser and + activate namespace support; other parser configuration (like setting an entity + resolver) must have been done in advance. + +If you have XML in a string, you can use the :func:`parseString` function +instead: + + +.. function:: parseString(string[, parser]) + + Return a :class:`Document` that represents the *string*. This method creates a + :class:`StringIO` object for the string and passes that on to :func:`parse`. + +Both functions return a :class:`Document` object representing the content of the +document. + +What the :func:`parse` and :func:`parseString` functions do is connect an XML +parser with a "DOM builder" that can accept parse events from any SAX parser and +convert them into a DOM tree. The names of the functions are perhaps misleading, +but are easy to grasp when learning the interfaces. The parsing of the document +will be completed before these functions return; it's simply that these +functions do not provide a parser implementation themselves. + +You can also create a :class:`Document` by calling a method on a "DOM +Implementation" object. You can get this object either by calling the +:func:`getDOMImplementation` function in the :mod:`xml.dom` package or the +:mod:`xml.dom.minidom` module. Using the implementation from the +:mod:`xml.dom.minidom` module will always return a :class:`Document` instance +from the minidom implementation, while the version from :mod:`xml.dom` may +provide an alternate implementation (this is likely if you have the `PyXML +package <http://pyxml.sourceforge.net/>`_ installed).
Once you have a +:class:`Document`, you can add child nodes to it to populate the DOM:: + + from xml.dom.minidom import getDOMImplementation + + impl = getDOMImplementation() + + newdoc = impl.createDocument(None, "some_tag", None) + top_element = newdoc.documentElement + text = newdoc.createTextNode('Some textual content.') + top_element.appendChild(text) + +Once you have a DOM document object, you can access the parts of your XML +document through its properties and methods. These properties are defined in +the DOM specification. The main property of the document object is the +:attr:`documentElement` property. It gives you the main element in the XML +document: the one that holds all others. Here is an example program:: + + dom3 = parseString("<myxml>Some data</myxml>") + assert dom3.documentElement.tagName == "myxml" + +When you are finished with a DOM, you should clean it up. This is necessary +because some versions of Python do not support garbage collection of objects +that refer to each other in a cycle. Until this restriction is removed from all +versions of Python, it is safest to write your code as if cycles would not be +cleaned up. + +The way to clean up a DOM is to call its :meth:`unlink` method:: + + dom1.unlink() + dom2.unlink() + dom3.unlink() + +:meth:`unlink` is a :mod:`xml.dom.minidom`\ -specific extension to the DOM API. +After calling :meth:`unlink` on a node, the node and its descendants are +essentially useless. + + +.. seealso:: + + `Document Object Model (DOM) Level 1 Specification <http://www.w3.org/TR/REC-DOM-Level-1/>`_ + The W3C recommendation for the DOM supported by :mod:`xml.dom.minidom`. + + +.. _minidom-objects: + +DOM Objects +----------- + +The definition of the DOM API for Python is given as part of the :mod:`xml.dom` +module documentation. This section lists the differences between the API and +:mod:`xml.dom.minidom`. + + +.. 
method:: Node.unlink() + + Break internal references within the DOM so that it will be garbage collected on + versions of Python without cyclic GC. Even when cyclic GC is available, using + this can make large amounts of memory available sooner, so calling this on DOM + objects as soon as they are no longer needed is good practice. This only needs + to be called on the :class:`Document` object, but may be called on child nodes + to discard children of that node. + + +.. method:: Node.writexml(writer[,indent=""[,addindent=""[,newl=""]]]) + + Write XML to the writer object. The writer should have a :meth:`write` method + which matches that of the file object interface. The *indent* parameter is the + indentation of the current node. The *addindent* parameter is the incremental + indentation to use for subnodes of the current one. The *newl* parameter + specifies the string to use to terminate lines. + + .. versionchanged:: 2.1 + The optional keyword parameters *indent*, *addindent*, and *newl* were added to + support pretty output. + + .. versionchanged:: 2.3 + For the :class:`Document` node, an additional keyword argument *encoding* can be + used to specify the encoding field of the XML header. + + +.. method:: Node.toxml([encoding]) + + Return the XML that the DOM represents as a string. + + With no argument, the XML header does not specify an encoding, and the result is + a Unicode string if the default encoding cannot represent all characters in the + document. Encoding this string in an encoding other than UTF-8 is likely + incorrect, since UTF-8 is the default encoding of XML. + + With an explicit *encoding* argument, the result is a byte string in the + specified encoding. It is recommended that this argument is always specified. To + avoid :exc:`UnicodeError` exceptions in case of unrepresentable text data, the + encoding argument should be specified as "utf-8". + + .. versionchanged:: 2.3 + the *encoding* argument was introduced. + + +..
method:: Node.toprettyxml([indent[, newl]]) + + Return a pretty-printed version of the document. *indent* specifies the + indentation string and defaults to a tabulator; *newl* specifies the string + emitted at the end of each line and defaults to ``\n``. + + .. versionadded:: 2.1 + + .. versionchanged:: 2.3 + the encoding argument; see :meth:`toxml`. + +The following standard DOM methods have special considerations with +:mod:`xml.dom.minidom`: + + +.. method:: Node.cloneNode(deep) + + Although this method was present in the version of :mod:`xml.dom.minidom` + packaged with Python 2.0, it was seriously broken. This has been corrected for + subsequent releases. + + +.. _dom-example: + +DOM Example +----------- + +This example program is a fairly realistic example of a simple program. In this +particular case, we do not take much advantage of the flexibility of the DOM. + +.. literalinclude:: ../includes/minidom-example.py + + +.. _minidom-and-dom: + +minidom and the DOM standard +---------------------------- + +The :mod:`xml.dom.minidom` module is essentially a DOM 1.0-compatible DOM with +some DOM 2 features (primarily namespace features). + +Usage of the DOM interface in Python is straight-forward. The following mapping +rules apply: + +* Interfaces are accessed through instance objects. Applications should not + instantiate the classes themselves; they should use the creator functions + available on the :class:`Document` object. Derived interfaces support all + operations (and attributes) from the base interfaces, plus any new operations. + +* Operations are used as methods. Since the DOM uses only :keyword:`in` + parameters, the arguments are passed in normal order (from left to right). + There are no optional arguments. :keyword:`void` operations return ``None``. + +* IDL attributes map to instance attributes. 
For compatibility with the OMG IDL + language mapping for Python, an attribute ``foo`` can also be accessed through + accessor methods :meth:`_get_foo` and :meth:`_set_foo`. :keyword:`readonly` + attributes must not be changed; this is not enforced at runtime. + +* The types ``short int``, ``unsigned int``, ``unsigned long long``, and + ``boolean`` all map to Python integer objects. + +* The type ``DOMString`` maps to Python strings. :mod:`xml.dom.minidom` supports + either byte or Unicode strings, but will normally produce Unicode strings. + Values of type ``DOMString`` may also be ``None`` where allowed to have the IDL + ``null`` value by the DOM specification from the W3C. + +* :keyword:`const` declarations map to variables in their respective scope (e.g. + ``xml.dom.minidom.Node.PROCESSING_INSTRUCTION_NODE``); they must not be changed. + +* ``DOMException`` is currently not supported in :mod:`xml.dom.minidom`. + Instead, :mod:`xml.dom.minidom` uses standard Python exceptions such as + :exc:`TypeError` and :exc:`AttributeError`. + +* :class:`NodeList` objects are implemented using Python's built-in list type. + Starting with Python 2.2, these objects provide the interface defined in the DOM + specification, but with earlier versions of Python they do not support the + official API. They are, however, much more "Pythonic" than the interface + defined in the W3C recommendations. + +The following interfaces have no implementation in :mod:`xml.dom.minidom`: + +* :class:`DOMTimeStamp` + +* :class:`DocumentType` (added in Python 2.1) + +* :class:`DOMImplementation` (added in Python 2.1) + +* :class:`CharacterData` + +* :class:`CDATASection` + +* :class:`Notation` + +* :class:`Entity` + +* :class:`EntityReference` + +* :class:`DocumentFragment` + +Most of these reflect information in the XML document that is not of general +utility to most DOM users. 
+ diff --git a/Doc/library/xml.dom.pulldom.rst b/Doc/library/xml.dom.pulldom.rst new file mode 100644 index 0000000..80a91b8 --- /dev/null +++ b/Doc/library/xml.dom.pulldom.rst @@ -0,0 +1,69 @@ + +:mod:`xml.dom.pulldom` --- Support for building partial DOM trees +================================================================= + +.. module:: xml.dom.pulldom + :synopsis: Support for building partial DOM trees from SAX events. +.. moduleauthor:: Paul Prescod <paul@prescod.net> + + +.. versionadded:: 2.0 + +:mod:`xml.dom.pulldom` allows building only selected portions of a Document +Object Model representation of a document from SAX events. + + +.. class:: PullDOM([documentFactory]) + + :class:`xml.sax.handler.ContentHandler` implementation that ... + + +.. class:: DOMEventStream(stream, parser, bufsize) + + ... + + +.. class:: SAX2DOM([documentFactory]) + + :class:`xml.sax.handler.ContentHandler` implementation that ... + + +.. function:: parse(stream_or_string[, parser[, bufsize]]) + + ... + + +.. function:: parseString(string[, parser]) + + ... + + +.. data:: default_bufsize + + Default value for the *bufsize* parameter to :func:`parse`. + + .. versionchanged:: 2.1 + The value of this variable can be changed before calling :func:`parse` and the + new value will take effect. + + +.. _domeventstream-objects: + +DOMEventStream Objects +---------------------- + + +.. method:: DOMEventStream.getEvent() + + ... + + +.. method:: DOMEventStream.expandNode(node) + + ... + + +.. method:: DOMEventStream.reset() + + ... + diff --git a/Doc/library/xml.dom.rst b/Doc/library/xml.dom.rst new file mode 100644 index 0000000..76f5cc1 --- /dev/null +++ b/Doc/library/xml.dom.rst @@ -0,0 +1,1045 @@ + +:mod:`xml.dom` --- The Document Object Model API +================================================ + +.. module:: xml.dom + :synopsis: Document Object Model API for Python. +.. sectionauthor:: Paul Prescod <paul@prescod.net> +.. sectionauthor:: Martin v. Löwis <martin@v.loewis.de> + + +.. 
versionadded:: 2.0 + +The Document Object Model, or "DOM," is a cross-language API from the World Wide +Web Consortium (W3C) for accessing and modifying XML documents. A DOM +implementation presents an XML document as a tree structure, or allows client +code to build such a structure from scratch. It then gives access to the +structure through a set of objects which provide well-known interfaces. + +The DOM is extremely useful for random-access applications. SAX only allows you +a view of one bit of the document at a time. If you are looking at one SAX +element, you have no access to another. If you are looking at a text node, you +have no access to a containing element. When you write a SAX application, you +need to keep track of your program's position in the document somewhere in your +own code. SAX does not do it for you. Also, if you need to look ahead in the +XML document, you are just out of luck. + +Some applications are simply impossible in an event driven model with no access +to a tree. Of course you could build some sort of tree yourself in SAX events, +but the DOM allows you to avoid writing that code. The DOM is a standard tree +representation for XML data. + +The Document Object Model is being defined by the W3C in stages, or "levels" in +their terminology. The Python mapping of the API is substantially based on the +DOM Level 2 recommendation. The mapping of the Level 3 specification, currently +only available in draft form, is being developed by the `Python XML Special +Interest Group <http://www.python.org/sigs/xml-sig/>`_ as part of the `PyXML +package <http://pyxml.sourceforge.net/>`_. Refer to the documentation bundled +with that package for information on the current state of DOM Level 3 support. + +.. % What if your needs are somewhere between SAX and the DOM? Perhaps +.. % you cannot afford to load the entire tree in memory but you find the +.. % SAX model somewhat cumbersome and low-level. There is also a module +..
% called xml.dom.pulldom that allows you to build trees of only the +.. % parts of a document that you need structured access to. It also has +.. % features that allow you to find your way around the DOM. +.. % See http://www.prescod.net/python/pulldom + +DOM applications typically start by parsing some XML into a DOM. How this is +accomplished is not covered at all by DOM Level 1, and Level 2 provides only +limited improvements: There is a :class:`DOMImplementation` object class which +provides access to :class:`Document` creation methods, but no way to access an +XML reader/parser/Document builder in an implementation-independent way. There +is also no well-defined way to access these methods without an existing +:class:`Document` object. In Python, each DOM implementation will provide a +function :func:`getDOMImplementation`. DOM Level 3 adds a Load/Store +specification, which defines an interface to the reader, but this is not yet +available in the Python standard library. + +Once you have a DOM document object, you can access the parts of your XML +document through its properties and methods. These properties are defined in +the DOM specification; this portion of the reference manual describes the +interpretation of the specification in Python. + +The specification provided by the W3C defines the DOM API for Java, ECMAScript, +and OMG IDL. The Python mapping defined here is based in large part on the IDL +version of the specification, but strict compliance is not required (though +implementations are free to support the strict mapping from IDL). See section +:ref:`dom-conformance` for a detailed discussion of mapping requirements. + + +.. seealso:: + + `Document Object Model (DOM) Level 2 Specification <http://www.w3.org/TR/DOM-Level-2-Core/>`_ + The W3C recommendation upon which the Python DOM API is based. 
+ + `Document Object Model (DOM) Level 1 Specification <http://www.w3.org/TR/REC-DOM-Level-1/>`_ + The W3C recommendation for the DOM supported by :mod:`xml.dom.minidom`. + + `PyXML <http://pyxml.sourceforge.net>`_ + Users that require a full-featured implementation of DOM should use the PyXML + package. + + `Python Language Mapping Specification <http://www.omg.org/docs/formal/02-11-05.pdf>`_ + This specifies the mapping from OMG IDL to Python. + + +Module Contents +--------------- + +The :mod:`xml.dom` contains the following functions: + + +.. function:: registerDOMImplementation(name, factory) + + Register the *factory* function with the name *name*. The factory function + should return an object which implements the :class:`DOMImplementation` + interface. The factory function can return the same object every time, or a new + one for each call, as appropriate for the specific implementation (e.g. if that + implementation supports some customization). + + +.. function:: getDOMImplementation([name[, features]]) + + Return a suitable DOM implementation. The *name* is either well-known, the + module name of a DOM implementation, or ``None``. If it is not ``None``, imports + the corresponding module and returns a :class:`DOMImplementation` object if the + import succeeds. If no name is given, and if the environment variable + :envvar:`PYTHON_DOM` is set, this variable is used to find the implementation. + + If name is not given, this examines the available implementations to find one + with the required feature set. If no implementation can be found, raise an + :exc:`ImportError`. The features list must be a sequence of ``(feature, + version)`` pairs which are passed to the :meth:`hasFeature` method on available + :class:`DOMImplementation` objects. + +Some convenience constants are also provided: + + +.. data:: EMPTY_NAMESPACE + + The value used to indicate that no namespace is associated with a node in the + DOM. 
This is typically found as the :attr:`namespaceURI` of a node, or used as + the *namespaceURI* parameter to a namespaces-specific method. + + .. versionadded:: 2.2 + + +.. data:: XML_NAMESPACE + + The namespace URI associated with the reserved prefix ``xml``, as defined by + `Namespaces in XML <http://www.w3.org/TR/REC-xml-names/>`_ (section 4). + + .. versionadded:: 2.2 + + +.. data:: XMLNS_NAMESPACE + + The namespace URI for namespace declarations, as defined by `Document Object + Model (DOM) Level 2 Core Specification + <http://www.w3.org/TR/DOM-Level-2-Core/core.html>`_ (section 1.1.8). + + .. versionadded:: 2.2 + + +.. data:: XHTML_NAMESPACE + + The URI of the XHTML namespace as defined by `XHTML 1.0: The Extensible + HyperText Markup Language <http://www.w3.org/TR/xhtml1/>`_ (section 3.1.1). + + .. versionadded:: 2.2 + +In addition, :mod:`xml.dom` contains a base :class:`Node` class and the DOM +exception classes. The :class:`Node` class provided by this module does not +implement any of the methods or attributes defined by the DOM specification; +concrete DOM implementations must provide those. The :class:`Node` class +provided as part of this module does provide the constants used for the +:attr:`nodeType` attribute on concrete :class:`Node` objects; they are located +within the class rather than at the module level to conform with the DOM +specifications. + +.. % Should the Node documentation go here? + + +.. _dom-objects: + +Objects in the DOM +------------------ + +The definitive documentation for the DOM is the DOM specification from the W3C. + +Note that DOM attributes may also be manipulated as nodes instead of as simple +strings. It is fairly rare that you must do this, however, so this usage is not +yet documented. 
+ ++--------------------------------+-----------------------------------+---------------------------------+ +| Interface | Section | Purpose | ++================================+===================================+=================================+ +| :class:`DOMImplementation` | :ref:`dom-implementation-objects` | Interface to the underlying | +| | | implementation. | ++--------------------------------+-----------------------------------+---------------------------------+ +| :class:`Node` | :ref:`dom-node-objects` | Base interface for most objects | +| | | in a document. | ++--------------------------------+-----------------------------------+---------------------------------+ +| :class:`NodeList` | :ref:`dom-nodelist-objects` | Interface for a sequence of | +| | | nodes. | ++--------------------------------+-----------------------------------+---------------------------------+ +| :class:`DocumentType` | :ref:`dom-documenttype-objects` | Information about the | +| | | declarations needed to process | +| | | a document. | ++--------------------------------+-----------------------------------+---------------------------------+ +| :class:`Document` | :ref:`dom-document-objects` | Object which represents an | +| | | entire document. | ++--------------------------------+-----------------------------------+---------------------------------+ +| :class:`Element` | :ref:`dom-element-objects` | Element nodes in the document | +| | | hierarchy. | ++--------------------------------+-----------------------------------+---------------------------------+ +| :class:`Attr` | :ref:`dom-attr-objects` | Attribute value nodes on | +| | | element nodes. | ++--------------------------------+-----------------------------------+---------------------------------+ +| :class:`Comment` | :ref:`dom-comment-objects` | Representation of comments in | +| | | the source document. 
| ++--------------------------------+-----------------------------------+---------------------------------+ +| :class:`Text` | :ref:`dom-text-objects` | Nodes containing textual | +| | | content from the document. | ++--------------------------------+-----------------------------------+---------------------------------+ +| :class:`ProcessingInstruction` | :ref:`dom-pi-objects` | Processing instruction | +| | | representation. | ++--------------------------------+-----------------------------------+---------------------------------+ + +An additional section describes the exceptions defined for working with the DOM +in Python. + + +.. _dom-implementation-objects: + +DOMImplementation Objects +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :class:`DOMImplementation` interface provides a way for applications to +determine the availability of particular features in the DOM they are using. +DOM Level 2 added the ability to create new :class:`Document` and +:class:`DocumentType` objects using the :class:`DOMImplementation` as well. + + +.. method:: DOMImplementation.hasFeature(feature, version) + + Return true if the feature identified by the pair of strings *feature* and + *version* is implemented. + + +.. method:: DOMImplementation.createDocument(namespaceUri, qualifiedName, doctype) + + Return a new :class:`Document` object (the root of the DOM), with a child + :class:`Element` object having the given *namespaceUri* and *qualifiedName*. The + *doctype* must be a :class:`DocumentType` object created by + :meth:`createDocumentType`, or ``None``. In the Python DOM API, the first two + arguments can also be ``None`` in order to indicate that no :class:`Element` + child is to be created. + + +.. method:: DOMImplementation.createDocumentType(qualifiedName, publicId, systemId) + + Return a new :class:`DocumentType` object that encapsulates the given + *qualifiedName*, *publicId*, and *systemId* strings, representing the + information contained in an XML document type declaration. + + +.. 
_dom-node-objects: + +Node Objects +^^^^^^^^^^^^ + +All of the components of an XML document are subclasses of :class:`Node`. + + +.. attribute:: Node.nodeType + + An integer representing the node type. Symbolic constants for the types are on + the :class:`Node` object: :const:`ELEMENT_NODE`, :const:`ATTRIBUTE_NODE`, + :const:`TEXT_NODE`, :const:`CDATA_SECTION_NODE`, :const:`ENTITY_NODE`, + :const:`PROCESSING_INSTRUCTION_NODE`, :const:`COMMENT_NODE`, + :const:`DOCUMENT_NODE`, :const:`DOCUMENT_TYPE_NODE`, :const:`NOTATION_NODE`. + This is a read-only attribute. + + +.. attribute:: Node.parentNode + + The parent of the current node, or ``None`` for the document node. The value is + always a :class:`Node` object or ``None``. For :class:`Element` nodes, this + will be the parent element, except for the root element, in which case it will + be the :class:`Document` object. For :class:`Attr` nodes, this is always + ``None``. This is a read-only attribute. + + +.. attribute:: Node.attributes + + A :class:`NamedNodeMap` of attribute objects. Only elements have actual values + for this; others provide ``None`` for this attribute. This is a read-only + attribute. + + +.. attribute:: Node.previousSibling + + The node that immediately precedes this one with the same parent. For + instance the element with an end-tag that comes just before the *self* + element's start-tag. Of course, XML documents are made up of more than just + elements so the previous sibling could be text, a comment, or something else. + If this node is the first child of the parent, this attribute will be + ``None``. This is a read-only attribute. + + +.. attribute:: Node.nextSibling + + The node that immediately follows this one with the same parent. See also + :attr:`previousSibling`. If this is the last child of the parent, this + attribute will be ``None``. This is a read-only attribute. + + +.. attribute:: Node.childNodes + + A list of nodes contained within this node. This is a read-only attribute. 
+ + +.. attribute:: Node.firstChild + + The first child of the node, if there are any, or ``None``. This is a read-only + attribute. + + +.. attribute:: Node.lastChild + + The last child of the node, if there are any, or ``None``. This is a read-only + attribute. + + +.. attribute:: Node.localName + + The part of the :attr:`tagName` following the colon if there is one, else the + entire :attr:`tagName`. The value is a string. + + +.. attribute:: Node.prefix + + The part of the :attr:`tagName` preceding the colon if there is one, else the + empty string. The value is a string, or ``None`` + + +.. attribute:: Node.namespaceURI + + The namespace associated with the element name. This will be a string or + ``None``. This is a read-only attribute. + + +.. attribute:: Node.nodeName + + This has a different meaning for each node type; see the DOM specification for + details. You can always get the information you would get here from another + property such as the :attr:`tagName` property for elements or the :attr:`name` + property for attributes. For all node types, the value of this attribute will be + either a string or ``None``. This is a read-only attribute. + + +.. attribute:: Node.nodeValue + + This has a different meaning for each node type; see the DOM specification for + details. The situation is similar to that with :attr:`nodeName`. The value is + a string or ``None``. + + +.. method:: Node.hasAttributes() + + Returns true if the node has any attributes. + + +.. method:: Node.hasChildNodes() + + Returns true if the node has any child nodes. + + +.. method:: Node.isSameNode(other) + + Returns true if *other* refers to the same node as this node. This is especially + useful for DOM implementations which use any sort of proxy architecture (because + more than one object can refer to the same node). + + .. note:: + + This is based on a proposed DOM Level 3 API which is still in the "working + draft" stage, but this particular interface appears uncontroversial. 
Changes + from the W3C will not necessarily affect this method in the Python DOM interface + (though any new W3C API for this would also be supported). + + +.. method:: Node.appendChild(newChild) + + Add a new child node to this node at the end of the list of children, returning + *newChild*. + + +.. method:: Node.insertBefore(newChild, refChild) + + Insert a new child node before an existing child. It must be the case that + *refChild* is a child of this node; if not, :exc:`ValueError` is raised. + *newChild* is returned. If *refChild* is ``None``, it inserts *newChild* at the + end of the children's list. + + +.. method:: Node.removeChild(oldChild) + + Remove a child node. *oldChild* must be a child of this node; if not, + :exc:`ValueError` is raised. *oldChild* is returned on success. If *oldChild* + will not be used further, its :meth:`unlink` method should be called. + + +.. method:: Node.replaceChild(newChild, oldChild) + + Replace an existing node with a new node. It must be the case that *oldChild* + is a child of this node; if not, :exc:`ValueError` is raised. + + +.. method:: Node.normalize() + + Join adjacent text nodes so that all stretches of text are stored as single + :class:`Text` instances. This simplifies processing text from a DOM tree for + many applications. + + .. versionadded:: 2.1 + + +.. method:: Node.cloneNode(deep) + + Clone this node. Setting *deep* means to clone all child nodes as well. This + returns the clone. + + +.. _dom-nodelist-objects: + +NodeList Objects +^^^^^^^^^^^^^^^^ + +A :class:`NodeList` represents a sequence of nodes. These objects are used in +two ways in the DOM Core recommendation: the :class:`Element` object provides +one as its list of child nodes, and the :meth:`getElementsByTagName` and +:meth:`getElementsByTagNameNS` methods of :class:`Node` return objects with this +interface to represent query results. + +The DOM Level 2 recommendation defines one method and one attribute for these +objects: + + +..
method:: NodeList.item(i) + + Return the *i*'th item from the sequence, if there is one, or ``None``. The + index *i* is not allowed to be less than zero or greater than or equal to the + length of the sequence. + + +.. attribute:: NodeList.length + + The number of nodes in the sequence. + +In addition, the Python DOM interface requires that some additional support is +provided to allow :class:`NodeList` objects to be used as Python sequences. All +:class:`NodeList` implementations must include support for :meth:`__len__` and +:meth:`__getitem__`; this allows iteration over the :class:`NodeList` in +:keyword:`for` statements and proper support for the :func:`len` built-in +function. + +If a DOM implementation supports modification of the document, the +:class:`NodeList` implementation must also support the :meth:`__setitem__` and +:meth:`__delitem__` methods. + + +.. _dom-documenttype-objects: + +DocumentType Objects +^^^^^^^^^^^^^^^^^^^^ + +Information about the notations and entities declared by a document (including +the external subset if the parser uses it and can provide the information) is +available from a :class:`DocumentType` object. The :class:`DocumentType` for a +document is available from the :class:`Document` object's :attr:`doctype` +attribute; if there is no ``DOCTYPE`` declaration for the document, the +document's :attr:`doctype` attribute will be set to ``None`` instead of an +instance of this interface. + +:class:`DocumentType` is a specialization of :class:`Node`, and adds the +following attributes: + + +.. attribute:: DocumentType.publicId + + The public identifier for the external subset of the document type definition. + This will be a string or ``None``. + + +.. attribute:: DocumentType.systemId + + The system identifier for the external subset of the document type definition. + This will be a URI as a string, or ``None``. + + +.. attribute:: DocumentType.internalSubset + + A string giving the complete internal subset from the document.
This does not + include the brackets which enclose the subset. If the document has no internal + subset, this should be ``None``. + + +.. attribute:: DocumentType.name + + The name of the root element as given in the ``DOCTYPE`` declaration, if + present. + + +.. attribute:: DocumentType.entities + + This is a :class:`NamedNodeMap` giving the definitions of external entities. + For entity names defined more than once, only the first definition is provided + (others are ignored as required by the XML recommendation). This may be + ``None`` if the information is not provided by the parser, or if no entities are + defined. + + +.. attribute:: DocumentType.notations + + This is a :class:`NamedNodeMap` giving the definitions of notations. For + notation names defined more than once, only the first definition is provided + (others are ignored as required by the XML recommendation). This may be + ``None`` if the information is not provided by the parser, or if no notations + are defined. + + +.. _dom-document-objects: + +Document Objects +^^^^^^^^^^^^^^^^ + +A :class:`Document` represents an entire XML document, including its constituent +elements, attributes, processing instructions, comments etc. Remember that it +inherits properties from :class:`Node`. + + +.. attribute:: Document.documentElement + + The one and only root element of the document. + + +.. method:: Document.createElement(tagName) + + Create and return a new element node. The element is not inserted into the + document when it is created. You need to explicitly insert it with one of the + other methods such as :meth:`insertBefore` or :meth:`appendChild`. + + +.. method:: Document.createElementNS(namespaceURI, tagName) + + Create and return a new element with a namespace. The *tagName* may have a + prefix. The element is not inserted into the document when it is created. You + need to explicitly insert it with one of the other methods such as + :meth:`insertBefore` or :meth:`appendChild`. + + +.. 
method:: Document.createTextNode(data) + + Create and return a text node containing the data passed as a parameter. As + with the other creation methods, this one does not insert the node into the + tree. + + +.. method:: Document.createComment(data) + + Create and return a comment node containing the data passed as a parameter. As + with the other creation methods, this one does not insert the node into the + tree. + + +.. method:: Document.createProcessingInstruction(target, data) + + Create and return a processing instruction node containing the *target* and + *data* passed as parameters. As with the other creation methods, this one does + not insert the node into the tree. + + +.. method:: Document.createAttribute(name) + + Create and return an attribute node. This method does not associate the + attribute node with any particular element. You must use + :meth:`setAttributeNode` on the appropriate :class:`Element` object to use the + newly created attribute instance. + + +.. method:: Document.createAttributeNS(namespaceURI, qualifiedName) + + Create and return an attribute node with a namespace. The *qualifiedName* may + have a prefix. This method does not associate the attribute node with any + particular element. You must use :meth:`setAttributeNode` on the appropriate + :class:`Element` object to use the newly created attribute instance. + + +.. method:: Document.getElementsByTagName(tagName) + + Search for all descendants (direct children, children's children, etc.) with a + particular element type name. + + +.. method:: Document.getElementsByTagNameNS(namespaceURI, localName) + + Search for all descendants (direct children, children's children, etc.) with a + particular namespace URI and localname. The localname is the part of the + qualified name after the prefix. + + +.. _dom-element-objects: + +Element Objects +^^^^^^^^^^^^^^^ + +:class:`Element` is a subclass of :class:`Node`, so inherits all the attributes +of that class. + + +.. 
attribute:: Element.tagName + + The element type name. In a namespace-using document it may have colons in it. + The value is a string. + + +.. method:: Element.getElementsByTagName(tagName) + + Same as equivalent method in the :class:`Document` class. + + +.. method:: Element.getElementsByTagNameNS(namespaceURI, localName) + + Same as equivalent method in the :class:`Document` class. + + +.. method:: Element.hasAttribute(name) + + Returns true if the element has an attribute named by *name*. + + +.. method:: Element.hasAttributeNS(namespaceURI, localName) + + Returns true if the element has an attribute named by *namespaceURI* and + *localName*. + + +.. method:: Element.getAttribute(name) + + Return the value of the attribute named by *name* as a string. If no such + attribute exists, an empty string is returned, as if the attribute had no value. + + +.. method:: Element.getAttributeNode(attrname) + + Return the :class:`Attr` node for the attribute named by *attrname*. + + +.. method:: Element.getAttributeNS(namespaceURI, localName) + + Return the value of the attribute named by *namespaceURI* and *localName* as a + string. If no such attribute exists, an empty string is returned, as if the + attribute had no value. + + +.. method:: Element.getAttributeNodeNS(namespaceURI, localName) + + Return an attribute value as a node, given a *namespaceURI* and *localName*. + + +.. method:: Element.removeAttribute(name) + + Remove an attribute by name. No exception is raised if there is no matching + attribute. + + +.. method:: Element.removeAttributeNode(oldAttr) + + Remove and return *oldAttr* from the attribute list, if present. If *oldAttr* is + not present, :exc:`NotFoundErr` is raised. + + +.. method:: Element.removeAttributeNS(namespaceURI, localName) + + Remove an attribute by name. Note that it uses a localName, not a qname. No + exception is raised if there is no matching attribute. + + +.. method:: Element.setAttribute(name, value) + + Set an attribute value from a string. 
+ + +.. method:: Element.setAttributeNode(newAttr) + + Add a new attribute node to the element, replacing an existing attribute if + necessary if the :attr:`name` attribute matches. If a replacement occurs, the + old attribute node will be returned. If *newAttr* is already in use, + :exc:`InuseAttributeErr` will be raised. + + +.. method:: Element.setAttributeNodeNS(newAttr) + + Add a new attribute node to the element, replacing an existing attribute if + necessary if the :attr:`namespaceURI` and :attr:`localName` attributes match. + If a replacement occurs, the old attribute node will be returned. If *newAttr* + is already in use, :exc:`InuseAttributeErr` will be raised. + + +.. method:: Element.setAttributeNS(namespaceURI, qname, value) + + Set an attribute value from a string, given a *namespaceURI* and a *qname*. + Note that a qname is the whole attribute name. This is different than above. + + +.. _dom-attr-objects: + +Attr Objects +^^^^^^^^^^^^ + +:class:`Attr` inherits from :class:`Node`, so inherits all its attributes. + + +.. attribute:: Attr.name + + The attribute name. In a namespace-using document it may have colons in it. + + +.. attribute:: Attr.localName + + The part of the name following the colon if there is one, else the entire name. + This is a read-only attribute. + + +.. attribute:: Attr.prefix + + The part of the name preceding the colon if there is one, else the empty string. + + +.. _dom-attributelist-objects: + +NamedNodeMap Objects +^^^^^^^^^^^^^^^^^^^^ + +:class:`NamedNodeMap` does *not* inherit from :class:`Node`. + + +.. attribute:: NamedNodeMap.length + + The length of the attribute list. + + +.. method:: NamedNodeMap.item(index) + + Return an attribute with a particular index. The order you get the attributes + in is arbitrary but will be consistent for the life of a DOM. Each item is an + attribute node. Get its value with the :attr:`value` attribute. + +There are also experimental methods that give this class more mapping behavior. 
+You can use them or you can use the standardized :meth:`getAttribute\*` family +of methods on the :class:`Element` objects. + + +.. _dom-comment-objects: + +Comment Objects +^^^^^^^^^^^^^^^ + +:class:`Comment` represents a comment in the XML document. It is a subclass of +:class:`Node`, but cannot have child nodes. + + +.. attribute:: Comment.data + + The content of the comment as a string. The attribute contains all characters + between the leading ``<!-``\ ``-`` and trailing ``-``\ ``->``, but does not + include them. + + +.. _dom-text-objects: + +Text and CDATASection Objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :class:`Text` interface represents text in the XML document. If the parser +and DOM implementation support the DOM's XML extension, portions of the text +enclosed in CDATA marked sections are stored in :class:`CDATASection` objects. +These two interfaces are identical, but provide different values for the +:attr:`nodeType` attribute. + +These interfaces extend the :class:`Node` interface. They cannot have child +nodes. + + +.. attribute:: Text.data + + The content of the text node as a string. + +.. note:: + + The use of a :class:`CDATASection` node does not indicate that the node + represents a complete CDATA marked section, only that the content of the node + was part of a CDATA section. A single CDATA section may be represented by more + than one node in the document tree. There is no way to determine whether two + adjacent :class:`CDATASection` nodes represent different CDATA marked sections. + + +.. _dom-pi-objects: + +ProcessingInstruction Objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Represents a processing instruction in the XML document; this inherits from the +:class:`Node` interface and cannot have child nodes. + + +.. attribute:: ProcessingInstruction.target + + The content of the processing instruction up to the first whitespace character. + This is a read-only attribute. + + +.. 
attribute:: ProcessingInstruction.data + + The content of the processing instruction following the first whitespace + character. + + +.. _dom-exceptions: + +Exceptions +^^^^^^^^^^ + +.. versionadded:: 2.1 + +The DOM Level 2 recommendation defines a single exception, :exc:`DOMException`, +and a number of constants that allow applications to determine what sort of +error occurred. :exc:`DOMException` instances carry a :attr:`code` attribute +that provides the appropriate value for the specific exception. + +The Python DOM interface provides the constants, but also expands the set of +exceptions so that a specific exception exists for each of the exception codes +defined by the DOM. The implementations must raise the appropriate specific +exception, each of which carries the appropriate value for the :attr:`code` +attribute. + + +.. exception:: DOMException + + Base exception class used for all specific DOM exceptions. This exception class + cannot be directly instantiated. + + +.. exception:: DomstringSizeErr + + Raised when a specified range of text does not fit into a string. This is not + known to be used in the Python DOM implementations, but may be received from DOM + implementations not written in Python. + + +.. exception:: HierarchyRequestErr + + Raised when an attempt is made to insert a node where the node type is not + allowed. + + +.. exception:: IndexSizeErr + + Raised when an index or size parameter to a method is negative or exceeds the + allowed values. + + +.. exception:: InuseAttributeErr + + Raised when an attempt is made to insert an :class:`Attr` node that is already + present elsewhere in the document. + + +.. exception:: InvalidAccessErr + + Raised if a parameter or an operation is not supported on the underlying object. + + +.. exception:: InvalidCharacterErr + + This exception is raised when a string parameter contains a character that is + not permitted in the context it's being used in by the XML 1.0 recommendation. 
+ For example, attempting to create an :class:`Element` node with a space in the + element type name will cause this error to be raised. + + +.. exception:: InvalidModificationErr + + Raised when an attempt is made to modify the type of a node. + + +.. exception:: InvalidStateErr + + Raised when an attempt is made to use an object that is not defined or is no + longer usable. + + +.. exception:: NamespaceErr + + If an attempt is made to change any object in a way that is not permitted with + regard to the `Namespaces in XML <http://www.w3.org/TR/REC-xml-names/>`_ + recommendation, this exception is raised. + + +.. exception:: NotFoundErr + + Exception when a node does not exist in the referenced context. For example, + :meth:`NamedNodeMap.removeNamedItem` will raise this if the node passed in does + not exist in the map. + + +.. exception:: NotSupportedErr + + Raised when the implementation does not support the requested type of object or + operation. + + +.. exception:: NoDataAllowedErr + + This is raised if data is specified for a node which does not support data. + + .. % XXX a better explanation is needed! + + +.. exception:: NoModificationAllowedErr + + Raised on attempts to modify an object where modifications are not allowed (such + as for read-only nodes). + + +.. exception:: SyntaxErr + + Raised when an invalid or illegal string is specified. + + .. % XXX how is this different from InvalidCharacterErr ??? + + +.. exception:: WrongDocumentErr + + Raised when a node is inserted in a different document than it currently belongs + to, and the implementation does not support migrating the node from one document + to the other. 
+ +The exception codes defined in the DOM recommendation map to the exceptions +described above according to this table: + ++--------------------------------------+---------------------------------+ +| Constant | Exception | ++======================================+=================================+ +| :const:`DOMSTRING_SIZE_ERR` | :exc:`DomstringSizeErr` | ++--------------------------------------+---------------------------------+ +| :const:`HIERARCHY_REQUEST_ERR` | :exc:`HierarchyRequestErr` | ++--------------------------------------+---------------------------------+ +| :const:`INDEX_SIZE_ERR` | :exc:`IndexSizeErr` | ++--------------------------------------+---------------------------------+ +| :const:`INUSE_ATTRIBUTE_ERR` | :exc:`InuseAttributeErr` | ++--------------------------------------+---------------------------------+ +| :const:`INVALID_ACCESS_ERR` | :exc:`InvalidAccessErr` | ++--------------------------------------+---------------------------------+ +| :const:`INVALID_CHARACTER_ERR` | :exc:`InvalidCharacterErr` | ++--------------------------------------+---------------------------------+ +| :const:`INVALID_MODIFICATION_ERR` | :exc:`InvalidModificationErr` | ++--------------------------------------+---------------------------------+ +| :const:`INVALID_STATE_ERR` | :exc:`InvalidStateErr` | ++--------------------------------------+---------------------------------+ +| :const:`NAMESPACE_ERR` | :exc:`NamespaceErr` | ++--------------------------------------+---------------------------------+ +| :const:`NOT_FOUND_ERR` | :exc:`NotFoundErr` | ++--------------------------------------+---------------------------------+ +| :const:`NOT_SUPPORTED_ERR` | :exc:`NotSupportedErr` | ++--------------------------------------+---------------------------------+ +| :const:`NO_DATA_ALLOWED_ERR` | :exc:`NoDataAllowedErr` | ++--------------------------------------+---------------------------------+ +| :const:`NO_MODIFICATION_ALLOWED_ERR` | :exc:`NoModificationAllowedErr` | 
++--------------------------------------+---------------------------------+ +| :const:`SYNTAX_ERR` | :exc:`SyntaxErr` | ++--------------------------------------+---------------------------------+ +| :const:`WRONG_DOCUMENT_ERR` | :exc:`WrongDocumentErr` | ++--------------------------------------+---------------------------------+ + + +.. _dom-conformance: + +Conformance +----------- + +This section describes the conformance requirements and relationships between +the Python DOM API, the W3C DOM recommendations, and the OMG IDL mapping for +Python. + + +.. _dom-type-mapping: + +Type Mapping +^^^^^^^^^^^^ + +The primitive IDL types used in the DOM specification are mapped to Python types +according to the following table. + ++------------------+-------------------------------------------+ +| IDL Type | Python Type | ++==================+===========================================+ +| ``boolean`` | ``IntegerType`` (with a value of ``0`` or | +| | ``1``) | ++------------------+-------------------------------------------+ +| ``int`` | ``IntegerType`` | ++------------------+-------------------------------------------+ +| ``long int`` | ``IntegerType`` | ++------------------+-------------------------------------------+ +| ``unsigned int`` | ``IntegerType`` | ++------------------+-------------------------------------------+ + +Additionally, the :class:`DOMString` defined in the recommendation is mapped to +a Python string or Unicode string. Applications should be able to handle +Unicode whenever a string is returned from the DOM. + +The IDL :keyword:`null` value is mapped to ``None``, which may be accepted or +provided by the implementation whenever :keyword:`null` is allowed by the API. + + +.. _dom-accessor-methods: + +Accessor Methods +^^^^^^^^^^^^^^^^ + +The mapping from OMG IDL to Python defines accessor functions for IDL +:keyword:`attribute` declarations in much the way the Java mapping does. 
+Mapping the IDL declarations :: + + readonly attribute string someValue; + attribute string anotherValue; + +yields three accessor functions: a "get" method for :attr:`someValue` +(:meth:`_get_someValue`), and "get" and "set" methods for :attr:`anotherValue` +(:meth:`_get_anotherValue` and :meth:`_set_anotherValue`). The mapping, in +particular, does not require that the IDL attributes are accessible as normal +Python attributes: ``object.someValue`` is *not* required to work, and may +raise an :exc:`AttributeError`. + +The Python DOM API, however, *does* require that normal attribute access work. +This means that the typical surrogates generated by Python IDL compilers are not +likely to work, and wrapper objects may be needed on the client if the DOM +objects are accessed via CORBA. While this does require some additional +consideration for CORBA DOM clients, the implementers with experience using DOM +over CORBA from Python do not consider this a problem. Attributes that are +declared :keyword:`readonly` may not restrict write access in all DOM +implementations. + +In the Python DOM API, accessor functions are not required. If provided, they +should take the form defined by the Python IDL mapping, but these methods are +considered unnecessary since the attributes are accessible directly from Python. +"Set" accessors should never be provided for :keyword:`readonly` attributes. + +The IDL definitions do not fully embody the requirements of the W3C DOM API, +such as the notion of certain objects, such as the return value of +:meth:`getElementsByTagName`, being "live". The Python DOM API does not require +implementations to enforce such requirements. 
+ diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst new file mode 100644 index 0000000..ead8d29 --- /dev/null +++ b/Doc/library/xml.etree.elementtree.rst @@ -0,0 +1,444 @@ + +:mod:`xml.etree.ElementTree` --- The ElementTree XML API +======================================================== + +.. module:: xml.etree.ElementTree + :synopsis: Implementation of the ElementTree API. +.. moduleauthor:: Fredrik Lundh <fredrik@pythonware.com> + + +.. versionadded:: 2.5 + +The Element type is a flexible container object, designed to store hierarchical +data structures in memory. The type can be described as a cross between a list +and a dictionary. + +Each element has a number of properties associated with it: + +* a tag which is a string identifying what kind of data this element represents + (the element type, in other words). + +* a number of attributes, stored in a Python dictionary. + +* a text string. + +* an optional tail string. + +* a number of child elements, stored in a Python sequence + +To create an element instance, use the Element or SubElement factory functions. + +The :class:`ElementTree` class can be used to wrap an element structure, and +convert it from and to XML. + +A C implementation of this API is available as :mod:`xml.etree.cElementTree`. + + +.. _elementtree-functions: + +Functions +--------- + + +.. function:: Comment([text]) + + Comment element factory. This factory function creates a special element that + will be serialized as an XML comment. The comment string can be either an 8-bit + ASCII string or a Unicode string. *text* is a string containing the comment + string. Returns an element instance representing a comment. + + +.. function:: dump(elem) + + Writes an element tree or element structure to sys.stdout. This function should + be used for debugging only. + + The exact output format is implementation dependent. In this version, it's + written as an ordinary XML file. 
+ + *elem* is an element tree or an individual element. + + +.. function:: Element(tag[, attrib][, **extra]) + + Element factory. This function returns an object implementing the standard + Element interface. The exact class or type of that object is implementation + dependent, but it will always be compatible with the _ElementInterface class in + this module. + + The element name, attribute names, and attribute values can be either 8-bit + ASCII strings or Unicode strings. *tag* is the element name. *attrib* is an + optional dictionary, containing element attributes. *extra* contains additional + attributes, given as keyword arguments. Returns an element instance. + + +.. function:: fromstring(text) + + Parses an XML section from a string constant. Same as XML. *text* is a string + containing XML data. Returns an Element instance. + + +.. function:: iselement(element) + + Checks if an object appears to be a valid element object. *element* is an + element instance. Returns a true value if this is an element object. + + +.. function:: iterparse(source[, events]) + + Parses an XML section into an element tree incrementally, and reports what's + going on to the user. *source* is a filename or file object containing XML data. + *events* is a list of events to report back. If omitted, only "end" events are + reported. Returns an iterator providing ``(event, elem)`` pairs. + + +.. function:: parse(source[, parser]) + + Parses an XML section into an element tree. *source* is a filename or file + object containing XML data. *parser* is an optional parser instance. If not + given, the standard XMLTreeBuilder parser is used. Returns an ElementTree + instance. + + +.. function:: ProcessingInstruction(target[, text]) + + PI element factory. This factory function creates a special element that will + be serialized as an XML processing instruction. *target* is a string containing + the PI target. *text* is a string containing the PI contents, if given. 
Returns + an element instance, representing a processing instruction. + + +.. function:: SubElement(parent, tag[, attrib[, **extra]]) + + Subelement factory. This function creates an element instance, and appends it + to an existing element. + + The element name, attribute names, and attribute values can be either 8-bit + ASCII strings or Unicode strings. *parent* is the parent element. *tag* is the + subelement name. *attrib* is an optional dictionary, containing element + attributes. *extra* contains additional attributes, given as keyword arguments. + Returns an element instance. + + +.. function:: tostring(element[, encoding]) + + Generates a string representation of an XML element, including all subelements. + *element* is an Element instance. *encoding* is the output encoding (default is + US-ASCII). Returns an encoded string containing the XML data. + + +.. function:: XML(text) + + Parses an XML section from a string constant. This function can be used to + embed "XML literals" in Python code. *text* is a string containing XML data. + Returns an Element instance. + + +.. function:: XMLID(text) + + Parses an XML section from a string constant, and also returns a dictionary + which maps from element id:s to elements. *text* is a string containing XML + data. Returns a tuple containing an Element instance and a dictionary. + + +.. _elementtree-element-interface: + +The Element Interface +--------------------- + +Element objects returned by Element or SubElement have the following methods +and attributes. + + +.. attribute:: Element.tag + + A string identifying what kind of data this element represents (the element + type, in other words). + + +.. attribute:: Element.text + + The *text* attribute can be used to hold additional data associated with the + element. As the name implies this attribute is usually a string but may be any + application-specific object. 
If the element is created from an XML file the + attribute will contain any text found between the element tags. + + +.. attribute:: Element.tail + + The *tail* attribute can be used to hold additional data associated with the + element. This attribute is usually a string but may be any application-specific + object. If the element is created from an XML file the attribute will contain + any text found after the element's end tag and before the next tag. + + +.. attribute:: Element.attrib + + A dictionary containing the element's attributes. Note that while the *attrib* + value is always a real mutable Python dictionary, an ElementTree implementation + may choose to use another internal representation, and create the dictionary + only if someone asks for it. To take advantage of such implementations, use the + dictionary methods below whenever possible. + +The following dictionary-like methods work on the element attributes. + + +.. method:: Element.clear() + + Resets an element. This function removes all subelements, clears all + attributes, and sets the text and tail attributes to None. + + +.. method:: Element.get(key[, default=None]) + + Gets the element attribute named *key*. + + Returns the attribute value, or *default* if the attribute was not found. + + +.. method:: Element.items() + + Returns the element attributes as a sequence of (name, value) pairs. The + attributes are returned in an arbitrary order. + + +.. method:: Element.keys() + + Returns the element's attribute names as a list. The names are returned in an + arbitrary order. + + +.. method:: Element.set(key, value) + + Set the attribute *key* on the element to *value*. + +The following methods work on the element's children (subelements). + + +.. method:: Element.append(subelement) + + Adds the element *subelement* to the end of this element's internal list of + subelements. + + +.. method:: Element.find(match) + + Finds the first subelement matching *match*. *match* may be a tag name or path. 
+ Returns an element instance or ``None``. + + +.. method:: Element.findall(match) + + Finds all subelements matching *match*. *match* may be a tag name or path. + Returns an iterable yielding all matching elements in document order. + + +.. method:: Element.findtext(condition[, default=None]) + + Finds text for the first subelement matching *condition*. *condition* may be a + tag name or path. Returns the text content of the first matching element, or + *default* if no element was found. Note that if the matching element has no + text content an empty string is returned. + + +.. method:: Element.getchildren() + + Returns all subelements. The elements are returned in document order. + + +.. method:: Element.getiterator([tag=None]) + + Creates a tree iterator with the current element as the root. The iterator + iterates over this element and all elements below it that match the given tag. + If tag is ``None`` or ``'*'`` then all elements are iterated over. Returns an + iterable that provides element objects in document (depth first) order. + + +.. method:: Element.insert(index, element) + + Inserts a subelement at the given position in this element. + + +.. method:: Element.makeelement(tag, attrib) + + Creates a new element object of the same type as this element. Do not call this + method, use the SubElement factory function instead. + + +.. method:: Element.remove(subelement) + + Removes *subelement* from the element. Unlike the findXYZ methods this method + compares elements based on the instance identity, not on tag value or contents. + +Element objects also support the following sequence type methods for working +with subelements: :meth:`__delitem__`, :meth:`__getitem__`, :meth:`__setitem__`, +:meth:`__len__`. + +Caution: Because Element objects do not define a :meth:`__nonzero__` method, +elements with no subelements will test as ``False``. :: + + element = root.find('foo') + + if not element: # careful! 
+ print "element not found, or element has no subelements" + + if element is None: + print "element not found" + + +.. _elementtree-elementtree-objects: + +ElementTree Objects +------------------- + + +.. class:: ElementTree([element,] [file]) + + ElementTree wrapper class. This class represents an entire element hierarchy, + and adds some extra support for serialization to and from standard XML. + + *element* is the root element. The tree is initialized with the contents of the + XML *file* if given. + + +.. method:: ElementTree._setroot(element) + + Replaces the root element for this tree. This discards the current contents of + the tree, and replaces it with the given element. Use with care. *element* is + an element instance. + + +.. method:: ElementTree.find(path) + + Finds the first toplevel element with given tag. Same as getroot().find(path). + *path* is the element to look for. Returns the first matching element, or + ``None`` if no element was found. + + +.. method:: ElementTree.findall(path) + + Finds all toplevel elements with the given tag. Same as getroot().findall(path). + *path* is the element to look for. Returns a list or iterator containing all + matching elements, in document order. + + +.. method:: ElementTree.findtext(path[, default]) + + Finds the element text for the first toplevel element with given tag. Same as + getroot().findtext(path). *path* is the toplevel element to look for. *default* + is the value to return if the element was not found. Returns the text content of + the first matching element, or the default value if no element was found. Note + that if the element is found, but has no text content, this method returns + an empty string. + + +.. method:: ElementTree.getiterator([tag]) + + Creates and returns a tree iterator for the root element. The iterator loops + over all elements in this tree, in section order. *tag* is the tag to look for + (default is to return all elements). + + +.. 
method:: ElementTree.getroot() + + Returns the root element for this tree. + + +.. method:: ElementTree.parse(source[, parser]) + + Loads an external XML section into this element tree. *source* is a file name or + file object. *parser* is an optional parser instance. If not given, the + standard XMLTreeBuilder parser is used. Returns the section root element. + + +.. method:: ElementTree.write(file[, encoding]) + + Writes the element tree to a file, as XML. *file* is a file name, or a file + object opened for writing. *encoding* is the output encoding (default is + US-ASCII). + + +.. _elementtree-qname-objects: + +QName Objects +------------- + + +.. class:: QName(text_or_uri[, tag]) + + QName wrapper. This can be used to wrap a QName attribute value, in order to + get proper namespace handling on output. *text_or_uri* is a string containing + the QName value, in the form {uri}local, or, if the tag argument is given, the + URI part of a QName. If *tag* is given, the first argument is interpreted as a + URI, and this argument is interpreted as a local name. :class:`QName` instances + are opaque. + + +.. _elementtree-treebuilder-objects: + +TreeBuilder Objects +------------------- + + +.. class:: TreeBuilder([element_factory]) + + Generic element structure builder. This builder converts a sequence of start, + data, and end method calls to a well-formed element structure. You can use this + class to build an element structure using a custom XML parser, or a parser for + some other XML-like format. The *element_factory* is called to create new + Element instances when given. + + +.. method:: TreeBuilder.close() + + Flushes the parser buffers, and returns the toplevel document element. Returns + an Element instance. + + +.. method:: TreeBuilder.data(data) + + Adds text to the current element. *data* is a string. This should be either an + 8-bit string containing ASCII text, or a Unicode string. + + +.. method:: TreeBuilder.end(tag) + + Closes the current element. 
*tag* is the element name. Returns the closed + element. + + +.. method:: TreeBuilder.start(tag, attrs) + + Opens a new element. *tag* is the element name. *attrs* is a dictionary + containing element attributes. Returns the opened element. + + +.. _elementtree-xmltreebuilder-objects: + +XMLTreeBuilder Objects +---------------------- + + +.. class:: XMLTreeBuilder([html,] [target]) + + Element structure builder for XML source data, based on the expat parser. *html* + are predefined HTML entities. This flag is not supported by the current + implementation. *target* is the target object. If omitted, the builder uses an + instance of the standard TreeBuilder class. + + +.. method:: XMLTreeBuilder.close() + + Finishes feeding data to the parser. Returns an element structure. + + +.. method:: XMLTreeBuilder.doctype(name, pubid, system) + + Handles a doctype declaration. *name* is the doctype name. *pubid* is the public + identifier. *system* is the system identifier. + + +.. method:: XMLTreeBuilder.feed(data) + + Feeds data to the parser. *data* is encoded data. + diff --git a/Doc/library/xml.etree.rst b/Doc/library/xml.etree.rst new file mode 100644 index 0000000..e14c5f9 --- /dev/null +++ b/Doc/library/xml.etree.rst @@ -0,0 +1,25 @@ +:mod:`xml.etree` --- The ElementTree API for XML +================================================ + +.. module:: xml.etree + :synopsis: Package containing common ElementTree modules. +.. moduleauthor:: Fredrik Lundh <fredrik@pythonware.com> + + +.. versionadded:: 2.5 + +The ElementTree package is a simple, efficient, and quite popular library for +XML manipulation in Python. The :mod:`xml.etree` package contains the most +common components from the ElementTree API library. In the current release, +this package contains the :mod:`ElementTree`, :mod:`ElementPath`, and +:mod:`ElementInclude` modules from the full ElementTree distribution. + +.. % XXX To be continued! + + +.. 
seealso:: + + `ElementTree Overview <http://effbot.org/tag/elementtree>`_ + The home page for :mod:`ElementTree`. This includes links to additional + documentation, alternative implementations, and other add-ons. + diff --git a/Doc/library/xml.sax.handler.rst b/Doc/library/xml.sax.handler.rst new file mode 100644 index 0000000..bc287d1 --- /dev/null +++ b/Doc/library/xml.sax.handler.rst @@ -0,0 +1,402 @@ + +:mod:`xml.sax.handler` --- Base classes for SAX handlers +======================================================== + +.. module:: xml.sax.handler + :synopsis: Base classes for SAX event handlers. +.. moduleauthor:: Lars Marius Garshol <larsga@garshol.priv.no> +.. sectionauthor:: Martin v. Löwis <martin@v.loewis.de> + + +.. versionadded:: 2.0 + +The SAX API defines four kinds of handlers: content handlers, DTD handlers, +error handlers, and entity resolvers. Applications normally only need to +implement those interfaces whose events they are interested in; they can +implement the interfaces in a single object or in multiple objects. Handler +implementations should inherit from the base classes provided in the module +:mod:`xml.sax.handler`, so that all methods get default implementations. + + +.. class:: ContentHandler + + This is the main callback interface in SAX, and the one most important to + applications. The order of events in this interface mirrors the order of the + information in the document. + + +.. class:: DTDHandler + + Handle DTD events. + + This interface specifies only those DTD events required for basic parsing + (unparsed entities and attributes). + + +.. class:: EntityResolver + + Basic interface for resolving entities. If you create an object implementing + this interface, then register the object with your Parser, the parser will call + the method in your object to resolve all external entities. + + +.. class:: ErrorHandler + + Interface used by the parser to present error and warning messages to the + application. 
The methods of this object control whether errors are immediately + converted to exceptions or are handled in some other way. + +In addition to these classes, :mod:`xml.sax.handler` provides symbolic constants +for the feature and property names. + + +.. data:: feature_namespaces + + Value: ``"http://xml.org/sax/features/namespaces"`` --- true: Perform Namespace + processing. --- false: Optionally do not perform Namespace processing (implies + namespace-prefixes; default). --- access: (parsing) read-only; (not parsing) + read/write + + +.. data:: feature_namespace_prefixes + + Value: ``"http://xml.org/sax/features/namespace-prefixes"`` --- true: Report + the original prefixed names and attributes used for Namespace + declarations. --- false: Do not report attributes used for Namespace + declarations, and optionally do not report original prefixed names + (default). --- access: (parsing) read-only; (not parsing) read/write + + +.. data:: feature_string_interning + + Value: ``"http://xml.org/sax/features/string-interning"`` --- true: All element + names, prefixes, attribute names, Namespace URIs, and local names are interned + using the built-in intern function. --- false: Names are not necessarily + interned, although they may be (default). --- access: (parsing) read-only; (not + parsing) read/write + + +.. data:: feature_validation + + Value: ``"http://xml.org/sax/features/validation"`` --- true: Report all + validation errors (implies external-general-entities and + external-parameter-entities). --- false: Do not report validation errors. --- + access: (parsing) read-only; (not parsing) read/write + + +.. data:: feature_external_ges + + Value: ``"http://xml.org/sax/features/external-general-entities"`` --- true: + Include all external general (text) entities. --- false: Do not include + external general entities. --- access: (parsing) read-only; (not parsing) + read/write + + +.. 
data:: feature_external_pes + + Value: ``"http://xml.org/sax/features/external-parameter-entities"`` --- true: + Include all external parameter entities, including the external DTD subset. --- + false: Do not include any external parameter entities, even the external DTD + subset. --- access: (parsing) read-only; (not parsing) read/write + + +.. data:: all_features + + List of all features. + + +.. data:: property_lexical_handler + + Value: ``"http://xml.org/sax/properties/lexical-handler"`` --- data type: + xml.sax.sax2lib.LexicalHandler (not supported in Python 2) --- description: An + optional extension handler for lexical events like comments. --- access: + read/write + + +.. data:: property_declaration_handler + + Value: ``"http://xml.org/sax/properties/declaration-handler"`` --- data type: + xml.sax.sax2lib.DeclHandler (not supported in Python 2) --- description: An + optional extension handler for DTD-related events other than notations and + unparsed entities. --- access: read/write + + +.. data:: property_dom_node + + Value: ``"http://xml.org/sax/properties/dom-node"`` --- data type: + org.w3c.dom.Node (not supported in Python 2) --- description: When parsing, + the current DOM node being visited if this is a DOM iterator; when not parsing, + the root DOM node for iteration. --- access: (parsing) read-only; (not parsing) + read/write + + +.. data:: property_xml_string + + Value: ``"http://xml.org/sax/properties/xml-string"`` --- data type: String --- + description: The literal string of characters that was the source for the + current event. --- access: read-only + + +.. data:: all_properties + + List of all known property names. + + +.. _content-handler-objects: + +ContentHandler Objects +---------------------- + +Users are expected to subclass :class:`ContentHandler` to support their +application. The following methods are called by the parser on the appropriate +events in the input document: + + +.. 
method:: ContentHandler.setDocumentLocator(locator) + + Called by the parser to give the application a locator for locating the origin + of document events. + + SAX parsers are strongly encouraged (though not absolutely required) to supply a + locator: if it does so, it must supply the locator to the application by + invoking this method before invoking any of the other methods in the + DocumentHandler interface. + + The locator allows the application to determine the end position of any + document-related event, even if the parser is not reporting an error. Typically, + the application will use this information for reporting its own errors (such as + character content that does not match an application's business rules). The + information returned by the locator is probably not sufficient for use with a + search engine. + + Note that the locator will return correct information only during the invocation + of the events in this interface. The application should not attempt to use it at + any other time. + + +.. method:: ContentHandler.startDocument() + + Receive notification of the beginning of a document. + + The SAX parser will invoke this method only once, before any other methods in + this interface or in DTDHandler (except for :meth:`setDocumentLocator`). + + +.. method:: ContentHandler.endDocument() + + Receive notification of the end of a document. + + The SAX parser will invoke this method only once, and it will be the last method + invoked during the parse. The parser shall not invoke this method until it has + either abandoned parsing (because of an unrecoverable error) or reached the end + of input. + + +.. method:: ContentHandler.startPrefixMapping(prefix, uri) + + Begin the scope of a prefix-URI Namespace mapping. 
+ + The information from this event is not necessary for normal Namespace + processing: the SAX XML reader will automatically replace prefixes for element + and attribute names when the ``feature_namespaces`` feature is enabled (the + default). + + There are cases, however, when applications need to use prefixes in character + data or in attribute values, where they cannot safely be expanded automatically; + the :meth:`startPrefixMapping` and :meth:`endPrefixMapping` events supply the + information to the application to expand prefixes in those contexts itself, if + necessary. + + .. % XXX This is not really the default, is it? MvL + + Note that :meth:`startPrefixMapping` and :meth:`endPrefixMapping` events are not + guaranteed to be properly nested relative to each other: all + :meth:`startPrefixMapping` events will occur before the corresponding + :meth:`startElement` event, and all :meth:`endPrefixMapping` events will occur + after the corresponding :meth:`endElement` event, but their order is not + guaranteed. + + +.. method:: ContentHandler.endPrefixMapping(prefix) + + End the scope of a prefix-URI mapping. + + See :meth:`startPrefixMapping` for details. This event will always occur after + the corresponding :meth:`endElement` event, but the order of + :meth:`endPrefixMapping` events is not otherwise guaranteed. + + +.. method:: ContentHandler.startElement(name, attrs) + + Signals the start of an element in non-namespace mode. + + The *name* parameter contains the raw XML 1.0 name of the element type as a + string and the *attrs* parameter holds an object of the :class:`Attributes` + interface (see :ref:`attributes-objects`) containing the attributes of + the element. The object passed as *attrs* may be re-used by the parser; holding + on to a reference to it is not a reliable way to keep a copy of the attributes. + To keep a copy of the attributes, use the :meth:`copy` method of the *attrs* + object. + + +..
method:: ContentHandler.endElement(name) + + Signals the end of an element in non-namespace mode. + + The *name* parameter contains the name of the element type, just as with the + :meth:`startElement` event. + + +.. method:: ContentHandler.startElementNS(name, qname, attrs) + + Signals the start of an element in namespace mode. + + The *name* parameter contains the name of the element type as a ``(uri, + localname)`` tuple, the *qname* parameter contains the raw XML 1.0 name used in + the source document, and the *attrs* parameter holds an instance of the + :class:`AttributesNS` interface (see :ref:`attributes-ns-objects`) + containing the attributes of the element. If no namespace is associated with + the element, the *uri* component of *name* will be ``None``. The object passed + as *attrs* may be re-used by the parser; holding on to a reference to it is not + a reliable way to keep a copy of the attributes. To keep a copy of the + attributes, use the :meth:`copy` method of the *attrs* object. + + Parsers may set the *qname* parameter to ``None``, unless the + ``feature_namespace_prefixes`` feature is activated. + + +.. method:: ContentHandler.endElementNS(name, qname) + + Signals the end of an element in namespace mode. + + The *name* parameter contains the name of the element type, just as with the + :meth:`startElementNS` method, likewise the *qname* parameter. + + +.. method:: ContentHandler.characters(content) + + Receive notification of character data. + + The Parser will call this method to report each chunk of character data. SAX + parsers may return all contiguous character data in a single chunk, or they may + split it into several chunks; however, all of the characters in any single event + must come from the same external entity so that the Locator provides useful + information. + + *content* may be a Unicode string or a byte string; the ``expat`` reader module + always produces Unicode strings. + + ..
note:: + + The earlier SAX 1 interface provided by the Python XML Special Interest Group + used a more Java-like interface for this method. Since most parsers used from + Python did not take advantage of the older interface, the simpler signature was + chosen to replace it. To convert old code to the new interface, use *content* + instead of slicing content with the old *offset* and *length* parameters. + + +.. method:: ContentHandler.ignorableWhitespace(whitespace) + + Receive notification of ignorable whitespace in element content. + + Validating Parsers must use this method to report each chunk of ignorable + whitespace (see the W3C XML 1.0 recommendation, section 2.10): non-validating + parsers may also use this method if they are capable of parsing and using + content models. + + SAX parsers may return all contiguous whitespace in a single chunk, or they may + split it into several chunks; however, all of the characters in any single event + must come from the same external entity, so that the Locator provides useful + information. + + +.. method:: ContentHandler.processingInstruction(target, data) + + Receive notification of a processing instruction. + + The Parser will invoke this method once for each processing instruction found: + note that processing instructions may occur before or after the main document + element. + + A SAX parser should never report an XML declaration (XML 1.0, section 2.8) or a + text declaration (XML 1.0, section 4.3.1) using this method. + + +.. method:: ContentHandler.skippedEntity(name) + + Receive notification of a skipped entity. + + The Parser will invoke this method once for each entity skipped. Non-validating + processors may skip entities if they have not seen the declarations (because, + for example, the entity was declared in an external DTD subset). All processors + may skip external entities, depending on the values of the + ``feature_external_ges`` and the ``feature_external_pes`` properties. + + +.. 
_dtd-handler-objects: + +DTDHandler Objects +------------------ + +:class:`DTDHandler` instances provide the following methods: + + +.. method:: DTDHandler.notationDecl(name, publicId, systemId) + + Handle a notation declaration event. + + +.. method:: DTDHandler.unparsedEntityDecl(name, publicId, systemId, ndata) + + Handle an unparsed entity declaration event. + + +.. _entity-resolver-objects: + +EntityResolver Objects +---------------------- + + +.. method:: EntityResolver.resolveEntity(publicId, systemId) + + Resolve the system identifier of an entity and return either the system + identifier to read from as a string, or an InputSource to read from. The default + implementation returns *systemId*. + + +.. _sax-error-handler: + +ErrorHandler Objects +-------------------- + +Objects with this interface are used to receive error and warning information +from the :class:`XMLReader`. If you create an object that implements this +interface, then register the object with your :class:`XMLReader`, the parser +will call the methods in your object to report all warnings and errors. There +are three levels of errors available: warnings, (possibly) recoverable errors, +and unrecoverable errors. All methods take a :exc:`SAXParseException` as the +only parameter. Errors and warnings may be converted to an exception by raising +the passed-in exception object. + + +.. method:: ErrorHandler.error(exception) + + Called when the parser encounters a recoverable error. If this method does not + raise an exception, parsing may continue, but further document information + should not be expected by the application. Allowing the parser to continue may + allow additional errors to be discovered in the input document. + + +.. method:: ErrorHandler.fatalError(exception) + + Called when the parser encounters an error it cannot recover from; parsing is + expected to terminate when this method returns. + + +.. 
method:: ErrorHandler.warning(exception) + + Called when the parser presents minor warning information to the application. + Parsing is expected to continue when this method returns, and document + information will continue to be passed to the application. Raising an exception + in this method will cause parsing to end. + diff --git a/Doc/library/xml.sax.reader.rst b/Doc/library/xml.sax.reader.rst new file mode 100644 index 0000000..d64a4fc --- /dev/null +++ b/Doc/library/xml.sax.reader.rst @@ -0,0 +1,386 @@ + +:mod:`xml.sax.xmlreader` --- Interface for XML parsers +====================================================== + +.. module:: xml.sax.xmlreader + :synopsis: Interface which SAX-compliant XML parsers must implement. +.. moduleauthor:: Lars Marius Garshol <larsga@garshol.priv.no> +.. sectionauthor:: Martin v. Löwis <martin@v.loewis.de> + + +.. versionadded:: 2.0 + +SAX parsers implement the :class:`XMLReader` interface. They are implemented in +a Python module, which must provide a function :func:`create_parser`. This +function is invoked by :func:`xml.sax.make_parser` with no arguments to create +a new parser object. + + +.. class:: XMLReader() + + Base class which can be inherited by SAX parsers. + + +.. class:: IncrementalParser() + + In some cases, it is desirable not to parse an input source at once, but to feed + chunks of the document as they get available. Note that the reader will normally + not read the entire file, but read it in chunks as well; still :meth:`parse` + won't return until the entire document is processed. So these interfaces should + be used if the blocking behaviour of :meth:`parse` is not desirable. + + When the parser is instantiated it is ready to begin accepting data from the + feed method immediately. After parsing has been finished with a call to close + the reset method must be called to make the parser ready to accept new data, + either from feed or using the parse method. 
+ + Note that these methods must *not* be called during parsing, that is, after + parse has been called and before it returns. + + By default, the class also implements the parse method of the XMLReader + interface using the feed, close and reset methods of the IncrementalParser + interface as a convenience to SAX 2.0 driver writers. + + +.. class:: Locator() + + Interface for associating a SAX event with a document location. A locator object + will return valid results only during calls to DocumentHandler methods; at any + other time, the results are unpredictable. If information is not available, + methods may return ``None``. + + +.. class:: InputSource([systemId]) + + Encapsulation of the information needed by the :class:`XMLReader` to read + entities. + + This class may include information about the public identifier, system + identifier, byte stream (possibly with character encoding information) and/or + the character stream of an entity. + + Applications will create objects of this class for use in the + :meth:`XMLReader.parse` method and for returning from + EntityResolver.resolveEntity. + + An :class:`InputSource` belongs to the application, the :class:`XMLReader` is + not allowed to modify :class:`InputSource` objects passed to it from the + application, although it may make copies and modify those. + + +.. class:: AttributesImpl(attrs) + + This is an implementation of the :class:`Attributes` interface (see section + :ref:`attributes-objects`). This is a dictionary-like object which + represents the element attributes in a :meth:`startElement` call. In addition + to the most useful dictionary operations, it supports a number of other + methods as described by the interface. Objects of this class should be + instantiated by readers; *attrs* must be a dictionary-like object containing + a mapping from attribute names to attribute values. + + +.. 
class:: AttributesNSImpl(attrs, qnames) + + Namespace-aware variant of :class:`AttributesImpl`, which will be passed to + :meth:`startElementNS`. It is derived from :class:`AttributesImpl`, but + understands attribute names as two-tuples of *namespaceURI* and + *localname*. In addition, it provides a number of methods expecting qualified + names as they appear in the original document. This class implements the + :class:`AttributesNS` interface (see section :ref:`attributes-ns-objects`). + + +.. _xmlreader-objects: + +XMLReader Objects +----------------- + +The :class:`XMLReader` interface supports the following methods: + + +.. method:: XMLReader.parse(source) + + Process an input source, producing SAX events. The *source* object can be a + system identifier (a string identifying the input source -- typically a file + name or an URL), a file-like object, or an :class:`InputSource` object. When + :meth:`parse` returns, the input is completely processed, and the parser object + can be discarded or reset. As a limitation, the current implementation only + accepts byte streams; processing of character streams is for further study. + + +.. method:: XMLReader.getContentHandler() + + Return the current :class:`ContentHandler`. + + +.. method:: XMLReader.setContentHandler(handler) + + Set the current :class:`ContentHandler`. If no :class:`ContentHandler` is set, + content events will be discarded. + + +.. method:: XMLReader.getDTDHandler() + + Return the current :class:`DTDHandler`. + + +.. method:: XMLReader.setDTDHandler(handler) + + Set the current :class:`DTDHandler`. If no :class:`DTDHandler` is set, DTD + events will be discarded. + + +.. method:: XMLReader.getEntityResolver() + + Return the current :class:`EntityResolver`. + + +.. method:: XMLReader.setEntityResolver(handler) + + Set the current :class:`EntityResolver`. 
If no :class:`EntityResolver` is set, + attempts to resolve an external entity will result in opening the system + identifier for the entity, and fail if it is not available. + + +.. method:: XMLReader.getErrorHandler() + + Return the current :class:`ErrorHandler`. + + +.. method:: XMLReader.setErrorHandler(handler) + + Set the current error handler. If no :class:`ErrorHandler` is set, errors will + be raised as exceptions, and warnings will be printed. + + +.. method:: XMLReader.setLocale(locale) + + Allow an application to set the locale for errors and warnings. + + SAX parsers are not required to provide localization for errors and warnings; if + they cannot support the requested locale, however, they must throw a SAX + exception. Applications may request a locale change in the middle of a parse. + + +.. method:: XMLReader.getFeature(featurename) + + Return the current setting for feature *featurename*. If the feature is not + recognized, :exc:`SAXNotRecognizedException` is raised. The well-known + featurenames are listed in the module :mod:`xml.sax.handler`. + + +.. method:: XMLReader.setFeature(featurename, value) + + Set the *featurename* to *value*. If the feature is not recognized, + :exc:`SAXNotRecognizedException` is raised. If the feature or its setting is not + supported by the parser, :exc:`SAXNotSupportedException` is raised. + + +.. method:: XMLReader.getProperty(propertyname) + + Return the current setting for property *propertyname*. If the property is not + recognized, a :exc:`SAXNotRecognizedException` is raised. The well-known + propertynames are listed in the module :mod:`xml.sax.handler`. + + +.. method:: XMLReader.setProperty(propertyname, value) + + Set the *propertyname* to *value*. If the property is not recognized, + :exc:`SAXNotRecognizedException` is raised. If the property or its setting is + not supported by the parser, :exc:`SAXNotSupportedException` is raised. + + +..
_incremental-parser-objects: + +IncrementalParser Objects +------------------------- + +Instances of :class:`IncrementalParser` offer the following additional methods: + + +.. method:: IncrementalParser.feed(data) + + Process a chunk of *data*. + + +.. method:: IncrementalParser.close() + + Assume the end of the document. That will check well-formedness conditions that + can be checked only at the end, invoke handlers, and may clean up resources + allocated during parsing. + + +.. method:: IncrementalParser.reset() + + This method is called after close has been called to reset the parser so that it + is ready to parse new documents. The results of calling parse or feed after + close without calling reset are undefined. + + +.. _locator-objects: + +Locator Objects +--------------- + +Instances of :class:`Locator` provide these methods: + + +.. method:: Locator.getColumnNumber() + + Return the column number where the current event ends. + + +.. method:: Locator.getLineNumber() + + Return the line number where the current event ends. + + +.. method:: Locator.getPublicId() + + Return the public identifier for the current event. + + +.. method:: Locator.getSystemId() + + Return the system identifier for the current event. + + +.. _input-source-objects: + +InputSource Objects +------------------- + + +.. method:: InputSource.setPublicId(id) + + Sets the public identifier of this :class:`InputSource`. + + +.. method:: InputSource.getPublicId() + + Returns the public identifier of this :class:`InputSource`. + + +.. method:: InputSource.setSystemId(id) + + Sets the system identifier of this :class:`InputSource`. + + +.. method:: InputSource.getSystemId() + + Returns the system identifier of this :class:`InputSource`. + + +.. method:: InputSource.setEncoding(encoding) + + Sets the character encoding of this :class:`InputSource`. + + The encoding must be a string acceptable for an XML encoding declaration (see + section 4.3.3 of the XML recommendation). 
+ + The encoding attribute of the :class:`InputSource` is ignored if the + :class:`InputSource` also contains a character stream. + + +.. method:: InputSource.getEncoding() + + Get the character encoding of this InputSource. + + +.. method:: InputSource.setByteStream(bytefile) + + Set the byte stream (a Python file-like object which does not perform + byte-to-character conversion) for this input source. + + The SAX parser will ignore this if there is also a character stream specified, + but it will use a byte stream in preference to opening a URI connection itself. + + If the application knows the character encoding of the byte stream, it should + set it with the setEncoding method. + + +.. method:: InputSource.getByteStream() + + Get the byte stream for this input source. + + The getEncoding method will return the character encoding for this byte stream, + or None if unknown. + + +.. method:: InputSource.setCharacterStream(charfile) + + Set the character stream for this input source. (The stream must be a Python 1.6 + Unicode-wrapped file-like that performs conversion to Unicode strings.) + + If there is a character stream specified, the SAX parser will ignore any byte + stream and will not attempt to open a URI connection to the system identifier. + + +.. method:: InputSource.getCharacterStream() + + Get the character stream for this input source. + + +.. _attributes-objects: + +The :class:`Attributes` Interface +--------------------------------- + +:class:`Attributes` objects implement a portion of the mapping protocol, +including the methods :meth:`copy`, :meth:`get`, :meth:`has_key`, :meth:`items`, +:meth:`keys`, and :meth:`values`. The following methods are also provided: + + +.. method:: Attributes.getLength() + + Return the number of attributes. + + +.. method:: Attributes.getNames() + + Return the names of the attributes. + + +.. method:: Attributes.getType(name) + + Returns the type of the attribute *name*, which is normally ``'CDATA'``. + + +.. 
method:: Attributes.getValue(name) + + Return the value of attribute *name*. + +.. % getValueByQName, getNameByQName, getQNameByName, getQNames available +.. % here already, but documented only for derived class. + + +.. _attributes-ns-objects: + +The :class:`AttributesNS` Interface +----------------------------------- + +This interface is a subtype of the :class:`Attributes` interface (see section +:ref:`attributes-objects`). All methods supported by that interface are also +available on :class:`AttributesNS` objects. + +The following methods are also available: + + +.. method:: AttributesNS.getValueByQName(name) + + Return the value for a qualified name. + + +.. method:: AttributesNS.getNameByQName(name) + + Return the ``(namespace, localname)`` pair for a qualified *name*. + + +.. method:: AttributesNS.getQNameByName(name) + + Return the qualified name for a ``(namespace, localname)`` pair. + + +.. method:: AttributesNS.getQNames() + + Return the qualified names of all attributes. + diff --git a/Doc/library/xml.sax.rst b/Doc/library/xml.sax.rst new file mode 100644 index 0000000..43d17c2 --- /dev/null +++ b/Doc/library/xml.sax.rst @@ -0,0 +1,143 @@ + +:mod:`xml.sax` --- Support for SAX2 parsers +=========================================== + +.. module:: xml.sax + :synopsis: Package containing SAX2 base classes and convenience functions. +.. moduleauthor:: Lars Marius Garshol <larsga@garshol.priv.no> +.. sectionauthor:: Fred L. Drake, Jr. <fdrake@acm.org> +.. sectionauthor:: Martin v. Löwis <martin@v.loewis.de> + + +.. versionadded:: 2.0 + +The :mod:`xml.sax` package provides a number of modules which implement the +Simple API for XML (SAX) interface for Python. The package itself provides the +SAX exceptions and the convenience functions which will be most used by users of +the SAX API. + +The convenience functions are: + + +.. function:: make_parser([parser_list]) + + Create and return a SAX :class:`XMLReader` object. The first parser found will + be used. 
If *parser_list* is provided, it must be a sequence of strings which + name modules that have a function named :func:`create_parser`. Modules listed + in *parser_list* will be used before modules in the default list of parsers. + + +.. function:: parse(filename_or_stream, handler[, error_handler]) + + Create a SAX parser and use it to parse a document. The document, passed in as + *filename_or_stream*, can be a filename or a file object. The *handler* + parameter needs to be a SAX :class:`ContentHandler` instance. If + *error_handler* is given, it must be a SAX :class:`ErrorHandler` instance; if + omitted, :exc:`SAXParseException` will be raised on all errors. There is no + return value; all work must be done by the *handler* passed in. + + +.. function:: parseString(string, handler[, error_handler]) + + Similar to :func:`parse`, but parses from a buffer *string* received as a + parameter. + +A typical SAX application uses three kinds of objects: readers, handlers and +input sources. "Reader" in this context is another term for parser, i.e. some +piece of code that reads the bytes or characters from the input source, and +produces a sequence of events. The events then get distributed to the handler +objects, i.e. the reader invokes a method on the handler. A SAX application +must therefore obtain a reader object, create or open the input sources, create +the handlers, and connect these objects all together. As the final step of +preparation, the reader is called to parse the input. During parsing, methods on +the handler objects are called based on structural and syntactic events from the +input data. + +For these objects, only the interfaces are relevant; they are normally not +instantiated by the application itself. Since Python does not have an explicit +notion of interface, they are formally introduced as classes, but applications +may use implementations which do not inherit from the provided classes. 
The +:class:`InputSource`, :class:`Locator`, :class:`Attributes`, +:class:`AttributesNS`, and :class:`XMLReader` interfaces are defined in the +module :mod:`xml.sax.xmlreader`. The handler interfaces are defined in +:mod:`xml.sax.handler`. For convenience, :class:`InputSource` (which is often +instantiated directly) and the handler classes are also available from +:mod:`xml.sax`. These interfaces are described below. + +In addition to these classes, :mod:`xml.sax` provides the following exception +classes. + + +.. exception:: SAXException(msg[, exception]) + + Encapsulate an XML error or warning. This class can contain basic error or + warning information from either the XML parser or the application: it can be + subclassed to provide additional functionality or to add localization. Note + that although the handlers defined in the :class:`ErrorHandler` interface + receive instances of this exception, it is not required to actually raise the + exception --- it is also useful as a container for information. + + When instantiated, *msg* should be a human-readable description of the error. + The optional *exception* parameter, if given, should be ``None`` or an exception + that was caught by the parsing code and is being passed along as information. + + This is the base class for the other SAX exception classes. + + +.. exception:: SAXParseException(msg, exception, locator) + + Subclass of :exc:`SAXException` raised on parse errors. Instances of this class + are passed to the methods of the SAX :class:`ErrorHandler` interface to provide + information about the parse error. This class supports the SAX :class:`Locator` + interface as well as the :class:`SAXException` interface. + + +.. exception:: SAXNotRecognizedException(msg[, exception]) + + Subclass of :exc:`SAXException` raised when a SAX :class:`XMLReader` is + confronted with an unrecognized feature or property. SAX applications and + extensions may use this class for similar purposes. + + +.. 
exception:: SAXNotSupportedException(msg[, exception]) + + Subclass of :exc:`SAXException` raised when a SAX :class:`XMLReader` is asked to + enable a feature that is not supported, or to set a property to a value that the + implementation does not support. SAX applications and extensions may use this + class for similar purposes. + + +.. seealso:: + + `SAX: The Simple API for XML <http://www.saxproject.org/>`_ + This site is the focal point for the definition of the SAX API. It provides a + Java implementation and online documentation. Links to implementations and + historical information are also available. + + Module :mod:`xml.sax.handler` + Definitions of the interfaces for application-provided objects. + + Module :mod:`xml.sax.saxutils` + Convenience functions for use in SAX applications. + + Module :mod:`xml.sax.xmlreader` + Definitions of the interfaces for parser-provided objects. + + +.. _sax-exception-objects: + +SAXException Objects +-------------------- + +The :class:`SAXException` exception class supports the following methods: + + +.. method:: SAXException.getMessage() + + Return a human-readable message describing the error condition. + + +.. method:: SAXException.getException() + + Return an encapsulated exception object, or ``None``. + diff --git a/Doc/library/xml.sax.utils.rst b/Doc/library/xml.sax.utils.rst new file mode 100644 index 0000000..0585a9b --- /dev/null +++ b/Doc/library/xml.sax.utils.rst @@ -0,0 +1,83 @@ + +:mod:`xml.sax.saxutils` --- SAX Utilities +========================================= + +.. module:: xml.sax.saxutils + :synopsis: Convenience functions and classes for use with SAX. +.. moduleauthor:: Lars Marius Garshol <larsga@garshol.priv.no> +.. sectionauthor:: Martin v. Löwis <martin@v.loewis.de> + + +.. versionadded:: 2.0 + +The module :mod:`xml.sax.saxutils` contains a number of classes and functions +that are commonly useful when creating SAX applications, either in direct use, +or as base classes. + + +.. 
function:: escape(data[, entities]) + + Escape ``'&'``, ``'<'``, and ``'>'`` in a string of data. + + You can escape other strings of data by passing a dictionary as the optional + *entities* parameter. The keys and values must all be strings; each key will be + replaced with its corresponding value. + + +.. function:: unescape(data[, entities]) + + Unescape ``'&amp;'``, ``'&lt;'``, and ``'&gt;'`` in a string of data. + + You can unescape other strings of data by passing a dictionary as the optional + *entities* parameter. The keys and values must all be strings; each key will be + replaced with its corresponding value. + + .. versionadded:: 2.3 + + +.. function:: quoteattr(data[, entities]) + + Similar to :func:`escape`, but also prepares *data* to be used as an + attribute value. The return value is a quoted version of *data* with any + additional required replacements. :func:`quoteattr` will select a quote + character based on the content of *data*, attempting to avoid encoding any + quote characters in the string. If both single- and double-quote characters + are already in *data*, the double-quote characters will be encoded and *data* + will be wrapped in double-quotes. The resulting string can be used directly + as an attribute value:: + + >>> print "<element attr=%s>" % quoteattr("ab ' cd \" ef") + <element attr="ab ' cd &quot; ef"> + + This function is useful when generating attribute values for HTML or any SGML + using the reference concrete syntax. + + .. versionadded:: 2.2 + + +.. class:: XMLGenerator([out[, encoding]]) + + This class implements the :class:`ContentHandler` interface by writing SAX + events back into an XML document. In other words, using an :class:`XMLGenerator` + as the content handler will reproduce the original document being parsed. *out* + should be a file-like object which will default to *sys.stdout*. *encoding* is + the encoding of the output stream which defaults to ``'iso-8859-1'``. + + +.. 
class:: XMLFilterBase(base) + + This class is designed to sit between an :class:`XMLReader` and the client + application's event handlers. By default, it does nothing but pass requests up + to the reader and events on to the handlers unmodified, but subclasses can + override specific methods to modify the event stream or the configuration + requests as they pass through. + + +.. function:: prepare_input_source(source[, base]) + + This function takes an input source and an optional base URL and returns a fully + resolved :class:`InputSource` object ready for reading. The input source can be + given as a string, a file-like object, or an :class:`InputSource` object; + parsers will use this function to implement the polymorphic *source* argument to + their :meth:`parse` method. + diff --git a/Doc/library/xmlrpclib.rst b/Doc/library/xmlrpclib.rst new file mode 100644 index 0000000..cd507c4 --- /dev/null +++ b/Doc/library/xmlrpclib.rst @@ -0,0 +1,422 @@ + +:mod:`xmlrpclib` --- XML-RPC client access +========================================== + +.. module:: xmlrpclib + :synopsis: XML-RPC client access. +.. moduleauthor:: Fredrik Lundh <fredrik@pythonware.com> +.. sectionauthor:: Eric S. Raymond <esr@snark.thyrsus.com> + + +.. % Not everything is documented yet. It might be good to describe +.. % Marshaller, Unmarshaller, getparser, dumps, loads, and Transport. + +.. versionadded:: 2.2 + +XML-RPC is a Remote Procedure Call method that uses XML passed via HTTP as a +transport. With it, a client can call methods with parameters on a remote +server (the server is named by a URI) and get back structured data. This module +supports writing XML-RPC client code; it handles all the details of translating +between conformable Python objects and XML on the wire. + + +.. class:: ServerProxy(uri[, transport[, encoding[, verbose[, allow_none[, use_datetime]]]]]) + + A :class:`ServerProxy` instance is an object that manages communication with a + remote XML-RPC server. 
The required first argument is a URI (Uniform Resource + Identifier), and will normally be the URL of the server. The optional second + argument is a transport factory instance; by default it is an internal + :class:`SafeTransport` instance for https: URLs and an internal HTTP + :class:`Transport` instance otherwise. The optional third argument is an + encoding, by default UTF-8. The optional fourth argument is a debugging flag. + If *allow_none* is true, the Python constant ``None`` will be translated into + XML; the default behaviour is for ``None`` to raise a :exc:`TypeError`. This is + a commonly-used extension to the XML-RPC specification, but isn't supported by + all clients and servers; see http://ontosys.com/xml-rpc/extensions.php for a + description. The *use_datetime* flag can be used to cause date/time values to + be presented as :class:`datetime.datetime` objects; this is false by default. + :class:`datetime.datetime`, :class:`datetime.date` and :class:`datetime.time` + objects may be passed to calls. :class:`datetime.date` objects are converted + with a time of "00:00:00". :class:`datetime.time` objects are converted using + today's date. + + Both the HTTP and HTTPS transports support the URL syntax extension for HTTP + Basic Authentication: ``http://user:pass@host:port/path``. The ``user:pass`` + portion will be base64-encoded as an HTTP 'Authorization' header, and sent to + the remote server as part of the connection process when invoking an XML-RPC + method. You only need to use this if the remote server requires a Basic + Authentication user and password. + + The returned instance is a proxy object with methods that can be used to invoke + corresponding RPC calls on the remote server. If the remote server supports the + introspection API, the proxy can also be used to query the remote server for the + methods it supports (service discovery) and fetch other server-associated + metadata. 
+ + :class:`ServerProxy` instance methods take Python basic types and objects as + arguments and return Python basic types and classes. Types that are conformable + (e.g. that can be marshalled through XML), include the following (and except + where noted, they are unmarshalled as the same Python type): + + +---------------------------------+---------------------------------------------+ + | Name | Meaning | + +=================================+=============================================+ + | :const:`boolean` | The :const:`True` and :const:`False` | + | | constants | + +---------------------------------+---------------------------------------------+ + | :const:`integers` | Pass in directly | + +---------------------------------+---------------------------------------------+ + | :const:`floating-point numbers` | Pass in directly | + +---------------------------------+---------------------------------------------+ + | :const:`strings` | Pass in directly | + +---------------------------------+---------------------------------------------+ + | :const:`arrays` | Any Python sequence type containing | + | | conformable elements. Arrays are returned | + | | as lists | + +---------------------------------+---------------------------------------------+ + | :const:`structures` | A Python dictionary. Keys must be strings, | + | | values may be any conformable type. Objects | + | | of user-defined classes can be passed in; | + | | only their *__dict__* attribute is | + | | transmitted. 
| + +---------------------------------+---------------------------------------------+ + | :const:`dates` | in seconds since the epoch (pass in an | + | | instance of the :class:`DateTime` class) or | + | | a :class:`datetime.datetime`, | + | | :class:`datetime.date` or | + | | :class:`datetime.time` instance | + +---------------------------------+---------------------------------------------+ + | :const:`binary data` | pass in an instance of the :class:`Binary` | + | | wrapper class | + +---------------------------------+---------------------------------------------+ + + This is the full set of data types supported by XML-RPC. Method calls may also + raise a special :exc:`Fault` instance, used to signal XML-RPC server errors, or + :exc:`ProtocolError` used to signal an error in the HTTP/HTTPS transport layer. + Both :exc:`Fault` and :exc:`ProtocolError` derive from a base class called + :exc:`Error`. Note that even though starting with Python 2.2 you can subclass + builtin types, the xmlrpclib module currently does not marshal instances of such + subclasses. + + When passing strings, characters special to XML such as ``<``, ``>``, and ``&`` + will be automatically escaped. However, it's the caller's responsibility to + ensure that the string is free of characters that aren't allowed in XML, such as + the control characters with ASCII values between 0 and 31 (except, of course, + tab, newline and carriage return); failing to do this will result in an XML-RPC + request that isn't well-formed XML. If you have to pass arbitrary strings via + XML-RPC, use the :class:`Binary` wrapper class described below. + + :class:`Server` is retained as an alias for :class:`ServerProxy` for backwards + compatibility. New code should use :class:`ServerProxy`. + + .. versionchanged:: 2.5 + The *use_datetime* flag was added. + + .. 
versionchanged:: 2.6 + Instances of new-style classes can be passed in if they have an *__dict__* + attribute and don't have a base class that is marshalled in a special way. + + +.. seealso:: + + `XML-RPC HOWTO <http://www.tldp.org/HOWTO/XML-RPC-HOWTO/index.html>`_ + A good description of XML operation and client software in several languages. + Contains pretty much everything an XML-RPC client developer needs to know. + + `XML-RPC Hacks page <http://xmlrpc-c.sourceforge.net/hacks.php>`_ + Extensions for various open-source libraries to support introspection and + multicall. + + +.. _serverproxy-objects: + +ServerProxy Objects +------------------- + +A :class:`ServerProxy` instance has a method corresponding to each remote +procedure call accepted by the XML-RPC server. Calling the method performs an +RPC, dispatched by both name and argument signature (e.g. the same method name +can be overloaded with multiple argument signatures). The RPC finishes by +returning a value, which may be either returned data in a conformant type or a +:class:`Fault` or :class:`ProtocolError` object indicating an error. + +Servers that support the XML introspection API support some common methods +grouped under the reserved :attr:`system` member: + + +.. method:: ServerProxy.system.listMethods() + + This method returns a list of strings, one for each (non-system) method + supported by the XML-RPC server. + + +.. method:: ServerProxy.system.methodSignature(name) + + This method takes one parameter, the name of a method implemented by the XML-RPC + server. It returns an array of possible signatures for this method. A signature + is an array of types. The first of these types is the return type of the method, + the rest are parameters. + + Because multiple signatures (i.e. overloading) are permitted, this method returns + a list of signatures rather than a singleton. + + Signatures themselves are restricted to the top level parameters expected by a + method. 
For instance if a method expects one array of structs as a parameter, + and it returns a string, its signature is simply "string, array". If it expects + three integers and returns a string, its signature is "string, int, int, int". + + If no signature is defined for the method, a non-array value is returned. In + Python this means that the type of the returned value will be something other + than a list. + + +.. method:: ServerProxy.system.methodHelp(name) + + This method takes one parameter, the name of a method implemented by the XML-RPC + server. It returns a documentation string describing the use of that method. If + no such string is available, an empty string is returned. The documentation + string may contain HTML markup. + +Introspection methods are currently supported by servers written in PHP, C and +Microsoft .NET. Partial introspection support is included in recent updates to +UserLand Frontier. Introspection support for Perl, Python and Java is available +at the `XML-RPC Hacks <http://xmlrpc-c.sourceforge.net/hacks.php>`_ page. + + +.. _boolean-objects: + +Boolean Objects +--------------- + +This class may be initialized from any Python value; the instance returned +depends only on its truth value. It supports various Python operators through +:meth:`__cmp__`, :meth:`__repr__`, :meth:`__int__`, and :meth:`__bool__` +methods, all implemented in the obvious ways. + +It also has the following method, supported mainly for internal use by the +unmarshalling code: + + +.. method:: Boolean.encode(out) + + Write the XML-RPC encoding of this Boolean item to the out stream object. + + +.. _datetime-objects: + +DateTime Objects +---------------- + +This class may be initialized with seconds since the epoch, a time tuple, an ISO +8601 time/date string, or a :class:`datetime.datetime`, :class:`datetime.date` +or :class:`datetime.time` instance. It has the following methods, supported +mainly for internal use by the marshalling/unmarshalling code: + + +.. 
method:: DateTime.decode(string) + + Accept a string as the instance's new time value. + + +.. method:: DateTime.encode(out) + + Write the XML-RPC encoding of this :class:`DateTime` item to the *out* stream + object. + +It also supports certain of Python's built-in operators through :meth:`__cmp__` +and :meth:`__repr__` methods. + + +.. _binary-objects: + +Binary Objects +-------------- + +This class may be initialized from string data (which may include NULs). The +primary access to the content of a :class:`Binary` object is provided by an +attribute: + + +.. attribute:: Binary.data + + The binary data encapsulated by the :class:`Binary` instance. The data is + provided as an 8-bit string. + +:class:`Binary` objects have the following methods, supported mainly for +internal use by the marshalling/unmarshalling code: + + +.. method:: Binary.decode(string) + + Accept a base64 string and decode it as the instance's new data. + + +.. method:: Binary.encode(out) + + Write the XML-RPC base 64 encoding of this binary item to the out stream object. + +It also supports certain of Python's built-in operators through a +:meth:`__cmp__` method. + + +.. _fault-objects: + +Fault Objects +------------- + +A :class:`Fault` object encapsulates the content of an XML-RPC fault tag. Fault +objects have the following members: + + +.. attribute:: Fault.faultCode + + A string indicating the fault type. + + +.. attribute:: Fault.faultString + + A string containing a diagnostic message associated with the fault. + + +.. _protocol-error-objects: + +ProtocolError Objects +--------------------- + +A :class:`ProtocolError` object describes a protocol error in the underlying +transport layer (such as a 404 'not found' error if the server named by the URI +does not exist). It has the following members: + + +.. attribute:: ProtocolError.url + + The URI or URL that triggered the error. + + +.. attribute:: ProtocolError.errcode + + The error code. + + +.. 
attribute:: ProtocolError.errmsg + + The error message or diagnostic string. + + +.. attribute:: ProtocolError.headers + + A string containing the headers of the HTTP/HTTPS request that triggered the + error. + + +MultiCall Objects +----------------- + +.. versionadded:: 2.4 + +In http://www.xmlrpc.com/discuss/msgReader%241208, an approach is presented to +encapsulate multiple calls to a remote server into a single request. + + +.. class:: MultiCall(server) + + Create an object used to boxcar method calls. *server* is the eventual target of + the call. Calls can be made to the result object, but they will immediately + return ``None``, and only store the call name and parameters in the + :class:`MultiCall` object. Calling the object itself causes all stored calls to + be transmitted as a single ``system.multicall`` request. The result of this call + is a generator; iterating over this generator yields the individual results. + +A usage example of this class is :: + + multicall = MultiCall(server_proxy) + multicall.add(2,3) + multicall.get_address("Guido") + add_result, address = multicall() + + +Convenience Functions +--------------------- + + +.. function:: boolean(value) + + Convert any Python value to one of the XML-RPC Boolean constants, ``True`` or + ``False``. + + +.. function:: dumps(params[, methodname[, methodresponse[, encoding[, allow_none]]]]) + + Convert *params* into an XML-RPC request, or into a response if *methodresponse* + is true. *params* can be either a tuple of arguments or an instance of the + :exc:`Fault` exception class. If *methodresponse* is true, only a single value + can be returned, meaning that *params* must be of length 1. *encoding*, if + supplied, is the encoding to use in the generated XML; the default is UTF-8. + Python's :const:`None` value cannot be used in standard XML-RPC; to allow using + it via an extension, provide a true value for *allow_none*. + + +.. 
function:: loads(data[, use_datetime]) + + Convert an XML-RPC request or response into Python objects, a ``(params, + methodname)`` tuple. *params* is a tuple of arguments; *methodname* is a string, or + ``None`` if no method name is present in the packet. If the XML-RPC packet + represents a fault condition, this function will raise a :exc:`Fault` exception. + The *use_datetime* flag can be used to cause date/time values to be presented as + :class:`datetime.datetime` objects; this is false by default. Note that even if + you call an XML-RPC method with :class:`datetime.date` or :class:`datetime.time` + objects, they are converted to :class:`DateTime` objects internally, so only + :class:`datetime.datetime` objects will be returned. + + .. versionchanged:: 2.5 + The *use_datetime* flag was added. + + +.. _xmlrpc-client-example: + +Example of Client Usage +----------------------- + +:: + + # simple test program (from the XML-RPC specification) + from xmlrpclib import ServerProxy, Error + + # server = ServerProxy("http://localhost:8000") # local server + server = ServerProxy("http://betty.userland.com") + + print server + + try: + print server.examples.getStateName(41) + except Error as v: + print "ERROR", v + +To access an XML-RPC server through a proxy, you need to define a custom +transport. The following example, written by NoboNobo, shows how: + +.. % fill in original author's name if we ever learn it + +.. 
% Example taken from http://lowlife.jp/nobonobo/wiki/xmlrpcwithproxy.html + +:: + + import xmlrpclib, httplib + + class ProxiedTransport(xmlrpclib.Transport): + def set_proxy(self, proxy): + self.proxy = proxy + def make_connection(self, host): + self.realhost = host + h = httplib.HTTP(self.proxy) + return h + def send_request(self, connection, handler, request_body): + connection.putrequest("POST", 'http://%s%s' % (self.realhost, handler)) + def send_host(self, connection, host): + connection.putheader('Host', self.realhost) + + p = ProxiedTransport() + p.set_proxy('proxy-server:8080') + server = xmlrpclib.Server('http://time.xmlrpc.com/RPC2', transport=p) + print server.currentTime.getCurrentTime() + diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst new file mode 100644 index 0000000..5e51bfc --- /dev/null +++ b/Doc/library/zipfile.rst @@ -0,0 +1,408 @@ + +:mod:`zipfile` --- Work with ZIP archives +========================================= + +.. module:: zipfile + :synopsis: Read and write ZIP-format archive files. +.. moduleauthor:: James C. Ahlstrom <jim@interet.com> +.. sectionauthor:: James C. Ahlstrom <jim@interet.com> + + +.. % LaTeX markup by Fred L. Drake, Jr. <fdrake@acm.org> + +.. versionadded:: 1.6 + +The ZIP file format is a common archive and compression standard. This module +provides tools to create, read, write, append, and list a ZIP file. Any +advanced use of this module will require an understanding of the format, as +defined in `PKZIP Application Note +<http://www.pkware.com/business_and_developers/developer/appnote/>`_. + +This module does not currently handle ZIP files which have appended comments, or +multi-disk ZIP files. It can handle ZIP files that use the ZIP64 extensions +(that is ZIP files that are more than 4 GByte in size). It supports decryption +of encrypted files in ZIP archives, but it cannot currently create an encrypted +file. + +The available attributes of this module are: + + +.. 
exception:: BadZipfile + + The error raised for bad ZIP files (old name: ``zipfile.error``). + + +.. exception:: LargeZipFile + + The error raised when a ZIP file would require ZIP64 functionality but that has + not been enabled. + + +.. class:: ZipFile + + The class for reading and writing ZIP files. See section + :ref:`zipfile-objects` for constructor details. + + +.. class:: PyZipFile + + Class for creating ZIP archives containing Python libraries. + + +.. class:: ZipInfo([filename[, date_time]]) + + Class used to represent information about a member of an archive. Instances + of this class are returned by the :meth:`getinfo` and :meth:`infolist` + methods of :class:`ZipFile` objects. Most users of the :mod:`zipfile` module + will not need to create these, but only use those created by this + module. *filename* should be the full name of the archive member, and + *date_time* should be a tuple containing six fields which describe the time + of the last modification to the file; the fields are described in section + :ref:`zipinfo-objects`. + + +.. function:: is_zipfile(filename) + + Returns ``True`` if *filename* is a valid ZIP file based on its magic number, + otherwise returns ``False``. This module does not currently handle ZIP files + which have appended comments. + + +.. data:: ZIP_STORED + + The numeric constant for an uncompressed archive member. + + +.. data:: ZIP_DEFLATED + + The numeric constant for the usual ZIP compression method. This requires the + zlib module. No other compression methods are currently supported. + + +.. seealso:: + + `PKZIP Application Note <http://www.pkware.com/business_and_developers/developer/appnote/>`_ + Documentation on the ZIP file format by Phil Katz, the creator of the format and + algorithms used. + + `Info-ZIP Home Page <http://www.info-zip.org/>`_ + Information about the Info-ZIP project's ZIP archive programs and development + libraries. + + +.. _zipfile-objects: + +ZipFile Objects +--------------- + + +.. 
class:: ZipFile(file[, mode[, compression[, allowZip64]]]) + + Open a ZIP file, where *file* can be either a path to a file (a string) or a + file-like object. The *mode* parameter should be ``'r'`` to read an existing + file, ``'w'`` to truncate and write a new file, or ``'a'`` to append to an + existing file. If *mode* is ``'a'`` and *file* refers to an existing ZIP file, + then additional files are added to it. If *file* does not refer to a ZIP file, + then a new ZIP archive is appended to the file. This is meant for adding a ZIP + archive to another file, such as :file:`python.exe`. Using :: + + cat myzip.zip >> python.exe + + also works, and at least :program:`WinZip` can read such files. If *mode* is + ``a`` and the file does not exist at all, it is created. *compression* is the + ZIP compression method to use when writing the archive, and should be + :const:`ZIP_STORED` or :const:`ZIP_DEFLATED`; unrecognized values will cause + :exc:`RuntimeError` to be raised. If :const:`ZIP_DEFLATED` is specified but the + :mod:`zlib` module is not available, :exc:`RuntimeError` is also raised. The + default is :const:`ZIP_STORED`. If *allowZip64* is ``True`` zipfile will create + ZIP files that use the ZIP64 extensions when the zipfile is larger than 2 GB. If + it is false (the default) :mod:`zipfile` will raise an exception when the ZIP + file would require ZIP64 extensions. ZIP64 extensions are disabled by default + because the default :program:`zip` and :program:`unzip` commands on Unix (the + InfoZIP utilities) don't support these extensions. + + .. versionchanged:: 2.6 + If the file does not exist, it is created if the mode is 'a'. + + +.. method:: ZipFile.close() + + Close the archive file. You must call :meth:`close` before exiting your program + or essential records will not be written. + + +.. method:: ZipFile.getinfo(name) + + Return a :class:`ZipInfo` object with information about the archive member + *name*. 
Calling :meth:`getinfo` for a name not currently contained in the + archive will raise a :exc:`KeyError`. + + +.. method:: ZipFile.infolist() + + Return a list containing a :class:`ZipInfo` object for each member of the + archive. The objects are in the same order as their entries in the actual ZIP + file on disk if an existing archive was opened. + + +.. method:: ZipFile.namelist() + + Return a list of archive members by name. + + +.. method:: ZipFile.open(name[, mode[, pwd]]) + + Extract a member from the archive as a file-like object (ZipExtFile). *name* is + the name of the file in the archive. The *mode* parameter, if included, must be + one of the following: ``'r'`` (the default), ``'U'``, or ``'rU'``. Choosing + ``'U'`` or ``'rU'`` will enable universal newline support in the read-only + object. *pwd* is the password used for encrypted files. Calling :meth:`open` + on a closed ZipFile will raise a :exc:`RuntimeError`. + + .. note:: + + The file-like object is read-only and provides the following methods: + :meth:`read`, :meth:`readline`, :meth:`readlines`, :meth:`__iter__`, + :meth:`next`. + + .. note:: + + If the ZipFile was created by passing in a file-like object as the first + argument to the constructor, then the object returned by :meth:`open` shares the + ZipFile's file pointer. Under these circumstances, the object returned by + :meth:`open` should not be used after any additional operations are performed + on the ZipFile object. If the ZipFile was created by passing in a string (the + filename) as the first argument to the constructor, then :meth:`open` will + create a new file object that will be held by the ZipExtFile, allowing it to + operate independently of the ZipFile. + + .. versionadded:: 2.6 + + +.. method:: ZipFile.printdir() + + Print a table of contents for the archive to ``sys.stdout``. + + +.. method:: ZipFile.setpassword(pwd) + + Set *pwd* as default password to extract encrypted files. + + .. versionadded:: 2.6 + + +.. 
method:: ZipFile.read(name[, pwd]) + + Return the bytes of the file in the archive. The archive must be open for read + or append. *pwd* is the password used for encrypted files and, if specified, it + will override the default password set with :meth:`setpassword`. Calling + :meth:`read` on a closed ZipFile will raise a :exc:`RuntimeError`. + + .. versionchanged:: 2.6 + *pwd* was added. + + +.. method:: ZipFile.testzip() + + Read all the files in the archive and check their CRC's and file headers. + Return the name of the first bad file, or else return ``None``. Calling + :meth:`testzip` on a closed ZipFile will raise a :exc:`RuntimeError`. + + +.. method:: ZipFile.write(filename[, arcname[, compress_type]]) + + Write the file named *filename* to the archive, giving it the archive name + *arcname* (by default, this will be the same as *filename*, but without a drive + letter and with leading path separators removed). If given, *compress_type* + overrides the value given for the *compression* parameter to the constructor for + the new entry. The archive must be open with mode ``'w'`` or ``'a'`` -- calling + :meth:`write` on a ZipFile created with mode ``'r'`` will raise a + :exc:`RuntimeError`. Calling :meth:`write` on a closed ZipFile will raise a + :exc:`RuntimeError`. + + .. note:: + + There is no official file name encoding for ZIP files. If you have unicode file + names, please convert them to byte strings in your desired encoding before + passing them to :meth:`write`. WinZip interprets all file names as encoded in + CP437, also known as DOS Latin. + + .. note:: + + Archive names should be relative to the archive root, that is, they should not + start with a path separator. + + .. note:: + + If ``arcname`` (or ``filename``, if ``arcname`` is not given) contains a null + byte, the name of the file in the archive will be truncated at the null byte. + + +.. 
method:: ZipFile.writestr(zinfo_or_arcname, bytes) + + Write the string *bytes* to the archive; *zinfo_or_arcname* is either the file + name it will be given in the archive, or a :class:`ZipInfo` instance. If it's + an instance, at least the filename, date, and time must be given. If it's a + name, the date and time is set to the current date and time. The archive must be + opened with mode ``'w'`` or ``'a'`` -- calling :meth:`writestr` on a ZipFile + created with mode ``'r'`` will raise a :exc:`RuntimeError`. Calling + :meth:`writestr` on a closed ZipFile will raise a :exc:`RuntimeError`. + +The following data attribute is also available: + + +.. attribute:: ZipFile.debug + + The level of debug output to use. This may be set from ``0`` (the default, no + output) to ``3`` (the most output). Debugging information is written to + ``sys.stdout``. + + +.. _pyzipfile-objects: + +PyZipFile Objects +----------------- + +The :class:`PyZipFile` constructor takes the same parameters as the +:class:`ZipFile` constructor. Instances have one method in addition to those of +:class:`ZipFile` objects. + + +.. method:: PyZipFile.writepy(pathname[, basename]) + + Search for files :file:`\*.py` and add the corresponding file to the archive. + The corresponding file is a :file:`\*.pyo` file if available, else a + :file:`\*.pyc` file, compiling if necessary. If the pathname is a file, the + filename must end with :file:`.py`, and just the (corresponding + :file:`\*.py[co]`) file is added at the top level (no path information). If the + pathname is a file that does not end with :file:`.py`, a :exc:`RuntimeError` + will be raised. If it is a directory, and the directory is not a package + directory, then all the files :file:`\*.py[co]` are added at the top level. If + the directory is a package directory, then all :file:`\*.py[co]` are added under + the package name as a file path, and if any subdirectories are package + directories, all of these are added recursively. 
*basename* is intended for + internal use only. The :meth:`writepy` method makes archives with file names + like this:: + + string.pyc # Top level name + test/__init__.pyc # Package directory + test/testall.pyc # Module test.testall + test/bogus/__init__.pyc # Subpackage directory + test/bogus/myfile.pyc # Submodule test.bogus.myfile + + +.. _zipinfo-objects: + +ZipInfo Objects +--------------- + +Instances of the :class:`ZipInfo` class are returned by the :meth:`getinfo` and +:meth:`infolist` methods of :class:`ZipFile` objects. Each object stores +information about a single member of the ZIP archive. + +Instances have the following attributes: + + +.. attribute:: ZipInfo.filename + + Name of the file in the archive. + + +.. attribute:: ZipInfo.date_time + + The time and date of the last modification to the archive member. This is a + tuple of six values: + + +-------+--------------------------+ + | Index | Value | + +=======+==========================+ + | ``0`` | Year | + +-------+--------------------------+ + | ``1`` | Month (one-based) | + +-------+--------------------------+ + | ``2`` | Day of month (one-based) | + +-------+--------------------------+ + | ``3`` | Hours (zero-based) | + +-------+--------------------------+ + | ``4`` | Minutes (zero-based) | + +-------+--------------------------+ + | ``5`` | Seconds (zero-based) | + +-------+--------------------------+ + + +.. attribute:: ZipInfo.compress_type + + Type of compression for the archive member. + + +.. attribute:: ZipInfo.comment + + Comment for the individual archive member. + + +.. attribute:: ZipInfo.extra + + Expansion field data. The `PKZIP Application Note + <http://www.pkware.com/business_and_developers/developer/appnote/>`_ contains + some comments on the internal structure of the data contained in this string. + + +.. attribute:: ZipInfo.create_system + + System which created ZIP archive. + + +.. attribute:: ZipInfo.create_version + + PKZIP version which created ZIP archive. + + +.. 
attribute:: ZipInfo.extract_version + + PKZIP version needed to extract archive. + + +.. attribute:: ZipInfo.reserved + + Must be zero. + + +.. attribute:: ZipInfo.flag_bits + + ZIP flag bits. + + +.. attribute:: ZipInfo.volume + + Volume number of file header. + + +.. attribute:: ZipInfo.internal_attr + + Internal attributes. + + +.. attribute:: ZipInfo.external_attr + + External file attributes. + + +.. attribute:: ZipInfo.header_offset + + Byte offset to the file header. + + +.. attribute:: ZipInfo.CRC + + CRC-32 of the uncompressed file. + + +.. attribute:: ZipInfo.compress_size + + Size of the compressed data. + + +.. attribute:: ZipInfo.file_size + + Size of the uncompressed file. + diff --git a/Doc/library/zipimport.rst b/Doc/library/zipimport.rst new file mode 100644 index 0000000..f2b2358 --- /dev/null +++ b/Doc/library/zipimport.rst @@ -0,0 +1,137 @@ + +:mod:`zipimport` --- Import modules from Zip archives +===================================================== + +.. module:: zipimport + :synopsis: support for importing Python modules from ZIP archives. +.. moduleauthor:: Just van Rossum <just@letterror.com> + + +.. versionadded:: 2.3 + +This module adds the ability to import Python modules (:file:`\*.py`, +:file:`\*.py[co]`) and packages from ZIP-format archives. It is usually not +needed to use the :mod:`zipimport` module explicitly; it is automatically used +by the builtin :keyword:`import` mechanism for ``sys.path`` items that are paths +to ZIP archives. + +Typically, ``sys.path`` is a list of directory names as strings. This module +also allows an item of ``sys.path`` to be a string naming a ZIP file archive. +The ZIP archive can contain a subdirectory structure to support package imports, +and a path within the archive can be specified to only import from a +subdirectory. For example, the path :file:`/tmp/example.zip/lib/` would only +import from the :file:`lib/` subdirectory within the archive. 
+ +Any files may be present in the ZIP archive, but only files :file:`.py` and +:file:`.py[co]` are available for import. ZIP import of dynamic modules +(:file:`.pyd`, :file:`.so`) is disallowed. Note that if an archive only contains +:file:`.py` files, Python will not attempt to modify the archive by adding the +corresponding :file:`.pyc` or :file:`.pyo` file, meaning that if a ZIP archive +doesn't contain :file:`.pyc` files, importing may be rather slow. + +The available attributes of this module are: + + +.. exception:: ZipImportError + + Exception raised by zipimporter objects. It's a subclass of :exc:`ImportError`, + so it can be caught as :exc:`ImportError`, too. + + +.. class:: zipimporter + + The class for importing ZIP files. See section :ref:`zipimporter-objects` + for constructor details. + + +.. seealso:: + + `PKZIP Application Note <http://www.pkware.com/business_and_developers/developer/appnote/>`_ + Documentation on the ZIP file format by Phil Katz, the creator of the format and + algorithms used. + + :pep:`0273` - Import Modules from Zip Archives + Written by James C. Ahlstrom, who also provided an implementation. Python 2.3 + follows the specification in PEP 273, but uses an implementation written by Just + van Rossum that uses the import hooks described in PEP 302. + + :pep:`0302` - New Import Hooks + The PEP to add the import hooks that help this module work. + + +.. _zipimporter-objects: + +zipimporter Objects +------------------- + + +.. class:: zipimporter(archivepath) + + Create a new zipimporter instance. *archivepath* must be a path to a zipfile. + :exc:`ZipImportError` is raised if *archivepath* doesn't point to a valid ZIP + archive. + + +.. method:: zipimporter.find_module(fullname[, path]) + + Search for a module specified by *fullname*. *fullname* must be the fully + qualified (dotted) module name. It returns the zipimporter instance itself if + the module was found, or :const:`None` if it wasn't. 
The optional *path* + argument is ignored---it's there for compatibility with the importer protocol. + + +.. method:: zipimporter.get_code(fullname) + + Return the code object for the specified module. Raise :exc:`ZipImportError` if + the module couldn't be found. + + +.. method:: zipimporter.get_data(pathname) + + Return the data associated with *pathname*. Raise :exc:`IOError` if the file + wasn't found. + + +.. method:: zipimporter.get_source(fullname) + + Return the source code for the specified module. Raise :exc:`ZipImportError` if + the module couldn't be found, return :const:`None` if the archive does contain + the module, but has no source for it. + + +.. method:: zipimporter.is_package(fullname) + + Return True if the module specified by *fullname* is a package. Raise + :exc:`ZipImportError` if the module couldn't be found. + + +.. method:: zipimporter.load_module(fullname) + + Load the module specified by *fullname*. *fullname* must be the fully qualified + (dotted) module name. It returns the imported module, or raises + :exc:`ZipImportError` if it wasn't found. + + +Examples +-------- + +.. _zipimport-examples: + +Here is an example that imports a module from a ZIP archive - note that the +:mod:`zipimport` module is not explicitly used. :: + + $ unzip -l /tmp/example.zip + Archive: /tmp/example.zip + Length Date Time Name + -------- ---- ---- ---- + 8467 11-26-02 22:30 jwzthreading.py + -------- ------- + 8467 1 file + $ ./python + Python 2.3 (#1, Aug 1 2003, 19:54:32) + >>> import sys + >>> sys.path.insert(0, '/tmp/example.zip') # Add .zip file to front of path + >>> import jwzthreading + >>> jwzthreading.__file__ + '/tmp/example.zip/jwzthreading.py' + diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst new file mode 100644 index 0000000..e57a156 --- /dev/null +++ b/Doc/library/zlib.rst @@ -0,0 +1,209 @@ + +:mod:`zlib` --- Compression compatible with :program:`gzip` +=========================================================== + +.. 
module:: zlib + :synopsis: Low-level interface to compression and decompression routines compatible with + gzip. + + +For applications that require data compression, the functions in this module +allow compression and decompression, using the zlib library. The zlib library +has its own home page at http://www.zlib.net. There are known +incompatibilities between the Python module and versions of the zlib library +earlier than 1.1.3; 1.1.3 has a security vulnerability, so we recommend using +1.1.4 or later. + +zlib's functions have many options and often need to be used in a particular +order. This documentation doesn't attempt to cover all of the permutations; +consult the zlib manual at http://www.zlib.net/manual.html for authoritative +information. + +The available exception and functions in this module are: + + +.. exception:: error + + Exception raised on compression and decompression errors. + + +.. function:: adler32(string[, value]) + + Computes a Adler-32 checksum of *string*. (An Adler-32 checksum is almost as + reliable as a CRC32 but can be computed much more quickly.) If *value* is + present, it is used as the starting value of the checksum; otherwise, a fixed + default value is used. This allows computing a running checksum over the + concatenation of several input strings. The algorithm is not cryptographically + strong, and should not be used for authentication or digital signatures. Since + the algorithm is designed for use as a checksum algorithm, it is not suitable + for use as a general hash algorithm. + + +.. function:: compress(string[, level]) + + Compresses the data in *string*, returning a string contained compressed data. + *level* is an integer from ``1`` to ``9`` controlling the level of compression; + ``1`` is fastest and produces the least compression, ``9`` is slowest and + produces the most. The default value is ``6``. Raises the :exc:`error` + exception if any error occurs. + + +.. 
function:: compressobj([level]) + + Returns a compression object, to be used for compressing data streams that won't + fit into memory at once. *level* is an integer from ``1`` to ``9`` controlling + the level of compression; ``1`` is fastest and produces the least compression, + ``9`` is slowest and produces the most. The default value is ``6``. + + +.. function:: crc32(string[, value]) + + .. index:: + single: Cyclic Redundancy Check + single: checksum; Cyclic Redundancy Check + + Computes a CRC (Cyclic Redundancy Check) checksum of *string*. If *value* is + present, it is used as the starting value of the checksum; otherwise, a fixed + default value is used. This allows computing a running checksum over the + concatenation of several input strings. The algorithm is not cryptographically + strong, and should not be used for authentication or digital signatures. Since + the algorithm is designed for use as a checksum algorithm, it is not suitable + for use as a general hash algorithm. + + .. % + + +.. function:: decompress(string[, wbits[, bufsize]]) + + Decompresses the data in *string*, returning a string containing the + uncompressed data. The *wbits* parameter controls the size of the window + buffer. If *bufsize* is given, it is used as the initial size of the output + buffer. Raises the :exc:`error` exception if any error occurs. + + The absolute value of *wbits* is the base two logarithm of the size of the + history buffer (the "window size") used when compressing data. Its absolute + value should be between 8 and 15 for the most recent versions of the zlib + library, larger values resulting in better compression at the expense of greater + memory usage. The default value is 15. When *wbits* is negative, the standard + :program:`gzip` header is suppressed; this is an undocumented feature of the + zlib library, used for compatibility with :program:`unzip`'s compression file + format. 
+ + *bufsize* is the initial size of the buffer used to hold decompressed data. If + more space is required, the buffer size will be increased as needed, so you + don't have to get this value exactly right; tuning it will only save a few calls + to :cfunc:`malloc`. The default size is 16384. + + +.. function:: decompressobj([wbits]) + + Returns a decompression object, to be used for decompressing data streams that + won't fit into memory at once. The *wbits* parameter controls the size of the + window buffer. + +Compression objects support the following methods: + + +.. method:: Compress.compress(string) + + Compress *string*, returning a string containing compressed data for at least + part of the data in *string*. This data should be concatenated to the output + produced by any preceding calls to the :meth:`compress` method. Some input may + be kept in internal buffers for later processing. + + +.. method:: Compress.flush([mode]) + + All pending input is processed, and a string containing the remaining compressed + output is returned. *mode* can be selected from the constants + :const:`Z_SYNC_FLUSH`, :const:`Z_FULL_FLUSH`, or :const:`Z_FINISH`, + defaulting to :const:`Z_FINISH`. :const:`Z_SYNC_FLUSH` and + :const:`Z_FULL_FLUSH` allow compressing further strings of data, while + :const:`Z_FINISH` finishes the compressed stream and prevents compressing any + more data. After calling :meth:`flush` with *mode* set to :const:`Z_FINISH`, + the :meth:`compress` method cannot be called again; the only realistic action is + to delete the object. + + +.. method:: Compress.copy() + + Returns a copy of the compression object. This can be used to efficiently + compress a set of data that share a common initial prefix. + + .. versionadded:: 2.5 + +Decompression objects support the following methods, and two attributes: + + +.. attribute:: Decompress.unused_data + + A string which contains any bytes past the end of the compressed data. 
That is, + this remains ``""`` until the last byte that contains compression data is + available. If the whole string turned out to contain compressed data, this is + ``""``, the empty string. + + The only way to determine where a string of compressed data ends is by actually + decompressing it. This means that when compressed data is contained part of a + larger file, you can only find the end of it by reading data and feeding it + followed by some non-empty string into a decompression object's + :meth:`decompress` method until the :attr:`unused_data` attribute is no longer + the empty string. + + +.. attribute:: Decompress.unconsumed_tail + + A string that contains any data that was not consumed by the last + :meth:`decompress` call because it exceeded the limit for the uncompressed data + buffer. This data has not yet been seen by the zlib machinery, so you must feed + it (possibly with further data concatenated to it) back to a subsequent + :meth:`decompress` method call in order to get correct output. + + +.. method:: Decompress.decompress(string[, max_length]) + + Decompress *string*, returning a string containing the uncompressed data + corresponding to at least part of the data in *string*. This data should be + concatenated to the output produced by any preceding calls to the + :meth:`decompress` method. Some of the input data may be preserved in internal + buffers for later processing. + + If the optional parameter *max_length* is supplied then the return value will be + no longer than *max_length*. This may mean that not all of the compressed input + can be processed; and unconsumed data will be stored in the attribute + :attr:`unconsumed_tail`. This string must be passed to a subsequent call to + :meth:`decompress` if decompression is to continue. If *max_length* is not + supplied then the whole input is decompressed, and :attr:`unconsumed_tail` is an + empty string. + + +.. 
method:: Decompress.flush([length]) + + All pending input is processed, and a string containing the remaining + uncompressed output is returned. After calling :meth:`flush`, the + :meth:`decompress` method cannot be called again; the only realistic action is + to delete the object. + + The optional parameter *length* sets the initial size of the output buffer. + + +.. method:: Decompress.copy() + + Returns a copy of the decompression object. This can be used to save the state + of the decompressor midway through the data stream in order to speed up random + seeks into the stream at a future point. + + .. versionadded:: 2.5 + + +.. seealso:: + + Module :mod:`gzip` + Reading and writing :program:`gzip`\ -format files. + + http://www.zlib.net + The zlib library home page. + + http://www.zlib.net/manual.html + The zlib manual explains the semantics and usage of the library's many + functions. + diff --git a/Doc/license.rst b/Doc/license.rst new file mode 100644 index 0000000..a9165f4 --- /dev/null +++ b/Doc/license.rst @@ -0,0 +1,647 @@ +.. highlightlang:: none + +.. _history-and-license: + +******************* +History and License +******************* + + +History of the software +======================= + +Python was created in the early 1990s by Guido van Rossum at Stichting +Mathematisch Centrum (CWI, see http://www.cwi.nl/) in the Netherlands as a +successor of a language called ABC. Guido remains Python's principal author, +although it includes many contributions from others. + +In 1995, Guido continued his work on Python at the Corporation for National +Research Initiatives (CNRI, see http://www.cnri.reston.va.us/) in Reston, +Virginia where he released several versions of the software. + +In May 2000, Guido and the Python core development team moved to BeOpen.com to +form the BeOpen PythonLabs team. In October of the same year, the PythonLabs +team moved to Digital Creations (now Zope Corporation; see +http://www.zope.com/). 
In 2001, the Python Software Foundation (PSF, see +http://www.python.org/psf/) was formed, a non-profit organization created +specifically to own Python-related Intellectual Property. Zope Corporation is a +sponsoring member of the PSF. + +All Python releases are Open Source (see http://www.opensource.org/ for the Open +Source Definition). Historically, most, but not all, Python releases have also +been GPL-compatible; the table below summarizes the various releases. + ++----------------+--------------+-----------+------------+-----------------+ +| Release | Derived from | Year | Owner | GPL compatible? | ++================+==============+===========+============+=================+ +| 0.9.0 thru 1.2 | n/a | 1991-1995 | CWI | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 1.3 thru 1.5.2 | 1.2 | 1995-1999 | CNRI | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 1.6 | 1.5.2 | 2000 | CNRI | no | ++----------------+--------------+-----------+------------+-----------------+ +| 2.0 | 1.6 | 2000 | BeOpen.com | no | ++----------------+--------------+-----------+------------+-----------------+ +| 1.6.1 | 1.6 | 2001 | CNRI | no | ++----------------+--------------+-----------+------------+-----------------+ +| 2.1 | 2.0+1.6.1 | 2001 | PSF | no | ++----------------+--------------+-----------+------------+-----------------+ +| 2.0.1 | 2.0+1.6.1 | 2001 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.1.1 | 2.1+2.0.1 | 2001 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.2 | 2.1.1 | 2001 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.1.2 | 2.1.1 | 2002 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.1.3 | 2.1.2 | 2002 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 
2.2.1 | 2.2 | 2002 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.2.2 | 2.2.1 | 2002 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.2.3 | 2.2.2 | 2002-2003 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.3 | 2.2.2 | 2002-2003 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.3.1 | 2.3 | 2002-2003 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.3.2 | 2.3.1 | 2003 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.3.3 | 2.3.2 | 2003 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.3.4 | 2.3.3 | 2004 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.3.5 | 2.3.4 | 2005 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.4 | 2.3 | 2004 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.4.1 | 2.4 | 2005 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.4.2 | 2.4.1 | 2005 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.4.3 | 2.4.2 | 2006 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.4.4 | 2.4.3 | 2006 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.5 | 2.4 | 2006 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ +| 2.5.1 | 2.5 | 2007 | PSF | yes | ++----------------+--------------+-----------+------------+-----------------+ + +.. note:: + + GPL-compatible doesn't mean that we're distributing Python under the GPL. 
All + Python licenses, unlike the GPL, let you distribute a modified version without + making your changes open source. The GPL-compatible licenses make it possible to + combine Python with other software that is released under the GPL; the others + don't. + +Thanks to the many outside volunteers who have worked under Guido's direction to +make these releases possible. + + +Terms and conditions for accessing or otherwise using Python +============================================================ + + +.. centered:: PSF LICENSE AGREEMENT FOR PYTHON |release| + +#. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and + the Individual or Organization ("Licensee") accessing and otherwise using Python + |release| software in source or binary form and its associated documentation. + +#. Subject to the terms and conditions of this License Agreement, PSF hereby + grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, + analyze, test, perform and/or display publicly, prepare derivative works, + distribute, and otherwise use Python |release| alone or in any derivative + version, provided, however, that PSF's License Agreement and PSF's notice of + copyright, i.e., "Copyright © 2001-2007 Python Software Foundation; All Rights + Reserved" are retained in Python |release| alone or in any derivative version + prepared by Licensee. + +#. In the event Licensee prepares a derivative work that is based on or + incorporates Python |release| or any part thereof, and wants to make the + derivative work available to others as provided herein, then Licensee hereby + agrees to include in any such work a brief summary of the changes made to Python + |release|. + +#. PSF is making Python |release| available to Licensee on an "AS IS" basis. + PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. 
BY WAY OF + EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR + WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE + USE OF PYTHON |release| WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + +#. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON |release| + FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF + MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON |release|, OR ANY DERIVATIVE + THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +#. This License Agreement will automatically terminate upon a material breach of + its terms and conditions. + +#. Nothing in this License Agreement shall be deemed to create any relationship + of agency, partnership, or joint venture between PSF and Licensee. This License + Agreement does not grant permission to use PSF trademarks or trade name in a + trademark sense to endorse or promote products or services of Licensee, or any + third party. + +#. By copying, installing or otherwise using Python |release|, Licensee agrees + to be bound by the terms and conditions of this License Agreement. + + +.. centered:: BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 + + +.. centered:: BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +#. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an office at + 160 Saratoga Avenue, Santa Clara, CA 95051, and the Individual or Organization + ("Licensee") accessing and otherwise using this software in source or binary + form and its associated documentation ("the Software"). + +#. 
Subject to the terms and conditions of this BeOpen Python License Agreement, + BeOpen hereby grants Licensee a non-exclusive, royalty-free, world-wide license + to reproduce, analyze, test, perform and/or display publicly, prepare derivative + works, distribute, and otherwise use the Software alone or in any derivative + version, provided, however, that the BeOpen Python License is retained in the + Software, alone or in any derivative version prepared by Licensee. + +#. BeOpen is making the Software available to Licensee on an "AS IS" basis. + BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF + EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND DISCLAIMS ANY REPRESENTATION OR + WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE + USE OF THE SOFTWARE WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + +#. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE SOFTWARE FOR + ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF USING, + MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY DERIVATIVE THEREOF, EVEN IF + ADVISED OF THE POSSIBILITY THEREOF. + +#. This License Agreement will automatically terminate upon a material breach of + its terms and conditions. + +#. This License Agreement shall be governed by and interpreted in all respects + by the law of the State of California, excluding conflict of law provisions. + Nothing in this License Agreement shall be deemed to create any relationship of + agency, partnership, or joint venture between BeOpen and Licensee. This License + Agreement does not grant permission to use BeOpen trademarks or trade names in a + trademark sense to endorse or promote products or services of Licensee, or any + third party. As an exception, the "BeOpen Python" logos available at + http://www.pythonlabs.com/logos.html may be used according to the permissions + granted on that web page. + +#. 
By copying, installing or otherwise using the software, Licensee agrees to be + bound by the terms and conditions of this License Agreement. + + +.. centered:: CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 + +#. This LICENSE AGREEMENT is between the Corporation for National Research + Initiatives, having an office at 1895 Preston White Drive, Reston, VA 20191 + ("CNRI"), and the Individual or Organization ("Licensee") accessing and + otherwise using Python 1.6.1 software in source or binary form and its + associated documentation. + +#. Subject to the terms and conditions of this License Agreement, CNRI hereby + grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, + analyze, test, perform and/or display publicly, prepare derivative works, + distribute, and otherwise use Python 1.6.1 alone or in any derivative version, + provided, however, that CNRI's License Agreement and CNRI's notice of copyright, + i.e., "Copyright © 1995-2001 Corporation for National Research Initiatives; All + Rights Reserved" are retained in Python 1.6.1 alone or in any derivative version + prepared by Licensee. Alternately, in lieu of CNRI's License Agreement, + Licensee may substitute the following text (omitting the quotes): "Python 1.6.1 + is made available subject to the terms and conditions in CNRI's License + Agreement. This Agreement together with Python 1.6.1 may be located on the + Internet using the following unique, persistent identifier (known as a handle): + 1895.22/1013. This Agreement may also be obtained from a proxy server on the + Internet using the following URL: http://hdl.handle.net/1895.22/1013." + +#. In the event Licensee prepares a derivative work that is based on or + incorporates Python 1.6.1 or any part thereof, and wants to make the derivative + work available to others as provided herein, then Licensee hereby agrees to + include in any such work a brief summary of the changes made to Python 1.6.1. + +#. 
CNRI is making Python 1.6.1 available to Licensee on an "AS IS" basis. CNRI + MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, + BUT NOT LIMITATION, CNRI MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY + OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF + PYTHON 1.6.1 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. + +#. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON 1.6.1 FOR + ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF + MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, OR ANY DERIVATIVE + THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +#. This License Agreement will automatically terminate upon a material breach of + its terms and conditions. + +#. This License Agreement shall be governed by the federal intellectual property + law of the United States, including without limitation the federal copyright + law, and, to the extent such U.S. federal law does not apply, by the law of the + Commonwealth of Virginia, excluding Virginia's conflict of law provisions. + Notwithstanding the foregoing, with regard to derivative works based on Python + 1.6.1 that incorporate non-separable material that was previously distributed + under the GNU General Public License (GPL), the law of the Commonwealth of + Virginia shall govern this License Agreement only as to issues arising under or + with respect to Paragraphs 4, 5, and 7 of this License Agreement. Nothing in + this License Agreement shall be deemed to create any relationship of agency, + partnership, or joint venture between CNRI and Licensee. This License Agreement + does not grant permission to use CNRI trademarks or trade name in a trademark + sense to endorse or promote products or services of Licensee, or any third + party. + +#. 
By clicking on the "ACCEPT" button where indicated, or by copying, installing + or otherwise using Python 1.6.1, Licensee agrees to be bound by the terms and + conditions of this License Agreement. + + +.. centered:: ACCEPT + + +.. centered:: CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 + +Copyright © 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, The +Netherlands. All rights reserved. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, provided that +the above copyright notice appear in all copies and that both that copyright +notice and this permission notice appear in supporting documentation, and that +the name of Stichting Mathematisch Centrum or CWI not be used in advertising or +publicity pertaining to distribution of the software without specific, written +prior permission. + +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS +SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO +EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE FOR ANY SPECIAL, INDIRECT +OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS +SOFTWARE. + + +Licenses and Acknowledgements for Incorporated Software +======================================================= + +This section is an incomplete, but growing list of licenses and acknowledgements +for third-party software incorporated in the Python distribution. + + +Mersenne Twister +---------------- + +The :mod:`_random` module includes code based on a download from +http://www.math.keio.ac.jp/~matumoto/MT2002/emt19937ar.html. The following are +the verbatim comments from the original code:: + + A C-program for MT19937, with initialization improved 2002/1/26.
+ Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. 
+ http://www.math.keio.ac.jp/matumoto/emt.html + email: matumoto@math.keio.ac.jp + + +Sockets +------- + +The :mod:`socket` module uses the functions :func:`getaddrinfo` and +:func:`getnameinfo`, which are coded in separate source files from the WIDE +Project, http://www.wide.ad.jp/about/index.html. :: + + Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. Neither the name of the project nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE.
+ + +Floating point exception control +-------------------------------- + +The source for the :mod:`fpectl` module includes the following notice:: + + --------------------------------------------------------------------- + / Copyright (c) 1996. \ + | The Regents of the University of California. | + | All rights reserved. | + | | + | Permission to use, copy, modify, and distribute this software for | + | any purpose without fee is hereby granted, provided that this en- | + | tire notice is included in all copies of any software which is or | + | includes a copy or modification of this software and in all | + | copies of the supporting documentation for such software. | + | | + | This work was produced at the University of California, Lawrence | + | Livermore National Laboratory under contract no. W-7405-ENG-48 | + | between the U.S. Department of Energy and The Regents of the | + | University of California for the operation of UC LLNL. | + | | + | DISCLAIMER | + | | + | This software was prepared as an account of work sponsored by an | + | agency of the United States Government. Neither the United States | + | Government nor the University of California nor any of their em- | + | ployees, makes any warranty, express or implied, or assumes any | + | liability or responsibility for the accuracy, completeness, or | + | usefulness of any information, apparatus, product, or process | + | disclosed, or represents that its use would not infringe | + | privately-owned rights. Reference herein to any specific commer- | + | cial products, process, or service by trade name, trademark, | + | manufacturer, or otherwise, does not necessarily constitute or | + | imply its endorsement, recommendation, or favoring by the United | + | States Government or the University of California. 
The views and | + | opinions of authors expressed herein do not necessarily state or | + | reflect those of the United States Government or the University | + | of California, and shall not be used for advertising or product | + \ endorsement purposes. / + --------------------------------------------------------------------- + + +MD5 message digest algorithm +---------------------------- + +The source code for the :mod:`md5` module contains the following notice:: + + Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved. + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + L. Peter Deutsch + ghost@aladdin.com + + Independent implementation of MD5 (RFC 1321). + + This code implements the MD5 Algorithm defined in RFC 1321, whose + text is available at + http://www.ietf.org/rfc/rfc1321.txt + The code is derived from the text of the RFC, including the test suite + (section A.5) but excluding the rest of Appendix A. It does not include + any code or documentation that is identified in the RFC as being + copyrighted. + + The original and principal author of md5.h is L. Peter Deutsch + <ghost@aladdin.com>. 
Other authors are noted in the change history + that follows (in reverse chronological order): + + 2002-04-13 lpd Removed support for non-ANSI compilers; removed + references to Ghostscript; clarified derivation from RFC 1321; + now handles byte order either statically or dynamically. + 1999-11-04 lpd Edited comments slightly for automatic TOC extraction. + 1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5); + added conditionalization for C++ compilation from Martin + Purschke <purschke@bnl.gov>. + 1999-05-03 lpd Original version. + + +Asynchronous socket services +---------------------------- + +The :mod:`asynchat` and :mod:`asyncore` modules contain the following notice:: + + Copyright 1996 by Sam Rushing + + All Rights Reserved + + Permission to use, copy, modify, and distribute this software and + its documentation for any purpose and without fee is hereby + granted, provided that the above copyright notice appear in all + copies and that both that copyright notice and this permission + notice appear in supporting documentation, and that the name of Sam + Rushing not be used in advertising or publicity pertaining to + distribution of the software without specific, written prior + permission. + + SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN + NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR + CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS + OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ + +Cookie management +----------------- + +The :mod:`Cookie` module contains the following notice:: + + Copyright 2000 by Timothy O'Malley <timo@alum.mit.edu> + + All Rights Reserved + + Permission to use, copy, modify, and distribute this software + and its documentation for any purpose and without fee is hereby + granted, provided that the above copyright notice appear in all + copies and that both that copyright notice and this permission + notice appear in supporting documentation, and that the name of + Timothy O'Malley not be used in advertising or publicity + pertaining to distribution of the software without specific, written + prior permission. + + Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS + SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR + ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + PERFORMANCE OF THIS SOFTWARE. + + +Profiling +--------- + +The :mod:`profile` and :mod:`pstats` modules contain the following notice:: + + Copyright 1994, by InfoSeek Corporation, all rights reserved. + Written by James Roskind + + Permission to use, copy, modify, and distribute this Python software + and its associated documentation for any purpose (subject to the + restriction in the following sentence) without fee is hereby granted, + provided that the above copyright notice appears in all copies, and + that both that copyright notice and this permission notice appear in + supporting documentation, and that the name of InfoSeek not be used in + advertising or publicity pertaining to distribution of the software + without specific, written prior permission. 
This permission is + explicitly restricted to the copying and modification of the software + to remain in Python, compiled Python, or other languages (such as C) + wherein the modified or derived code is exclusively imported into a + Python module. + + INFOSEEK CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS + SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS. IN NO EVENT SHALL INFOSEEK CORPORATION BE LIABLE FOR ANY + SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER + RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF + CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + +Execution tracing +----------------- + +The :mod:`trace` module contains the following notice:: + + portions copyright 2001, Autonomous Zones Industries, Inc., all rights... + err... reserved and offered to the public under the terms of the + Python 2.2 license. + Author: Zooko O'Whielacronx + http://zooko.com/ + mailto:zooko@zooko.com + + Copyright 2000, Mojam Media, Inc., all rights reserved. + Author: Skip Montanaro + + Copyright 1999, Bioreason, Inc., all rights reserved. + Author: Andrew Dalke + + Copyright 1995-1997, Automatrix, Inc., all rights reserved. + Author: Skip Montanaro + + Copyright 1991-1995, Stichting Mathematisch Centrum, all rights reserved. + + + Permission to use, copy, modify, and distribute this Python software and + its associated documentation for any purpose without fee is hereby + granted, provided that the above copyright notice appears in all copies, + and that both that copyright notice and this permission notice appear in + supporting documentation, and that the name of neither Automatrix, + Bioreason or Mojam Media be used in advertising or publicity pertaining to + distribution of the software without specific, written prior permission. 
+ + +UUencode and UUdecode functions +------------------------------- + +The :mod:`uu` module contains the following notice:: + + Copyright 1994 by Lance Ellinghouse + Cathedral City, California Republic, United States of America. + All Rights Reserved + Permission to use, copy, modify, and distribute this software and its + documentation for any purpose and without fee is hereby granted, + provided that the above copyright notice appear in all copies and that + both that copyright notice and this permission notice appear in + supporting documentation, and that the name of Lance Ellinghouse + not be used in advertising or publicity pertaining to distribution + of the software without specific, written prior permission. + LANCE ELLINGHOUSE DISCLAIMS ALL WARRANTIES WITH REGARD TO + THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS, IN NO EVENT SHALL LANCE ELLINGHOUSE CENTRUM BE LIABLE + FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + Modified by Jack Jansen, CWI, July 1995: + - Use binascii module to do the actual line-by-line conversion + between ascii and binary. This results in a 1000-fold speedup. The C + version is still 5 times faster, though. 
+ - Arguments more compliant with python standard + + +XML Remote Procedure Calls +-------------------------- + +The :mod:`xmlrpclib` module contains the following notice:: + + The XML-RPC client interface is + + Copyright (c) 1999-2002 by Secret Labs AB + Copyright (c) 1999-2002 by Fredrik Lundh + + By obtaining, using, and/or copying this software and/or its + associated documentation, you agree that you have read, understood, + and will comply with the following terms and conditions: + + Permission to use, copy, modify, and distribute this software and + its associated documentation for any purpose and without fee is + hereby granted, provided that the above copyright notice appears in + all copies, and that both that copyright notice and this permission + notice appear in supporting documentation, and that the name of + Secret Labs AB or the author not be used in advertising or publicity + pertaining to distribution of the software without specific, written + prior permission. + + SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD + TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- + ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR + BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY + DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + OF THIS SOFTWARE. + diff --git a/Doc/reference/compound_stmts.rst b/Doc/reference/compound_stmts.rst new file mode 100644 index 0000000..e7587f6 --- /dev/null +++ b/Doc/reference/compound_stmts.rst @@ -0,0 +1,554 @@ + +.. _compound: + +******************* +Compound statements +******************* + +.. index:: pair: compound; statement + +Compound statements contain (groups of) other statements; they affect or control +the execution of those other statements in some way. 
In general, compound +statements span multiple lines, although in simple incarnations a whole compound +statement may be contained in one line. + +The :keyword:`if`, :keyword:`while` and :keyword:`for` statements implement +traditional control flow constructs. :keyword:`try` specifies exception +handlers and/or cleanup code for a group of statements. Function and class +definitions are also syntactically compound statements. + +.. index:: + single: clause + single: suite + +Compound statements consist of one or more 'clauses.' A clause consists of a +header and a 'suite.' The clause headers of a particular compound statement are +all at the same indentation level. Each clause header begins with a uniquely +identifying keyword and ends with a colon. A suite is a group of statements +controlled by a clause. A suite can be one or more semicolon-separated simple +statements on the same line as the header, following the header's colon, or it +can be one or more indented statements on subsequent lines. Only the latter +form of suite can contain nested compound statements; the following is illegal, +mostly because it wouldn't be clear to which :keyword:`if` clause a following +:keyword:`else` clause would belong: :: + + if test1: if test2: print x + +Also note that the semicolon binds tighter than the colon in this context, so +that in the following example, either all or none of the :keyword:`print` +statements are executed:: + + if x < y < z: print x; print y; print z + +Summarizing: + +.. productionlist:: + compound_stmt: `if_stmt` + : | `while_stmt` + : | `for_stmt` + : | `try_stmt` + : | `with_stmt` + : | `funcdef` + : | `classdef` + suite: `stmt_list` NEWLINE | NEWLINE INDENT `statement`+ DEDENT + statement: `stmt_list` NEWLINE | `compound_stmt` + stmt_list: `simple_stmt` (";" `simple_stmt`)* [";"] + +.. 
index:: + single: NEWLINE token + single: DEDENT token + pair: dangling; else + +Note that statements always end in a ``NEWLINE`` possibly followed by a +``DEDENT``. Also note that optional continuation clauses always begin with a +keyword that cannot start a statement, thus there are no ambiguities (the +'dangling :keyword:`else`' problem is solved in Python by requiring nested +:keyword:`if` statements to be indented). + +The formatting of the grammar rules in the following sections places each clause +on a separate line for clarity. + + +.. _if: + +The :keyword:`if` statement +=========================== + +.. index:: statement: if + +The :keyword:`if` statement is used for conditional execution: + +.. productionlist:: + if_stmt: "if" `expression` ":" `suite` + : ( "elif" `expression` ":" `suite` )* + : ["else" ":" `suite`] + +.. index:: + keyword: elif + keyword: else + +It selects exactly one of the suites by evaluating the expressions one by one +until one is found to be true (see section :ref:`booleans` for the definition of +true and false); then that suite is executed (and no other part of the +:keyword:`if` statement is executed or evaluated). If all expressions are +false, the suite of the :keyword:`else` clause, if present, is executed. + + +.. _while: + +The :keyword:`while` statement +============================== + +.. index:: + statement: while + pair: loop; statement + +The :keyword:`while` statement is used for repeated execution as long as an +expression is true: + +.. productionlist:: + while_stmt: "while" `expression` ":" `suite` + : ["else" ":" `suite`] + +.. index:: keyword: else + +This repeatedly tests the expression and, if it is true, executes the first +suite; if the expression is false (which may be the first time it is tested) the +suite of the :keyword:`else` clause, if present, is executed and the loop +terminates. + +.. 
index:: + statement: break + statement: continue + +A :keyword:`break` statement executed in the first suite terminates the loop +without executing the :keyword:`else` clause's suite. A :keyword:`continue` +statement executed in the first suite skips the rest of the suite and goes back +to testing the expression. + + +.. _for: + +The :keyword:`for` statement +============================ + +.. index:: + statement: for + pair: loop; statement + +.. index:: object: sequence + +The :keyword:`for` statement is used to iterate over the elements of a sequence +(such as a string, tuple or list) or other iterable object: + +.. productionlist:: + for_stmt: "for" `target_list` "in" `expression_list` ":" `suite` + : ["else" ":" `suite`] + +.. index:: + keyword: in + keyword: else + pair: target; list + +The expression list is evaluated once; it should yield an iterable object. An +iterator is created for the result of the ``expression_list``. The suite is +then executed once for each item provided by the iterator, in the order of +ascending indices. Each item in turn is assigned to the target list using the +standard rules for assignments, and then the suite is executed. When the items +are exhausted (which is immediately when the sequence is empty), the suite in +the :keyword:`else` clause, if present, is executed, and the loop terminates. + +.. index:: + statement: break + statement: continue + +A :keyword:`break` statement executed in the first suite terminates the loop +without executing the :keyword:`else` clause's suite. A :keyword:`continue` +statement executed in the first suite skips the rest of the suite and continues +with the next item, or with the :keyword:`else` clause if there was no next +item. + +The suite may assign to the variable(s) in the target list; this does not affect +the next item assigned to it. + +.. 
index:: + builtin: range + pair: Pascal; language + +The target list is not deleted when the loop is finished, but if the sequence is +empty, it will not have been assigned to at all by the loop. Hint: the built-in +function :func:`range` returns a sequence of integers suitable to emulate the +effect of Pascal's ``for i := a to b do``; e.g., ``range(3)`` returns the list +``[0, 1, 2]``. + +.. warning:: + + .. index:: + single: loop; over mutable sequence + single: mutable sequence; loop over + + There is a subtlety when the sequence is being modified by the loop (this can + only occur for mutable sequences, i.e. lists). An internal counter is used to + keep track of which item is used next, and this is incremented on each + iteration. When this counter has reached the length of the sequence the loop + terminates. This means that if the suite deletes the current (or a previous) + item from the sequence, the next item will be skipped (since it gets the index + of the current item which has already been treated). Likewise, if the suite + inserts an item in the sequence before the current item, the current item will + be treated again the next time through the loop. This can lead to nasty bugs + that can be avoided by making a temporary copy using a slice of the whole + sequence, e.g., + +:: + + for x in a[:]: + if x < 0: a.remove(x) + + +.. _try: + +The :keyword:`try` statement +============================ + +.. index:: statement: try + +The :keyword:`try` statement specifies exception handlers and/or cleanup code +for a group of statements: + +.. productionlist:: + try_stmt: try1_stmt | try2_stmt + try1_stmt: "try" ":" `suite` + : ("except" [`expression` ["," `target`]] ":" `suite`)+ + : ["else" ":" `suite`] + : ["finally" ":" `suite`] + try2_stmt: "try" ":" `suite` + : "finally" ":" `suite` + +.. versionchanged:: 2.5 + In previous versions of Python, :keyword:`try`...\ :keyword:`except`...\ + :keyword:`finally` did not work. 
:keyword:`try`...\ :keyword:`except` had to be + nested in :keyword:`try`...\ :keyword:`finally`. + +.. index:: keyword: except + +The :keyword:`except` clause(s) specify one or more exception handlers. When no +exception occurs in the :keyword:`try` clause, no exception handler is executed. +When an exception occurs in the :keyword:`try` suite, a search for an exception +handler is started. This search inspects the except clauses in turn until one +is found that matches the exception. An expression-less except clause, if +present, must be last; it matches any exception. For an except clause with an +expression, that expression is evaluated, and the clause matches the exception +if the resulting object is "compatible" with the exception. An object is +compatible with an exception if it is the class or a base class of the exception +object or a tuple containing an item compatible with the exception. + +If no except clause matches the exception, the search for an exception handler +continues in the surrounding code and on the invocation stack. [#]_ + +If the evaluation of an expression in the header of an except clause raises an +exception, the original search for a handler is canceled and a search starts for +the new exception in the surrounding code and on the call stack (it is treated +as if the entire :keyword:`try` statement raised the exception). + +When a matching except clause is found, the exception is assigned to the target +specified in that except clause, if present, and the except clause's suite is +executed. All except clauses must have an executable block. When the end of +this block is reached, execution continues normally after the entire try +statement. (This means that if two nested handlers exist for the same +exception, and the exception occurs in the try clause of the inner handler, the +outer handler will not handle the exception.) + +.. 
index:: + module: sys + object: traceback + +Before an except clause's suite is executed, details about the exception are +stored in the :mod:`sys` module and can be accessed via :func:`sys.exc_info`. +:func:`sys.exc_info` returns a 3-tuple consisting of: ``exc_type``, the +object identifying the exception; ``exc_value``, the exception's +parameter; and ``exc_traceback``, a traceback object (see section +:ref:`types`) identifying the point in the program where the exception +occurred. :func:`sys.exc_info` values are restored to their previous values +(before the call) when returning from a function that handled an exception. + +.. index:: + keyword: else + statement: return + statement: break + statement: continue + +The optional :keyword:`else` clause is executed if and when control flows off +the end of the :keyword:`try` clause. [#]_ Exceptions in the :keyword:`else` +clause are not handled by the preceding :keyword:`except` clauses. + +.. index:: keyword: finally + +If :keyword:`finally` is present, it specifies a 'cleanup' handler. The +:keyword:`try` clause is executed, including any :keyword:`except` and +:keyword:`else` clauses. If an exception occurs in any of the clauses and is +not handled, the exception is temporarily saved. The :keyword:`finally` clause +is executed. If there is a saved exception, it is re-raised at the end of the +:keyword:`finally` clause. If the :keyword:`finally` clause raises another +exception or executes a :keyword:`return` or :keyword:`break` statement, the +saved exception is lost. The exception information is not available to the +program during execution of the :keyword:`finally` clause. + +.. index:: + statement: return + statement: break + statement: continue + +When a :keyword:`return`, :keyword:`break` or :keyword:`continue` statement is +executed in the :keyword:`try` suite of a :keyword:`try`...\ :keyword:`finally` +statement, the :keyword:`finally` clause is also executed 'on the way out.'
A +:keyword:`continue` statement is illegal in the :keyword:`finally` clause. (The +reason is a problem with the current implementation --- this restriction may be +lifted in the future). + +Additional information on exceptions can be found in section :ref:`exceptions`, +and information on using the :keyword:`raise` statement to generate exceptions +may be found in section :ref:`raise`. + + +.. _with: + +The :keyword:`with` statement +============================= + +.. index:: statement: with + +.. versionadded:: 2.5 + +The :keyword:`with` statement is used to wrap the execution of a block with +methods defined by a context manager (see section :ref:`context-managers`). This +allows common :keyword:`try`...\ :keyword:`except`...\ :keyword:`finally` usage +patterns to be encapsulated for convenient reuse. + +.. productionlist:: + with_stmt: "with" `expression` ["as" `target`] ":" `suite` + +The execution of the :keyword:`with` statement proceeds as follows: + +#. The context expression is evaluated to obtain a context manager. + +#. The context manager's :meth:`__enter__` method is invoked. + +#. If a target was included in the :keyword:`with` statement, the return value + from :meth:`__enter__` is assigned to it. + + .. note:: + + The :keyword:`with` statement guarantees that if the :meth:`__enter__` method + returns without an error, then :meth:`__exit__` will always be called. Thus, if + an error occurs during the assignment to the target list, it will be treated the + same as an error occurring within the suite would be. See step 5 below. + +#. The suite is executed. + +#. The context manager's :meth:`__exit__` method is invoked. If an exception + caused the suite to be exited, its type, value, and traceback are passed as + arguments to :meth:`__exit__`. Otherwise, three :const:`None` arguments are + supplied. + + If the suite was exited due to an exception, and the return value from the + :meth:`__exit__` method was false, the exception is reraised. 
If the return + value was true, the exception is suppressed, and execution continues with the + statement following the :keyword:`with` statement. + + If the suite was exited for any reason other than an exception, the return value + from :meth:`__exit__` is ignored, and execution proceeds at the normal location + for the kind of exit that was taken. + +.. note:: + + In Python 2.5, the :keyword:`with` statement is only allowed when the + ``with_statement`` feature has been enabled. It will always be enabled in + Python 2.6. This ``__future__`` import statement can be used to enable the + feature:: + + from __future__ import with_statement + + +.. seealso:: + + :pep:`0343` - The "with" statement + The specification, background, and examples for the Python :keyword:`with` + statement. + + +.. _function: + +Function definitions +==================== + +.. index:: + pair: function; definition + statement: def + +.. index:: + object: user-defined function + object: function + +A function definition defines a user-defined function object (see section +:ref:`types`): + +.. productionlist:: + funcdef: [`decorators`] "def" `funcname` "(" [`parameter_list`] ")" ["->" `expression`] ":" `suite` + decorators: `decorator`+ + decorator: "@" `dotted_name` ["(" [`argument_list` [","]] ")"] NEWLINE + dotted_name: `identifier` ("." `identifier`)* + parameter_list: (`defparameter` ",")* + : ( "*" [`parameter`] ("," `defparameter`)* + : ["," "**" `parameter`] + : | "**" `parameter` + : | `defparameter` [","] ) + parameter: `identifier` [":" `expression`] + defparameter: `parameter` ["=" `expression`] + funcname: `identifier` + +.. index:: + pair: function; name + pair: name; binding + +A function definition is an executable statement. Its execution binds the +function name in the current local namespace to a function object (a wrapper +around the executable code for the function).
This function object contains a +reference to the current global namespace as the global namespace to be used +when the function is called. + +The function definition does not execute the function body; this gets executed +only when the function is called. + +A function definition may be wrapped by one or more decorator expressions. +Decorator expressions are evaluated when the function is defined, in the scope +that contains the function definition. The result must be a callable, which is +invoked with the function object as the only argument. The returned value is +bound to the function name instead of the function object. Multiple decorators +are applied in nested fashion. For example, the following code:: + + @f1(arg) + @f2 + def func(): pass + +is equivalent to:: + + def func(): pass + func = f1(arg)(f2(func)) + +.. index:: triple: default; parameter; value + +When one or more parameters have the form *parameter* ``=`` *expression*, the +function is said to have "default parameter values." For a parameter with a +default value, the corresponding argument may be omitted from a call, in which +case the parameter's default value is substituted. If a parameter has a default +value, all following parameters up until the "``*``" must also have a default +value --- this is a syntactic restriction that is not expressed by the grammar. + +**Default parameter values are evaluated when the function definition is +executed.** This means that the expression is evaluated once, when the function +is defined, and that that same "pre-computed" value is used for each call. This +is especially important to understand when a default parameter is a mutable +object, such as a list or a dictionary: if the function modifies the object +(e.g. by appending an item to a list), the default value is in effect modified. +This is generally not what was intended. 
A way around this is to use ``None`` +as the default, and explicitly test for it in the body of the function, e.g.:: + + def whats_on_the_telly(penguin=None): + if penguin is None: + penguin = [] + penguin.append("property of the zoo") + return penguin + +Function call semantics are described in more detail in section :ref:`calls`. A +function call always assigns values to all parameters mentioned in the parameter +list, either from positional arguments, from keyword arguments, or from default +values. If the form "``*identifier``" is present, it is initialized to a tuple +receiving any excess positional parameters, defaulting to the empty tuple. If +the form "``**identifier``" is present, it is initialized to a new dictionary +receiving any excess keyword arguments, defaulting to a new empty dictionary. +Parameters after "``*``" or "``*identifier``" are keyword-only parameters and +may only be passed using keyword arguments. + +.. index:: pair: function; annotations + +Parameters may have annotations of the form "``: expression``" following the +parameter name. Any parameter may have an annotation, even those of the form +``*identifier`` or ``**identifier``. Functions may have a "return" annotation of +the form "``-> expression``" after the parameter list. These annotations can be +any valid Python expression and are evaluated when the function definition is +executed. Annotations may be evaluated in a different order than they appear in +the source code. The presence of annotations does not change the semantics of a +function. The annotation values are available as values of a dictionary keyed +by the parameters' names in the :attr:`__annotations__` attribute of the +function object. + +.. index:: pair: lambda; form + +It is also possible to create anonymous functions (functions not bound to a +name), for immediate use in expressions. This uses lambda forms, described in +section :ref:`lambda`.
Note that the lambda form is merely a shorthand for a +simplified function definition; a function defined in a ":keyword:`def`" +statement can be passed around or assigned to another name just like a function +defined by a lambda form. The ":keyword:`def`" form is actually more powerful +since it allows the execution of multiple statements and annotations. + +**Programmer's note:** Functions are first-class objects. A "``def``" form +executed inside a function definition defines a local function that can be +returned or passed around. Free variables used in the nested function can +access the local variables of the function containing the def. See section +:ref:`naming` for details. + + +.. _class: + +Class definitions +================= + +.. index:: + pair: class; definition + statement: class + +.. index:: object: class + +A class definition defines a class object (see section :ref:`types`): + +.. productionlist:: + classdef: "class" `classname` [`inheritance`] ":" `suite` + inheritance: "(" [`expression_list`] ")" + classname: `identifier` + +.. index:: + single: inheritance + pair: class; name + pair: name; binding + pair: execution; frame + +A class definition is an executable statement. It first evaluates the +inheritance list, if present. Each item in the inheritance list should evaluate +to a class object or class type which allows subclassing. The class's suite is +then executed in a new execution frame (see section :ref:`naming`), using a +newly created local namespace and the original global namespace. (Usually, the +suite contains only function definitions.) When the class's suite finishes +execution, its execution frame is discarded but its local namespace is saved. A +class object is then created using the inheritance list for the base classes and +the saved local namespace for the attribute dictionary. The class name is bound +to this class object in the original local namespace. 
+ +**Programmer's note:** Variables defined in the class definition are class +variables; they are shared by all instances. To define instance variables, they +must be given a value in the :meth:`__init__` method or in another method. Both +class and instance variables are accessible through the notation +"``self.name``", and an instance variable hides a class variable with the same +name when accessed in this way. Class variables with immutable values can be +used as defaults for instance variables. For new-style classes, descriptors can +be used to create instance variables with different implementation details. + +.. rubric:: Footnotes + +.. [#] The exception is propagated to the invocation stack only if there is no + :keyword:`finally` clause that negates the exception. + +.. [#] Currently, control "flows off the end" except in the case of an exception or the + execution of a :keyword:`return`, :keyword:`continue`, or :keyword:`break` + statement. + diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst new file mode 100644 index 0000000..2f6013e --- /dev/null +++ b/Doc/reference/datamodel.rst @@ -0,0 +1,2118 @@ + +.. _datamodel: + +********** +Data model +********** + + +.. _objects: + +Objects, values and types +========================= + +.. index:: + single: object + single: data + +:dfn:`Objects` are Python's abstraction for data. All data in a Python program +is represented by objects or by relations between objects. (In a sense, and in +conformance to Von Neumann's model of a "stored program computer," code is also +represented by objects.) + +.. index:: + builtin: id + builtin: type + single: identity of an object + single: value of an object + single: type of an object + single: mutable object + single: immutable object + +Every object has an identity, a type and a value. An object's *identity* never +changes once it has been created; you may think of it as the object's address in +memory.
The ':keyword:`is`' operator compares the identity of two objects; the +:func:`id` function returns an integer representing its identity (currently +implemented as its address). An object's :dfn:`type` is also unchangeable. [#]_ +An object's type determines the operations that the object supports (e.g., "does +it have a length?") and also defines the possible values for objects of that +type. The :func:`type` function returns an object's type (which is an object +itself). The *value* of some objects can change. Objects whose value can +change are said to be *mutable*; objects whose value is unchangeable once they +are created are called *immutable*. (The value of an immutable container object +that contains a reference to a mutable object can change when the latter's value +is changed; however the container is still considered immutable, because the +collection of objects it contains cannot be changed. So, immutability is not +strictly the same as having an unchangeable value, it is more subtle.) An +object's mutability is determined by its type; for instance, numbers, strings +and tuples are immutable, while dictionaries and lists are mutable. + +.. index:: + single: garbage collection + single: reference counting + single: unreachable object + +Objects are never explicitly destroyed; however, when they become unreachable +they may be garbage-collected. An implementation is allowed to postpone garbage +collection or omit it altogether --- it is a matter of implementation quality +how garbage collection is implemented, as long as no objects are collected that +are still reachable. (Implementation note: the current implementation uses a +reference-counting scheme with (optional) delayed detection of cyclically linked +garbage, which collects most objects as soon as they become unreachable, but is +not guaranteed to collect garbage containing circular references. 
See the +documentation of the :mod:`gc` module for information on controlling the +collection of cyclic garbage.) + +Note that the use of the implementation's tracing or debugging facilities may +keep objects alive that would normally be collectable. Also note that catching +an exception with a ':keyword:`try`...\ :keyword:`except`' statement may keep +objects alive. + +Some objects contain references to "external" resources such as open files or +windows. It is understood that these resources are freed when the object is +garbage-collected, but since garbage collection is not guaranteed to happen, +such objects also provide an explicit way to release the external resource, +usually a :meth:`close` method. Programs are strongly recommended to explicitly +close such objects. The ':keyword:`try`...\ :keyword:`finally`' statement +provides a convenient way to do this. + +.. index:: single: container + +Some objects contain references to other objects; these are called *containers*. +Examples of containers are tuples, lists and dictionaries. The references are +part of a container's value. In most cases, when we talk about the value of a +container, we imply the values, not the identities of the contained objects; +however, when we talk about the mutability of a container, only the identities +of the immediately contained objects are implied. So, if an immutable container +(like a tuple) contains a reference to a mutable object, its value changes if +that mutable object is changed. + +Types affect almost all aspects of object behavior. Even the importance of +object identity is affected in some sense: for immutable types, operations that +compute new values may actually return a reference to any existing object with +the same type and value, while for mutable objects this is not allowed. 
E.g., +after ``a = 1; b = 1``, ``a`` and ``b`` may or may not refer to the same object +with the value one, depending on the implementation, but after ``c = []; d = +[]``, ``c`` and ``d`` are guaranteed to refer to two different, unique, newly +created empty lists. (Note that ``c = d = []`` assigns the same object to both +``c`` and ``d``.) + + +.. _types: + +The standard type hierarchy +=========================== + +.. index:: + single: type + pair: data; type + pair: type; hierarchy + pair: extension; module + pair: C; language + +Below is a list of the types that are built into Python. Extension modules +(written in C, Java, or other languages, depending on the implementation) can +define additional types. Future versions of Python may add types to the type +hierarchy (e.g., rational numbers, efficiently stored arrays of integers, etc.). + +.. index:: + single: attribute + pair: special; attribute + triple: generic; special; attribute + +Some of the type descriptions below contain a paragraph listing 'special +attributes.' These are attributes that provide access to the implementation and +are not intended for general use. Their definition may change in the future. + +None + .. index:: object: None + + This type has a single value. There is a single object with this value. This + object is accessed through the built-in name ``None``. It is used to signify the + absence of a value in many situations, e.g., it is returned from functions that + don't explicitly return anything. Its truth value is false. + +NotImplemented + .. index:: object: NotImplemented + + This type has a single value. There is a single object with this value. This + object is accessed through the built-in name ``NotImplemented``. Numeric methods + and rich comparison methods may return this value if they do not implement the + operation for the operands provided. (The interpreter will then try the + reflected operation, or some other fallback, depending on the operator.) 
Its + truth value is true. + +Ellipsis + .. index:: object: Ellipsis + + This type has a single value. There is a single object with this value. This + object is accessed through the literal ``...`` or the built-in name + ``Ellipsis``. Its truth value is true. + +Numbers + .. index:: object: numeric + + These are created by numeric literals and returned as results by arithmetic + operators and arithmetic built-in functions. Numeric objects are immutable; + once created their value never changes. Python numbers are of course strongly + related to mathematical numbers, but subject to the limitations of numerical + representation in computers. + + Python distinguishes between integers, floating point numbers, and complex + numbers: + + Integers + .. index:: object: integer + + These represent elements from the mathematical set of integers (positive and + negative). + + There are three types of integers: + + Plain integers + .. index:: + object: plain integer + single: OverflowError (built-in exception) + + These represent numbers in the range -2147483648 through 2147483647. (The range + may be larger on machines with a larger natural word size, but not smaller.) + When the result of an operation would fall outside this range, the result is + normally returned as a long integer (in some cases, the exception + :exc:`OverflowError` is raised instead). For the purpose of shift and mask + operations, integers are assumed to have a binary, 2's complement notation using + 32 or more bits, and hiding no bits from the user (i.e., all 4294967296 + different bit patterns correspond to different values). + + Long integers + .. index:: object: long integer + + These represent numbers in an unlimited range, subject to available (virtual) + memory only. 
For the purpose of shift and mask operations, a binary + representation is assumed, and negative numbers are represented in a variant of + 2's complement which gives the illusion of an infinite string of sign bits + extending to the left. + + Booleans + .. index:: + object: Boolean + single: False + single: True + + These represent the truth values False and True. The two objects representing + the values False and True are the only Boolean objects. The Boolean type is a + subtype of plain integers, and Boolean values behave like the values 0 and 1, + respectively, in almost all contexts, the exception being that when converted to + a string, the strings ``"False"`` or ``"True"`` are returned, respectively. + + .. index:: pair: integer; representation + + The rules for integer representation are intended to give the most meaningful + interpretation of shift and mask operations involving negative integers and the + least surprises when switching between the plain and long integer domains. Any + operation except left shift, if it yields a result in the plain integer domain + without causing overflow, will yield the same result in the long integer domain + or when using mixed operands. + + .. % Integers + + Floating point numbers + .. index:: + object: floating point + pair: floating point; number + pair: C; language + pair: Java; language + + These represent machine-level double precision floating point numbers. You are + at the mercy of the underlying machine architecture (and C or Java + implementation) for the accepted range and handling of overflow. Python does not + support single-precision floating point numbers; the savings in processor and + memory usage that are usually the reason for using these is dwarfed by the + overhead of using objects in Python, so there is no reason to complicate the + language with two kinds of floating point numbers. + + Complex numbers + .. 
index:: + object: complex + pair: complex; number + + These represent complex numbers as a pair of machine-level double precision + floating point numbers. The same caveats apply as for floating point numbers. + The real and imaginary parts of a complex number ``z`` can be retrieved through + the read-only attributes ``z.real`` and ``z.imag``. + + .. % Numbers + +Sequences + .. index:: + builtin: len + object: sequence + single: index operation + single: item selection + single: subscription + + These represent finite ordered sets indexed by non-negative numbers. The + built-in function :func:`len` returns the number of items of a sequence. When + the length of a sequence is *n*, the index set contains the numbers 0, 1, + ..., *n*-1. Item *i* of sequence *a* is selected by ``a[i]``. + + .. index:: single: slicing + + Sequences also support slicing: ``a[i:j]`` selects all items with index *k* such + that *i* ``<=`` *k* ``<`` *j*. When used as an expression, a slice is a + sequence of the same type. This implies that the index set is renumbered so + that it starts at 0. + + .. index:: single: extended slicing + + Some sequences also support "extended slicing" with a third "step" parameter: + ``a[i:j:k]`` selects all items of *a* with index *x* where ``x = i + n*k``, *n* + ``>=`` ``0`` and *i* ``<=`` *x* ``<`` *j*. + + Sequences are distinguished according to their mutability: + + Immutable sequences + .. index:: + object: immutable sequence + object: immutable + + An object of an immutable sequence type cannot change once it is created. (If + the object contains references to other objects, these other objects may be + mutable and may be changed; however, the collection of objects directly + referenced by an immutable object cannot change.) + + The following types are immutable sequences: + + Strings + .. index:: + builtin: chr + builtin: ord + object: string + single: character + single: byte + single: ASCII@ASCII + + The items of a string are characters. 
There is no separate character type; a + character is represented by a string of one item. Characters represent (at + least) 8-bit bytes. The built-in functions :func:`chr` and :func:`ord` convert + between characters and nonnegative integers representing the byte values. Bytes + with the values 0-127 usually represent the corresponding ASCII values, but the + interpretation of values is up to the program. The string data type is also + used to represent arrays of bytes, e.g., to hold data read from a file. + + .. index:: + single: ASCII@ASCII + single: EBCDIC + single: character set + pair: string; comparison + builtin: chr + builtin: ord + + (On systems whose native character set is not ASCII, strings may use EBCDIC in + their internal representation, provided the functions :func:`chr` and + :func:`ord` implement a mapping between ASCII and EBCDIC, and string comparison + preserves the ASCII order. Or perhaps someone can propose a better rule?) + + Unicode + .. index:: + builtin: unichr + builtin: ord + builtin: unicode + object: unicode + single: character + single: integer + single: Unicode + + The items of a Unicode object are Unicode code units. A Unicode code unit is + represented by a Unicode object of one item and can hold either a 16-bit or + 32-bit value representing a Unicode ordinal (the maximum value for the ordinal + is given in ``sys.maxunicode``, and depends on how Python is configured at + compile time). Surrogate pairs may be present in the Unicode object, and will + be reported as two separate items. The built-in functions :func:`unichr` and + :func:`ord` convert between code units and nonnegative integers representing the + Unicode ordinals as defined in the Unicode Standard 3.0. Conversion from and to + other encodings are possible through the Unicode method :meth:`encode` and the + built-in function :func:`unicode`. + + Tuples + .. 
index:: + object: tuple + pair: singleton; tuple + pair: empty; tuple + + The items of a tuple are arbitrary Python objects. Tuples of two or more items + are formed by comma-separated lists of expressions. A tuple of one item (a + 'singleton') can be formed by affixing a comma to an expression (an expression + by itself does not create a tuple, since parentheses must be usable for grouping + of expressions). An empty tuple can be formed by an empty pair of parentheses. + + .. % Immutable sequences + + Mutable sequences + .. index:: + object: mutable sequence + object: mutable + pair: assignment; statement + single: delete + statement: del + single: subscription + single: slicing + + Mutable sequences can be changed after they are created. The subscription and + slicing notations can be used as the target of assignment and :keyword:`del` + (delete) statements. + + There is currently a single intrinsic mutable sequence type: + + Lists + .. index:: object: list + + The items of a list are arbitrary Python objects. Lists are formed by placing a + comma-separated list of expressions in square brackets. (Note that there are no + special cases needed to form lists of length 0 or 1.) + + .. index:: module: array + + The extension module :mod:`array` provides an additional example of a mutable + sequence type. + + .. % Mutable sequences + + .. % Sequences + +Set types + .. index:: + builtin: len + object: set type + + These represent unordered, finite sets of unique, immutable objects. As such, + they cannot be indexed by any subscript. However, they can be iterated over, and + the built-in function :func:`len` returns the number of items in a set. Common + uses for sets are fast membership testing, removing duplicates from a sequence, + and computing mathematical operations such as intersection, union, difference, + and symmetric difference. + + For set elements, the same immutability rules apply as for dictionary keys. 
Note + that numeric types obey the normal rules for numeric comparison: if two numbers + compare equal (e.g., ``1`` and ``1.0``), only one of them can be contained in a + set. + + There are currently two intrinsic set types: + + Sets + .. index:: object: set + + These represent a mutable set. They are created by the built-in :func:`set` + constructor and can be modified afterwards by several methods, such as + :meth:`add`. + + Frozen sets + .. index:: object: frozenset + + These represent an immutable set. They are created by the built-in + :func:`frozenset` constructor. As a frozenset is immutable and hashable, it can + be used again as an element of another set, or as a dictionary key. + + .. % Set types + +Mappings + .. index:: + builtin: len + single: subscription + object: mapping + + These represent finite sets of objects indexed by arbitrary index sets. The + subscript notation ``a[k]`` selects the item indexed by ``k`` from the mapping + ``a``; this can be used in expressions and as the target of assignments or + :keyword:`del` statements. The built-in function :func:`len` returns the number + of items in a mapping. + + There is currently a single intrinsic mapping type: + + Dictionaries + .. index:: object: dictionary + + These represent finite sets of objects indexed by nearly arbitrary values. The + only types of values not acceptable as keys are values containing lists or + dictionaries or other mutable types that are compared by value rather than by + object identity, the reason being that the efficient implementation of + dictionaries requires a key's hash value to remain constant. Numeric types used + for keys obey the normal rules for numeric comparison: if two numbers compare + equal (e.g., ``1`` and ``1.0``) then they can be used interchangeably to index + the same dictionary entry. + + Dictionaries are mutable; they can be created by the ``{...}`` notation (see + section :ref:`dict`). + + .. 
index:: + module: dbm + module: gdbm + module: bsddb + + The extension modules :mod:`dbm`, :mod:`gdbm`, and :mod:`bsddb` provide + additional examples of mapping types. + + .. % Mapping types + +Callable types + .. index:: + object: callable + pair: function; call + single: invocation + pair: function; argument + + These are the types to which the function call operation (see section + :ref:`calls`) can be applied: + + User-defined functions + .. index:: + pair: user-defined; function + object: function + object: user-defined function + + A user-defined function object is created by a function definition (see + section :ref:`function`). It should be called with an argument list + containing the same number of items as the function's formal parameter + list. + + Special attributes: + + +-------------------------+-------------------------------+-----------+ + | Attribute | Meaning | | + +=========================+===============================+===========+ + | :attr:`__doc__` | The function's documentation | Writable | + | | string, or ``None`` if | | + | | unavailable | | + +-------------------------+-------------------------------+-----------+ + | :attr:`__name__` | The function's name | Writable | + +-------------------------+-------------------------------+-----------+ + | :attr:`__module__` | The name of the module the | Writable | + | | function was defined in, or | | + | | ``None`` if unavailable. | | + +-------------------------+-------------------------------+-----------+ + | :attr:`__defaults__` | A tuple containing default | Writable | + | | argument values for those | | + | | arguments that have defaults, | | + | | or ``None`` if no arguments | | + | | have a default value | | + +-------------------------+-------------------------------+-----------+ + | :attr:`__code__` | The code object representing | Writable | + | | the compiled function body. 
| | + +-------------------------+-------------------------------+-----------+ + | :attr:`__globals__` | A reference to the dictionary | Read-only | + | | that holds the function's | | + | | global variables --- the | | + | | global namespace of the | | + | | module in which the function | | + | | was defined. | | + +-------------------------+-------------------------------+-----------+ + | :attr:`__dict__` | The namespace supporting | Writable | + | | arbitrary function | | + | | attributes. | | + +-------------------------+-------------------------------+-----------+ + | :attr:`__closure__` | ``None`` or a tuple of cells | Read-only | + | | that contain bindings for the | | + | | function's free variables. | | + +-------------------------+-------------------------------+-----------+ + | :attr:`__annotations__` | A dict containing annotations | Writable | + | | of parameters. The keys of | | + | | the dict are the parameter | | + | | names, or ``'return'`` for | | + | | the return annotation, if | | + | | provided. | | + +-------------------------+-------------------------------+-----------+ + | :attr:`__kwdefaults__` | A dict containing defaults | Writable | + | | for keyword-only parameters. | | + +-------------------------+-------------------------------+-----------+ + + Most of the attributes labelled "Writable" check the type of the assigned value. + + .. versionchanged:: 2.4 + ``__name__`` is now writable. + + Function objects also support getting and setting arbitrary attributes, which + can be used, for example, to attach metadata to functions. Regular attribute + dot-notation is used to get and set such attributes. *Note that the current + implementation only supports function attributes on user-defined functions. + Function attributes on built-in functions may be supported in the future.* + + Additional information about a function's definition can be retrieved from its + code object; see the description of internal types below. + + .. 
index:: + single: __doc__ (function attribute) + single: __name__ (function attribute) + single: __module__ (function attribute) + single: __dict__ (function attribute) + single: __defaults__ (function attribute) + single: __closure__ (function attribute) + single: __code__ (function attribute) + single: __globals__ (function attribute) + single: __annotations__ (function attribute) + single: __kwdefaults__ (function attribute) + pair: global; namespace + + User-defined methods + .. index:: + object: method + object: user-defined method + pair: user-defined; method + + A user-defined method object combines a class, a class instance (or ``None``) + and any callable object (normally a user-defined function). + + Special read-only attributes: :attr:`im_self` is the class instance object, + :attr:`im_func` is the function object; :attr:`im_class` is the class of + :attr:`im_self` for bound methods or the class that asked for the method for + unbound methods; :attr:`__doc__` is the method's documentation (same as + ``im_func.__doc__``); :attr:`__name__` is the method name (same as + ``im_func.__name__``); :attr:`__module__` is the name of the module the method + was defined in, or ``None`` if unavailable. + + .. versionchanged:: 2.2 + :attr:`im_self` used to refer to the class that defined the method. + + .. index:: + single: __doc__ (method attribute) + single: __name__ (method attribute) + single: __module__ (method attribute) + single: im_func (method attribute) + single: im_self (method attribute) + + Methods also support accessing (but not setting) the arbitrary function + attributes on the underlying function object. + + User-defined method objects may be created when getting an attribute of a class + (perhaps via an instance of that class), if that attribute is a user-defined + function object, an unbound user-defined method object, or a class method + object. 
When the attribute is a user-defined method object, a new method object + is only created if the class from which it is being retrieved is the same as, or + a derived class of, the class stored in the original method object; otherwise, + the original method object is used as it is. + + .. index:: + single: im_class (method attribute) + single: im_func (method attribute) + single: im_self (method attribute) + + When a user-defined method object is created by retrieving a user-defined + function object from a class, its :attr:`im_self` attribute is ``None`` + and the method object is said to be unbound. When one is created by + retrieving a user-defined function object from a class via one of its + instances, its :attr:`im_self` attribute is the instance, and the method + object is said to be bound. In either case, the new method's + :attr:`im_class` attribute is the class from which the retrieval takes + place, and its :attr:`im_func` attribute is the original function object. + + .. index:: single: im_func (method attribute) + + When a user-defined method object is created by retrieving another method object + from a class or instance, the behaviour is the same as for a function object, + except that the :attr:`im_func` attribute of the new instance is not the + original method object but its :attr:`im_func` attribute. + + .. index:: + single: im_class (method attribute) + single: im_func (method attribute) + single: im_self (method attribute) + + When a user-defined method object is created by retrieving a class method object + from a class or instance, its :attr:`im_self` attribute is the class itself (the + same as the :attr:`im_class` attribute), and its :attr:`im_func` attribute is + the function object underlying the class method. 
+ + When an unbound user-defined method object is called, the underlying function + (:attr:`im_func`) is called, with the restriction that the first argument must + be an instance of the proper class (:attr:`im_class`) or of a derived class + thereof. + + When a bound user-defined method object is called, the underlying function + (:attr:`im_func`) is called, inserting the class instance (:attr:`im_self`) in + front of the argument list. For instance, when :class:`C` is a class which + contains a definition for a function :meth:`f`, and ``x`` is an instance of + :class:`C`, calling ``x.f(1)`` is equivalent to calling ``C.f(x, 1)``. + + When a user-defined method object is derived from a class method object, the + "class instance" stored in :attr:`im_self` will actually be the class itself, so + that calling either ``x.f(1)`` or ``C.f(1)`` is equivalent to calling ``f(C,1)`` + where ``f`` is the underlying function. + + Note that the transformation from function object to (unbound or bound) method + object happens each time the attribute is retrieved from the class or instance. + In some cases, a fruitful optimization is to assign the attribute to a local + variable and call that local variable. Also notice that this transformation only + happens for user-defined functions; other callable objects (and all non-callable + objects) are retrieved without transformation. It is also important to note + that user-defined functions which are attributes of a class instance are not + converted to bound methods; this *only* happens when the function is an + attribute of the class. + + Generator functions + .. index:: + single: generator; function + single: generator; iterator + + A function or method which uses the :keyword:`yield` statement (see section + :ref:`yield`) is called a :dfn:`generator + function`. 
Such a function, when called, always returns an iterator object + which can be used to execute the body of the function: calling the iterator's + :meth:`__next__` method will cause the function to execute until it provides a + value using the :keyword:`yield` statement. When the function executes a + :keyword:`return` statement or falls off the end, a :exc:`StopIteration` + exception is raised and the iterator will have reached the end of the set of + values to be returned. + + Built-in functions + .. index:: + object: built-in function + object: function + pair: C; language + + A built-in function object is a wrapper around a C function. Examples of + built-in functions are :func:`len` and :func:`math.sin` (:mod:`math` is a + standard built-in module). The number and type of the arguments are + determined by the C function. Special read-only attributes: + :attr:`__doc__` is the function's documentation string, or ``None`` if + unavailable; :attr:`__name__` is the function's name; :attr:`__self__` is + set to ``None`` (but see the next item); :attr:`__module__` is the name of + the module the function was defined in or ``None`` if unavailable. + + Built-in methods + .. index:: + object: built-in method + object: method + pair: built-in; method + + This is really a different disguise of a built-in function, this time containing + an object passed to the C function as an implicit extra argument. An example of + a built-in method is ``alist.append()``, assuming *alist* is a list object. In + this case, the special read-only attribute :attr:`__self__` is set to the object + denoted by *alist*. + + Class Types + Class types, or "new-style classes," are callable. These objects normally act + as factories for new instances of themselves, but variations are possible for + class types that override :meth:`__new__`. The arguments of the call are passed + to :meth:`__new__` and, in the typical case, to :meth:`__init__` to initialize + the new instance. + + Classic Classes + .. 
index:: + single: __init__() (object method) + object: class + object: class instance + object: instance + pair: class object; call + + Class objects are described below. When a class object is called, a new class + instance (also described below) is created and returned. This implies a call to + the class's :meth:`__init__` method if it has one. Any arguments are passed on + to the :meth:`__init__` method. If there is no :meth:`__init__` method, the + class must be called without arguments. + + Class instances + Class instances are described below. Class instances are callable only when the + class has a :meth:`__call__` method; ``x(arguments)`` is a shorthand for + ``x.__call__(arguments)``. + +Modules + .. index:: + statement: import + object: module + + Modules are imported by the :keyword:`import` statement (see section + :ref:`import`). A module object has a + namespace implemented by a dictionary object (this is the dictionary referenced + by the __globals__ attribute of functions defined in the module). Attribute + references are translated to lookups in this dictionary, e.g., ``m.x`` is + equivalent to ``m.__dict__["x"]``. A module object does not contain the code + object used to initialize the module (since it isn't needed once the + initialization is done). + + .. % + + Attribute assignment updates the module's namespace dictionary, e.g., ``m.x = + 1`` is equivalent to ``m.__dict__["x"] = 1``. + + .. index:: single: __dict__ (module attribute) + + Special read-only attribute: :attr:`__dict__` is the module's namespace as a + dictionary object. + + .. 
index:: + single: __name__ (module attribute) + single: __doc__ (module attribute) + single: __file__ (module attribute) + pair: module; namespace + + Predefined (writable) attributes: :attr:`__name__` is the module's name; + :attr:`__doc__` is the module's documentation string, or ``None`` if + unavailable; :attr:`__file__` is the pathname of the file from which the module + was loaded, if it was loaded from a file. The :attr:`__file__` attribute is not + present for C modules that are statically linked into the interpreter; for + extension modules loaded dynamically from a shared library, it is the pathname + of the shared library file. + +Classes + Class objects are created by class definitions (see section :ref:`class`). A + class has a namespace implemented by a dictionary object. Class attribute + references are translated to lookups in this dictionary, e.g., ``C.x`` is + translated to ``C.__dict__["x"]``. When the attribute name is not found + there, the attribute search continues in the base classes. The search is + depth-first, left-to-right in the order of occurrence in the base class list. + + .. index:: + object: class + object: class instance + object: instance + pair: class object; call + single: container + object: dictionary + pair: class; attribute + + When a class attribute reference (for class :class:`C`, say) would yield a + user-defined function object or an unbound user-defined method object whose + associated class is either :class:`C` or one of its base classes, it is + transformed into an unbound user-defined method object whose :attr:`im_class` + attribute is :class:`C`. When it would yield a class method object, it is + transformed into a bound user-defined method object whose :attr:`im_class` + and :attr:`im_self` attributes are both :class:`C`. When it would yield a + static method object, it is transformed into the object wrapped by the static + method object. 
See section :ref:`descriptors` for another way in which + attributes retrieved from a class may differ from those actually contained in + its :attr:`__dict__`. + + .. index:: triple: class; attribute; assignment + + Class attribute assignments update the class's dictionary, never the dictionary + of a base class. + + .. index:: pair: class object; call + + A class object can be called (see above) to yield a class instance (see below). + + .. index:: + single: __name__ (class attribute) + single: __module__ (class attribute) + single: __dict__ (class attribute) + single: __bases__ (class attribute) + single: __doc__ (class attribute) + + Special attributes: :attr:`__name__` is the class name; :attr:`__module__` is + the module name in which the class was defined; :attr:`__dict__` is the + dictionary containing the class's namespace; :attr:`__bases__` is a tuple + (possibly empty or a singleton) containing the base classes, in the order of + their occurrence in the base class list; :attr:`__doc__` is the class's + documentation string, or None if undefined. + +Class instances + .. index:: + object: class instance + object: instance + pair: class; instance + pair: class instance; attribute + + A class instance is created by calling a class object (see above). A class + instance has a namespace implemented as a dictionary which is the first place in + which attribute references are searched. When an attribute is not found there, + and the instance's class has an attribute by that name, the search continues + with the class attributes. If a class attribute is found that is a user-defined + function object or an unbound user-defined method object whose associated class + is the class (call it :class:`C`) of the instance for which the attribute + reference was initiated or one of its bases, it is transformed into a bound + user-defined method object whose :attr:`im_class` attribute is :class:`C` and + whose :attr:`im_self` attribute is the instance. 
Static method and class method + objects are also transformed, as if they had been retrieved from class + :class:`C`; see above under "Classes". See section :ref:`descriptors` for + another way in which attributes of a class retrieved via its instances may + differ from the objects actually stored in the class's :attr:`__dict__`. If no + class attribute is found, and the object's class has a :meth:`__getattr__` + method, that is called to satisfy the lookup. + + .. index:: triple: class instance; attribute; assignment + + Attribute assignments and deletions update the instance's dictionary, never a + class's dictionary. If the class has a :meth:`__setattr__` or + :meth:`__delattr__` method, this is called instead of updating the instance + dictionary directly. + + .. index:: + object: numeric + object: sequence + object: mapping + + Class instances can pretend to be numbers, sequences, or mappings if they have + methods with certain special names. See section :ref:`specialnames`. + + .. index:: + single: __dict__ (instance attribute) + single: __class__ (instance attribute) + + Special attributes: :attr:`__dict__` is the attribute dictionary; + :attr:`__class__` is the instance's class. + +Files + .. index:: + object: file + builtin: open + single: popen() (in module os) + single: makefile() (socket method) + single: sys.stdin + single: sys.stdout + single: sys.stderr + single: stdio + single: stdin (in module sys) + single: stdout (in module sys) + single: stderr (in module sys) + + A file object represents an open file. File objects are created by the + :func:`open` built-in function, and also by :func:`os.popen`, + :func:`os.fdopen`, and the :meth:`makefile` method of socket objects (and + perhaps by other functions or methods provided by extension modules). The + objects ``sys.stdin``, ``sys.stdout`` and ``sys.stderr`` are initialized to + file objects corresponding to the interpreter's standard input, output and + error streams. 
See :ref:`bltin-file-objects` for complete documentation of + file objects. + +Internal types + .. index:: + single: internal type + single: types, internal + + A few types used internally by the interpreter are exposed to the user. Their + definitions may change with future versions of the interpreter, but they are + mentioned here for completeness. + + Code objects + .. index:: + single: bytecode + object: code + + Code objects represent *byte-compiled* executable Python code, or *bytecode*. + The difference between a code object and a function object is that the function + object contains an explicit reference to the function's globals (the module in + which it was defined), while a code object contains no context; also the default + argument values are stored in the function object, not in the code object + (because they represent values calculated at run-time). Unlike function + objects, code objects are immutable and contain no references (directly or + indirectly) to mutable objects. 
+ + Special read-only attributes: :attr:`co_name` gives the function name; + :attr:`co_argcount` is the number of positional arguments (including arguments + with default values); :attr:`co_nlocals` is the number of local variables used + by the function (including arguments); :attr:`co_varnames` is a tuple containing + the names of the local variables (starting with the argument names); + :attr:`co_cellvars` is a tuple containing the names of local variables that are + referenced by nested functions; :attr:`co_freevars` is a tuple containing the + names of free variables; :attr:`co_code` is a string representing the sequence + of bytecode instructions; :attr:`co_consts` is a tuple containing the literals + used by the bytecode; :attr:`co_names` is a tuple containing the names used by + the bytecode; :attr:`co_filename` is the filename from which the code was + compiled; :attr:`co_firstlineno` is the first line number of the function; + :attr:`co_lnotab` is a string encoding the mapping from byte code offsets to + line numbers (for details see the source code of the interpreter); + :attr:`co_stacksize` is the required stack size (including local variables); + :attr:`co_flags` is an integer encoding a number of flags for the interpreter. + + .. index:: + single: co_argcount (code object attribute) + single: co_code (code object attribute) + single: co_consts (code object attribute) + single: co_filename (code object attribute) + single: co_firstlineno (code object attribute) + single: co_flags (code object attribute) + single: co_lnotab (code object attribute) + single: co_name (code object attribute) + single: co_names (code object attribute) + single: co_nlocals (code object attribute) + single: co_stacksize (code object attribute) + single: co_varnames (code object attribute) + single: co_cellvars (code object attribute) + single: co_freevars (code object attribute) + + .. 
index:: object: generator + + The following flag bits are defined for :attr:`co_flags`: bit ``0x04`` is set if + the function uses the ``*arguments`` syntax to accept an arbitrary number of + positional arguments; bit ``0x08`` is set if the function uses the + ``**keywords`` syntax to accept arbitrary keyword arguments; bit ``0x20`` is set + if the function is a generator. + + Future feature declarations (``from __future__ import division``) also use bits + in :attr:`co_flags` to indicate whether a code object was compiled with a + particular feature enabled: bit ``0x2000`` is set if the function was compiled + with future division enabled; bits ``0x10`` and ``0x1000`` were used in earlier + versions of Python. + + Other bits in :attr:`co_flags` are reserved for internal use. + + .. index:: single: documentation string + + If a code object represents a function, the first item in :attr:`co_consts` is + the documentation string of the function, or ``None`` if undefined. + + Frame objects + .. index:: object: frame + + Frame objects represent execution frames. They may occur in traceback objects + (see below). + + .. index:: + single: f_back (frame attribute) + single: f_code (frame attribute) + single: f_globals (frame attribute) + single: f_locals (frame attribute) + single: f_lasti (frame attribute) + single: f_builtins (frame attribute) + + Special read-only attributes: :attr:`f_back` is the previous stack frame + (towards the caller), or ``None`` if this is the bottom stack frame; + :attr:`f_code` is the code object being executed in this frame; :attr:`f_locals` + is the dictionary used to look up local variables; :attr:`f_globals` is used for + global variables; :attr:`f_builtins` is used for built-in (intrinsic) names; + :attr:`f_lasti` gives the precise instruction (this is an index into the + bytecode string of the code object). + + .. 
index:: + single: f_trace (frame attribute) + single: f_exc_type (frame attribute) + single: f_exc_value (frame attribute) + single: f_exc_traceback (frame attribute) + single: f_lineno (frame attribute) + + Special writable attributes: :attr:`f_trace`, if not ``None``, is a function + called at the start of each source code line (this is used by the debugger); + :attr:`f_exc_type`, :attr:`f_exc_value`, :attr:`f_exc_traceback` represent the + last exception raised in the parent frame provided another exception was ever + raised in the current frame (in all other cases they are None); :attr:`f_lineno` + is the current line number of the frame --- writing to this from within a trace + function jumps to the given line (only for the bottom-most frame). A debugger + can implement a Jump command (aka Set Next Statement) by writing to f_lineno. + + Traceback objects + .. index:: + object: traceback + pair: stack; trace + pair: exception; handler + pair: execution; stack + single: exc_info (in module sys) + single: exc_traceback (in module sys) + single: last_traceback (in module sys) + single: sys.exc_info + single: sys.last_traceback + + Traceback objects represent a stack trace of an exception. A traceback object + is created when an exception occurs. When the search for an exception handler + unwinds the execution stack, at each unwound level a traceback object is + inserted in front of the current traceback. When an exception handler is + entered, the stack trace is made available to the program. (See section + :ref:`try`.) It is accessible as the third item of the + tuple returned by ``sys.exc_info()``. When the program contains no suitable + handler, the stack trace is written (nicely formatted) to the standard error + stream; if the interpreter is interactive, it is also made available to the user + as ``sys.last_traceback``. + + .. 
index:: + single: tb_next (traceback attribute) + single: tb_frame (traceback attribute) + single: tb_lineno (traceback attribute) + single: tb_lasti (traceback attribute) + statement: try + + Special read-only attributes: :attr:`tb_next` is the next level in the stack + trace (towards the frame where the exception occurred), or ``None`` if there is + no next level; :attr:`tb_frame` points to the execution frame of the current + level; :attr:`tb_lineno` gives the line number where the exception occurred; + :attr:`tb_lasti` indicates the precise instruction. The line number and last + instruction in the traceback may differ from the line number of its frame object + if the exception occurred in a :keyword:`try` statement with no matching except + clause or with a finally clause. + + Slice objects + .. index:: builtin: slice + + Slice objects are used to represent slices when *extended slice syntax* is used. + This is a slice using two colons, or multiple slices or ellipses separated by + commas, e.g., ``a[i:j:step]``, ``a[i:j, k:l]``, or ``a[..., i:j]``. They are + also created by the built-in :func:`slice` function. + + .. index:: + single: start (slice object attribute) + single: stop (slice object attribute) + single: step (slice object attribute) + + Special read-only attributes: :attr:`start` is the lower bound; :attr:`stop` is + the upper bound; :attr:`step` is the step value; each is ``None`` if omitted. + These attributes can have any type. + + Slice objects support one method: + + + .. method:: slice.indices(self, length) + + This method takes a single integer argument *length* and computes information + about the extended slice that the slice object would describe if applied to a + sequence of *length* items. It returns a tuple of three integers; respectively + these are the *start* and *stop* indices and the *step* or stride length of the + slice. Missing or out-of-bounds indices are handled in a manner consistent with + regular slices. + + .. 
versionadded:: 2.3 + + Static method objects + Static method objects provide a way of defeating the transformation of function + objects to method objects described above. A static method object is a wrapper + around any other object, usually a user-defined method object. When a static + method object is retrieved from a class or a class instance, the object actually + returned is the wrapped object, which is not subject to any further + transformation. Static method objects are not themselves callable, although the + objects they wrap usually are. Static method objects are created by the built-in + :func:`staticmethod` constructor. + + Class method objects + A class method object, like a static method object, is a wrapper around another + object that alters the way in which that object is retrieved from classes and + class instances. The behaviour of class method objects upon such retrieval is + described above, under "User-defined methods". Class method objects are created + by the built-in :func:`classmethod` constructor. + + .. % Internal types + +.. % Types +.. % ========================================================================= + + +New-style and classic classes +============================= + +Classes and instances come in two flavors: old-style or classic, and new-style. + +Up to Python 2.1, old-style classes were the only flavour available to the user. +The concept of (old-style) class is unrelated to the concept of type: if *x* is +an instance of an old-style class, then ``x.__class__`` designates the class of +*x*, but ``type(x)`` is always ``<type 'instance'>``. This reflects the fact +that all old-style instances, independently of their class, are implemented with +a single built-in type, called ``instance``. + +New-style classes were introduced in Python 2.2 to unify classes and types. A +new-style class is neither more nor less than a user-defined type. If *x* is an +instance of a new-style class, then ``type(x)`` is the same as ``x.__class__``. 
+ +The major motivation for introducing new-style classes is to provide a unified +object model with a full meta-model. It also has a number of immediate +benefits, like the ability to subclass most built-in types, or the introduction +of "descriptors", which enable computed properties. + +For compatibility reasons, classes are still old-style by default. New-style +classes are created by specifying another new-style class (i.e. a type) as a +parent class, or the "top-level type" :class:`object` if no other parent is +needed. The behaviour of new-style classes differs from that of old-style +classes in a number of important details in addition to what :func:`type` +returns. Some of these changes are fundamental to the new object model, like +the way special methods are invoked. Others are "fixes" that could not be +implemented before for compatibility concerns, like the method resolution order +in case of multiple inheritance. + +This manual is not up-to-date with respect to new-style classes. For now, +please see http://www.python.org/doc/newstyle.html for more information. + +.. index:: + single: class; new-style + single: class; classic + single: class; old-style + +The plan is to eventually drop old-style classes, leaving only the semantics of +new-style classes. This change will probably only be feasible in Python 3.0. + +.. % ========================================================================= + + +.. _specialnames: + +Special method names +==================== + +.. index:: + pair: operator; overloading + single: __getitem__() (mapping object method) + +A class can implement certain operations that are invoked by special syntax +(such as arithmetic operations or subscripting and slicing) by defining methods +with special names. This is Python's approach to :dfn:`operator overloading`, +allowing classes to define their own behavior with respect to language +operators. 
For instance, if a class defines a method named :meth:`__getitem__`, +and ``x`` is an instance of this class, then ``x[i]`` is equivalent [#]_ to +``x.__getitem__(i)``. Except where mentioned, attempts to execute an operation +raise an exception when no appropriate method is defined. + +When implementing a class that emulates any built-in type, it is important that +the emulation only be implemented to the degree that it makes sense for the +object being modelled. For example, some sequences may work well with retrieval +of individual elements, but extracting a slice may not make sense. (One example +of this is the :class:`NodeList` interface in the W3C's Document Object Model.) + + +.. _customization: + +Basic customization +------------------- + + +.. method:: object.__new__(cls[, ...]) + + Called to create a new instance of class *cls*. :meth:`__new__` is a static + method (special-cased so you need not declare it as such) that takes the class + of which an instance was requested as its first argument. The remaining + arguments are those passed to the object constructor expression (the call to the + class). The return value of :meth:`__new__` should be the new object instance + (usually an instance of *cls*). + + Typical implementations create a new instance of the class by invoking the + superclass's :meth:`__new__` method using ``super(currentclass, + cls).__new__(cls[, ...])`` with appropriate arguments and then modifying the + newly-created instance as necessary before returning it. + + If :meth:`__new__` returns an instance of *cls*, then the new instance's + :meth:`__init__` method will be invoked like ``__init__(self[, ...])``, where + *self* is the new instance and the remaining arguments are the same as were + passed to :meth:`__new__`. + + If :meth:`__new__` does not return an instance of *cls*, then the new instance's + :meth:`__init__` method will not be invoked. 
+ + :meth:`__new__` is intended mainly to allow subclasses of immutable types (like + int, str, or tuple) to customize instance creation. + + +.. method:: object.__init__(self[, ...]) + + .. index:: pair: class; constructor + + Called when the instance is created. The arguments are those passed to the + class constructor expression. If a base class has an :meth:`__init__` method, + the derived class's :meth:`__init__` method, if any, must explicitly call it to + ensure proper initialization of the base class part of the instance; for + example: ``BaseClass.__init__(self, [args...])``. As a special constraint on + constructors, no value may be returned; doing so will cause a :exc:`TypeError` + to be raised at runtime. + + +.. method:: object.__del__(self) + + .. index:: + single: destructor + statement: del + + Called when the instance is about to be destroyed. This is also called a + destructor. If a base class has a :meth:`__del__` method, the derived class's + :meth:`__del__` method, if any, must explicitly call it to ensure proper + deletion of the base class part of the instance. Note that it is possible + (though not recommended!) for the :meth:`__del__` method to postpone destruction + of the instance by creating a new reference to it. It may then be called at a + later time when this new reference is deleted. It is not guaranteed that + :meth:`__del__` methods are called for objects that still exist when the + interpreter exits. + + .. note:: + + ``del x`` doesn't directly call ``x.__del__()`` --- the former decrements + the reference count for ``x`` by one, and the latter is only called when + ``x``'s reference count reaches zero. 
Some common situations that may + prevent the reference count of an object from going to zero include: + circular references between objects (e.g., a doubly-linked list or a tree + data structure with parent and child pointers); a reference to the object + on the stack frame of a function that caught an exception (the traceback + stored in ``sys.exc_info()[2]`` keeps the stack frame alive); or a + reference to the object on the stack frame that raised an unhandled + exception in interactive mode (the traceback stored in + ``sys.last_traceback`` keeps the stack frame alive). The first situation + can only be remedied by explicitly breaking the cycles; the latter two + situations can be resolved by storing ``None`` in ``sys.last_traceback``. + Circular references which are garbage are detected when the optional cycle + detector is enabled (it's on by default), but can only be cleaned up if + there are no Python-level :meth:`__del__` methods involved. Refer to the + documentation for the :mod:`gc` module for more information about how + :meth:`__del__` methods are handled by the cycle detector, particularly + the description of the ``garbage`` value. + + .. warning:: + + Due to the precarious circumstances under which :meth:`__del__` methods are + invoked, exceptions that occur during their execution are ignored, and a warning + is printed to ``sys.stderr`` instead. Also, when :meth:`__del__` is invoked in + response to a module being deleted (e.g., when execution of the program is + done), other globals referenced by the :meth:`__del__` method may already have + been deleted. For this reason, :meth:`__del__` methods should do the absolute + minimum needed to maintain external invariants. 
Starting with version 1.5, + Python guarantees that globals whose name begins with a single underscore are + deleted from their module before other globals are deleted; if no other + references to such globals exist, this may help in assuring that imported + modules are still available at the time when the :meth:`__del__` method is + called. + + +.. method:: object.__repr__(self) + + .. index:: builtin: repr + + Called by the :func:`repr` built-in function and by string conversions (reverse + quotes) to compute the "official" string representation of an object. If at all + possible, this should look like a valid Python expression that could be used to + recreate an object with the same value (given an appropriate environment). If + this is not possible, a string of the form ``<...some useful description...>`` + should be returned. The return value must be a string object. If a class + defines :meth:`__repr__` but not :meth:`__str__`, then :meth:`__repr__` is also + used when an "informal" string representation of instances of that class is + required. + + .. index:: + pair: string; conversion + pair: reverse; quotes + pair: backward; quotes + single: back-quotes + + This is typically used for debugging, so it is important that the representation + is information-rich and unambiguous. + + +.. method:: object.__str__(self) + + .. index:: + builtin: str + statement: print + + Called by the :func:`str` built-in function and by the :keyword:`print` + statement to compute the "informal" string representation of an object. This + differs from :meth:`__repr__` in that it does not have to be a valid Python + expression: a more convenient or concise representation may be used instead. + The return value must be a string object. + + +.. method:: object.__lt__(self, other) + object.__le__(self, other) + object.__eq__(self, other) + object.__ne__(self, other) + object.__gt__(self, other) + object.__ge__(self, other) + + .. 
versionadded:: 2.1 + + These are the so-called "rich comparison" methods, and are called for comparison + operators in preference to :meth:`__cmp__` below. The correspondence between + operator symbols and method names is as follows: ``x<y`` calls ``x.__lt__(y)``, + ``x<=y`` calls ``x.__le__(y)``, ``x==y`` calls ``x.__eq__(y)``, ``x!=y`` calls + ``x.__ne__(y)``, ``x>y`` calls ``x.__gt__(y)``, and ``x>=y`` calls + ``x.__ge__(y)``. + + A rich comparison method may return the singleton ``NotImplemented`` if it does + not implement the operation for a given pair of arguments. By convention, + ``False`` and ``True`` are returned for a successful comparison. However, these + methods can return any value, so if the comparison operator is used in a Boolean + context (e.g., in the condition of an ``if`` statement), Python will call + :func:`bool` on the value to determine if the result is true or false. + + There are no implied relationships among the comparison operators. The truth of + ``x==y`` does not imply that ``x!=y`` is false. Accordingly, when defining + :meth:`__eq__`, one should also define :meth:`__ne__` so that the operators will + behave as expected. + + There are no reflected (swapped-argument) versions of these methods (to be used + when the left argument does not support the operation but the right argument + does); rather, :meth:`__lt__` and :meth:`__gt__` are each other's reflection, + :meth:`__le__` and :meth:`__ge__` are each other's reflection, and + :meth:`__eq__` and :meth:`__ne__` are their own reflection. + + Arguments to rich comparison methods are never coerced. + + +.. method:: object.__cmp__(self, other) + + .. index:: + builtin: cmp + single: comparisons + + Called by comparison operations if rich comparison (see above) is not defined. + Should return a negative integer if ``self < other``, zero if ``self == other``, + a positive integer if ``self > other``. 
If no :meth:`__cmp__`, :meth:`__eq__` + or :meth:`__ne__` operation is defined, class instances are compared by object + identity ("address"). See also the description of :meth:`__hash__` for some + important notes on creating objects which support custom comparison operations + and are usable as dictionary keys. (Note: the restriction that exceptions are + not propagated by :meth:`__cmp__` has been removed since Python 1.5.) + + +.. method:: object.__rcmp__(self, other) + + .. versionchanged:: 2.1 + No longer supported. + + +.. method:: object.__hash__(self) + + .. index:: + object: dictionary + builtin: hash + + Called for the key object for dictionary operations, and by the built-in + function :func:`hash`. Should return a 32-bit integer usable as a hash value + for dictionary operations. The only required property is that objects which + compare equal have the same hash value; it is advised to somehow mix together + (e.g., using exclusive or) the hash values for the components of the object that + also play a part in comparison of objects. If a class does not define a + :meth:`__cmp__` method it should not define a :meth:`__hash__` operation either; + if it defines :meth:`__cmp__` or :meth:`__eq__` but not :meth:`__hash__`, its + instances will not be usable as dictionary keys. If a class defines mutable + objects and implements a :meth:`__cmp__` or :meth:`__eq__` method, it should not + implement :meth:`__hash__`, since the dictionary implementation requires that a + key's hash value is immutable (if the object's hash value changes, it will be in + the wrong hash bucket). + + .. versionchanged:: 2.5 + :meth:`__hash__` may now also return a long integer object; the 32-bit integer + is then derived from the hash of that object. + + .. index:: single: __cmp__() (object method) + + +.. method:: object.__bool__(self) + + .. 
index:: single: __len__() (mapping object method) + + Called to implement truth value testing, and the built-in operation ``bool()``; + should return ``False`` or ``True``. When this method is not defined, + :meth:`__len__` is called, if it is defined (see below) and ``True`` is returned + when the length is not zero. If a class defines neither :meth:`__len__` nor + :meth:`__bool__`, all its instances are considered true. + + +.. method:: object.__unicode__(self) + + .. index:: builtin: unicode + + Called to implement :func:`unicode` builtin; should return a Unicode object. + When this method is not defined, string conversion is attempted, and the result + of string conversion is converted to Unicode using the system default encoding. + + +.. _attribute-access: + +Customizing attribute access +---------------------------- + +The following methods can be defined to customize the meaning of attribute +access (use of, assignment to, or deletion of ``x.name``) for class instances. + + +.. method:: object.__getattr__(self, name) + + Called when an attribute lookup has not found the attribute in the usual places + (i.e. it is not an instance attribute nor is it found in the class tree for + ``self``). ``name`` is the attribute name. This method should return the + (computed) attribute value or raise an :exc:`AttributeError` exception. + + .. index:: single: __setattr__() (object method) + + Note that if the attribute is found through the normal mechanism, + :meth:`__getattr__` is not called. (This is an intentional asymmetry between + :meth:`__getattr__` and :meth:`__setattr__`.) This is done both for efficiency + reasons and because otherwise :meth:`__setattr__` would have no way to access + other attributes of the instance. Note that at least for instance variables, + you can fake total control by not inserting any values in the instance attribute + dictionary (but instead inserting them in another object). 
See the + :meth:`__getattribute__` method below for a way to actually get total control in + new-style classes. + + +.. method:: object.__setattr__(self, name, value) + + Called when an attribute assignment is attempted. This is called instead of the + normal mechanism (i.e. store the value in the instance dictionary). *name* is + the attribute name, *value* is the value to be assigned to it. + + .. index:: single: __dict__ (instance attribute) + + If :meth:`__setattr__` wants to assign to an instance attribute, it should not + simply execute ``self.name = value`` --- this would cause a recursive call to + itself. Instead, it should insert the value in the dictionary of instance + attributes, e.g., ``self.__dict__[name] = value``. For new-style classes, + rather than accessing the instance dictionary, it should call the base class + method with the same name, for example, ``object.__setattr__(self, name, + value)``. + + +.. method:: object.__delattr__(self, name) + + Like :meth:`__setattr__` but for attribute deletion instead of assignment. This + should only be implemented if ``del obj.name`` is meaningful for the object. + + +.. _new-style-attribute-access: + +More attribute access for new-style classes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following methods only apply to new-style classes. + + +.. method:: object.__getattribute__(self, name) + + Called unconditionally to implement attribute accesses for instances of the + class. If the class also defines :meth:`__getattr__`, the latter will not be + called unless :meth:`__getattribute__` either calls it explicitly or raises an + :exc:`AttributeError`. This method should return the (computed) attribute value + or raise an :exc:`AttributeError` exception. In order to avoid infinite + recursion in this method, its implementation should always call the base class + method with the same name to access any attributes it needs, for example, + ``object.__getattribute__(self, name)``. + + +.. 
_descriptors: + +Implementing Descriptors +^^^^^^^^^^^^^^^^^^^^^^^^ + +The following methods only apply when an instance of the class containing the +method (a so-called *descriptor* class) appears in the class dictionary of +another new-style class, known as the *owner* class. In the examples below, "the +attribute" refers to the attribute whose name is the key of the property in the +owner class' ``__dict__``. Descriptors can only be implemented as new-style +classes themselves. + + +.. method:: object.__get__(self, instance, owner) + + Called to get the attribute of the owner class (class attribute access) or of an + instance of that class (instance attribute access). *owner* is always the owner + class, while *instance* is the instance that the attribute was accessed through, + or ``None`` when the attribute is accessed through the *owner*. This method + should return the (computed) attribute value or raise an :exc:`AttributeError` + exception. + + +.. method:: object.__set__(self, instance, value) + + Called to set the attribute on an instance *instance* of the owner class to a + new value, *value*. + + +.. method:: object.__delete__(self, instance) + + Called to delete the attribute on an instance *instance* of the owner class. + + +.. _descriptor-invocation: + +Invoking Descriptors +^^^^^^^^^^^^^^^^^^^^ + +In general, a descriptor is an object attribute with "binding behavior", one +whose attribute access has been overridden by methods in the descriptor +protocol: :meth:`__get__`, :meth:`__set__`, and :meth:`__delete__`. If any of +those methods are defined for an object, it is said to be a descriptor. + +The default behavior for attribute access is to get, set, or delete the +attribute from an object's dictionary. For instance, ``a.x`` has a lookup chain +starting with ``a.__dict__['x']``, then ``type(a).__dict__['x']``, and +continuing through the base classes of ``type(a)`` excluding metaclasses. 
+ +However, if the looked-up value is an object defining one of the descriptor +methods, then Python may override the default behavior and invoke the descriptor +method instead. Where this occurs in the precedence chain depends on which +descriptor methods were defined and how they were called. Note that descriptors +are only invoked for new-style objects or classes (ones that subclass +:class:`object()` or :class:`type()`). + +The starting point for descriptor invocation is a binding, ``a.x``. How the +arguments are assembled depends on ``a``: + +Direct Call + The simplest and least common call is when user code directly invokes a + descriptor method: ``x.__get__(a)``. + +Instance Binding + If binding to a new-style object instance, ``a.x`` is transformed into the call: + ``type(a).__dict__['x'].__get__(a, type(a))``. + +Class Binding + If binding to a new-style class, ``A.x`` is transformed into the call: + ``A.__dict__['x'].__get__(None, A)``. + +Super Binding + If ``a`` is an instance of :class:`super`, then the binding ``super(B, + obj).m()`` searches ``obj.__class__.__mro__`` for the base class ``A`` + immediately following ``B`` and then invokes the descriptor with the call: + ``A.__dict__['m'].__get__(obj, A)``. + +For instance bindings, the precedence of descriptor invocation depends on +which descriptor methods are defined. Data descriptors define both +:meth:`__get__` and :meth:`__set__`. Non-data descriptors have just the +:meth:`__get__` method. Data descriptors always override a redefinition in an +instance dictionary. In contrast, non-data descriptors can be overridden by +instances. + +Python methods (including :func:`staticmethod` and :func:`classmethod`) are +implemented as non-data descriptors. Accordingly, instances can redefine and +override methods. This allows individual instances to acquire behaviors that +differ from other instances of the same class. + +The :func:`property` function is implemented as a data descriptor.
Accordingly, +instances cannot override the behavior of a property. + + +.. _slots: + +__slots__ +^^^^^^^^^ + +By default, instances of both old and new-style classes have a dictionary for +attribute storage. This wastes space for objects having very few instance +variables. The space consumption can become acute when creating large numbers +of instances. + +The default can be overridden by defining *__slots__* in a new-style class +definition. The *__slots__* declaration takes a sequence of instance variables +and reserves just enough space in each instance to hold a value for each +variable. Space is saved because *__dict__* is not created for each instance. + + +.. data:: __slots__ + + This class variable can be assigned a string, iterable, or sequence of strings + with variable names used by instances. If defined in a new-style class, + *__slots__* reserves space for the declared variables and prevents the automatic + creation of *__dict__* and *__weakref__* for each instance. + + .. versionadded:: 2.2 + +Notes on using *__slots__* + +* Without a *__dict__* variable, instances cannot be assigned new variables not + listed in the *__slots__* definition. Attempts to assign to an unlisted + variable name raise :exc:`AttributeError`. If dynamic assignment of new + variables is desired, then add ``'__dict__'`` to the sequence of strings in the + *__slots__* declaration. + + .. versionchanged:: 2.3 + Previously, adding ``'__dict__'`` to the *__slots__* declaration would not + enable the assignment of new attributes not specifically listed in the sequence + of instance variable names. + +* Without a *__weakref__* variable for each instance, classes defining + *__slots__* do not support weak references to their instances. If weak reference + support is needed, then add ``'__weakref__'`` to the sequence of strings in the + *__slots__* declaration. + + ..
versionchanged:: 2.3 + Previously, adding ``'__weakref__'`` to the *__slots__* declaration would not + enable support for weak references. + +* *__slots__* are implemented at the class level by creating descriptors + (:ref:`descriptors`) for each variable name. As a result, class attributes + cannot be used to set default values for instance variables defined by + *__slots__*; otherwise, the class attribute would overwrite the descriptor + assignment. + +* If a class defines a slot also defined in a base class, the instance variable + defined by the base class slot is inaccessible (except by retrieving its + descriptor directly from the base class). This renders the meaning of the + program undefined. In the future, a check may be added to prevent this. + +* The action of a *__slots__* declaration is limited to the class where it is + defined. As a result, subclasses will have a *__dict__* unless they also define + *__slots__*. + +* *__slots__* do not work for classes derived from "variable-length" built-in + types such as :class:`long`, :class:`str` and :class:`tuple`. + +* Any non-string iterable may be assigned to *__slots__*. Mappings may also be + used; however, in the future, special meaning may be assigned to the values + corresponding to each key. + +* *__class__* assignment works only if both classes have the same *__slots__*. + + .. versionchanged:: 2.6 + Previously, *__class__* assignment raised an error if either new or old class + had *__slots__*. + + +.. _metaclasses: + +Customizing class creation +-------------------------- + +By default, new-style classes are constructed using :func:`type`. A class +definition is read into a separate namespace and the value of class name is +bound to the result of ``type(name, bases, dict)``. + +When the class definition is read, if *__metaclass__* is defined then the +callable assigned to it will be called instead of :func:`type`. 
This allows +classes or functions to be written which monitor or alter the class creation +process: + +* Modifying the class dictionary prior to the class being created. + +* Returning an instance of another class -- essentially performing the role of a + factory function. + + +.. data:: __metaclass__ + + This variable can be any callable accepting arguments for ``name``, ``bases``, + and ``dict``. Upon class creation, the callable is used instead of the built-in + :func:`type`. + + .. versionadded:: 2.2 + +The appropriate metaclass is determined by the following precedence rules: + +* If ``dict['__metaclass__']`` exists, it is used. + +* Otherwise, if there is at least one base class, its metaclass is used (this + looks for a *__class__* attribute first and if not found, uses its type). + +* Otherwise, if a global variable named __metaclass__ exists, it is used. + +* Otherwise, the old-style, classic metaclass (types.ClassType) is used. + +The potential uses for metaclasses are boundless. Some ideas that have been +explored include logging, interface checking, automatic delegation, automatic +property creation, proxies, frameworks, and automatic resource +locking/synchronization. + + +.. _callable-types: + +Emulating callable objects +-------------------------- + + +.. method:: object.__call__(self[, args...]) + + .. index:: pair: call; instance + + Called when the instance is "called" as a function; if this method is defined, + ``x(arg1, arg2, ...)`` is a shorthand for ``x.__call__(arg1, arg2, ...)``. + + +.. _sequence-types: + +Emulating container types +------------------------- + +The following methods can be defined to implement container objects. Containers +usually are sequences (such as lists or tuples) or mappings (like dictionaries), +but can represent other containers as well.
The first set of methods is used +either to emulate a sequence or to emulate a mapping; the difference is that for +a sequence, the allowable keys should be the integers *k* for which ``0 <= k < +N`` where *N* is the length of the sequence, or slice objects, which define a +range of items. (For backwards compatibility, the method :meth:`__getslice__` +(see below) can also be defined to handle simple, but not extended slices.) It +is also recommended that mappings provide the methods :meth:`keys`, +:meth:`values`, :meth:`items`, :meth:`has_key`, :meth:`get`, :meth:`clear`, +:meth:`setdefault`, :meth:`iterkeys`, :meth:`itervalues`, :meth:`iteritems`, +:meth:`pop`, :meth:`popitem`, :meth:`copy`, and :meth:`update` behaving similarly +to those for Python's standard dictionary objects. The :mod:`UserDict` module +provides a :class:`DictMixin` class to help create those methods from a base set +of :meth:`__getitem__`, :meth:`__setitem__`, :meth:`__delitem__`, and +:meth:`keys`. Mutable sequences should provide methods :meth:`append`, +:meth:`count`, :meth:`index`, :meth:`extend`, :meth:`insert`, :meth:`pop`, +:meth:`remove`, :meth:`reverse` and :meth:`sort`, like Python's standard list +objects. Finally, sequence types should implement addition (meaning +concatenation) and multiplication (meaning repetition) by defining the methods +:meth:`__add__`, :meth:`__radd__`, :meth:`__iadd__`, :meth:`__mul__`, +:meth:`__rmul__` and :meth:`__imul__` described below; they should not define +other numerical operators. It is recommended that both mappings and sequences +implement the :meth:`__contains__` method to allow efficient use of the ``in`` +operator; for mappings, ``in`` should be equivalent to :meth:`has_key`; for +sequences, it should search through the values.
It is further recommended that +both mappings and sequences implement the :meth:`__iter__` method to allow +efficient iteration through the container; for mappings, :meth:`__iter__` should +be the same as :meth:`iterkeys`; for sequences, it should iterate through the +values. + + +.. method:: object.__len__(self) + + .. index:: + builtin: len + single: __bool__() (object method) + + Called to implement the built-in function :func:`len`. Should return the length + of the object, an integer ``>=`` 0. Also, an object that doesn't define a + :meth:`__bool__` method and whose :meth:`__len__` method returns zero is + considered to be false in a Boolean context. + + +.. method:: object.__getitem__(self, key) + + .. index:: object: slice + + Called to implement evaluation of ``self[key]``. For sequence types, the + accepted keys should be integers and slice objects. Note that the special + interpretation of negative indexes (if the class wishes to emulate a sequence + type) is up to the :meth:`__getitem__` method. If *key* is of an inappropriate + type, :exc:`TypeError` may be raised; if of a value outside the set of indexes + for the sequence (after any special interpretation of negative values), + :exc:`IndexError` should be raised. For mapping types, if *key* is missing (not + in the container), :exc:`KeyError` should be raised. + + .. note:: + + :keyword:`for` loops expect that an :exc:`IndexError` will be raised for illegal + indexes to allow proper detection of the end of the sequence. + + +.. method:: object.__setitem__(self, key, value) + + Called to implement assignment to ``self[key]``. Same note as for + :meth:`__getitem__`. This should only be implemented for mappings if the + objects support changes to the values for keys, or if new keys can be added, or + for sequences if elements can be replaced. The same exceptions should be raised + for improper *key* values as for the :meth:`__getitem__` method. + + +.. 
method:: object.__delitem__(self, key) + + Called to implement deletion of ``self[key]``. Same note as for + :meth:`__getitem__`. This should only be implemented for mappings if the + objects support removal of keys, or for sequences if elements can be removed + from the sequence. The same exceptions should be raised for improper *key* + values as for the :meth:`__getitem__` method. + + +.. method:: object.__iter__(self) + + This method is called when an iterator is required for a container. This method + should return a new iterator object that can iterate over all the objects in the + container. For mappings, it should iterate over the keys of the container, and + should also be made available as the method :meth:`iterkeys`. + + Iterator objects also need to implement this method; they are required to return + themselves. For more information on iterator objects, see :ref:`typeiter`. + +The membership test operators (:keyword:`in` and :keyword:`not in`) are normally +implemented as an iteration through a sequence. However, container objects can +supply the following special method with a more efficient implementation, which +also does not require the object be a sequence. + + +.. method:: object.__contains__(self, item) + + Called to implement membership test operators. Should return true if *item* is + in *self*, false otherwise. For mapping objects, this should consider the keys + of the mapping rather than the values or the key-item pairs. + + +.. _sequence-methods: + +Additional methods for emulation of sequence types +-------------------------------------------------- + +The following optional methods can be defined to further emulate sequence +objects. Immutable sequences should define at most +:meth:`__getslice__`; mutable sequences might define all three methods. + + +.. method:: object.__getslice__(self, i, j) + + .. deprecated:: 2.0 + Support slice objects as parameters to the :meth:`__getitem__` method.
+ + Called to implement evaluation of ``self[i:j]``. The returned object should be + of the same type as *self*. Note that missing *i* or *j* in the slice + expression are replaced by zero or ``sys.maxint``, respectively. If negative + indexes are used in the slice, the length of the sequence is added to that + index. If the instance does not implement the :meth:`__len__` method, an + :exc:`AttributeError` is raised. No guarantee is made that indexes adjusted this + way are not still negative. Indexes which are greater than the length of the + sequence are not modified. If no :meth:`__getslice__` is found, a slice object + is created instead, and passed to :meth:`__getitem__` instead. + + +.. method:: object.__setslice__(self, i, j, sequence) + + Called to implement assignment to ``self[i:j]``. Same notes for *i* and *j* as + for :meth:`__getslice__`. + + This method is deprecated. If no :meth:`__setslice__` is found, or for extended + slicing of the form ``self[i:j:k]``, a slice object is created, and passed to + :meth:`__setitem__`, instead of :meth:`__setslice__` being called. + + +.. method:: object.__delslice__(self, i, j) + + Called to implement deletion of ``self[i:j]``. Same notes for *i* and *j* as for + :meth:`__getslice__`. This method is deprecated. If no :meth:`__delslice__` is + found, or for extended slicing of the form ``self[i:j:k]``, a slice object is + created, and passed to :meth:`__delitem__`, instead of :meth:`__delslice__` + being called. + +Notice that these methods are only invoked when a single slice with a single +colon is used, and the slice method is available. For slice operations +involving extended slice notation, or in absence of the slice methods, +:meth:`__getitem__`, :meth:`__setitem__` or :meth:`__delitem__` is called with a +slice object as argument. 
+ +The following example demonstrates how to make your program or module compatible +with earlier versions of Python (assuming that methods :meth:`__getitem__`, +:meth:`__setitem__` and :meth:`__delitem__` support slice objects as +arguments):: + + class MyClass: + ... + def __getitem__(self, index): + ... + def __setitem__(self, index, value): + ... + def __delitem__(self, index): + ... + + if sys.version_info < (2, 0): + # They won't be defined if version is at least 2.0 final + + def __getslice__(self, i, j): + return self[max(0, i):max(0, j):] + def __setslice__(self, i, j, seq): + self[max(0, i):max(0, j):] = seq + def __delslice__(self, i, j): + del self[max(0, i):max(0, j):] + ... + +Note the calls to :func:`max`; these are necessary because of the handling of +negative indexes before the :meth:`__\*slice__` methods are called. When +negative indexes are used, the :meth:`__\*item__` methods receive them as +provided, but the :meth:`__\*slice__` methods get a "cooked" form of the index +values. For each negative index value, the length of the sequence is added to +the index before calling the method (which may still result in a negative +index); this is the customary handling of negative indexes by the built-in +sequence types, and the :meth:`__\*item__` methods are expected to do this as +well. However, since they should already be doing that, negative indexes cannot +be passed in; they must be constrained to the bounds of the sequence before +being passed to the :meth:`__\*item__` methods. Calling ``max(0, i)`` +conveniently returns the proper value. + + +.. _numeric-types: + +Emulating numeric types +----------------------- + +The following methods can be defined to emulate numeric objects. Methods +corresponding to operations that are not supported by the particular kind of +number implemented (e.g., bitwise operations for non-integral numbers) should be +left undefined. + + +..
method:: object.__add__(self, other) + object.__sub__(self, other) + object.__mul__(self, other) + object.__floordiv__(self, other) + object.__mod__(self, other) + object.__divmod__(self, other) + object.__pow__(self, other[, modulo]) + object.__lshift__(self, other) + object.__rshift__(self, other) + object.__and__(self, other) + object.__xor__(self, other) + object.__or__(self, other) + + .. index:: + builtin: divmod + builtin: pow + builtin: pow + + These methods are called to implement the binary arithmetic operations (``+``, + ``-``, ``*``, ``//``, ``%``, :func:`divmod`, :func:`pow`, ``**``, ``<<``, + ``>>``, ``&``, ``^``, ``|``). For instance, to evaluate the expression + ``x + y``, where *x* is an instance of a class that has an :meth:`__add__` + method, ``x.__add__(y)`` is called. The :meth:`__divmod__` method should be + equivalent to using :meth:`__floordiv__` and :meth:`__mod__`; it should not be + related to :meth:`__truediv__` (described below). Note that :meth:`__pow__` + should be defined to accept an optional third argument if the ternary version of + the built-in :func:`pow` function is to be supported. + + If one of those methods does not support the operation with the supplied + arguments, it should return ``NotImplemented``. + + +.. method:: object.__div__(self, other) + object.__truediv__(self, other) + + The division operator (``/``) is implemented by these methods. The + :meth:`__truediv__` method is used when ``__future__.division`` is in effect, + otherwise :meth:`__div__` is used. If only one of these two methods is defined, + the object will not support division in the alternate context; :exc:`TypeError` + will be raised instead. + + +..
method:: object.__radd__(self, other) + object.__rsub__(self, other) + object.__rmul__(self, other) + object.__rdiv__(self, other) + object.__rtruediv__(self, other) + object.__rfloordiv__(self, other) + object.__rmod__(self, other) + object.__rdivmod__(self, other) + object.__rpow__(self, other) + object.__rlshift__(self, other) + object.__rrshift__(self, other) + object.__rand__(self, other) + object.__rxor__(self, other) + object.__ror__(self, other) + + .. index:: + builtin: divmod + builtin: pow + + These methods are called to implement the binary arithmetic operations (``+``, + ``-``, ``*``, ``/``, ``//``, ``%``, :func:`divmod`, :func:`pow`, ``**``, ``<<``, ``>>``, + ``&``, ``^``, ``|``) with reflected (swapped) operands. These functions are + only called if the left operand does not support the corresponding operation and + the operands are of different types. [#]_ For instance, to evaluate the + expression ``x - y``, where *y* is an instance of a class that has an + :meth:`__rsub__` method, ``y.__rsub__(x)`` is called if ``x.__sub__(y)`` returns + ``NotImplemented``. + + .. index:: builtin: pow + + Note that ternary :func:`pow` will not try calling :meth:`__rpow__` (the + coercion rules would become too complicated). + + .. note:: + + If the right operand's type is a subclass of the left operand's type and that + subclass provides the reflected method for the operation, this method will be + called before the left operand's non-reflected method. This behavior allows + subclasses to override their ancestors' operations. + + +..
method:: object.__iadd__(self, other) + object.__isub__(self, other) + object.__imul__(self, other) + object.__idiv__(self, other) + object.__itruediv__(self, other) + object.__ifloordiv__(self, other) + object.__imod__(self, other) + object.__ipow__(self, other[, modulo]) + object.__ilshift__(self, other) + object.__irshift__(self, other) + object.__iand__(self, other) + object.__ixor__(self, other) + object.__ior__(self, other) + + These methods are called to implement the augmented arithmetic operations + (``+=``, ``-=``, ``*=``, ``/=``, ``//=``, ``%=``, ``**=``, ``<<=``, ``>>=``, + ``&=``, ``^=``, ``|=``). These methods should attempt to do the operation + in-place (modifying *self*) and return the result (which could be, but does + not have to be, *self*). If a specific method is not defined, the augmented + operation falls back to the normal methods. For instance, to evaluate the + expression ``x += y``, where *x* is an instance of a class that has an + :meth:`__iadd__` method, ``x.__iadd__(y)`` is called. If *x* is an instance + of a class that does not define a :meth:`__iadd__` method, ``x.__add__(y)`` + and ``y.__radd__(x)`` are considered, as with the evaluation of ``x + y``. + + +.. method:: object.__neg__(self) + object.__pos__(self) + object.__abs__(self) + object.__invert__(self) + + .. index:: builtin: abs + + Called to implement the unary arithmetic operations (``-``, ``+``, :func:`abs` + and ``~``). + + +.. method:: object.__complex__(self) + object.__int__(self) + object.__long__(self) + object.__float__(self) + + .. index:: + builtin: complex + builtin: int + builtin: long + builtin: float + + Called to implement the built-in functions :func:`complex`, :func:`int`, + :func:`long`, and :func:`float`. Should return a value of the appropriate type. + + +.. method:: object.__index__(self) + + Called to implement :func:`operator.index`.
Also called whenever Python needs + an integer object (such as in slicing, or in the built-in :func:`bin`, + :func:`hex` and :func:`oct` functions). Must return an integer (int or long). + + .. versionadded:: 2.5 + + +.. _context-managers: + +With Statement Context Managers +------------------------------- + +.. versionadded:: 2.5 + +A :dfn:`context manager` is an object that defines the runtime context to be +established when executing a :keyword:`with` statement. The context manager +handles the entry into, and the exit from, the desired runtime context for the +execution of the block of code. Context managers are normally invoked using the +:keyword:`with` statement (described in section :ref:`with`), but can also be +used by directly invoking their methods. + +.. index:: + statement: with + single: context manager + +Typical uses of context managers include saving and restoring various kinds of +global state, locking and unlocking resources, closing opened files, etc. + +For more information on context managers, see :ref:`typecontextmanager`. + + +.. method:: object.__enter__(self) + + Enter the runtime context related to this object. The :keyword:`with` statement + will bind this method's return value to the target(s) specified in the + :keyword:`as` clause of the statement, if any. + + +.. method:: object.__exit__(self, exc_type, exc_value, traceback) + + Exit the runtime context related to this object. The parameters describe the + exception that caused the context to be exited. If the context was exited + without an exception, all three arguments will be :const:`None`. + + If an exception is supplied, and the method wishes to suppress the exception + (i.e., prevent it from being propagated), it should return a true value. + Otherwise, the exception will be processed normally upon exit from this method. + + Note that :meth:`__exit__` methods should not reraise the passed-in exception; + this is the caller's responsibility. + + +.. 
seealso:: + + :pep:`0343` - The "with" statement + The specification, background, and examples for the Python :keyword:`with` + statement. + +.. rubric:: Footnotes + +.. [#] Since Python 2.2, a gradual merging of types and classes has been started that + makes this and a few other assertions made in this manual not 100% accurate and + complete: for example, it *is* now possible in some cases to change an object's + type, under certain controlled conditions. Until this manual undergoes + extensive revision, it must now be taken as authoritative only regarding + "classic classes", that are still the default, for compatibility purposes, in + Python 2.2 and 2.3. For more information, see + http://www.python.org/doc/newstyle.html. + +.. [#] This, and other statements, are only roughly true for instances of new-style + classes. + +.. [#] For operands of the same type, it is assumed that if the non-reflected method + (such as :meth:`__add__`) fails the operation is not supported, which is why the + reflected method is not called. + diff --git a/Doc/reference/executionmodel.rst b/Doc/reference/executionmodel.rst new file mode 100644 index 0000000..27802c8 --- /dev/null +++ b/Doc/reference/executionmodel.rst @@ -0,0 +1,232 @@ + +.. _execmodel: + +*************** +Execution model +*************** + +.. index:: single: execution model + + +.. _naming: + +Naming and binding +================== + +.. index:: + pair: code; block + single: namespace + single: scope + +.. index:: + single: name + pair: binding; name + +:dfn:`Names` refer to objects. Names are introduced by name binding operations. +Each occurrence of a name in the program text refers to the :dfn:`binding` of +that name established in the innermost function block containing the use. + +.. index:: single: block + +A :dfn:`block` is a piece of Python program text that is executed as a unit. +The following are blocks: a module, a function body, and a class definition. +Each command typed interactively is a block. 
A script file (a file given as +standard input to the interpreter or specified on the interpreter command line +as the first argument) is a code block. A script command (a command specified on +the interpreter command line with the '**-c**' option) is a code block. The string +argument passed to the built-in functions :func:`eval` and :func:`exec` is a +code block. The expression read and evaluated by the built-in function +:func:`input` is a code block. + +.. index:: pair: execution; frame + +A code block is executed in an :dfn:`execution frame`. A frame contains some +administrative information (used for debugging) and determines where and how +execution continues after the code block's execution has completed. + +.. index:: single: scope + +A :dfn:`scope` defines the visibility of a name within a block. If a local +variable is defined in a block, its scope includes that block. If the +definition occurs in a function block, the scope extends to any blocks contained +within the defining one, unless a contained block introduces a different binding +for the name. The scope of names defined in a class block is limited to the +class block; it does not extend to the code blocks of methods. + +.. index:: single: environment + +When a name is used in a code block, it is resolved using the nearest enclosing +scope. The set of all such scopes visible to a code block is called the block's +:dfn:`environment`. + +.. index:: pair: free; variable + +If a name is bound in a block, it is a local variable of that block. If a name +is bound at the module level, it is a global variable. (The variables of the +module code block are local and global.) If a variable is used in a code block +but not defined there, it is a :dfn:`free variable`. + +.. index:: + single: NameError (built-in exception) + single: UnboundLocalError + +When a name is not found at all, a :exc:`NameError` exception is raised. 
If the +name refers to a local variable that has not been bound, a +:exc:`UnboundLocalError` exception is raised. :exc:`UnboundLocalError` is a +subclass of :exc:`NameError`. + +.. index:: statement: from + +The following constructs bind names: formal parameters to functions, +:keyword:`import` statements, class and function definitions (these bind the +class or function name in the defining block), and targets that are identifiers +if occurring in an assignment, :keyword:`for` loop header, or in the second +position of an :keyword:`except` clause header. The :keyword:`import` statement +of the form "``from ...import *``" binds all names defined in the imported +module, except those beginning with an underscore. This form may only be used +at the module level. + +A target occurring in a :keyword:`del` statement is also considered bound for +this purpose (though the actual semantics are to unbind the name). It is +illegal to unbind a name that is referenced by an enclosing scope; the compiler +will report a :exc:`SyntaxError`. + +Each assignment or import statement occurs within a block defined by a class or +function definition or at the module level (the top-level code block). + +If a name binding operation occurs anywhere within a code block, all uses of the +name within the block are treated as references to the current block. This can +lead to errors when a name is used within a block before it is bound. This rule +is subtle. Python lacks declarations and allows name binding operations to +occur anywhere within a code block. The local variables of a code block can be +determined by scanning the entire text of the block for name binding operations. + +If the global statement occurs within a block, all uses of the name specified in +the statement refer to the binding of that name in the top-level namespace. +Names are resolved in the top-level namespace by searching the global namespace, +i.e. 
the namespace of the module containing the code block, and the builtin +namespace, the namespace of the module :mod:`__builtin__`. The global namespace +is searched first. If the name is not found there, the builtin namespace is +searched. The global statement must precede all uses of the name. + +.. index:: pair: restricted; execution + +The built-in namespace associated with the execution of a code block is actually +found by looking up the name ``__builtins__`` in its global namespace; this +should be a dictionary or a module (in the latter case the module's dictionary +is used). By default, when in the :mod:`__main__` module, ``__builtins__`` is +the built-in module :mod:`__builtin__` (note: no 's'); when in any other module, +``__builtins__`` is an alias for the dictionary of the :mod:`__builtin__` module +itself. ``__builtins__`` can be set to a user-created dictionary to create a +weak form of restricted execution. + +.. note:: + + Users should not touch ``__builtins__``; it is strictly an implementation + detail. Users wanting to override values in the built-in namespace should + :keyword:`import` the :mod:`__builtin__` (no 's') module and modify its + attributes appropriately. + +.. index:: module: __main__ + +The namespace for a module is automatically created the first time a module is +imported. The main module for a script is always called :mod:`__main__`. + +The global statement has the same scope as a name binding operation in the same +block. If the nearest enclosing scope for a free variable contains a global +statement, the free variable is treated as a global. + +A class definition is an executable statement that may use and define names. +These references follow the normal rules for name resolution. The namespace of +the class definition becomes the attribute dictionary of the class. Names +defined at the class scope are not visible in methods. + + +.. 
_dynamic-features: + +Interaction with dynamic features +--------------------------------- + +There are several cases where Python statements are illegal when used in +conjunction with nested scopes that contain free variables. + +If a variable is referenced in an enclosing scope, it is illegal to delete the +name. An error will be reported at compile time. + +If the wild card form of import --- ``import *`` --- is used in a function and +the function contains or is a nested block with free variables, the compiler +will raise a :exc:`SyntaxError`. + +The :func:`eval` and :func:`exec` functions do +not have access to the full environment for resolving names. Names may be +resolved in the local and global namespaces of the caller. Free variables are +not resolved in the nearest enclosing namespace, but in the global namespace. +[#]_ The :func:`exec` and :func:`eval` functions have optional +arguments to override the global and local namespace. If only one namespace is +specified, it is used for both. + + +.. _exceptions: + +Exceptions +========== + +.. index:: single: exception + +.. index:: + single: raise an exception + single: handle an exception + single: exception handler + single: errors + single: error handling + +Exceptions are a means of breaking out of the normal flow of control of a code +block in order to handle errors or other exceptional conditions. An exception +is *raised* at the point where the error is detected; it may be *handled* by the +surrounding code block or by any code block that directly or indirectly invoked +the code block where the error occurred. + +The Python interpreter raises an exception when it detects a run-time error +(such as division by zero). A Python program can also explicitly raise an +exception with the :keyword:`raise` statement. Exception handlers are specified +with the :keyword:`try` ... :keyword:`except` statement. The :keyword:`try` ... 
+:keyword:`finally` statement specifies cleanup code which does not handle the +exception, but is executed whether an exception occurred or not in the preceding +code. + +.. index:: single: termination model + +Python uses the "termination" model of error handling: an exception handler can +find out what happened and continue execution at an outer level, but it cannot +repair the cause of the error and retry the failing operation (except by +re-entering the offending piece of code from the top). + +.. index:: single: SystemExit (built-in exception) + +When an exception is not handled at all, the interpreter terminates execution of +the program, or returns to its interactive main loop. In either case, it prints +a stack backtrace, except when the exception is :exc:`SystemExit`. + +Exceptions are identified by class instances. The :keyword:`except` clause is +selected depending on the class of the instance: it must reference the class of +the instance or a base class thereof. The instance can be received by the +handler and can carry additional information about the exceptional condition. + +Exceptions can also be identified by strings, in which case the +:keyword:`except` clause is selected by object identity. An arbitrary value can +be raised along with the identifying string which can be passed to the handler. + +.. warning:: + + Messages to exceptions are not part of the Python API. Their contents may + change from one version of Python to the next without warning and should not be + relied on by code which will run under multiple versions of the interpreter. + +See also the description of the :keyword:`try` statement in section :ref:`try` +and :keyword:`raise` statement in section :ref:`raise`. + +.. rubric:: Footnotes + +.. [#] This limitation occurs because the code that is executed by these operations is + not available at the time the module is compiled. 
+ diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst new file mode 100644 index 0000000..28c1406 --- /dev/null +++ b/Doc/reference/expressions.rst @@ -0,0 +1,1283 @@ + +.. _expressions: + +*********** +Expressions +*********** + +.. index:: single: expression + +This chapter explains the meaning of the elements of expressions in Python. + +.. index:: single: BNF + +**Syntax Notes:** In this and the following chapters, extended BNF notation will +be used to describe syntax, not lexical analysis. When (one alternative of) a +syntax rule has the form + +.. productionlist:: * + name: `othername` + +.. index:: single: syntax + +and no semantics are given, the semantics of this form of ``name`` are the same +as for ``othername``. + + +.. _conversions: + +Arithmetic conversions +====================== + +.. index:: pair: arithmetic; conversion + +.. XXX no coercion rules are documented anymore + +When a description of an arithmetic operator below uses the phrase "the numeric +arguments are converted to a common type," the arguments are coerced using the +coercion rules. If both arguments are standard +numeric types, the following coercions are applied: + +* If either argument is a complex number, the other is converted to complex; + +* otherwise, if either argument is a floating point number, the other is + converted to floating point; + +* otherwise, if either argument is a long integer, the other is converted to + long integer; + +* otherwise, both must be plain integers and no conversion is necessary. + +Some additional rules apply for certain operators (e.g., a string left argument +to the '%' operator). Extensions can define their own coercions. + + +.. _atoms: + +Atoms +===== + +.. index:: single: atom + +Atoms are the most basic elements of expressions. The simplest atoms are +identifiers or literals. Forms enclosed in reverse quotes or in parentheses, +brackets or braces are also categorized syntactically as atoms. 
The syntax for +atoms is: + +.. productionlist:: + atom: `identifier` | `literal` | `enclosure` + enclosure: `parenth_form` | `list_display` + : | `generator_expression` | `dict_display` + : | `string_conversion` | `yield_atom` + + +.. _atom-identifiers: + +Identifiers (Names) +------------------- + +.. index:: + single: name + single: identifier + +An identifier occurring as an atom is a name. See section :ref:`identifiers` +for lexical definition and section :ref:`naming` for documentation of naming and +binding. + +.. index:: exception: NameError + +When the name is bound to an object, evaluation of the atom yields that object. +When a name is not bound, an attempt to evaluate it raises a :exc:`NameError` +exception. + +.. index:: + pair: name; mangling + pair: private; names + +**Private name mangling:** When an identifier that textually occurs in a class +definition begins with two or more underscore characters and does not end in two +or more underscores, it is considered a :dfn:`private name` of that class. +Private names are transformed to a longer form before code is generated for +them. The transformation inserts the class name in front of the name, with +leading underscores removed, and a single underscore inserted in front of the +class name. For example, the identifier ``__spam`` occurring in a class named +``Ham`` will be transformed to ``_Ham__spam``. This transformation is +independent of the syntactical context in which the identifier is used. If the +transformed name is extremely long (longer than 255 characters), implementation +defined truncation may happen. If the class name consists only of underscores, +no transformation is done. + +.. % +.. % + + +.. _atom-literals: + +Literals +-------- + +.. index:: single: literal + +Python supports string literals and various numeric literals: + +.. 
productionlist:: + literal: `stringliteral` | `integer` | `longinteger` + : | `floatnumber` | `imagnumber` + +Evaluation of a literal yields an object of the given type (string, integer, +long integer, floating point number, complex number) with the given value. The +value may be approximated in the case of floating point and imaginary (complex) +literals. See section :ref:`literals` for details. + +.. index:: + triple: immutable; data; type + pair: immutable; object + +All literals correspond to immutable data types, and hence the object's identity +is less important than its value. Multiple evaluations of literals with the +same value (either the same occurrence in the program text or a different +occurrence) may obtain the same object or a different object with the same +value. + + +.. _parenthesized: + +Parenthesized forms +------------------- + +.. index:: single: parenthesized form + +A parenthesized form is an optional expression list enclosed in parentheses: + +.. productionlist:: + parenth_form: "(" [`expression_list`] ")" + +A parenthesized expression list yields whatever that expression list yields: if +the list contains at least one comma, it yields a tuple; otherwise, it yields +the single expression that makes up the expression list. + +.. index:: pair: empty; tuple + +An empty pair of parentheses yields an empty tuple object. Since tuples are +immutable, the rules for literals apply (i.e., two occurrences of the empty +tuple may or may not yield the same object). + +.. index:: + single: comma + pair: tuple; display + +Note that tuples are not formed by the parentheses, but rather by use of the +comma operator. The exception is the empty tuple, for which parentheses *are* +required --- allowing unparenthesized "nothing" in expressions would cause +ambiguities and allow common typos to pass uncaught. + + +.. _lists: + +List displays +------------- + +.. 
index:: + pair: list; display + pair: list; comprehensions + +A list display is a possibly empty series of expressions enclosed in square +brackets: + +.. productionlist:: + list_display: "[" [`expression_list` | `list_comprehension`] "]" + list_comprehension: `expression` `list_for` + list_for: "for" `target_list` "in" `old_expression_list` [`list_iter`] + old_expression_list: `old_expression` [("," `old_expression`)+ [","]] + list_iter: `list_for` | `list_if` + list_if: "if" `old_expression` [`list_iter`] + +.. index:: + pair: list; comprehensions + object: list + pair: empty; list + +A list display yields a new list object. Its contents are specified by +providing either a list of expressions or a list comprehension. When a +comma-separated list of expressions is supplied, its elements are evaluated from +left to right and placed into the list object in that order. When a list +comprehension is supplied, it consists of a single expression followed by at +least one :keyword:`for` clause and zero or more :keyword:`for` or :keyword:`if` +clauses. In this case, the elements of the new list are those that would be +produced by considering each of the :keyword:`for` or :keyword:`if` clauses a +block, nesting from left to right, and evaluating the expression to produce a +list element each time the innermost block is reached [#]_. + + +.. _genexpr: + +Generator expressions +--------------------- + +.. index:: pair: generator; expression + +A generator expression is a compact generator notation in parentheses: + +.. productionlist:: + generator_expression: "(" `expression` `genexpr_for` ")" + genexpr_for: "for" `target_list` "in" `or_test` [`genexpr_iter`] + genexpr_iter: `genexpr_for` | `genexpr_if` + genexpr_if: "if" `old_expression` [`genexpr_iter`] + +.. index:: object: generator + +A generator expression yields a new generator object. 
It consists of a single +expression followed by at least one :keyword:`for` clause and zero or more +:keyword:`for` or :keyword:`if` clauses. The iterating values of the new +generator are those that would be produced by considering each of the +:keyword:`for` or :keyword:`if` clauses a block, nesting from left to right, and +evaluating the expression to yield a value each time the innermost block is +reached. + +Variables used in the generator expression are evaluated lazily when the +:meth:`__next__` method is called for the generator object (in the same fashion as +normal generators). However, the leftmost :keyword:`for` clause is immediately +evaluated so that an error produced by it can be seen before any other possible +error in the code that handles the generator expression. Subsequent +:keyword:`for` clauses cannot be evaluated immediately since they may depend on +the previous :keyword:`for` loop. For example: ``(x*y for x in range(10) for y +in bar(x))``. + +The parentheses can be omitted on calls with only one argument. See section +:ref:`calls` for details. + + +.. _dict: + +Dictionary displays +------------------- + +.. index:: pair: dictionary; display + +.. index:: + single: key + single: datum + single: key/datum pair + +A dictionary display is a possibly empty series of key/datum pairs enclosed in +curly braces: + +.. productionlist:: + dict_display: "{" [`key_datum_list`] "}" + key_datum_list: `key_datum` ("," `key_datum`)* [","] + key_datum: `expression` ":" `expression` + +.. index:: object: dictionary + +A dictionary display yields a new dictionary object. + +The key/datum pairs are evaluated from left to right to define the entries of +the dictionary: each key object is used as a key into the dictionary to store +the corresponding datum. + +.. index:: pair: immutable; object + +Restrictions on the types of the key values are listed earlier in section +:ref:`types`. 
(To summarize, the key type should be hashable, which excludes +all mutable objects.) Clashes between duplicate keys are not detected; the last +datum (textually rightmost in the display) stored for a given key value +prevails. + + +.. _yieldexpr: + +Yield expressions +----------------- + +.. index:: + keyword: yield + pair: yield; expression + pair: generator; function + +.. productionlist:: + yield_atom: "(" `yield_expression` ")" + yield_expression: "yield" [`expression_list`] + +.. versionadded:: 2.5 + +The :keyword:`yield` expression is only used when defining a generator function, +and can only be used in the body of a function definition. Using a +:keyword:`yield` expression in a function definition is sufficient to cause that +definition to create a generator function instead of a normal function. + +When a generator function is called, it returns an iterator known as a +generator. That generator then controls the execution of a generator function. +The execution starts when one of the generator's methods is called. At that +time, the execution proceeds to the first :keyword:`yield` expression, where it +is suspended again, returning the value of :token:`expression_list` to +the generator's caller. By suspended we mean that all local state is retained, +including the current bindings of local variables, the instruction pointer, and +the internal evaluation stack. When the execution is resumed by calling one of +the generator's methods, the function can proceed exactly as if the +:keyword:`yield` expression was just another external call. The value of the +:keyword:`yield` expression after resuming depends on the method which resumed +the execution. + +.. index:: single: coroutine + +All of this makes generator functions quite similar to coroutines; they yield +multiple times, they have more than one entry point and their execution can be +suspended. 
The only difference is that a generator function cannot control +where the execution should continue after it yields; the control is always +transferred to the generator's caller. + +.. index:: object: generator + +The following generator's methods can be used to control the execution of a +generator function: + +.. index:: exception: StopIteration + + +.. method:: generator.next() + + Starts the execution of a generator function or resumes it at the last executed + :keyword:`yield` expression. When a generator function is resumed with a + :meth:`next` method, the current :keyword:`yield` expression always evaluates to + :const:`None`. The execution then continues to the next :keyword:`yield` + expression, where the generator is suspended again, and the value of the + :token:`expression_list` is returned to :meth:`next`'s caller. If the generator + exits without yielding another value, a :exc:`StopIteration` exception is + raised. + + +.. method:: generator.send(value) + + Resumes the execution and "sends" a value into the generator function. The + ``value`` argument becomes the result of the current :keyword:`yield` + expression. The :meth:`send` method returns the next value yielded by the + generator, or raises :exc:`StopIteration` if the generator exits without + yielding another value. When :meth:`send` is called to start the generator, it + must be called with :const:`None` as the argument, because there is no + :keyword:`yield` expression that could receive the value. + + +.. method:: generator.throw(type[, value[, traceback]]) + + Raises an exception of type ``type`` at the point where the generator was paused, + and returns the next value yielded by the generator function. If the generator + exits without yielding another value, a :exc:`StopIteration` exception is + raised. If the generator function does not catch the passed-in exception, or + raises a different exception, then that exception propagates to the caller. + +.. 
index:: exception: GeneratorExit + + +.. method:: generator.close() + + Raises a :exc:`GeneratorExit` at the point where the generator function was + paused. If the generator function then raises :exc:`StopIteration` (by exiting + normally, or due to already being closed) or :exc:`GeneratorExit` (by not + catching the exception), close returns to its caller. If the generator yields a + value, a :exc:`RuntimeError` is raised. If the generator raises any other + exception, it is propagated to the caller. :meth:`close` does nothing if the + generator has already exited due to an exception or normal exit. + +Here is a simple example that demonstrates the behavior of generators and +generator functions:: + + >>> def echo(value=None): + ... print "Execution starts when 'next()' is called for the first time." + ... try: + ... while True: + ... try: + ... value = (yield value) + ... except GeneratorExit: + ... # never catch GeneratorExit + ... raise + ... except Exception, e: + ... value = e + ... finally: + ... print "Don't forget to clean up when 'close()' is called." + ... + >>> generator = echo(1) + >>> print generator.next() + Execution starts when 'next()' is called for the first time. + 1 + >>> print generator.next() + None + >>> print generator.send(2) + 2 + >>> generator.throw(TypeError, "spam") + TypeError('spam',) + >>> generator.close() + Don't forget to clean up when 'close()' is called. + + +.. seealso:: + + :pep:`0342` - Coroutines via Enhanced Generators + The proposal to enhance the API and syntax of generators, making them usable as + simple coroutines. + + +.. _primaries: + +Primaries +========= + +.. index:: single: primary + +Primaries represent the most tightly bound operations of the language. Their +syntax is: + +.. productionlist:: + primary: `atom` | `attributeref` | `subscription` | `slicing` | `call` + + +.. _attribute-references: + +Attribute references +-------------------- + +.. 
index:: pair: attribute; reference + +An attribute reference is a primary followed by a period and a name: + +.. productionlist:: + attributeref: `primary` "." `identifier` + +.. index:: + exception: AttributeError + object: module + object: list + +The primary must evaluate to an object of a type that supports attribute +references, e.g., a module, list, or an instance. This object is then asked to +produce the attribute whose name is the identifier. If this attribute is not +available, the exception :exc:`AttributeError` is raised. Otherwise, the type +and value of the object produced is determined by the object. Multiple +evaluations of the same attribute reference may yield different objects. + + +.. _subscriptions: + +Subscriptions +------------- + +.. index:: single: subscription + +.. index:: + object: sequence + object: mapping + object: string + object: tuple + object: list + object: dictionary + pair: sequence; item + +A subscription selects an item of a sequence (string, tuple or list) or mapping +(dictionary) object: + +.. productionlist:: + subscription: `primary` "[" `expression_list` "]" + +The primary must evaluate to an object of a sequence or mapping type. + +If the primary is a mapping, the expression list must evaluate to an object +whose value is one of the keys of the mapping, and the subscription selects the +value in the mapping that corresponds to that key. (The expression list is a +tuple except if it has exactly one item.) + +If the primary is a sequence, the expression (list) must evaluate to a plain +integer. If this value is negative, the length of the sequence is added to it +(so that, e.g., ``x[-1]`` selects the last item of ``x``.) The resulting value +must be a nonnegative integer less than the number of items in the sequence, and +the subscription selects the item whose index is that value (counting from +zero). + +.. index:: + single: character + pair: string; item + +A string's items are characters. 
A character is not a separate data type but a +string of exactly one character. + + +.. _slicings: + +Slicings +-------- + +.. index:: + single: slicing + single: slice + +.. index:: + object: sequence + object: string + object: tuple + object: list + +A slicing selects a range of items in a sequence object (e.g., a string, tuple +or list). Slicings may be used as expressions or as targets in assignment or +:keyword:`del` statements. The syntax for a slicing: + +.. productionlist:: + slicing: `simple_slicing` | `extended_slicing` + simple_slicing: `primary` "[" `short_slice` "]" + extended_slicing: `primary` "[" `slice_list` "]" + slice_list: `slice_item` ("," `slice_item`)* [","] + slice_item: `expression` | `proper_slice` | `ellipsis` + proper_slice: `short_slice` | `long_slice` + short_slice: [`lower_bound`] ":" [`upper_bound`] + long_slice: `short_slice` ":" [`stride`] + lower_bound: `expression` + upper_bound: `expression` + stride: `expression` + ellipsis: "..." + +.. index:: pair: extended; slicing + +There is ambiguity in the formal syntax here: anything that looks like an +expression list also looks like a slice list, so any subscription can be +interpreted as a slicing. Rather than further complicating the syntax, this is +disambiguated by defining that in this case the interpretation as a subscription +takes priority over the interpretation as a slicing (this is the case if the +slice list contains no proper slice nor ellipses). Similarly, when the slice +list has exactly one short slice and no trailing comma, the interpretation as a +simple slicing takes priority over that as an extended slicing. + +The semantics for a simple slicing are as follows. The primary must evaluate to +a sequence object. The lower and upper bound expressions, if present, must +evaluate to plain integers; defaults are zero and the ``sys.maxint``, +respectively. If either bound is negative, the sequence's length is added to +it. 
The slicing now selects all items with index *k* such that ``i <= k < j`` +where *i* and *j* are the specified lower and upper bounds. This may be an +empty sequence. It is not an error if *i* or *j* lie outside the range of valid +indexes (such items don't exist so they aren't selected). + +.. index:: + single: start (slice object attribute) + single: stop (slice object attribute) + single: step (slice object attribute) + +The semantics for an extended slicing are as follows. The primary must evaluate +to a mapping object, and it is indexed with a key that is constructed from the +slice list, as follows. If the slice list contains at least one comma, the key +is a tuple containing the conversion of the slice items; otherwise, the +conversion of the lone slice item is the key. The conversion of a slice item +that is an expression is that expression. The conversion of a proper slice is a +slice object (see section :ref:`types`) whose :attr:`start`, :attr:`stop` and +:attr:`step` attributes are the values of the expressions given as lower bound, +upper bound and stride, respectively, substituting ``None`` for missing +expressions. + + +.. _calls: + +Calls +----- + +.. index:: single: call + +.. index:: object: callable + +A call calls a callable object (e.g., a function) with a possibly empty series +of arguments: + +.. productionlist:: + call: `primary` "(" [`argument_list` [","] + : | `expression` `genexpr_for`] ")" + argument_list: `positional_arguments` ["," `keyword_arguments`] + : ["," "*" `expression`] + : ["," "**" `expression`] + : | `keyword_arguments` ["," "*" `expression`] + : ["," "**" `expression`] + : | "*" `expression` ["," "**" `expression`] + : | "**" `expression` + positional_arguments: `expression` ("," `expression`)* + keyword_arguments: `keyword_item` ("," `keyword_item`)* + keyword_item: `identifier` "=" `expression` + +A trailing comma may be present after the positional and keyword arguments but +does not affect the semantics. 
+ +The primary must evaluate to a callable object (user-defined functions, built-in +functions, methods of built-in objects, class objects, methods of class +instances, and certain class instances themselves are callable; extensions may +define additional callable object types). All argument expressions are +evaluated before the call is attempted. Please refer to section :ref:`function` +for the syntax of formal parameter lists. + +If keyword arguments are present, they are first converted to positional +arguments, as follows. First, a list of unfilled slots is created for the +formal parameters. If there are N positional arguments, they are placed in the +first N slots. Next, for each keyword argument, the identifier is used to +determine the corresponding slot (if the identifier is the same as the first +formal parameter name, the first slot is used, and so on). If the slot is +already filled, a :exc:`TypeError` exception is raised. Otherwise, the value of +the argument is placed in the slot, filling it (even if the expression is +``None``, it fills the slot). When all arguments have been processed, the slots +that are still unfilled are filled with the corresponding default value from the +function definition. (Default values are calculated, once, when the function is +defined; thus, a mutable object such as a list or dictionary used as default +value will be shared by all calls that don't specify an argument value for the +corresponding slot; this should usually be avoided.) If there are any unfilled +slots for which no default value is specified, a :exc:`TypeError` exception is +raised. Otherwise, the list of filled slots is used as the argument list for +the call. 
+ +If there are more positional arguments than there are formal parameter slots, a +:exc:`TypeError` exception is raised, unless a formal parameter using the syntax +``*identifier`` is present; in this case, that formal parameter receives a tuple +containing the excess positional arguments (or an empty tuple if there were no +excess positional arguments). + +If any keyword argument does not correspond to a formal parameter name, a +:exc:`TypeError` exception is raised, unless a formal parameter using the syntax +``**identifier`` is present; in this case, that formal parameter receives a +dictionary containing the excess keyword arguments (using the keywords as keys +and the argument values as corresponding values), or a (new) empty dictionary if +there were no excess keyword arguments. + +If the syntax ``*expression`` appears in the function call, ``expression`` must +evaluate to a sequence. Elements from this sequence are treated as if they were +additional positional arguments; if there are positional arguments *x1*,...,*xN*, +and ``expression`` evaluates to a sequence *y1*,...,*yM*, this is equivalent +to a call with M+N positional arguments *x1*,...,*xN*,*y1*,...,*yM*. + +A consequence of this is that although the ``*expression`` syntax appears +*after* any keyword arguments, it is processed *before* the keyword arguments +(and the ``**expression`` argument, if any -- see below). So:: + + >>> def f(a, b): + ... print a, b + ... + >>> f(b=1, *(2,)) + 2 1 + >>> f(a=1, *(2,)) + Traceback (most recent call last): + File "<stdin>", line 1, in ? + TypeError: f() got multiple values for keyword argument 'a' + >>> f(1, *(2,)) + 1 2 + +It is unusual for both keyword arguments and the ``*expression`` syntax to be +used in the same call, so in practice this confusion does not arise. + +If the syntax ``**expression`` appears in the function call, ``expression`` must +evaluate to a mapping, the contents of which are treated as additional keyword +arguments.
In the case of a keyword appearing in both ``expression`` and as an +explicit keyword argument, a :exc:`TypeError` exception is raised. + +Formal parameters using the syntax ``*identifier`` or ``**identifier`` cannot be +used as positional argument slots or as keyword argument names. + +A call always returns some value, possibly ``None``, unless it raises an +exception. How this value is computed depends on the type of the callable +object. + +If it is--- + +a user-defined function: + .. index:: + pair: function; call + triple: user-defined; function; call + object: user-defined function + object: function + + The code block for the function is executed, passing it the argument list. The + first thing the code block will do is bind the formal parameters to the + arguments; this is described in section :ref:`function`. When the code block + executes a :keyword:`return` statement, this specifies the return value of the + function call. + +a built-in function or method: + .. index:: + pair: function; call + pair: built-in function; call + pair: method; call + pair: built-in method; call + object: built-in method + object: built-in function + object: method + object: function + + The result is up to the interpreter; see :ref:`built-in-funcs` for the + descriptions of built-in functions and methods. + +a class object: + .. index:: + object: class + pair: class object; call + + A new instance of that class is returned. + +a class instance method: + .. index:: + object: class instance + object: instance + pair: class instance; call + + The corresponding user-defined function is called, with an argument list that is + one longer than the argument list of the call: the instance becomes the first + argument. + +a class instance: + .. index:: + pair: instance; call + single: __call__() (object method) + + The class must define a :meth:`__call__` method; the effect is then the same as + if that method was called. + + +.. 
_power: + +The power operator +================== + +The power operator binds more tightly than unary operators on its left; it binds +less tightly than unary operators on its right. The syntax is: + +.. productionlist:: + power: `primary` ["**" `u_expr`] + +Thus, in an unparenthesized sequence of power and unary operators, the operators +are evaluated from right to left (this does not constrain the evaluation order +for the operands). + +The power operator has the same semantics as the built-in :func:`pow` function, +when called with two arguments: it yields its left argument raised to the power +of its right argument. The numeric arguments are first converted to a common +type. The result type is that of the arguments after coercion. + +With mixed operand types, the coercion rules for binary arithmetic operators +apply. For int and long int operands, the result has the same type as the +operands (after coercion) unless the second argument is negative; in that case, +all arguments are converted to float and a float result is delivered. For +example, ``10**2`` returns ``100``, but ``10**-2`` returns ``0.01``. (This last +feature was added in Python 2.2. In Python 2.1 and before, if both arguments +were of integer types and the second argument was negative, an exception was +raised). + +Raising ``0.0`` to a negative power results in a :exc:`ZeroDivisionError`. +Raising a negative number to a fractional power results in a :exc:`ValueError`. + + +.. _unary: + +Unary arithmetic operations +=========================== + +.. index:: + triple: unary; arithmetic; operation + triple: unary; bit-wise; operation + +All unary arithmetic (and bit-wise) operations have the same priority: + +.. productionlist:: + u_expr: `power` | "-" `u_expr` | "+" `u_expr` | "~" `u_expr` + +.. index:: + single: negation + single: minus + +The unary ``-`` (minus) operator yields the negation of its numeric argument. + +.. 
index:: single: plus + +The unary ``+`` (plus) operator yields its numeric argument unchanged. + +.. index:: single: inversion + +The unary ``~`` (invert) operator yields the bit-wise inversion of its plain or +long integer argument. The bit-wise inversion of ``x`` is defined as +``-(x+1)``. It only applies to integral numbers. + +.. index:: exception: TypeError + +In all three cases, if the argument does not have the proper type, a +:exc:`TypeError` exception is raised. + + +.. _binary: + +Binary arithmetic operations +============================ + +.. index:: triple: binary; arithmetic; operation + +The binary arithmetic operations have the conventional priority levels. Note +that some of these operations also apply to certain non-numeric types. Apart +from the power operator, there are only two levels, one for multiplicative +operators and one for additive operators: + +.. productionlist:: + m_expr: `u_expr` | `m_expr` "*" `u_expr` | `m_expr` "//" `u_expr` | `m_expr` "/" `u_expr` + : | `m_expr` "%" `u_expr` + a_expr: `m_expr` | `a_expr` "+" `m_expr` | `a_expr` "-" `m_expr` + +.. index:: single: multiplication + +The ``*`` (multiplication) operator yields the product of its arguments. The +arguments must either both be numbers, or one argument must be an integer (plain +or long) and the other must be a sequence. In the former case, the numbers are +converted to a common type and then multiplied together. In the latter case, +sequence repetition is performed; a negative repetition factor yields an empty +sequence. + +.. index:: + exception: ZeroDivisionError + single: division + +The ``/`` (division) and ``//`` (floor division) operators yield the quotient of +their arguments. The numeric arguments are first converted to a common type. +Plain or long integer division yields an integer of the same type; the result is +that of mathematical division with the 'floor' function applied to the result. +Division by zero raises the :exc:`ZeroDivisionError` exception. 
+ +.. index:: single: modulo + +The ``%`` (modulo) operator yields the remainder from the division of the first +argument by the second. The numeric arguments are first converted to a common +type. A zero right argument raises the :exc:`ZeroDivisionError` exception. The +arguments may be floating point numbers, e.g., ``3.14%0.7`` equals ``0.34`` +(since ``3.14`` equals ``4*0.7 + 0.34``.) The modulo operator always yields a +result with the same sign as its second operand (or zero); the absolute value of +the result is strictly smaller than the absolute value of the second operand +[#]_. + +The integer division and modulo operators are connected by the following +identity: ``x == (x/y)*y + (x%y)``. Integer division and modulo are also +connected with the built-in function :func:`divmod`: ``divmod(x, y) == (x/y, +x%y)``. These identities don't hold for floating point numbers; there similar +identities hold approximately where ``x/y`` is replaced by ``floor(x/y)`` or +``floor(x/y) - 1`` [#]_. + +In addition to performing the modulo operation on numbers, the ``%`` operator is +also overloaded by string and unicode objects to perform string formatting (also +known as interpolation). The syntax for string formatting is described in the +Python Library Reference, section :ref:`string-formatting`. + +The floor division operator, the modulo operator, and the :func:`divmod` +function are not defined for complex numbers. Instead, convert to a +floating point number using the :func:`abs` function if appropriate. + +.. index:: single: addition + +The ``+`` (addition) operator yields the sum of its arguments. The arguments +must either both be numbers or both sequences of the same type. In the former +case, the numbers are converted to a common type and then added together. In +the latter case, the sequences are concatenated. + +.. index:: single: subtraction + +The ``-`` (subtraction) operator yields the difference of its arguments. 
The +numeric arguments are first converted to a common type. + + +.. _shifting: + +Shifting operations +=================== + +.. index:: pair: shifting; operation + +The shifting operations have lower priority than the arithmetic operations: + +.. productionlist:: + shift_expr: `a_expr` | `shift_expr` ( "<<" | ">>" ) `a_expr` + +These operators accept plain or long integers as arguments. The arguments are +converted to a common type. They shift the first argument to the left or right +by the number of bits given by the second argument. + +.. index:: exception: ValueError + +A right shift by *n* bits is defined as division by ``pow(2,n)``. A left shift +by *n* bits is defined as multiplication with ``pow(2,n)``; for plain integers +there is no overflow check so in that case the operation drops bits and flips +the sign if the result is not less than ``pow(2,31)`` in absolute value. +Negative shift counts raise a :exc:`ValueError` exception. + + +.. _bitwise: + +Binary bit-wise operations +========================== + +.. index:: triple: binary; bit-wise; operation + +Each of the three bitwise operations has a different priority level: + +.. productionlist:: + and_expr: `shift_expr` | `and_expr` "&" `shift_expr` + xor_expr: `and_expr` | `xor_expr` "^" `and_expr` + or_expr: `xor_expr` | `or_expr` "|" `xor_expr` + +.. index:: pair: bit-wise; and + +The ``&`` operator yields the bitwise AND of its arguments, which must be plain +or long integers. The arguments are converted to a common type. + +.. index:: + pair: bit-wise; xor + pair: exclusive; or + +The ``^`` operator yields the bitwise XOR (exclusive OR) of its arguments, which +must be plain or long integers. The arguments are converted to a common type. + +.. index:: + pair: bit-wise; or + pair: inclusive; or + +The ``|`` operator yields the bitwise (inclusive) OR of its arguments, which +must be plain or long integers. The arguments are converted to a common type. + + +.. 
_comparisons: + +Comparisons +=========== + +.. index:: single: comparison + +.. index:: pair: C; language + +Unlike C, all comparison operations in Python have the same priority, which is +lower than that of any arithmetic, shifting or bitwise operation. Also unlike +C, expressions like ``a < b < c`` have the interpretation that is conventional +in mathematics: + +.. productionlist:: + comparison: `or_expr` ( `comp_operator` `or_expr` )* + comp_operator: "<" | ">" | "==" | ">=" | "<=" | "!=" + : | "is" ["not"] | ["not"] "in" + +Comparisons yield boolean values: ``True`` or ``False``. + +.. index:: pair: chaining; comparisons + +Comparisons can be chained arbitrarily, e.g., ``x < y <= z`` is equivalent to +``x < y and y <= z``, except that ``y`` is evaluated only once (but in both +cases ``z`` is not evaluated at all when ``x < y`` is found to be false). + +Formally, if *a*, *b*, *c*, ..., *y*, *z* are expressions and *opa*, *opb*, ..., +*opy* are comparison operators, then *a opa b opb c* ...*y opy z* is equivalent +to *a opa b* :keyword:`and` *b opb c* :keyword:`and` ... *y opy z*, except that +each expression is evaluated at most once. + +Note that *a opa b opb c* doesn't imply any kind of comparison between *a* and +*c*, so that, e.g., ``x < y > z`` is perfectly legal (though perhaps not +pretty). + +The operators ``<``, ``>``, ``==``, ``>=``, ``<=``, and ``!=`` compare the +values of two objects. The objects need not have the same type. If both are +numbers, they are converted to a common type. Otherwise, objects of different +types *always* compare unequal, and are ordered consistently but arbitrarily. +You can control comparison behavior of objects of non-builtin types by defining +a ``__cmp__`` method or rich comparison methods like ``__gt__``, described in +section :ref:`specialnames`. + +(This unusual definition of comparison was used to simplify the definition of +operations like sorting and the :keyword:`in` and :keyword:`not in` operators. 
+In the future, the comparison rules for objects of different types are likely to +change.) + +Comparison of objects of the same type depends on the type: + +* Numbers are compared arithmetically. + +* Strings are compared lexicographically using the numeric equivalents (the + result of the built-in function :func:`ord`) of their characters. Unicode and + 8-bit strings are fully interoperable in this behavior. + +* Tuples and lists are compared lexicographically using comparison of + corresponding elements. This means that to compare equal, each element must + compare equal and the two sequences must be of the same type and have the same + length. + + If not equal, the sequences are ordered the same as their first differing + elements. For example, ``cmp([1,2,x], [1,2,y])`` returns the same as + ``cmp(x,y)``. If the corresponding element does not exist, the shorter sequence + is ordered first (for example, ``[1,2] < [1,2,3]``). + +* Mappings (dictionaries) compare equal if and only if their sorted (key, value) + lists compare equal. [#]_ Outcomes other than equality are resolved + consistently, but are not otherwise defined. [#]_ + +* Most other objects of builtin types compare unequal unless they are the same + object; the choice whether one object is considered smaller or larger than + another one is made arbitrarily but consistently within one execution of a + program. + +The operators :keyword:`in` and :keyword:`not in` test for set membership. ``x +in s`` evaluates to true if *x* is a member of the set *s*, and false otherwise. +``x not in s`` returns the negation of ``x in s``. The set membership test has +traditionally been bound to sequences; an object is a member of a set if the set +is a sequence and contains an element equal to that object. However, it is +possible for an object to support membership tests without being a sequence. 
In +particular, dictionaries support membership testing of their keys, spelled +``key in dict``; other mapping types may follow suit. + +For the list and tuple types, ``x in y`` is true if and only if there exists an +index *i* such that ``x == y[i]`` is true. + +For the Unicode and string types, ``x in y`` is true if and only if *x* is a +substring of *y*. An equivalent test is ``y.find(x) != -1``. Note, *x* and *y* +need not be the same type; consequently, ``u'ab' in 'abc'`` will return +``True``. Empty strings are always considered to be a substring of any other +string, so ``"" in "abc"`` will return ``True``. + +.. versionchanged:: 2.3 + Previously, *x* was required to be a string of length ``1``. + +For user-defined classes which define the :meth:`__contains__` method, ``x in +y`` is true if and only if ``y.__contains__(x)`` is true. + +For user-defined classes which do not define :meth:`__contains__` and do define +:meth:`__getitem__`, ``x in y`` is true if and only if there is a non-negative +integer index *i* such that ``x == y[i]``, and all lower integer indices do not +raise the :exc:`IndexError` exception. (If any other exception is raised, it is as +if :keyword:`in` raised that exception.) + +.. index:: + operator: in + operator: not in + pair: membership; test + object: sequence + +The operator :keyword:`not in` is defined to have the inverse truth value of +:keyword:`in`. + +.. index:: + operator: is + operator: is not + pair: identity; test + +The operators :keyword:`is` and :keyword:`is not` test for object identity: ``x +is y`` is true if and only if *x* and *y* are the same object. ``x is not y`` +yields the inverse truth value. + + +.. _booleans: + +Boolean operations +================== + +.. index:: + pair: Conditional; expression + pair: Boolean; operation + +Boolean operations have the lowest priority of all Python operations: + +..
productionlist:: + expression: `conditional_expression` | `lambda_form` + old_expression: `or_test` | `old_lambda_form` + conditional_expression: `or_test` ["if" `or_test` "else" `expression`] + or_test: `and_test` | `or_test` "or" `and_test` + and_test: `not_test` | `and_test` "and" `not_test` + not_test: `comparison` | "not" `not_test` + +In the context of Boolean operations, and also when expressions are used by +control flow statements, the following values are interpreted as false: +``False``, ``None``, numeric zero of all types, and empty strings and containers +(including strings, tuples, lists, dictionaries, sets and frozensets). All +other values are interpreted as true. + +.. index:: operator: not + +The operator :keyword:`not` yields ``True`` if its argument is false, ``False`` +otherwise. + +The expression ``x if C else y`` first evaluates *C* (*not* *x*); if *C* is +true, *x* is evaluated and its value is returned; otherwise, *y* is evaluated +and its value is returned. + +.. versionadded:: 2.5 + +.. index:: operator: and + +The expression ``x and y`` first evaluates *x*; if *x* is false, its value is +returned; otherwise, *y* is evaluated and the resulting value is returned. + +.. index:: operator: or + +The expression ``x or y`` first evaluates *x*; if *x* is true, its value is +returned; otherwise, *y* is evaluated and the resulting value is returned. + +(Note that neither :keyword:`and` nor :keyword:`or` restrict the value and type +they return to ``False`` and ``True``, but rather return the last evaluated +argument. This is sometimes useful, e.g., if ``s`` is a string that should be +replaced by a default value if it is empty, the expression ``s or 'foo'`` yields +the desired value. Because :keyword:`not` has to invent a value anyway, it does +not bother to return a value of the same type as its argument, so e.g., ``not +'foo'`` yields ``False``, not ``''``.) + + +.. _lambdas: + +Lambdas +======= + +.. 
index:: + pair: lambda; expression + pair: lambda; form + pair: anonymous; function + +.. productionlist:: + lambda_form: "lambda" [`parameter_list`]: `expression` + old_lambda_form: "lambda" [`parameter_list`]: `old_expression` + +Lambda forms (lambda expressions) have the same syntactic position as +expressions. They are a shorthand to create anonymous functions; the expression +``lambda arguments: expression`` yields a function object. The unnamed object +behaves like a function object defined with :: + + def name(arguments): + return expression + +See section :ref:`function` for the syntax of parameter lists. Note that +functions created with lambda forms cannot contain statements or annotations. + +.. _lambda: + + +.. _exprlists: + +Expression lists +================ + +.. index:: pair: expression; list + +.. productionlist:: + expression_list: `expression` ( "," `expression` )* [","] + +.. index:: object: tuple + +An expression list containing at least one comma yields a tuple. The length of +the tuple is the number of expressions in the list. The expressions are +evaluated from left to right. + +.. index:: pair: trailing; comma + +The trailing comma is required only to create a single tuple (a.k.a. a +*singleton*); it is optional in all other cases. A single expression without a +trailing comma doesn't create a tuple, but rather yields the value of that +expression. (To create an empty tuple, use an empty pair of parentheses: +``()``.) + + +.. _evalorder: + +Evaluation order +================ + +.. index:: pair: evaluation; order + +Python evaluates expressions from left to right. Notice that while evaluating an +assignment, the right-hand side is evaluated before the left-hand side. 
+ +In the following lines, expressions will be evaluated in the arithmetic order of +their suffixes:: + + expr1, expr2, expr3, expr4 + (expr1, expr2, expr3, expr4) + {expr1: expr2, expr3: expr4} + expr1 + expr2 * (expr3 - expr4) + func(expr1, expr2, *expr3, **expr4) + expr3, expr4 = expr1, expr2 + + +.. _operator-summary: + +Summary +======= + +.. index:: pair: operator; precedence + +The following table summarizes the operator precedences in Python, from lowest +precedence (least binding) to highest precedence (most binding). Operators in +the same box have the same precedence. Unless the syntax is explicitly given, +operators are binary. Operators in the same box group left to right (except for +comparisons, including tests, which all have the same precedence and chain from +left to right --- see section :ref:`comparisons` --- and exponentiation, which +groups from right to left). + ++----------------------------------------------+-------------------------------------+ +| Operator | Description | ++==============================================+=====================================+ +| :keyword:`lambda` | Lambda expression | ++----------------------------------------------+-------------------------------------+ +| :keyword:`or` | Boolean OR | ++----------------------------------------------+-------------------------------------+ +| :keyword:`and` | Boolean AND | ++----------------------------------------------+-------------------------------------+ +| :keyword:`not` *x* | Boolean NOT | ++----------------------------------------------+-------------------------------------+ +| :keyword:`in`, :keyword:`not` :keyword:`in` | Membership tests | ++----------------------------------------------+-------------------------------------+ +| :keyword:`is`, :keyword:`is not` | Identity tests | ++----------------------------------------------+-------------------------------------+ +| ``<``, ``<=``, ``>``, ``>=``, ``!=``, ``==`` | Comparisons | 
++----------------------------------------------+-------------------------------------+ +| ``|`` | Bitwise OR | ++----------------------------------------------+-------------------------------------+ +| ``^`` | Bitwise XOR | ++----------------------------------------------+-------------------------------------+ +| ``&`` | Bitwise AND | ++----------------------------------------------+-------------------------------------+ +| ``<<``, ``>>`` | Shifts | ++----------------------------------------------+-------------------------------------+ +| ``+``, ``-`` | Addition and subtraction | ++----------------------------------------------+-------------------------------------+ +| ``*``, ``/``, ``%`` | Multiplication, division, remainder | ++----------------------------------------------+-------------------------------------+ +| ``+x``, ``-x`` | Positive, negative | ++----------------------------------------------+-------------------------------------+ +| ``~x`` | Bitwise not | ++----------------------------------------------+-------------------------------------+ +| ``**`` | Exponentiation | ++----------------------------------------------+-------------------------------------+ +| ``x.attribute`` | Attribute reference | ++----------------------------------------------+-------------------------------------+ +| ``x[index]`` | Subscription | ++----------------------------------------------+-------------------------------------+ +| ``x[index:index]`` | Slicing | ++----------------------------------------------+-------------------------------------+ +| ``f(arguments...)`` | Function call | ++----------------------------------------------+-------------------------------------+ +| ``(expressions...)`` | Binding or tuple display | ++----------------------------------------------+-------------------------------------+ +| ``[expressions...]`` | List display | ++----------------------------------------------+-------------------------------------+ +| ``{key:datum...}`` | Dictionary 
display | ++----------------------------------------------+-------------------------------------+ + +.. rubric:: Footnotes + +.. [#] In Python 2.3, a list comprehension "leaks" the control variables of each + ``for`` it contains into the containing scope. However, this behavior is + deprecated, and relying on it will not work once this bug is fixed in a future + release + +.. [#] While ``abs(x%y) < abs(y)`` is true mathematically, for floats it may not be + true numerically due to roundoff. For example, and assuming a platform on which + a Python float is an IEEE 754 double-precision number, in order that ``-1e-100 % + 1e100`` have the same sign as ``1e100``, the computed result is ``-1e-100 + + 1e100``, which is numerically exactly equal to ``1e100``. Function :func:`fmod` + in the :mod:`math` module returns a result whose sign matches the sign of the + first argument instead, and so returns ``-1e-100`` in this case. Which approach + is more appropriate depends on the application. + +.. [#] If x is very close to an exact integer multiple of y, it's possible for + ``floor(x/y)`` to be one larger than ``(x-x%y)/y`` due to rounding. In such + cases, Python returns the latter result, in order to preserve that + ``divmod(x,y)[0] * y + x % y`` be very close to ``x``. + +.. [#] The implementation computes this efficiently, without constructing lists or + sorting. + +.. [#] Earlier versions of Python used lexicographic comparison of the sorted (key, + value) lists, but this was very expensive for the common case of comparing for + equality. An even earlier version of Python compared dictionaries by identity + only, but this caused surprises because people expected to be able to test a + dictionary for emptiness by comparing it to ``{}``. + diff --git a/Doc/reference/index.rst b/Doc/reference/index.rst new file mode 100644 index 0000000..18bf053 --- /dev/null +++ b/Doc/reference/index.rst @@ -0,0 +1,30 @@ +.. 
_reference-index: + +################################# + The Python language reference +################################# + +:Release: |version| +:Date: |today| + +This reference manual describes the syntax and "core semantics" of the +language. It is terse, but attempts to be exact and complete. The semantics of +non-essential built-in object types and of the built-in functions and modules +are described in :ref:`library-index`. For an informal introduction to the +language, see :ref:`tutorial-index`. For C or C++ programmers, two additional +manuals exist: :ref:`extending-index` describes the high-level picture of how to +write a Python extension module, and the :ref:`c-api-index` describes the +interfaces available to C/C++ programmers in detail. + +.. toctree:: + :maxdepth: 2 + + introduction.rst + lexical_analysis.rst + datamodel.rst + executionmodel.rst + expressions.rst + simple_stmts.rst + compound_stmts.rst + toplevel_components.rst + diff --git a/Doc/reference/introduction.rst b/Doc/reference/introduction.rst new file mode 100644 index 0000000..0d53719 --- /dev/null +++ b/Doc/reference/introduction.rst @@ -0,0 +1,138 @@ + +.. _introduction: + +************ +Introduction +************ + +This reference manual describes the Python programming language. It is not +intended as a tutorial. + +While I am trying to be as precise as possible, I chose to use English rather +than formal specifications for everything except syntax and lexical analysis. +This should make the document more understandable to the average reader, but +will leave room for ambiguities. Consequently, if you were coming from Mars and +tried to re-implement Python from this document alone, you might have to guess +things and in fact you would probably end up implementing quite a different +language. On the other hand, if you are using Python and wonder what the precise +rules about a particular area of the language are, you should definitely be able +to find them here. 
If you would like to see a more formal definition of the +language, maybe you could volunteer your time --- or invent a cloning machine +:-). + +It is dangerous to add too many implementation details to a language reference +document --- the implementation may change, and other implementations of the +same language may work differently. On the other hand, there is currently only +one Python implementation in widespread use (although alternate implementations +exist), and its particular quirks are sometimes worth being mentioned, +especially where the implementation imposes additional limitations. Therefore, +you'll find short "implementation notes" sprinkled throughout the text. + +Every Python implementation comes with a number of built-in and standard +modules. These are documented in :ref:`library-index`. A few built-in modules +are mentioned when they interact in a significant way with the language +definition. + + +.. _implementations: + +Alternate Implementations +========================= + +Though there is one Python implementation which is by far the most popular, +there are some alternate implementations which are of particular interest to +different audiences. + +Known implementations include: + +CPython + This is the original and most-maintained implementation of Python, written in C. + New language features generally appear here first. + +Jython + Python implemented in Java. This implementation can be used as a scripting + language for Java applications, or can be used to create applications using the + Java class libraries. It is also often used to create tests for Java libraries. + More information can be found at `the Jython website <http://www.jython.org/>`_. + +Python for .NET + This implementation actually uses the CPython implementation, but is a managed + .NET application and makes .NET libraries available. This was created by Brian + Lloyd. 
For more information, see the `Python for .NET home page + <http://www.zope.org/Members/Brian/PythonNet>`_. + +IronPython + An alternate Python for .NET. Unlike Python.NET, this is a complete Python + implementation that generates IL, and compiles Python code directly to .NET + assemblies. It was created by Jim Hugunin, the original creator of Jython. For + more information, see `the IronPython website + <http://workspaces.gotdotnet.com/ironpython>`_. + +PyPy + An implementation of Python written in Python; even the bytecode interpreter is + written in Python. This is executed using CPython as the underlying + interpreter. One of the goals of the project is to encourage experimentation + with the language itself by making it easier to modify the interpreter (since it + is written in Python). Additional information is available on `the PyPy + project's home page <http://codespeak.net/pypy/>`_. + +Each of these implementations varies in some way from the language as documented +in this manual, or introduces specific information beyond what's covered in the +standard Python documentation. Please refer to the implementation-specific +documentation to determine what else you need to know about the specific +implementation you're using. + + +.. _notation: + +Notation +======== + +.. index:: + single: BNF + single: grammar + single: syntax + single: notation + +The descriptions of lexical analysis and syntax use a modified BNF grammar +notation. This uses the following style of definition: + +.. productionlist:: * + name: `lc_letter` (`lc_letter` | "_")* + lc_letter: "a"..."z" + +The first line says that a ``name`` is an ``lc_letter`` followed by a sequence +of zero or more ``lc_letter``\ s and underscores. An ``lc_letter`` in turn is +any of the single characters ``'a'`` through ``'z'``. (This rule is actually +adhered to for the names defined in lexical and grammar rules in this document.) 
+ +Each rule begins with a name (which is the name defined by the rule) and +``::=``. A vertical bar (``|``) is used to separate alternatives; it is the +least binding operator in this notation. A star (``*``) means zero or more +repetitions of the preceding item; likewise, a plus (``+``) means one or more +repetitions, and a phrase enclosed in square brackets (``[ ]``) means zero or +one occurrences (in other words, the enclosed phrase is optional). The ``*`` +and ``+`` operators bind as tightly as possible; parentheses are used for +grouping. Literal strings are enclosed in quotes. White space is only +meaningful to separate tokens. Rules are normally contained on a single line; +rules with many alternatives may be formatted alternatively with each line after +the first beginning with a vertical bar. + +.. index:: + single: lexical definitions + single: ASCII@ASCII + +In lexical definitions (as the example above), two more conventions are used: +Two literal characters separated by three dots mean a choice of any single +character in the given (inclusive) range of ASCII characters. A phrase between +angular brackets (``<...>``) gives an informal description of the symbol +defined; e.g., this could be used to describe the notion of 'control character' +if needed. + +Even though the notation used is almost the same, there is a big difference +between the meaning of lexical and syntactic definitions: a lexical definition +operates on the individual characters of the input source, while a syntax +definition operates on the stream of tokens generated by the lexical analysis. +All uses of BNF in the next chapter ("Lexical Analysis") are lexical +definitions; uses in subsequent chapters are syntactic definitions. + diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst new file mode 100644 index 0000000..35e92cf --- /dev/null +++ b/Doc/reference/lexical_analysis.rst @@ -0,0 +1,758 @@ + +.. 
_lexical: + +**************** +Lexical analysis +**************** + +.. index:: + single: lexical analysis + single: parser + single: token + +A Python program is read by a *parser*. Input to the parser is a stream of +*tokens*, generated by the *lexical analyzer*. This chapter describes how the +lexical analyzer breaks a file into tokens. + +Python uses the 7-bit ASCII character set for program text. + +.. versionadded:: 2.3 + An encoding declaration can be used to indicate that string literals and + comments use an encoding different from ASCII. + +For compatibility with older versions, Python only warns if it finds 8-bit +characters; those warnings should be corrected by either declaring an explicit +encoding, or using escape sequences if those bytes are binary data, instead of +characters. + +The run-time character set depends on the I/O devices connected to the program +but is generally a superset of ASCII. + +**Future compatibility note:** It may be tempting to assume that the character +set for 8-bit characters is ISO Latin-1 (an ASCII superset that covers most +western languages that use the Latin alphabet), but it is possible that in the +future Unicode text editors will become common. These generally use the UTF-8 +encoding, which is also an ASCII superset, but with very different use for the +characters with ordinals 128-255. While there is no consensus on this subject +yet, it is unwise to assume either Latin-1 or UTF-8, even though the current +implementation appears to favor Latin-1. This applies both to the source +character set and the run-time character set. + + +.. _line-structure: + +Line structure +============== + +.. index:: single: line structure + +A Python program is divided into a number of *logical lines*. + + +.. _logical: + +Logical lines +------------- + +.. index:: + single: logical line + single: physical line + single: line joining + single: NEWLINE token + +The end of a logical line is represented by the token NEWLINE. 
Statements +cannot cross logical line boundaries except where NEWLINE is allowed by the +syntax (e.g., between statements in compound statements). A logical line is +constructed from one or more *physical lines* by following the explicit or +implicit *line joining* rules. + + +.. _physical: + +Physical lines +-------------- + +A physical line is a sequence of characters terminated by an end-of-line +sequence. In source files, any of the standard platform line termination +sequences can be used - the Unix form using ASCII LF (linefeed), the Windows +form using the ASCII sequence CR LF (return followed by linefeed), or the +Macintosh form using the ASCII CR (return) character. All of these forms can be +used equally, regardless of platform. + +When embedding Python, source code strings should be passed to Python APIs using +the standard C conventions for newline characters (the ``\n`` character, +representing ASCII LF, is the line terminator). + + +.. _comments: + +Comments +-------- + +.. index:: + single: comment + single: hash character + +A comment starts with a hash character (``#``) that is not part of a string +literal, and ends at the end of the physical line. A comment signifies the end +of the logical line unless the implicit line joining rules are invoked. Comments +are ignored by the syntax; they are not tokens. + + +.. _encodings: + +Encoding declarations +--------------------- + +.. index:: + single: source character set + single: encodings + +If a comment in the first or second line of the Python script matches the +regular expression ``coding[=:]\s*([-\w.]+)``, this comment is processed as an +encoding declaration; the first group of this expression names the encoding of +the source code file. The recommended forms of this expression are :: + + # -*- coding: <encoding-name> -*- + +which is recognized also by GNU Emacs, and :: + + # vim:fileencoding=<encoding-name> + +which is recognized by Bram Moolenaar's VIM. 
In addition, if the first bytes of +the file are the UTF-8 byte-order mark (``'\xef\xbb\xbf'``), the declared file +encoding is UTF-8 (this is supported, among others, by Microsoft's +:program:`notepad`). + +If an encoding is declared, the encoding name must be recognized by Python. The +encoding is used for all lexical analysis, in particular to find the end of a +string, and to interpret the contents of Unicode literals. String literals are +converted to Unicode for syntactical analysis, then converted back to their +original encoding before interpretation starts. The encoding declaration must +appear on a line of its own. + +.. % XXX there should be a list of supported encodings. + + +.. _explicit-joining: + +Explicit line joining +--------------------- + +.. index:: + single: physical line + single: line joining + single: line continuation + single: backslash character + +Two or more physical lines may be joined into logical lines using backslash +characters (``\``), as follows: when a physical line ends in a backslash that is +not part of a string literal or comment, it is joined with the following forming +a single logical line, deleting the backslash and the following end-of-line +character. For example: + +.. % + +:: + + if 1900 < year < 2100 and 1 <= month <= 12 \ + and 1 <= day <= 31 and 0 <= hour < 24 \ + and 0 <= minute < 60 and 0 <= second < 60: # Looks like a valid date + return 1 + +A line ending in a backslash cannot carry a comment. A backslash does not +continue a comment. A backslash does not continue a token except for string +literals (i.e., tokens other than string literals cannot be split across +physical lines using a backslash). A backslash is illegal elsewhere on a line +outside a string literal. + + +.. _implicit-joining: + +Implicit line joining +--------------------- + +Expressions in parentheses, square brackets or curly braces can be split over +more than one physical line without using backslashes. 
For example:: + + month_names = ['Januari', 'Februari', 'Maart', # These are the + 'April', 'Mei', 'Juni', # Dutch names + 'Juli', 'Augustus', 'September', # for the months + 'Oktober', 'November', 'December'] # of the year + +Implicitly continued lines can carry comments. The indentation of the +continuation lines is not important. Blank continuation lines are allowed. +There is no NEWLINE token between implicit continuation lines. Implicitly +continued lines can also occur within triple-quoted strings (see below); in that +case they cannot carry comments. + + +.. _blank-lines: + +Blank lines +----------- + +.. index:: single: blank line + +A logical line that contains only spaces, tabs, formfeeds and possibly a +comment, is ignored (i.e., no NEWLINE token is generated). During interactive +input of statements, handling of a blank line may differ depending on the +implementation of the read-eval-print loop. In the standard implementation, an +entirely blank logical line (i.e. one containing not even whitespace or a +comment) terminates a multi-line statement. + + +.. _indentation: + +Indentation +----------- + +.. index:: + single: indentation + single: whitespace + single: leading whitespace + single: space + single: tab + single: grouping + single: statement grouping + +Leading whitespace (spaces and tabs) at the beginning of a logical line is used +to compute the indentation level of the line, which in turn is used to determine +the grouping of statements. + +First, tabs are replaced (from left to right) by one to eight spaces such that +the total number of characters up to and including the replacement is a multiple +of eight (this is intended to be the same rule as used by Unix). The total +number of spaces preceding the first non-blank character then determines the +line's indentation. Indentation cannot be split over multiple physical lines +using backslashes; the whitespace up to the first backslash determines the +indentation. 
+ +**Cross-platform compatibility note:** because of the nature of text editors on +non-UNIX platforms, it is unwise to use a mixture of spaces and tabs for the +indentation in a single source file. It should also be noted that different +platforms may explicitly limit the maximum indentation level. + +A formfeed character may be present at the start of the line; it will be ignored +for the indentation calculations above. Formfeed characters occurring elsewhere +in the leading whitespace have an undefined effect (for instance, they may reset +the space count to zero). + +.. index:: + single: INDENT token + single: DEDENT token + +The indentation levels of consecutive lines are used to generate INDENT and +DEDENT tokens, using a stack, as follows. + +Before the first line of the file is read, a single zero is pushed on the stack; +this will never be popped off again. The numbers pushed on the stack will +always be strictly increasing from bottom to top. At the beginning of each +logical line, the line's indentation level is compared to the top of the stack. +If it is equal, nothing happens. If it is larger, it is pushed on the stack, and +one INDENT token is generated. If it is smaller, it *must* be one of the +numbers occurring on the stack; all numbers on the stack that are larger are +popped off, and for each number popped off a DEDENT token is generated. At the +end of the file, a DEDENT token is generated for each number remaining on the +stack that is larger than zero. 
+ +Here is an example of a correctly (though confusingly) indented piece of Python +code:: + + def perm(l): + # Compute the list of all permutations of l + if len(l) <= 1: + return [l] + r = [] + for i in range(len(l)): + s = l[:i] + l[i+1:] + p = perm(s) + for x in p: + r.append(l[i:i+1] + x) + return r + +The following example shows various indentation errors:: + + def perm(l): # error: first line indented + for i in range(len(l)): # error: not indented + s = l[:i] + l[i+1:] + p = perm(l[:i] + l[i+1:]) # error: unexpected indent + for x in p: + r.append(l[i:i+1] + x) + return r # error: inconsistent dedent + +(Actually, the first three errors are detected by the parser; only the last +error is found by the lexical analyzer --- the indentation of ``return r`` does +not match a level popped off the stack.) + + +.. _whitespace: + +Whitespace between tokens +------------------------- + +Except at the beginning of a logical line or in string literals, the whitespace +characters space, tab and formfeed can be used interchangeably to separate +tokens. Whitespace is needed between two tokens only if their concatenation +could otherwise be interpreted as a different token (e.g., ab is one token, but +a b is two tokens). + + +.. _other-tokens: + +Other tokens +============ + +Besides NEWLINE, INDENT and DEDENT, the following categories of tokens exist: +*identifiers*, *keywords*, *literals*, *operators*, and *delimiters*. Whitespace +characters (other than line terminators, discussed earlier) are not tokens, but +serve to delimit tokens. Where ambiguity exists, a token comprises the longest +possible string that forms a legal token, when read from left to right. + + +.. _identifiers: + +Identifiers and keywords +======================== + +.. index:: + single: identifier + single: name + +Identifiers (also referred to as *names*) are described by the following lexical +definitions: + +.. 
productionlist:: + identifier: (`letter`|"_") (`letter` | `digit` | "_")* + letter: `lowercase` | `uppercase` + lowercase: "a"..."z" + uppercase: "A"..."Z" + digit: "0"..."9" + +Identifiers are unlimited in length. Case is significant. + + +.. _keywords: + +Keywords +-------- + +.. index:: + single: keyword + single: reserved word + +The following identifiers are used as reserved words, or *keywords* of the +language, and cannot be used as ordinary identifiers. They must be spelled +exactly as written here:: + + and def for is raise + as del from lambda return + assert elif global not try + break else if or while + class except import pass with + continue finally in print yield + +.. versionchanged:: 2.4 + :const:`None` became a constant and is now recognized by the compiler as a name + for the built-in object :const:`None`. Although it is not a keyword, you cannot + assign a different object to it. + +.. versionchanged:: 2.5 + Both :keyword:`as` and :keyword:`with` are only recognized when the + ``with_statement`` future feature has been enabled. It will always be enabled in + Python 2.6. See section :ref:`with` for details. Note that using :keyword:`as` + and :keyword:`with` as identifiers will always issue a warning, even when the + ``with_statement`` future directive is not in effect. + + +.. _id-classes: + +Reserved classes of identifiers +------------------------------- + +Certain classes of identifiers (besides keywords) have special meanings. These +classes are identified by the patterns of leading and trailing underscore +characters: + +``_*`` + Not imported by ``from module import *``. The special identifier ``_`` is used + in the interactive interpreter to store the result of the last evaluation; it is + stored in the :mod:`__builtin__` module. When not in interactive mode, ``_`` + has no special meaning and is not defined. See section :ref:`import`. + + .. 
note:: + + The name ``_`` is often used in conjunction with internationalization; + refer to the documentation for the :mod:`gettext` module for more + information on this convention. + +``__*__`` + System-defined names. These names are defined by the interpreter and its + implementation (including the standard library); applications should not expect + to define additional names using this convention. The set of names of this + class defined by Python may be extended in future versions. See section + :ref:`specialnames`. + +``__*`` + Class-private names. Names in this category, when used within the context of a + class definition, are re-written to use a mangled form to help avoid name + clashes between "private" attributes of base and derived classes. See section + :ref:`atom-identifiers`. + + +.. _literals: + +Literals +======== + +.. index:: + single: literal + single: constant + +Literals are notations for constant values of some built-in types. + + +.. _strings: + +String literals +--------------- + +.. index:: single: string literal + +String literals are described by the following lexical definitions: + +.. index:: single: ASCII@ASCII + +.. productionlist:: + stringliteral: [`stringprefix`](`shortstring` | `longstring`) + stringprefix: "r" | "u" | "ur" | "R" | "U" | "UR" | "Ur" | "uR" + shortstring: "'" `shortstringitem`* "'" | '"' `shortstringitem`* '"' + longstring: "'''" `longstringitem`* "'''" + : | '"""' `longstringitem`* '"""' + shortstringitem: `shortstringchar` | `escapeseq` + longstringitem: `longstringchar` | `escapeseq` + shortstringchar: <any source character except "\" or newline or the quote> + longstringchar: <any source character except "\"> + escapeseq: "\" <any ASCII character> + +One syntactic restriction not indicated by these productions is that whitespace +is not allowed between the :token:`stringprefix` and the rest of the string +literal.
The source character set is defined by the encoding declaration; it is +ASCII if no encoding declaration is given in the source file; see section +:ref:`encodings`. + +.. index:: + single: triple-quoted string + single: Unicode Consortium + single: string; Unicode + single: raw string + +In plain English: String literals can be enclosed in matching single quotes +(``'``) or double quotes (``"``). They can also be enclosed in matching groups +of three single or double quotes (these are generally referred to as +*triple-quoted strings*). The backslash (``\``) character is used to escape +characters that otherwise have a special meaning, such as newline, backslash +itself, or the quote character. String literals may optionally be prefixed with +a letter ``'r'`` or ``'R'``; such strings are called :dfn:`raw strings` and use +different rules for interpreting backslash escape sequences. A prefix of +``'u'`` or ``'U'`` makes the string a Unicode string. Unicode strings use the +Unicode character set as defined by the Unicode Consortium and ISO 10646. Some +additional escape sequences, described below, are available in Unicode strings. +The two prefix characters may be combined; in this case, ``'u'`` must appear +before ``'r'``. + +In triple-quoted strings, unescaped newlines and quotes are allowed (and are +retained), except that three unescaped quotes in a row terminate the string. (A +"quote" is the character used to open the string, i.e. either ``'`` or ``"``.) + +.. index:: + single: physical line + single: escape sequence + single: Standard C + single: C + +Unless an ``'r'`` or ``'R'`` prefix is present, escape sequences in strings are +interpreted according to rules similar to those used by Standard C. 
The +recognized escape sequences are: + ++-----------------+---------------------------------+-------+ +| Escape Sequence | Meaning | Notes | ++=================+=================================+=======+ +| ``\newline`` | Ignored | | ++-----------------+---------------------------------+-------+ +| ``\\`` | Backslash (``\``) | | ++-----------------+---------------------------------+-------+ +| ``\'`` | Single quote (``'``) | | ++-----------------+---------------------------------+-------+ +| ``\"`` | Double quote (``"``) | | ++-----------------+---------------------------------+-------+ +| ``\a`` | ASCII Bell (BEL) | | ++-----------------+---------------------------------+-------+ +| ``\b`` | ASCII Backspace (BS) | | ++-----------------+---------------------------------+-------+ +| ``\f`` | ASCII Formfeed (FF) | | ++-----------------+---------------------------------+-------+ +| ``\n`` | ASCII Linefeed (LF) | | ++-----------------+---------------------------------+-------+ +| ``\N{name}`` | Character named *name* in the | | +| | Unicode database (Unicode only) | | ++-----------------+---------------------------------+-------+ +| ``\r`` | ASCII Carriage Return (CR) | | ++-----------------+---------------------------------+-------+ +| ``\t`` | ASCII Horizontal Tab (TAB) | | ++-----------------+---------------------------------+-------+ +| ``\uxxxx`` | Character with 16-bit hex value | \(1) | +| | *xxxx* (Unicode only) | | ++-----------------+---------------------------------+-------+ +| ``\Uxxxxxxxx`` | Character with 32-bit hex value | \(2) | +| | *xxxxxxxx* (Unicode only) | | ++-----------------+---------------------------------+-------+ +| ``\v`` | ASCII Vertical Tab (VT) | | ++-----------------+---------------------------------+-------+ +| ``\ooo`` | Character with octal value | (3,5) | +| | *ooo* | | ++-----------------+---------------------------------+-------+ +| ``\xhh`` | Character with hex value *hh* | (4,5) | 
++-----------------+---------------------------------+-------+ + +.. index:: single: ASCII@ASCII + +Notes: + +(1) + Individual code units which form parts of a surrogate pair can be encoded using + this escape sequence. + +(2) + Any Unicode character can be encoded this way, but characters outside the Basic + Multilingual Plane (BMP) will be encoded using a surrogate pair if Python is + compiled to use 16-bit code units (the default). Individual code units which + form parts of a surrogate pair can be encoded using this escape sequence. + +(3) + As in Standard C, up to three octal digits are accepted. + +(4) + Unlike in Standard C, at most two hex digits are accepted. + +(5) + In a string literal, hexadecimal and octal escapes denote the byte with the + given value; it is not necessary that the byte encodes a character in the source + character set. In a Unicode literal, these escapes denote a Unicode character + with the given value. + +.. index:: single: unrecognized escape sequence + +Unlike Standard C, all unrecognized escape sequences are left in the string +unchanged, i.e., *the backslash is left in the string*. (This behavior is +useful when debugging: if an escape sequence is mistyped, the resulting output +is more easily recognized as broken.) It is also important to note that the +escape sequences marked as "(Unicode only)" in the table above fall into the +category of unrecognized escapes for non-Unicode string literals. + +When an ``'r'`` or ``'R'`` prefix is present, a character following a backslash +is included in the string without change, and *all backslashes are left in the +string*. For example, the string literal ``r"\n"`` consists of two characters: +a backslash and a lowercase ``'n'``. 
String quotes can be escaped with a +backslash, but the backslash remains in the string; for example, ``r"\""`` is a +valid string literal consisting of two characters: a backslash and a double +quote; ``r"\"`` is not a valid string literal (even a raw string cannot end in +an odd number of backslashes). Specifically, *a raw string cannot end in a +single backslash* (since the backslash would escape the following quote +character). Note also that a single backslash followed by a newline is +interpreted as those two characters as part of the string, *not* as a line +continuation. + +When an ``'r'`` or ``'R'`` prefix is used in conjunction with a ``'u'`` or +``'U'`` prefix, then the ``\uXXXX`` and ``\UXXXXXXXX`` escape sequences are +processed while *all other backslashes are left in the string*. For example, +the string literal ``ur"\u0062\n"`` consists of three Unicode characters: 'LATIN +SMALL LETTER B', 'REVERSE SOLIDUS', and 'LATIN SMALL LETTER N'. Backslashes can +be escaped with a preceding backslash; however, both remain in the string. As a +result, ``\uXXXX`` escape sequences are only recognized when there are an odd +number of backslashes. + + +.. _string-catenation: + +String literal concatenation +---------------------------- + +Multiple adjacent string literals (delimited by whitespace), possibly using +different quoting conventions, are allowed, and their meaning is the same as +their concatenation. Thus, ``"hello" 'world'`` is equivalent to +``"helloworld"``. This feature can be used to reduce the number of backslashes +needed, to split long strings conveniently across long lines, or even to add +comments to parts of strings, for example:: + + re.compile("[A-Za-z_]" # letter or underscore + "[A-Za-z0-9_]*" # letter, digit or underscore + ) + +Note that this feature is defined at the syntactical level, but implemented at +compile time. The '+' operator must be used to concatenate string expressions +at run time. 
Also note that literal concatenation can use different quoting +styles for each component (even mixing raw strings and triple quoted strings). + + +.. _numbers: + +Numeric literals +---------------- + +.. index:: + single: number + single: numeric literal + single: integer literal + single: plain integer literal + single: long integer literal + single: floating point literal + single: hexadecimal literal + single: octal literal + single: binary literal + single: decimal literal + single: imaginary literal + single: complex; literal + +There are four types of numeric literals: plain integers, long integers, +floating point numbers, and imaginary numbers. There are no complex literals +(complex numbers can be formed by adding a real number and an imaginary number). + +Note that numeric literals do not include a sign; a phrase like ``-1`` is +actually an expression composed of the unary operator '``-``' and the literal +``1``. + + +.. _integers: + +Integer literals +---------------- + +Integer literals are described by the following lexical definitions: + +.. productionlist:: + integer: `decimalinteger` | `octinteger` | `hexinteger` | `bininteger` + decimalinteger: `nonzerodigit` `digit`* | "0"+ + octinteger: "0" ("o" | "O") `octdigit`+ + hexinteger: "0" ("x" | "X") `hexdigit`+ + bininteger: "0" ("b" | "B") `bindigit`+ + nonzerodigit: "1"..."9" + octdigit: "0"..."7" + hexdigit: `digit` | "a"..."f" | "A"..."F" + bindigit: "0"..."1" + +Plain integer literals that are above the largest representable plain integer +(e.g., 2147483647 when using 32-bit arithmetic) are accepted as if they were +long integers instead. [#]_ There is no limit for long integer literals apart +from what can be stored in available memory. + +Note that leading zeros in a non-zero decimal number are not allowed. This is +for disambiguation with C-style octal literals, which Python used before version +3.0.
+ +Some examples of integer literals:: + + 7 2147483647 0o177 0b100110111 + 3 79228162514264337593543950336 0o377 0x100000000 + 79228162514264337593543950336 0xdeadbeef + + +.. _floating: + +Floating point literals +----------------------- + +Floating point literals are described by the following lexical definitions: + +.. productionlist:: + floatnumber: `pointfloat` | `exponentfloat` + pointfloat: [`intpart`] `fraction` | `intpart` "." + exponentfloat: (`intpart` | `pointfloat`) `exponent` + intpart: `digit`+ + fraction: "." `digit`+ + exponent: ("e" | "E") ["+" | "-"] `digit`+ + +Note that the integer and exponent parts are always interpreted using radix 10. +For example, ``077e010`` is legal, and denotes the same number as ``77e10``. The +allowed range of floating point literals is implementation-dependent. Some +examples of floating point literals:: + + 3.14 10. .001 1e100 3.14e-10 0e0 + +Note that numeric literals do not include a sign; a phrase like ``-1`` is +actually an expression composed of the unary operator ``-`` and the literal +``1``. + + +.. _imaginary: + +Imaginary literals +------------------ + +Imaginary literals are described by the following lexical definitions: + +.. productionlist:: + imagnumber: (`floatnumber` | `intpart`) ("j" | "J") + +An imaginary literal yields a complex number with a real part of 0.0. Complex +numbers are represented as a pair of floating point numbers and have the same +restrictions on their range. To create a complex number with a nonzero real +part, add a floating point number to it, e.g., ``(3+4j)``. Some examples of +imaginary literals:: + + 3.14j 10.j 10j .001j 1e100j 3.14e-10j + + +.. _operators: + +Operators +========= + +.. index:: single: operators + +The following tokens are operators:: + + + - * ** / // % + << >> & | ^ ~ + < > <= >= == != + + +.. _delimiters: + +Delimiters +========== + +.. index:: single: delimiters + +The following tokens serve as delimiters in the grammar:: + + ( ) [ ] { } @ + , : . 
` = ; + += -= *= /= //= %= + &= |= ^= >>= <<= **= + +The period can also occur in floating-point and imaginary literals. A sequence +of three periods has a special meaning as an ellipsis in slices. The second half +of the list, the augmented assignment operators, serve lexically as delimiters, +but also perform an operation. + +The following printing ASCII characters have special meaning as part of other +tokens or are otherwise significant to the lexical analyzer:: + + ' " # \ + +.. index:: single: ASCII@ASCII + +The following printing ASCII characters are not used in Python. Their +occurrence outside string literals and comments is an unconditional error:: + + $ ? + +.. rubric:: Footnotes + +.. [#] In versions of Python prior to 2.4, octal and hexadecimal literals in the range + just above the largest representable plain integer but below the largest + unsigned 32-bit number (on a machine using 32-bit arithmetic), 4294967296, were + taken as the negative plain integer obtained by subtracting 4294967296 from + their unsigned value. + diff --git a/Doc/reference/simple_stmts.rst b/Doc/reference/simple_stmts.rst new file mode 100644 index 0000000..fbc626f --- /dev/null +++ b/Doc/reference/simple_stmts.rst @@ -0,0 +1,825 @@ + +.. _simple: + +***************** +Simple statements +***************** + +.. index:: pair: simple; statement + +Simple statements are comprised within a single logical line. Several simple +statements may occur on a single line separated by semicolons. The syntax for +simple statements is: + +.. productionlist:: + simple_stmt: `expression_stmt` + : | `assert_stmt` + : | `assignment_stmt` + : | `augmented_assignment_stmt` + : | `pass_stmt` + : | `del_stmt` + : | `return_stmt` + : | `yield_stmt` + : | `raise_stmt` + : | `break_stmt` + : | `continue_stmt` + : | `import_stmt` + : | `global_stmt` + + +.. _exprstmts: + +Expression statements +===================== + +.. 
index:: pair: expression; statement + +Expression statements are used (mostly interactively) to compute and write a +value, or (usually) to call a procedure (a function that returns no meaningful +result; in Python, procedures return the value ``None``). Other uses of +expression statements are allowed and occasionally useful. The syntax for an +expression statement is: + +.. productionlist:: + expression_stmt: `expression_list` + +.. index:: pair: expression; list + +An expression statement evaluates the expression list (which may be a single +expression). + +.. index:: + builtin: repr + object: None + pair: string; conversion + single: output + pair: standard; output + pair: writing; values + pair: procedure; call + +In interactive mode, if the value is not ``None``, it is converted to a string +using the built-in :func:`repr` function and the resulting string is written to +standard output (see :func:`print`) on a line by itself. (Expression +statements yielding ``None`` are not written, so that procedure calls do not +cause any output.) + + +.. _assert: + +Assert statements +================= + +.. index:: + statement: assert + pair: debugging; assertions + +Assert statements are a convenient way to insert debugging assertions into a +program: + +.. productionlist:: + assert_stmt: "assert" `expression` ["," `expression`] + +The simple form, ``assert expression``, is equivalent to :: + + if __debug__: + if not expression: raise AssertionError + +The extended form, ``assert expression1, expression2``, is equivalent to :: + + if __debug__: + if not expression1: raise AssertionError, expression2 + +.. index:: + single: __debug__ + exception: AssertionError + +These equivalences assume that ``__debug__`` and :exc:`AssertionError` refer to +the built-in variables with those names. In the current implementation, the +built-in variable ``__debug__`` is ``True`` under normal circumstances, +``False`` when optimization is requested (command line option -O). 
The current +code generator emits no code for an assert statement when optimization is +requested at compile time. Note that it is unnecessary to include the source +code for the expression that failed in the error message; it will be displayed +as part of the stack trace. + +Assignments to ``__debug__`` are illegal. The value for the built-in variable +is determined when the interpreter starts. + + +.. _assignment: + +Assignment statements +===================== + +.. index:: + pair: assignment; statement + pair: binding; name + pair: rebinding; name + object: mutable + pair: attribute; assignment + +Assignment statements are used to (re)bind names to values and to modify +attributes or items of mutable objects: + +.. productionlist:: + assignment_stmt: (`target_list` "=")+ (`expression_list` | `yield_expression`) + target_list: `target` ("," `target`)* [","] + target: `identifier` + : | "(" `target_list` ")" + : | "[" `target_list` "]" + : | `attributeref` + : | `subscription` + : | `slicing` + +(See section :ref:`primaries` for the syntax definitions for the last three +symbols.) + +.. index:: pair: expression; list + +An assignment statement evaluates the expression list (remember that this can be +a single expression or a comma-separated list, the latter yielding a tuple) and +assigns the single resulting object to each of the target lists, from left to +right. + +.. index:: + single: target + pair: target; list + +Assignment is defined recursively depending on the form of the target (list). +When a target is part of a mutable object (an attribute reference, subscription +or slicing), the mutable object must ultimately perform the assignment and +decide about its validity, and may raise an exception if the assignment is +unacceptable. The rules observed by various types and the exceptions raised are +given with the definition of the object types (see section :ref:`types`). + +.. 
index:: triple: target; list; assignment + +Assignment of an object to a target list is recursively defined as follows. + +* If the target list is a single target: The object is assigned to that target. + +* If the target list is a comma-separated list of targets: The object must be a + sequence with the same number of items as there are targets in the target list, + and the items are assigned, from left to right, to the corresponding targets. + (This rule is relaxed as of Python 1.5; in earlier versions, the object had to + be a tuple. Since strings are sequences, an assignment like ``a, b = "xy"`` is + now legal as long as the string has the right length.) + +Assignment of an object to a single target is recursively defined as follows. + +* If the target is an identifier (name): + + .. index:: statement: global + +* If the name does not occur in a :keyword:`global` statement in the current + code block: the name is bound to the object in the current local namespace. + +* Otherwise: the name is bound to the object in the current global namespace. + + .. index:: single: destructor + + The name is rebound if it was already bound. This may cause the reference count + for the object previously bound to the name to reach zero, causing the object to + be deallocated and its destructor (if it has one) to be called. + + .. % nested + +* If the target is a target list enclosed in parentheses or in square brackets: + The object must be a sequence with the same number of items as there are targets + in the target list, and its items are assigned, from left to right, to the + corresponding targets. + + .. index:: pair: attribute; assignment + +* If the target is an attribute reference: The primary expression in the + reference is evaluated. It should yield an object with assignable attributes; + if this is not the case, :exc:`TypeError` is raised. 
That object is then asked + to assign the assigned object to the given attribute; if it cannot perform the + assignment, it raises an exception (usually but not necessarily + :exc:`AttributeError`). + + .. index:: + pair: subscription; assignment + object: mutable + +* If the target is a subscription: The primary expression in the reference is + evaluated. It should yield either a mutable sequence object (such as a list) or + a mapping object (such as a dictionary). Next, the subscript expression is + evaluated. + + .. index:: + object: sequence + object: list + + If the primary is a mutable sequence object (such as a list), the subscript must + yield a plain integer. If it is negative, the sequence's length is added to it. + The resulting value must be a nonnegative integer less than the sequence's + length, and the sequence is asked to assign the assigned object to its item with + that index. If the index is out of range, :exc:`IndexError` is raised + (assignment to a subscripted sequence cannot add new items to a list). + + .. index:: + object: mapping + object: dictionary + + If the primary is a mapping object (such as a dictionary), the subscript must + have a type compatible with the mapping's key type, and the mapping is then + asked to create a key/datum pair which maps the subscript to the assigned + object. This can either replace an existing key/value pair with the same key + value, or insert a new key/value pair (if no key with the same value existed). + + .. index:: pair: slicing; assignment + +* If the target is a slicing: The primary expression in the reference is + evaluated. It should yield a mutable sequence object (such as a list). The + assigned object should be a sequence object of the same type. Next, the lower + and upper bound expressions are evaluated, insofar they are present; defaults + are zero and the sequence's length. The bounds should evaluate to (small) + integers. If either bound is negative, the sequence's length is added to it. 
+ The resulting bounds are clipped to lie between zero and the sequence's length, + inclusive. Finally, the sequence object is asked to replace the slice with the + items of the assigned sequence. The length of the slice may be different from + the length of the assigned sequence, thus changing the length of the target + sequence, if the object allows it. + +(In the current implementation, the syntax for targets is taken to be the same +as for expressions, and invalid syntax is rejected during the code generation +phase, causing less detailed error messages.) + +WARNING: Although the definition of assignment implies that overlaps between the +left-hand side and the right-hand side are 'safe' (for example ``a, b = b, a`` +swaps two variables), overlaps *within* the collection of assigned-to variables +are not safe! For instance, the following program prints ``[0, 2]``:: + + x = [0, 1] + i = 0 + i, x[i] = 1, 2 + print x + + +.. _augassign: + +Augmented assignment statements +------------------------------- + +.. index:: + pair: augmented; assignment + single: statement; assignment, augmented + +Augmented assignment is the combination, in a single statement, of a binary +operation and an assignment statement: + +.. productionlist:: + augmented_assignment_stmt: `target` `augop` (`expression_list` | `yield_expression`) + augop: "+=" | "-=" | "*=" | "/=" | "%=" | "**=" + : | ">>=" | "<<=" | "&=" | "^=" | "|=" + +(See section :ref:`primaries` for the syntax definitions for the last three +symbols.) + +An augmented assignment evaluates the target (which, unlike normal assignment +statements, cannot be an unpacking) and the expression list, performs the binary +operation specific to the type of assignment on the two operands, and assigns +the result to the original target. The target is only evaluated once. + +An augmented assignment expression like ``x += 1`` can be rewritten as ``x = x + +1`` to achieve a similar, but not exactly equal effect. 
In the augmented +version, ``x`` is only evaluated once. Also, when possible, the actual operation +is performed *in-place*, meaning that rather than creating a new object and +assigning that to the target, the old object is modified instead. + +With the exception of assigning to tuples and multiple targets in a single +statement, the assignment done by augmented assignment statements is handled the +same way as normal assignments. Similarly, with the exception of the possible +*in-place* behavior, the binary operation performed by augmented assignment is +the same as the normal binary operations. + +For targets which are attribute references, the initial value is retrieved with +a :meth:`getattr` and the result is assigned with a :meth:`setattr`. Notice +that the two methods do not necessarily refer to the same variable. When +:meth:`getattr` refers to a class variable, :meth:`setattr` still writes to an +instance variable. For example:: + + class A: + x = 3 # class variable + a = A() + a.x += 1 # writes a.x as 4 leaving A.x as 3 + + +.. _pass: + +The :keyword:`pass` statement +============================= + +.. index:: statement: pass + +.. productionlist:: + pass_stmt: "pass" + +.. index:: pair: null; operation + +:keyword:`pass` is a null operation --- when it is executed, nothing happens. +It is useful as a placeholder when a statement is required syntactically, but no +code needs to be executed, for example:: + + def f(arg): pass # a function that does nothing (yet) + + class C: pass # a class with no methods (yet) + + +.. _del: + +The :keyword:`del` statement +============================ + +.. index:: statement: del + +.. productionlist:: + del_stmt: "del" `target_list` + +.. index:: + pair: deletion; target + triple: deletion; target; list + +Deletion is recursively defined very similar to the way assignment is defined. +Rather that spelling it out in full details, here are some hints. 
+ +Deletion of a target list recursively deletes each target, from left to right. + +.. index:: + statement: global + pair: unbinding; name + +Deletion of a name removes the binding of that name from the local or global +namespace, depending on whether the name occurs in a :keyword:`global` statement +in the same code block. If the name is unbound, a :exc:`NameError` exception +will be raised. + +.. index:: pair: free; variable + +It is illegal to delete a name from the local namespace if it occurs as a free +variable in a nested block. + +.. index:: pair: attribute; deletion + +Deletion of attribute references, subscriptions and slicings is passed to the +primary object involved; deletion of a slicing is in general equivalent to +assignment of an empty slice of the right type (but even this is determined by +the sliced object). + + +.. _return: + +The :keyword:`return` statement +=============================== + +.. index:: statement: return + +.. productionlist:: + return_stmt: "return" [`expression_list`] + +.. index:: + pair: function; definition + pair: class; definition + +:keyword:`return` may only occur syntactically nested in a function definition, +not within a nested class definition. + +If an expression list is present, it is evaluated, else ``None`` is substituted. + +:keyword:`return` leaves the current function call with the expression list (or +``None``) as return value. + +.. index:: keyword: finally + +When :keyword:`return` passes control out of a :keyword:`try` statement with a +:keyword:`finally` clause, that :keyword:`finally` clause is executed before +really leaving the function. + +In a generator function, the :keyword:`return` statement is not allowed to +include an :token:`expression_list`. In that context, a bare :keyword:`return` +indicates that the generator is done and will cause :exc:`StopIteration` to be +raised. + + +.. _yield: + +The :keyword:`yield` statement +============================== + +.. index:: statement: yield + +.. 
productionlist:: + yield_stmt: `yield_expression` + +.. index:: + single: generator; function + single: generator; iterator + single: function; generator + exception: StopIteration + +The :keyword:`yield` statement is only used when defining a generator function, +and is only used in the body of the generator function. Using a :keyword:`yield` +statement in a function definition is sufficient to cause that definition to +create a generator function instead of a normal function. + +When a generator function is called, it returns an iterator known as a generator +iterator, or more commonly, a generator. The body of the generator function is +executed by calling the generator's :meth:`__next__` method repeatedly until it +raises an exception. + +When a :keyword:`yield` statement is executed, the state of the generator is +frozen and the value of :token:`expression_list` is returned to +:meth:`__next__`'s caller. By "frozen" we mean that all local state is +retained, including the current bindings of local variables, the instruction +pointer, and the internal evaluation stack: enough information is saved so that +the next time :meth:`__next__` is invoked, the function can proceed exactly as +if the :keyword:`yield` statement were just another external call. + +As of Python version 2.5, the :keyword:`yield` statement is now allowed in the +:keyword:`try` clause of a :keyword:`try` ... :keyword:`finally` construct. If +the generator is not resumed before it is finalized (by reaching a zero +reference count or by being garbage collected), the generator-iterator's +:meth:`close` method will be called, allowing any pending :keyword:`finally` +clauses to execute. + +.. note:: + + In Python 2.2, the :keyword:`yield` statement is only allowed when the + ``generators`` feature has been enabled. It will always be enabled in Python + 2.3. This ``__future__`` import statement can be used to enable the feature:: + + from __future__ import generators + + +.. 
seealso:: + + :pep:`0255` - Simple Generators + The proposal for adding generators and the :keyword:`yield` statement to Python. + + :pep:`0342` - Coroutines via Enhanced Generators + The proposal that, among other generator enhancements, proposed allowing + :keyword:`yield` to appear inside a :keyword:`try` ... :keyword:`finally` block. + + +.. _raise: + +The :keyword:`raise` statement +============================== + +.. index:: statement: raise + +.. productionlist:: + raise_stmt: "raise" [`expression` ["," `expression` ["," `expression`]]] + +.. index:: + single: exception + pair: raising; exception + +If no expressions are present, :keyword:`raise` re-raises the last exception +that was active in the current scope. If no exception is active in the current +scope, a :exc:`TypeError` exception is raised indicating that this is an error +(if running under IDLE, a :exc:`Queue.Empty` exception is raised instead). + +Otherwise, :keyword:`raise` evaluates the expressions to get three objects, +using ``None`` as the value of omitted expressions. The first two objects are +used to determine the *type* and *value* of the exception. + +If the first object is an instance, the type of the exception is the class of +the instance, the instance itself is the value, and the second object must be +``None``. + +If the first object is a class, it becomes the type of the exception. The second +object is used to determine the exception value: If it is an instance of the +class, the instance becomes the exception value. If the second object is a +tuple, it is used as the argument list for the class constructor; if it is +``None``, an empty argument list is used, and any other object is treated as a +single argument to the constructor. The instance so created by calling the +constructor is used as the exception value. + +.. 
index:: object: traceback + +If a third object is present and not ``None``, it must be a traceback object +(see section :ref:`types`), and it is substituted instead of the current +location as the place where the exception occurred. If the third object is +present and not a traceback object or ``None``, a :exc:`TypeError` exception is +raised. The three-expression form of :keyword:`raise` is useful to re-raise an +exception transparently in an except clause, but :keyword:`raise` with no +expressions should be preferred if the exception to be re-raised was the most +recently active exception in the current scope. + +Additional information on exceptions can be found in section :ref:`exceptions`, +and information about handling exceptions is in section :ref:`try`. + + +.. _break: + +The :keyword:`break` statement +============================== + +.. index:: statement: break + +.. productionlist:: + break_stmt: "break" + +.. index:: + statement: for + statement: while + pair: loop; statement + +:keyword:`break` may only occur syntactically nested in a :keyword:`for` or +:keyword:`while` loop, but not nested in a function or class definition within +that loop. + +.. index:: keyword: else + +It terminates the nearest enclosing loop, skipping the optional :keyword:`else` +clause if the loop has one. + +.. index:: pair: loop control; target + +If a :keyword:`for` loop is terminated by :keyword:`break`, the loop control +target keeps its current value. + +.. index:: keyword: finally + +When :keyword:`break` passes control out of a :keyword:`try` statement with a +:keyword:`finally` clause, that :keyword:`finally` clause is executed before +really leaving the loop. + + +.. _continue: + +The :keyword:`continue` statement +================================= + +.. index:: statement: continue + +.. productionlist:: + continue_stmt: "continue" + +.. 
index:: + statement: for + statement: while + pair: loop; statement + keyword: finally + +:keyword:`continue` may only occur syntactically nested in a :keyword:`for` or +:keyword:`while` loop, but not nested in a function or class definition or +:keyword:`finally` statement within that loop. [#]_ It continues with the next +cycle of the nearest enclosing loop. + + +.. _import: + +The :keyword:`import` statement +=============================== + +.. index:: + statement: import + single: module; importing + pair: name; binding + keyword: from + +.. productionlist:: + import_stmt: "import" `module` ["as" `name`] ( "," `module` ["as" `name`] )* + : | "from" `relative_module` "import" `identifier` ["as" `name`] + : ( "," `identifier` ["as" `name`] )* + : | "from" `relative_module` "import" "(" `identifier` ["as" `name`] + : ( "," `identifier` ["as" `name`] )* [","] ")" + : | "from" `module` "import" "*" + module: (`identifier` ".")* `identifier` + relative_module: "."* `module` | "."+ + name: `identifier` + +Import statements are executed in two steps: (1) find a module, and initialize +it if necessary; (2) define a name or names in the local namespace (of the scope +where the :keyword:`import` statement occurs). The first form (without +:keyword:`from`) repeats these steps for each identifier in the list. The form +with :keyword:`from` performs step (1) once, and then performs step (2) +repeatedly. + +In this context, to "initialize" a built-in or extension module means to call an +initialization function that the module must provide for the purpose (in the +reference implementation, the function's name is obtained by prepending string +"init" to the module's name); to "initialize" a Python-coded module means to +execute the module's body. + +.. 
index:: + single: modules (in module sys) + single: sys.modules + pair: module; name + pair: built-in; module + pair: user-defined; module + module: sys + pair: filename; extension + triple: module; search; path + +The system maintains a table of modules that have been or are being initialized, +indexed by module name. This table is accessible as ``sys.modules``. When a +module name is found in this table, step (1) is finished. If not, a search for +a module definition is started. When a module is found, it is loaded. Details +of the module searching and loading process are implementation and platform +specific. It generally involves searching for a "built-in" module with the +given name and then searching a list of locations given as ``sys.path``. + +.. index:: + pair: module; initialization + exception: ImportError + single: code block + exception: SyntaxError + +If a built-in module is found, its built-in initialization code is executed and +step (1) is finished. If no matching file is found, :exc:`ImportError` is +raised. If a file is found, it is parsed, yielding an executable code block. If +a syntax error occurs, :exc:`SyntaxError` is raised. Otherwise, an empty module +of the given name is created and inserted in the module table, and then the code +block is executed in the context of this module. Exceptions during this +execution terminate step (1). + +When step (1) finishes without raising an exception, step (2) can begin. + +The first form of :keyword:`import` statement binds the module name in the local +namespace to the module object, and then goes on to import the next identifier, +if any. If the module name is followed by :keyword:`as`, the name following +:keyword:`as` is used as the local name for the module. + +.. 
index:: + pair: name; binding + exception: ImportError + +The :keyword:`from` form does not bind the module name: it goes through the list +of identifiers, looks each one of them up in the module found in step (1), and +binds the name in the local namespace to the object thus found. As with the +first form of :keyword:`import`, an alternate local name can be supplied by +specifying ":keyword:`as` localname". If a name is not found, +:exc:`ImportError` is raised. If the list of identifiers is replaced by a star +(``'*'``), all public names defined in the module are bound in the local +namespace of the :keyword:`import` statement.. + +.. index:: single: __all__ (optional module attribute) + +The *public names* defined by a module are determined by checking the module's +namespace for a variable named ``__all__``; if defined, it must be a sequence of +strings which are names defined or imported by that module. The names given in +``__all__`` are all considered public and are required to exist. If ``__all__`` +is not defined, the set of public names includes all names found in the module's +namespace which do not begin with an underscore character (``'_'``). +``__all__`` should contain the entire public API. It is intended to avoid +accidentally exporting items that are not part of the API (such as library +modules which were imported and used within the module). + +The :keyword:`from` form with ``*`` may only occur in a module scope. If the +wild card form of import --- ``import *`` --- is used in a function and the +function contains or is a nested block with free variables, the compiler will +raise a :exc:`SyntaxError`. + +.. index:: + keyword: from + statement: from + +.. index:: + triple: hierarchical; module; names + single: packages + single: __init__.py + +**Hierarchical module names:** when the module names contains one or more dots, +the module search path is carried out differently. 
The sequence of identifiers +up to the last dot is used to find a "package"; the final identifier is then +searched inside the package. A package is generally a subdirectory of a +directory on ``sys.path`` that has a file :file:`__init__.py`. [XXX Can't be +bothered to spell this out right now; see the URL +http://www.python.org/doc/essays/packages.html for more details, also about how +the module search works from inside a package.] + +.. % + +.. index:: builtin: __import__ + +The built-in function :func:`__import__` is provided to support applications +that determine which modules need to be loaded dynamically; refer to +:ref:`built-in-funcs` for additional information. + + +.. _future: + +Future statements +----------------- + +.. index:: pair: future; statement + +A :dfn:`future statement` is a directive to the compiler that a particular +module should be compiled using syntax or semantics that will be available in a +specified future release of Python. The future statement is intended to ease +migration to future versions of Python that introduce incompatible changes to +the language. It allows use of the new features on a per-module basis before +the release in which the feature becomes standard. + +.. productionlist:: * + future_statement: "from" "__future__" "import" feature ["as" name] + : ("," feature ["as" name])* + : | "from" "__future__" "import" "(" feature ["as" name] + : ("," feature ["as" name])* [","] ")" + feature: identifier + name: identifier + +A future statement must appear near the top of the module. The only lines that +can appear before a future statement are: + +* the module docstring (if any), +* comments, +* blank lines, and +* other future statements. + +The features recognized by Python 2.5 are ``absolute_import``, ``division``, +``generators``, ``nested_scopes`` and ``with_statement``. ``generators`` and +``nested_scopes`` are redundant in Python version 2.3 and above because they +are always enabled. 
+ +A future statement is recognized and treated specially at compile time: Changes +to the semantics of core constructs are often implemented by generating +different code. It may even be the case that a new feature introduces new +incompatible syntax (such as a new reserved word), in which case the compiler +may need to parse the module differently. Such decisions cannot be pushed off +until runtime. + +For any given release, the compiler knows which feature names have been defined, +and raises a compile-time error if a future statement contains a feature not +known to it. + +The direct runtime semantics are the same as for any import statement: there is +a standard module :mod:`__future__`, described later, and it will be imported in +the usual way at the time the future statement is executed. + +The interesting runtime semantics depend on the specific feature enabled by the +future statement. + +Note that there is nothing special about the statement:: + + import __future__ [as name] + +That is not a future statement; it's an ordinary import statement with no +special semantics or syntax restrictions. + +Code compiled by calls to the builtin functions :func:`exec` and :func:`compile` +that occur in a module :mod:`M` containing a future +statement will, by default, use the new syntax or semantics associated with the +future statement. This can, starting with Python 2.2 be controlled by optional +arguments to :func:`compile` --- see the documentation of that function +for details. + +A future statement typed at an interactive interpreter prompt will take effect +for the rest of the interpreter session. If an interpreter is started with the +:option:`-i` option, is passed a script name to execute, and the script includes +a future statement, it will be in effect in the interactive session started +after the script is executed. + + +.. _global: + +The :keyword:`global` statement +=============================== + +.. index:: statement: global + +.. 
productionlist:: + global_stmt: "global" `identifier` ("," `identifier`)* + +.. index:: triple: global; name; binding + +The :keyword:`global` statement is a declaration which holds for the entire +current code block. It means that the listed identifiers are to be interpreted +as globals. It would be impossible to assign to a global variable without +:keyword:`global`, although free variables may refer to globals without being +declared global. + +Names listed in a :keyword:`global` statement must not be used in the same code +block textually preceding that :keyword:`global` statement. + +Names listed in a :keyword:`global` statement must not be defined as formal +parameters or in a :keyword:`for` loop control target, :keyword:`class` +definition, function definition, or :keyword:`import` statement. + +(The current implementation does not enforce the latter two restrictions, but +programs should not abuse this freedom, as future implementations may enforce +them or silently change the meaning of the program.) + +.. index:: + builtin: exec + builtin: eval + builtin: compile + +**Programmer's note:** the :keyword:`global` is a directive to the parser. It +applies only to code parsed at the same time as the :keyword:`global` statement. +In particular, a :keyword:`global` statement contained in a string or code +object supplied to the builtin :func:`exec` function does not affect the code +block *containing* the function call, and code contained in such a string is +unaffected by :keyword:`global` statements in the code containing the function +call. The same applies to the :func:`eval` and :func:`compile` functions. + +.. rubric:: Footnotes + +.. [#] It may occur within an :keyword:`except` or :keyword:`else` clause. The + restriction on occurring in the :keyword:`try` clause is implementor's laziness + and will eventually be lifted. 
+ diff --git a/Doc/reference/toplevel_components.rst b/Doc/reference/toplevel_components.rst new file mode 100644 index 0000000..2149311 --- /dev/null +++ b/Doc/reference/toplevel_components.rst @@ -0,0 +1,123 @@ + +.. _top-level: + +******************** +Top-level components +******************** + +.. index:: single: interpreter + +The Python interpreter can get its input from a number of sources: from a script +passed to it as standard input or as program argument, typed in interactively, +from a module source file, etc. This chapter gives the syntax used in these +cases. + + +.. _programs: + +Complete Python programs +======================== + +.. index:: single: program + +.. index:: + module: sys + module: __main__ + module: __builtin__ + +While a language specification need not prescribe how the language interpreter +is invoked, it is useful to have a notion of a complete Python program. A +complete Python program is executed in a minimally initialized environment: all +built-in and standard modules are available, but none have been initialized, +except for :mod:`sys` (various system services), :mod:`__builtin__` (built-in +functions, exceptions and ``None``) and :mod:`__main__`. The latter is used to +provide the local and global namespace for execution of the complete program. + +The syntax for a complete Python program is that for file input, described in +the next section. + +.. index:: + single: interactive mode + module: __main__ + +The interpreter may also be invoked in interactive mode; in this case, it does +not read and execute a complete program but reads and executes one statement +(possibly compound) at a time. The initial environment is identical to that of +a complete program; each statement is executed in the namespace of +:mod:`__main__`. + +.. 
index:: + single: UNIX + single: command line + single: standard input + +Under Unix, a complete program can be passed to the interpreter in three forms: +with the :option:`-c` *string* command line option, as a file passed as the +first command line argument, or as standard input. If the file or standard input +is a tty device, the interpreter enters interactive mode; otherwise, it executes +the file as a complete program. + + +.. _file-input: + +File input +========== + +All input read from non-interactive files has the same form: + +.. productionlist:: + file_input: (NEWLINE | `statement`)* + +This syntax is used in the following situations: + +* when parsing a complete Python program (from a file or from a string); + +* when parsing a module; + +* when parsing a string passed to the :func:`exec` function; + + +.. _interactive: + +Interactive input +================= + +Input in interactive mode is parsed using the following grammar: + +.. productionlist:: + interactive_input: [`stmt_list`] NEWLINE | `compound_stmt` NEWLINE + +Note that a (top-level) compound statement must be followed by a blank line in +interactive mode; this is needed to help the parser detect the end of the input. + + +.. _expression-input: + +Expression input +================ + +.. index:: single: input + +.. index:: builtin: eval + +There are two forms of expression input. Both ignore leading whitespace. The +string argument to :func:`eval` must have the following form: + +.. productionlist:: + eval_input: `expression_list` NEWLINE* + +.. index:: builtin: input + +The input line read by :func:`input` must have the following form: + +.. productionlist:: + input_input: `expression_list` NEWLINE + +.. index:: + object: file + single: input; raw + single: readline() (file method) + +Note: to read 'raw' input line without interpretation, you can use the the +:meth:`readline` method of file objects, including ``sys.stdin``. 
+ diff --git a/Doc/tools/sphinx-build.py b/Doc/tools/sphinx-build.py new file mode 100644 index 0000000..20516b6 --- /dev/null +++ b/Doc/tools/sphinx-build.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +""" + Sphinx - Python documentation toolchain + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + :copyright: 2007 by Georg Brandl. + :license: Python license. +""" + +import sys + +if __name__ == '__main__': + from sphinx import main + sys.exit(main(sys.argv)) diff --git a/Doc/tools/sphinx-web.py b/Doc/tools/sphinx-web.py new file mode 100644 index 0000000..5f7b50b --- /dev/null +++ b/Doc/tools/sphinx-web.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +""" + Sphinx - Python documentation webserver + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + :copyright: 2007 by Georg Brandl. + :license: Python license. +""" + +import sys + +if __name__ == '__main__': + from sphinx.web import main + sys.exit(main(sys.argv)) diff --git a/Doc/tutorial/appetite.rst b/Doc/tutorial/appetite.rst new file mode 100644 index 0000000..f1c80e9 --- /dev/null +++ b/Doc/tutorial/appetite.rst @@ -0,0 +1,89 @@ +.. _tut-intro: + +********************** +Whetting Your Appetite +********************** + +If you do much work on computers, eventually you find that there's some task +you'd like to automate. For example, you may wish to perform a +search-and-replace over a large number of text files, or rename and rearrange a +bunch of photo files in a complicated way. Perhaps you'd like to write a small +custom database, or a specialized GUI application, or a simple game. + +If you're a professional software developer, you may have to work with several +C/C++/Java libraries but find the usual write/compile/test/re-compile cycle is +too slow. Perhaps you're writing a test suite for such a library and find +writing the testing code a tedious task. Or maybe you've written a program that +could use an extension language, and you don't want to design and implement a +whole new language for your application. 
+ +Python is just the language for you. + +You could write a Unix shell script or Windows batch files for some of these +tasks, but shell scripts are best at moving around files and changing text data, +not well-suited for GUI applications or games. You could write a C/C++/Java +program, but it can take a lot of development time to get even a first-draft +program. Python is simpler to use, available on Windows, MacOS X, and Unix +operating systems, and will help you get the job done more quickly. + +Python is simple to use, but it is a real programming language, offering much +more structure and support for large programs than shell scripts or batch files +can offer. On the other hand, Python also offers much more error checking than +C, and, being a *very-high-level language*, it has high-level data types built +in, such as flexible arrays and dictionaries. Because of its more general data +types Python is applicable to a much larger problem domain than Awk or even +Perl, yet many things are at least as easy in Python as in those languages. + +Python allows you to split your program into modules that can be reused in other +Python programs. It comes with a large collection of standard modules that you +can use as the basis of your programs --- or as examples to start learning to +program in Python. Some of these modules provide things like file I/O, system +calls, sockets, and even interfaces to graphical user interface toolkits like +Tk. + +Python is an interpreted language, which can save you considerable time during +program development because no compilation and linking is necessary. The +interpreter can be used interactively, which makes it easy to experiment with +features of the language, to write throw-away programs, or to test functions +during bottom-up program development. It is also a handy desk calculator. + +Python enables programs to be written compactly and readably. 
Programs written +in Python are typically much shorter than equivalent C, C++, or Java programs, +for several reasons: + +* the high-level data types allow you to express complex operations in a single + statement; + +* statement grouping is done by indentation instead of beginning and ending + brackets; + +* no variable or argument declarations are necessary. + +Python is *extensible*: if you know how to program in C it is easy to add a new +built-in function or module to the interpreter, either to perform critical +operations at maximum speed, or to link Python programs to libraries that may +only be available in binary form (such as a vendor-specific graphics library). +Once you are really hooked, you can link the Python interpreter into an +application written in C and use it as an extension or command language for that +application. + +By the way, the language is named after the BBC show "Monty Python's Flying +Circus" and has nothing to do with nasty reptiles. Making references to Monty +Python skits in documentation is not only allowed, it is encouraged! + +Now that you are all excited about Python, you'll want to examine it in some +more detail. Since the best way to learn a language is to use it, the tutorial +invites you to play with the Python interpreter as you read. + +.. % \section{Where From Here \label{where}} + +In the next chapter, the mechanics of using the interpreter are explained. This +is rather mundane information, but essential for trying out the examples shown +later. + +The rest of the tutorial introduces various features of the Python language and +system through examples, beginning with simple expressions, statements and data +types, through functions and modules, and finally touching upon advanced +concepts like exceptions and user-defined classes. + + diff --git a/Doc/tutorial/classes.rst b/Doc/tutorial/classes.rst new file mode 100644 index 0000000..b733e1e --- /dev/null +++ b/Doc/tutorial/classes.rst @@ -0,0 +1,792 @@ +.. 
_tut-classes: + +******* +Classes +******* + +Python's class mechanism adds classes to the language with a minimum of new +syntax and semantics. It is a mixture of the class mechanisms found in C++ and +Modula-3. As is true for modules, classes in Python do not put an absolute +barrier between definition and user, but rather rely on the politeness of the +user not to "break into the definition." The most important features of classes +are retained with full power, however: the class inheritance mechanism allows +multiple base classes, a derived class can override any methods of its base +class or classes, and a method can call the method of a base class with the same +name. Objects can contain an arbitrary amount of private data. + +In C++ terminology, all class members (including the data members) are *public*, +and all member functions are *virtual*. There are no special constructors or +destructors. As in Modula-3, there are no shorthands for referencing the +object's members from its methods: the method function is declared with an +explicit first argument representing the object, which is provided implicitly by +the call. As in Smalltalk, classes themselves are objects, albeit in the wider +sense of the word: in Python, all data types are objects. This provides +semantics for importing and renaming. Unlike C++ and Modula-3, built-in types +can be used as base classes for extension by the user. Also, like in C++ but +unlike in Modula-3, most built-in operators with special syntax (arithmetic +operators, subscripting etc.) can be redefined for class instances. + + +.. _tut-terminology: + +A Word About Terminology +======================== + +Lacking universally accepted terminology to talk about classes, I will make +occasional use of Smalltalk and C++ terms. (I would use Modula-3 terms, since +its object-oriented semantics are closer to those of Python than C++, but I +expect that few readers have heard of it.) 
+ +Objects have individuality, and multiple names (in multiple scopes) can be bound +to the same object. This is known as aliasing in other languages. This is +usually not appreciated on a first glance at Python, and can be safely ignored +when dealing with immutable basic types (numbers, strings, tuples). However, +aliasing has an (intended!) effect on the semantics of Python code involving +mutable objects such as lists, dictionaries, and most types representing +entities outside the program (files, windows, etc.). This is usually used to +the benefit of the program, since aliases behave like pointers in some respects. +For example, passing an object is cheap since only a pointer is passed by the +implementation; and if a function modifies an object passed as an argument, the +caller will see the change --- this eliminates the need for two different +argument passing mechanisms as in Pascal. + + +.. _tut-scopes: + +Python Scopes and Name Spaces +============================= + +Before introducing classes, I first have to tell you something about Python's +scope rules. Class definitions play some neat tricks with namespaces, and you +need to know how scopes and namespaces work to fully understand what's going on. +Incidentally, knowledge about this subject is useful for any advanced Python +programmer. + +Let's begin with some definitions. + +A *namespace* is a mapping from names to objects. Most namespaces are currently +implemented as Python dictionaries, but that's normally not noticeable in any +way (except for performance), and it may change in the future. Examples of +namespaces are: the set of built-in names (functions such as :func:`abs`, and +built-in exception names); the global names in a module; and the local names in +a function invocation. In a sense the set of attributes of an object also form +a namespace. 
The important thing to know about namespaces is that there is +absolutely no relation between names in different namespaces; for instance, two +different modules may both define a function "maximize" without confusion --- +users of the modules must prefix it with the module name. + +By the way, I use the word *attribute* for any name following a dot --- for +example, in the expression ``z.real``, ``real`` is an attribute of the object +``z``. Strictly speaking, references to names in modules are attribute +references: in the expression ``modname.funcname``, ``modname`` is a module +object and ``funcname`` is an attribute of it. In this case there happens to be +a straightforward mapping between the module's attributes and the global names +defined in the module: they share the same namespace! [#]_ + +Attributes may be read-only or writable. In the latter case, assignment to +attributes is possible. Module attributes are writable: you can write +``modname.the_answer = 42``. Writable attributes may also be deleted with the +:keyword:`del` statement. For example, ``del modname.the_answer`` will remove +the attribute :attr:`the_answer` from the object named by ``modname``. + +Name spaces are created at different moments and have different lifetimes. The +namespace containing the built-in names is created when the Python interpreter +starts up, and is never deleted. The global namespace for a module is created +when the module definition is read in; normally, module namespaces also last +until the interpreter quits. The statements executed by the top-level +invocation of the interpreter, either read from a script file or interactively, +are considered part of a module called :mod:`__main__`, so they have their own +global namespace. (The built-in names actually also live in a module; this is +called :mod:`__builtin__`.) 
+ +The local namespace for a function is created when the function is called, and +deleted when the function returns or raises an exception that is not handled +within the function. (Actually, forgetting would be a better way to describe +what actually happens.) Of course, recursive invocations each have their own +local namespace. + +A *scope* is a textual region of a Python program where a namespace is directly +accessible. "Directly accessible" here means that an unqualified reference to a +name attempts to find the name in the namespace. + +Although scopes are determined statically, they are used dynamically. At any +time during execution, there are at least three nested scopes whose namespaces +are directly accessible: the innermost scope, which is searched first, contains +the local names; the namespaces of any enclosing functions, which are searched +starting with the nearest enclosing scope; the middle scope, searched next, +contains the current module's global names; and the outermost scope (searched +last) is the namespace containing built-in names. + +If a name is declared global, then all references and assignments go directly to +the middle scope containing the module's global names. Otherwise, all variables +found outside of the innermost scope are read-only (an attempt to write to such +a variable will simply create a *new* local variable in the innermost scope, +leaving the identically named outer variable unchanged). + +Usually, the local scope references the local names of the (textually) current +function. Outside functions, the local scope references the same namespace as +the global scope: the module's namespace. Class definitions place yet another +namespace in the local scope. + +It is important to realize that scopes are determined textually: the global +scope of a function defined in a module is that module's namespace, no matter +from where or by what alias the function is called. 
On the other hand, the +actual search for names is done dynamically, at run time --- however, the +language definition is evolving towards static name resolution, at "compile" +time, so don't rely on dynamic name resolution! (In fact, local variables are +already determined statically.) + +A special quirk of Python is that assignments always go into the innermost +scope. Assignments do not copy data --- they just bind names to objects. The +same is true for deletions: the statement ``del x`` removes the binding of ``x`` +from the namespace referenced by the local scope. In fact, all operations that +introduce new names use the local scope: in particular, import statements and +function definitions bind the module or function name in the local scope. (The +:keyword:`global` statement can be used to indicate that particular variables +live in the global scope.) + + +.. _tut-firstclasses: + +A First Look at Classes +======================= + +Classes introduce a little bit of new syntax, three new object types, and some +new semantics. + + +.. _tut-classdefinition: + +Class Definition Syntax +----------------------- + +The simplest form of class definition looks like this:: + + class ClassName: + <statement-1> + . + . + . + <statement-N> + +Class definitions, like function definitions (:keyword:`def` statements) must be +executed before they have any effect. (You could conceivably place a class +definition in a branch of an :keyword:`if` statement, or inside a function.) + +In practice, the statements inside a class definition will usually be function +definitions, but other statements are allowed, and sometimes useful --- we'll +come back to this later. The function definitions inside a class normally have +a peculiar form of argument list, dictated by the calling conventions for +methods --- again, this is explained later. 
+ +When a class definition is entered, a new namespace is created, and used as the +local scope --- thus, all assignments to local variables go into this new +namespace. In particular, function definitions bind the name of the new +function here. + +When a class definition is left normally (via the end), a *class object* is +created. This is basically a wrapper around the contents of the namespace +created by the class definition; we'll learn more about class objects in the +next section. The original local scope (the one in effect just before the class +definition was entered) is reinstated, and the class object is bound here to the +class name given in the class definition header (:class:`ClassName` in the +example). + + +.. _tut-classobjects: + +Class Objects +------------- + +Class objects support two kinds of operations: attribute references and +instantiation. + +*Attribute references* use the standard syntax used for all attribute references +in Python: ``obj.name``. Valid attribute names are all the names that were in +the class's namespace when the class object was created. So, if the class +definition looked like this:: + + class MyClass: + "A simple example class" + i = 12345 + def f(self): + return 'hello world' + +then ``MyClass.i`` and ``MyClass.f`` are valid attribute references, returning +an integer and a function object, respectively. Class attributes can also be +assigned to, so you can change the value of ``MyClass.i`` by assignment. +:attr:`__doc__` is also a valid attribute, returning the docstring belonging to +the class: ``"A simple example class"``. + +Class *instantiation* uses function notation. Just pretend that the class +object is a parameterless function that returns a new instance of the class. +For example (assuming the above class):: + + x = MyClass() + +creates a new *instance* of the class and assigns this object to the local +variable ``x``. + +The instantiation operation ("calling" a class object) creates an empty object. 
+Many classes like to create objects with instances customized to a specific +initial state. Therefore a class may define a special method named +:meth:`__init__`, like this:: + + def __init__(self): + self.data = [] + +When a class defines an :meth:`__init__` method, class instantiation +automatically invokes :meth:`__init__` for the newly-created class instance. So +in this example, a new, initialized instance can be obtained by:: + + x = MyClass() + +Of course, the :meth:`__init__` method may have arguments for greater +flexibility. In that case, arguments given to the class instantiation operator +are passed on to :meth:`__init__`. For example, :: + + >>> class Complex: + ... def __init__(self, realpart, imagpart): + ... self.r = realpart + ... self.i = imagpart + ... + >>> x = Complex(3.0, -4.5) + >>> x.r, x.i + (3.0, -4.5) + + +.. _tut-instanceobjects: + +Instance Objects +---------------- + +Now what can we do with instance objects? The only operations understood by +instance objects are attribute references. There are two kinds of valid +attribute names, data attributes and methods. + +*data attributes* correspond to "instance variables" in Smalltalk, and to "data +members" in C++. Data attributes need not be declared; like local variables, +they spring into existence when they are first assigned to. For example, if +``x`` is the instance of :class:`MyClass` created above, the following piece of +code will print the value ``16``, without leaving a trace:: + + x.counter = 1 + while x.counter < 10: + x.counter = x.counter * 2 + print x.counter + del x.counter + +The other kind of instance attribute reference is a *method*. A method is a +function that "belongs to" an object. (In Python, the term method is not unique +to class instances: other object types can have methods as well. For example, +list objects have methods called append, insert, remove, sort, and so on. 
+However, in the following discussion, we'll use the term method exclusively to +mean methods of class instance objects, unless explicitly stated otherwise.) + +.. index:: object: method + +Valid method names of an instance object depend on its class. By definition, +all attributes of a class that are function objects define corresponding +methods of its instances. So in our example, ``x.f`` is a valid method +reference, since ``MyClass.f`` is a function, but ``x.i`` is not, since +``MyClass.i`` is not. But ``x.f`` is not the same thing as ``MyClass.f`` --- it +is a *method object*, not a function object. + + +.. _tut-methodobjects: + +Method Objects +-------------- + +Usually, a method is called right after it is bound:: + + x.f() + +In the :class:`MyClass` example, this will return the string ``'hello world'``. +However, it is not necessary to call a method right away: ``x.f`` is a method +object, and can be stored away and called at a later time. For example:: + + xf = x.f + while True: + print xf() + +will continue to print ``hello world`` until the end of time. + +What exactly happens when a method is called? You may have noticed that +``x.f()`` was called without an argument above, even though the function +definition for :meth:`f` specified an argument. What happened to the argument? +Surely Python raises an exception when a function that requires an argument is +called without any --- even if the argument isn't actually used... + +Actually, you may have guessed the answer: the special thing about methods is +that the object is passed as the first argument of the function. In our +example, the call ``x.f()`` is exactly equivalent to ``MyClass.f(x)``. In +general, calling a method with a list of *n* arguments is equivalent to calling +the corresponding function with an argument list that is created by inserting +the method's object before the first argument. 
+ +If you still don't understand how methods work, a look at the implementation can +perhaps clarify matters. When an instance attribute is referenced that isn't a +data attribute, its class is searched. If the name denotes a valid class +attribute that is a function object, a method object is created by packing +(pointers to) the instance object and the function object just found together in +an abstract object: this is the method object. When the method object is called +with an argument list, it is unpacked again, a new argument list is constructed +from the instance object and the original argument list, and the function object +is called with this new argument list. + + +.. _tut-remarks: + +Random Remarks +============== + +.. % [These should perhaps be placed more carefully...] + +Data attributes override method attributes with the same name; to avoid +accidental name conflicts, which may cause hard-to-find bugs in large programs, +it is wise to use some kind of convention that minimizes the chance of +conflicts. Possible conventions include capitalizing method names, prefixing +data attribute names with a small unique string (perhaps just an underscore), or +using verbs for methods and nouns for data attributes. + +Data attributes may be referenced by methods as well as by ordinary users +("clients") of an object. In other words, classes are not usable to implement +pure abstract data types. In fact, nothing in Python makes it possible to +enforce data hiding --- it is all based upon convention. (On the other hand, +the Python implementation, written in C, can completely hide implementation +details and control access to an object if necessary; this can be used by +extensions to Python written in C.) + +Clients should use data attributes with care --- clients may mess up invariants +maintained by the methods by stamping on their data attributes. 
Note that +clients may add data attributes of their own to an instance object without +affecting the validity of the methods, as long as name conflicts are avoided --- +again, a naming convention can save a lot of headaches here. + +There is no shorthand for referencing data attributes (or other methods!) from +within methods. I find that this actually increases the readability of methods: +there is no chance of confusing local variables and instance variables when +glancing through a method. + +Often, the first argument of a method is called ``self``. This is nothing more +than a convention: the name ``self`` has absolutely no special meaning to +Python. (Note, however, that by not following the convention your code may be +less readable to other Python programmers, and it is also conceivable that a +*class browser* program might be written that relies upon such a convention.) + +Any function object that is a class attribute defines a method for instances of +that class. It is not necessary that the function definition is textually +enclosed in the class definition: assigning a function object to a local +variable in the class is also ok. For example:: + + # Function defined outside the class + def f1(self, x, y): + return min(x, x+y) + + class C: + f = f1 + def g(self): + return 'hello world' + h = g + +Now ``f``, ``g`` and ``h`` are all attributes of class :class:`C` that refer to +function objects, and consequently they are all methods of instances of +:class:`C` --- ``h`` being exactly equivalent to ``g``. Note that this practice +usually only serves to confuse the reader of a program. + +Methods may call other methods by using method attributes of the ``self`` +argument:: + + class Bag: + def __init__(self): + self.data = [] + def add(self, x): + self.data.append(x) + def addtwice(self, x): + self.add(x) + self.add(x) + +Methods may reference global names in the same way as ordinary functions. 
The +global scope associated with a method is the module containing the class +definition. (The class itself is never used as a global scope!) While one +rarely encounters a good reason for using global data in a method, there are +many legitimate uses of the global scope: for one thing, functions and modules +imported into the global scope can be used by methods, as well as functions and +classes defined in it. Usually, the class containing the method is itself +defined in this global scope, and in the next section we'll find some good +reasons why a method would want to reference its own class! + + +.. _tut-inheritance: + +Inheritance +=========== + +Of course, a language feature would not be worthy of the name "class" without +supporting inheritance. The syntax for a derived class definition looks like +this:: + + class DerivedClassName(BaseClassName): + <statement-1> + . + . + . + <statement-N> + +The name :class:`BaseClassName` must be defined in a scope containing the +derived class definition. In place of a base class name, other arbitrary +expressions are also allowed. This can be useful, for example, when the base +class is defined in another module:: + + class DerivedClassName(modname.BaseClassName): + +Execution of a derived class definition proceeds the same as for a base class. +When the class object is constructed, the base class is remembered. This is +used for resolving attribute references: if a requested attribute is not found +in the class, the search proceeds to look in the base class. This rule is +applied recursively if the base class itself is derived from some other class. + +There's nothing special about instantiation of derived classes: +``DerivedClassName()`` creates a new instance of the class. Method references +are resolved as follows: the corresponding class attribute is searched, +descending down the chain of base classes if necessary, and the method reference +is valid if this yields a function object. 
+ +Derived classes may override methods of their base classes. Because methods +have no special privileges when calling other methods of the same object, a +method of a base class that calls another method defined in the same base class +may end up calling a method of a derived class that overrides it. (For C++ +programmers: all methods in Python are effectively :keyword:`virtual`.) + +An overriding method in a derived class may in fact want to extend rather than +simply replace the base class method of the same name. There is a simple way to +call the base class method directly: just call ``BaseClassName.methodname(self, +arguments)``. This is occasionally useful to clients as well. (Note that this +only works if the base class is defined or imported directly in the global +scope.) + + +.. _tut-multiple: + +Multiple Inheritance +-------------------- + +Python supports a limited form of multiple inheritance as well. A class +definition with multiple base classes looks like this:: + + class DerivedClassName(Base1, Base2, Base3): + <statement-1> + . + . + . + <statement-N> + +For old-style classes, the only rule is depth-first, left-to-right. Thus, if an +attribute is not found in :class:`DerivedClassName`, it is searched in +:class:`Base1`, then (recursively) in the base classes of :class:`Base1`, and +only if it is not found there, it is searched in :class:`Base2`, and so on. + +(To some people breadth first --- searching :class:`Base2` and :class:`Base3` +before the base classes of :class:`Base1` --- looks more natural. However, this +would require you to know whether a particular attribute of :class:`Base1` is +actually defined in :class:`Base1` or in one of its base classes before you can +figure out the consequences of a name conflict with an attribute of +:class:`Base2`. The depth-first rule makes no difference between direct and +inherited attributes of :class:`Base1`.) 
+ +For new-style classes, the method resolution order changes dynamically to +support cooperative calls to :func:`super`. This approach is known in some +other multiple-inheritance languages as call-next-method and is more powerful +than the super call found in single-inheritance languages. + +With new-style classes, dynamic ordering is necessary because all cases of +multiple inheritance exhibit one or more diamond relationships (where at +least one of the parent classes can be accessed through multiple paths from the +bottommost class). For example, all new-style classes inherit from +:class:`object`, so any case of multiple inheritance provides more than one path +to reach :class:`object`. To keep the base classes from being accessed more +than once, the dynamic algorithm linearizes the search order in a way that +preserves the left-to-right ordering specified in each class, that calls each +parent only once, and that is monotonic (meaning that a class can be subclassed +without affecting the precedence order of its parents). Taken together, these +properties make it possible to design reliable and extensible classes with +multiple inheritance. For more detail, see +http://www.python.org/download/releases/2.3/mro/. + + +.. _tut-private: + +Private Variables +================= + +There is limited support for class-private identifiers. Any identifier of the +form ``__spam`` (at least two leading underscores, at most one trailing +underscore) is textually replaced with ``_classname__spam``, where ``classname`` +is the current class name with leading underscore(s) stripped. This mangling is +done without regard to the syntactic position of the identifier, so it can be +used to define class-private instance and class variables, methods, variables +stored in globals, and even instance variables private to this class +on instances of *other* classes. Truncation may occur when the mangled name +would be longer than 255 characters. 
Outside classes, or when the class name +consists of only underscores, no mangling occurs. + +Name mangling is intended to give classes an easy way to define "private" +instance variables and methods, without having to worry about instance variables +defined by derived classes, or mucking with instance variables by code outside +the class. Note that the mangling rules are designed mostly to avoid accidents; +it still is possible for a determined soul to access or modify a variable that +is considered private. This can even be useful in special circumstances, such +as in the debugger, and that's one reason why this loophole is not closed. +(Buglet: derivation of a class with the same name as the base class makes use of +private variables of the base class possible.) + +Notice that code passed to ``exec()`` or ``eval()`` does not +consider the classname of the invoking class to be the current class; this is +similar to the effect of the ``global`` statement, the effect of which is +likewise restricted to code that is byte-compiled together. The same +restriction applies to ``getattr()``, ``setattr()`` and ``delattr()``, as well +as when referencing ``__dict__`` directly. + + +.. _tut-odds: + +Odds and Ends +============= + +Sometimes it is useful to have a data type similar to the Pascal "record" or C +"struct", bundling together a few named data items. An empty class definition +will do nicely:: + + class Employee: + pass + + john = Employee() # Create an empty employee record + + # Fill the fields of the record + john.name = 'John Doe' + john.dept = 'computer lab' + john.salary = 1000 + +A piece of Python code that expects a particular abstract data type can often be +passed a class that emulates the methods of that data type instead. For +instance, if you have a function that formats some data from a file object, you +can define a class with methods :meth:`read` and :meth:`readline` that get the +data from a string buffer instead, and pass it as an argument. + +.. 
% (Unfortunately, this +.. % technique has its limitations: a class can't define operations that +.. % are accessed by special syntax such as sequence subscripting or +.. % arithmetic operators, and assigning such a ``pseudo-file'' to +.. % \code{sys.stdin} will not cause the interpreter to read further input +.. % from it.) + +Instance method objects have attributes, too: ``m.im_self`` is the instance +object with the method :meth:`m`, and ``m.im_func`` is the function object +corresponding to the method. + + +.. _tut-exceptionclasses: + +Exceptions Are Classes Too +========================== + +User-defined exceptions are identified by classes as well. Using this mechanism +it is possible to create extensible hierarchies of exceptions. + +There are two new valid (semantic) forms for the raise statement:: + + raise Class, instance + + raise instance + +In the first form, ``instance`` must be an instance of :class:`Class` or of a +class derived from it. The second form is a shorthand for:: + + raise instance.__class__, instance + +A class in an except clause is compatible with an exception if it is the same +class or a base class thereof (but not the other way around --- an except clause +listing a derived class is not compatible with a base class). For example, the +following code will print B, C, D in that order:: + + class B: + pass + class C(B): + pass + class D(C): + pass + + for c in [B, C, D]: + try: + raise c() + except D: + print "D" + except C: + print "C" + except B: + print "B" + +Note that if the except clauses were reversed (with ``except B`` first), it +would have printed B, B, B --- the first matching except clause is triggered. + +When an error message is printed for an unhandled exception, the exception's +class name is printed, then a colon and a space, and finally the instance +converted to a string using the built-in function :func:`str`. + + +.. 
_tut-iterators: + +Iterators +========= + +By now you have probably noticed that most container objects can be looped over +using a :keyword:`for` statement:: + + for element in [1, 2, 3]: + print element + for element in (1, 2, 3): + print element + for key in {'one':1, 'two':2}: + print key + for char in "123": + print char + for line in open("myfile.txt"): + print line + +This style of access is clear, concise, and convenient. The use of iterators +pervades and unifies Python. Behind the scenes, the :keyword:`for` statement +calls :func:`iter` on the container object. The function returns an iterator +object that defines the method :meth:`__next__` which accesses elements in the +container one at a time. When there are no more elements, :meth:`__next__` +raises a :exc:`StopIteration` exception which tells the :keyword:`for` loop to +terminate. You can call the :meth:`__next__` method using the :func:`next` +builtin; this example shows how it all works:: + + >>> s = 'abc' + >>> it = iter(s) + >>> it + <iterator object at 0x00A1DB50> + >>> next(it) + 'a' + >>> next(it) + 'b' + >>> next(it) + 'c' + >>> next(it) + + Traceback (most recent call last): + File "<stdin>", line 1, in ? + next(it) + StopIteration + +Having seen the mechanics behind the iterator protocol, it is easy to add +iterator behavior to your classes. Define a :meth:`__iter__` method which +returns an object with a :meth:`__next__` method. If the class defines +:meth:`__next__`, then :meth:`__iter__` can just return ``self``:: + + class Reverse: + "Iterator for looping over a sequence backwards" + def __init__(self, data): + self.data = data + self.index = len(data) + def __iter__(self): + return self + def __next__(self): + if self.index == 0: + raise StopIteration + self.index = self.index - 1 + return self.data[self.index] + + >>> for char in Reverse('spam'): + ... print char + ... + m + a + p + s + + +.. 
_tut-generators: + +Generators +========== + +Generators are a simple and powerful tool for creating iterators. They are +written like regular functions but use the :keyword:`yield` statement whenever +they want to return data. Each time :func:`next` is called on it, the generator +resumes where it left off (it remembers all the data values and which statement +was last executed). An example shows that generators can be trivially easy to +create:: + + def reverse(data): + for index in range(len(data)-1, -1, -1): + yield data[index] + + >>> for char in reverse('golf'): + ... print char + ... + f + l + o + g + +Anything that can be done with generators can also be done with class-based +iterators as described in the previous section. What makes generators so +compact is that the :meth:`__iter__` and :meth:`__next__` methods are created +automatically. + +Another key feature is that the local variables and execution state are +automatically saved between calls. This made the function easier to write and +much more clear than an approach using instance variables like ``self.index`` +and ``self.data``. + +In addition to automatic method creation and saving program state, when +generators terminate, they automatically raise :exc:`StopIteration`. In +combination, these features make it easy to create iterators with no more effort +than writing a regular function. + + +.. _tut-genexps: + +Generator Expressions +===================== + +Some simple generators can be coded succinctly as expressions using a syntax +similar to list comprehensions but with parentheses instead of brackets. These +expressions are designed for situations where the generator is used right away +by an enclosing function. Generator expressions are more compact but less +versatile than full generator definitions and tend to be more memory friendly +than equivalent list comprehensions. 
+ +Examples:: + + >>> sum(i*i for i in range(10)) # sum of squares + 285 + + >>> xvec = [10, 20, 30] + >>> yvec = [7, 5, 3] + >>> sum(x*y for x,y in zip(xvec, yvec)) # dot product + 260 + + >>> from math import pi, sin + >>> sine_table = dict((x, sin(x*pi/180)) for x in range(0, 91)) + + >>> unique_words = set(word for line in page for word in line.split()) + + >>> valedictorian = max((student.gpa, student.name) for student in graduates) + + >>> data = 'golf' + >>> list(data[i] for i in range(len(data)-1,-1,-1)) + ['f', 'l', 'o', 'g'] + + + +.. rubric:: Footnotes + +.. [#] Except for one thing. Module objects have a secret read-only attribute called + :attr:`__dict__` which returns the dictionary used to implement the module's + namespace; the name :attr:`__dict__` is an attribute but not a global name. + Obviously, using this violates the abstraction of namespace implementation, and + should be restricted to things like post-mortem debuggers. + diff --git a/Doc/tutorial/controlflow.rst b/Doc/tutorial/controlflow.rst new file mode 100644 index 0000000..f6f41b3 --- /dev/null +++ b/Doc/tutorial/controlflow.rst @@ -0,0 +1,574 @@ +.. _tut-morecontrol: + +*********************** +More Control Flow Tools +*********************** + +Besides the :keyword:`while` statement just introduced, Python knows the usual +control flow statements known from other languages, with some twists. + + +.. _tut-if: + +:keyword:`if` Statements +======================== + +Perhaps the most well-known statement type is the :keyword:`if` statement. For +example:: + + >>> def raw_input(prompt): + ... import sys + ... sys.stdout.write(prompt) + ... sys.stdout.flush() + ... return sys.stdin.readline() + ... + >>> x = int(raw_input("Please enter an integer: ")) + >>> if x < 0: + ... x = 0 + ... print 'Negative changed to zero' + ... elif x == 0: + ... print 'Zero' + ... elif x == 1: + ... print 'Single' + ... else: + ... print 'More' + ... 
+ +There can be zero or more :keyword:`elif` parts, and the :keyword:`else` part is +optional. The keyword ':keyword:`elif`' is short for 'else if', and is useful +to avoid excessive indentation. An :keyword:`if` ... :keyword:`elif` ... +:keyword:`elif` ... sequence is a substitute for the :keyword:`switch` or +:keyword:`case` statements found in other languages. + +.. % Weird spacings happen here if the wrapping of the source text +.. % gets changed in the wrong way. + + +.. _tut-for: + +:keyword:`for` Statements +========================= + +.. index:: + statement: for + statement: for + +The :keyword:`for` statement in Python differs a bit from what you may be used +to in C or Pascal. Rather than always iterating over an arithmetic progression +of numbers (like in Pascal), or giving the user the ability to define both the +iteration step and halting condition (as in C), Python's :keyword:`for` statement +iterates over the items of any sequence (a list or a string), in the order that +they appear in the sequence. For example (no pun intended): + +.. % One suggestion was to give a real C example here, but that may only +.. % serve to confuse non-C programmers. + +:: + + >>> # Measure some strings: + ... a = ['cat', 'window', 'defenestrate'] + >>> for x in a: + ... print x, len(x) + ... + cat 3 + window 6 + defenestrate 12 + +It is not safe to modify the sequence being iterated over in the loop (this can +only happen for mutable sequence types, such as lists). If you need to modify +the list you are iterating over (for example, to duplicate selected items) you +must iterate over a copy. The slice notation makes this particularly +convenient:: + + >>> for x in a[:]: # make a slice copy of the entire list + ... if len(x) > 6: a.insert(0, x) + ... + >>> a + ['defenestrate', 'cat', 'window', 'defenestrate'] + + +.. 
_tut-range: + +The :func:`range` Function +========================== + +If you do need to iterate over a sequence of numbers, the built-in function +:func:`range` comes in handy. It generates lists containing arithmetic +progressions:: + + >>> range(10) + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + +The given end point is never part of the generated list; ``range(10)`` generates +a list of 10 values, the legal indices for items of a sequence of length 10. It +is possible to let the range start at another number, or to specify a different +increment (even negative; sometimes this is called the 'step'):: + + >>> range(5, 10) + [5, 6, 7, 8, 9] + >>> range(0, 10, 3) + [0, 3, 6, 9] + >>> range(-10, -100, -30) + [-10, -40, -70] + +To iterate over the indices of a sequence, combine :func:`range` and :func:`len` +as follows:: + + >>> a = ['Mary', 'had', 'a', 'little', 'lamb'] + >>> for i in range(len(a)): + ... print i, a[i] + ... + 0 Mary + 1 had + 2 a + 3 little + 4 lamb + + +.. _tut-break: + +:keyword:`break` and :keyword:`continue` Statements, and :keyword:`else` Clauses on Loops +========================================================================================= + +The :keyword:`break` statement, like in C, breaks out of the smallest enclosing +:keyword:`for` or :keyword:`while` loop. + +The :keyword:`continue` statement, also borrowed from C, continues with the next +iteration of the loop. + +Loop statements may have an ``else`` clause; it is executed when the loop +terminates through exhaustion of the list (with :keyword:`for`) or when the +condition becomes false (with :keyword:`while`), but not when the loop is +terminated by a :keyword:`break` statement. This is exemplified by the +following loop, which searches for prime numbers:: + + >>> for n in range(2, 10): + ... for x in range(2, n): + ... if n % x == 0: + ... print n, 'equals', x, '*', n/x + ... break + ... else: + ... # loop fell through without finding a factor + ... print n, 'is a prime number' + ... 
+ 2 is a prime number + 3 is a prime number + 4 equals 2 * 2 + 5 is a prime number + 6 equals 2 * 3 + 7 is a prime number + 8 equals 2 * 4 + 9 equals 3 * 3 + + +.. _tut-pass: + +:keyword:`pass` Statements +========================== + +The :keyword:`pass` statement does nothing. It can be used when a statement is +required syntactically but the program requires no action. For example:: + + >>> while True: + ... pass # Busy-wait for keyboard interrupt + ... + + +.. _tut-functions: + +Defining Functions +================== + +We can create a function that writes the Fibonacci series to an arbitrary +boundary:: + + >>> def fib(n): # write Fibonacci series up to n + ... """Print a Fibonacci series up to n.""" + ... a, b = 0, 1 + ... while b < n: + ... print b, + ... a, b = b, a+b + ... + >>> # Now call the function we just defined: + ... fib(2000) + 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987 1597 + +.. index:: + single: documentation strings + single: docstrings + single: strings, documentation + +The keyword :keyword:`def` introduces a function *definition*. It must be +followed by the function name and the parenthesized list of formal parameters. +The statements that form the body of the function start at the next line, and +must be indented. The first statement of the function body can optionally be a +string literal; this string literal is the function's documentation string, or +:dfn:`docstring`. + +There are tools which use docstrings to automatically produce online or printed +documentation, or to let the user interactively browse through code; it's good +practice to include docstrings in code that you write, so try to make a habit of +it. + +The *execution* of a function introduces a new symbol table used for the local +variables of the function. 
More precisely, all variable assignments in a +function store the value in the local symbol table; whereas variable references +first look in the local symbol table, then in the global symbol table, and then +in the table of built-in names. Thus, global variables cannot be directly +assigned a value within a function (unless named in a :keyword:`global` +statement), although they may be referenced. + +The actual parameters (arguments) to a function call are introduced in the local +symbol table of the called function when it is called; thus, arguments are +passed using *call by value* (where the *value* is always an object *reference*, +not the value of the object). [#]_ When a function calls another function, a new +local symbol table is created for that call. + +A function definition introduces the function name in the current symbol table. +The value of the function name has a type that is recognized by the interpreter +as a user-defined function. This value can be assigned to another name which +can then also be used as a function. This serves as a general renaming +mechanism:: + + >>> fib + <function fib at 10042ed0> + >>> f = fib + >>> f(100) + 1 1 2 3 5 8 13 21 34 55 89 + +You might object that ``fib`` is not a function but a procedure. In Python, +like in C, procedures are just functions that don't return a value. In fact, +technically speaking, procedures do return a value, albeit a rather boring one. +This value is called ``None`` (it's a built-in name). Writing the value +``None`` is normally suppressed by the interpreter if it would be the only value +written. You can see it if you really want to:: + + >>> print fib(0) + None + +It is simple to write a function that returns a list of the numbers of the +Fibonacci series, instead of printing it:: + + >>> def fib2(n): # return Fibonacci series up to n + ... """Return a list containing the Fibonacci series up to n.""" + ... result = [] + ... a, b = 0, 1 + ... while b < n: + ... 
result.append(b) # see below + ... a, b = b, a+b + ... return result + ... + >>> f100 = fib2(100) # call it + >>> f100 # write the result + [1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89] + +This example, as usual, demonstrates some new Python features: + +* The :keyword:`return` statement returns with a value from a function. + :keyword:`return` without an expression argument returns ``None``. Falling off + the end of a procedure also returns ``None``. + +* The statement ``result.append(b)`` calls a *method* of the list object + ``result``. A method is a function that 'belongs' to an object and is named + ``obj.methodname``, where ``obj`` is some object (this may be an expression), + and ``methodname`` is the name of a method that is defined by the object's type. + Different types define different methods. Methods of different types may have + the same name without causing ambiguity. (It is possible to define your own + object types and methods, using *classes*, as discussed later in this tutorial.) + The method :meth:`append` shown in the example is defined for list objects; it + adds a new element at the end of the list. In this example it is equivalent to + ``result = result + [b]``, but more efficient. + + +.. _tut-defining: + +More on Defining Functions +========================== + +It is also possible to define functions with a variable number of arguments. +There are three forms, which can be combined. + + +.. _tut-defaultargs: + +Default Argument Values +----------------------- + +The most useful form is to specify a default value for one or more arguments. +This creates a function that can be called with fewer arguments than it is +defined to allow. 
For example:: + + def raw_input(prompt): + import sys + sys.stdout.write(prompt) + sys.stdout.flush() + return sys.stdin.readline() + + def ask_ok(prompt, retries=4, complaint='Yes or no, please!'): + while True: + ok = raw_input(prompt) + if ok in ('y', 'ye', 'yes'): return True + if ok in ('n', 'no', 'nop', 'nope'): return False + retries = retries - 1 + if retries < 0: raise IOError, 'refusenik user' + print complaint + +This function can be called either like this: ``ask_ok('Do you really want to +quit?')`` or like this: ``ask_ok('OK to overwrite the file?', 2)``. + +This example also introduces the :keyword:`in` keyword. This tests whether or +not a sequence contains a certain value. + +The default values are evaluated at the point of function definition in the +*defining* scope, so that :: + + i = 5 + + def f(arg=i): + print arg + + i = 6 + f() + +will print ``5``. + +**Important warning:** The default value is evaluated only once. This makes a +difference when the default is a mutable object such as a list, dictionary, or +instances of most classes. For example, the following function accumulates the +arguments passed to it on subsequent calls:: + + def f(a, L=[]): + L.append(a) + return L + + print f(1) + print f(2) + print f(3) + +This will print :: + + [1] + [1, 2] + [1, 2, 3] + +If you don't want the default to be shared between subsequent calls, you can +write the function like this instead:: + + def f(a, L=None): + if L is None: + L = [] + L.append(a) + return L + + +.. _tut-keywordargs: + +Keyword Arguments +----------------- + +Functions can also be called using keyword arguments of the form ``keyword = +value``. For instance, the following function:: + + def parrot(voltage, state='a stiff', action='voom', type='Norwegian Blue'): + print "-- This parrot wouldn't", action, + print "if you put", voltage, "volts through it." + print "-- Lovely plumage, the", type + print "-- It's", state, "!" 
+ +could be called in any of the following ways:: + + parrot(1000) + parrot(action = 'VOOOOOM', voltage = 1000000) + parrot('a thousand', state = 'pushing up the daisies') + parrot('a million', 'bereft of life', 'jump') + +but the following calls would all be invalid:: + + parrot() # required argument missing + parrot(voltage=5.0, 'dead') # non-keyword argument following keyword + parrot(110, voltage=220) # duplicate value for argument + parrot(actor='John Cleese') # unknown keyword + +In general, an argument list must have any positional arguments followed by any +keyword arguments, where the keywords must be chosen from the formal parameter +names. It's not important whether a formal parameter has a default value or +not. No argument may receive a value more than once --- formal parameter names +corresponding to positional arguments cannot be used as keywords in the same +calls. Here's an example that fails due to this restriction:: + + >>> def function(a): + ... pass + ... + >>> function(0, a=0) + Traceback (most recent call last): + File "<stdin>", line 1, in ? + TypeError: function() got multiple values for keyword argument 'a' + +When a final formal parameter of the form ``**name`` is present, it receives a +dictionary (see :ref:`typesmapping`) containing all keyword arguments except for +those corresponding to a formal parameter. This may be combined with a formal +parameter of the form ``*name`` (described in the next subsection) which +receives a tuple containing the positional arguments beyond the formal parameter +list. (``*name`` must occur before ``**name``.) For example, if we define a +function like this:: + + def cheeseshop(kind, *arguments, **keywords): + print "-- Do you have any", kind, '?' 
+ print "-- I'm sorry, we're all out of", kind + for arg in arguments: print arg + print '-'*40 + keys = keywords.keys() + keys.sort() + for kw in keys: print kw, ':', keywords[kw] + +It could be called like this:: + + cheeseshop('Limburger', "It's very runny, sir.", + "It's really very, VERY runny, sir.", + client='John Cleese', + shopkeeper='Michael Palin', + sketch='Cheese Shop Sketch') + +and of course it would print:: + + -- Do you have any Limburger ? + -- I'm sorry, we're all out of Limburger + It's very runny, sir. + It's really very, VERY runny, sir. + ---------------------------------------- + client : John Cleese + shopkeeper : Michael Palin + sketch : Cheese Shop Sketch + +Note that the :meth:`sort` method of the list of keyword argument names is +called before printing the contents of the ``keywords`` dictionary; if this is +not done, the order in which the arguments are printed is undefined. + + +.. _tut-arbitraryargs: + +Arbitrary Argument Lists +------------------------ + +Finally, the least frequently used option is to specify that a function can be +called with an arbitrary number of arguments. These arguments will be wrapped +up in a tuple. Before the variable number of arguments, zero or more normal +arguments may occur. :: + + def fprintf(file, format, *args): + file.write(format % args) + + +.. _tut-unpacking-arguments: + +Unpacking Argument Lists +------------------------ + +The reverse situation occurs when the arguments are already in a list or tuple +but need to be unpacked for a function call requiring separate positional +arguments. For instance, the built-in :func:`range` function expects separate +*start* and *stop* arguments. 
If they are not available separately, write the +function call with the ``*``\ -operator to unpack the arguments out of a list +or tuple:: + + >>> range(3, 6) # normal call with separate arguments + [3, 4, 5] + >>> args = [3, 6] + >>> range(*args) # call with arguments unpacked from a list + [3, 4, 5] + +In the same fashion, dictionaries can deliver keyword arguments with the ``**``\ +-operator:: + + >>> def parrot(voltage, state='a stiff', action='voom'): + ... print "-- This parrot wouldn't", action, + ... print "if you put", voltage, "volts through it.", + ... print "E's", state, "!" + ... + >>> d = {"voltage": "four million", "state": "bleedin' demised", "action": "VOOM"} + >>> parrot(**d) + -- This parrot wouldn't VOOM if you put four million volts through it. E's bleedin' demised ! + + +.. _tut-lambda: + +Lambda Forms +------------ + +By popular demand, a few features commonly found in functional programming +languages like Lisp have been added to Python. With the :keyword:`lambda` +keyword, small anonymous functions can be created. Here's a function that +returns the sum of its two arguments: ``lambda a, b: a+b``. Lambda forms can be +used wherever function objects are required. They are syntactically restricted +to a single expression. Semantically, they are just syntactic sugar for a +normal function definition. Like nested function definitions, lambda forms can +reference variables from the containing scope:: + + >>> def make_incrementor(n): + ... return lambda x: x + n + ... + >>> f = make_incrementor(42) + >>> f(0) + 42 + >>> f(1) + 43 + + +.. _tut-docstrings: + +Documentation Strings +--------------------- + +.. index:: + single: docstrings + single: documentation strings + single: strings, documentation + +There are emerging conventions about the content and formatting of documentation +strings. + +The first line should always be a short, concise summary of the object's +purpose. 
For brevity, it should not explicitly state the object's name or type, +since these are available by other means (except if the name happens to be a +verb describing a function's operation). This line should begin with a capital +letter and end with a period. + +If there are more lines in the documentation string, the second line should be +blank, visually separating the summary from the rest of the description. The +following lines should be one or more paragraphs describing the object's calling +conventions, its side effects, etc. + +The Python parser does not strip indentation from multi-line string literals, +so tools that process documentation have to strip indentation if +desired. This is done using the following convention. The first non-blank line +*after* the first line of the string determines the amount of indentation for +the entire documentation string. (We can't use the first line since it is +generally adjacent to the string's opening quotes so its indentation is not +apparent in the string literal.) Whitespace "equivalent" to this indentation is +then stripped from the start of all lines of the string. Lines that are +indented less should not occur, but if they occur all their leading whitespace +should be stripped. Equivalence of whitespace should be tested after expansion +of tabs (to 8 spaces, normally). + +Here is an example of a multi-line docstring:: + + >>> def my_function(): + ... """Do nothing, but document it. + ... + ... No, really, it doesn't do anything. + ... """ + ... pass + ... + >>> print my_function.__doc__ + Do nothing, but document it. + + No, really, it doesn't do anything. + + + +.. rubric:: Footnotes + +.. [#] Actually, *call by object reference* would be a better description, since if a + mutable object is passed, the caller will see any changes the callee makes to it + (items inserted into a list). 
+ diff --git a/Doc/tutorial/datastructures.rst b/Doc/tutorial/datastructures.rst new file mode 100644 index 0000000..d65e55b --- /dev/null +++ b/Doc/tutorial/datastructures.rst @@ -0,0 +1,586 @@ +.. _tut-structures: + +*************** +Data Structures +*************** + +This chapter describes some things you've learned about already in more detail, +and adds some new things as well. + + +.. _tut-morelists: + +More on Lists +============= + +The list data type has some more methods. Here are all of the methods of list +objects: + + +.. method:: list.append(x) + + Add an item to the end of the list; equivalent to ``a[len(a):] = [x]``. + + +.. method:: list.extend(L) + + Extend the list by appending all the items in the given list; equivalent to + ``a[len(a):] = L``. + + +.. method:: list.insert(i, x) + + Insert an item at a given position. The first argument is the index of the + element before which to insert, so ``a.insert(0, x)`` inserts at the front of + the list, and ``a.insert(len(a), x)`` is equivalent to ``a.append(x)``. + + +.. method:: list.remove(x) + + Remove the first item from the list whose value is *x*. It is an error if there + is no such item. + + +.. method:: list.pop([i]) + + Remove the item at the given position in the list, and return it. If no index + is specified, ``a.pop()`` removes and returns the last item in the list. (The + square brackets around the *i* in the method signature denote that the parameter + is optional, not that you should type square brackets at that position. You + will see this notation frequently in the Python Library Reference.) + + +.. method:: list.index(x) + + Return the index in the list of the first item whose value is *x*. It is an + error if there is no such item. + + +.. method:: list.count(x) + + Return the number of times *x* appears in the list. + + +.. method:: list.sort() + + Sort the items of the list, in place. + + +.. method:: list.reverse() + + Reverse the elements of the list, in place. 
+ +An example that uses most of the list methods:: + + >>> a = [66.25, 333, 333, 1, 1234.5] + >>> print a.count(333), a.count(66.25), a.count('x') + 2 1 0 + >>> a.insert(2, -1) + >>> a.append(333) + >>> a + [66.25, 333, -1, 333, 1, 1234.5, 333] + >>> a.index(333) + 1 + >>> a.remove(333) + >>> a + [66.25, -1, 333, 1, 1234.5, 333] + >>> a.reverse() + >>> a + [333, 1234.5, 1, 333, -1, 66.25] + >>> a.sort() + >>> a + [-1, 1, 66.25, 333, 333, 1234.5] + + +.. _tut-lists-as-stacks: + +Using Lists as Stacks +--------------------- + +.. sectionauthor:: Ka-Ping Yee <ping@lfw.org> + + +The list methods make it very easy to use a list as a stack, where the last +element added is the first element retrieved ("last-in, first-out"). To add an +item to the top of the stack, use :meth:`append`. To retrieve an item from the +top of the stack, use :meth:`pop` without an explicit index. For example:: + + >>> stack = [3, 4, 5] + >>> stack.append(6) + >>> stack.append(7) + >>> stack + [3, 4, 5, 6, 7] + >>> stack.pop() + 7 + >>> stack + [3, 4, 5, 6] + >>> stack.pop() + 6 + >>> stack.pop() + 5 + >>> stack + [3, 4] + + +.. _tut-lists-as-queues: + +Using Lists as Queues +--------------------- + +.. sectionauthor:: Ka-Ping Yee <ping@lfw.org> + + +You can also use a list conveniently as a queue, where the first element added +is the first element retrieved ("first-in, first-out"). To add an item to the +back of the queue, use :meth:`append`. To retrieve an item from the front of +the queue, use :meth:`pop` with ``0`` as the index. For example:: + + >>> queue = ["Eric", "John", "Michael"] + >>> queue.append("Terry") # Terry arrives + >>> queue.append("Graham") # Graham arrives + >>> queue.pop(0) + 'Eric' + >>> queue.pop(0) + 'John' + >>> queue + ['Michael', 'Terry', 'Graham'] + + +.. _tut-functional: + +Functional Programming Tools +---------------------------- + +There are two built-in functions that are very useful when used with lists: +:func:`filter` and :func:`map`. 
+ +``filter(function, sequence)`` returns a sequence consisting of those items from +the sequence for which ``function(item)`` is true. If *sequence* is a +:class:`string` or :class:`tuple`, the result will be of the same type; +otherwise, it is always a :class:`list`. For example, to compute some primes:: + + >>> def f(x): return x % 2 != 0 and x % 3 != 0 + ... + >>> filter(f, range(2, 25)) + [5, 7, 11, 13, 17, 19, 23] + +``map(function, sequence)`` calls ``function(item)`` for each of the sequence's +items and returns a list of the return values. For example, to compute some +cubes:: + + >>> def cube(x): return x*x*x + ... + >>> map(cube, range(1, 11)) + [1, 8, 27, 64, 125, 216, 343, 512, 729, 1000] + +More than one sequence may be passed; the function must then have as many +arguments as there are sequences and is called with the corresponding item from +each sequence (or ``None`` if some sequence is shorter than another). For +example:: + + >>> seq = range(8) + >>> def add(x, y): return x+y + ... + >>> map(add, seq, seq) + [0, 2, 4, 6, 8, 10, 12, 14] + +.. versionadded:: 2.3 + + +List Comprehensions +------------------- + +List comprehensions provide a concise way to create lists without resorting to +use of :func:`map`, :func:`filter` and/or :keyword:`lambda`. The resulting list +definition tends often to be clearer than lists built using those constructs. +Each list comprehension consists of an expression followed by a :keyword:`for` +clause, then zero or more :keyword:`for` or :keyword:`if` clauses. The result +will be a list resulting from evaluating the expression in the context of the +:keyword:`for` and :keyword:`if` clauses which follow it. If the expression +would evaluate to a tuple, it must be parenthesized. 
:: + + >>> freshfruit = [' banana', ' loganberry ', 'passion fruit '] + >>> [weapon.strip() for weapon in freshfruit] + ['banana', 'loganberry', 'passion fruit'] + >>> vec = [2, 4, 6] + >>> [3*x for x in vec] + [6, 12, 18] + >>> [3*x for x in vec if x > 3] + [12, 18] + >>> [3*x for x in vec if x < 2] + [] + >>> [[x,x**2] for x in vec] + [[2, 4], [4, 16], [6, 36]] + >>> [x, x**2 for x in vec] # error - parens required for tuples + File "<stdin>", line 1, in ? + [x, x**2 for x in vec] + ^ + SyntaxError: invalid syntax + >>> [(x, x**2) for x in vec] + [(2, 4), (4, 16), (6, 36)] + >>> vec1 = [2, 4, 6] + >>> vec2 = [4, 3, -9] + >>> [x*y for x in vec1 for y in vec2] + [8, 6, -18, 16, 12, -36, 24, 18, -54] + >>> [x+y for x in vec1 for y in vec2] + [6, 5, -7, 8, 7, -5, 10, 9, -3] + >>> [vec1[i]*vec2[i] for i in range(len(vec1))] + [8, 12, -54] + +List comprehensions are much more flexible than :func:`map` and can be applied +to complex expressions and nested functions:: + + >>> [str(round(355/113.0, i)) for i in range(1,6)] + ['3.1', '3.14', '3.142', '3.1416', '3.14159'] + + +.. _tut-del: + +The :keyword:`del` statement +============================ + +There is a way to remove an item from a list given its index instead of its +value: the :keyword:`del` statement. This differs from the :meth:`pop` method +which returns a value. The :keyword:`del` statement can also be used to remove +slices from a list or clear the entire list (which we did earlier by assignment +of an empty list to the slice). For example:: + + >>> a = [-1, 1, 66.25, 333, 333, 1234.5] + >>> del a[0] + >>> a + [1, 66.25, 333, 333, 1234.5] + >>> del a[2:4] + >>> a + [1, 66.25, 1234.5] + >>> del a[:] + >>> a + [] + +:keyword:`del` can also be used to delete entire variables:: + + >>> del a + +Referencing the name ``a`` hereafter is an error (at least until another value +is assigned to it). We'll find other uses for :keyword:`del` later. + + +.. 
_tut-tuples: + +Tuples and Sequences +==================== + +We saw that lists and strings have many common properties, such as indexing and +slicing operations. They are two examples of *sequence* data types (see +:ref:`typesseq`). Since Python is an evolving language, other sequence data +types may be added. There is also another standard sequence data type: the +*tuple*. + +A tuple consists of a number of values separated by commas, for instance:: + + >>> t = 12345, 54321, 'hello!' + >>> t[0] + 12345 + >>> t + (12345, 54321, 'hello!') + >>> # Tuples may be nested: + ... u = t, (1, 2, 3, 4, 5) + >>> u + ((12345, 54321, 'hello!'), (1, 2, 3, 4, 5)) + +As you see, on output tuples are always enclosed in parentheses, so that nested +tuples are interpreted correctly; they may be input with or without surrounding +parentheses, although often parentheses are necessary anyway (if the tuple is +part of a larger expression). + +Tuples have many uses. For example: (x, y) coordinate pairs, employee records +from a database, etc. Tuples, like strings, are immutable: it is not possible +to assign to the individual items of a tuple (you can simulate much of the same +effect with slicing and concatenation, though). It is also possible to create +tuples which contain mutable objects, such as lists. + +A special problem is the construction of tuples containing 0 or 1 items: the +syntax has some extra quirks to accommodate these. Empty tuples are constructed +by an empty pair of parentheses; a tuple with one item is constructed by +following a value with a comma (it is not sufficient to enclose a single value +in parentheses). Ugly, but effective. For example:: + + >>> empty = () + >>> singleton = 'hello', # <-- note trailing comma + >>> len(empty) + 0 + >>> len(singleton) + 1 + >>> singleton + ('hello',) + +The statement ``t = 12345, 54321, 'hello!'`` is an example of *tuple packing*: +the values ``12345``, ``54321`` and ``'hello!'`` are packed together in a tuple. 
+The reverse operation is also possible:: + + >>> x, y, z = t + +This is called, appropriately enough, *sequence unpacking*. Sequence unpacking +requires the list of variables on the left to have the same number of elements +as the length of the sequence. Note that multiple assignment is really just a +combination of tuple packing and sequence unpacking! + +There is a small bit of asymmetry here: packing multiple values always creates +a tuple, and unpacking works for any sequence. + +.. % XXX Add a bit on the difference between tuples and lists. + + +.. _tut-sets: + +Sets +==== + +Python also includes a data type for *sets*. A set is an unordered collection +with no duplicate elements. Basic uses include membership testing and +eliminating duplicate entries. Set objects also support mathematical operations +like union, intersection, difference, and symmetric difference. + +Here is a brief demonstration:: + + >>> basket = ['apple', 'orange', 'apple', 'pear', 'orange', 'banana'] + >>> fruit = set(basket) # create a set without duplicates + >>> fruit + set(['orange', 'pear', 'apple', 'banana']) + >>> 'orange' in fruit # fast membership testing + True + >>> 'crabgrass' in fruit + False + + >>> # Demonstrate set operations on unique letters from two words + ... + >>> a = set('abracadabra') + >>> b = set('alacazam') + >>> a # unique letters in a + set(['a', 'r', 'b', 'c', 'd']) + >>> a - b # letters in a but not in b + set(['r', 'd', 'b']) + >>> a | b # letters in either a or b + set(['a', 'c', 'r', 'd', 'b', 'm', 'z', 'l']) + >>> a & b # letters in both a and b + set(['a', 'c']) + >>> a ^ b # letters in a or b but not both + set(['r', 'd', 'b', 'm', 'z', 'l']) + + +.. _tut-dictionaries: + +Dictionaries +============ + +Another useful data type built into Python is the *dictionary* (see +:ref:`typesmapping`). Dictionaries are sometimes found in other languages as +"associative memories" or "associative arrays". 
Unlike sequences, which are +indexed by a range of numbers, dictionaries are indexed by *keys*, which can be +any immutable type; strings and numbers can always be keys. Tuples can be used +as keys if they contain only strings, numbers, or tuples; if a tuple contains +any mutable object either directly or indirectly, it cannot be used as a key. +You can't use lists as keys, since lists can be modified in place using index +assignments, slice assignments, or methods like :meth:`append` and +:meth:`extend`. + +It is best to think of a dictionary as an unordered set of *key: value* pairs, +with the requirement that the keys are unique (within one dictionary). A pair of +braces creates an empty dictionary: ``{}``. Placing a comma-separated list of +key:value pairs within the braces adds initial key:value pairs to the +dictionary; this is also the way dictionaries are written on output. + +The main operations on a dictionary are storing a value with some key and +extracting the value given the key. It is also possible to delete a key:value +pair with ``del``. If you store using a key that is already in use, the old +value associated with that key is forgotten. It is an error to extract a value +using a non-existent key. + +The :meth:`keys` method of a dictionary object returns a list of all the keys +used in the dictionary, in arbitrary order (if you want it sorted, just apply +the :meth:`sort` method to the list of keys). To check whether a single key is +in the dictionary, either use the dictionary's :meth:`has_key` method or the +:keyword:`in` keyword. 
+ +Here is a small example using a dictionary:: + + >>> tel = {'jack': 4098, 'sape': 4139} + >>> tel['guido'] = 4127 + >>> tel + {'sape': 4139, 'guido': 4127, 'jack': 4098} + >>> tel['jack'] + 4098 + >>> del tel['sape'] + >>> tel['irv'] = 4127 + >>> tel + {'guido': 4127, 'irv': 4127, 'jack': 4098} + >>> tel.keys() + ['guido', 'irv', 'jack'] + >>> tel.has_key('guido') + True + >>> 'guido' in tel + True + +The :func:`dict` constructor builds dictionaries directly from lists of +key-value pairs stored as tuples. When the pairs form a pattern, list +comprehensions can compactly specify the key-value list. :: + + >>> dict([('sape', 4139), ('guido', 4127), ('jack', 4098)]) + {'sape': 4139, 'jack': 4098, 'guido': 4127} + >>> dict([(x, x**2) for x in (2, 4, 6)]) # use a list comprehension + {2: 4, 4: 16, 6: 36} + +Later in the tutorial, we will learn about Generator Expressions which are even +better suited for the task of supplying key-value pairs to the :func:`dict` +constructor. + +When the keys are simple strings, it is sometimes easier to specify pairs using +keyword arguments:: + + >>> dict(sape=4139, guido=4127, jack=4098) + {'sape': 4139, 'jack': 4098, 'guido': 4127} + + +.. _tut-loopidioms: + +Looping Techniques +================== + +When looping through dictionaries, the key and corresponding value can be +retrieved at the same time using the :meth:`iteritems` method. :: + + >>> knights = {'gallahad': 'the pure', 'robin': 'the brave'} + >>> for k, v in knights.iteritems(): + ... print k, v + ... + gallahad the pure + robin the brave + +When looping through a sequence, the position index and corresponding value can +be retrieved at the same time using the :func:`enumerate` function. :: + + >>> for i, v in enumerate(['tic', 'tac', 'toe']): + ... print i, v + ... + 0 tic + 1 tac + 2 toe + +To loop over two or more sequences at the same time, the entries can be paired +with the :func:`zip` function.
:: + + >>> questions = ['name', 'quest', 'favorite color'] + >>> answers = ['lancelot', 'the holy grail', 'blue'] + >>> for q, a in zip(questions, answers): + ... print 'What is your %s? It is %s.' % (q, a) + ... + What is your name? It is lancelot. + What is your quest? It is the holy grail. + What is your favorite color? It is blue. + +To loop over a sequence in reverse, first specify the sequence in a forward +direction and then call the :func:`reversed` function. :: + + >>> for i in reversed(range(1,10,2)): + ... print i + ... + 9 + 7 + 5 + 3 + 1 + +To loop over a sequence in sorted order, use the :func:`sorted` function which +returns a new sorted list while leaving the source unaltered. :: + + >>> basket = ['apple', 'orange', 'apple', 'pear', 'orange', 'banana'] + >>> for f in sorted(set(basket)): + ... print f + ... + apple + banana + orange + pear + + +.. _tut-conditions: + +More on Conditions +================== + +The conditions used in ``while`` and ``if`` statements can contain any +operators, not just comparisons. + +The comparison operators ``in`` and ``not in`` check whether a value occurs +(does not occur) in a sequence. The operators ``is`` and ``is not`` compare +whether two objects are really the same object; this only matters for mutable +objects like lists. All comparison operators have the same priority, which is +lower than that of all numerical operators. + +Comparisons can be chained. For example, ``a < b == c`` tests whether ``a`` is +less than ``b`` and moreover ``b`` equals ``c``. + +Comparisons may be combined using the Boolean operators ``and`` and ``or``, and +the outcome of a comparison (or of any other Boolean expression) may be negated +with ``not``. These have lower priorities than comparison operators; between +them, ``not`` has the highest priority and ``or`` the lowest, so that ``A and +not B or C`` is equivalent to ``(A and (not B)) or C``. As always, parentheses +can be used to express the desired composition. 
+ +The Boolean operators ``and`` and ``or`` are so-called *short-circuit* +operators: their arguments are evaluated from left to right, and evaluation +stops as soon as the outcome is determined. For example, if ``A`` and ``C`` are +true but ``B`` is false, ``A and B and C`` does not evaluate the expression +``C``. When used as a general value and not as a Boolean, the return value of a +short-circuit operator is the last evaluated argument. + +It is possible to assign the result of a comparison or other Boolean expression +to a variable. For example, :: + + >>> string1, string2, string3 = '', 'Trondheim', 'Hammer Dance' + >>> non_null = string1 or string2 or string3 + >>> non_null + 'Trondheim' + +Note that in Python, unlike C, assignment cannot occur inside expressions. C +programmers may grumble about this, but it avoids a common class of problems +encountered in C programs: typing ``=`` in an expression when ``==`` was +intended. + + +.. _tut-comparing: + +Comparing Sequences and Other Types +=================================== + +Sequence objects may be compared to other objects with the same sequence type. +The comparison uses *lexicographical* ordering: first the first two items are +compared, and if they differ this determines the outcome of the comparison; if +they are equal, the next two items are compared, and so on, until either +sequence is exhausted. If two items to be compared are themselves sequences of +the same type, the lexicographical comparison is carried out recursively. If +all items of two sequences compare equal, the sequences are considered equal. +If one sequence is an initial sub-sequence of the other, the shorter sequence is +the smaller (lesser) one. Lexicographical ordering for strings uses the ASCII +ordering for individual characters. 
Some examples of comparisons between +sequences of the same type:: + + (1, 2, 3) < (1, 2, 4) + [1, 2, 3] < [1, 2, 4] + 'ABC' < 'C' < 'Pascal' < 'Python' + (1, 2, 3, 4) < (1, 2, 4) + (1, 2) < (1, 2, -1) + (1, 2, 3) == (1.0, 2.0, 3.0) + (1, 2, ('aa', 'ab')) < (1, 2, ('abc', 'a'), 4) + +Note that comparing objects of different types is legal. The outcome is +deterministic but arbitrary: the types are ordered by their name. Thus, a list +is always smaller than a string, a string is always smaller than a tuple, etc. +[#]_ Mixed numeric types are compared according to their numeric value, so 0 +equals 0.0, etc. + + +.. rubric:: Footnotes + +.. [#] The rules for comparing objects of different types should not be relied upon; + they may change in a future version of the language. + diff --git a/Doc/tutorial/errors.rst b/Doc/tutorial/errors.rst new file mode 100644 index 0000000..99af9c7 --- /dev/null +++ b/Doc/tutorial/errors.rst @@ -0,0 +1,418 @@ +.. _tut-errors: + +********************* +Errors and Exceptions +********************* + +Until now error messages haven't been more than mentioned, but if you have tried +out the examples you have probably seen some. There are (at least) two +distinguishable kinds of errors: *syntax errors* and *exceptions*. + + +.. _tut-syntaxerrors: + +Syntax Errors +============= + +Syntax errors, also known as parsing errors, are perhaps the most common kind of +complaint you get while you are still learning Python:: + + >>> while True print 'Hello world' + File "<stdin>", line 1, in ? + while True print 'Hello world' + ^ + SyntaxError: invalid syntax + +The parser repeats the offending line and displays a little 'arrow' pointing at +the earliest point in the line where the error was detected. The error is +caused by (or at least detected at) the token *preceding* the arrow: in the +example, the error is detected at the keyword :keyword:`print`, since a colon +(``':'``) is missing before it. 
File name and line number are printed so you +know where to look in case the input came from a script. + + +.. _tut-exceptions: + +Exceptions +========== + +Even if a statement or expression is syntactically correct, it may cause an +error when an attempt is made to execute it. Errors detected during execution +are called *exceptions* and are not unconditionally fatal: you will soon learn +how to handle them in Python programs. Most exceptions are not handled by +programs, however, and result in error messages as shown here:: + + >>> 10 * (1/0) + Traceback (most recent call last): + File "<stdin>", line 1, in ? + ZeroDivisionError: integer division or modulo by zero + >>> 4 + spam*3 + Traceback (most recent call last): + File "<stdin>", line 1, in ? + NameError: name 'spam' is not defined + >>> '2' + 2 + Traceback (most recent call last): + File "<stdin>", line 1, in ? + TypeError: cannot concatenate 'str' and 'int' objects + +The last line of the error message indicates what happened. Exceptions come in +different types, and the type is printed as part of the message: the types in +the example are :exc:`ZeroDivisionError`, :exc:`NameError` and :exc:`TypeError`. +The string printed as the exception type is the name of the built-in exception +that occurred. This is true for all built-in exceptions, but need not be true +for user-defined exceptions (although it is a useful convention). Standard +exception names are built-in identifiers (not reserved keywords). + +The rest of the line provides detail based on the type of exception and what +caused it. + +The preceding part of the error message shows the context where the exception +happened, in the form of a stack traceback. In general it contains a stack +traceback listing source lines; however, it will not display lines read from +standard input. + +:ref:`bltin-exceptions` lists the built-in exceptions and their meanings. + + +.. 
_tut-handling: + +Handling Exceptions +=================== + +It is possible to write programs that handle selected exceptions. Look at the +following example, which asks the user for input until a valid integer has been +entered, but allows the user to interrupt the program (using :kbd:`Control-C` or +whatever the operating system supports); note that a user-generated interruption +is signalled by raising the :exc:`KeyboardInterrupt` exception. :: + + >>> def raw_input(prompt): + ... import sys + ... sys.stdout.write(prompt) + ... sys.stdout.flush() + ... return sys.stdin.readline() + ... + >>> while True: + ... try: + ... x = int(raw_input("Please enter a number: ")) + ... break + ... except ValueError: + ... print "Oops! That was no valid number. Try again..." + ... + +The :keyword:`try` statement works as follows. + +* First, the *try clause* (the statement(s) between the :keyword:`try` and + :keyword:`except` keywords) is executed. + +* If no exception occurs, the *except clause* is skipped and execution of the + :keyword:`try` statement is finished. + +* If an exception occurs during execution of the try clause, the rest of the + clause is skipped. Then if its type matches the exception named after the + :keyword:`except` keyword, the except clause is executed, and then execution + continues after the :keyword:`try` statement. + +* If an exception occurs which does not match the exception named in the except + clause, it is passed on to outer :keyword:`try` statements; if no handler is + found, it is an *unhandled exception* and execution stops with a message as + shown above. + +A :keyword:`try` statement may have more than one except clause, to specify +handlers for different exceptions. At most one handler will be executed. +Handlers only handle exceptions that occur in the corresponding try clause, not +in other handlers of the same :keyword:`try` statement. An except clause may +name multiple exceptions as a parenthesized tuple, for example:: + + ... 
except (RuntimeError, TypeError, NameError): + ... pass + +The last except clause may omit the exception name(s), to serve as a wildcard. +Use this with extreme caution, since it is easy to mask a real programming error +in this way! It can also be used to print an error message and then re-raise +the exception (allowing a caller to handle the exception as well):: + + import sys + + try: + f = open('myfile.txt') + s = f.readline() + i = int(s.strip()) + except IOError as e: + (errno, strerror) = e + print "I/O error(%s): %s" % (e.errno, e.strerror) + except ValueError: + print "Could not convert data to an integer." + except: + print "Unexpected error:", sys.exc_info()[0] + raise + +The :keyword:`try` ... :keyword:`except` statement has an optional *else +clause*, which, when present, must follow all except clauses. It is useful for +code that must be executed if the try clause does not raise an exception. For +example:: + + for arg in sys.argv[1:]: + try: + f = open(arg, 'r') + except IOError: + print 'cannot open', arg + else: + print arg, 'has', len(f.readlines()), 'lines' + f.close() + +The use of the :keyword:`else` clause is better than adding additional code to +the :keyword:`try` clause because it avoids accidentally catching an exception +that wasn't raised by the code being protected by the :keyword:`try` ... +:keyword:`except` statement. + +When an exception occurs, it may have an associated value, also known as the +exception's *argument*. The presence and type of the argument depend on the +exception type. + +The except clause may specify a variable after the exception name (or tuple). +The variable is bound to an exception instance with the arguments stored in +``instance.args``. For convenience, the exception instance defines +:meth:`__getitem__` and :meth:`__str__` so the arguments can be accessed or +printed directly without having to reference ``.args``. + +But use of ``.args`` is discouraged. 
Instead, the preferred use is to pass a +single argument to an exception (which can be a tuple if multiple arguments are +needed) and have it bound to the ``message`` attribute. One may also +instantiate an exception first before raising it and add any attributes to it as +desired. :: + + >>> try: + ... raise Exception('spam', 'eggs') + ... except Exception as inst: + ... print type(inst) # the exception instance + ... print inst.args # arguments stored in .args + ... print inst # __str__ allows args to be printed directly + ... x, y = inst # __getitem__ allows args to be unpacked directly + ... print 'x =', x + ... print 'y =', y + ... + <type 'Exception'> + ('spam', 'eggs') + ('spam', 'eggs') + x = spam + y = eggs + +If an exception has an argument, it is printed as the last part ('detail') of +the message for unhandled exceptions. + +Exception handlers don't just handle exceptions if they occur immediately in the +try clause, but also if they occur inside functions that are called (even +indirectly) in the try clause. For example:: + + >>> def this_fails(): + ... x = 1/0 + ... + >>> try: + ... this_fails() + ... except ZeroDivisionError as detail: + ... print 'Handling run-time error:', detail + ... + Handling run-time error: integer division or modulo by zero + + +.. _tut-raising: + +Raising Exceptions +================== + +The :keyword:`raise` statement allows the programmer to force a specified +exception to occur. For example:: + + >>> raise NameError, 'HiThere' + Traceback (most recent call last): + File "<stdin>", line 1, in ? + NameError: HiThere + +The first argument to :keyword:`raise` names the exception to be raised. The +optional second argument specifies the exception's argument. Alternatively, the +above could be written as ``raise NameError('HiThere')``. Either form works +fine, but there seems to be a growing stylistic preference for the latter.
+ +If you need to determine whether an exception was raised but don't intend to +handle it, a simpler form of the :keyword:`raise` statement allows you to +re-raise the exception:: + + >>> try: + ... raise NameError, 'HiThere' + ... except NameError: + ... print 'An exception flew by!' + ... raise + ... + An exception flew by! + Traceback (most recent call last): + File "<stdin>", line 2, in ? + NameError: HiThere + + +.. _tut-userexceptions: + +User-defined Exceptions +======================= + +Programs may name their own exceptions by creating a new exception class. +Exceptions should typically be derived from the :exc:`Exception` class, either +directly or indirectly. For example:: + + >>> class MyError(Exception): + ... def __init__(self, value): + ... self.value = value + ... def __str__(self): + ... return repr(self.value) + ... + >>> try: + ... raise MyError(2*2) + ... except MyError as e: + ... print 'My exception occurred, value:', e.value + ... + My exception occurred, value: 4 + >>> raise MyError, 'oops!' + Traceback (most recent call last): + File "<stdin>", line 1, in ? + __main__.MyError: 'oops!' + +In this example, the default :meth:`__init__` of :class:`Exception` has been +overridden. The new behavior simply creates the *value* attribute. This +replaces the default behavior of creating the *args* attribute. + +Exception classes can be defined which do anything any other class can do, but +are usually kept simple, often only offering a number of attributes that allow +information about the error to be extracted by handlers for the exception. When +creating a module that can raise several distinct errors, a common practice is +to create a base class for exceptions defined by that module, and subclass that +to create specific exception classes for different error conditions:: + + class Error(Exception): + """Base class for exceptions in this module.""" + pass + + class InputError(Error): + """Exception raised for errors in the input. 
+ + Attributes: + expression -- input expression in which the error occurred + message -- explanation of the error + """ + + def __init__(self, expression, message): + self.expression = expression + self.message = message + + class TransitionError(Error): + """Raised when an operation attempts a state transition that's not + allowed. + + Attributes: + previous -- state at beginning of transition + next -- attempted new state + message -- explanation of why the specific transition is not allowed + """ + + def __init__(self, previous, next, message): + self.previous = previous + self.next = next + self.message = message + +Most exceptions are defined with names that end in "Error," similar to the +naming of the standard exceptions. + +Many standard modules define their own exceptions to report errors that may +occur in functions they define. More information on classes is presented in +chapter :ref:`tut-classes`. + + +.. _tut-cleanup: + +Defining Clean-up Actions +========================= + +The :keyword:`try` statement has another optional clause which is intended to +define clean-up actions that must be executed under all circumstances. For +example:: + + >>> try: + ... raise KeyboardInterrupt + ... finally: + ... print 'Goodbye, world!' + ... + Goodbye, world! + Traceback (most recent call last): + File "<stdin>", line 2, in ? + KeyboardInterrupt + +A *finally clause* is always executed before leaving the :keyword:`try` +statement, whether an exception has occurred or not. When an exception has +occurred in the :keyword:`try` clause and has not been handled by an +:keyword:`except` clause (or it has occurred in an :keyword:`except` or +:keyword:`else` clause), it is re-raised after the :keyword:`finally` clause has +been executed. The :keyword:`finally` clause is also executed "on the way out" +when any other clause of the :keyword:`try` statement is left via a +:keyword:`break`, :keyword:`continue` or :keyword:`return` statement.
A more +complicated example (having :keyword:`except` and :keyword:`finally` clauses in +the same :keyword:`try` statement works as of Python 2.5):: + + >>> def divide(x, y): + ... try: + ... result = x / y + ... except ZeroDivisionError: + ... print "division by zero!" + ... else: + ... print "result is", result + ... finally: + ... print "executing finally clause" + ... + >>> divide(2, 1) + result is 2 + executing finally clause + >>> divide(2, 0) + division by zero! + executing finally clause + >>> divide("2", "1") + executing finally clause + Traceback (most recent call last): + File "<stdin>", line 1, in ? + File "<stdin>", line 3, in divide + TypeError: unsupported operand type(s) for /: 'str' and 'str' + +As you can see, the :keyword:`finally` clause is executed in any event. The +:exc:`TypeError` raised by dividing two strings is not handled by the +:keyword:`except` clause and is therefore re-raised after the :keyword:`finally` +clause has been executed. + +In real world applications, the :keyword:`finally` clause is useful for +releasing external resources (such as files or network connections), regardless +of whether the use of the resource was successful. + + +.. _tut-cleanup-with: + +Predefined Clean-up Actions +=========================== + +Some objects define standard clean-up actions to be undertaken when the object +is no longer needed, regardless of whether or not the operation using the object +succeeded or failed. Look at the following example, which tries to open a file +and print its contents to the screen. :: + + for line in open("myfile.txt"): + print line + +The problem with this code is that it leaves the file open for an indeterminate +amount of time after the code has finished executing. This is not an issue in +simple scripts, but can be a problem for larger applications. The +:keyword:`with` statement allows objects like files to be used in a way that +ensures they are always cleaned up promptly and correctly.
:: + + with open("myfile.txt") as f: + for line in f: + print line + +After the statement is executed, the file *f* is always closed, even if a +problem was encountered while processing the lines. Other objects which provide +predefined clean-up actions will indicate this in their documentation. + + diff --git a/Doc/tutorial/floatingpoint.rst b/Doc/tutorial/floatingpoint.rst new file mode 100644 index 0000000..cbf7008 --- /dev/null +++ b/Doc/tutorial/floatingpoint.rst @@ -0,0 +1,220 @@ +.. _tut-fp-issues: + +************************************************** +Floating Point Arithmetic: Issues and Limitations +************************************************** + +.. sectionauthor:: Tim Peters <tim_one@users.sourceforge.net> + + +Floating-point numbers are represented in computer hardware as base 2 (binary) +fractions. For example, the decimal fraction :: + + 0.125 + +has value 1/10 + 2/100 + 5/1000, and in the same way the binary fraction :: + + 0.001 + +has value 0/2 + 0/4 + 1/8. These two fractions have identical values, the only +real difference being that the first is written in base 10 fractional notation, +and the second in base 2. + +Unfortunately, most decimal fractions cannot be represented exactly as binary +fractions. A consequence is that, in general, the decimal floating-point +numbers you enter are only approximated by the binary floating-point numbers +actually stored in the machine. + +The problem is easier to understand at first in base 10. Consider the fraction +1/3. You can approximate that as a base 10 fraction:: + + 0.3 + +or, better, :: + + 0.33 + +or, better, :: + + 0.333 + +and so on. No matter how many digits you're willing to write down, the result +will never be exactly 1/3, but will be an increasingly better approximation of +1/3. + +In the same way, no matter how many base 2 digits you're willing to use, the +decimal value 0.1 cannot be represented exactly as a base 2 fraction. 
In base +2, 1/10 is the infinitely repeating fraction :: + + 0.0001100110011001100110011001100110011001100110011... + +Stop at any finite number of bits, and you get an approximation. This is why +you see things like:: + + >>> 0.1 + 0.10000000000000001 + +On most machines today, that is what you'll see if you enter 0.1 at a Python +prompt. You may not, though, because the number of bits used by the hardware to +store floating-point values can vary across machines, and Python only prints a +decimal approximation to the true decimal value of the binary approximation +stored by the machine. On most machines, if Python were to print the true +decimal value of the binary approximation stored for 0.1, it would have to +display :: + + >>> 0.1 + 0.1000000000000000055511151231257827021181583404541015625 + +instead! The Python prompt uses the builtin :func:`repr` function to obtain a +string version of everything it displays. For floats, ``repr(float)`` rounds +the true decimal value to 17 significant digits, giving :: + + 0.10000000000000001 + +``repr(float)`` produces 17 significant digits because it turns out that's +enough (on most machines) so that ``eval(repr(x)) == x`` exactly for all finite +floats *x*, but rounding to 16 digits is not enough to make that true. + +Note that this is in the very nature of binary floating-point: this is not a bug +in Python, and it is not a bug in your code either. You'll see the same kind of +thing in all languages that support your hardware's floating-point arithmetic +(although some languages may not *display* the difference by default, or in all +output modes). + +Python's builtin :func:`str` function produces only 12 significant digits, and +you may wish to use that instead. 
It's unusual for ``eval(str(x))`` to +reproduce *x*, but the output may be more pleasant to look at:: + + >>> print str(0.1) + 0.1 + +It's important to realize that this is, in a real sense, an illusion: the value +in the machine is not exactly 1/10, you're simply rounding the *display* of the +true machine value. + +Other surprises follow from this one. For example, after seeing :: + + >>> 0.1 + 0.10000000000000001 + +you may be tempted to use the :func:`round` function to chop it back to the +single digit you expect. But that makes no difference:: + + >>> round(0.1, 1) + 0.10000000000000001 + +The problem is that the binary floating-point value stored for "0.1" was already +the best possible binary approximation to 1/10, so trying to round it again +can't make it better: it was already as good as it gets. + +Another consequence is that since 0.1 is not exactly 1/10, summing ten values of +0.1 may not yield exactly 1.0, either:: + + >>> sum = 0.0 + >>> for i in range(10): + ... sum += 0.1 + ... + >>> sum + 0.99999999999999989 + +Binary floating-point arithmetic holds many surprises like this. The problem +with "0.1" is explained in precise detail below, in the "Representation Error" +section. See `The Perils of Floating Point <http://www.lahey.com/float.htm>`_ +for a more complete account of other common surprises. + +As that says near the end, "there are no easy answers." Still, don't be unduly +wary of floating-point! The errors in Python float operations are inherited +from the floating-point hardware, and on most machines are on the order of no +more than 1 part in 2\*\*53 per operation. That's more than adequate for most +tasks, but you do need to keep in mind that it's not decimal arithmetic, and +that every float operation can suffer a new rounding error. 
+ +While pathological cases do exist, for most casual use of floating-point +arithmetic you'll see the result you expect in the end if you simply round the +display of your final results to the number of decimal digits you expect. +:func:`str` usually suffices, and for finer control see the discussion of +Python's ``%`` format operator: the ``%g``, ``%f`` and ``%e`` format codes +supply flexible and easy ways to round float results for display. + + +.. _tut-fp-error: + +Representation Error +==================== + +This section explains the "0.1" example in detail, and shows how you can perform +an exact analysis of cases like this yourself. Basic familiarity with binary +floating-point representation is assumed. + +:dfn:`Representation error` refers to the fact that some (most, actually) +decimal fractions cannot be represented exactly as binary (base 2) fractions. +This is the chief reason why Python (or Perl, C, C++, Java, Fortran, and many +others) often won't display the exact decimal number you expect:: + + >>> 0.1 + 0.10000000000000001 + +Why is that? 1/10 is not exactly representable as a binary fraction. Almost all +machines today (November 2000) use IEEE-754 floating point arithmetic, and +almost all platforms map Python floats to IEEE-754 "double precision". 754 +doubles contain 53 bits of precision, so on input the computer strives to +convert 0.1 to the closest fraction it can of the form *J*/2\*\**N* where *J* is +an integer containing exactly 53 bits. Rewriting :: + + 1 / 10 ~= J / (2**N) + +as :: + + J ~= 2**N / 10 + +and recalling that *J* has exactly 53 bits (is ``>= 2**52`` but ``< 2**53``), +the best value for *N* is 56:: + + >>> 2**52 + 4503599627370496L + >>> 2**53 + 9007199254740992L + >>> 2**56/10 + 7205759403792793L + +That is, 56 is the only value for *N* that leaves *J* with exactly 53 bits. 
The +best possible value for *J* is then that quotient rounded:: + + >>> q, r = divmod(2**56, 10) + >>> r + 6L + +Since the remainder is more than half of 10, the best approximation is obtained +by rounding up:: + + >>> q+1 + 7205759403792794L + +Therefore the best possible approximation to 1/10 in 754 double precision is +that over 2\*\*56, or :: + + 7205759403792794 / 72057594037927936 + +Note that since we rounded up, this is actually a little bit larger than 1/10; +if we had not rounded up, the quotient would have been a little bit smaller than +1/10. But in no case can it be *exactly* 1/10! + +So the computer never "sees" 1/10: what it sees is the exact fraction given +above, the best 754 double approximation it can get:: + + >>> .1 * 2**56 + 7205759403792794.0 + +If we multiply that fraction by 10\*\*30, we can see the (truncated) value of +its 30 most significant decimal digits:: + + >>> 7205759403792794 * 10**30 / 2**56 + 100000000000000005551115123125L + +meaning that the exact number stored in the computer is approximately equal to +the decimal value 0.100000000000000005551115123125. Rounding that to 17 +significant digits gives the 0.10000000000000001 that Python displays (well, +will display on any 754-conforming platform that does best-possible input and +output conversions in its C library --- yours may not!). + + diff --git a/Doc/tutorial/glossary.rst b/Doc/tutorial/glossary.rst new file mode 100644 index 0000000..c05d68d --- /dev/null +++ b/Doc/tutorial/glossary.rst @@ -0,0 +1,329 @@ + +.. _tut-glossary: + +******** +Glossary +******** + +.. % %% keep the entries sorted and include at least one \index{} item for each +.. % %% cross-references are marked with \emph{entry} + +``>>>`` + The typical Python prompt of the interactive shell. Often seen for code + examples that can be tried right away in the interpreter. + + .. index:: single: ... 
+ +``...`` + The typical Python prompt of the interactive shell when entering code for an + indented code block. + + .. index:: single: BDFL + +BDFL + Benevolent Dictator For Life, a.k.a. `Guido van Rossum + <http://www.python.org/~guido/>`_, Python's creator. + + .. index:: single: byte code + +byte code + The internal representation of a Python program in the interpreter. The byte + code is also cached in ``.pyc`` and ``.pyo`` files so that executing the same + file is faster the second time (recompilation from source to byte code can be + avoided). This "intermediate language" is said to run on a "virtual machine" + that calls the subroutines corresponding to each bytecode. + + .. index:: single: classic class + +classic class + Any class which does not inherit from :class:`object`. See *new-style class*. + + .. index:: single: complex number + +complex number + An extension of the familiar real number system in which all numbers are + expressed as a sum of a real part and an imaginary part. Imaginary numbers are + real multiples of the imaginary unit (the square root of ``-1``), often written + ``i`` in mathematics or ``j`` in engineering. Python has builtin support for + complex numbers, which are written with this latter notation; the imaginary part + is written with a ``j`` suffix, e.g., ``3+1j``. To get access to complex + equivalents of the :mod:`math` module, use :mod:`cmath`. Use of complex numbers + is a fairly advanced mathematical feature. If you're not aware of a need for + them, it's almost certain you can safely ignore them. + + .. index:: single: descriptor + +descriptor + Any *new-style* object that defines the methods :meth:`__get__`, + :meth:`__set__`, or :meth:`__delete__`. When a class attribute is a descriptor, + its special binding behavior is triggered upon attribute lookup. Normally, + writing *a.b* looks up the object *b* in the class dictionary for *a*, but if + *b* is a descriptor, the defined method gets called. 
Understanding descriptors + is a key to a deep understanding of Python because they are the basis for many + features including functions, methods, properties, class methods, static + methods, and reference to super classes. + + .. index:: single: dictionary + +dictionary + An associative array, where arbitrary keys are mapped to values. The use of + :class:`dict` much resembles that for :class:`list`, but the keys can be any + object with a :meth:`__hash__` function, not just integers starting from zero. + Called a hash in Perl. + + .. index:: single: duck-typing + +duck-typing + Pythonic programming style that determines an object's type by inspection of its + method or attribute signature rather than by explicit relationship to some type + object ("If it looks like a duck and quacks like a duck, it must be a duck.") + By emphasizing interfaces rather than specific types, well-designed code + improves its flexibility by allowing polymorphic substitution. Duck-typing + avoids tests using :func:`type` or :func:`isinstance`. Instead, it typically + employs :func:`hasattr` tests or *EAFP* programming. + + .. index:: single: EAFP + +EAFP + Easier to ask for forgiveness than permission. This common Python coding style + assumes the existence of valid keys or attributes and catches exceptions if the + assumption proves false. This clean and fast style is characterized by the + presence of many :keyword:`try` and :keyword:`except` statements. The technique + contrasts with the *LBYL* style that is common in many other languages such as + C. + + .. index:: single: __future__ + +__future__ + A pseudo module which programmers can use to enable new language features which + are not compatible with the current interpreter. 
To enable ``new_feature`` :: + + from __future__ import new_feature + + By importing the :mod:`__future__` module and evaluating its variables, you + can see when a new feature was first added to the language and when it will + become the default:: + + >>> import __future__ + >>> __future__.division + _Feature((2, 2, 0, 'alpha', 2), (3, 0, 0, 'alpha', 0), 8192) + + .. index:: single: generator + +generator + A function that returns an iterator. It looks like a normal function except + that values are returned to the caller using a :keyword:`yield` statement + instead of a :keyword:`return` statement. Generator functions often contain one + or more :keyword:`for` or :keyword:`while` loops that :keyword:`yield` elements + back to the caller. The function execution is stopped at the :keyword:`yield` + keyword (returning the result) and is resumed there when the next element is + requested by calling the :meth:`__next__` method of the returned iterator. + + .. index:: single: generator expression + +generator expression + An expression that returns a generator. It looks like a normal expression + followed by a :keyword:`for` expression defining a loop variable, range, and an + optional :keyword:`if` expression. The combined expression generates values for + an enclosing function:: + + >>> sum(i*i for i in range(10)) # sum of squares 0, 1, 4, ... 81 + 285 + + .. index:: single: GIL + +GIL + See *global interpreter lock*. + + .. index:: single: global interpreter lock + +global interpreter lock + The lock used by Python threads to assure that only one thread can be run at + a time. This simplifies Python by assuring that no two threads can access + the same memory at the same time. Locking the entire interpreter makes it + easier for the interpreter to be multi-threaded, at the expense of some + parallelism on multi-processor machines.
Efforts have been made in the past + to create a "free-threaded" interpreter (one which locks shared data at a + much finer granularity), but performance suffered in the common + single-processor case. + + .. index:: single: IDLE + +IDLE + An Integrated Development Environment for Python. IDLE is a basic editor and + interpreter environment that ships with the standard distribution of Python. + Good for beginners, it also serves as clear example code for those wanting to + implement a moderately sophisticated, multi-platform GUI application. + + .. index:: single: immutable + +immutable + An object with fixed value. Immutable objects are numbers, strings or tuples + (and more). Such an object cannot be altered. A new object has to be created + if a different value has to be stored. They play an important role in places + where a constant hash value is needed, for example as a key in a dictionary. + + .. index:: single: integer division + +integer division + Mathematical division discarding any remainder. For example, the expression + ``11//4`` evaluates to ``2``, in contrast to the ``2.75`` returned by ``11/4`` + (the ``/`` operator always returns a float). Integer division is requested + by using the ``//`` operator instead of the ``/`` operator. + + .. index:: single: interactive + +interactive + Python has an interactive interpreter which means that you can try out things + and immediately see their results. Just launch ``python`` with no arguments + (possibly by selecting it from your computer's main menu). It is a very powerful + way to test out new ideas or inspect modules and packages (remember + ``help(x)``). + + .. index:: single: interpreted + +interpreted + Python is an interpreted language, as opposed to a compiled one. This means + that the source files can be run directly without first creating an executable + which is then run. Interpreted languages typically have a shorter + development/debug cycle than compiled ones, though their programs generally also + run more slowly. See also *interactive*. + + ..
index:: single: iterable + +iterable + A container object capable of returning its members one at a time. Examples of + iterables include all sequence types (such as :class:`list`, :class:`str`, and + :class:`tuple`) and some non-sequence types like :class:`dict` and :class:`file` + and objects of any classes you define with an :meth:`__iter__` or + :meth:`__getitem__` method. Iterables can be used in a :keyword:`for` loop and + in many other places where a sequence is needed (:func:`zip`, :func:`map`, ...). + When an iterable object is passed as an argument to the builtin function + :func:`iter`, it returns an iterator for the object. This iterator is good for + one pass over the set of values. When using iterables, it is usually not + necessary to call :func:`iter` or deal with iterator objects yourself. The + ``for`` statement does that automatically for you, creating a temporary unnamed + variable to hold the iterator for the duration of the loop. See also + *iterator*, *sequence*, and *generator*. + + .. index:: single: iterator + +iterator + An object representing a stream of data. Repeated calls to the iterator's + :meth:`__next__` method return successive items in the stream. When no more + data is available a :exc:`StopIteration` exception is raised instead. At this + point, the iterator object is exhausted and any further calls to its + :meth:`__next__` method just raise :exc:`StopIteration` again. Iterators are + required to have an :meth:`__iter__` method that returns the iterator object + itself so every iterator is also iterable and may be used in most places where + other iterables are accepted. One notable exception is code that attempts + multiple iteration passes. A container object (such as a :class:`list`) + produces a fresh new iterator each time you pass it to the :func:`iter` function + or use it in a :keyword:`for` loop. 
Attempting this with an iterator will just + return the same exhausted iterator object used in the previous iteration pass, + making it appear like an empty container. + + .. index:: single: LBYL + +LBYL + Look before you leap. This coding style explicitly tests for pre-conditions + before making calls or lookups. This style contrasts with the *EAFP* approach + and is characterized by the presence of many :keyword:`if` statements. + + .. index:: single: list comprehension + +list comprehension + A compact way to process all or a subset of elements in a sequence and return a + list with the results. ``result = ["0x%02x" % x for x in range(256) if x % 2 == + 0]`` generates a list of strings containing hex numbers (0x..) that are even and + in the range from 0 to 255. The :keyword:`if` clause is optional. If omitted, + all elements in ``range(256)`` are processed. + + .. index:: single: mapping + +mapping + A container object (such as :class:`dict`) that supports arbitrary key lookups + using the special method :meth:`__getitem__`. + + .. index:: single: metaclass + +metaclass + The class of a class. Class definitions create a class name, a class + dictionary, and a list of base classes. The metaclass is responsible for taking + those three arguments and creating the class. Most object oriented programming + languages provide a default implementation. What makes Python special is that + it is possible to create custom metaclasses. Most users never need this tool, + but when the need arises, metaclasses can provide powerful, elegant solutions. + They have been used for logging attribute access, adding thread-safety, tracking + object creation, implementing singletons, and many other tasks. + + .. index:: single: mutable + +mutable + Mutable objects can change their value but keep their :func:`id`. See also + *immutable*. + + .. index:: single: namespace + +namespace + The place where a variable is stored. Namespaces are implemented as + dictionaries. 
There are the local, global and builtin namespaces as well as + nested namespaces in objects (in methods). Namespaces support modularity by + preventing naming conflicts. For instance, the functions + :func:`__builtin__.open` and :func:`os.open` are distinguished by their + namespaces. Namespaces also aid readability and maintainability by making it + clear which module implements a function. For instance, writing + :func:`random.seed` or :func:`itertools.izip` makes it clear that those + functions are implemented by the :mod:`random` and :mod:`itertools` modules + respectively. + + .. index:: single: nested scope + +nested scope + The ability to refer to a variable in an enclosing definition. For instance, a + function defined inside another function can refer to variables in the outer + function. Note that nested scopes work only for reference and not for + assignment which will always write to the innermost scope. In contrast, local + variables both read and write in the innermost scope. Likewise, global + variables read and write to the global namespace. + + .. index:: single: new-style class + +new-style class + Any class that inherits from :class:`object`. This includes all built-in types + like :class:`list` and :class:`dict`. Only new-style classes can use Python's + newer, versatile features like :meth:`__slots__`, descriptors, properties, + :meth:`__getattribute__`, class methods, and static methods. + + .. index:: single: Python3000 + +Python3000 + A mythical python release, not required to be backward compatible, with + telepathic interface. + + .. index:: single: __slots__ + +__slots__ + A declaration inside a *new-style class* that saves memory by pre-declaring + space for instance attributes and eliminating instance dictionaries. Though + popular, the technique is somewhat tricky to get right and is best reserved for + rare cases where there are large numbers of instances in a memory-critical + application. + + .. 
index:: single: sequence + +sequence + An *iterable* which supports efficient element access using integer indices via + the :meth:`__getitem__` and :meth:`__len__` special methods. Some built-in + sequence types are :class:`list`, :class:`str`, :class:`tuple`, and + :class:`unicode`. Note that :class:`dict` also supports :meth:`__getitem__` and + :meth:`__len__`, but is considered a mapping rather than a sequence because the + lookups use arbitrary *immutable* keys rather than integers. + + .. index:: single: Zen of Python + +Zen of Python + Listing of Python design principles and philosophies that are helpful in + understanding and using the language. The listing can be found by typing + "``import this``" at the interactive prompt. + diff --git a/Doc/tutorial/index.rst b/Doc/tutorial/index.rst new file mode 100644 index 0000000..7309b7c --- /dev/null +++ b/Doc/tutorial/index.rst @@ -0,0 +1,60 @@ +.. _tutorial-index: + +###################### + The Python tutorial +###################### + +:Release: |version| +:Date: |today| + +Python is an easy to learn, powerful programming language. It has efficient +high-level data structures and a simple but effective approach to +object-oriented programming. Python's elegant syntax and dynamic typing, +together with its interpreted nature, make it an ideal language for scripting +and rapid application development in many areas on most platforms. + +The Python interpreter and the extensive standard library are freely available +in source or binary form for all major platforms from the Python Web site, +http://www.python.org/, and may be freely distributed. The same site also +contains distributions of and pointers to many free third party Python modules, +programs and tools, and additional documentation. + +The Python interpreter is easily extended with new functions and data types +implemented in C or C++ (or other languages callable from C). Python is also +suitable as an extension language for customizable applications. 
+ +This tutorial introduces the reader informally to the basic concepts and +features of the Python language and system. It helps to have a Python +interpreter handy for hands-on experience, but all examples are self-contained, +so the tutorial can be read off-line as well. + +For a description of standard objects and modules, see the Python Library +Reference document. The Python Reference Manual gives a more formal definition +of the language. To write extensions in C or C++, read Extending and Embedding +the Python Interpreter and Python/C API Reference. There are also several books +covering Python in depth. + +This tutorial does not attempt to be comprehensive and cover every single +feature, or even every commonly used feature. Instead, it introduces many of +Python's most noteworthy features, and will give you a good idea of the +language's flavor and style. After reading it, you will be able to read and +write Python modules and programs, and you will be ready to learn more about the +various Python library modules described in the Python Library Reference. + +.. toctree:: + + appetite.rst + interpreter.rst + introduction.rst + controlflow.rst + datastructures.rst + modules.rst + inputoutput.rst + errors.rst + classes.rst + stdlib.rst + stdlib2.rst + whatnow.rst + interactive.rst + floatingpoint.rst + glossary.rst diff --git a/Doc/tutorial/inputoutput.rst b/Doc/tutorial/inputoutput.rst new file mode 100644 index 0000000..9c302af --- /dev/null +++ b/Doc/tutorial/inputoutput.rst @@ -0,0 +1,354 @@ +.. _tut-io: + +**************** +Input and Output +**************** + +There are several ways to present the output of a program; data can be printed +in a human-readable form, or written to a file for future use. This chapter will +discuss some of the possibilities. + + +.. _tut-formatting: + +Fancier Output Formatting +========================= + +So far we've encountered two ways of writing values: *expression statements* and +the :keyword:`print` statement. 
(A third way is using the :meth:`write` method +of file objects; the standard output file can be referenced as ``sys.stdout``. +See the Library Reference for more information on this.) + +.. index:: module: string + +Often you'll want more control over the formatting of your output than simply +printing space-separated values. There are two ways to format your output; the +first way is to do all the string handling yourself; using string slicing and +concatenation operations you can create any layout you can imagine. The +standard module :mod:`string` contains some useful operations for padding +strings to a given column width; these will be discussed shortly. The second +way is to use the ``%`` operator with a string as the left argument. The ``%`` +operator interprets the left argument much like a :cfunc:`sprintf`\ -style +format string to be applied to the right argument, and returns the string +resulting from this formatting operation. + +One question remains, of course: how do you convert values to strings? Luckily, +Python has ways to convert any value to a string: pass it to the :func:`repr` +or :func:`str` functions. Reverse quotes (``````) are equivalent to +:func:`repr`, but they are no longer used in modern Python code and will likely +not be in future versions of the language. + +The :func:`str` function is meant to return representations of values which are +fairly human-readable, while :func:`repr` is meant to generate representations +which can be read by the interpreter (or will force a :exc:`SyntaxError` if +there is no equivalent syntax). For objects which don't have a particular +representation for human consumption, :func:`str` will return the same value as +:func:`repr`. Many values, such as numbers or structures like lists and +dictionaries, have the same representation using either function. Strings and +floating point numbers, in particular, have two distinct representations. + +Some examples:: + + >>> s = 'Hello, world.'
+ >>> str(s) + 'Hello, world.' + >>> repr(s) + "'Hello, world.'" + >>> str(0.1) + '0.1' + >>> repr(0.1) + '0.10000000000000001' + >>> x = 10 * 3.25 + >>> y = 200 * 200 + >>> s = 'The value of x is ' + repr(x) + ', and y is ' + repr(y) + '...' + >>> print s + The value of x is 32.5, and y is 40000... + >>> # The repr() of a string adds string quotes and backslashes: + ... hello = 'hello, world\n' + >>> hellos = repr(hello) + >>> print hellos + 'hello, world\n' + >>> # The argument to repr() may be any Python object: + ... repr((x, y, ('spam', 'eggs'))) + "(32.5, 40000, ('spam', 'eggs'))" + >>> # reverse quotes are convenient in interactive sessions: + ... `x, y, ('spam', 'eggs')` + "(32.5, 40000, ('spam', 'eggs'))" + +Here are two ways to write a table of squares and cubes:: + + >>> for x in range(1, 11): + ... print repr(x).rjust(2), repr(x*x).rjust(3), + ... # Note trailing comma on previous line + ... print repr(x*x*x).rjust(4) + ... + 1 1 1 + 2 4 8 + 3 9 27 + 4 16 64 + 5 25 125 + 6 36 216 + 7 49 343 + 8 64 512 + 9 81 729 + 10 100 1000 + + >>> for x in range(1,11): + ... print '%2d %3d %4d' % (x, x*x, x*x*x) + ... + 1 1 1 + 2 4 8 + 3 9 27 + 4 16 64 + 5 25 125 + 6 36 216 + 7 49 343 + 8 64 512 + 9 81 729 + 10 100 1000 + +(Note that in the first example, one space between each column was added by the +way :keyword:`print` works: it always adds spaces between its arguments.) + +This example demonstrates the :meth:`rjust` method of string objects, which +right-justifies a string in a field of a given width by padding it with spaces +on the left. There are similar methods :meth:`ljust` and :meth:`center`. These +methods do not write anything, they just return a new string. If the input +string is too long, they don't truncate it, but return it unchanged; this will +mess up your column lay-out but that's usually better than the alternative, +which would be lying about a value. 
(If you really want truncation you can +always add a slice operation, as in ``x.ljust(n)[:n]``.) + +There is another method, :meth:`zfill`, which pads a numeric string on the left +with zeros. It understands about plus and minus signs:: + + >>> '12'.zfill(5) + '00012' + >>> '-3.14'.zfill(7) + '-003.14' + >>> '3.14159265359'.zfill(5) + '3.14159265359' + +Using the ``%`` operator looks like this:: + + >>> import math + >>> print 'The value of PI is approximately %5.3f.' % math.pi + The value of PI is approximately 3.142. + +If there is more than one format in the string, you need to pass a tuple as +right operand, as in this example:: + + >>> table = {'Sjoerd': 4127, 'Jack': 4098, 'Dcab': 7678} + >>> for name, phone in table.items(): + ... print '%-10s ==> %10d' % (name, phone) + ... + Jack ==> 4098 + Dcab ==> 7678 + Sjoerd ==> 4127 + +Most formats work exactly as in C and require that you pass the proper type; +however, if you don't you get an exception, not a core dump. The ``%s`` format +is more relaxed: if the corresponding argument is not a string object, it is +converted to string using the :func:`str` built-in function. Using ``*`` to +pass the width or precision in as a separate (integer) argument is supported. +The C formats ``%n`` and ``%p`` are not supported. + +If you have a really long format string that you don't want to split up, it +would be nice if you could reference the variables to be formatted by name +instead of by position. This can be done by using form ``%(name)format``, as +shown here:: + + >>> table = {'Sjoerd': 4127, 'Jack': 4098, 'Dcab': 8637678} + >>> print 'Jack: %(Jack)d; Sjoerd: %(Sjoerd)d; Dcab: %(Dcab)d' % table + Jack: 4098; Sjoerd: 4127; Dcab: 8637678 + +This is particularly useful in combination with the new built-in :func:`vars` +function, which returns a dictionary containing all local variables. + + +.. _tut-files: + +Reading and Writing Files +========================= + +.. 
index:: + builtin: open + object: file + +:func:`open` returns a file object, and is most commonly used with two +arguments: ``open(filename, mode)``. + +.. % Opening files + +:: + + >>> f=open('/tmp/workfile', 'w') + >>> print f + <open file '/tmp/workfile', mode 'w' at 80a0960> + +The first argument is a string containing the filename. The second argument is +another string containing a few characters describing the way in which the file +will be used. *mode* can be ``'r'`` when the file will only be read, ``'w'`` +for only writing (an existing file with the same name will be erased), and +``'a'`` opens the file for appending; any data written to the file is +automatically added to the end. ``'r+'`` opens the file for both reading and +writing. The *mode* argument is optional; ``'r'`` will be assumed if it's +omitted. + +On Windows and the Macintosh, ``'b'`` appended to the mode opens the file in +binary mode, so there are also modes like ``'rb'``, ``'wb'``, and ``'r+b'``. +Windows makes a distinction between text and binary files; the end-of-line +characters in text files are automatically altered slightly when data is read or +written. This behind-the-scenes modification to file data is fine for ASCII +text files, but it'll corrupt binary data like that in :file:`JPEG` or +:file:`EXE` files. Be very careful to use binary mode when reading and writing +such files. + + +.. _tut-filemethods: + +Methods of File Objects +----------------------- + +The rest of the examples in this section will assume that a file object called +``f`` has already been created. + +To read a file's contents, call ``f.read(size)``, which reads some quantity of +data and returns it as a string. *size* is an optional numeric argument. When +*size* is omitted or negative, the entire contents of the file will be read and +returned; it's your problem if the file is twice as large as your machine's +memory. Otherwise, at most *size* bytes are read and returned. 
If the end of +the file has been reached, ``f.read()`` will return an empty string (``""``). +:: + + >>> f.read() + 'This is the entire file.\n' + >>> f.read() + '' + +``f.readline()`` reads a single line from the file; a newline character (``\n``) +is left at the end of the string, and is only omitted on the last line of the +file if the file doesn't end in a newline. This makes the return value +unambiguous; if ``f.readline()`` returns an empty string, the end of the file +has been reached, while a blank line is represented by ``'\n'``, a string +containing only a single newline. :: + + >>> f.readline() + 'This is the first line of the file.\n' + >>> f.readline() + 'Second line of the file\n' + >>> f.readline() + '' + +``f.readlines()`` returns a list containing all the lines of data in the file. +If given an optional parameter *sizehint*, it reads that many bytes from the +file and enough more to complete a line, and returns the lines from that. This +is often used to allow efficient reading of a large file by lines, but without +having to load the entire file in memory. Only complete lines will be returned. +:: + + >>> f.readlines() + ['This is the first line of the file.\n', 'Second line of the file\n'] + +An alternate approach to reading lines is to loop over the file object. This is +memory efficient, fast, and leads to simpler code:: + + >>> for line in f: + print line, + + This is the first line of the file. + Second line of the file + +The alternative approach is simpler but does not provide as fine-grained +control. Since the two approaches manage line buffering differently, they +should not be mixed. + +``f.write(string)`` writes the contents of *string* to the file, returning +``None``. 
:: + + >>> f.write('This is a test\n') + +To write something other than a string, it needs to be converted to a string +first:: + + >>> value = ('the answer', 42) + >>> s = str(value) + >>> f.write(s) + +``f.tell()`` returns an integer giving the file object's current position in the +file, measured in bytes from the beginning of the file. To change the file +object's position, use ``f.seek(offset, from_what)``. The position is computed +from adding *offset* to a reference point; the reference point is selected by +the *from_what* argument. A *from_what* value of 0 measures from the beginning +of the file, 1 uses the current file position, and 2 uses the end of the file as +the reference point. *from_what* can be omitted and defaults to 0, using the +beginning of the file as the reference point. :: + + >>> f = open('/tmp/workfile', 'r+') + >>> f.write('0123456789abcdef') + >>> f.seek(5) # Go to the 6th byte in the file + >>> f.read(1) + '5' + >>> f.seek(-3, 2) # Go to the 3rd byte before the end + >>> f.read(1) + 'd' + +When you're done with a file, call ``f.close()`` to close it and free up any +system resources taken up by the open file. After calling ``f.close()``, +attempts to use the file object will automatically fail. :: + + >>> f.close() + >>> f.read() + Traceback (most recent call last): + File "<stdin>", line 1, in ? + ValueError: I/O operation on closed file + +File objects have some additional methods, such as :meth:`isatty` and +:meth:`truncate` which are less frequently used; consult the Library Reference +for a complete guide to file objects. + + +.. _tut-pickle: + +The :mod:`pickle` Module +------------------------ + +.. index:: module: pickle + +Strings can easily be written to and read from a file. Numbers take a bit more +effort, since the :meth:`read` method only returns strings, which will have to +be passed to a function like :func:`int`, which takes a string like ``'123'`` +and returns its numeric value 123. 
However, when you want to save more complex +data types like lists, dictionaries, or class instances, things get a lot more +complicated. + +Rather than have users be constantly writing and debugging code to save +complicated data types, Python provides a standard module called :mod:`pickle`. +This is an amazing module that can take almost any Python object (even some +forms of Python code!), and convert it to a string representation; this process +is called :dfn:`pickling`. Reconstructing the object from the string +representation is called :dfn:`unpickling`. Between pickling and unpickling, +the string representing the object may have been stored in a file or data, or +sent over a network connection to some distant machine. + +If you have an object ``x``, and a file object ``f`` that's been opened for +writing, the simplest way to pickle the object takes only one line of code:: + + pickle.dump(x, f) + +To unpickle the object again, if ``f`` is a file object which has been opened +for reading:: + + x = pickle.load(f) + +(There are other variants of this, used when pickling many objects or when you +don't want to write the pickled data to a file; consult the complete +documentation for :mod:`pickle` in the Python Library Reference.) + +:mod:`pickle` is the standard way to make Python objects which can be stored and +reused by other programs or by a future invocation of the same program; the +technical term for this is a :dfn:`persistent` object. Because :mod:`pickle` is +so widely used, many authors who write Python extensions take care to ensure +that new data types such as matrices can be properly pickled and unpickled. + + diff --git a/Doc/tutorial/interactive.rst b/Doc/tutorial/interactive.rst new file mode 100644 index 0000000..8eeca2a --- /dev/null +++ b/Doc/tutorial/interactive.rst @@ -0,0 +1,167 @@ +.. 
_tut-interacting: + +************************************************** +Interactive Input Editing and History Substitution +************************************************** + +Some versions of the Python interpreter support editing of the current input +line and history substitution, similar to facilities found in the Korn shell and +the GNU Bash shell. This is implemented using the *GNU Readline* library, which +supports Emacs-style and vi-style editing. This library has its own +documentation which I won't duplicate here; however, the basics are easily +explained. The interactive editing and history described here are optionally +available in the Unix and Cygwin versions of the interpreter. + +This chapter does *not* document the editing facilities of Mark Hammond's +PythonWin package or the Tk-based environment, IDLE, distributed with Python. +The command line history recall which operates within DOS boxes on NT and some +other DOS and Windows flavors is yet another beast. + + +.. _tut-lineediting: + +Line Editing +============ + +If supported, input line editing is active whenever the interpreter prints a +primary or secondary prompt. The current line can be edited using the +conventional Emacs control characters. The most important of these are: +:kbd:`C-A` (Control-A) moves the cursor to the beginning of the line, :kbd:`C-E` +to the end, :kbd:`C-B` moves it one position to the left, :kbd:`C-F` to the +right. Backspace erases the character to the left of the cursor, :kbd:`C-D` the +character to its right. :kbd:`C-K` kills (erases) the rest of the line to the +right of the cursor, :kbd:`C-Y` yanks back the last killed string. +:kbd:`C-underscore` undoes the last change you made; it can be repeated for +cumulative effect. + + +.. _tut-history: + +History Substitution +==================== + +History substitution works as follows. 
All non-empty input lines issued are +saved in a history buffer, and when a new prompt is given you are positioned on +a new line at the bottom of this buffer. :kbd:`C-P` moves one line up (back) in +the history buffer, :kbd:`C-N` moves one down. Any line in the history buffer +can be edited; an asterisk appears in front of the prompt to mark a line as +modified. Pressing the :kbd:`Return` key passes the current line to the +interpreter. :kbd:`C-R` starts an incremental reverse search; :kbd:`C-S` starts +a forward search. + + +.. _tut-keybindings: + +Key Bindings +============ + +The key bindings and some other parameters of the Readline library can be +customized by placing commands in an initialization file called +:file:`~/.inputrc`. Key bindings have the form :: + + key-name: function-name + +or :: + + "string": function-name + +and options can be set with :: + + set option-name value + +For example:: + + # I prefer vi-style editing: + set editing-mode vi + + # Edit using a single line: + set horizontal-scroll-mode On + + # Rebind some keys: + Meta-h: backward-kill-word + "\C-u": universal-argument + "\C-x\C-r": re-read-init-file + +Note that the default binding for :kbd:`Tab` in Python is to insert a :kbd:`Tab` +character instead of Readline's default filename completion function. If you +insist, you can override this by putting :: + + Tab: complete + +in your :file:`~/.inputrc`. (Of course, this makes it harder to type indented +continuation lines if you're accustomed to using :kbd:`Tab` for that purpose.) + +.. index:: + module: rlcompleter + module: readline + +Automatic completion of variable and module names is optionally available. 
To +enable it in the interpreter's interactive mode, add the following to your +startup file: [#]_ :: + + import rlcompleter, readline + readline.parse_and_bind('tab: complete') + +This binds the :kbd:`Tab` key to the completion function, so hitting the +:kbd:`Tab` key twice suggests completions; it looks at Python statement names, +the current local variables, and the available module names. For dotted +expressions such as ``string.a``, it will evaluate the expression up to the +final ``'.'`` and then suggest completions from the attributes of the resulting +object. Note that this may execute application-defined code if an object with a +:meth:`__getattr__` method is part of the expression. + +A more capable startup file might look like this example. Note that this +deletes the names it creates once they are no longer needed; this is done since +the startup file is executed in the same namespace as the interactive commands, +and removing the names avoids creating side effects in the interactive +environment. You may find it convenient to keep some of the imported modules, +such as :mod:`os`, which turn out to be needed in most sessions with the +interpreter. :: + + # Add auto-completion and a stored history file of commands to your Python + # interactive interpreter. Requires Python 2.0+, readline. Autocomplete is + # bound to the Esc key by default (you can change it - see readline docs). + # + # Store the file in ~/.pystartup, and set an environment variable to point + # to it: "export PYTHONSTARTUP=/max/home/itamar/.pystartup" in bash. + # + # Note that PYTHONSTARTUP does *not* expand "~", so you have to put in the + # full path to your home directory. 
+ + import atexit + import os + import readline + import rlcompleter + + historyPath = os.path.expanduser("~/.pyhistory") + + def save_history(historyPath=historyPath): + import readline + readline.write_history_file(historyPath) + + if os.path.exists(historyPath): + readline.read_history_file(historyPath) + + atexit.register(save_history) + del os, atexit, readline, rlcompleter, save_history, historyPath + + +.. _tut-commentary: + +Commentary +========== + +This facility is an enormous step forward compared to earlier versions of the +interpreter; however, some wishes are left: It would be nice if the proper +indentation were suggested on continuation lines (the parser knows if an indent +token is required next). The completion mechanism might use the interpreter's +symbol table. A command to check (or even suggest) matching parentheses, +quotes, etc., would also be useful. + + +.. rubric:: Footnotes + +.. [#] Python will execute the contents of a file identified by the + :envvar:`PYTHONSTARTUP` environment variable when you start an interactive + interpreter. + diff --git a/Doc/tutorial/interpreter.rst b/Doc/tutorial/interpreter.rst new file mode 100644 index 0000000..8b42090 --- /dev/null +++ b/Doc/tutorial/interpreter.rst @@ -0,0 +1,248 @@ +.. _tut-using: + +**************************** +Using the Python Interpreter +**************************** + + +.. _tut-invoking: + +Invoking the Interpreter +======================== + +The Python interpreter is usually installed as :file:`/usr/local/bin/python` on +those machines where it is available; putting :file:`/usr/local/bin` in your +Unix shell's search path makes it possible to start it by typing the command :: + + python + +to the shell. Since the choice of the directory where the interpreter lives is +an installation option, other places are possible; check with your local Python +guru or system administrator. (E.g., :file:`/usr/local/python` is a popular +alternative location.) 
+ +On Windows machines, the Python installation is usually placed in +:file:`C:\Python30`, though you can change this when you're running the +installer. To add this directory to your path, you can type the following +command into the command prompt in a DOS box:: + + set path=%path%;C:\python30 + +Typing an end-of-file character (:kbd:`Control-D` on Unix, :kbd:`Control-Z` on +Windows) at the primary prompt causes the interpreter to exit with a zero exit +status. If that doesn't work, you can exit the interpreter by typing the +following commands: ``import sys; sys.exit()``. + +The interpreter's line-editing features usually aren't very sophisticated. On +Unix, whoever installed the interpreter may have enabled support for the GNU +readline library, which adds more elaborate interactive editing and history +features. Perhaps the quickest check to see whether command line editing is +supported is typing Control-P to the first Python prompt you get. If it beeps, +you have command line editing; see Appendix :ref:`tut-interacting` for an +introduction to the keys. If nothing appears to happen, or if ``^P`` is echoed, +command line editing isn't available; you'll only be able to use backspace to +remove characters from the current line. + +The interpreter operates somewhat like the Unix shell: when called with standard +input connected to a tty device, it reads and executes commands interactively; +when called with a file name argument or with a file as standard input, it reads +and executes a *script* from that file. + +A second way of starting the interpreter is ``python -c command [arg] ...``, +which executes the statement(s) in *command*, analogous to the shell's +:option:`-c` option. Since Python statements often contain spaces or other +characters that are special to the shell, it is best to quote *command* in its +entirety with double quotes. + +Some Python modules are also useful as scripts. 
These can be invoked using +``python -m module [arg] ...``, which executes the source file for *module* as +if you had spelled out its full name on the command line. + +Note that there is a difference between ``python file`` and ``python <file``. +In the latter case, input requests from the program, such as calling +``sys.stdin.read()``, are satisfied from *file*. Since this file has already +been read until the end by the parser before the program starts executing, the +program will encounter end-of-file immediately. In the former case (which is +usually what you want) they are satisfied from whatever file or device is +connected to standard input of the Python interpreter. + +When a script file is used, it is sometimes useful to be able to run the script +and enter interactive mode afterwards. This can be done by passing :option:`-i` +before the script. (This does not work if the script is read from standard +input, for the same reason as explained in the previous paragraph.) + + +.. _tut-argpassing: + +Argument Passing +---------------- + +When known to the interpreter, the script name and additional arguments +thereafter are passed to the script in the variable ``sys.argv``, which is a +list of strings. Its length is at least one; when no script and no arguments +are given, ``sys.argv[0]`` is an empty string. When the script name is given as +``'-'`` (meaning standard input), ``sys.argv[0]`` is set to ``'-'``. When +:option:`-c` *command* is used, ``sys.argv[0]`` is set to ``'-c'``. When +:option:`-m` *module* is used, ``sys.argv[0]`` is set to the full name of the +located module. Options found after :option:`-c` *command* or :option:`-m` +*module* are not consumed by the Python interpreter's option processing but +left in ``sys.argv`` for the command or module to handle. + + +.. _tut-interactive: + +Interactive Mode +---------------- + +When commands are read from a tty, the interpreter is said to be in *interactive +mode*. 
In this mode it prompts for the next command with the *primary prompt*, +usually three greater-than signs (``>>>``); for continuation lines it prompts +with the *secondary prompt*, by default three dots (``...``). The interpreter +prints a welcome message stating its version number and a copyright notice +before printing the first prompt:: + + python + Python 1.5.2b2 (#1, Feb 28 1999, 00:02:06) [GCC 2.8.1] on sunos5 + Copyright 1991-1995 Stichting Mathematisch Centrum, Amsterdam + >>> + +Continuation lines are needed when entering a multi-line construct. As an +example, take a look at this :keyword:`if` statement:: + + >>> the_world_is_flat = 1 + >>> if the_world_is_flat: + ... print "Be careful not to fall off!" + ... + Be careful not to fall off! + + +.. _tut-interp: + +The Interpreter and Its Environment +=================================== + + +.. _tut-error: + +Error Handling +-------------- + +When an error occurs, the interpreter prints an error message and a stack trace. +In interactive mode, it then returns to the primary prompt; when input came from +a file, it exits with a nonzero exit status after printing the stack trace. +(Exceptions handled by an :keyword:`except` clause in a :keyword:`try` statement +are not errors in this context.) Some errors are unconditionally fatal and +cause an exit with a nonzero exit status; this applies to internal inconsistencies and +some cases of running out of memory. All error messages are written to the +standard error stream; normal output from executed commands is written to +standard output. + +Typing the interrupt character (usually Control-C or DEL) to the primary or +secondary prompt cancels the input and returns to the primary prompt. [#]_ +Typing an interrupt while a command is executing raises the +:exc:`KeyboardInterrupt` exception, which may be handled by a :keyword:`try` +statement. + + +.. 
_tut-scripts: + +Executable Python Scripts +------------------------- + +On BSD'ish Unix systems, Python scripts can be made directly executable, like +shell scripts, by putting the line :: + + #! /usr/bin/env python + +(assuming that the interpreter is on the user's :envvar:`PATH`) at the beginning +of the script and giving the file an executable mode. The ``#!`` must be the +first two characters of the file. On some platforms, this first line must end +with a Unix-style line ending (``'\n'``), not a Mac OS (``'\r'``) or Windows +(``'\r\n'``) line ending. Note that the hash, or pound, character, ``'#'``, is +used to start a comment in Python. + +The script can be given an executable mode, or permission, using the +:program:`chmod` command:: + + $ chmod +x myscript.py + + +Source Code Encoding +-------------------- + +It is possible to use encodings different than ASCII in Python source files. The +best way to do it is to put one more special comment line right after the ``#!`` +line to define the source file encoding:: + + # -*- coding: encoding -*- + + +With that declaration, all characters in the source file will be treated as +having the encoding *encoding*, and it will be possible to directly write +Unicode string literals in the selected encoding. The list of possible +encodings can be found in the Python Library Reference, in the section on +:mod:`codecs`. + +For example, to write Unicode literals including the Euro currency symbol, the +ISO-8859-15 encoding can be used, with the Euro symbol having the ordinal value +164. This script will print the value 8364 (the Unicode codepoint corresponding +to the Euro symbol) and then exit:: + + # -*- coding: iso-8859-15 -*- + + currency = u"€" + print ord(currency) + +If your editor supports saving files as ``UTF-8`` with a UTF-8 *byte order mark* +(aka BOM), you can use that instead of an encoding declaration. IDLE supports +this capability if ``Options/General/Default Source Encoding/UTF-8`` is set. 
+Notice that this signature is not understood in older Python releases (2.2 and +earlier), and also not understood by the operating system for script files with +``#!`` lines (only used on Unix systems). + +By using UTF-8 (either through the signature or an encoding declaration), +characters of most languages in the world can be used simultaneously in string +literals and comments. Using non-ASCII characters in identifiers is not +supported. To display all these characters properly, your editor must recognize +that the file is UTF-8, and it must use a font that supports all the characters +in the file. + + +.. _tut-startup: + +The Interactive Startup File +---------------------------- + +When you use Python interactively, it is frequently handy to have some standard +commands executed every time the interpreter is started. You can do this by +setting an environment variable named :envvar:`PYTHONSTARTUP` to the name of a +file containing your start-up commands. This is similar to the :file:`.profile` +feature of the Unix shells. + +.. % XXX This should probably be dumped in an appendix, since most people +.. % don't use Python interactively in non-trivial ways. + +This file is only read in interactive sessions, not when Python reads commands +from a script, and not when :file:`/dev/tty` is given as the explicit source of +commands (which otherwise behaves like an interactive session). It is executed +in the same namespace where interactive commands are executed, so that objects +that it defines or imports can be used without qualification in the interactive +session. You can also change the prompts ``sys.ps1`` and ``sys.ps2`` in this +file. + +If you want to read an additional start-up file from the current directory, you +can program this in the global start-up file using code like ``if +os.path.isfile('.pythonrc.py'): exec(open('.pythonrc.py').read())``. 
+If you want to use the startup file in a script, you must do this explicitly +in the script:: + + import os + filename = os.environ.get('PYTHONSTARTUP') + if filename and os.path.isfile(filename): + exec(open(filename).read()) + + +.. rubric:: Footnotes + +.. [#] A problem with the GNU Readline package may prevent this. + diff --git a/Doc/tutorial/introduction.rst b/Doc/tutorial/introduction.rst new file mode 100644 index 0000000..e209bfc --- /dev/null +++ b/Doc/tutorial/introduction.rst @@ -0,0 +1,645 @@ +.. _tut-informal: + +********************************** +An Informal Introduction to Python +********************************** + +In the following examples, input and output are distinguished by the presence or +absence of prompts (``>>>`` and ``...``): to repeat the example, you must type +everything after the prompt, when the prompt appears; lines that do not begin +with a prompt are output from the interpreter. Note that a secondary prompt on a +line by itself in an example means you must type a blank line; this is used to +end a multi-line command. + +.. % +.. % \footnote{ +.. % I'd prefer to use different fonts to distinguish input +.. % from output, but the amount of LaTeX hacking that would require +.. % is currently beyond my ability. +.. % } + +Many of the examples in this manual, even those entered at the interactive +prompt, include comments. Comments in Python start with the hash character, +``'#'``, and extend to the end of the physical line. A comment may appear at +the start of a line or following whitespace or code, but not within a string +literal. A hash character within a string literal is just a hash character. + +Some examples:: + + # this is the first comment + SPAM = 1 # and this is the second comment + # ... and now a third! + STRING = "# This is not a comment." + + +.. _tut-calculator: + +Using Python as a Calculator +============================ + +Let's try some simple Python commands. 
Start the interpreter and wait for the +primary prompt, ``>>>``. (It shouldn't take long.) + + +.. _tut-numbers: + +Numbers +------- + +The interpreter acts as a simple calculator: you can type an expression at it +and it will write the value. Expression syntax is straightforward: the +operators ``+``, ``-``, ``*`` and ``/`` work just like in most other languages +(for example, Pascal or C); parentheses can be used for grouping. For example:: + + >>> 2+2 + 4 + >>> # This is a comment + ... 2+2 + 4 + >>> 2+2 # and a comment on the same line as code + 4 + >>> (50-5*6)/4 + 5 + >>> # Integer division returns the floor: + ... 7/3 + 2 + >>> 7/-3 + -3 + +The equal sign (``'='``) is used to assign a value to a variable. Afterwards, no +result is displayed before the next interactive prompt:: + + >>> width = 20 + >>> height = 5*9 + >>> width * height + 900 + +A value can be assigned to several variables simultaneously:: + + >>> x = y = z = 0 # Zero x, y and z + >>> x + 0 + >>> y + 0 + >>> z + 0 + +There is full support for floating point; operators with mixed type operands +convert the integer operand to floating point:: + + >>> 3 * 3.75 / 1.5 + 7.5 + >>> 7.0 / 2 + 3.5 + +Complex numbers are also supported; imaginary numbers are written with a suffix +of ``j`` or ``J``. Complex numbers with a nonzero real component are written as +``(real+imagj)``, or can be created with the ``complex(real, imag)`` function. +:: + + >>> 1j * 1J + (-1+0j) + >>> 1j * complex(0,1) + (-1+0j) + >>> 3+1j*3 + (3+3j) + >>> (3+1j)*3 + (9+3j) + >>> (1+2j)/(1+1j) + (1.5+0.5j) + +Complex numbers are always represented as two floating point numbers, the real +and imaginary part. To extract these parts from a complex number *z*, use +``z.real`` and ``z.imag``. 
:: + + >>> a=1.5+0.5j + >>> a.real + 1.5 + >>> a.imag + 0.5 + +The conversion functions to floating point and integer (:func:`float`, +:func:`int` and :func:`long`) don't work for complex numbers --- there is no one +correct way to convert a complex number to a real number. Use ``abs(z)`` to get +its magnitude (as a float) or ``z.real`` to get its real part. :: + + >>> a=3.0+4.0j + >>> float(a) + Traceback (most recent call last): + File "<stdin>", line 1, in ? + TypeError: can't convert complex to float; use abs(z) + >>> a.real + 3.0 + >>> a.imag + 4.0 + >>> abs(a) # sqrt(a.real**2 + a.imag**2) + 5.0 + >>> + +In interactive mode, the last printed expression is assigned to the variable +``_``. This means that when you are using Python as a desk calculator, it is +somewhat easier to continue calculations, for example:: + + >>> tax = 12.5 / 100 + >>> price = 100.50 + >>> price * tax + 12.5625 + >>> price + _ + 113.0625 + >>> round(_, 2) + 113.06 + >>> + +This variable should be treated as read-only by the user. Don't explicitly +assign a value to it --- you would create an independent local variable with the +same name masking the built-in variable with its magic behavior. + + +.. _tut-strings: + +Strings +------- + +Besides numbers, Python can also manipulate strings, which can be expressed in +several ways. They can be enclosed in single quotes or double quotes:: + + >>> 'spam eggs' + 'spam eggs' + >>> 'doesn\'t' + "doesn't" + >>> "doesn't" + "doesn't" + >>> '"Yes," he said.' + '"Yes," he said.' + >>> "\"Yes,\" he said." + '"Yes," he said.' + >>> '"Isn\'t," she said.' + '"Isn\'t," she said.' + +String literals can span multiple lines in several ways. 
Continuation lines can +be used, with a backslash as the last character on the line indicating that the +next line is a logical continuation of the line:: + + hello = "This is a rather long string containing\n\ + several lines of text just as you would do in C.\n\ + Note that whitespace at the beginning of the line is\ + significant." + + print hello + +Note that newlines still need to be embedded in the string using ``\n``; the +newline following the trailing backslash is discarded. This example would print +the following:: + + This is a rather long string containing + several lines of text just as you would do in C. + Note that whitespace at the beginning of the line is significant. + +If we make the string literal a "raw" string, however, the ``\n`` sequences are +not converted to newlines, but the backslash at the end of the line, and the +newline character in the source, are both included in the string as data. Thus, +the example:: + + hello = r"This is a rather long string containing\n\ + several lines of text much as you would do in C." + + print hello + +would print:: + + This is a rather long string containing\n\ + several lines of text much as you would do in C. + +Or, strings can be surrounded in a pair of matching triple-quotes: ``"""`` or +``'''``. End of lines do not need to be escaped when using triple-quotes, but +they will be included in the string. :: + + print """ + Usage: thingy [OPTIONS] + -h Display this usage message + -H hostname Hostname to connect to + """ + +produces the following output:: + + Usage: thingy [OPTIONS] + -h Display this usage message + -H hostname Hostname to connect to + +The interpreter prints the result of string operations in the same way as they +are typed for input: inside quotes, and with quotes and other funny characters +escaped by backslashes, to show the precise value. The string is enclosed in +double quotes if the string contains a single quote and no double quotes, else +it's enclosed in single quotes. 
(The :keyword:`print` statement, described +later, can be used to write strings without quotes or escapes.) + +Strings can be concatenated (glued together) with the ``+`` operator, and +repeated with ``*``:: + + >>> word = 'Help' + 'A' + >>> word + 'HelpA' + >>> '<' + word*5 + '>' + '<HelpAHelpAHelpAHelpAHelpA>' + +Two string literals next to each other are automatically concatenated; the first +line above could also have been written ``word = 'Help' 'A'``; this only works +with two literals, not with arbitrary string expressions:: + + >>> 'str' 'ing' # <- This is ok + 'string' + >>> 'str'.strip() + 'ing' # <- This is ok + 'string' + >>> 'str'.strip() 'ing' # <- This is invalid + File "<stdin>", line 1, in ? + 'str'.strip() 'ing' + ^ + SyntaxError: invalid syntax + +Strings can be subscripted (indexed); like in C, the first character of a string +has subscript (index) 0. There is no separate character type; a character is +simply a string of size one. Like in Icon, substrings can be specified with the +*slice notation*: two indices separated by a colon. :: + + >>> word[4] + 'A' + >>> word[0:2] + 'He' + >>> word[2:4] + 'lp' + +Slice indices have useful defaults; an omitted first index defaults to zero, an +omitted second index defaults to the size of the string being sliced. :: + + >>> word[:2] # The first two characters + 'He' + >>> word[2:] # Everything except the first two characters + 'lpA' + +Unlike a C string, Python strings cannot be changed. Assigning to an indexed +position in the string results in an error:: + + >>> word[0] = 'x' + Traceback (most recent call last): + File "<stdin>", line 1, in ? + TypeError: object doesn't support item assignment + >>> word[:1] = 'Splat' + Traceback (most recent call last): + File "<stdin>", line 1, in ? 
+ TypeError: object doesn't support slice assignment + +However, creating a new string with the combined content is easy and efficient:: + + >>> 'x' + word[1:] + 'xelpA' + >>> 'Splat' + word[4] + 'SplatA' + +Here's a useful invariant of slice operations: ``s[:i] + s[i:]`` equals ``s``. +:: + + >>> word[:2] + word[2:] + 'HelpA' + >>> word[:3] + word[3:] + 'HelpA' + +Degenerate slice indices are handled gracefully: an index that is too large is +replaced by the string size, an upper bound smaller than the lower bound returns +an empty string. :: + + >>> word[1:100] + 'elpA' + >>> word[10:] + '' + >>> word[2:1] + '' + +Indices may be negative numbers, to start counting from the right. For example:: + + >>> word[-1] # The last character + 'A' + >>> word[-2] # The last-but-one character + 'p' + >>> word[-2:] # The last two characters + 'pA' + >>> word[:-2] # Everything except the last two characters + 'Hel' + +But note that -0 is really the same as 0, so it does not count from the right! +:: + + >>> word[-0] # (since -0 equals 0) + 'H' + +Out-of-range negative slice indices are truncated, but don't try this for +single-element (non-slice) indices:: + + >>> word[-100:] + 'HelpA' + >>> word[-10] # error + Traceback (most recent call last): + File "<stdin>", line 1, in ? + IndexError: string index out of range + +One way to remember how slices work is to think of the indices as pointing +*between* characters, with the left edge of the first character numbered 0. +Then the right edge of the last character of a string of *n* characters has +index *n*, for example:: + + +---+---+---+---+---+ + | H | e | l | p | A | + +---+---+---+---+---+ + 0 1 2 3 4 5 + -5 -4 -3 -2 -1 + +The first row of numbers gives the position of the indices 0...5 in the string; +the second row gives the corresponding negative indices. The slice from *i* to +*j* consists of all characters between the edges labeled *i* and *j*, +respectively. 
+For non-negative indices, the length of a slice is the difference of the +indices, if both are within bounds. For example, the length of ``word[1:3]`` is +2. + +The built-in function :func:`len` returns the length of a string:: + + >>> s = 'supercalifragilisticexpialidocious' + >>> len(s) + 34 + + +.. seealso:: + + :ref:`typesseq` + Strings, and the Unicode strings described in the next section, are + examples of *sequence types*, and support the common operations supported + by such types. + + :ref:`string-methods` + Both strings and Unicode strings support a large number of methods for + basic transformations and searching. + + :ref:`string-formatting` + The formatting operations invoked when strings and Unicode strings are the + left operand of the ``%`` operator are described in more detail here. + + +.. _tut-unicodestrings: + +Unicode Strings +--------------- + +.. sectionauthor:: Marc-Andre Lemburg <mal@lemburg.com> + + +Starting with Python 2.0 a new data type for storing text data is available to +the programmer: the Unicode object. It can be used to store and manipulate +Unicode data (see http://www.unicode.org/) and integrates well with the existing +string objects, providing auto-conversions where necessary. + +Unicode has the advantage of providing one ordinal for every character in every +script used in modern and ancient texts. Previously, there were only 256 +possible ordinals for script characters. Texts were typically bound to a code +page which mapped the ordinals to script characters. This led to a great deal of +confusion, especially with respect to internationalization (usually written as +``i18n`` --- ``'i'`` + 18 characters + ``'n'``) of software. Unicode solves +these problems by defining one code page for all scripts. + +Creating Unicode strings in Python is just as simple as creating normal +strings:: + + >>> u'Hello World !' + u'Hello World !' 
+ +The small ``'u'`` in front of the quote indicates that a Unicode string is +supposed to be created. If you want to include special characters in the string, +you can do so by using the Python *Unicode-Escape* encoding. The following +example shows how:: + + >>> u'Hello\u0020World !' + u'Hello World !' + +The escape sequence ``\u0020`` indicates to insert the Unicode character with +the ordinal value 0x0020 (the space character) at the given position. + +Other characters are interpreted by using their respective ordinal values +directly as Unicode ordinals. If you have literal strings in the standard +Latin-1 encoding that is used in many Western countries, you will find it +convenient that the lower 256 characters of Unicode are the same as the 256 +characters of Latin-1. + +For experts, there is also a raw mode just like the one for normal strings. You +have to prefix the opening quote with 'ur' to have Python use the +*Raw-Unicode-Escape* encoding. It will only apply the above ``\uXXXX`` +conversion if there is an uneven number of backslashes in front of the small +'u'. :: + + >>> ur'Hello\u0020World !' + u'Hello World !' + >>> ur'Hello\\u0020World !' + u'Hello\\\\u0020World !' + +The raw mode is most useful when you have to enter lots of backslashes, as can +be necessary in regular expressions. + +Apart from these standard encodings, Python provides a whole set of other ways +of creating Unicode strings on the basis of a known encoding. + +.. index:: builtin: unicode + +The built-in function :func:`unicode` provides access to all registered Unicode +codecs (COders and DECoders). Some of the more well known encodings which these +codecs can convert are *Latin-1*, *ASCII*, *UTF-8*, and *UTF-16*. The latter two +are variable-length encodings that store each Unicode character in one or more +bytes. The default encoding is normally set to ASCII, which passes through +characters in the range 0 to 127 and rejects any other characters with an error. 
+When a Unicode string is printed, written to a file, or converted with +:func:`str`, conversion takes place using this default encoding. :: + + >>> u"abc" + u'abc' + >>> str(u"abc") + 'abc' + >>> u"äöü" + u'\xe4\xf6\xfc' + >>> str(u"äöü") + Traceback (most recent call last): + File "<stdin>", line 1, in ? + UnicodeEncodeError: 'ascii' codec can't encode characters in position 0-2: ordinal not in range(128) + +To convert a Unicode string into an 8-bit string using a specific encoding, +Unicode objects provide an :func:`encode` method that takes one argument, the +name of the encoding. Lowercase names for encodings are preferred. :: + + >>> u"äöü".encode('utf-8') + '\xc3\xa4\xc3\xb6\xc3\xbc' + +If you have data in a specific encoding and want to produce a corresponding +Unicode string from it, you can use the :func:`unicode` function with the +encoding name as the second argument. :: + + >>> unicode('\xc3\xa4\xc3\xb6\xc3\xbc', 'utf-8') + u'\xe4\xf6\xfc' + + +.. _tut-lists: + +Lists +----- + +Python knows a number of *compound* data types, used to group together other +values. The most versatile is the *list*, which can be written as a list of +comma-separated values (items) between square brackets. List items need not all +have the same type. 
:: + + >>> a = ['spam', 'eggs', 100, 1234] + >>> a + ['spam', 'eggs', 100, 1234] + +Like string indices, list indices start at 0, and lists can be sliced, +concatenated and so on:: + + >>> a[0] + 'spam' + >>> a[3] + 1234 + >>> a[-2] + 100 + >>> a[1:-1] + ['eggs', 100] + >>> a[:2] + ['bacon', 2*2] + ['spam', 'eggs', 'bacon', 4] + >>> 3*a[:3] + ['Boo!'] + ['spam', 'eggs', 100, 'spam', 'eggs', 100, 'spam', 'eggs', 100, 'Boo!'] + +Unlike strings, which are *immutable*, it is possible to change individual +elements of a list:: + + >>> a + ['spam', 'eggs', 100, 1234] + >>> a[2] = a[2] + 23 + >>> a + ['spam', 'eggs', 123, 1234] + +Assignment to slices is also possible, and this can even change the size of the +list or clear it entirely:: + + >>> # Replace some items: + ... a[0:2] = [1, 12] + >>> a + [1, 12, 123, 1234] + >>> # Remove some: + ... a[0:2] = [] + >>> a + [123, 1234] + >>> # Insert some: + ... a[1:1] = ['bletch', 'xyzzy'] + >>> a + [123, 'bletch', 'xyzzy', 1234] + >>> # Insert (a copy of) itself at the beginning + ... a[:0] = a + >>> a + [123, 'bletch', 'xyzzy', 1234, 123, 'bletch', 'xyzzy', 1234] + >>> # Clear the list: replace all items with an empty list + ... a[:] = [] + >>> a + [] + +The built-in function :func:`len` also applies to lists:: + + >>> len(a) + 0 + +It is possible to nest lists (create lists containing other lists), for +example:: + + >>> q = [2, 3] + >>> p = [1, q, 4] + >>> len(p) + 3 + >>> p[1] + [2, 3] + >>> p[1][0] + 2 + >>> p[1].append('xtra') # See section 5.1 + >>> p + [1, [2, 3, 'xtra'], 4] + >>> q + [2, 3, 'xtra'] + +Note that in the last example, ``p[1]`` and ``q`` really refer to the same +object! We'll come back to *object semantics* later. + + +.. _tut-firststeps: + +First Steps Towards Programming +=============================== + +Of course, we can use Python for more complicated tasks than adding two and two +together.
For instance, we can write an initial sub-sequence of the *Fibonacci* +series as follows:: + + >>> # Fibonacci series: + ... # the sum of two elements defines the next + ... a, b = 0, 1 + >>> while b < 10: + ... print b + ... a, b = b, a+b + ... + 1 + 1 + 2 + 3 + 5 + 8 + +This example introduces several new features. + +* The first line contains a *multiple assignment*: the variables ``a`` and ``b`` + simultaneously get the new values 0 and 1. On the last line this is used again, + demonstrating that the expressions on the right-hand side are all evaluated + first before any of the assignments take place. The right-hand side expressions + are evaluated from the left to the right. + +* The :keyword:`while` loop executes as long as the condition (here: ``b < 10``) + remains true. In Python, like in C, any non-zero integer value is true; zero is + false. The condition may also be a string or list value, in fact any sequence; + anything with a non-zero length is true, empty sequences are false. The test + used in the example is a simple comparison. The standard comparison operators + are written the same as in C: ``<`` (less than), ``>`` (greater than), ``==`` + (equal to), ``<=`` (less than or equal to), ``>=`` (greater than or equal to) + and ``!=`` (not equal to). + +* The *body* of the loop is *indented*: indentation is Python's way of grouping + statements. Python does not (yet!) provide an intelligent input line editing + facility, so you have to type a tab or space(s) for each indented line. In + practice you will prepare more complicated input for Python with a text editor; + most text editors have an auto-indent facility. When a compound statement is + entered interactively, it must be followed by a blank line to indicate + completion (since the parser cannot guess when you have typed the last line). + Note that each line within a basic block must be indented by the same amount. 
+ +* The :keyword:`print` statement writes the value of the expression(s) it is + given. It differs from just writing the expression you want to write (as we did + earlier in the calculator examples) in the way it handles multiple expressions + and strings. Strings are printed without quotes, and a space is inserted + between items, so you can format things nicely, like this:: + + >>> i = 256*256 + >>> print 'The value of i is', i + The value of i is 65536 + + A trailing comma avoids the newline after the output:: + + >>> a, b = 0, 1 + >>> while b < 1000: + ... print b, + ... a, b = b, a+b + ... + 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987 + + Note that the interpreter inserts a newline before it prints the next prompt if + the last line was not completed. + + diff --git a/Doc/tutorial/modules.rst b/Doc/tutorial/modules.rst new file mode 100644 index 0000000..0b0dabd --- /dev/null +++ b/Doc/tutorial/modules.rst @@ -0,0 +1,551 @@ +.. _tut-modules: + +******* +Modules +******* + +If you quit from the Python interpreter and enter it again, the definitions you +have made (functions and variables) are lost. Therefore, if you want to write a +somewhat longer program, you are better off using a text editor to prepare the +input for the interpreter and running it with that file as input instead. This +is known as creating a *script*. As your program gets longer, you may want to +split it into several files for easier maintenance. You may also want to use a +handy function that you've written in several programs without copying its +definition into each program. + +To support this, Python has a way to put definitions in a file and use them in a +script or in an interactive instance of the interpreter. Such a file is called a +*module*; definitions from a module can be *imported* into other modules or into +the *main* module (the collection of variables that you have access to in a +script executed at the top level and in calculator mode). 
+ +A module is a file containing Python definitions and statements. The file name +is the module name with the suffix :file:`.py` appended. Within a module, the +module's name (as a string) is available as the value of the global variable +``__name__``. For instance, use your favorite text editor to create a file +called :file:`fibo.py` in the current directory with the following contents:: + + # Fibonacci numbers module + + def fib(n): # write Fibonacci series up to n + a, b = 0, 1 + while b < n: + print b, + a, b = b, a+b + + def fib2(n): # return Fibonacci series up to n + result = [] + a, b = 0, 1 + while b < n: + result.append(b) + a, b = b, a+b + return result + +Now enter the Python interpreter and import this module with the following +command:: + + >>> import fibo + +This does not enter the names of the functions defined in ``fibo`` directly in +the current symbol table; it only enters the module name ``fibo`` there. Using +the module name you can access the functions:: + + >>> fibo.fib(1000) + 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987 + >>> fibo.fib2(100) + [1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89] + >>> fibo.__name__ + 'fibo' + +If you intend to use a function often you can assign it to a local name:: + + >>> fib = fibo.fib + >>> fib(500) + 1 1 2 3 5 8 13 21 34 55 89 144 233 377 + + +.. _tut-moremodules: + +More on Modules +=============== + +A module can contain executable statements as well as function definitions. +These statements are intended to initialize the module. They are executed only +the *first* time the module is imported somewhere. [#]_ + +Each module has its own private symbol table, which is used as the global symbol +table by all functions defined in the module. Thus, the author of a module can +use global variables in the module without worrying about accidental clashes +with a user's global variables. 
On the other hand, if you know what you are +doing you can touch a module's global variables with the same notation used to +refer to its functions, ``modname.itemname``. + +Modules can import other modules. It is customary but not required to place all +:keyword:`import` statements at the beginning of a module (or script, for that +matter). The imported module names are placed in the importing module's global +symbol table. + +There is a variant of the :keyword:`import` statement that imports names from a +module directly into the importing module's symbol table. For example:: + + >>> from fibo import fib, fib2 + >>> fib(500) + 1 1 2 3 5 8 13 21 34 55 89 144 233 377 + +This does not introduce the module name from which the imports are taken in the +local symbol table (so in the example, ``fibo`` is not defined). + +There is even a variant to import all names that a module defines:: + + >>> from fibo import * + >>> fib(500) + 1 1 2 3 5 8 13 21 34 55 89 144 233 377 + +This imports all names except those beginning with an underscore (``_``). + + +.. _tut-modulesasscripts: + +Executing modules as scripts +---------------------------- + +When you run a Python module with :: + + python fibo.py <arguments> + +the code in the module will be executed, just as if you imported it, but with +the ``__name__`` set to ``"__main__"``. That means that by adding this code at +the end of your module:: + + if __name__ == "__main__": + import sys + fib(int(sys.argv[1])) + +you can make the file usable as a script as well as an importable module, +because the code that parses the command line only runs if the module is +executed as the "main" file:: + + $ python fibo.py 50 + 1 1 2 3 5 8 13 21 34 + +If the module is imported, the code is not run:: + + >>> import fibo + >>> + +This is often used either to provide a convenient user interface to a module, or +for testing purposes (running the module as a script executes a test suite). + + +.. 
_tut-searchpath: + +The Module Search Path +---------------------- + +.. index:: triple: module; search; path + +When a module named :mod:`spam` is imported, the interpreter searches for a file +named :file:`spam.py` in the current directory, and then in the list of +directories specified by the environment variable :envvar:`PYTHONPATH`. This +has the same syntax as the shell variable :envvar:`PATH`, that is, a list of +directory names. When :envvar:`PYTHONPATH` is not set, or when the file is not +found there, the search continues in an installation-dependent default path; on +Unix, this is usually :file:`.:/usr/local/lib/python`. + +Actually, modules are searched in the list of directories given by the variable +``sys.path`` which is initialized from the directory containing the input script +(or the current directory), :envvar:`PYTHONPATH` and the installation-dependent +default. This allows Python programs that know what they're doing to modify or +replace the module search path. Note that because the directory containing the +script being run is on the search path, it is important that the script not have +the same name as a standard module, or Python will attempt to load the script as +a module when that module is imported. This will generally be an error. See +section :ref:`tut-standardmodules` for more information. + + +"Compiled" Python files +----------------------- + +As an important speed-up of the start-up time for short programs that use a lot +of standard modules, if a file called :file:`spam.pyc` exists in the directory +where :file:`spam.py` is found, this is assumed to contain an +already-"byte-compiled" version of the module :mod:`spam`. The modification time +of the version of :file:`spam.py` used to create :file:`spam.pyc` is recorded in +:file:`spam.pyc`, and the :file:`.pyc` file is ignored if these don't match. + +Normally, you don't need to do anything to create the :file:`spam.pyc` file.
+Whenever :file:`spam.py` is successfully compiled, an attempt is made to write +the compiled version to :file:`spam.pyc`. It is not an error if this attempt +fails; if for any reason the file is not written completely, the resulting +:file:`spam.pyc` file will be recognized as invalid and thus ignored later. The +contents of the :file:`spam.pyc` file are platform independent, so a Python +module directory can be shared by machines of different architectures. + +Some tips for experts: + +* When the Python interpreter is invoked with the :option:`-O` flag, optimized + code is generated and stored in :file:`.pyo` files. The optimizer currently + doesn't help much; it only removes :keyword:`assert` statements. When + :option:`-O` is used, *all* bytecode is optimized; ``.pyc`` files are ignored + and ``.py`` files are compiled to optimized bytecode. + +* Passing two :option:`-O` flags to the Python interpreter (:option:`-OO`) will + cause the bytecode compiler to perform optimizations that could in some rare + cases result in malfunctioning programs. Currently only ``__doc__`` strings are + removed from the bytecode, resulting in more compact :file:`.pyo` files. Since + some programs may rely on having these available, you should only use this + option if you know what you're doing. + +* A program doesn't run any faster when it is read from a :file:`.pyc` or + :file:`.pyo` file than when it is read from a :file:`.py` file; the only thing + that's faster about :file:`.pyc` or :file:`.pyo` files is the speed with which + they are loaded. + +* When a script is run by giving its name on the command line, the bytecode for + the script is never written to a :file:`.pyc` or :file:`.pyo` file. Thus, the + startup time of a script may be reduced by moving most of its code to a module + and having a small bootstrap script that imports that module. It is also + possible to name a :file:`.pyc` or :file:`.pyo` file directly on the command + line. 
+ +* It is possible to have a file called :file:`spam.pyc` (or :file:`spam.pyo` + when :option:`-O` is used) without a file :file:`spam.py` for the same module. + This can be used to distribute a library of Python code in a form that is + moderately hard to reverse engineer. + + .. index:: module: compileall + +* The module :mod:`compileall` can create :file:`.pyc` files (or :file:`.pyo` + files when :option:`-O` is used) for all modules in a directory. + + .. % + + +.. _tut-standardmodules: + +Standard Modules +================ + +.. index:: module: sys + +Python comes with a library of standard modules, described in a separate +document, the Python Library Reference ("Library Reference" hereafter). Some +modules are built into the interpreter; these provide access to operations that +are not part of the core of the language but are nevertheless built in, either +for efficiency or to provide access to operating system primitives such as +system calls. The set of such modules is a configuration option which also +depends on the underlying platform. For example, the :mod:`winreg` module is only +provided on Windows systems. One particular module deserves some attention: +:mod:`sys`, which is built into every Python interpreter. The variables +``sys.ps1`` and ``sys.ps2`` define the strings used as primary and secondary +prompts: + +.. % + +:: + + >>> import sys + >>> sys.ps1 + '>>> ' + >>> sys.ps2 + '... ' + >>> sys.ps1 = 'C> ' + C> print 'Yuck!' + Yuck! + C> + + +These two variables are only defined if the interpreter is in interactive mode. + +The variable ``sys.path`` is a list of strings that determines the interpreter's +search path for modules. It is initialized to a default path taken from the +environment variable :envvar:`PYTHONPATH`, or from a built-in default if +:envvar:`PYTHONPATH` is not set. You can modify it using standard list +operations:: + + >>> import sys + >>> sys.path.append('/ufs/guido/lib/python') + + +..
_tut-dir: + +The :func:`dir` Function +======================== + +The built-in function :func:`dir` is used to find out which names a module +defines. It returns a sorted list of strings:: + + >>> import fibo, sys + >>> dir(fibo) + ['__name__', 'fib', 'fib2'] + >>> dir(sys) + ['__displayhook__', '__doc__', '__excepthook__', '__name__', '__stderr__', + '__stdin__', '__stdout__', '_getframe', 'api_version', 'argv', + 'builtin_module_names', 'byteorder', 'callstats', 'copyright', + 'displayhook', 'exc_info', 'excepthook', + 'exec_prefix', 'executable', 'exit', 'getdefaultencoding', 'getdlopenflags', + 'getrecursionlimit', 'getrefcount', 'hexversion', 'maxint', 'maxunicode', + 'meta_path', 'modules', 'path', 'path_hooks', 'path_importer_cache', + 'platform', 'prefix', 'ps1', 'ps2', 'setcheckinterval', 'setdlopenflags', + 'setprofile', 'setrecursionlimit', 'settrace', 'stderr', 'stdin', 'stdout', + 'version', 'version_info', 'warnoptions'] + +Without arguments, :func:`dir` lists the names you have defined currently:: + + >>> a = [1, 2, 3, 4, 5] + >>> import fibo + >>> fib = fibo.fib + >>> dir() + ['__builtins__', '__doc__', '__file__', '__name__', 'a', 'fib', 'fibo', 'sys'] + +Note that it lists all types of names: variables, modules, functions, etc. + +.. index:: module: __builtin__ + +:func:`dir` does not list the names of built-in functions and variables. 
If you +want a list of those, they are defined in the standard module +:mod:`__builtin__`:: + + >>> import __builtin__ + >>> dir(__builtin__) + ['ArithmeticError', 'AssertionError', 'AttributeError', 'DeprecationWarning', + 'EOFError', 'Ellipsis', 'EnvironmentError', 'Exception', 'False', + 'FloatingPointError', 'FutureWarning', 'IOError', 'ImportError', + 'IndentationError', 'IndexError', 'KeyError', 'KeyboardInterrupt', + 'LookupError', 'MemoryError', 'NameError', 'None', 'NotImplemented', + 'NotImplementedError', 'OSError', 'OverflowError', + 'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError', + 'RuntimeWarning', 'StopIteration', 'SyntaxError', + 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError', 'True', + 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError', + 'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError', + 'UserWarning', 'ValueError', 'Warning', 'WindowsError', + 'ZeroDivisionError', '_', '__debug__', '__doc__', '__import__', + '__name__', 'abs', 'basestring', 'bool', 'buffer', + 'chr', 'classmethod', 'cmp', 'compile', + 'complex', 'copyright', 'credits', 'delattr', 'dict', 'dir', 'divmod', + 'enumerate', 'eval', 'exec', 'exit', 'filter', 'float', + 'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'help', 'hex', + 'id', 'input', 'int', 'isinstance', 'issubclass', 'iter', + 'len', 'license', 'list', 'locals', 'map', 'max', 'min', + 'object', 'oct', 'open', 'ord', 'pow', 'property', 'quit', 'range', + 'repr', 'reversed', 'round', 'set', + 'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super', + 'tuple', 'type', 'vars', 'zip'] + + +.. _tut-packages: + +Packages +======== + +Packages are a way of structuring Python's module namespace by using "dotted +module names". For example, the module name :mod:`A.B` designates a submodule +named ``B`` in a package named ``A``. 
Just like the use of modules saves the +authors of different modules from having to worry about each other's global +variable names, the use of dotted module names saves the authors of multi-module +packages like NumPy or the Python Imaging Library from having to worry about +each other's module names. + +Suppose you want to design a collection of modules (a "package") for the uniform +handling of sound files and sound data. There are many different sound file +formats (usually recognized by their extension, for example: :file:`.wav`, +:file:`.aiff`, :file:`.au`), so you may need to create and maintain a growing +collection of modules for the conversion between the various file formats. +There are also many different operations you might want to perform on sound data +(such as mixing, adding echo, applying an equalizer function, creating an +artificial stereo effect), so in addition you will be writing a never-ending +stream of modules to perform these operations. Here's a possible structure for +your package (expressed in terms of a hierarchical filesystem):: + + sound/ Top-level package + __init__.py Initialize the sound package + formats/ Subpackage for file format conversions + __init__.py + wavread.py + wavwrite.py + aiffread.py + aiffwrite.py + auread.py + auwrite.py + ... + effects/ Subpackage for sound effects + __init__.py + echo.py + surround.py + reverse.py + ... + filters/ Subpackage for filters + __init__.py + equalizer.py + vocoder.py + karaoke.py + ... + +When importing the package, Python searches through the directories on +``sys.path`` looking for the package subdirectory. + +The :file:`__init__.py` files are required to make Python treat the directories +as containing packages; this is done to prevent directories with a common name, +such as ``string``, from unintentionally hiding valid modules that occur later +on the module search path. 
In the simplest case, :file:`__init__.py` can just be +an empty file, but it can also execute initialization code for the package or +set the ``__all__`` variable, described later. + +Users of the package can import individual modules from the package, for +example:: + + import sound.effects.echo + +This loads the submodule :mod:`sound.effects.echo`. It must be referenced with +its full name. :: + + sound.effects.echo.echofilter(input, output, delay=0.7, atten=4) + +An alternative way of importing the submodule is:: + + from sound.effects import echo + +This also loads the submodule :mod:`echo`, and makes it available without its +package prefix, so it can be used as follows:: + + echo.echofilter(input, output, delay=0.7, atten=4) + +Yet another variation is to import the desired function or variable directly:: + + from sound.effects.echo import echofilter + +Again, this loads the submodule :mod:`echo`, but this makes its function +:func:`echofilter` directly available:: + + echofilter(input, output, delay=0.7, atten=4) + +Note that when using ``from package import item``, the item can be either a +submodule (or subpackage) of the package, or some other name defined in the +package, like a function, class or variable. The ``import`` statement first +tests whether the item is defined in the package; if not, it assumes it is a +module and attempts to load it. If it fails to find it, an :exc:`ImportError` +exception is raised. + +Contrarily, when using syntax like ``import item.subitem.subsubitem``, each item +except for the last must be a package; the last item can be a module or a +package but can't be a class or function or variable defined in the previous +item. + + +.. _tut-pkg-import-star: + +Importing \* From a Package +--------------------------- + +.. index:: single: __all__ + +Now what happens when the user writes ``from sound.effects import *``? 
Ideally, +one would hope that this somehow goes out to the filesystem, finds which +submodules are present in the package, and imports them all. Unfortunately, +this operation does not work very well on Windows platforms, where the +filesystem does not always have accurate information about the case of a +filename! On these platforms, there is no guaranteed way to know whether a file +:file:`ECHO.PY` should be imported as a module :mod:`echo`, :mod:`Echo` or +:mod:`ECHO`. (For example, Windows 95 has the annoying practice of showing all +file names with a capitalized first letter.) The DOS 8+3 filename restriction +adds another interesting problem for long module names. + +.. % The \code{__all__} Attribute + +The only solution is for the package author to provide an explicit index of the +package. The import statement uses the following convention: if a package's +:file:`__init__.py` code defines a list named ``__all__``, it is taken to be the +list of module names that should be imported when ``from package import *`` is +encountered. It is up to the package author to keep this list up-to-date when a +new version of the package is released. Package authors may also decide not to +support it, if they don't see a use for importing \* from their package. For +example, the file :file:`sound/effects/__init__.py` could contain the following +code:: + + __all__ = ["echo", "surround", "reverse"] + +This would mean that ``from sound.effects import *`` would import the three +named submodules of the :mod:`sound.effects` package. + +If ``__all__`` is not defined, the statement ``from sound.effects import *`` +does *not* import all submodules from the package :mod:`sound.effects` into the +current namespace; it only ensures that the package :mod:`sound.effects` has +been imported (possibly running any initialization code in :file:`__init__.py`) +and then imports whatever names are defined in the package.
This includes any +names defined (and submodules explicitly loaded) by :file:`__init__.py`. It +also includes any submodules of the package that were explicitly loaded by +previous import statements. Consider this code:: + + import sound.effects.echo + import sound.effects.surround + from sound.effects import * + +In this example, the echo and surround modules are imported in the current +namespace because they are defined in the :mod:`sound.effects` package when the +``from...import`` statement is executed. (This also works when ``__all__`` is +defined.) + +Note that in general the practice of importing ``*`` from a module or package is +frowned upon, since it often causes poorly readable code. However, it is okay to +use it to save typing in interactive sessions, and certain modules are designed +to export only names that follow certain patterns. + +Remember, there is nothing wrong with using ``from Package import +specific_submodule``! In fact, this is the recommended notation unless the +importing module needs to use submodules with the same name from different +packages. + + +Intra-package References +------------------------ + +The submodules often need to refer to each other. For example, the +:mod:`surround` module might use the :mod:`echo` module. In fact, such +references are so common that the :keyword:`import` statement first looks in the +containing package before looking in the standard module search path. Thus, the +:mod:`surround` module can simply use ``import echo`` or ``from echo import +echofilter``. If the imported module is not found in the current package (the +package of which the current module is a submodule), the :keyword:`import` +statement looks for a top-level module with the given name. + +When packages are structured into subpackages (as with the :mod:`sound` package +in the example), you can use absolute imports to refer to submodules of sibling
For example, if the module :mod:`sound.filters.vocoder` needs to use +the :mod:`echo` module in the :mod:`sound.effects` package, it can use ``from +sound.effects import echo``. + +Starting with Python 2.5, in addition to the implicit relative imports described +above, you can write explicit relative imports with the ``from module import +name`` form of import statement. These explicit relative imports use leading +dots to indicate the current and parent packages involved in the relative +import. From the :mod:`surround` module for example, you might use:: + + from . import echo + from .. import formats + from ..filters import equalizer + +Note that both explicit and implicit relative imports are based on the name of +the current module. Since the name of the main module is always ``"__main__"``, +modules intended for use as the main module of a Python application should +always use absolute imports. + + +Packages in Multiple Directories +-------------------------------- + +Packages support one more special attribute, :attr:`__path__`. This is +initialized to be a list containing the name of the directory holding the +package's :file:`__init__.py` before the code in that file is executed. This +variable can be modified; doing so affects future searches for modules and +subpackages contained in the package. + +While this feature is not often needed, it can be used to extend the set of +modules found in a package. + + +.. rubric:: Footnotes + +.. [#] In fact function definitions are also 'statements' that are 'executed'; the + execution enters the function name in the module's global symbol table. + diff --git a/Doc/tutorial/stdlib.rst b/Doc/tutorial/stdlib.rst new file mode 100644 index 0000000..7bbc5ef --- /dev/null +++ b/Doc/tutorial/stdlib.rst @@ -0,0 +1,313 @@ +.. _tut-brieftour: + +********************************** +Brief Tour of the Standard Library +********************************** + + +.. 
_tut-os-interface: + +Operating System Interface +========================== + +The :mod:`os` module provides dozens of functions for interacting with the +operating system:: + + >>> import os + >>> os.system('time 0:02') + 0 + >>> os.getcwd() # Return the current working directory + 'C:\\Python30' + >>> os.chdir('/server/accesslogs') + +Be sure to use the ``import os`` style instead of ``from os import *``. This +will keep :func:`os.open` from shadowing the builtin :func:`open` function which +operates much differently. + +.. index:: builtin: help + +The builtin :func:`dir` and :func:`help` functions are useful as interactive +aids for working with large modules like :mod:`os`:: + + >>> import os + >>> dir(os) + <returns a list of all module functions> + >>> help(os) + <returns an extensive manual page created from the module's docstrings> + +For daily file and directory management tasks, the :mod:`shutil` module provides +a higher level interface that is easier to use:: + + >>> import shutil + >>> shutil.copyfile('data.db', 'archive.db') + >>> shutil.move('/build/executables', 'installdir') + + +.. _tut-file-wildcards: + +File Wildcards +============== + +The :mod:`glob` module provides a function for making file lists from directory +wildcard searches:: + + >>> import glob + >>> glob.glob('*.py') + ['primes.py', 'random.py', 'quote.py'] + + +.. _tut-command-line-arguments: + +Command Line Arguments +====================== + +Common utility scripts often need to process command line arguments. These +arguments are stored in the :mod:`sys` module's *argv* attribute as a list. For +instance the following output results from running ``python demo.py one two +three`` at the command line:: + + >>> import sys + >>> print sys.argv + ['demo.py', 'one', 'two', 'three'] + +The :mod:`getopt` module processes *sys.argv* using the conventions of the Unix +:func:`getopt` function. More powerful and flexible command line processing is +provided by the :mod:`optparse` module. 
+ + +.. _tut-stderr: + +Error Output Redirection and Program Termination +================================================ + +The :mod:`sys` module also has attributes for *stdin*, *stdout*, and *stderr*. +The latter is useful for emitting warnings and error messages to make them +visible even when *stdout* has been redirected:: + + >>> sys.stderr.write('Warning, log file not found starting a new one\n') + Warning, log file not found starting a new one + +The most direct way to terminate a script is to use ``sys.exit()``. + + +.. _tut-string-pattern-matching: + +String Pattern Matching +======================= + +The :mod:`re` module provides regular expression tools for advanced string +processing. For complex matching and manipulation, regular expressions offer +succinct, optimized solutions:: + + >>> import re + >>> re.findall(r'\bf[a-z]*', 'which foot or hand fell fastest') + ['foot', 'fell', 'fastest'] + >>> re.sub(r'(\b[a-z]+) \1', r'\1', 'cat in the the hat') + 'cat in the hat' + +When only simple capabilities are needed, string methods are preferred because +they are easier to read and debug:: + + >>> 'tea for too'.replace('too', 'two') + 'tea for two' + + +.. _tut-mathematics: + +Mathematics +=========== + +The :mod:`math` module gives access to the underlying C library functions for +floating point math:: + + >>> import math + >>> math.cos(math.pi / 4.0) + 0.70710678118654757 + >>> math.log(1024, 2) + 10.0 + +The :mod:`random` module provides tools for making random selections:: + + >>> import random + >>> random.choice(['apple', 'pear', 'banana']) + 'apple' + >>> random.sample(range(100), 10) # sampling without replacement + [30, 83, 16, 4, 8, 81, 41, 50, 18, 33] + >>> random.random() # random float + 0.17970987693706186 + >>> random.randrange(6) # random integer chosen from range(6) + 4 + + +.. _tut-internet-access: + +Internet Access +=============== + +There are a number of modules for accessing the internet and processing internet +protocols. 
Two of the simplest are :mod:`urllib2` for retrieving data from urls +and :mod:`smtplib` for sending mail:: + + >>> import urllib2 + >>> for line in urllib2.urlopen('http://tycho.usno.navy.mil/cgi-bin/timer.pl'): + ... if 'EST' in line or 'EDT' in line: # look for Eastern Time + ... print line + + <BR>Nov. 25, 09:43:32 PM EST + + >>> import smtplib + >>> server = smtplib.SMTP('localhost') + >>> server.sendmail('soothsayer@example.org', 'jcaesar@example.org', + """To: jcaesar@example.org + From: soothsayer@example.org + + Beware the Ides of March. + """) + >>> server.quit() + + +.. _tut-dates-and-times: + +Dates and Times +=============== + +The :mod:`datetime` module supplies classes for manipulating dates and times in +both simple and complex ways. While date and time arithmetic is supported, the +focus of the implementation is on efficient member extraction for output +formatting and manipulation. The module also supports objects that are timezone +aware. :: + + # dates are easily constructed and formatted + >>> from datetime import date + >>> now = date.today() + >>> now + datetime.date(2003, 12, 2) + >>> now.strftime("%m-%d-%y. %d %b %Y is a %A on the %d day of %B.") + '12-02-03. 02 Dec 2003 is a Tuesday on the 02 day of December.' + + # dates support calendar arithmetic + >>> birthday = date(1964, 7, 31) + >>> age = now - birthday + >>> age.days + 14368 + + +.. _tut-data-compression: + +Data Compression +================ + +Common data archiving and compression formats are directly supported by modules +including: :mod:`zlib`, :mod:`gzip`, :mod:`bz2`, :mod:`zipfile` and +:mod:`tarfile`. :: + + >>> import zlib + >>> s = 'witch which has which witches wrist watch' + >>> len(s) + 41 + >>> t = zlib.compress(s) + >>> len(t) + 37 + >>> zlib.decompress(t) + 'witch which has which witches wrist watch' + >>> zlib.crc32(s) + 226805979 + + +.. 
_tut-performance-measurement: + +Performance Measurement +======================= + +Some Python users develop a deep interest in knowing the relative performance of +different approaches to the same problem. Python provides a measurement tool +that answers those questions immediately. + +For example, it may be tempting to use the tuple packing and unpacking feature +instead of the traditional approach to swapping arguments. The :mod:`timeit` +module quickly demonstrates a modest performance advantage:: + + >>> from timeit import Timer + >>> Timer('t=a; a=b; b=t', 'a=1; b=2').timeit() + 0.57535828626024577 + >>> Timer('a,b = b,a', 'a=1; b=2').timeit() + 0.54962537085770791 + +In contrast to :mod:`timeit`'s fine level of granularity, the :mod:`profile` and +:mod:`pstats` modules provide tools for identifying time critical sections in +larger blocks of code. + + +.. _tut-quality-control: + +Quality Control +=============== + +One approach for developing high quality software is to write tests for each +function as it is developed and to run those tests frequently during the +development process. + +The :mod:`doctest` module provides a tool for scanning a module and validating +tests embedded in a program's docstrings. Test construction is as simple as +cutting-and-pasting a typical call along with its results into the docstring. +This improves the documentation by providing the user with an example and it +allows the doctest module to make sure the code remains true to the +documentation:: + + def average(values): + """Computes the arithmetic mean of a list of numbers. 
+ + >>> print average([20, 30, 70]) + 40.0 + """ + return sum(values, 0.0) / len(values) + + import doctest + doctest.testmod() # automatically validate the embedded tests + +The :mod:`unittest` module is not as effortless as the :mod:`doctest` module, +but it allows a more comprehensive set of tests to be maintained in a separate +file:: + + import unittest + + class TestStatisticalFunctions(unittest.TestCase): + + def test_average(self): + self.assertEqual(average([20, 30, 70]), 40.0) + self.assertEqual(round(average([1, 5, 7]), 1), 4.3) + self.assertRaises(ZeroDivisionError, average, []) + self.assertRaises(TypeError, average, 20, 30, 70) + + unittest.main() # Calling from the command line invokes all tests + + +.. _tut-batteries-included: + +Batteries Included +================== + +Python has a "batteries included" philosophy. This is best seen through the +sophisticated and robust capabilities of its larger packages. For example: + +* The :mod:`xmlrpclib` and :mod:`SimpleXMLRPCServer` modules make implementing + remote procedure calls into an almost trivial task. Despite the modules' + names, no direct knowledge or handling of XML is needed. + +* The :mod:`email` package is a library for managing email messages, including + MIME and other RFC 2822-based message documents. Unlike :mod:`smtplib` and + :mod:`poplib` which actually send and receive messages, the email package has + a complete toolset for building or decoding complex message structures + (including attachments) and for implementing internet encoding and header + protocols. + +* The :mod:`xml.dom` and :mod:`xml.sax` packages provide robust support for + parsing this popular data interchange format. Likewise, the :mod:`csv` module + supports direct reads and writes in a common database format. Together, these + modules and packages greatly simplify data interchange between Python + applications and other tools. 
+ +* Internationalization is supported by a number of modules including + :mod:`gettext`, :mod:`locale`, and the :mod:`codecs` package. + + diff --git a/Doc/tutorial/stdlib2.rst b/Doc/tutorial/stdlib2.rst new file mode 100644 index 0000000..0ce2757 --- /dev/null +++ b/Doc/tutorial/stdlib2.rst @@ -0,0 +1,394 @@ +.. _tut-brieftourtwo: + +********************************************* +Brief Tour of the Standard Library -- Part II +********************************************* + +This second tour covers more advanced modules that support professional +programming needs. These modules rarely occur in small scripts. + + +.. _tut-output-formatting: + +Output Formatting +================= + +The :mod:`repr` module provides a version of :func:`repr` customized for +abbreviated displays of large or deeply nested containers:: + + >>> import repr + >>> repr.repr(set('supercalifragilisticexpialidocious')) + "set(['a', 'c', 'd', 'e', 'f', 'g', ...])" + +The :mod:`pprint` module offers more sophisticated control over printing both +built-in and user defined objects in a way that is readable by the interpreter. +When the result is longer than one line, the "pretty printer" adds line breaks +and indentation to more clearly reveal data structure:: + + >>> import pprint + >>> t = [[[['black', 'cyan'], 'white', ['green', 'red']], [['magenta', + ... 'yellow'], 'blue']]] + ... + >>> pprint.pprint(t, width=30) + [[[['black', 'cyan'], + 'white', + ['green', 'red']], + [['magenta', 'yellow'], + 'blue']]] + +The :mod:`textwrap` module formats paragraphs of text to fit a given screen +width:: + + >>> import textwrap + >>> doc = """The wrap() method is just like fill() except that it returns + ... a list of strings instead of one big string with newlines to separate + ... the wrapped lines.""" + ... 
+ >>> print textwrap.fill(doc, width=40) + The wrap() method is just like fill() + except that it returns a list of strings + instead of one big string with newlines + to separate the wrapped lines. + +The :mod:`locale` module accesses a database of culture specific data formats. +The grouping attribute of locale's format function provides a direct way of +formatting numbers with group separators:: + + >>> import locale + >>> locale.setlocale(locale.LC_ALL, 'English_United States.1252') + 'English_United States.1252' + >>> conv = locale.localeconv() # get a mapping of conventions + >>> x = 1234567.8 + >>> locale.format("%d", x, grouping=True) + '1,234,567' + >>> locale.format("%s%.*f", (conv['currency_symbol'], + ... conv['frac_digits'], x), grouping=True) + '$1,234,567.80' + + +.. _tut-templating: + +Templating +========== + +The :mod:`string` module includes a versatile :class:`Template` class with a +simplified syntax suitable for editing by end-users. This allows users to +customize their applications without having to alter the application. + +The format uses placeholder names formed by ``$`` with valid Python identifiers +(alphanumeric characters and underscores). Surrounding the placeholder with +braces allows it to be followed by more alphanumeric letters with no intervening +spaces. Writing ``$$`` creates a single escaped ``$``:: + + >>> from string import Template + >>> t = Template('${village}folk send $$10 to $cause.') + >>> t.substitute(village='Nottingham', cause='the ditch fund') + 'Nottinghamfolk send $10 to the ditch fund.' + +The :meth:`substitute` method raises a :exc:`KeyError` when a placeholder is not +supplied in a dictionary or a keyword argument. 
For mail-merge style +applications, user supplied data may be incomplete and the +:meth:`safe_substitute` method may be more appropriate --- it will leave +placeholders unchanged if data is missing:: + + >>> t = Template('Return the $item to $owner.') + >>> d = dict(item='unladen swallow') + >>> t.substitute(d) + Traceback (most recent call last): + . . . + KeyError: 'owner' + >>> t.safe_substitute(d) + 'Return the unladen swallow to $owner.' + +Template subclasses can specify a custom delimiter. For example, a batch +renaming utility for a photo browser may elect to use percent signs for +placeholders such as the current date, image sequence number, or file format:: + + >>> import time, os.path, sys + >>> def raw_input(prompt): + ... sys.stdout.write(prompt) + ... sys.stdout.flush() + ... return sys.stdin.readline() + ... + >>> photofiles = ['img_1074.jpg', 'img_1076.jpg', 'img_1077.jpg'] + >>> class BatchRename(Template): + ... delimiter = '%' + >>> fmt = raw_input('Enter rename style (%d-date %n-seqnum %f-format): ') + Enter rename style (%d-date %n-seqnum %f-format): Ashley_%n%f + + >>> t = BatchRename(fmt) + >>> date = time.strftime('%d%b%y') + >>> for i, filename in enumerate(photofiles): + ... base, ext = os.path.splitext(filename) + ... newname = t.substitute(d=date, n=i, f=ext) + ... print '%s --> %s' % (filename, newname) + + img_1074.jpg --> Ashley_0.jpg + img_1076.jpg --> Ashley_1.jpg + img_1077.jpg --> Ashley_2.jpg + +Another application for templating is separating program logic from the details +of multiple output formats. This makes it possible to substitute custom +templates for XML files, plain text reports, and HTML web reports. + + +.. _tut-binary-formats: + +Working with Binary Data Record Layouts +======================================= + +The :mod:`struct` module provides :func:`pack` and :func:`unpack` functions for +working with variable length binary record formats. 
The following example shows +how to loop through header information in a ZIP file (with pack codes ``"H"`` +and ``"L"`` representing two and four byte unsigned numbers respectively):: + + import struct + + data = open('myfile.zip', 'rb').read() + start = 0 + for i in range(3): # show the first 3 file headers + start += 14 + fields = struct.unpack('LLLHH', data[start:start+16]) + crc32, comp_size, uncomp_size, filenamesize, extra_size = fields + + start += 16 + filename = data[start:start+filenamesize] + start += filenamesize + extra = data[start:start+extra_size] + print filename, hex(crc32), comp_size, uncomp_size + + start += extra_size + comp_size # skip to the next header + + +.. _tut-multi-threading: + +Multi-threading +=============== + +Threading is a technique for decoupling tasks which are not sequentially +dependent. Threads can be used to improve the responsiveness of applications +that accept user input while other tasks run in the background. A related use +case is running I/O in parallel with computations in another thread. + +The following code shows how the high level :mod:`threading` module can run +tasks in background while the main program continues to run:: + + import threading, zipfile + + class AsyncZip(threading.Thread): + def __init__(self, infile, outfile): + threading.Thread.__init__(self) + self.infile = infile + self.outfile = outfile + def run(self): + f = zipfile.ZipFile(self.outfile, 'w', zipfile.ZIP_DEFLATED) + f.write(self.infile) + f.close() + print 'Finished background zip of: ', self.infile + + background = AsyncZip('mydata.txt', 'myarchive.zip') + background.start() + print 'The main program continues to run in foreground.' + + background.join() # Wait for the background task to finish + print 'Main program waited until background was done.' + +The principal challenge of multi-threaded applications is coordinating threads +that share data or other resources. 
To that end, the threading module provides +a number of synchronization primitives including locks, events, condition +variables, and semaphores. + +While those tools are powerful, minor design errors can result in problems that +are difficult to reproduce. So, the preferred approach to task coordination is +to concentrate all access to a resource in a single thread and then use the +:mod:`Queue` module to feed that thread with requests from other threads. +Applications using :class:`Queue` objects for inter-thread communication and +coordination are easier to design, more readable, and more reliable. + + +.. _tut-logging: + +Logging +======= + +The :mod:`logging` module offers a full featured and flexible logging system. +At its simplest, log messages are sent to a file or to ``sys.stderr``:: + + import logging + logging.debug('Debugging information') + logging.info('Informational message') + logging.warning('Warning:config file %s not found', 'server.conf') + logging.error('Error occurred') + logging.critical('Critical error -- shutting down') + +This produces the following output:: + + WARNING:root:Warning:config file server.conf not found + ERROR:root:Error occurred + CRITICAL:root:Critical error -- shutting down + +By default, informational and debugging messages are suppressed and the output +is sent to standard error. Other output options include routing messages +through email, datagrams, sockets, or to an HTTP Server. New filters can select +different routing based on message priority: :const:`DEBUG`, :const:`INFO`, +:const:`WARNING`, :const:`ERROR`, and :const:`CRITICAL`. + +The logging system can be configured directly from Python or can be loaded from +a user editable configuration file for customized logging without altering the +application. + + +.. _tut-weak-references: + +Weak References +=============== + +Python does automatic memory management (reference counting for most objects and +garbage collection to eliminate cycles). 
The memory is freed shortly after the +last reference to it has been eliminated. + +This approach works fine for most applications but occasionally there is a need +to track objects only as long as they are being used by something else. +Unfortunately, just tracking them creates a reference that makes them permanent. +The :mod:`weakref` module provides tools for tracking objects without creating a +reference. When the object is no longer needed, it is automatically removed +from a weakref table and a callback is triggered for weakref objects. Typical +applications include caching objects that are expensive to create:: + + >>> import weakref, gc + >>> class A: + ... def __init__(self, value): + ... self.value = value + ... def __repr__(self): + ... return str(self.value) + ... + >>> a = A(10) # create a reference + >>> d = weakref.WeakValueDictionary() + >>> d['primary'] = a # does not create a reference + >>> d['primary'] # fetch the object if it is still alive + 10 + >>> del a # remove the one reference + >>> gc.collect() # run garbage collection right away + 0 + >>> d['primary'] # entry was automatically removed + Traceback (most recent call last): + File "<pyshell#108>", line 1, in -toplevel- + d['primary'] # entry was automatically removed + File "C:/python30/lib/weakref.py", line 46, in __getitem__ + o = self.data[key]() + KeyError: 'primary' + + +.. _tut-list-tools: + +Tools for Working with Lists +============================ + +Many data structure needs can be met with the built-in list type. However, +sometimes there is a need for alternative implementations with different +performance trade-offs. + +The :mod:`array` module provides an :class:`array()` object that is like a list +that stores only homogeneous data and stores it more compactly. 
The following +example shows an array of numbers stored as two byte unsigned binary numbers +(typecode ``"H"``) rather than the usual 16 bytes per entry for regular lists of +python int objects:: + + >>> from array import array + >>> a = array('H', [4000, 10, 700, 22222]) + >>> sum(a) + 26932 + >>> a[1:3] + array('H', [10, 700]) + +The :mod:`collections` module provides a :class:`deque()` object that is like a +list with faster appends and pops from the left side but slower lookups in the +middle. These objects are well suited for implementing queues and breadth first +tree searches:: + + >>> from collections import deque + >>> d = deque(["task1", "task2", "task3"]) + >>> d.append("task4") + >>> print "Handling", d.popleft() + Handling task1 + + unsearched = deque([starting_node]) + def breadth_first_search(unsearched): + node = unsearched.popleft() + for m in gen_moves(node): + if is_goal(m): + return m + unsearched.append(m) + +In addition to alternative list implementations, the library also offers other +tools such as the :mod:`bisect` module with functions for manipulating sorted +lists:: + + >>> import bisect + >>> scores = [(100, 'perl'), (200, 'tcl'), (400, 'lua'), (500, 'python')] + >>> bisect.insort(scores, (300, 'ruby')) + >>> scores + [(100, 'perl'), (200, 'tcl'), (300, 'ruby'), (400, 'lua'), (500, 'python')] + +The :mod:`heapq` module provides functions for implementing heaps based on +regular lists. The lowest valued entry is always kept at position zero. This +is useful for applications which repeatedly access the smallest element but do +not want to run a full list sort:: + + >>> from heapq import heapify, heappop, heappush + >>> data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 0] + >>> heapify(data) # rearrange the list into heap order + >>> heappush(data, -5) # add a new entry + >>> [heappop(data) for i in range(3)] # fetch the three smallest entries + [-5, 0, 1] + + +.. 
_tut-decimal-fp: + +Decimal Floating Point Arithmetic +================================= + +The :mod:`decimal` module offers a :class:`Decimal` datatype for decimal +floating point arithmetic. Compared to the built-in :class:`float` +implementation of binary floating point, the new class is especially helpful for +financial applications and other uses which require exact decimal +representation, control over precision, control over rounding to meet legal or +regulatory requirements, tracking of significant decimal places, or for +applications where the user expects the results to match calculations done by +hand. + +For example, calculating a 5% tax on a 70 cent phone charge gives different +results in decimal floating point and binary floating point. The difference +becomes significant if the results are rounded to the nearest cent:: + + >>> from decimal import * + >>> Decimal('0.70') * Decimal('1.05') + Decimal("0.7350") + >>> .70 * 1.05 + 0.73499999999999999 + +The :class:`Decimal` result keeps a trailing zero, automatically inferring four +place significance from multiplicands with two place significance. Decimal +reproduces mathematics as done by hand and avoids issues that can arise when +binary floating point cannot exactly represent decimal quantities. 
+ +Exact representation enables the :class:`Decimal` class to perform modulo +calculations and equality tests that are unsuitable for binary floating point:: + + >>> Decimal('1.00') % Decimal('.10') + Decimal("0.00") + >>> 1.00 % 0.10 + 0.09999999999999995 + + >>> sum([Decimal('0.1')]*10) == Decimal('1.0') + True + >>> sum([0.1]*10) == 1.0 + False + +The :mod:`decimal` module provides arithmetic with as much precision as needed:: + + >>> getcontext().prec = 36 + >>> Decimal(1) / Decimal(7) + Decimal("0.142857142857142857142857142857142857") + + diff --git a/Doc/tutorial/whatnow.rst b/Doc/tutorial/whatnow.rst new file mode 100644 index 0000000..599fcbd --- /dev/null +++ b/Doc/tutorial/whatnow.rst @@ -0,0 +1,68 @@ +.. _tut-whatnow: + +********* +What Now? +********* + +Reading this tutorial has probably reinforced your interest in using Python --- +you should be eager to apply Python to solving your real-world problems. Where +should you go to learn more? + +This tutorial is part of Python's documentation set. Some other documents in +the set are: + +* :ref:`library-index`: + + You should browse through this manual, which gives complete (though terse) + reference material about types, functions, and the modules in the standard + library. The standard Python distribution includes a *lot* of additional code. + There are modules to read Unix mailboxes, retrieve documents via HTTP, generate + random numbers, parse command-line options, write CGI programs, compress data, + and many other tasks. Skimming through the Library Reference will give you an + idea of what's available. + +* :ref:`install-index` explains how to install external modules written by other + Python users. + +* :ref:`reference-index`: A detailed explanation of Python's syntax and + semantics. It's heavy reading, but is useful as a complete guide to the + language itself. + +More Python resources: + +* http://www.python.org: The major Python Web site. 
It contains code, + documentation, and pointers to Python-related pages around the Web. This Web + site is mirrored in various places around the world, such as Europe, Japan, and + Australia; a mirror may be faster than the main site, depending on your + geographical location. + +* http://docs.python.org: Fast access to Python's documentation. + +* http://cheeseshop.python.org: The Python Package Index, nicknamed the Cheese + Shop, is an index of user-created Python modules that are available for + download. Once you begin releasing code, you can register it here so that + others can find it. + +* http://aspn.activestate.com/ASPN/Python/Cookbook/: The Python Cookbook is a + sizable collection of code examples, larger modules, and useful scripts. + Particularly notable contributions are collected in a book also titled Python + Cookbook (O'Reilly & Associates, ISBN 0-596-00797-3.) + +For Python-related questions and problem reports, you can post to the newsgroup +:newsgroup:`comp.lang.python`, or send them to the mailing list at +python-list@python.org. The newsgroup and mailing list are gatewayed, so +messages posted to one will automatically be forwarded to the other. There are +around 120 postings a day (with peaks up to several hundred), asking (and +answering) questions, suggesting new features, and announcing new modules. +Before posting, be sure to check the list of `Frequently Asked Questions +<http://www.python.org/doc/faq/>`_ (also called the FAQ), or look for it in the +:file:`Misc/` directory of the Python source distribution. Mailing list +archives are available at http://mail.python.org/pipermail/. The FAQ answers +many of the questions that come up again and again, and may already contain the +solution for your problem. + +.. % Postings figure based on average of last six months activity as +.. % reported by www.egroups.com; Jan. 2000 - June 2000: 21272 msgs / 182 +.. % days = 116.9 msgs / day and steadily increasing. 
+ + diff --git a/Doc/whatsnew/2.0.rst b/Doc/whatsnew/2.0.rst new file mode 100644 index 0000000..302986c --- /dev/null +++ b/Doc/whatsnew/2.0.rst @@ -0,0 +1,1207 @@ +**************************** + What's New in Python 2.0 +**************************** + +:Author: A.M. Kuchling and Moshe Zadka + +.. |release| replace:: 1.02 + +.. % $Id: whatsnew20.tex 51211 2006-08-11 14:57:12Z thomas.wouters $ + + +Introduction +============ + +A new release of Python, version 2.0, was released on October 16, 2000. This +article covers the exciting new features in 2.0, highlights some other useful +changes, and points out a few incompatible changes that may require rewriting +code. + +Python's development never completely stops between releases, and a steady flow +of bug fixes and improvements is always being submitted. A host of minor fixes, +a few optimizations, additional docstrings, and better error messages went into +2.0; to list them all would be impossible, but they're certainly significant. +Consult the publicly-available CVS logs if you want to see the full list. This +progress is due to the five developers working for PythonLabs, who are now getting +paid to spend their days fixing bugs, and also due to the improved communication +resulting from moving to SourceForge. + +.. % ====================================================================== + + +What About Python 1.6? +====================== + +Python 1.6 can be thought of as the Contractual Obligations Python release. +After the core development team left CNRI in May 2000, CNRI requested that a 1.6 +release be created, containing all the work on Python that had been performed at +CNRI. Python 1.6 therefore represents the state of the CVS tree as of May 2000, +with the most significant new feature being Unicode support. Development +continued after May, of course, so the 1.6 tree received a few fixes to ensure +that it's forward-compatible with Python 2.0. 
1.6 is therefore part of Python's +evolution, and not a side branch. + +So, should you take much interest in Python 1.6? Probably not. The 1.6final +and 2.0beta1 releases were made on the same day (September 5, 2000), the plan +being to finalize Python 2.0 within a month or so. If you have applications to +maintain, there seems little point in breaking things by moving to 1.6, fixing +them, and then having another round of breakage within a month by moving to 2.0; +you're better off just going straight to 2.0. Most of the really interesting +features described in this document are only in 2.0, because a lot of work was +done between May and September. + +.. % ====================================================================== + + +New Development Process +======================= + +The most important change in Python 2.0 may not be to the code at all, but to +how Python is developed: in May 2000 the Python developers began using the tools +made available by SourceForge for storing source code, tracking bug reports, +and managing the queue of patch submissions. To report bugs or submit patches +for Python 2.0, use the bug tracking and patch manager tools available from +Python's project page, located at http://sourceforge.net/projects/python/. + +The most important of the services now hosted at SourceForge is the Python CVS +tree, the version-controlled repository containing the source code for Python. +Previously, there were roughly 7 or so people who had write access to the CVS +tree, and all patches had to be inspected and checked in by one of the people on +this short list. Obviously, this wasn't very scalable. By moving the CVS tree +to SourceForge, it became possible to grant write access to more people; as of +September 2000 there were 27 people able to check in changes, a fourfold +increase. This makes possible large-scale changes that wouldn't be attempted if +they'd have to be filtered through the small group of core developers. 
For +example, one day Peter Schneider-Kamp took it into his head to drop K&R C +compatibility and convert the C source for Python to ANSI C. After getting +approval on the python-dev mailing list, he launched into a flurry of checkins +that lasted about a week, other developers joined in to help, and the job was +done. If there were only 5 people with write access, probably that task would +have been viewed as "nice, but not worth the time and effort needed" and it +would never have gotten done. + +The shift to using SourceForge's services has resulted in a remarkable increase +in the speed of development. Patches now get submitted, commented on, revised +by people other than the original submitter, and bounced back and forth between +people until the patch is deemed worth checking in. Bugs are tracked in one +central location and can be assigned to a specific person for fixing, and we can +count the number of open bugs to measure progress. This didn't come without a +cost: developers now have more e-mail to deal with, more mailing lists to +follow, and special tools had to be written for the new environment. For +example, SourceForge sends default patch and bug notification e-mail messages +that are completely unhelpful, so Ka-Ping Yee wrote an HTML screen-scraper that +sends more useful messages. + +The ease of adding code caused a few initial growing pains, such as code being +checked in before it was ready or without getting clear agreement from the +developer group. The approval process that has emerged is somewhat similar to +that used by the Apache group. Developers can vote +1, +0, -0, or -1 on a patch; ++1 and -1 denote acceptance or rejection, while +0 and -0 mean the developer is +mostly indifferent to the change, though with a slight positive or negative +slant. 
The most significant change from the Apache model is that the voting is +essentially advisory, letting Guido van Rossum, who has Benevolent Dictator For +Life status, know what the general opinion is. He can still ignore the result of +a vote, and approve or reject a change even if the community disagrees with him. + +Producing an actual patch is the last step in adding a new feature, and is +usually easy compared to the earlier task of coming up with a good design. +Discussions of new features can often explode into lengthy mailing list threads, +making the discussion hard to follow, and no one can read every posting to +python-dev. Therefore, a relatively formal process has been set up to write +Python Enhancement Proposals (PEPs), modelled on the Internet RFC process. PEPs +are draft documents that describe a proposed new feature, and are continually +revised until the community reaches a consensus, either accepting or rejecting +the proposal. Quoting from the introduction to PEP 1, "PEP Purpose and +Guidelines": + + +.. epigraph:: + + PEP stands for Python Enhancement Proposal. A PEP is a design document + providing information to the Python community, or describing a new feature for + Python. The PEP should provide a concise technical specification of the feature + and a rationale for the feature. + + We intend PEPs to be the primary mechanisms for proposing new features, for + collecting community input on an issue, and for documenting the design decisions + that have gone into Python. The PEP author is responsible for building + consensus within the community and documenting dissenting opinions. + +Read the rest of PEP 1 for the details of the PEP editorial process, style, and +format. PEPs are kept in the Python CVS tree on SourceForge, though they're not +part of the Python 2.0 distribution, and are also available in HTML form from +http://www.python.org/peps/. 
As of September 2000, there are 25 PEPs, ranging +from PEP 201, "Lockstep Iteration", to PEP 225, "Elementwise/Objectwise +Operators". + +.. % ====================================================================== + + +Unicode +======= + +The largest new feature in Python 2.0 is a new fundamental data type: Unicode +strings. Unicode uses 16-bit numbers to represent characters instead of the +8-bit number used by ASCII, meaning that 65,536 distinct characters can be +supported. + +The final interface for Unicode support was arrived at through countless +often-stormy discussions on the python-dev mailing list, and mostly implemented by +Marc-André Lemburg, based on a Unicode string type implementation by Fredrik +Lundh. A detailed explanation of the interface was written up as :pep:`100`, +"Python Unicode Integration". This article will simply cover the most +significant points about the Unicode interfaces. + +In Python source code, Unicode strings are written as ``u"string"``. Arbitrary +Unicode characters can be written using a new escape sequence, ``\uHHHH``, where +*HHHH* is a 4-digit hexadecimal number from 0000 to FFFF. The existing +``\xHHHH`` escape sequence can also be used, and octal escapes can be used for +characters up to U+01FF, which is represented by ``\777``. + +Unicode strings, just like regular strings, are an immutable sequence type. +They can be indexed and sliced, but not modified in place. Unicode strings have +an ``encode( [encoding] )`` method that returns an 8-bit string in the desired +encoding. Encodings are named by strings, such as ``'ascii'``, ``'utf-8'``, +``'iso-8859-1'``, or whatever. A codec API is defined for implementing and +registering new encodings that are then available throughout a Python program. 
+If an encoding isn't specified, the default encoding is usually 7-bit ASCII, +though it can be changed for your Python installation by calling the +:func:`sys.setdefaultencoding(encoding)` function in a customised version of +:file:`site.py`. + +Combining 8-bit and Unicode strings always coerces to Unicode, using the default +ASCII encoding; the result of ``'a' + u'bc'`` is ``u'abc'``. + +New built-in functions have been added, and existing built-ins modified to +support Unicode: + +* ``unichr(ch)`` returns a Unicode string 1 character long, containing the + character *ch*. + +* ``ord(u)``, where *u* is a 1-character regular or Unicode string, returns the + number of the character as an integer. + +* ``unicode(string [, encoding] [, errors] )`` creates a Unicode string + from an 8-bit string. ``encoding`` is a string naming the encoding to use. The + ``errors`` parameter specifies the treatment of characters that are invalid for + the current encoding; passing ``'strict'`` as the value causes an exception to + be raised on any encoding error, while ``'ignore'`` causes errors to be silently + ignored and ``'replace'`` uses U+FFFD, the official replacement character, in + case of any problems. + +* The :keyword:`exec` statement, and various built-ins such as ``eval()``, + ``getattr()``, and ``setattr()`` will also accept Unicode strings as well as + regular strings. (It's possible that the process of fixing this missed some + built-ins; if you find a built-in function that accepts strings but doesn't + accept Unicode strings at all, please report it as a bug.) + +A new module, :mod:`unicodedata`, provides an interface to Unicode character +properties. For example, ``unicodedata.category(u'A')`` returns the 2-character +string 'Lu', the 'L' denoting it's a letter, and 'u' meaning that it's +uppercase. ``unicodedata.bidirectional(u'\u0660')`` returns 'AN', meaning that +U+0660 is an Arabic number. 
+ +The :mod:`codecs` module contains functions to look up existing encodings and +register new ones. Unless you want to implement a new encoding, you'll most +often use the :func:`codecs.lookup(encoding)` function, which returns a +4-element tuple: ``(encode_func, decode_func, stream_reader, stream_writer)``. + +* *encode_func* is a function that takes a Unicode string, and returns a 2-tuple + ``(string, length)``. *string* is an 8-bit string containing a portion (perhaps + all) of the Unicode string converted into the given encoding, and *length* tells + you how much of the Unicode string was converted. + +* *decode_func* is the opposite of *encode_func*, taking an 8-bit string and + returning a 2-tuple ``(ustring, length)``, consisting of the resulting Unicode + string *ustring* and the integer *length* telling how much of the 8-bit string + was consumed. + +* *stream_reader* is a class that supports decoding input from a stream. + *stream_reader(file_obj)* returns an object that supports the :meth:`read`, + :meth:`readline`, and :meth:`readlines` methods. These methods will all + translate from the given encoding and return Unicode strings. + +* *stream_writer*, similarly, is a class that supports encoding output to a + stream. *stream_writer(file_obj)* returns an object that supports the + :meth:`write` and :meth:`writelines` methods. These methods expect Unicode + strings, translating them to the given encoding on output. + +For example, the following code writes a Unicode string into a file, encoding +it as UTF-8:: + + import codecs + + unistr = u'\u0660\u2000ab ...' 
+ + (UTF8_encode, UTF8_decode, + UTF8_streamreader, UTF8_streamwriter) = codecs.lookup('UTF-8') + + output = UTF8_streamwriter( open( '/tmp/output', 'wb') ) + output.write( unistr ) + output.close() + +The following code would then read UTF-8 input from the file:: + + input = UTF8_streamreader( open( '/tmp/output', 'rb') ) + print repr(input.read()) + input.close() + +Unicode-aware regular expressions are available through the :mod:`re` module, +which has a new underlying implementation called SRE written by Fredrik Lundh of +Secret Labs AB. + +A ``-U`` command line option was added which causes the Python compiler to +interpret all string literals as Unicode string literals. This is intended to be +used in testing and future-proofing your Python code, since some future version +of Python may drop support for 8-bit strings and provide only Unicode strings. + +.. % ====================================================================== + + +List Comprehensions +=================== + +Lists are a workhorse data type in Python, and many programs manipulate a list +at some point. Two common operations on lists are to loop over them, and either +pick out the elements that meet a certain criterion, or apply some function to +each element. For example, given a list of strings, you might want to pull out +all the strings containing a given substring, or strip off trailing whitespace +from each line. + +The existing :func:`map` and :func:`filter` functions can be used for this +purpose, but they require a function as one of their arguments. This is fine if +there's an existing built-in function that can be passed directly, but if there +isn't, you have to create a little function to do the required work, and +Python's scoping rules make the result ugly if the little function needs +additional information. Take the first example in the previous paragraph, +finding all the strings in the list containing a given substring. 
You could +write the following to do it:: + + # Given the list L, make a list of all strings + # containing the substring S. + sublist = filter( lambda s, substring=S: + string.find(s, substring) != -1, + L) + +Because of Python's scoping rules, a default argument is used so that the +anonymous function created by the :keyword:`lambda` statement knows what +substring is being searched for. List comprehensions make this cleaner:: + + sublist = [ s for s in L if string.find(s, S) != -1 ] + +List comprehensions have the form:: + + [ expression for expr1 in sequence1 + for expr2 in sequence2 ... + for exprN in sequenceN + if condition ] + +The :keyword:`for`...\ :keyword:`in` clauses contain the sequences to be +iterated over. The sequences do not have to be the same length, because they +are *not* iterated over in parallel, but from left to right; this is explained +more clearly in the following paragraphs. The elements of the generated list +will be the successive values of *expression*. The final :keyword:`if` clause +is optional; if present, *expression* is only evaluated and added to the result +if *condition* is true. + +To make the semantics very clear, a list comprehension is equivalent to the +following Python code:: + + for expr1 in sequence1: + for expr2 in sequence2: + ... + for exprN in sequenceN: + if (condition): + # Append the value of + # the expression to the + # resulting list. + +This means that when there are multiple :keyword:`for`...\ :keyword:`in` +clauses, the length of the resulting list will be equal to the product of the lengths of all +the sequences. If you have two lists of length 3, the output list is 9 elements +long:: + + seq1 = 'abc' + seq2 = (1,2,3) + >>> [ (x,y) for x in seq1 for y in seq2] + [('a', 1), ('a', 2), ('a', 3), ('b', 1), ('b', 2), ('b', 3), ('c', 1), + ('c', 2), ('c', 3)] + +To avoid introducing an ambiguity into Python's grammar, if *expression* is +creating a tuple, it must be surrounded with parentheses.
The first list +comprehension below is a syntax error, while the second one is correct:: + + # Syntax error + [ x,y for x in seq1 for y in seq2] + # Correct + [ (x,y) for x in seq1 for y in seq2] + +The idea of list comprehensions originally comes from the functional programming +language Haskell (http://www.haskell.org). Greg Ewing argued most effectively +for adding them to Python and wrote the initial list comprehension patch, which +was then discussed for a seemingly endless time on the python-dev mailing list +and kept up-to-date by Skip Montanaro. + +.. % ====================================================================== + + +Augmented Assignment +==================== + +Augmented assignment operators, another long-requested feature, have been added +to Python 2.0. Augmented assignment operators include ``+=``, ``-=``, ``*=``, +and so forth. For example, the statement ``a += 2`` increments the value of the +variable ``a`` by 2, equivalent to the slightly lengthier ``a = a + 2``. + +The full list of supported assignment operators is ``+=``, ``-=``, ``*=``, +``/=``, ``%=``, ``**=``, ``&=``, ``|=``, ``^=``, ``>>=``, and ``<<=``. Python +classes can override the augmented assignment operators by defining methods +named :meth:`__iadd__`, :meth:`__isub__`, etc. For example, the following +:class:`Number` class stores a number and supports using += to create a new +instance with an incremented value. + +.. % The empty groups below prevent conversion to guillemets. + +:: + + class Number: + def __init__(self, value): + self.value = value + def __iadd__(self, increment): + return Number( self.value + increment) + + n = Number(5) + n += 3 + print n.value + +The :meth:`__iadd__` special method is called with the value of the increment, +and should return a new instance with an appropriately modified value; this +return value is bound as the new value of the variable on the left-hand side. 
+ +Augmented assignment operators were first introduced in the C programming +language, and most C-derived languages, such as :program:`awk`, C++, Java, Perl, +and PHP also support them. The augmented assignment patch was implemented by +Thomas Wouters. + +.. % ====================================================================== + + +String Methods +============== + +Until now string-manipulation functionality was in the :mod:`string` module, +which was usually a front-end for the :mod:`strop` module written in C. The +addition of Unicode posed a difficulty for the :mod:`strop` module, because the +functions would all need to be rewritten in order to accept either 8-bit or +Unicode strings. For functions such as :func:`string.replace`, which takes 3 +string arguments, that means eight possible permutations, and correspondingly +complicated code. + +Instead, Python 2.0 pushes the problem onto the string type, making string +manipulation functionality available through methods on both 8-bit strings and +Unicode strings. :: + + >>> 'andrew'.capitalize() + 'Andrew' + >>> 'hostname'.replace('os', 'linux') + 'hlinuxtname' + >>> 'moshe'.find('sh') + 2 + +One thing that hasn't changed, a noteworthy April Fools' joke notwithstanding, +is that Python strings are immutable. Thus, the string methods return new +strings, and do not modify the string on which they operate. + +The old :mod:`string` module is still around for backwards compatibility, but it +mostly acts as a front-end to the new string methods. + +Two methods which have no parallel in pre-2.0 versions, although they did exist +in JPython for quite some time, are :meth:`startswith` and :meth:`endswith`. +``s.startswith(t)`` is equivalent to ``s[:len(t)] == t``, while +``s.endswith(t)`` is equivalent to ``s[-len(t):] == t``. + +One other method which deserves special mention is :meth:`join`. 
The +:meth:`join` method of a string receives one parameter, a sequence of strings, +and is equivalent to the :func:`string.join` function from the old :mod:`string` +module, with the arguments reversed. In other words, ``s.join(seq)`` is +equivalent to the old ``string.join(seq, s)``. + +.. % ====================================================================== + + +Garbage Collection of Cycles +============================ + +The C implementation of Python uses reference counting to implement garbage +collection. Every Python object maintains a count of the number of references +pointing to itself, and adjusts the count as references are created or +destroyed. Once the reference count reaches zero, the object is no longer +accessible, since you need to have a reference to an object to access it, and if +the count is zero, no references exist any longer. + +Reference counting has some pleasant properties: it's easy to understand and +implement, and the resulting implementation is portable, fairly fast, and reacts +well with other libraries that implement their own memory handling schemes. The +major problem with reference counting is that it sometimes doesn't realise that +objects are no longer accessible, resulting in a memory leak. This happens when +there are cycles of references. + +Consider the simplest possible cycle, a class instance which has a reference to +itself:: + + instance = SomeClass() + instance.myself = instance + +After the above two lines of code have been executed, the reference count of +``instance`` is 2; one reference is from the variable named ``'instance'``, and +the other is from the ``myself`` attribute of the instance. + +If the next line of code is ``del instance``, what happens? The reference count +of ``instance`` is decreased by 1, so it has a reference count of 1; the +reference in the ``myself`` attribute still exists. Yet the instance is no +longer accessible through Python code, and it could be deleted. 
Several objects +can participate in a cycle if they have references to each other, causing all of +the objects to be leaked. + +Python 2.0 fixes this problem by periodically executing a cycle detection +algorithm which looks for inaccessible cycles and deletes the objects involved. +A new :mod:`gc` module provides functions to perform a garbage collection, +obtain debugging statistics, and tune the collector's parameters. + +Running the cycle detection algorithm takes some time, and therefore will result +in some additional overhead. It is hoped that after we've gotten experience +with the cycle collection from using 2.0, Python 2.1 will be able to minimize +the overhead with careful tuning. It's not yet obvious how much performance is +lost, because benchmarking this is tricky and depends crucially on how often the +program creates and destroys objects. The detection of cycles can be disabled +when Python is compiled, if you can't afford even a tiny speed penalty or +suspect that the cycle collection is buggy, by specifying the +:option:`--without-cycle-gc` switch when running the :program:`configure` +script. + +Several people tackled this problem and contributed to a solution. An early +implementation of the cycle detection approach was written by Toby Kelsey. The +current algorithm was suggested by Eric Tiedemann during a visit to CNRI, and +Guido van Rossum and Neil Schemenauer wrote two different implementations, which +were later integrated by Neil. Lots of other people offered suggestions along +the way; the March 2000 archives of the python-dev mailing list contain most of +the relevant discussion, especially in the threads titled "Reference cycle +collection for Python" and "Finalization again". + +.. % ====================================================================== + + +Other Core Changes +================== + +Various minor changes have been made to Python's syntax and built-in functions.
+None of the changes are very far-reaching, but they're handy conveniences. + + +Minor Language Changes +---------------------- + +A new syntax makes it more convenient to call a given function with a tuple of +arguments and/or a dictionary of keyword arguments. In Python 1.5 and earlier, +you'd use the :func:`apply` built-in function: ``apply(f, args, kw)`` calls the +function :func:`f` with the argument tuple *args* and the keyword arguments in +the dictionary *kw*. :func:`apply` is the same in 2.0, but thanks to a patch +from Greg Ewing, ``f(*args, **kw)`` is a shorter and clearer way to achieve the +same effect. This syntax is symmetrical with the syntax for defining +functions:: + + def f(*args, **kw): + # args is a tuple of positional args, + # kw is a dictionary of keyword args + ... + +The :keyword:`print` statement can now have its output directed to a file-like +object by following the :keyword:`print` with ``>> file``, similar to the +redirection operator in Unix shells. Previously you'd either have to use the +:meth:`write` method of the file-like object, which lacks the convenience and +simplicity of :keyword:`print`, or you could assign a new value to +``sys.stdout`` and then restore the old value. For sending output to standard +error, it's much easier to write this:: + + print >> sys.stderr, "Warning: action field not supplied" + +Modules can now be renamed on importing them, using the syntax ``import module +as name`` or ``from module import name as othername``. The patch was submitted +by Thomas Wouters. + +A new format style is available when using the ``%`` operator; '%r' will insert +the :func:`repr` of its argument. This was also added from symmetry +considerations, this time for symmetry with the existing '%s' format style, +which inserts the :func:`str` of its argument. For example, ``'%r %s' % ('abc', +'abc')`` returns a string containing ``'abc' abc``.
+ +Previously there was no way to implement a class that overrode Python's built-in +:keyword:`in` operator and implemented a custom version. ``obj in seq`` returns +true if *obj* is present in the sequence *seq*; Python computes this by simply +trying every index of the sequence until either *obj* is found or an +:exc:`IndexError` is encountered. Moshe Zadka contributed a patch which adds a +:meth:`__contains__` magic method for providing a custom implementation for +:keyword:`in`. Additionally, new built-in objects written in C can define what +:keyword:`in` means for them via a new slot in the sequence protocol. + +Earlier versions of Python used a recursive algorithm for deleting objects. +Deeply nested data structures could cause the interpreter to fill up the C stack +and crash; Christian Tismer rewrote the deletion logic to fix this problem. On +a related note, comparing recursive objects recursed infinitely and crashed; +Jeremy Hylton rewrote the code to no longer crash, producing a useful result +instead. For example, after this code:: + + a = [] + b = [] + a.append(a) + b.append(b) + +The comparison ``a==b`` returns true, because the two recursive data structures +are isomorphic. See the thread "trashcan and PR#7" in the April 2000 archives of +the python-dev mailing list for the discussion leading up to this +implementation, and some useful relevant links. Note that comparisons can now +also raise exceptions. In earlier versions of Python, a comparison operation +such as ``cmp(a,b)`` would always produce an answer, even if a user-defined +:meth:`__cmp__` method encountered an error, since the resulting exception would +simply be silently swallowed. + +.. % Starting URL: +.. % http://www.python.org/pipermail/python-dev/2000-April/004834.html + +Work has been done on porting Python to 64-bit Windows on the Itanium processor, +mostly by Trent Mick of ActiveState. 
(Confusingly, ``sys.platform`` is still +``'win32'`` on Win64 because it seems that for ease of porting, MS Visual C++ +treats code as 32 bit on Itanium.) PythonWin also supports Windows CE; see the +Python CE page at http://starship.python.net/crew/mhammond/ce/ for more +information. + +Another new platform is Darwin/MacOS X; initial support for it is in Python 2.0. +Dynamic loading works, if you specify "configure --with-dyld --with-suffix=.x". +Consult the README in the Python source distribution for more instructions. + +An attempt has been made to alleviate one of Python's warts, the often-confusing +:exc:`NameError` exception when code refers to a local variable before the +variable has been assigned a value. For example, the following code raises an +exception on the :keyword:`print` statement in both 1.5.2 and 2.0; in 1.5.2 a +:exc:`NameError` exception is raised, while 2.0 raises a new +:exc:`UnboundLocalError` exception. :exc:`UnboundLocalError` is a subclass of +:exc:`NameError`, so any existing code that expects :exc:`NameError` to be +raised should still work. :: + + def f(): + print "i=",i + i = i + 1 + f() + +Two new exceptions, :exc:`TabError` and :exc:`IndentationError`, have been +introduced. They're both subclasses of :exc:`SyntaxError`, and are raised when +Python code is found to be improperly indented. + + +Changes to Built-in Functions +----------------------------- + +A new built-in, :func:`zip(seq1, seq2, ...)`, has been added. :func:`zip` +returns a list of tuples where each tuple contains the i-th element from each of +the argument sequences. The difference between :func:`zip` and ``map(None, +seq1, seq2)`` is that :func:`map` pads the sequences with ``None`` if the +sequences aren't all of the same length, while :func:`zip` truncates the +returned list to the length of the shortest argument sequence. + +The :func:`int` and :func:`long` functions now accept an optional "base" +parameter when the first argument is a string. 
``int('123', 10)`` returns 123, +while ``int('123', 16)`` returns 291. ``int(123, 16)`` raises a +:exc:`TypeError` exception with the message "can't convert non-string with +explicit base". + +A new variable holding more detailed version information has been added to the +:mod:`sys` module. ``sys.version_info`` is a tuple ``(major, minor, micro, +level, serial)``. For example, in a hypothetical 2.0.1beta1, ``sys.version_info`` +would be ``(2, 0, 1, 'beta', 1)``. *level* is a string such as ``"alpha"``, +``"beta"``, or ``"final"`` for a final release. + +Dictionaries have an odd new method, :meth:`setdefault(key, default)`, which +behaves similarly to the existing :meth:`get` method. However, if the key is +missing, :meth:`setdefault` both returns the value of *default* as :meth:`get` +would do, and also inserts it into the dictionary as the value for *key*. Thus, +the following lines of code:: + + if dict.has_key( key ): return dict[key] + else: + dict[key] = [] + return dict[key] + +can be reduced to a single ``return dict.setdefault(key, [])`` statement. + +The interpreter sets a maximum recursion depth in order to catch runaway +recursion before filling the C stack and causing a core dump or GPF. +Previously this limit was fixed when you compiled Python, but in 2.0 the maximum +recursion depth can be read and modified using :func:`sys.getrecursionlimit` and +:func:`sys.setrecursionlimit`. The default value is 1000, and a rough maximum +value for a given platform can be found by running a new script, +:file:`Misc/find_recursionlimit.py`. + +.. % ====================================================================== + + +Porting to 2.0 +============== + +New Python releases try hard to be compatible with previous releases, and the +record has been pretty good. However, some changes are considered useful +enough, usually because they fix initial design decisions that turned out to be +actively mistaken, that breaking backward compatibility can't always be avoided.
+This section lists the changes in Python 2.0 that may cause old Python code to +break. + +The change which will probably break the most code is tightening up the +arguments accepted by some methods. Some methods would take multiple arguments +and treat them as a tuple, particularly various list methods such as +:meth:`.append` and :meth:`.insert`. In earlier versions of Python, if ``L`` is +a list, ``L.append( 1,2 )`` appends the tuple ``(1,2)`` to the list. In Python +2.0 this causes a :exc:`TypeError` exception to be raised, with the message: +'append requires exactly 1 argument; 2 given'. The fix is to simply add an +extra set of parentheses to pass both values as a tuple: ``L.append( (1,2) )``. + +The earlier versions of these methods were more forgiving because they used an +old function in Python's C interface to parse their arguments; 2.0 modernizes +them to use :func:`PyArg_ParseTuple`, the current argument parsing function, +which provides more helpful error messages and treats multi-argument calls as +errors. If you absolutely must use 2.0 but can't fix your code, you can edit +:file:`Objects/listobject.c` and define the preprocessor symbol +``NO_STRICT_LIST_APPEND`` to preserve the old behaviour; this isn't recommended. + +Some of the functions in the :mod:`socket` module are still forgiving in this +way. For example, :func:`socket.connect( ('hostname', 25) )` is the correct +form, passing a tuple representing an IP address, but :func:`socket.connect( +'hostname', 25 )` also works. :func:`socket.connect_ex` and :func:`socket.bind` +are similarly easy-going. 2.0alpha1 tightened these functions up, but because +the documentation actually used the erroneous multiple argument form, many +people wrote code which would break with the stricter checking. 
GvR backed out +the changes in the face of public reaction, so for the :mod:`socket` module, the +documentation was fixed and the multiple argument form is simply marked as +deprecated; it *will* be tightened up again in a future Python version. + +The ``\x`` escape in string literals now takes exactly 2 hex digits. Previously +it would consume all the hex digits following the 'x' and take the lowest 8 bits +of the result, so ``\x123456`` was equivalent to ``\x56``. + +The :exc:`AttributeError` and :exc:`NameError` exceptions have a more friendly +error message, whose text will be something like ``'Spam' instance has no +attribute 'eggs'`` or ``name 'eggs' is not defined``. Previously the error +message was just the missing attribute name ``eggs``, and code written to take +advantage of this fact will break in 2.0. + +Some work has been done to make integers and long integers a bit more +interchangeable. In 1.5.2, large-file support was added for Solaris, to allow +reading files larger than 2 GiB; this made the :meth:`tell` method of file +objects return a long integer instead of a regular integer. Some code would +subtract two file offsets and attempt to use the result to multiply a sequence +or slice a string, but this raised a :exc:`TypeError`. In 2.0, long integers +can be used to multiply or slice a sequence, and it'll behave as you'd +intuitively expect it to; ``3L * 'abc'`` produces 'abcabcabc', and +``(0,1,2,3)[2L:4L]`` produces (2,3). Long integers can also be used in various +contexts where previously only integers were accepted, such as in the +:meth:`seek` method of file objects, and in the formats supported by the ``%`` +operator (``%d``, ``%i``, ``%x``, etc.). For example, ``"%d" % 2L**64`` will +produce the string ``18446744073709551616``. + +The subtlest long integer change of all is that the :func:`str` of a long +integer no longer has a trailing 'L' character, though :func:`repr` still +includes it. 
The 'L' annoyed many people who wanted to print long integers that +looked just like regular integers, since they had to go out of their way to chop +off the character. This is no longer a problem in 2.0, but code which does +``str(longval)[:-1]`` and assumes the 'L' is there, will now lose the final +digit. + +Taking the :func:`repr` of a float now uses a different formatting precision +than :func:`str`. :func:`repr` uses the ``%.17g`` format string for C's +:func:`sprintf`, while :func:`str` uses ``%.12g`` as before. The effect is that +:func:`repr` may occasionally show more decimal places than :func:`str`, for +certain numbers. For example, the number 8.1 can't be represented exactly in +binary, so ``repr(8.1)`` is ``'8.0999999999999996'``, while ``str(8.1)`` is +``'8.1'``. + +The ``-X`` command-line option, which turned all standard exceptions into +strings instead of classes, has been removed; the standard exceptions will now +always be classes. The :mod:`exceptions` module containing the standard +exceptions was translated from Python to a built-in C module, written by Barry +Warsaw and Fredrik Lundh. + +.. % Commented out for now -- I don't think anyone will care. +.. % The pattern and match objects provided by SRE are C types, not Python +.. % class instances as in 1.5. This means you can no longer inherit from +.. % \class{RegexObject} or \class{MatchObject}, but that shouldn't be much +.. % of a problem since no one should have been doing that in the first +.. % place. +.. % ====================================================================== + + +Extending/Embedding Changes +=========================== + +Some of the changes are under the covers, and will only be apparent to people +writing C extension modules or embedding a Python interpreter in a larger +application. If you aren't dealing with Python's C API, you can safely skip +this section.
+ +The version number of the Python C API was incremented, so C extensions compiled +for 1.5.2 must be recompiled in order to work with 2.0. On Windows, it's not +possible for Python 2.0 to import a third party extension built for Python 1.5.x +due to how Windows DLLs work, so Python will raise an exception and the import +will fail. + +Users of Jim Fulton's ExtensionClass module will be pleased to find out that +hooks have been added so that ExtensionClasses are now supported by +:func:`isinstance` and :func:`issubclass`. This means you no longer have to +remember to write code such as ``if type(obj) == myExtensionClass``, but can use +the more natural ``if isinstance(obj, myExtensionClass)``. + +The :file:`Python/importdl.c` file, which was a mass of #ifdefs to support +dynamic loading on many different platforms, was cleaned up and reorganised by +Greg Stein. :file:`importdl.c` is now quite small, and platform-specific code +has been moved into a bunch of :file:`Python/dynload_\*.c` files. Another +cleanup: there were also a number of :file:`my\*.h` files in the Include/ +directory that held various portability hacks; they've been merged into a single +file, :file:`Include/pyport.h`. + +Vladimir Marangozov's long-awaited malloc restructuring was completed, to make +it easy to have the Python interpreter use a custom allocator instead of C's +standard :func:`malloc`. For documentation, read the comments in +:file:`Include/pymem.h` and :file:`Include/objimpl.h`. For the lengthy +discussions during which the interface was hammered out, see the Web archives of +the 'patches' and 'python-dev' lists at python.org. + +Recent versions of the GUSI development environment for MacOS support POSIX +threads. Therefore, Python's POSIX threading support now works on the +Macintosh. Threading support using the user-space GNU ``pth`` library was also +contributed. + +Threading support on Windows was enhanced, too. 
Windows supports thread locks +that use kernel objects only in case of contention; in the common case when +there's no contention, they use simpler functions which are an order of +magnitude faster. A threaded version of Python 1.5.2 on NT is twice as slow as +an unthreaded version; with the 2.0 changes, the difference is only 10%. These +improvements were contributed by Yakov Markovitch. + +Python 2.0's source now uses only ANSI C prototypes, so compiling Python now +requires an ANSI C compiler, and can no longer be done using a compiler that +only supports K&R C. + +Previously the Python virtual machine used 16-bit numbers in its bytecode, +limiting the size of source files. In particular, this affected the maximum +size of literal lists and dictionaries in Python source; occasionally people who +are generating Python code would run into this limit. A patch by Charles G. +Waldman raises the limit from ``2**16`` to ``2**32``. + +Three new convenience functions intended for adding constants to a module's +dictionary at module initialization time were added: :func:`PyModule_AddObject`, +:func:`PyModule_AddIntConstant`, and :func:`PyModule_AddStringConstant`. Each +of these functions takes a module object, a null-terminated C string containing +the name to be added, and a third argument for the value to be assigned to the +name. This third argument is, respectively, a Python object, a C long, or a C +string. + +A wrapper API was added for Unix-style signal handlers. :func:`PyOS_getsig` gets +a signal handler and :func:`PyOS_setsig` will set a new handler. + +.. % ====================================================================== + + +Distutils: Making Modules Easy to Install +========================================= + +Before Python 2.0, installing modules was a tedious affair -- there was no way +to figure out automatically where Python is installed, or what compiler options +to use for extension modules.
Software authors had to go through an arduous +ritual of editing Makefiles and configuration files, which only really work on +Unix and leave Windows and MacOS unsupported. Python users faced wildly +differing installation instructions which varied between different extension +packages, which made administering a Python installation something of a chore. + +The SIG for distribution utilities, shepherded by Greg Ward, has created the +Distutils, a system to make package installation much easier. They form the +:mod:`distutils` package, a new part of Python's standard library. In the best +case, installing a Python module from source will require the same steps: first +you simply unpack the tarball or zip archive, and then run "``python +setup.py install``". The platform will be automatically detected, the compiler +will be recognized, C extension modules will be compiled, and the distribution +installed into the proper directory. Optional command-line arguments provide +more control over the installation process; the distutils package offers many +places to override defaults -- separating the build from the install, building +or installing in non-default directories, and more. + +In order to use the Distutils, you need to write a :file:`setup.py` script. 
For +the simple case, when the software contains only .py files, a minimal +:file:`setup.py` can be just a few lines long:: + + from distutils.core import setup + setup (name = "foo", version = "1.0", + py_modules = ["module1", "module2"]) + +The :file:`setup.py` file isn't much more complicated if the software consists +of a few packages:: + + from distutils.core import setup + setup (name = "foo", version = "1.0", + packages = ["package", "package.subpackage"]) + +A C extension can be the most complicated case; here's an example taken from +the PyXML package:: + + from distutils.core import setup, Extension + + expat_extension = Extension('xml.parsers.pyexpat', + define_macros = [('XML_NS', None)], + include_dirs = [ 'extensions/expat/xmltok', + 'extensions/expat/xmlparse' ], + sources = [ 'extensions/pyexpat.c', + 'extensions/expat/xmltok/xmltok.c', + 'extensions/expat/xmltok/xmlrole.c', + ] + ) + setup (name = "PyXML", version = "0.5.4", + ext_modules =[ expat_extension ] ) + +The Distutils can also take care of creating source and binary distributions. +The "sdist" command, run by "``python setup.py sdist``", builds a source +distribution such as :file:`foo-1.0.tar.gz`. Adding new commands isn't +difficult; "bdist_rpm" and "bdist_wininst" commands have already been +contributed to create an RPM distribution and a Windows installer for the +software, respectively. Commands to create other distribution formats such as +Debian packages and Solaris :file:`.pkg` files are in various stages of +development. + +All this is documented in a new manual, *Distributing Python Modules*, that +joins the basic set of Python documentation. + +.. % ====================================================================== + + +XML Modules +=========== + +Python 1.5.2 included a simple XML parser in the form of the :mod:`xmllib` +module, contributed by Sjoerd Mullender. 
Since 1.5.2's release, two different +interfaces for processing XML have become common: SAX2 (version 2 of the Simple +API for XML) provides an event-driven interface with some similarities to +:mod:`xmllib`, and the DOM (Document Object Model) provides a tree-based +interface, transforming an XML document into a tree of nodes that can be +traversed and modified. Python 2.0 includes a SAX2 interface and a stripped- +down DOM interface as part of the :mod:`xml` package. Here we will give a brief +overview of these new interfaces; consult the Python documentation or the source +code for complete details. The Python XML SIG is also working on improved +documentation. + + +SAX2 Support +------------ + +SAX defines an event-driven interface for parsing XML. To use SAX, you must +write a SAX handler class. Handler classes inherit from various classes +provided by SAX, and override various methods that will then be called by the +XML parser. For example, the :meth:`startElement` and :meth:`endElement` +methods are called for every starting and end tag encountered by the parser, the +:meth:`characters` method is called for every chunk of character data, and so +forth. + +The advantage of the event-driven approach is that the whole document doesn't +have to be resident in memory at any one time, which matters if you are +processing really huge documents. However, writing the SAX handler class can +get very complicated if you're trying to modify the document structure in some +elaborate way. 
+ +For example, this little example program defines a handler that prints a message +for every starting and ending tag, and then parses the file :file:`hamlet.xml` +using it:: + + from xml import sax + + class SimpleHandler(sax.ContentHandler): + def startElement(self, name, attrs): + print 'Start of element:', name, attrs.keys() + + def endElement(self, name): + print 'End of element:', name + + # Create a parser object + parser = sax.make_parser() + + # Tell it what handler to use + handler = SimpleHandler() + parser.setContentHandler( handler ) + + # Parse a file! + parser.parse( 'hamlet.xml' ) + +For more information, consult the Python documentation, or the XML HOWTO at +http://pyxml.sourceforge.net/topics/howto/xml-howto.html. + + +DOM Support +----------- + +The Document Object Model is a tree-based representation for an XML document. A +top-level :class:`Document` instance is the root of the tree, and has a single +child which is the top-level :class:`Element` instance. This :class:`Element` +has children nodes representing character data and any sub-elements, which may +have further children of their own, and so forth. Using the DOM you can +traverse the resulting tree any way you like, access element and attribute +values, insert and delete nodes, and convert the tree back into XML. + +The DOM is useful for modifying XML documents, because you can create a DOM +tree, modify it by adding new nodes or rearranging subtrees, and then produce a +new XML document as output. You can also construct a DOM tree manually and +convert it to XML, which can be a more flexible way of producing XML output than +simply writing ``<tag1>``...\ ``</tag1>`` to a file. + +The DOM implementation included with Python lives in the :mod:`xml.dom.minidom` +module. It's a lightweight implementation of the Level 1 DOM with support for +XML namespaces. 
The :func:`parse` and :func:`parseString` convenience +functions are provided for generating a DOM tree:: + + from xml.dom import minidom + doc = minidom.parse('hamlet.xml') + +``doc`` is a :class:`Document` instance. :class:`Document`, like all the other +DOM classes such as :class:`Element` and :class:`Text`, is a subclass of the +:class:`Node` base class. All the nodes in a DOM tree therefore support certain +common methods, such as :meth:`toxml` which returns a string containing the XML +representation of the node and its children. Each class also has special +methods of its own; for example, :class:`Element` and :class:`Document` +instances have a method to find all child elements with a given tag name. +Continuing from the previous 2-line example:: + + perslist = doc.getElementsByTagName( 'PERSONA' ) + print perslist[0].toxml() + print perslist[1].toxml() + +For the *Hamlet* XML file, the above few lines output:: + + <PERSONA>CLAUDIUS, king of Denmark. </PERSONA> + <PERSONA>HAMLET, son to the late, and nephew to the present king.</PERSONA> + +The root element of the document is available as ``doc.documentElement``, and +its children can be easily modified by deleting, adding, or removing nodes:: + + root = doc.documentElement + + # Remove the first child + root.removeChild( root.childNodes[0] ) + + # Move the new first child to the end + root.appendChild( root.childNodes[0] ) + + # Insert the new first child (originally, + # the third child) before the 20th child. + root.insertBefore( root.childNodes[0], root.childNodes[20] ) + +Again, I will refer you to the Python documentation for a complete listing of +the different :class:`Node` classes and their various methods. + + +Relationship to PyXML +--------------------- + +The XML Special Interest Group has been working on XML-related Python code for a +while. Its code distribution, called PyXML, is available from the SIG's Web +pages at http://www.python.org/sigs/xml-sig/. 
The PyXML distribution also used +the package name ``xml``. If you've written programs that used PyXML, you're +probably wondering about its compatibility with the 2.0 :mod:`xml` package. + +The answer is that Python 2.0's :mod:`xml` package isn't compatible with PyXML, +but can be made compatible by installing a recent version of PyXML. Many +applications can get by with the XML support that is included with Python 2.0, +but more complicated applications will require that the full PyXML package +be installed. When installed, PyXML versions 0.6.0 or greater will replace the +:mod:`xml` package shipped with Python, and will be a strict superset of the +standard package, adding a bunch of additional features. Some of the additional +features in PyXML include: + +* 4DOM, a full DOM implementation from FourThought, Inc. + +* The xmlproc validating parser, written by Lars Marius Garshol. + +* The :mod:`sgmlop` parser accelerator module, written by Fredrik Lundh. + +.. % ====================================================================== + + +Module changes +============== + +Lots of improvements and bugfixes were made to Python's extensive standard +library; some of the affected modules include :mod:`readline`, +:mod:`ConfigParser`, :mod:`cgi`, :mod:`calendar`, :mod:`posix`, +:mod:`xmllib`, :mod:`aifc`, :mod:`chunk`, :mod:`wave`, :mod:`random`, :mod:`shelve`, +and :mod:`nntplib`. Consult the CVS logs for the exact patch-by-patch details. + +Brian Gallew contributed OpenSSL support for the :mod:`socket` module. OpenSSL +is an implementation of the Secure Sockets Layer, which encrypts the data being +sent over a socket. When compiling Python, you can edit :file:`Modules/Setup` +to include SSL support, which adds an additional function to the :mod:`socket` +module: :func:`socket.ssl(socket, keyfile, certfile)`, which takes a socket +object and returns an SSL socket. 
The :mod:`httplib` and :mod:`urllib` modules +were also changed to support "https://" URLs, though no one has implemented FTP +or SMTP over SSL. + +The :mod:`httplib` module has been rewritten by Greg Stein to support HTTP/1.1. +Backward compatibility with the 1.5 version of :mod:`httplib` is provided, +though using HTTP/1.1 features such as pipelining will require rewriting code to +use a different set of interfaces. + +The :mod:`Tkinter` module now supports Tcl/Tk version 8.1, 8.2, or 8.3, and +support for the older 7.x versions has been dropped. The Tkinter module now +supports displaying Unicode strings in Tk widgets. Also, Fredrik Lundh +contributed an optimization which makes operations like ``create_line`` and +``create_polygon`` much faster, especially when using lots of coordinates. + +The :mod:`curses` module has been greatly extended, starting from Oliver +Andrich's enhanced version, to provide many additional functions from ncurses +and SYSV curses, such as colour, alternative character set support, pads, and +mouse support. This means the module is no longer compatible with operating +systems that only have BSD curses, but there don't seem to be any currently +maintained OSes that fall into this category. + +As mentioned in the earlier discussion of 2.0's Unicode support, the underlying +implementation of the regular expressions provided by the :mod:`re` module has +been changed. SRE, a new regular expression engine written by Fredrik Lundh and +partially funded by Hewlett Packard, supports matching against both 8-bit +strings and Unicode strings. + +.. % ====================================================================== + + +New modules +=========== + +A number of new modules were added. We'll simply list them with brief +descriptions; consult the 2.0 documentation for the details of a particular +module. + +* :mod:`atexit`: For registering functions to be called before the Python + interpreter exits. 
Code that currently sets ``sys.exitfunc`` directly should be + changed to use the :mod:`atexit` module instead, importing :mod:`atexit` and + calling :func:`atexit.register` with the function to be called on exit. + (Contributed by Skip Montanaro.) + +* :mod:`codecs`, :mod:`encodings`, :mod:`unicodedata`: Added as part of the new + Unicode support. + +* :mod:`filecmp`: Supersedes the old :mod:`cmp`, :mod:`cmpcache` and + :mod:`dircmp` modules, which have now become deprecated. (Contributed by Gordon + MacMillan and Moshe Zadka.) + +* :mod:`gettext`: This module provides internationalization (I18N) and + localization (L10N) support for Python programs by providing an interface to the + GNU gettext message catalog library. (Integrated by Barry Warsaw, from separate + contributions by Martin von Löwis, Peter Funk, and James Henstridge.) + +* :mod:`linuxaudiodev`: Support for the :file:`/dev/audio` device on Linux, a + twin to the existing :mod:`sunaudiodev` module. (Contributed by Peter Bosch, + with fixes by Jeremy Hylton.) + +* :mod:`mmap`: An interface to memory-mapped files on both Windows and Unix. A + file's contents can be mapped directly into memory, at which point it behaves + like a mutable string, so its contents can be read and modified. They can even + be passed to functions that expect ordinary strings, such as the :mod:`re` + module. (Contributed by Sam Rushing, with some extensions by A.M. Kuchling.) + +* :mod:`pyexpat`: An interface to the Expat XML parser. (Contributed by Paul + Prescod.) + +* :mod:`robotparser`: Parse a :file:`robots.txt` file, which is used for writing + Web spiders that politely avoid certain areas of a Web site. The parser accepts + the contents of a :file:`robots.txt` file, builds a set of rules from it, and + can then answer questions about the fetchability of a given URL. (Contributed + by Skip Montanaro.) + +* :mod:`tabnanny`: A module/script to check Python source code for ambiguous + indentation. 
(Contributed by Tim Peters.) + +* :mod:`UserString`: A base class useful for deriving objects that behave like + strings. + +* :mod:`webbrowser`: A module that provides a platform independent way to launch + a web browser on a specific URL. For each platform, various browsers are tried + in a specific order. The user can alter which browser is launched by setting the + *BROWSER* environment variable. (Originally inspired by Eric S. Raymond's patch + to :mod:`urllib` which added similar functionality, but the final module comes + from code originally implemented by Fred Drake as + :file:`Tools/idle/BrowserControl.py`, and adapted for the standard library by + Fred.) + +* :mod:`_winreg`: An interface to the Windows registry. :mod:`_winreg` is an + adaptation of functions that have been part of PythonWin since 1995, but has now + been added to the core distribution, and enhanced to support Unicode. + :mod:`_winreg` was written by Bill Tutt and Mark Hammond. + +* :mod:`zipfile`: A module for reading and writing ZIP-format archives. These + are archives produced by :program:`PKZIP` on DOS/Windows or :program:`zip` on + Unix, not to be confused with :program:`gzip`\ -format files (which are + supported by the :mod:`gzip` module) (Contributed by James C. Ahlstrom.) + +* :mod:`imputil`: A module that provides a simpler way for writing customised + import hooks, in comparison to the existing :mod:`ihooks` module. (Implemented + by Greg Stein, with much discussion on python-dev along the way.) + +.. % ====================================================================== + + +IDLE Improvements +================= + +IDLE is the official Python cross-platform IDE, written using Tkinter. Python +2.0 includes IDLE 0.6, which adds a number of new features and improvements. A +partial list: + +* UI improvements and optimizations, especially in the area of syntax + highlighting and auto-indentation. 
+ +* The class browser now shows more information, such as the top level functions + in a module. + +* Tab width is now a user settable option. When opening an existing Python file, + IDLE automatically detects the indentation conventions, and adapts. + +* There is now support for calling browsers on various platforms, used to open + the Python documentation in a browser. + +* IDLE now has a command line, which is largely similar to the vanilla Python + interpreter. + +* Call tips were added in many places. + +* IDLE can now be installed as a package. + +* In the editor window, there is now a line/column bar at the bottom. + +* Three new keystroke commands: Check module (Alt-F5), Import module (F5) and + Run script (Ctrl-F5). + +.. % ====================================================================== + + +Deleted and Deprecated Modules +============================== + +A few modules have been dropped because they're obsolete, or because there are +now better ways to do the same thing. The :mod:`stdwin` module is gone; it was +for a platform-independent windowing toolkit that's no longer developed. + +A number of modules have been moved to the :file:`lib-old` subdirectory: +:mod:`cmp`, :mod:`cmpcache`, :mod:`dircmp`, :mod:`dump`, :mod:`find`, +:mod:`grep`, :mod:`packmail`, :mod:`poly`, :mod:`util`, :mod:`whatsound`, +:mod:`zmod`. If you have code which relies on a module that's been moved to +:file:`lib-old`, you can simply add that directory to ``sys.path`` to get them +back, but you're encouraged to update any code that uses these modules. + + +Acknowledgements +================ + +The authors would like to thank the following people for offering suggestions on +various drafts of this article: David Bolen, Mark Hammond, Gregg Hauser, Jeremy +Hylton, Fredrik Lundh, Detlef Lannert, Aahz Maruch, Skip Montanaro, Vladimir +Marangozov, Tobias Polzin, Guido van Rossum, Neil Schemenauer, and Russ Schmidt. 
+ diff --git a/Doc/whatsnew/2.1.rst b/Doc/whatsnew/2.1.rst new file mode 100644 index 0000000..2be11ba --- /dev/null +++ b/Doc/whatsnew/2.1.rst @@ -0,0 +1,794 @@ +**************************** + What's New in Python 2.1 +**************************** + +:Author: A.M. Kuchling + +.. |release| replace:: 1.01 + +.. % $Id: whatsnew21.tex 51211 2006-08-11 14:57:12Z thomas.wouters $ + + +Introduction +============ + +This article explains the new features in Python 2.1. While there aren't as +many changes in 2.1 as there were in Python 2.0, there are still some pleasant +surprises in store. 2.1 is the first release to be steered through the use of +Python Enhancement Proposals, or PEPs, so most of the sizable changes have +accompanying PEPs that provide more complete documentation and a design +rationale for the change. This article doesn't attempt to document the new +features completely, but simply provides an overview of the new features for +Python programmers. Refer to the Python 2.1 documentation, or to the specific +PEP, for more details about any new feature that particularly interests you. + +One recent goal of the Python development team has been to accelerate the pace +of new releases, with a new release coming every 6 to 9 months. 2.1 is the first +release to come out at this faster pace, with the first alpha appearing in +January, 3 months after the final version of 2.0 was released. + +The final release of Python 2.1 was made on April 17, 2001. + +.. % ====================================================================== + + +PEP 227: Nested Scopes +====================== + +The largest change in Python 2.1 is to Python's scoping rules. In Python 2.0, +at any given time there are at most three namespaces used to look up variable +names: local, module-level, and the built-in namespace. This often surprised +people because it didn't match their intuitive expectations. For example, a +nested recursive function definition doesn't work:: + + def f(): + ... 
+ def g(value): + ... + return g(value-1) + 1 + ... + +The function :func:`g` will always raise a :exc:`NameError` exception, because +the binding of the name ``g`` isn't in either its local namespace or in the +module-level namespace. This isn't much of a problem in practice (how often do +you recursively define interior functions like this?), but this also made using +the :keyword:`lambda` statement clumsier, and this was a problem in practice. +In code which uses :keyword:`lambda` you can often find local variables being +copied by passing them as the default values of arguments. :: + + def find(self, name): + "Return list of any entries equal to 'name'" + L = filter(lambda x, name=name: x == name, + self.list_attribute) + return L + +The readability of Python code written in a strongly functional style suffers +greatly as a result. + +The most significant change to Python 2.1 is that static scoping has been added +to the language to fix this problem. As a first effect, the ``name=name`` +default argument is now unnecessary in the above example. Put simply, when a +given variable name is not assigned a value within a function (by an assignment, +or the :keyword:`def`, :keyword:`class`, or :keyword:`import` statements), +references to the variable will be looked up in the local namespace of the +enclosing scope. A more detailed explanation of the rules, and a dissection of +the implementation, can be found in the PEP. + +This change may cause some compatibility problems for code where the same +variable name is used both at the module level and as a local variable within a +function that contains further function definitions. This seems rather unlikely +though, since such code would have been pretty confusing to read in the first +place. + +One side effect of the change is that the ``from module import *`` and +:keyword:`exec` statements have been made illegal inside a function scope under +certain conditions. 
The Python reference manual has said all along that ``from +module import *`` is only legal at the top level of a module, but the CPython +interpreter has never enforced this before. As part of the implementation of +nested scopes, the compiler which turns Python source into bytecodes has to +generate different code to access variables in a containing scope. ``from +module import *`` and :keyword:`exec` make it impossible for the compiler to +figure this out, because they add names to the local namespace that are +unknowable at compile time. Therefore, if a function contains function +definitions or :keyword:`lambda` expressions with free variables, the compiler +will flag this by raising a :exc:`SyntaxError` exception. + +To make the preceding explanation a bit clearer, here's an example:: + + x = 1 + def f(): + # The next line is a syntax error + exec 'x=2' + def g(): + return x + +Line 4 containing the :keyword:`exec` statement is a syntax error, since +:keyword:`exec` would define a new local variable named ``x`` whose value should +be accessed by :func:`g`. + +This shouldn't be much of a limitation, since :keyword:`exec` is rarely used in +most Python code (and when it is used, it's often a sign of a poor design +anyway). + +Compatibility concerns have led to nested scopes being introduced gradually; in +Python 2.1, they aren't enabled by default, but can be turned on within a module +by using a future statement as described in PEP 236. (See the following section +for further discussion of PEP 236.) In Python 2.2, nested scopes will become +the default and there will be no way to turn them off, but users will have had +all of 2.1's lifetime to fix any breakage resulting from their introduction. + + +.. seealso:: + + :pep:`227` - Statically Nested Scopes + Written and implemented by Jeremy Hylton. + +.. 
% ====================================================================== + + +PEP 236: __future__ Directives +============================== + +The reaction to nested scopes was widespread concern about the dangers of +breaking code with the 2.1 release, and it was strong enough to make the +Pythoneers take a more conservative approach. This approach consists of +introducing a convention for enabling optional functionality in release N that +will become compulsory in release N+1. + +The syntax uses a ``from...import`` statement using the reserved module name +:mod:`__future__`. Nested scopes can be enabled by the following statement:: + + from __future__ import nested_scopes + +While it looks like a normal :keyword:`import` statement, it's not; there are +strict rules on where such a future statement can be put. They can only be at +the top of a module, and must precede any Python code or regular +:keyword:`import` statements. This is because such statements can affect how +the Python bytecode compiler parses code and generates bytecode, so they must +precede any statement that will result in bytecodes being produced. + + +.. seealso:: + + :pep:`236` - Back to the :mod:`__future__` + Written by Tim Peters, and primarily implemented by Jeremy Hylton. + +.. % ====================================================================== + + +PEP 207: Rich Comparisons +========================= + +In earlier versions, Python's support for implementing comparisons on user- +defined classes and extension types was quite simple. Classes could implement a +:meth:`__cmp__` method that was given two instances of a class, and could only +return 0 if they were equal or +1 or -1 if they weren't; the method couldn't +raise an exception or return anything other than a Boolean value. 
Users of +Numeric Python often found this model too weak and restrictive, because in the +number-crunching programs that numeric Python is used for, it would be more +useful to be able to perform elementwise comparisons of two matrices, returning +a matrix containing the results of a given comparison for each element. If the +two matrices are of different sizes, then the compare has to be able to raise an +exception to signal the error. + +In Python 2.1, rich comparisons were added in order to support this need. +Python classes can now individually overload each of the ``<``, ``<=``, ``>``, +``>=``, ``==``, and ``!=`` operations. The new magic method names are: + ++-----------+----------------+ +| Operation | Method name | ++===========+================+ +| ``<`` | :meth:`__lt__` | ++-----------+----------------+ +| ``<=`` | :meth:`__le__` | ++-----------+----------------+ +| ``>`` | :meth:`__gt__` | ++-----------+----------------+ +| ``>=`` | :meth:`__ge__` | ++-----------+----------------+ +| ``==`` | :meth:`__eq__` | ++-----------+----------------+ +| ``!=`` | :meth:`__ne__` | ++-----------+----------------+ + +(The magic methods are named after the corresponding Fortran operators ``.LT.``. +``.LE.``, &c. Numeric programmers are almost certainly quite familiar with +these names and will find them easy to remember.) + +Each of these magic methods is of the form ``method(self, other)``, where +``self`` will be the object on the left-hand side of the operator, while +``other`` will be the object on the right-hand side. For example, the +expression ``A < B`` will cause ``A.__lt__(B)`` to be called. + +Each of these magic methods can return anything at all: a Boolean, a matrix, a +list, or any other Python object. Alternatively they can raise an exception if +the comparison is impossible, inconsistent, or otherwise meaningless. 
+ +The built-in :func:`cmp(A,B)` function can use the rich comparison machinery, +and now accepts an optional argument specifying which comparison operation to +use; this is given as one of the strings ``"<"``, ``"<="``, ``">"``, ``">="``, +``"=="``, or ``"!="``. If called without the optional third argument, +:func:`cmp` will only return -1, 0, or +1 as in previous versions of Python; +otherwise it will call the appropriate method and can return any Python object. + +There are also corresponding changes of interest to C programmers; there's a new +slot ``tp_richcompare`` in type objects and an API for performing a given rich +comparison. I won't cover the C API here, but will refer you to PEP 207, or to +2.1's C API documentation, for the full list of related functions. + + +.. seealso:: + + :pep:`207` - Rich Comparisons + Written by Guido van Rossum, heavily based on earlier work by David Ascher, and + implemented by Guido van Rossum. + +.. % ====================================================================== + + +PEP 230: Warning Framework +========================== + +Over its 10 years of existence, Python has accumulated a certain number of +obsolete modules and features along the way. It's difficult to know when a +feature is safe to remove, since there's no way of knowing how much code uses it +--- perhaps no programs depend on the feature, or perhaps many do. To enable +removing old features in a more structured way, a warning framework was added. +When the Python developers want to get rid of a feature, it will first trigger a +warning in the next version of Python. The following Python version can then +drop the feature, and users will have had a full release cycle to remove uses of +the old feature. + +Python 2.1 adds the warning framework to be used in this scheme. It adds a +:mod:`warnings` module that provides functions to issue warnings, and to filter +out warnings that you don't want to be displayed. 
Third-party modules can also +use this framework to deprecate old features that they no longer wish to +support. + +For example, in Python 2.1 the :mod:`regex` module is deprecated, so importing +it causes a warning to be printed:: + + >>> import regex + __main__:1: DeprecationWarning: the regex module + is deprecated; please use the re module + >>> + +Warnings can be issued by calling the :func:`warnings.warn` function:: + + warnings.warn("feature X no longer supported") + +The first parameter is the warning message; an additional optional parameter +can be used to specify a particular warning category. + +Filters can be added to disable certain warnings; a regular expression pattern +can be applied to the message or to the module name in order to suppress a +warning. For example, you may have a program that uses the :mod:`regex` module +and not want to spare the time to convert it to use the :mod:`re` module right +now. The warning can be suppressed by calling :: + + import warnings + warnings.filterwarnings(action = 'ignore', + message='.*regex module is deprecated', + category=DeprecationWarning, + module = '__main__') + +This adds a filter that will apply only to warnings of the class +:class:`DeprecationWarning` triggered in the :mod:`__main__` module, and applies +a regular expression to only match the message about the :mod:`regex` module +being deprecated, and will cause such warnings to be ignored. Warnings can also +be printed only once, printed every time the offending code is executed, or +turned into exceptions that will cause the program to stop (unless the +exceptions are caught in the usual way, of course). + +Functions were also added to Python's C API for issuing warnings; refer to PEP +230 or to Python's API documentation for the details. + + +.. seealso:: + + :pep:`5` - Guidelines for Language Evolution + Written by Paul Prescod, to specify procedures to be followed when removing old + features from Python. 
The policy described in this PEP hasn't been officially + adopted, but the eventual policy probably won't be too different from Prescod's + proposal. + + :pep:`230` - Warning Framework + Written and implemented by Guido van Rossum. + +.. % ====================================================================== + + +PEP 229: New Build System +========================= + +When compiling Python, the user had to go in and edit the :file:`Modules/Setup` +file in order to enable various additional modules; the default set is +relatively small and limited to modules that compile on most Unix platforms. +This means that on Unix platforms with many more features, most notably Linux, +Python installations often don't contain all useful modules they could. + +Python 2.0 added the Distutils, a set of modules for distributing and installing +extensions. In Python 2.1, the Distutils are used to compile much of the +standard library of extension modules, autodetecting which ones are supported on +the current machine. It's hoped that this will make Python installations easier +and more featureful. + +Instead of having to edit the :file:`Modules/Setup` file in order to enable +modules, a :file:`setup.py` script in the top directory of the Python source +distribution is run at build time, and attempts to discover which modules can be +enabled by examining the modules and header files on the system. If a module is +configured in :file:`Modules/Setup`, the :file:`setup.py` script won't attempt +to compile that module and will defer to the :file:`Modules/Setup` file's +contents. This provides a way to specify any strange command-line flags or +libraries that are required for a specific platform. 
+ +In another far-reaching change to the build mechanism, Neil Schemenauer +restructured things so Python now uses a single makefile that isn't recursive, +instead of makefiles in the top directory and in each of the :file:`Python/`, +:file:`Parser/`, :file:`Objects/`, and :file:`Modules/` subdirectories. This +makes building Python faster and also makes hacking the Makefiles clearer and +simpler. + + +.. seealso:: + + :pep:`229` - Using Distutils to Build Python + Written and implemented by A.M. Kuchling. + +.. % ====================================================================== + + +PEP 205: Weak References +======================== + +Weak references, available through the :mod:`weakref` module, are a minor but +useful new data type in the Python programmer's toolbox. + +Storing a reference to an object (say, in a dictionary or a list) has the side +effect of keeping that object alive forever. There are a few specific cases +where this behaviour is undesirable, object caches being the most common one, +and another being circular references in data structures such as trees. + +For example, consider a memoizing function that caches the results of another +function :func:`f(x)` by storing the function's argument and its result in a +dictionary:: + + _cache = {} + def memoize(x): + if _cache.has_key(x): + return _cache[x] + + retval = f(x) + + # Cache the returned object + _cache[x] = retval + + return retval + +This version works for simple things such as integers, but it has a side effect; +the ``_cache`` dictionary holds a reference to the return values, so they'll +never be deallocated until the Python process exits and cleans up. This isn't +very noticeable for integers, but if :func:`f` returns an object, or a data +structure that takes up a lot of memory, this can be a problem. + +Weak references provide a way to implement a cache that won't keep objects alive +beyond their time.
If an object is only accessible through weak references, the +object will be deallocated and the weak references will now indicate that the +object they referred to no longer exists. A weak reference to an object *obj* is +created by calling ``wr = weakref.ref(obj)``. The object being referred to is +returned by calling the weak reference as if it were a function: ``wr()``. It +will return the referenced object, or ``None`` if the object no longer exists. + +This makes it possible to write a :func:`memoize` function whose cache doesn't +keep objects alive, by storing weak references in the cache. :: + + _cache = {} + def memoize(x): + if _cache.has_key(x): + obj = _cache[x]() + # If weak reference object still exists, + # return it + if obj is not None: return obj + + retval = f(x) + + # Cache a weak reference + _cache[x] = weakref.ref(retval) + + return retval + +The :mod:`weakref` module also allows creating proxy objects which behave like +weak references --- an object referenced only by proxy objects is deallocated --- +but instead of requiring an explicit call to retrieve the object, the proxy +transparently forwards all operations to the object as long as the object still +exists. If the object is deallocated, attempting to use a proxy will cause a +:exc:`weakref.ReferenceError` exception to be raised. :: + + proxy = weakref.proxy(obj) + proxy.attr # Equivalent to obj.attr + proxy.meth() # Equivalent to obj.meth() + del obj + proxy.attr # raises weakref.ReferenceError + + +.. seealso:: + + :pep:`205` - Weak References + Written and implemented by Fred L. Drake, Jr. + +.. % ====================================================================== + + +PEP 232: Function Attributes +============================ + +In Python 2.1, functions can now have arbitrary information attached to them.
+People were often using docstrings to hold information about functions and +methods, because the ``__doc__`` attribute was the only way of attaching any +information to a function. For example, in the Zope Web application server, +functions are marked as safe for public access by having a docstring, and in +John Aycock's SPARK parsing framework, docstrings hold parts of the BNF grammar +to be parsed. This overloading is unfortunate, since docstrings are really +intended to hold a function's documentation; for example, it means you can't +properly document functions intended for private use in Zope. + +Arbitrary attributes can now be set and retrieved on functions using the regular +Python syntax:: + + def f(): pass + + f.publish = 1 + f.secure = 1 + f.grammar = "A ::= B (C D)*" + +The dictionary containing attributes can be accessed as the function's +:attr:`__dict__`. Unlike the :attr:`__dict__` attribute of class instances, in +functions you can actually assign a new dictionary to :attr:`__dict__`, though +the new value is restricted to a regular Python dictionary; you *can't* be +tricky and set it to a :class:`UserDict` instance, or any other random object +that behaves like a mapping. + + +.. seealso:: + + :pep:`232` - Function Attributes + Written and implemented by Barry Warsaw. + +.. % ====================================================================== + + +PEP 235: Importing Modules on Case-Insensitive Platforms +======================================================== + +Some operating systems have filesystems that are case-insensitive, MacOS and +Windows being the primary examples; on these systems, it's impossible to +distinguish the filenames ``FILE.PY`` and ``file.py``, even though they do store +the file's name in its original case (they're case-preserving, too). + +In Python 2.1, the :keyword:`import` statement will work to simulate case- +sensitivity on case-insensitive platforms. 
Python will now search for the first +case-sensitive match by default, raising an :exc:`ImportError` if no such file +is found, so ``import file`` will not import a module named ``FILE.PY``. Case- +insensitive matching can be requested by setting the :envvar:`PYTHONCASEOK` +environment variable before starting the Python interpreter. + +.. % ====================================================================== + + +PEP 217: Interactive Display Hook +================================= + +When using the Python interpreter interactively, the output of commands is +displayed using the built-in :func:`repr` function. In Python 2.1, the variable +:func:`sys.displayhook` can be set to a callable object which will be called +instead of :func:`repr`. For example, you can set it to a special pretty- +printing function:: + + >>> # Create a recursive data structure + ... L = [1,2,3] + >>> L.append(L) + >>> L # Show Python's default output + [1, 2, 3, [...]] + >>> # Use pprint.pprint() as the display function + ... import sys, pprint + >>> sys.displayhook = pprint.pprint + >>> L + [1, 2, 3, <Recursion on list with id=135143996>] + >>> + + +.. seealso:: + + :pep:`217` - Display Hook for Interactive Use + Written and implemented by Moshe Zadka. + +.. % ====================================================================== + + +PEP 208: New Coercion Model +=========================== + +How numeric coercion is done at the C level was significantly modified. This +will only affect the authors of C extensions to Python, allowing them more +flexibility in writing extension types that support numeric operations. + +Extension types can now set the type flag ``Py_TPFLAGS_CHECKTYPES`` in their +``PyTypeObject`` structure to indicate that they support the new coercion model. 
+In such extension types, the numeric slot functions can no longer assume that +they'll be passed two arguments of the same type; instead they may be passed two +arguments of differing types, and can then perform their own internal coercion. +If the slot function is passed a type it can't handle, it can indicate the +failure by returning a reference to the ``Py_NotImplemented`` singleton value. +The numeric functions of the other type will then be tried, and perhaps they can +handle the operation; if the other type also returns ``Py_NotImplemented``, then +a :exc:`TypeError` will be raised. Numeric methods written in Python can also +return ``Py_NotImplemented``, causing the interpreter to act as if the method +did not exist (perhaps raising a :exc:`TypeError`, perhaps trying another +object's numeric methods). + + +.. seealso:: + + :pep:`208` - Reworking the Coercion Model + Written and implemented by Neil Schemenauer, heavily based upon earlier work by + Marc-André Lemburg. Read this to understand the fine points of how numeric + operations will now be processed at the C level. + +.. % ====================================================================== + + +PEP 241: Metadata in Python Packages +==================================== + +A common complaint from Python users is that there's no single catalog of all +the Python modules in existence. T. Middleton's Vaults of Parnassus at +http://www.vex.net/parnassus/ are the largest catalog of Python modules, but +registering software at the Vaults is optional, and many people don't bother. + +As a first small step toward fixing the problem, Python software packaged using +the Distutils :command:`sdist` command will include a file named +:file:`PKG-INFO` containing information about the package such as its name, +version, and author (metadata, in cataloguing terminology). PEP 241 contains +the full list of fields that can be present in the :file:`PKG-INFO` file. 
As +people begin to package their software using Python 2.1, more and more packages +will include metadata, making it possible to build automated cataloguing systems +and experiment with them. With the resulting experience, perhaps it'll be possible +to design a really good catalog and then build support for it into Python 2.2. +For example, the Distutils :command:`sdist` and :command:`bdist_\*` commands +could support a :option:`upload` option that would automatically upload your +package to a catalog server. + +You can start creating packages containing :file:`PKG-INFO` even if you're not +using Python 2.1, since a new release of the Distutils will be made for users of +earlier Python versions. Version 1.0.2 of the Distutils includes the changes +described in PEP 241, as well as various bugfixes and enhancements. It will be +available from the Distutils SIG at http://www.python.org/sigs/distutils-sig/. + + +.. seealso:: + + :pep:`241` - Metadata for Python Software Packages + Written and implemented by A.M. Kuchling. + + :pep:`243` - Module Repository Upload Mechanism + Written by Sean Reifschneider, this draft PEP describes a proposed mechanism for + uploading Python packages to a central server. + +.. % ====================================================================== + + +New and Improved Modules +======================== + +* Ka-Ping Yee contributed two new modules: :mod:`inspect.py`, a module for + getting information about live Python code, and :mod:`pydoc.py`, a module for + interactively converting docstrings to HTML or text. As a bonus, + :file:`Tools/scripts/pydoc`, which is now automatically installed, uses + :mod:`pydoc.py` to display documentation given a Python module, package, or + class name. For example, ``pydoc xml.dom`` displays the following:: + + Python Library Documentation: package xml.dom in xml + + NAME + xml.dom - W3C Document Object Model implementation for Python.
+ + FILE + /usr/local/lib/python2.1/xml/dom/__init__.pyc + + DESCRIPTION + The Python mapping of the Document Object Model is documented in the + Python Library Reference in the section on the xml.dom package. + + This package contains the following modules: + ... + + :file:`pydoc` also includes a Tk-based interactive help browser. :file:`pydoc` + quickly becomes addictive; try it out! + +* Two different modules for unit testing were added to the standard library. + The :mod:`doctest` module, contributed by Tim Peters, provides a testing + framework based on running embedded examples in docstrings and comparing the + results against the expected output. PyUnit, contributed by Steve Purcell, is a + unit testing framework inspired by JUnit, which was in turn an adaptation of + Kent Beck's Smalltalk testing framework. See http://pyunit.sourceforge.net/ for + more information about PyUnit. + +* The :mod:`difflib` module contains a class, :class:`SequenceMatcher`, which + compares two sequences and computes the changes required to transform one + sequence into the other. For example, this module can be used to write a tool + similar to the Unix :program:`diff` program, and in fact the sample program + :file:`Tools/scripts/ndiff.py` demonstrates how to write such a script. + +* :mod:`curses.panel`, a wrapper for the panel library, part of ncurses and of + SYSV curses, was contributed by Thomas Gellekum. The panel library provides + windows with the additional feature of depth. Windows can be moved higher or + lower in the depth ordering, and the panel library figures out where panels + overlap and which sections are visible. + +* The PyXML package has gone through a few releases since Python 2.0, and Python + 2.1 includes an updated version of the :mod:`xml` package. 
Some of the + noteworthy changes include support for Expat 1.2 and later versions, the ability + for Expat parsers to handle files in any encoding supported by Python, and + various bugfixes for SAX, DOM, and the :mod:`minidom` module. + +* Ping also contributed another hook for handling uncaught exceptions. + :func:`sys.excepthook` can be set to a callable object. When an exception isn't + caught by any :keyword:`try`...\ :keyword:`except` blocks, the exception will be + passed to :func:`sys.excepthook`, which can then do whatever it likes. At the + Ninth Python Conference, Ping demonstrated an application for this hook: + printing an extended traceback that not only lists the stack frames, but also + lists the function arguments and the local variables for each frame. + +* Various functions in the :mod:`time` module, such as :func:`asctime` and + :func:`localtime`, require a floating point argument containing the time in + seconds since the epoch. The most common use of these functions is to work with + the current time, so the floating point argument has been made optional; when a + value isn't provided, the current time will be used. For example, log file + entries usually need a string containing the current time; in Python 2.1, + ``time.asctime()`` can be used, instead of the lengthier + ``time.asctime(time.localtime(time.time()))`` that was previously required. + + This change was proposed and implemented by Thomas Wouters. + +* The :mod:`ftplib` module now defaults to retrieving files in passive mode, + because passive mode is more likely to work from behind a firewall. This + request came from the Debian bug tracking system, since other Debian packages + use :mod:`ftplib` to retrieve files and then don't work from behind a firewall. 
+ It's deemed unlikely that this will cause problems for anyone, because Netscape + defaults to passive mode and few people complain, but if passive mode is + unsuitable for your application or network setup, call :meth:`set_pasv(0)` on + FTP objects to disable passive mode. + +* Support for raw socket access has been added to the :mod:`socket` module, + contributed by Grant Edwards. + +* The :mod:`pstats` module now contains a simple interactive statistics browser + for displaying timing profiles for Python programs, invoked when the module is + run as a script. Contributed by Eric S. Raymond. + +* A new implementation-dependent function, :func:`sys._getframe([depth])`, has + been added to return a given frame object from the current call stack. + :func:`sys._getframe` returns the frame at the top of the call stack; if the + optional integer argument *depth* is supplied, the function returns the frame + that is *depth* calls below the top of the stack. For example, + ``sys._getframe(1)`` returns the caller's frame object. + + This function is only present in CPython, not in Jython or the .NET + implementation. Use it for debugging, and resist the temptation to put it into + production code. + +.. % ====================================================================== + + +Other Changes and Fixes +======================= + +There were relatively few smaller changes made in Python 2.1 due to the shorter +release cycle. A search through the CVS change logs turns up 117 patches +applied, and 136 bugs fixed; both figures are likely to be underestimates. Some +of the more notable changes are: + +* A specialized object allocator is now optionally available, that should be + faster than the system :func:`malloc` and have less memory overhead. The + allocator uses C's :func:`malloc` function to get large pools of memory, and + then fulfills smaller memory requests from these pools. 
It can be enabled by + providing the :option:`--with-pymalloc` option to the :program:`configure` + script; see :file:`Objects/obmalloc.c` for the implementation details. + + Authors of C extension modules should test their code with the object allocator + enabled, because some incorrect code may break, causing core dumps at runtime. + There are a bunch of memory allocation functions in Python's C API that have + previously been just aliases for the C library's :func:`malloc` and + :func:`free`, meaning that if you accidentally called mismatched functions, the + error wouldn't be noticeable. When the object allocator is enabled, these + functions aren't aliases of :func:`malloc` and :func:`free` any more, and + calling the wrong function to free memory will get you a core dump. For + example, if memory was allocated using :func:`PyMem_New`, it has to be freed + using :func:`PyMem_Del`, not :func:`free`. A few modules included with Python + fell afoul of this and had to be fixed; doubtless there are more third-party + modules that will have the same problem. + + The object allocator was contributed by Vladimir Marangozov. + +* The speed of line-oriented file I/O has been improved because people often + complain about its lack of speed, and because it's often been used as a naïve + benchmark. The :meth:`readline` method of file objects has therefore been + rewritten to be much faster. The exact amount of the speedup will vary from + platform to platform depending on how slow the C library's :func:`getc` was, but + is around 66%, and potentially much faster on some particular operating systems. + Tim Peters did much of the benchmarking and coding for this change, motivated by + a discussion in comp.lang.python. + + A new module and method for file objects was also added, contributed by Jeff + Epler. The new method, :meth:`xreadlines`, is similar to the existing + :func:`xrange` built-in. 
:func:`xreadlines` returns an opaque sequence object + that only supports being iterated over, reading a line on every iteration but + not reading the entire file into memory as the existing :meth:`readlines` method + does. You'd use it like this:: + + for line in sys.stdin.xreadlines(): + # ... do something for each line ... + ... + + For a fuller discussion of the line I/O changes, see the python-dev summary for + January 1-15, 2001 at http://www.python.org/dev/summary/2001-01-1.html. + +* A new method, :meth:`popitem`, was added to dictionaries to enable + destructively iterating through the contents of a dictionary; this can be faster + for large dictionaries because there's no need to construct a list containing + all the keys or values. ``D.popitem()`` removes a random ``(key, value)`` pair + from the dictionary ``D`` and returns it as a 2-tuple. This was implemented + mostly by Tim Peters and Guido van Rossum, after a suggestion and preliminary + patch by Moshe Zadka. + +* Modules can now control which names are imported when ``from module import *`` + is used, by defining an ``__all__`` attribute containing a list of names that + will be imported. One common complaint is that if the module imports other + modules such as :mod:`sys` or :mod:`string`, ``from module import *`` will add + them to the importing module's namespace. To fix this, simply list the public + names in ``__all__``:: + + # List public names + __all__ = ['Database', 'open'] + + A stricter version of this patch was first suggested and implemented by Ben + Wolfson, but after some python-dev discussion, a weaker final version was + checked in. + +* Applying :func:`repr` to strings previously used octal escapes for + non-printable characters; for example, a newline was ``'\012'``. This was a + vestigial trace of Python's C ancestry, but today octal is of very little + practical use. 
Ka-Ping Yee suggested using hex escapes instead of octal ones, + and using the ``\n``, ``\t``, ``\r`` escapes for the appropriate characters, + and implemented this new formatting. + +* Syntax errors detected at compile-time can now raise exceptions containing the + filename and line number of the error, a pleasant side effect of the compiler + reorganization done by Jeremy Hylton. + +* C extensions which import other modules have been changed to use + :func:`PyImport_ImportModule`, which means that they will use any import hooks + that have been installed. This is also encouraged for third-party extensions + that need to import some other module from C code. + +* The size of the Unicode character database was shrunk by another 340K thanks + to Fredrik Lundh. + +* Some new ports were contributed: MacOS X (by Steven Majewski), Cygwin (by + Jason Tishler); RISCOS (by Dietmar Schwertberger); Unixware 7 (by Billy G. + Allie). + +And there's the usual list of minor bugfixes, minor memory leaks, docstring +edits, and other tweaks, too lengthy to be worth itemizing; see the CVS logs for +the full details if you want them. + +.. % ====================================================================== + + +Acknowledgements +================ + +The author would like to thank the following people for offering suggestions on +various drafts of this article: Graeme Cross, David Goodger, Jay Graves, Michael +Hudson, Marc-André Lemburg, Fredrik Lundh, Neil Schemenauer, Thomas Wouters. + diff --git a/Doc/whatsnew/2.2.rst b/Doc/whatsnew/2.2.rst new file mode 100644 index 0000000..6a7e0e8 --- /dev/null +++ b/Doc/whatsnew/2.2.rst @@ -0,0 +1,1269 @@ +**************************** + What's New in Python 2.2 +**************************** + +:Author: A.M. Kuchling + +.. |release| replace:: 1.02 + +.. % $Id: whatsnew22.tex 37315 2004-09-10 19:33:00Z akuchling $ + + +Introduction +============ + +This article explains the new features in Python 2.2.2, released on October 14, +2002. 
Python 2.2.2 is a bugfix release of Python 2.2, originally released on +December 21, 2001. + +Python 2.2 can be thought of as the "cleanup release". There are some features +such as generators and iterators that are completely new, but most of the +changes, significant and far-reaching though they may be, are aimed at cleaning +up irregularities and dark corners of the language design. + +This article doesn't attempt to provide a complete specification of the new +features, but instead provides a convenient overview. For full details, you +should refer to the documentation for Python 2.2, such as the `Python Library +Reference <http://www.python.org/doc/2.2/lib/lib.html>`_ and the `Python +Reference Manual <http://www.python.org/doc/2.2/ref/ref.html>`_. If you want to +understand the complete implementation and design rationale for a change, refer +to the PEP for a particular new feature. + + +.. seealso:: + + http://www.unixreview.com/documents/s=1356/urm0109h/0109h.htm + "What's So Special About Python 2.2?" is also about the new 2.2 features, and + was written by Cameron Laird and Kathryn Soraiz. + +.. % ====================================================================== + + +PEPs 252 and 253: Type and Class Changes +======================================== + +The largest and most far-reaching changes in Python 2.2 are to Python's model of +objects and classes. The changes should be backward compatible, so it's likely +that your code will continue to run unchanged, but the changes provide some +amazing new capabilities. Before beginning this, the longest and most +complicated section of this article, I'll provide an overview of the changes and +offer some comments. + +A long time ago I wrote a Web page (http://www.amk.ca/python/writing/warts.html) +listing flaws in Python's design. One of the most significant flaws was that +it's impossible to subclass Python types implemented in C. 
In particular, it's +not possible to subclass built-in types, so you can't just subclass, say, lists +in order to add a single useful method to them. The :mod:`UserList` module +provides a class that supports all of the methods of lists and that can be +subclassed further, but there's lots of C code that expects a regular Python +list and won't accept a :class:`UserList` instance. + +Python 2.2 fixes this, and in the process adds some exciting new capabilities. +A brief summary: + +* You can subclass built-in types such as lists and even integers, and your + subclasses should work in every place that requires the original type. + +* It's now possible to define static and class methods, in addition to the + instance methods available in previous versions of Python. + +* It's also possible to automatically call methods on accessing or setting an + instance attribute by using a new mechanism called :dfn:`properties`. Many uses + of :meth:`__getattr__` can be rewritten to use properties instead, making the + resulting code simpler and faster. As a small side benefit, attributes can now + have docstrings, too. + +* The list of legal attributes for an instance can be limited to a particular + set using :dfn:`slots`, making it possible to safeguard against typos and + perhaps make more optimizations possible in future versions of Python. + +Some users have voiced concern about all these changes. Sure, they say, the new +features are neat and lend themselves to all sorts of tricks that weren't +possible in previous versions of Python, but they also make the language more +complicated. Some people have said that they've always recommended Python for +its simplicity, and feel that its simplicity is being lost. + +Personally, I think there's no need to worry. Many of the new features are +quite esoteric, and you can write a lot of Python code without ever needing to be +aware of them.
Writing a simple class is no more difficult than it ever was, so +you don't need to bother learning or teaching them unless they're actually +needed. Some very complicated tasks that were previously only possible from C +will now be possible in pure Python, and to my mind that's all for the better. + +I'm not going to attempt to cover every single corner case and small change that +were required to make the new features work. Instead this section will paint +only the broad strokes. See section :ref:`sect-rellinks`, "Related Links", for +further sources of information about Python 2.2's new object model. + + +Old and New Classes +------------------- + +First, you should know that Python 2.2 really has two kinds of classes: classic +or old-style classes, and new-style classes. The old-style class model is +exactly the same as the class model in earlier versions of Python. All the new +features described in this section apply only to new-style classes. This +divergence isn't intended to last forever; eventually old-style classes will be +dropped, possibly in Python 3.0. + +So how do you define a new-style class? You do it by subclassing an existing +new-style class. Most of Python's built-in types, such as integers, lists, +dictionaries, and even files, are new-style classes now. A new-style class +named :class:`object`, the base class for all built-in types, has also been +added so if no built-in type is suitable, you can just subclass +:class:`object`:: + + class C(object): + def __init__ (self): + ... + ... + +This means that :keyword:`class` statements that don't have any base classes are +always classic classes in Python 2.2. (Actually you can also change this by +setting a module-level variable named :attr:`__metaclass__` --- see :pep:`253` +for the details --- but it's easier to just subclass :class:`object`.) + +The type objects for the built-in types are available as built-ins, named using +a clever trick.
Python has always had built-in functions named :func:`int`, +:func:`float`, and :func:`str`. In 2.2, they aren't functions any more, but +type objects that behave as factories when called. :: + + >>> int + <type 'int'> + >>> int('123') + 123 + +To make the set of types complete, new type objects such as :func:`dict` and +:func:`file` have been added. Here's a more interesting example, adding a +:meth:`lock` method to file objects:: + + class LockableFile(file): + def lock (self, operation, length=0, start=0, whence=0): + import fcntl + return fcntl.lockf(self.fileno(), operation, + length, start, whence) + +The now-obsolete :mod:`posixfile` module contained a class that emulated all of +a file object's methods and also added a :meth:`lock` method, but this class +couldn't be passed to internal functions that expected a built-in file, +something which is possible with our new :class:`LockableFile`. + + +Descriptors +----------- + +In previous versions of Python, there was no consistent way to discover what +attributes and methods were supported by an object. There were some informal +conventions, such as defining :attr:`__members__` and :attr:`__methods__` +attributes that were lists of names, but often the author of an extension type +or a class wouldn't bother to define them. You could fall back on inspecting +the :attr:`__dict__` of an object, but when class inheritance or an arbitrary +:meth:`__getattr__` hook were in use this could still be inaccurate. + +The one big idea underlying the new class model is that an API for describing +the attributes of an object using :dfn:`descriptors` has been formalized. +Descriptors specify the value of an attribute, stating whether it's a method or +a field. With the descriptor API, static methods and class methods become +possible, as well as more exotic constructs. + +Attribute descriptors are objects that live inside class objects, and have a few +attributes of their own: + +* :attr:`__name__` is the attribute's name. 
+ +* :attr:`__doc__` is the attribute's docstring. + +* :meth:`__get__(object)` is a method that retrieves the attribute value from + *object*. + +* :meth:`__set__(object, value)` sets the attribute on *object* to *value*. + +* :meth:`__delete__(object, value)` deletes the *value* attribute of *object*. + +For example, when you write ``obj.x``, the steps that Python actually performs +are:: + + descriptor = obj.__class__.x + descriptor.__get__(obj) + +For methods, :meth:`descriptor.__get__` returns a temporary object that's +callable, and wraps up the instance and the method to be called on it. This is +also why static methods and class methods are now possible; they have +descriptors that wrap up just the method, or the method and the class. As a +brief explanation of these new kinds of methods, static methods aren't passed +the instance, and therefore resemble regular functions. Class methods are +passed the class of the object, but not the object itself. Static and class +methods are defined like this:: + + class C(object): + def f(arg1, arg2): + ... + f = staticmethod(f) + + def g(cls, arg1, arg2): + ... + g = classmethod(g) + +The :func:`staticmethod` function takes the function :func:`f`, and returns it +wrapped up in a descriptor so it can be stored in the class object. You might +expect there to be special syntax for creating such methods (``def static f``, +``defstatic f()``, or something like that) but no such syntax has been defined +yet; that's been left for future versions of Python. + +More new features, such as slots and properties, are also implemented as new +kinds of descriptors, and it's not difficult to write a descriptor class that +does something novel. For example, it would be possible to write a descriptor +class that made it possible to write Eiffel-style preconditions and +postconditions for a method. 
A class that used this feature might be defined +like this:: + + from eiffel import eiffelmethod + + class C(object): + def f(self, arg1, arg2): + # The actual function + ... + def pre_f(self): + # Check preconditions + ... + def post_f(self): + # Check postconditions + ... + + f = eiffelmethod(f, pre_f, post_f) + +Note that a person using the new :func:`eiffelmethod` doesn't have to understand +anything about descriptors. This is why I think the new features don't increase +the basic complexity of the language. There will be a few wizards who need to +know about it in order to write :func:`eiffelmethod` or the ZODB or whatever, +but most users will just write code on top of the resulting libraries and ignore +the implementation details. + + +Multiple Inheritance: The Diamond Rule +-------------------------------------- + +Multiple inheritance has also been made more useful through changing the rules +under which names are resolved. Consider this set of classes (diagram taken +from :pep:`253` by Guido van Rossum):: + + class A: + ^ ^ def save(self): ... + / \ + / \ + / \ + / \ + class B class C: + ^ ^ def save(self): ... + \ / + \ / + \ / + \ / + class D + +The lookup rule for classic classes is simple but not very smart; the base +classes are searched depth-first, going from left to right. A reference to +:meth:`D.save` will search the classes :class:`D`, :class:`B`, and then +:class:`A`, where :meth:`save` would be found and returned. :meth:`C.save` +would never be found at all. This is bad, because if :class:`C`'s :meth:`save` +method is saving some internal state specific to :class:`C`, not calling it will +result in that state never getting saved. + +New-style classes follow a different algorithm that's a bit more complicated to +explain, but does the right thing in this situation. (Note that Python 2.3 +changes this algorithm to one that produces the same results in most cases, but +produces more useful results for really complicated inheritance graphs.) 
+ +#. List all the base classes, following the classic lookup rule and include a + class multiple times if it's visited repeatedly. In the above example, the list + of visited classes is [:class:`D`, :class:`B`, :class:`A`, :class:`C`, + :class:`A`]. + +#. Scan the list for duplicated classes. If any are found, remove all but one + occurrence, leaving the *last* one in the list. In the above example, the list + becomes [:class:`D`, :class:`B`, :class:`C`, :class:`A`] after dropping + duplicates. + +Following this rule, referring to :meth:`D.save` will return :meth:`C.save`, +which is the behaviour we're after. This lookup rule is the same as the one +followed by Common Lisp. A new built-in function, :func:`super`, provides a way +to get at a class's superclasses without having to reimplement Python's +algorithm. The most commonly used form will be :func:`super(class, obj)`, which +returns a bound superclass object (not the actual class object). This form +will be used in methods to call a method in the superclass; for example, +:class:`D`'s :meth:`save` method would look like this:: + + class D (B,C): + def save (self): + # Call superclass .save() + super(D, self).save() + # Save D's private information here + ... + +:func:`super` can also return unbound superclass objects when called as +:func:`super(class)` or :func:`super(class1, class2)`, but this probably won't +often be useful. + + +Attribute Access +---------------- + +A fair number of sophisticated Python classes define hooks for attribute access +using :meth:`__getattr__`; most commonly this is done for convenience, to make +code more readable by automatically mapping an attribute access such as +``obj.parent`` into a method call such as ``obj.get_parent``. Python 2.2 adds +some new ways of controlling attribute access. + +First, :meth:`__getattr__(attr_name)` is still supported by new-style classes, +and nothing about it has changed. 
As before, it will be called when an attempt +is made to access ``obj.foo`` and no attribute named ``foo`` is found in the +instance's dictionary. + +New-style classes also support a new method, +:meth:`__getattribute__(attr_name)`. The difference between the two methods is +that :meth:`__getattribute__` is *always* called whenever any attribute is +accessed, while the old :meth:`__getattr__` is only called if ``foo`` isn't +found in the instance's dictionary. + +However, Python 2.2's support for :dfn:`properties` will often be a simpler way +to trap attribute references. Writing a :meth:`__getattr__` method is +complicated because to avoid recursion you can't use regular attribute accesses +inside them, and instead have to mess around with the contents of +:attr:`__dict__`. :meth:`__getattr__` methods also end up being called by Python +when it checks for other methods such as :meth:`__repr__` or :meth:`__coerce__`, +and so have to be written with this in mind. Finally, calling a function on +every attribute access results in a sizable performance loss. + +:class:`property` is a new built-in type that packages up three functions that +get, set, or delete an attribute, and a docstring. For example, if you want to +define a :attr:`size` attribute that's computed, but also settable, you could +write:: + + class C(object): + def get_size (self): + result = ... computation ... + return result + def set_size (self, size): + ... compute something based on the size + and set internal state appropriately ... + + # Define a property. The 'delete this attribute' + # method is defined as None, so the attribute + # can't be deleted. + size = property(get_size, set_size, + None, + "Storage size of this instance") + +That is certainly clearer and easier to write than a pair of +:meth:`__getattr__`/:meth:`__setattr__` methods that check for the :attr:`size` +attribute and handle it specially while retrieving all other attributes from the +instance's :attr:`__dict__`. 
Accesses to :attr:`size` are also the only ones +which have to perform the work of calling a function, so references to other +attributes run at their usual speed. + +Finally, it's possible to constrain the list of attributes that can be +referenced on an object using the new :attr:`__slots__` class attribute. Python +objects are usually very dynamic; at any time it's possible to define a new +attribute on an instance by just doing ``obj.new_attr=1``. A new-style class +can define a class attribute named :attr:`__slots__` to limit the legal +attributes to a particular set of names. An example will make this clear:: + + >>> class C(object): + ... __slots__ = ('template', 'name') + ... + >>> obj = C() + >>> print obj.template + None + >>> obj.template = 'Test' + >>> print obj.template + Test + >>> obj.newattr = None + Traceback (most recent call last): + File "<stdin>", line 1, in ? + AttributeError: 'C' object has no attribute 'newattr' + +Note how you get an :exc:`AttributeError` on the attempt to assign to an +attribute not listed in :attr:`__slots__`. + + +.. _sect-rellinks: + +Related Links +------------- + +This section has just been a quick overview of the new features, giving enough +of an explanation to start you programming, but many details have been +simplified or ignored. Where should you go to get a more complete picture? + +http://www.python.org/2.2/descrintro.html is a lengthy tutorial introduction to +the descriptor features, written by Guido van Rossum. If my description has +whetted your appetite, go read this tutorial next, because it goes into much +more detail about the new features while still remaining quite easy to read. + +Next, there are two relevant PEPs, :pep:`252` and :pep:`253`. :pep:`252` is +titled "Making Types Look More Like Classes", and covers the descriptor API. +:pep:`253` is titled "Subtyping Built-in Types", and describes the changes to +type objects that make it possible to subtype built-in objects. 
:pep:`253` is +the more complicated PEP of the two, and at a few points the necessary +explanations of types and meta-types may cause your head to explode. Both PEPs +were written and implemented by Guido van Rossum, with substantial assistance +from the rest of the Zope Corp. team. + +Finally, there's the ultimate authority: the source code. Most of the machinery +for the type handling is in :file:`Objects/typeobject.c`, but you should only +resort to it after all other avenues have been exhausted, including posting a +question to python-list or python-dev. + +.. % ====================================================================== + + +PEP 234: Iterators +================== + +Another significant addition to 2.2 is an iteration interface at both the C and +Python levels. Objects can define how they can be looped over by callers. + +In Python versions up to 2.1, the usual way to make ``for item in obj`` work is +to define a :meth:`__getitem__` method that looks something like this:: + + def __getitem__(self, index): + return <next item> + +:meth:`__getitem__` is more properly used to define an indexing operation on an +object so that you can write ``obj[5]`` to retrieve the sixth element. It's a +bit misleading when you're using this only to support :keyword:`for` loops. +Consider some file-like object that wants to be looped over; the *index* +parameter is essentially meaningless, as the class probably assumes that a +series of :meth:`__getitem__` calls will be made with *index* incrementing by +one each time. In other words, the presence of the :meth:`__getitem__` method +doesn't mean that using ``file[5]`` to randomly access the sixth element will +work, though it really should. + +In Python 2.2, iteration can be implemented separately, and :meth:`__getitem__` +methods can be limited to classes that really do support random access. The +basic idea of iterators is simple. 
A new built-in function, :func:`iter(obj)` +or ``iter(C, sentinel)``, is used to get an iterator. :func:`iter(obj)` returns +an iterator for the object *obj*, while ``iter(C, sentinel)`` returns an +iterator that will invoke the callable object *C* until it returns *sentinel* to +signal that the iterator is done. + +Python classes can define an :meth:`__iter__` method, which should create and +return a new iterator for the object; if the object is its own iterator, this +method can just return ``self``. In particular, iterators will usually be their +own iterators. Extension types implemented in C can implement a :attr:`tp_iter` +function in order to return an iterator, and extension types that want to behave +as iterators can define a :attr:`tp_iternext` function. + +So, after all this, what do iterators actually do? They have one required +method, :meth:`next`, which takes no arguments and returns the next value. When +there are no more values to be returned, calling :meth:`next` should raise the +:exc:`StopIteration` exception. :: + + >>> L = [1,2,3] + >>> i = iter(L) + >>> print i + <iterator object at 0x8116870> + >>> i.next() + 1 + >>> i.next() + 2 + >>> i.next() + 3 + >>> i.next() + Traceback (most recent call last): + File "<stdin>", line 1, in ? + StopIteration + >>> + +In 2.2, Python's :keyword:`for` statement no longer expects a sequence; it +expects something for which :func:`iter` will return an iterator. For backward +compatibility and convenience, an iterator is automatically constructed for +sequences that don't implement :meth:`__iter__` or a :attr:`tp_iter` slot, so +``for i in [1,2,3]`` will still work. Wherever the Python interpreter loops +over a sequence, it's been changed to use the iterator protocol. This means you +can do things like this:: + + >>> L = [1,2,3] + >>> i = iter(L) + >>> a,b,c = i + >>> a,b,c + (1, 2, 3) + +Iterator support has been added to some of Python's basic types. 
Calling +:func:`iter` on a dictionary will return an iterator which loops over its keys:: + + >>> m = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6, + ... 'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12} + >>> for key in m: print key, m[key] + ... + Mar 3 + Feb 2 + Aug 8 + Sep 9 + May 5 + Jun 6 + Jul 7 + Jan 1 + Apr 4 + Nov 11 + Dec 12 + Oct 10 + +That's just the default behaviour. If you want to iterate over keys, values, or +key/value pairs, you can explicitly call the :meth:`iterkeys`, +:meth:`itervalues`, or :meth:`iteritems` methods to get an appropriate iterator. +In a minor related change, the :keyword:`in` operator now works on dictionaries, +so ``key in dict`` is now equivalent to ``dict.has_key(key)``. + +Files also provide an iterator, which calls the :meth:`readline` method until +there are no more lines in the file. This means you can now read each line of a +file using code like this:: + + for line in file: + # do something for each line + ... + +Note that you can only go forward in an iterator; there's no way to get the +previous element, reset the iterator, or make a copy of it. An iterator object +could provide such additional capabilities, but the iterator protocol only +requires a :meth:`next` method. + + +.. seealso:: + + :pep:`234` - Iterators + Written by Ka-Ping Yee and GvR; implemented by the Python Labs crew, mostly by + GvR and Tim Peters. + +.. % ====================================================================== + + +PEP 255: Simple Generators +========================== + +Generators are another new feature, one that interacts with the introduction of +iterators. + +You're doubtless familiar with how function calls work in Python or C. When you +call a function, it gets a private namespace where its local variables are +created. When the function reaches a :keyword:`return` statement, the local +variables are destroyed and the resulting value is returned to the caller. 
A +later call to the same function will get a fresh new set of local variables. +But, what if the local variables weren't thrown away on exiting a function? +What if you could later resume the function where it left off? This is what +generators provide; they can be thought of as resumable functions. + +Here's the simplest example of a generator function:: + + def generate_ints(N): + for i in range(N): + yield i + +A new keyword, :keyword:`yield`, was introduced for generators. Any function +containing a :keyword:`yield` statement is a generator function; this is +detected by Python's bytecode compiler which compiles the function specially as +a result. Because a new keyword was introduced, generators must be explicitly +enabled in a module by including a ``from __future__ import generators`` +statement near the top of the module's source code. In Python 2.3 this +statement will become unnecessary. + +When you call a generator function, it doesn't return a single value; instead it +returns a generator object that supports the iterator protocol. On executing +the :keyword:`yield` statement, the generator outputs the value of ``i``, +similar to a :keyword:`return` statement. The big difference between +:keyword:`yield` and a :keyword:`return` statement is that on reaching a +:keyword:`yield` the generator's state of execution is suspended and local +variables are preserved. On the next call to the generator's ``next()`` method, +the function will resume executing immediately after the :keyword:`yield` +statement. (For complicated reasons, the :keyword:`yield` statement isn't +allowed inside the :keyword:`try` block of a :keyword:`try`...\ +:keyword:`finally` statement; read :pep:`255` for a full explanation of the +interaction between :keyword:`yield` and exceptions.) 
+ +Here's a sample usage of the :func:`generate_ints` generator:: + + >>> gen = generate_ints(3) + >>> gen + <generator object at 0x8117f90> + >>> gen.next() + 0 + >>> gen.next() + 1 + >>> gen.next() + 2 + >>> gen.next() + Traceback (most recent call last): + File "<stdin>", line 1, in ? + File "<stdin>", line 2, in generate_ints + StopIteration + +You could equally write ``for i in generate_ints(5)``, or ``a,b,c = +generate_ints(3)``. + +Inside a generator function, the :keyword:`return` statement can only be used +without a value, and signals the end of the procession of values; afterwards the +generator cannot return any further values. :keyword:`return` with a value, such +as ``return 5``, is a syntax error inside a generator function. The end of the +generator's results can also be indicated by raising :exc:`StopIteration` +manually, or by just letting the flow of execution fall off the bottom of the +function. + +You could achieve the effect of generators manually by writing your own class +and storing all the local variables of the generator as instance variables. For +example, returning a list of integers could be done by setting ``self.count`` to +0, and having the :meth:`next` method increment ``self.count`` and return it. +However, for a moderately complicated generator, writing a corresponding class +would be much messier. :file:`Lib/test/test_generators.py` contains a number of +more interesting examples. The simplest one implements an in-order traversal of +a tree using generators recursively. :: + + # A recursive generator that generates Tree leaves in in-order. 
+ def inorder(t): + if t: + for x in inorder(t.left): + yield x + yield t.label + for x in inorder(t.right): + yield x + +Two other examples in :file:`Lib/test/test_generators.py` produce solutions for +the N-Queens problem (placing N queens on an NxN chess board so that no +queen threatens another) and the Knight's Tour (a route that takes a knight to +every square of an NxN chessboard without visiting any square twice). + +The idea of generators comes from other programming languages, especially Icon +(http://www.cs.arizona.edu/icon/), where the idea of generators is central. In +Icon, every expression and function call behaves like a generator. One example +from "An Overview of the Icon Programming Language" at +http://www.cs.arizona.edu/icon/docs/ipd266.htm gives an idea of what this looks +like:: + + sentence := "Store it in the neighboring harbor" + if (i := find("or", sentence)) > 5 then write(i) + +In Icon the :func:`find` function returns the indexes at which the substring +"or" is found: 3, 23, 33. In the :keyword:`if` statement, ``i`` is first +assigned a value of 3, but 3 is less than 5, so the comparison fails, and Icon +retries it with the second value of 23. 23 is greater than 5, so the comparison +now succeeds, and the code prints the value 23 to the screen. + +Python doesn't go nearly as far as Icon in adopting generators as a central +concept. Generators are considered a new part of the core Python language, but +learning or using them isn't compulsory; if they don't solve any problems that +you have, feel free to ignore them. One novel feature of Python's interface as +compared to Icon's is that a generator's state is represented as a concrete +object (the iterator) that can be passed around to other functions or stored in +a data structure. + + +.. seealso:: + + :pep:`255` - Simple Generators + Written by Neil Schemenauer, Tim Peters, Magnus Lie Hetland.
Implemented mostly + by Neil Schemenauer and Tim Peters, with other fixes from the Python Labs crew. + +.. % ====================================================================== + + +PEP 237: Unifying Long Integers and Integers +============================================ + +In recent versions, the distinction between regular integers, which are 32-bit +values on most machines, and long integers, which can be of arbitrary size, was +becoming an annoyance. For example, on platforms that support files larger than +``2**32`` bytes, the :meth:`tell` method of file objects has to return a long +integer. However, there were various bits of Python that expected plain integers +and would raise an error if a long integer was provided instead. For example, +in Python 1.5, only regular integers could be used as a slice index, and +``'abc'[1L:]`` would raise a :exc:`TypeError` exception with the message 'slice +index must be int'. + +Python 2.2 will shift values from short to long integers as required. The 'L' +suffix is no longer needed to indicate a long integer literal, as now the +compiler will choose the appropriate type. (Using the 'L' suffix will be +discouraged in future 2.x versions of Python, triggering a warning in Python +2.4, and probably dropped in Python 3.0.) Many operations that used to raise an +:exc:`OverflowError` will now return a long integer as their result. For +example:: + + >>> 1234567890123 + 1234567890123L + >>> 2 ** 64 + 18446744073709551616L + +In most cases, integers and long integers will now be treated identically. You +can still distinguish them with the :func:`type` built-in function, but that's +rarely needed. + + +.. seealso:: + + :pep:`237` - Unifying Long Integers and Integers + Written by Moshe Zadka and Guido van Rossum. Implemented mostly by Guido van + Rossum. + +.. 
% ====================================================================== + + +PEP 238: Changing the Division Operator +======================================= + +The most controversial change in Python 2.2 heralds the start of an effort to +fix an old design flaw that's been in Python from the beginning. Currently +Python's division operator, ``/``, behaves like C's division operator when +presented with two integer arguments: it returns an integer result that's +truncated down when there would be a fractional part. For example, ``3/2`` is +1, not 1.5, and ``(-1)/2`` is -1, not -0.5. This means that the results of +division can vary unexpectedly depending on the type of the two operands and +because Python is dynamically typed, it can be difficult to determine the +possible types of the operands. + +(The controversy is over whether this is *really* a design flaw, and whether +it's worth breaking existing code to fix this. It's caused endless discussions +on python-dev, and in July 2001 erupted into a storm of acidly sarcastic +postings on :newsgroup:`comp.lang.python`. I won't argue for either side here +and will stick to describing what's implemented in 2.2. Read :pep:`238` for a +summary of arguments and counter-arguments.) + +Because this change might break code, it's being introduced very gradually. +Python 2.2 begins the transition, but the switch won't be complete until Python +3.0. + +First, I'll borrow some terminology from :pep:`238`. "True division" is the +division that most non-programmers are familiar with: 3/2 is 1.5, 1/4 is 0.25, +and so forth. "Floor division" is what Python's ``/`` operator currently does +when given integer operands; the result is the floor of the value returned by +true division. "Classic division" is the current mixed behaviour of ``/``; it +returns the result of floor division when the operands are integers, and returns +the result of true division when one of the operands is a floating-point number.
+ +Here are the changes 2.2 introduces: + +* A new operator, ``//``, is the floor division operator. (Yes, we know it looks + like C++'s comment symbol.) ``//`` *always* performs floor division no matter + what the types of its operands are, so ``1 // 2`` is 0 and ``1.0 // 2.0`` is + also 0.0. + + ``//`` is always available in Python 2.2; you don't need to enable it using a + ``__future__`` statement. + +* By including a ``from __future__ import division`` in a module, the ``/`` + operator will be changed to return the result of true division, so ``1/2`` is + 0.5. Without the ``__future__`` statement, ``/`` still means classic division. + The default meaning of ``/`` will not change until Python 3.0. + +* Classes can define methods called :meth:`__truediv__` and :meth:`__floordiv__` + to overload the two division operators. At the C level, there are also slots in + the :ctype:`PyNumberMethods` structure so extension types can define the two + operators. + +* Python 2.2 supports some command-line arguments for testing whether code will + work with the changed division semantics. Running python with :option:`-Q + warn` will cause a warning to be issued whenever division is applied to two + integers. You can use this to find code that's affected by the change and fix + it. By default, Python 2.2 will simply perform classic division without a + warning; the warning will be turned on by default in Python 2.3. + + +.. seealso:: + + :pep:`238` - Changing the Division Operator + Written by Moshe Zadka and Guido van Rossum. Implemented by Guido van Rossum. + +.. % ====================================================================== + + +Unicode Changes +=============== + +Python's Unicode support has been enhanced a bit in 2.2. Unicode strings are +usually stored as UCS-2, as 16-bit unsigned integers. Python 2.2 can also be +compiled to use UCS-4, 32-bit unsigned integers, as its internal encoding by +supplying :option:`--enable-unicode=ucs4` to the configure script.
(It's also +possible to specify :option:`--disable-unicode` to completely disable Unicode +support.) + +When built to use UCS-4 (a "wide Python"), the interpreter can natively handle +Unicode characters from U+000000 to U+110000, so the range of legal values for +the :func:`unichr` function is expanded accordingly. Using an interpreter +compiled to use UCS-2 (a "narrow Python"), values greater than 65535 will still +cause :func:`unichr` to raise a :exc:`ValueError` exception. This is all +described in :pep:`261`, "Support for 'wide' Unicode characters"; consult it for +further details. + +Another change is simpler to explain. Since their introduction, Unicode strings +have supported an :meth:`encode` method to convert the string to a selected +encoding such as UTF-8 or Latin-1. A symmetric :meth:`decode([*encoding*])` +method has been added to 8-bit strings (though not to Unicode strings) in 2.2. +:meth:`decode` assumes that the string is in the specified encoding and decodes +it, returning whatever is returned by the codec. + +Using this new feature, codecs have been added for tasks not directly related to +Unicode. For example, codecs have been added for uu-encoding, MIME's base64 +encoding, and compression with the :mod:`zlib` module:: + + >>> s = """Here is a lengthy piece of redundant, overly verbose, + ... and repetitive text. + ... """ + >>> data = s.encode('zlib') + >>> data + 'x\x9c\r\xc9\xc1\r\x80 \x10\x04\xc0?Ul...' + >>> data.decode('zlib') + 'Here is a lengthy piece of redundant, overly verbose,\nand repetitive text.\n' + >>> print s.encode('uu') + begin 666 <data> + M2&5R92!I<R!A(&QE;F=T:'D@<&EE8V4@;V8@<F5D=6YD86YT+"!O=F5R;'D@ + >=F5R8F]S92P*86YD(')E<&5T:71I=F4@=&5X="X* + + end + >>> "sheesh".encode('rot-13') + 'furrfu' + +To convert a class instance to Unicode, a :meth:`__unicode__` method can be +defined by a class, analogous to :meth:`__str__`. + +:meth:`encode`, :meth:`decode`, and :meth:`__unicode__` were implemented by +Marc-André Lemburg. 
The changes to support using UCS-4 internally were +implemented by Fredrik Lundh and Martin von Löwis. + + +.. seealso:: + + :pep:`261` - Support for 'wide' Unicode characters + Written by Paul Prescod. + +.. % ====================================================================== + + +PEP 227: Nested Scopes +====================== + +In Python 2.1, statically nested scopes were added as an optional feature, to be +enabled by a ``from __future__ import nested_scopes`` directive. In 2.2 nested +scopes no longer need to be specially enabled, and are now always present. The +rest of this section is a copy of the description of nested scopes from my +"What's New in Python 2.1" document; if you read it when 2.1 came out, you can +skip the rest of this section. + +The largest change introduced in Python 2.1, and made complete in 2.2, is to +Python's scoping rules. In Python 2.0, at any given time there are at most +three namespaces used to look up variable names: local, module-level, and the +built-in namespace. This often surprised people because it didn't match their +intuitive expectations. For example, a nested recursive function definition +doesn't work:: + + def f(): + ... + def g(value): + ... + return g(value-1) + 1 + ... + +The function :func:`g` will always raise a :exc:`NameError` exception, because +the binding of the name ``g`` isn't in either its local namespace or in the +module-level namespace. This isn't much of a problem in practice (how often do +you recursively define interior functions like this?), but this also made using +the :keyword:`lambda` statement clumsier, and this was a problem in practice. +In code which uses :keyword:`lambda` you can often find local variables being +copied by passing them as the default values of arguments. 
:: + + def find(self, name): + "Return list of any entries equal to 'name'" + L = filter(lambda x, name=name: x == name, + self.list_attribute) + return L + +The readability of Python code written in a strongly functional style suffers +greatly as a result. + +The most significant change to Python 2.2 is that static scoping has been added +to the language to fix this problem. As a first effect, the ``name=name`` +default argument is now unnecessary in the above example. Put simply, when a +given variable name is not assigned a value within a function (by an assignment, +or the :keyword:`def`, :keyword:`class`, or :keyword:`import` statements), +references to the variable will be looked up in the local namespace of the +enclosing scope. A more detailed explanation of the rules, and a dissection of +the implementation, can be found in the PEP. + +This change may cause some compatibility problems for code where the same +variable name is used both at the module level and as a local variable within a +function that contains further function definitions. This seems rather unlikely +though, since such code would have been pretty confusing to read in the first +place. + +One side effect of the change is that the ``from module import *`` and +:keyword:`exec` statements have been made illegal inside a function scope under +certain conditions. The Python reference manual has said all along that ``from +module import *`` is only legal at the top level of a module, but the CPython +interpreter has never enforced this before. As part of the implementation of +nested scopes, the compiler which turns Python source into bytecodes has to +generate different code to access variables in a containing scope. ``from +module import *`` and :keyword:`exec` make it impossible for the compiler to +figure this out, because they add names to the local namespace that are +unknowable at compile time. 
Therefore, if a function contains function +definitions or :keyword:`lambda` expressions with free variables, the compiler +will flag this by raising a :exc:`SyntaxError` exception. + +To make the preceding explanation a bit clearer, here's an example:: + + x = 1 + def f(): + # The next line is a syntax error + exec 'x=2' + def g(): + return x + +Line 4 containing the :keyword:`exec` statement is a syntax error, since +:keyword:`exec` would define a new local variable named ``x`` whose value should +be accessed by :func:`g`. + +This shouldn't be much of a limitation, since :keyword:`exec` is rarely used in +most Python code (and when it is used, it's often a sign of a poor design +anyway). + + +.. seealso:: + + :pep:`227` - Statically Nested Scopes + Written and implemented by Jeremy Hylton. + +.. % ====================================================================== + + +New and Improved Modules +======================== + +* The :mod:`xmlrpclib` module was contributed to the standard library by Fredrik + Lundh, providing support for writing XML-RPC clients. XML-RPC is a simple + remote procedure call protocol built on top of HTTP and XML. For example, the + following snippet retrieves a list of RSS channels from the O'Reilly Network, + and then lists the recent headlines for one channel:: + + import xmlrpclib + s = xmlrpclib.Server( + 'http://www.oreillynet.com/meerkat/xml-rpc/server.php') + channels = s.meerkat.getChannels() + # channels is a list of dictionaries, like this: + # [{'id': 4, 'title': 'Freshmeat Daily News'} + # {'id': 190, 'title': '32Bits Online'}, + # {'id': 4549, 'title': '3DGamers'}, ... ] + + # Get the items for one channel + items = s.meerkat.getItems( {'channel': 4} ) + + # 'items' is another list of dictionaries, like this: + # [{'link': 'http://freshmeat.net/releases/52719/', + # 'description': 'A utility which converts HTML to XSL FO.', + # 'title': 'html2fo 0.3 (Default)'}, ... 
] + + The :mod:`SimpleXMLRPCServer` module makes it easy to create straightforward + XML-RPC servers. See http://www.xmlrpc.com/ for more information about XML-RPC. + +* The new :mod:`hmac` module implements the HMAC algorithm described by + :rfc:`2104`. (Contributed by Gerhard Häring.) + +* Several functions that originally returned lengthy tuples now return pseudo- + sequences that still behave like tuples but also have mnemonic attributes such + as :attr:`st_mtime` or :attr:`tm_year`. The enhanced functions include + :func:`stat`, :func:`fstat`, :func:`statvfs`, and :func:`fstatvfs` in the + :mod:`os` module, and :func:`localtime`, :func:`gmtime`, and :func:`strptime` in + the :mod:`time` module. + + For example, to obtain a file's size using the old tuples, you'd end up writing + something like ``file_size = os.stat(filename)[stat.ST_SIZE]``, but now this can + be written more clearly as ``file_size = os.stat(filename).st_size``. + + The original patch for this feature was contributed by Nick Mathewson. + +* The Python profiler has been extensively reworked and various errors in its + output have been corrected. (Contributed by Fred L. Drake, Jr. and Tim Peters.) + +* The :mod:`socket` module can be compiled to support IPv6; specify the + :option:`--enable-ipv6` option to Python's configure script. (Contributed by + Jun-ichiro "itojun" Hagino.) + +* Two new format characters were added to the :mod:`struct` module for 64-bit + integers on platforms that support the C :ctype:`long long` type. ``q`` is for + a signed 64-bit integer, and ``Q`` is for an unsigned one. The value is + returned in Python's long integer type. (Contributed by Tim Peters.) + +* In the interpreter's interactive mode, there's a new built-in function + :func:`help` that uses the :mod:`pydoc` module introduced in Python 2.1 to + provide interactive help. ``help(object)`` displays any available help text + about *object*. 
:func:`help` with no argument puts you in an online help + utility, where you can enter the names of functions, classes, or modules to read + their help text. (Contributed by Guido van Rossum, using Ka-Ping Yee's + :mod:`pydoc` module.) + +* Various bugfixes and performance improvements have been made to the SRE engine + underlying the :mod:`re` module. For example, the :func:`re.sub` and + :func:`re.split` functions have been rewritten in C. Another contributed patch + speeds up certain Unicode character ranges by a factor of two, and a new + :meth:`finditer` method returns an iterator over all the non-overlapping + matches in a given string. (SRE is maintained by Fredrik Lundh. The + BIGCHARSET patch was contributed by Martin von Löwis.) + +* The :mod:`smtplib` module now supports :rfc:`2487`, "Secure SMTP over TLS", so + it's now possible to encrypt the SMTP traffic between a Python program and the + mail transport agent being handed a message. :mod:`smtplib` also supports SMTP + authentication. (Contributed by Gerhard Häring.) + +* The :mod:`imaplib` module, maintained by Piers Lauder, has support for several + new extensions: the NAMESPACE extension defined in :rfc:`2342`, SORT, GETACL and + SETACL. (Contributed by Anthony Baxter and Michel Pelletier.) + +* The :mod:`rfc822` module's parsing of email addresses is now compliant with + :rfc:`2822`, an update to :rfc:`822`. (The module's name is *not* going to be + changed to ``rfc2822``.) A new package, :mod:`email`, has also been added for + parsing and generating e-mail messages. (Contributed by Barry Warsaw, and + arising out of his work on Mailman.) + +* The :mod:`difflib` module now contains a new :class:`Differ` class for + producing human-readable lists of changes (a "delta") between two sequences of + lines of text. There are also two generator functions, :func:`ndiff` and + :func:`restore`, which respectively return a delta from two sequences, or one of + the original sequences from a delta. 
(Grunt work contributed by David Goodger, + from ndiff.py code by Tim Peters who then did the generatorization.) + +* New constants :const:`ascii_letters`, :const:`ascii_lowercase`, and + :const:`ascii_uppercase` were added to the :mod:`string` module. There were + several modules in the standard library that used :const:`string.letters` to + mean the ranges A-Za-z, but that assumption is incorrect when locales are in + use, because :const:`string.letters` varies depending on the set of legal + characters defined by the current locale. The buggy modules have all been fixed + to use :const:`ascii_letters` instead. (Reported by an unknown person; fixed by + Fred L. Drake, Jr.) + +* The :mod:`mimetypes` module now makes it easier to use alternative MIME-type + databases by the addition of a :class:`MimeTypes` class, which takes a list of + filenames to be parsed. (Contributed by Fred L. Drake, Jr.) + +* A :class:`Timer` class was added to the :mod:`threading` module that allows + scheduling an activity to happen at some future time. (Contributed by Itamar + Shtull-Trauring.) + +.. % ====================================================================== + + +Interpreter Changes and Fixes +============================= + +Some of the changes only affect people who deal with the Python interpreter at +the C level because they're writing Python extension modules, embedding the +interpreter, or just hacking on the interpreter itself. If you only write Python +code, none of the changes described here will affect you very much. + +* Profiling and tracing functions can now be implemented in C, which can operate + at much higher speeds than Python-based functions and should reduce the overhead + of profiling and tracing. This will be of interest to authors of development + environments for Python. Two new C functions were added to Python's API, + :cfunc:`PyEval_SetProfile` and :cfunc:`PyEval_SetTrace`. 
The existing + :func:`sys.setprofile` and :func:`sys.settrace` functions still exist, and have + simply been changed to use the new C-level interface. (Contributed by Fred L. + Drake, Jr.) + +* Another low-level API, primarily of interest to implementors of Python + debuggers and development tools, was added. :cfunc:`PyInterpreterState_Head` and + :cfunc:`PyInterpreterState_Next` let a caller walk through all the existing + interpreter objects; :cfunc:`PyInterpreterState_ThreadHead` and + :cfunc:`PyThreadState_Next` allow looping over all the thread states for a given + interpreter. (Contributed by David Beazley.) + +* The C-level interface to the garbage collector has been changed to make it + easier to write extension types that support garbage collection and to debug + misuses of the functions. Various functions have slightly different semantics, + so a bunch of functions had to be renamed. Extensions that use the old API will + still compile but will *not* participate in garbage collection, so updating them + for 2.2 should be considered fairly high priority. + + To upgrade an extension module to the new API, perform the following steps: + +* Rename :cfunc:`Py_TPFLAGS_GC` to :cfunc:`Py_TPFLAGS_HAVE_GC`. + +* Use :cfunc:`PyObject_GC_New` or :cfunc:`PyObject_GC_NewVar` to allocate + objects, and :cfunc:`PyObject_GC_Del` to deallocate them. + +* Rename :cfunc:`PyObject_GC_Init` to :cfunc:`PyObject_GC_Track` and + :cfunc:`PyObject_GC_Fini` to :cfunc:`PyObject_GC_UnTrack`. + +* Remove :cfunc:`PyGC_HEAD_SIZE` from object size calculations. + +* Remove calls to :cfunc:`PyObject_AS_GC` and :cfunc:`PyObject_FROM_GC`. + +* A new ``et`` format sequence was added to :cfunc:`PyArg_ParseTuple`; ``et`` + takes both a parameter and an encoding name, and converts the parameter to the + given encoding if the parameter turns out to be a Unicode string, or leaves it + alone if it's an 8-bit string, assuming it to already be in the desired + encoding. 
This differs from the ``es`` format character, which assumes that + 8-bit strings are in Python's default ASCII encoding and converts them to the + specified new encoding. (Contributed by M.-A. Lemburg, and used for the MBCS + support on Windows described in the following section.) + +* A different argument parsing function, :cfunc:`PyArg_UnpackTuple`, has been + added that's simpler and presumably faster. Instead of specifying a format + string, the caller simply gives the minimum and maximum number of arguments + expected, and a set of pointers to :ctype:`PyObject\*` variables that will be + filled in with argument values. + +* Two new flags :const:`METH_NOARGS` and :const:`METH_O` are available in method + definition tables to simplify implementation of methods with no arguments or a + single untyped argument. Calling such methods is more efficient than calling a + corresponding method that uses :const:`METH_VARARGS`. Also, the old + :const:`METH_OLDARGS` style of writing C methods is now officially deprecated. + +* Two new wrapper functions, :cfunc:`PyOS_snprintf` and :cfunc:`PyOS_vsnprintf` + were added to provide cross-platform implementations for the relatively new + :cfunc:`snprintf` and :cfunc:`vsnprintf` C lib APIs. In contrast to the standard + :cfunc:`sprintf` and :cfunc:`vsprintf` functions, the Python versions check the + bounds of the buffer used to protect against buffer overruns. (Contributed by + M.-A. Lemburg.) + +* The :cfunc:`_PyTuple_Resize` function has lost an unused parameter, so now it + takes 2 parameters instead of 3. The third argument was never used, and can + simply be discarded when porting code from earlier versions to Python 2.2. + +.. % ====================================================================== + + +Other Changes and Fixes +======================= + +As usual there were a bunch of other improvements and bugfixes scattered +throughout the source tree. 
A search through the CVS change logs finds there +were 527 patches applied and 683 bugs fixed between Python 2.1 and 2.2; 2.2.1 +applied 139 patches and fixed 143 bugs; 2.2.2 applied 106 patches and fixed 82 +bugs. These figures are likely to be underestimates. + +Some of the more notable changes are: + +* The code for the MacOS port for Python, maintained by Jack Jansen, is now kept + in the main Python CVS tree, and many changes have been made to support MacOS X. + + The most significant change is the ability to build Python as a framework, + enabled by supplying the :option:`--enable-framework` option to the configure + script when compiling Python. According to Jack Jansen, "This installs a self- + contained Python installation plus the OS X framework "glue" into + :file:`/Library/Frameworks/Python.framework` (or another location of choice). + For now there is little immediate added benefit to this (actually, there is the + disadvantage that you have to change your PATH to be able to find Python), but + it is the basis for creating a full-blown Python application, porting the + MacPython IDE, possibly using Python as a standard OSA scripting language and + much more." + + Most of the MacPython toolbox modules, which interface to MacOS APIs such as + windowing, QuickTime, scripting, etc. have been ported to OS X, but they've been + left commented out in :file:`setup.py`. People who want to experiment with + these modules can uncomment them manually. + + .. % Jack's original comments: + .. % The main change is the possibility to build Python as a + .. % framework. This installs a self-contained Python installation plus the + .. % OSX framework "glue" into /Library/Frameworks/Python.framework (or + .. % another location of choice). For now there is little immedeate added + .. % benefit to this (actually, there is the disadvantage that you have to + .. % change your PATH to be able to find Python), but it is the basis for + .. 
% creating a fullblown Python application, porting the MacPython IDE, + .. % possibly using Python as a standard OSA scripting language and much + .. % more. You enable this with "configure --enable-framework". + .. % The other change is that most MacPython toolbox modules, which + .. % interface to all the MacOS APIs such as windowing, quicktime, + .. % scripting, etc. have been ported. Again, most of these are not of + .. % immedeate use, as they need a full application to be really useful, so + .. % they have been commented out in setup.py. People wanting to experiment + .. % can uncomment them. Gestalt and Internet Config modules are enabled by + .. % default. + +* Keyword arguments passed to builtin functions that don't take them now cause a + :exc:`TypeError` exception to be raised, with the message "*function* takes no + keyword arguments". + +* Weak references, added in Python 2.1 as an extension module, are now part of + the core because they're used in the implementation of new-style classes. The + :exc:`ReferenceError` exception has therefore moved from the :mod:`weakref` + module to become a built-in exception. + +* A new script, :file:`Tools/scripts/cleanfuture.py` by Tim Peters, + automatically removes obsolete ``__future__`` statements from Python source + code. + +* An additional *flags* argument has been added to the built-in function + :func:`compile`, so the behaviour of ``__future__`` statements can now be + correctly observed in simulated shells, such as those presented by IDLE and + other development environments. This is described in :pep:`264`. (Contributed + by Michael Hudson.) + +* The new license introduced with Python 1.6 wasn't GPL-compatible. This is + fixed by some minor textual changes to the 2.2 license, so it's now legal to + embed Python inside a GPLed program again. Note that Python itself is not + GPLed, but instead is under a license that's essentially equivalent to the BSD + license, same as it always was. 
The license changes were also applied to the + Python 2.0.1 and 2.1.1 releases. + +* When presented with a Unicode filename on Windows, Python will now convert it + to an MBCS encoded string, as used by the Microsoft file APIs. As MBCS is + explicitly used by the file APIs, Python's choice of ASCII as the default + encoding turns out to be an annoyance. On Unix, the locale's character set is + used if :func:`locale.nl_langinfo(CODESET)` is available. (Windows support was + contributed by Mark Hammond with assistance from Marc-André Lemburg. Unix + support was added by Martin von Löwis.) + +* Large file support is now enabled on Windows. (Contributed by Tim Peters.) + +* The :file:`Tools/scripts/ftpmirror.py` script now parses a :file:`.netrc` + file, if you have one. (Contributed by Mike Romberg.) + +* Some features of the object returned by the :func:`xrange` function are now + deprecated, and trigger warnings when they're accessed; they'll disappear in + Python 2.3. :class:`xrange` objects tried to pretend they were full sequence + types by supporting slicing, sequence multiplication, and the :keyword:`in` + operator, but these features were rarely used and therefore buggy. The + :meth:`tolist` method and the :attr:`start`, :attr:`stop`, and :attr:`step` + attributes are also being deprecated. At the C level, the fourth argument to + the :cfunc:`PyRange_New` function, ``repeat``, has also been deprecated. + +* There were a bunch of patches to the dictionary implementation, mostly to fix + potential core dumps if a dictionary contains objects that sneakily changed + their hash value, or mutated the dictionary they were contained in. For a while + python-dev fell into a gentle rhythm of Michael Hudson finding a case that + dumped core, Tim Peters fixing the bug, Michael finding another case, and round + and round it went. 
+ +* On Windows, Python can now be compiled with Borland C thanks to a number of + patches contributed by Stephen Hansen, though the result isn't fully functional + yet. (But this *is* progress...) + +* Another Windows enhancement: Wise Solutions generously offered PythonLabs use + of their InstallerMaster 8.1 system. Earlier PythonLabs Windows installers used + Wise 5.0a, which was beginning to show its age. (Packaged up by Tim Peters.) + +* Files ending in ``.pyw`` can now be imported on Windows. ``.pyw`` is a + Windows-only thing, used to indicate that a script needs to be run using + PYTHONW.EXE instead of PYTHON.EXE in order to prevent a DOS console from popping + up to display the output. This patch makes it possible to import such scripts, + in case they're also usable as modules. (Implemented by David Bolen.) + +* On platforms where Python uses the C :cfunc:`dlopen` function to load + extension modules, it's now possible to set the flags used by :cfunc:`dlopen` + using the :func:`sys.getdlopenflags` and :func:`sys.setdlopenflags` functions. + (Contributed by Bram Stolk.) + +* The :func:`pow` built-in function no longer supports 3 arguments when + floating-point numbers are supplied. ``pow(x, y, z)`` returns ``(x**y) % z``, + but this is never useful for floating point numbers, and the final result varies + unpredictably depending on the platform. A call such as ``pow(2.0, 8.0, 7.0)`` + will now raise a :exc:`TypeError` exception. + +.. % ====================================================================== + + +Acknowledgements +================ + +The author would like to thank the following people for offering suggestions, +corrections and assistance with various drafts of this article: Fred Bremmer, +Keith Briggs, Andrew Dalke, Fred L. 
Drake, Jr., Carel Fellinger, David Goodger, +Mark Hammond, Stephen Hansen, Michael Hudson, Jack Jansen, Marc-André Lemburg, +Martin von Löwis, Fredrik Lundh, Michael McLay, Nick Mathewson, Paul Moore, +Gustavo Niemeyer, Don O'Donnell, Joonas Paalasma, Tim Peters, Jens Quade, Tom +Reinhardt, Neil Schemenauer, Guido van Rossum, Greg Ward, Edward Welbourne. + diff --git a/Doc/whatsnew/2.3.rst b/Doc/whatsnew/2.3.rst new file mode 100644 index 0000000..7dd4930 --- /dev/null +++ b/Doc/whatsnew/2.3.rst @@ -0,0 +1,2084 @@ +**************************** + What's New in Python 2.3 +**************************** + +:Author: A.M. Kuchling + +.. |release| replace:: 1.01 + +.. % $Id: whatsnew23.tex 55005 2007-04-27 19:54:29Z guido.van.rossum $ + +This article explains the new features in Python 2.3. Python 2.3 was released +on July 29, 2003. + +The main themes for Python 2.3 are polishing some of the features added in 2.2, +adding various small but useful enhancements to the core language, and expanding +the standard library. The new object model introduced in the previous version +has benefited from 18 months of bugfixes and from optimization efforts that have +improved the performance of new-style classes. A few new built-in functions +have been added such as :func:`sum` and :func:`enumerate`. The :keyword:`in` +operator can now be used for substring searches (e.g. ``"ab" in "abc"`` returns +:const:`True`). + +Some of the many new library features include Boolean, set, heap, and date/time +data types, the ability to import modules from ZIP-format archives, metadata +support for the long-awaited Python catalog, an updated version of IDLE, and +modules for logging messages, wrapping text, parsing CSV files, processing +command-line options, using BerkeleyDB databases... the list of new and +enhanced modules is lengthy. + +This article doesn't attempt to provide a complete specification of the new +features, but instead provides a convenient overview. 
For full details, you +should refer to the documentation for Python 2.3, such as the Python Library +Reference and the Python Reference Manual. If you want to understand the +complete implementation and design rationale, refer to the PEP for a particular +new feature. + +.. % ====================================================================== + + +PEP 218: A Standard Set Datatype +================================ + +The new :mod:`sets` module contains an implementation of a set datatype. The +:class:`Set` class is for mutable sets, sets that can have members added and +removed. The :class:`ImmutableSet` class is for sets that can't be modified, +and instances of :class:`ImmutableSet` can therefore be used as dictionary keys. +Sets are built on top of dictionaries, so the elements within a set must be +hashable. + +Here's a simple example:: + + >>> import sets + >>> S = sets.Set([1,2,3]) + >>> S + Set([1, 2, 3]) + >>> 1 in S + True + >>> 0 in S + False + >>> S.add(5) + >>> S.remove(3) + >>> S + Set([1, 2, 5]) + >>> + +The union and intersection of sets can be computed with the :meth:`union` and +:meth:`intersection` methods; an alternative notation uses the bitwise operators +``&`` and ``|``. Mutable sets also have in-place versions of these methods, +:meth:`union_update` and :meth:`intersection_update`. :: + + >>> S1 = sets.Set([1,2,3]) + >>> S2 = sets.Set([4,5,6]) + >>> S1.union(S2) + Set([1, 2, 3, 4, 5, 6]) + >>> S1 | S2 # Alternative notation + Set([1, 2, 3, 4, 5, 6]) + >>> S1.intersection(S2) + Set([]) + >>> S1 & S2 # Alternative notation + Set([]) + >>> S1.union_update(S2) + >>> S1 + Set([1, 2, 3, 4, 5, 6]) + >>> + +It's also possible to take the symmetric difference of two sets. This is the +set of all elements in the union that aren't in the intersection. Another way +of putting it is that the symmetric difference contains all elements that are in +exactly one set. 
Again, there's an alternative notation (``^``), and an in- +place version with the ungainly name :meth:`symmetric_difference_update`. :: + + >>> S1 = sets.Set([1,2,3,4]) + >>> S2 = sets.Set([3,4,5,6]) + >>> S1.symmetric_difference(S2) + Set([1, 2, 5, 6]) + >>> S1 ^ S2 + Set([1, 2, 5, 6]) + >>> + +There are also :meth:`issubset` and :meth:`issuperset` methods for checking +whether one set is a subset or superset of another:: + + >>> S1 = sets.Set([1,2,3]) + >>> S2 = sets.Set([2,3]) + >>> S2.issubset(S1) + True + >>> S1.issubset(S2) + False + >>> S1.issuperset(S2) + True + >>> + + +.. seealso:: + + :pep:`218` - Adding a Built-In Set Object Type + PEP written by Greg V. Wilson. Implemented by Greg V. Wilson, Alex Martelli, and + GvR. + +.. % ====================================================================== + + +.. _section-generators: + +PEP 255: Simple Generators +========================== + +In Python 2.2, generators were added as an optional feature, to be enabled by a +``from __future__ import generators`` directive. In 2.3 generators no longer +need to be specially enabled, and are now always present; this means that +:keyword:`yield` is now always a keyword. The rest of this section is a copy of +the description of generators from the "What's New in Python 2.2" document; if +you read it back when Python 2.2 came out, you can skip the rest of this +section. + +You're doubtless familiar with how function calls work in Python or C. When you +call a function, it gets a private namespace where its local variables are +created. When the function reaches a :keyword:`return` statement, the local +variables are destroyed and the resulting value is returned to the caller. A +later call to the same function will get a fresh new set of local variables. +But, what if the local variables weren't thrown away on exiting a function? +What if you could later resume the function where it left off? 
This is what +generators provide; they can be thought of as resumable functions. + +Here's the simplest example of a generator function:: + + def generate_ints(N): + for i in range(N): + yield i + +A new keyword, :keyword:`yield`, was introduced for generators. Any function +containing a :keyword:`yield` statement is a generator function; this is +detected by Python's bytecode compiler which compiles the function specially as +a result. + +When you call a generator function, it doesn't return a single value; instead it +returns a generator object that supports the iterator protocol. On executing +the :keyword:`yield` statement, the generator outputs the value of ``i``, +similar to a :keyword:`return` statement. The big difference between +:keyword:`yield` and a :keyword:`return` statement is that on reaching a +:keyword:`yield` the generator's state of execution is suspended and local +variables are preserved. On the next call to the generator's ``.next()`` +method, the function will resume executing immediately after the +:keyword:`yield` statement. (For complicated reasons, the :keyword:`yield` +statement isn't allowed inside the :keyword:`try` block of a :keyword:`try`...\ +:keyword:`finally` statement; read :pep:`255` for a full explanation of the +interaction between :keyword:`yield` and exceptions.) + +Here's a sample usage of the :func:`generate_ints` generator:: + + >>> gen = generate_ints(3) + >>> gen + <generator object at 0x8117f90> + >>> gen.next() + 0 + >>> gen.next() + 1 + >>> gen.next() + 2 + >>> gen.next() + Traceback (most recent call last): + File "stdin", line 1, in ? + File "stdin", line 2, in generate_ints + StopIteration + +You could equally write ``for i in generate_ints(5)``, or ``a,b,c = +generate_ints(3)``. + +Inside a generator function, the :keyword:`return` statement can only be used +without a value, and signals the end of the procession of values; afterwards the +generator cannot return any further values. 
:keyword:`return` with a value, such +as ``return 5``, is a syntax error inside a generator function. The end of the +generator's results can also be indicated by raising :exc:`StopIteration` +manually, or by just letting the flow of execution fall off the bottom of the +function. + +You could achieve the effect of generators manually by writing your own class +and storing all the local variables of the generator as instance variables. For +example, returning a list of integers could be done by setting ``self.count`` to +0, and having the :meth:`next` method increment ``self.count`` and return it. +However, for a moderately complicated generator, writing a corresponding class +would be much messier. :file:`Lib/test/test_generators.py` contains a number of +more interesting examples. The simplest one implements an in-order traversal of +a tree using generators recursively. :: + + # A recursive generator that generates Tree leaves in in-order. + def inorder(t): + if t: + for x in inorder(t.left): + yield x + yield t.label + for x in inorder(t.right): + yield x + +Two other examples in :file:`Lib/test/test_generators.py` produce solutions for +the N-Queens problem (placing N queens on an NxN chess board so that no +queen threatens another) and the Knight's Tour (a route that takes a knight to +every square of an NxN chessboard without visiting any square twice). + +The idea of generators comes from other programming languages, especially Icon +(http://www.cs.arizona.edu/icon/), where the idea of generators is central. In +Icon, every expression and function call behaves like a generator. One example +from "An Overview of the Icon Programming Language" at +http://www.cs.arizona.edu/icon/docs/ipd266.htm gives an idea of what this looks +like:: + + sentence := "Store it in the neighboring harbor" + if (i := find("or", sentence)) > 5 then write(i) + +In Icon the :func:`find` function returns the indexes at which the substring +"or" is found: 3, 23, 33. 
In the :keyword:`if` statement, ``i`` is first +assigned a value of 3, but 3 is less than 5, so the comparison fails, and Icon +retries it with the second value of 23. 23 is greater than 5, so the comparison +now succeeds, and the code prints the value 23 to the screen. + +Python doesn't go nearly as far as Icon in adopting generators as a central +concept. Generators are considered part of the core Python language, but +learning or using them isn't compulsory; if they don't solve any problems that +you have, feel free to ignore them. One novel feature of Python's interface as +compared to Icon's is that a generator's state is represented as a concrete +object (the iterator) that can be passed around to other functions or stored in +a data structure. + + +.. seealso:: + + :pep:`255` - Simple Generators + Written by Neil Schemenauer, Tim Peters, Magnus Lie Hetland. Implemented mostly + by Neil Schemenauer and Tim Peters, with other fixes from the Python Labs crew. + +.. % ====================================================================== + + +.. _section-encodings: + +PEP 263: Source Code Encodings +============================== + +Python source files can now be declared as being in different character set +encodings. Encodings are declared by including a specially formatted comment in +the first or second line of the source file. For example, a UTF-8 file can be +declared with:: + + #!/usr/bin/env python + # -*- coding: UTF-8 -*- + +Without such an encoding declaration, the default encoding used is 7-bit ASCII. +Executing or importing modules that contain string literals with 8-bit +characters and have no encoding declaration will result in a +:exc:`DeprecationWarning` being signalled by Python 2.3; in 2.4 this will be a +syntax error. + +The encoding declaration only affects Unicode string literals, which will be +converted to Unicode using the specified encoding. 
Note that Python identifiers +are still restricted to ASCII characters, so you can't have variable names that +use characters outside of the usual alphanumerics. + + +.. seealso:: + + :pep:`263` - Defining Python Source Code Encodings + Written by Marc-André Lemburg and Martin von Löwis; implemented by Suzuki Hisao + and Martin von Löwis. + +.. % ====================================================================== + + +PEP 273: Importing Modules from ZIP Archives +============================================ + +The new :mod:`zipimport` module adds support for importing modules from a ZIP- +format archive. You don't need to import the module explicitly; it will be +automatically imported if a ZIP archive's filename is added to ``sys.path``. +For example:: + + amk@nyman:~/src/python$ unzip -l /tmp/example.zip + Archive: /tmp/example.zip + Length Date Time Name + -------- ---- ---- ---- + 8467 11-26-02 22:30 jwzthreading.py + -------- ------- + 8467 1 file + amk@nyman:~/src/python$ ./python + Python 2.3 (#1, Aug 1 2003, 19:54:32) + >>> import sys + >>> sys.path.insert(0, '/tmp/example.zip') # Add .zip file to front of path + >>> import jwzthreading + >>> jwzthreading.__file__ + '/tmp/example.zip/jwzthreading.py' + >>> + +An entry in ``sys.path`` can now be the filename of a ZIP archive. The ZIP +archive can contain any kind of files, but only files named :file:`\*.py`, +:file:`\*.pyc`, or :file:`\*.pyo` can be imported. If an archive only contains +:file:`\*.py` files, Python will not attempt to modify the archive by adding the +corresponding :file:`\*.pyc` file, meaning that if a ZIP archive doesn't contain +:file:`\*.pyc` files, importing may be rather slow. + +A path within the archive can also be specified to only import from a +subdirectory; for example, the path :file:`/tmp/example.zip/lib/` would only +import from the :file:`lib/` subdirectory within the archive. + + +.. seealso:: + + :pep:`273` - Import Modules from Zip Archives + Written by James C. 
Ahlstrom, who also provided an implementation. Python 2.3 + follows the specification in :pep:`273`, but uses an implementation written by + Just van Rossum that uses the import hooks described in :pep:`302`. See section + :ref:`section-pep302` for a description of the new import hooks. + +.. % ====================================================================== + + +PEP 277: Unicode file name support for Windows NT +================================================= + +On Windows NT, 2000, and XP, the system stores file names as Unicode strings. +Traditionally, Python has represented file names as byte strings, which is +inadequate because it renders some file names inaccessible. + +Python now allows using arbitrary Unicode strings (within the limitations of the +file system) for all functions that expect file names, most notably the +:func:`open` built-in function. If a Unicode string is passed to +:func:`os.listdir`, Python now returns a list of Unicode strings. A new +function, :func:`os.getcwdu`, returns the current directory as a Unicode string. + +Byte strings still work as file names, and on Windows Python will transparently +convert them to Unicode using the ``mbcs`` encoding. + +Other systems also allow Unicode strings as file names but convert them to byte +strings before passing them to the system, which can cause a :exc:`UnicodeError` +to be raised. Applications can test whether arbitrary Unicode strings are +supported as file names by checking :attr:`os.path.supports_unicode_filenames`, +a Boolean value. + +Under MacOS, :func:`os.listdir` may now return Unicode filenames. + + +.. seealso:: + + :pep:`277` - Unicode file name support for Windows NT + Written by Neil Hodgson; implemented by Neil Hodgson, Martin von Löwis, and Mark + Hammond. + +.. 
% ====================================================================== + + +PEP 278: Universal Newline Support +================================== + +The three major operating systems used today are Microsoft Windows, Apple's +Macintosh OS, and the various Unix derivatives. A minor irritation of cross- +platform work is that these three platforms all use different characters to +mark the ends of lines in text files. Unix uses the linefeed (ASCII character +10), MacOS uses the carriage return (ASCII character 13), and Windows uses a +two-character sequence of a carriage return plus a newline. + +Python's file objects can now support end of line conventions other than the one +followed by the platform on which Python is running. Opening a file with the +mode ``'U'`` or ``'rU'`` will open a file for reading in universal newline mode. +All three line ending conventions will be translated to a ``'\n'`` in the +strings returned by the various file methods such as :meth:`read` and +:meth:`readline`. + +Universal newline support is also used when importing modules and when executing +a file with the :func:`execfile` function. This means that Python modules can +be shared between all three operating systems without needing to convert the +line-endings. + +This feature can be disabled when compiling Python by specifying the +:option:`--without-universal-newlines` switch when running Python's +:program:`configure` script. + + +.. seealso:: + + :pep:`278` - Universal Newline Support + Written and implemented by Jack Jansen. + +.. % ====================================================================== + + +.. _section-enumerate: + +PEP 279: enumerate() +==================== + +A new built-in function, :func:`enumerate`, will make certain loops a bit +clearer. ``enumerate(thing)``, where *thing* is either an iterator or a +sequence, returns an iterator that will return ``(0, thing[0])``, ``(1, +thing[1])``, ``(2, thing[2])``, and so forth.
+ +A common idiom to change every element of a list looks like this:: + + for i in range(len(L)): + item = L[i] + # ... compute some result based on item ... + L[i] = result + +This can be rewritten using :func:`enumerate` as:: + + for i, item in enumerate(L): + # ... compute some result based on item ... + L[i] = result + + +.. seealso:: + + :pep:`279` - The enumerate() built-in function + Written and implemented by Raymond D. Hettinger. + +.. % ====================================================================== + + +PEP 282: The logging Package +============================ + +A standard package for writing logs, :mod:`logging`, has been added to Python +2.3. It provides a powerful and flexible mechanism for generating logging +output which can then be filtered and processed in various ways. A +configuration file written in a standard format can be used to control the +logging behavior of a program. Python includes handlers that will write log +records to standard error or to a file or socket, send them to the system log, +or even e-mail them to a particular address; of course, it's also possible to +write your own handler classes. + +The :class:`Logger` class is the primary class. Most application code will deal +with one or more :class:`Logger` objects, each one used by a particular +subsystem of the application. Each :class:`Logger` is identified by a name, and +names are organized into a hierarchy using ``.`` as the component separator. +For example, you might have :class:`Logger` instances named ``server``, +``server.auth`` and ``server.network``. The latter two instances are below +``server`` in the hierarchy. This means that if you turn up the verbosity for +``server`` or direct ``server`` messages to a different handler, the changes +will also apply to records logged to ``server.auth`` and ``server.network``. +There's also a root :class:`Logger` that's the parent of all other loggers. 
+ +For simple uses, the :mod:`logging` package contains some convenience functions +that always use the root log:: + + import logging + + logging.debug('Debugging information') + logging.info('Informational message') + logging.warning('Warning:config file %s not found', 'server.conf') + logging.error('Error occurred') + logging.critical('Critical error -- shutting down') + +This produces the following output:: + + WARNING:root:Warning:config file server.conf not found + ERROR:root:Error occurred + CRITICAL:root:Critical error -- shutting down + +In the default configuration, informational and debugging messages are +suppressed and the output is sent to standard error. You can enable the display +of informational and debugging messages by calling the :meth:`setLevel` method +on the root logger. + +Notice the :func:`warning` call's use of string formatting operators; all of the +functions for logging messages take the arguments ``(msg, arg1, arg2, ...)`` and +log the string resulting from ``msg % (arg1, arg2, ...)``. + +There's also an :func:`exception` function that records the most recent +traceback. Any of the other functions will also record the traceback if you +specify a true value for the keyword argument *exc_info*. :: + + def f(): + try: 1/0 + except: logging.exception('Problem recorded') + + f() + +This produces the following output:: + + ERROR:root:Problem recorded + Traceback (most recent call last): + File "t.py", line 6, in f + 1/0 + ZeroDivisionError: integer division or modulo by zero + +Slightly more advanced programs will use a logger other than the root logger. +The :func:`getLogger(name)` function is used to get a particular log, creating +it if it doesn't exist yet. :func:`getLogger(None)` returns the root logger. :: + + log = logging.getLogger('server') + ... + log.info('Listening on port %i', port) + ... + log.critical('Disk full') + ... 
+ +Log records are usually propagated up the hierarchy, so a message logged to +``server.auth`` is also seen by ``server`` and ``root``, but a :class:`Logger` +can prevent this by setting its :attr:`propagate` attribute to :const:`False`. + +There are more classes provided by the :mod:`logging` package that can be +customized. When a :class:`Logger` instance is told to log a message, it +creates a :class:`LogRecord` instance that is sent to any number of different +:class:`Handler` instances. Loggers and handlers can also have an attached list +of filters, and each filter can cause the :class:`LogRecord` to be ignored or +can modify the record before passing it along. When they're finally output, +:class:`LogRecord` instances are converted to text by a :class:`Formatter` +class. All of these classes can be replaced by your own specially-written +classes. + +With all of these features the :mod:`logging` package should provide enough +flexibility for even the most complicated applications. This is only an +incomplete overview of its features, so please see the package's reference +documentation for all of the details. Reading :pep:`282` will also be helpful. + + +.. seealso:: + + :pep:`282` - A Logging System + Written by Vinay Sajip and Trent Mick; implemented by Vinay Sajip. + +.. % ====================================================================== + + +.. _section-bool: + +PEP 285: A Boolean Type +======================= + +A Boolean type was added to Python 2.3. Two new constants were added to the +:mod:`__builtin__` module, :const:`True` and :const:`False`. (:const:`True` and +:const:`False` constants were added to the built-ins in Python 2.2.1, but the +2.2.1 versions are simply set to integer values of 1 and 0 and aren't a +different type.) + +The type object for this new type is named :class:`bool`; the constructor for it +takes any Python value and converts it to :const:`True` or :const:`False`. 
:: + + >>> bool(1) + True + >>> bool(0) + False + >>> bool([]) + False + >>> bool( (1,) ) + True + +Most of the standard library modules and built-in functions have been changed to +return Booleans. :: + + >>> obj = [] + >>> hasattr(obj, 'append') + True + >>> isinstance(obj, list) + True + >>> isinstance(obj, tuple) + False + +Python's Booleans were added with the primary goal of making code clearer. For +example, if you're reading a function and encounter the statement ``return 1``, +you might wonder whether the ``1`` represents a Boolean truth value, an index, +or a coefficient that multiplies some other quantity. If the statement is +``return True``, however, the meaning of the return value is quite clear. + +Python's Booleans were *not* added for the sake of strict type-checking. A very +strict language such as Pascal would also prevent you performing arithmetic with +Booleans, and would require that the expression in an :keyword:`if` statement +always evaluate to a Boolean result. Python is not this strict and never will +be, as :pep:`285` explicitly says. This means you can still use any expression +in an :keyword:`if` statement, even ones that evaluate to a list or tuple or +some random object. The Boolean type is a subclass of the :class:`int` class so +that arithmetic using a Boolean still works. :: + + >>> True + 1 + 2 + >>> False + 1 + 1 + >>> False * 75 + 0 + >>> True * 75 + 75 + +To sum up :const:`True` and :const:`False` in a sentence: they're alternative +ways to spell the integer values 1 and 0, with the single difference that +:func:`str` and :func:`repr` return the strings ``'True'`` and ``'False'`` +instead of ``'1'`` and ``'0'``. + + +.. seealso:: + + :pep:`285` - Adding a bool type + Written and implemented by GvR. + +.. 
% ====================================================================== + + +PEP 293: Codec Error Handling Callbacks +======================================= + +When encoding a Unicode string into a byte string, unencodable characters may be +encountered. So far, Python has allowed specifying the error processing as +either "strict" (raising :exc:`UnicodeError`), "ignore" (skipping the +character), or "replace" (using a question mark in the output string), with +"strict" being the default behavior. It may be desirable to specify alternative +processing of such errors, such as inserting an XML character reference or HTML +entity reference into the converted string. + +Python now has a flexible framework to add different processing strategies. New +error handlers can be added with :func:`codecs.register_error`, and codecs then +can access the error handler with :func:`codecs.lookup_error`. An equivalent C +API has been added for codecs written in C. The error handler gets the necessary +state information such as the string being converted, the position in the string +where the error was detected, and the target encoding. The handler can then +either raise an exception or return a replacement string. + +Two additional error handlers have been implemented using this framework: +"backslashreplace" uses Python backslash quoting to represent unencodable +characters and "xmlcharrefreplace" emits XML character references. + + +.. seealso:: + + :pep:`293` - Codec Error Handling Callbacks + Written and implemented by Walter Dörwald. + +.. % ====================================================================== + + +.. _section-pep301: + +PEP 301: Package Index and Metadata for Distutils +================================================= + +Support for the long-requested Python catalog makes its first appearance in 2.3. + +The heart of the catalog is the new Distutils :command:`register` command. 
+Running ``python setup.py register`` will collect the metadata describing a +package, such as its name, version, maintainer, description, &c., and send it to +a central catalog server. The resulting catalog is available from +http://www.python.org/pypi. + +To make the catalog a bit more useful, a new optional *classifiers* keyword +argument has been added to the Distutils :func:`setup` function. A list of +`Trove <http://catb.org/~esr/trove/>`_-style strings can be supplied to help +classify the software. + +Here's an example :file:`setup.py` with classifiers, written to be compatible +with older versions of the Distutils:: + + from distutils import core + kw = {'name': "Quixote", + 'version': "0.5.1", + 'description': "A highly Pythonic Web application framework", + # ... + } + + if (hasattr(core, 'setup_keywords') and + 'classifiers' in core.setup_keywords): + kw['classifiers'] = \ + ['Topic :: Internet :: WWW/HTTP :: Dynamic Content', + 'Environment :: No Input/Output (Daemon)', + 'Intended Audience :: Developers'] + + core.setup(**kw) + +The full list of classifiers can be obtained by running ``python setup.py +register --list-classifiers``. + + +.. seealso:: + + :pep:`301` - Package Index and Metadata for Distutils + Written and implemented by Richard Jones. + +.. % ====================================================================== + + +.. _section-pep302: + +PEP 302: New Import Hooks +========================= + +While it's been possible to write custom import hooks ever since the +:mod:`ihooks` module was introduced in Python 1.3, no one has ever been really +happy with it because writing new import hooks is difficult and messy. There +have been various proposed alternatives such as the :mod:`imputil` and :mod:`iu` +modules, but none of them has ever gained much acceptance, and none of them were +easily usable from C code. + +:pep:`302` borrows ideas from its predecessors, especially from Gordon +McMillan's :mod:`iu` module.
Three new items are added to the :mod:`sys` +module: + +* ``sys.path_hooks`` is a list of callable objects; most often they'll be + classes. Each callable takes a string containing a path and either returns an + importer object that will handle imports from this path or raises an + :exc:`ImportError` exception if it can't handle this path. + +* ``sys.path_importer_cache`` caches importer objects for each path, so + ``sys.path_hooks`` will only need to be traversed once for each path. + +* ``sys.meta_path`` is a list of importer objects that will be traversed before + ``sys.path`` is checked. This list is initially empty, but user code can add + objects to it. Additional built-in and frozen modules can be imported by an + object added to this list. + +Importer objects must have a single method, :meth:`find_module(fullname, +path=None)`. *fullname* will be a module or package name, e.g. ``string`` or +``distutils.core``. :meth:`find_module` must return a loader object that has a +single method, :meth:`load_module(fullname)`, that creates and returns the +corresponding module object. + +Pseudo-code for Python's new import logic, therefore, looks something like this +(simplified a bit; see :pep:`302` for the full details):: + + for mp in sys.meta_path: + loader = mp.find_module(fullname) + if loader is not None: + <module> = loader.load_module(fullname) + + for path in sys.path: + for hook in sys.path_hooks: + try: + importer = hook(path) + except ImportError: + # ImportError, so try the other path hooks + pass + else: + loader = importer.find_module(fullname) + <module> = loader.load_module(fullname) + + # Not found! + raise ImportError + + +.. seealso:: + + :pep:`302` - New Import Hooks + Written by Just van Rossum and Paul Moore. Implemented by Just van Rossum. + +.. % ====================================================================== + + +..
_section-pep305: + +PEP 305: Comma-separated Files +============================== + +Comma-separated files are a format frequently used for exporting data from +databases and spreadsheets. Python 2.3 adds a parser for comma-separated files. + +Comma-separated format is deceptively simple at first glance:: + + Costs,150,200,3.95 + +Read a line and call ``line.split(',')``: what could be simpler? But toss in +string data that can contain commas, and things get more complicated:: + + "Costs",150,200,3.95,"Includes taxes, shipping, and sundry items" + +A big ugly regular expression can parse this, but using the new :mod:`csv` +package is much simpler:: + + import csv + + input = open('datafile', 'rb') + reader = csv.reader(input) + for line in reader: + print line + +The :func:`reader` function takes a number of different options. The field +separator isn't limited to the comma and can be changed to any character, and so +can the quoting and line-ending characters. + +Different dialects of comma-separated files can be defined and registered; +currently there are two dialects, both used by Microsoft Excel. A separate +:class:`csv.writer` class will generate comma-separated files from a succession +of tuples or lists, quoting strings that contain the delimiter. + + +.. seealso:: + + :pep:`305` - CSV File API + Written and implemented by Kevin Altis, Dave Cole, Andrew McNamara, Skip + Montanaro, Cliff Wells. + +.. % ====================================================================== + + +.. _section-pep307: + +PEP 307: Pickle Enhancements +============================ + +The :mod:`pickle` and :mod:`cPickle` modules received some attention during the +2.3 development cycle. In 2.2, new-style classes could be pickled without +difficulty, but they weren't pickled very compactly; :pep:`307` quotes a trivial +example where a new-style class results in a pickled string three times longer +than that for a classic class. + +The solution was to invent a new pickle protocol. 
The :func:`pickle.dumps` +function has supported a text-or-binary flag for a long time. In 2.3, this +flag is redefined from a Boolean to an integer: 0 is the old text-mode pickle +format, 1 is the old binary format, and now 2 is a new 2.3-specific format. A +new constant, :const:`pickle.HIGHEST_PROTOCOL`, can be used to select the +fanciest protocol available. + +Unpickling is no longer considered a safe operation. 2.2's :mod:`pickle` +provided hooks for trying to prevent unsafe classes from being unpickled +(specifically, a :attr:`__safe_for_unpickling__` attribute), but none of this +code was ever audited and therefore it's all been ripped out in 2.3. You should +not unpickle untrusted data in any version of Python. + +To reduce the pickling overhead for new-style classes, a new interface for +customizing pickling was added using three special methods: +:meth:`__getstate__`, :meth:`__setstate__`, and :meth:`__getnewargs__`. Consult +:pep:`307` for the full semantics of these methods. + +As a way to compress pickles yet further, it's now possible to use integer codes +instead of long strings to identify pickled classes. The Python Software +Foundation will maintain a list of standardized codes; there's also a range of +codes for private use. Currently no codes have been specified. + + +.. seealso:: + + :pep:`307` - Extensions to the pickle protocol + Written and implemented by Guido van Rossum and Tim Peters. + +.. % ====================================================================== + + +.. _section-slices: + +Extended Slices +=============== + +Ever since Python 1.4, the slicing syntax has supported an optional third "step" +or "stride" argument. For example, these are all legal Python syntax: +``L[1:10:2]``, ``L[:-1:1]``, ``L[::-1]``. This was added to Python at the +request of the developers of Numerical Python, which uses the third argument +extensively. 
However, Python's built-in list, tuple, and string sequence types +have never supported this feature, raising a :exc:`TypeError` if you tried it. +Michael Hudson contributed a patch to fix this shortcoming. + +For example, you can now easily extract the elements of a list that have even +indexes:: + + >>> L = range(10) + >>> L[::2] + [0, 2, 4, 6, 8] + +Negative values also work to make a copy of the same list in reverse order:: + + >>> L[::-1] + [9, 8, 7, 6, 5, 4, 3, 2, 1, 0] + +This also works for tuples, arrays, and strings:: + + >>> s='abcd' + >>> s[::2] + 'ac' + >>> s[::-1] + 'dcba' + +If you have a mutable sequence such as a list or an array you can assign to or +delete an extended slice, but there are some differences between assignment to +extended and regular slices. Assignment to a regular slice can be used to +change the length of the sequence:: + + >>> a = range(3) + >>> a + [0, 1, 2] + >>> a[1:3] = [4, 5, 6] + >>> a + [0, 4, 5, 6] + +Extended slices aren't this flexible. When assigning to an extended slice, the +list on the right hand side of the statement must contain the same number of +items as the slice it is replacing:: + + >>> a = range(4) + >>> a + [0, 1, 2, 3] + >>> a[::2] + [0, 2] + >>> a[::2] = [0, -1] + >>> a + [0, 1, -1, 3] + >>> a[::2] = [0,1,2] + Traceback (most recent call last): + File "<stdin>", line 1, in ? 
+ ValueError: attempt to assign sequence of size 3 to extended slice of size 2 + +Deletion is more straightforward:: + + >>> a = range(4) + >>> a + [0, 1, 2, 3] + >>> a[::2] + [0, 2] + >>> del a[::2] + >>> a + [1, 3] + +One can also now pass slice objects to the :meth:`__getitem__` methods of the +built-in sequences:: + + >>> range(10).__getitem__(slice(0, 5, 2)) + [0, 2, 4] + +Or use slice objects directly in subscripts:: + + >>> range(10)[slice(0, 5, 2)] + [0, 2, 4] + +To simplify implementing sequences that support extended slicing, slice objects +now have a method :meth:`indices(length)` which, given the length of a sequence, +returns a ``(start, stop, step)`` tuple that can be passed directly to +:func:`range`. :meth:`indices` handles omitted and out-of-bounds indices in a +manner consistent with regular slices (and this innocuous phrase hides a welter +of confusing details!). The method is intended to be used like this:: + + class FakeSeq: + ... + def calc_item(self, i): + ... + def __getitem__(self, item): + if isinstance(item, slice): + indices = item.indices(len(self)) + return FakeSeq([self.calc_item(i) for i in range(*indices)]) + else: + return self.calc_item(item) + +From this example you can also see that the built-in :class:`slice` object is +now the type object for the slice type, and is no longer a function. This is +consistent with Python 2.2, where :class:`int`, :class:`str`, etc., underwent +the same change. + +.. % ====================================================================== + + +Other Language Changes +====================== + +Here are all of the changes that Python 2.3 makes to the core Python language. + +* The :keyword:`yield` statement is now always a keyword, as described in + section :ref:`section-generators` of this document. + +* A new built-in function :func:`enumerate` was added, as described in section + :ref:`section-enumerate` of this document.
+ +* Two new constants, :const:`True` and :const:`False` were added along with the + built-in :class:`bool` type, as described in section :ref:`section-bool` of this + document. + +* The :func:`int` type constructor will now return a long integer instead of + raising an :exc:`OverflowError` when a string or floating-point number is too + large to fit into an integer. This can lead to the paradoxical result that + ``isinstance(int(expression), int)`` is false, but that seems unlikely to cause + problems in practice. + +* Built-in types now support the extended slicing syntax, as described in + section :ref:`section-slices` of this document. + +* A new built-in function, :func:`sum(iterable, start=0)`, adds up the numeric + items in the iterable object and returns their sum. :func:`sum` only accepts + numbers, meaning that you can't use it to concatenate a bunch of strings. + (Contributed by Alex Martelli.) + +* ``list.insert(pos, value)`` used to insert *value* at the front of the list + when *pos* was negative. The behaviour has now been changed to be consistent + with slice indexing, so when *pos* is -1 the value will be inserted before the + last element, and so forth. + +* ``list.index(value)``, which searches for *value* within the list and returns + its index, now takes optional *start* and *stop* arguments to limit the search + to only part of the list. + +* Dictionaries have a new method, :meth:`pop(key[, *default*])`, that returns + the value corresponding to *key* and removes that key/value pair from the + dictionary. If the requested key isn't present in the dictionary, *default* is + returned if it's specified and :exc:`KeyError` raised if it isn't. :: + + >>> d = {1:2} + >>> d + {1: 2} + >>> d.pop(4) + Traceback (most recent call last): + File "stdin", line 1, in ? + KeyError: 4 + >>> d.pop(1) + 2 + >>> d.pop(1) + Traceback (most recent call last): + File "stdin", line 1, in ? 
+ KeyError: 'pop(): dictionary is empty' + >>> d + {} + >>> + + There's also a new class method, :meth:`dict.fromkeys(iterable, value)`, that + creates a dictionary with keys taken from the supplied iterator *iterable* and + all values set to *value*, defaulting to ``None``. + + (Patches contributed by Raymond Hettinger.) + + Also, the :func:`dict` constructor now accepts keyword arguments to simplify + creating small dictionaries:: + + >>> dict(red=1, blue=2, green=3, black=4) + {'blue': 2, 'black': 4, 'green': 3, 'red': 1} + + (Contributed by Just van Rossum.) + +* The :keyword:`assert` statement no longer checks the ``__debug__`` flag, so + you can no longer disable assertions by assigning to ``__debug__``. Running + Python with the :option:`-O` switch will still generate code that doesn't + execute any assertions. + +* Most type objects are now callable, so you can use them to create new objects + such as functions, classes, and modules. (This means that the :mod:`new` module + can be deprecated in a future Python version, because you can now use the type + objects available in the :mod:`types` module.) For example, you can create a new + module object with the following code: + + .. % XXX should new.py use PendingDeprecationWarning? + + :: + + >>> import types + >>> m = types.ModuleType('abc','docstring') + >>> m + <module 'abc' (built-in)> + >>> m.__doc__ + 'docstring' + +* A new warning, :exc:`PendingDeprecationWarning` was added to indicate features + which are in the process of being deprecated. The warning will *not* be printed + by default. To check for use of features that will be deprecated in the future, + supply :option:`-Walways::PendingDeprecationWarning::` on the command line or + use :func:`warnings.filterwarnings`. + +* The process of deprecating string-based exceptions, as in ``raise "Error + occurred"``, has begun. Raising a string will now trigger + :exc:`PendingDeprecationWarning`. 
+ +* Using ``None`` as a variable name will now result in a :exc:`SyntaxWarning` + warning. In a future version of Python, ``None`` may finally become a keyword. + +* The :meth:`xreadlines` method of file objects, introduced in Python 2.1, is no + longer necessary because files now behave as their own iterator. + :meth:`xreadlines` was originally introduced as a faster way to loop over all + the lines in a file, but now you can simply write ``for line in file_obj``. + File objects also have a new read-only :attr:`encoding` attribute that gives the + encoding used by the file; Unicode strings written to the file will be + automatically converted to bytes using the given encoding. + +* The method resolution order used by new-style classes has changed, though + you'll only notice the difference if you have a really complicated inheritance + hierarchy. Classic classes are unaffected by this change. Python 2.2 + originally used a topological sort of a class's ancestors, but 2.3 now uses the + C3 algorithm as described in the paper `"A Monotonic Superclass Linearization + for Dylan" <http://www.webcom.com/haahr/dylan/linearization-oopsla96.html>`_. To + understand the motivation for this change, read Michele Simionato's article + `"Python 2.3 Method Resolution Order" <http://www.python.org/2.3/mro.html>`_, or + read the thread on python-dev starting with the message at + http://mail.python.org/pipermail/python-dev/2002-October/029035.html. Samuele + Pedroni first pointed out the problem and also implemented the fix by coding the + C3 algorithm. + +* Python runs multithreaded programs by switching between threads after + executing N bytecodes. The default value for N has been increased from 10 to + 100 bytecodes, speeding up single-threaded applications by reducing the + switching overhead. Some multithreaded applications may suffer slower response + time, but that's easily fixed by setting the limit back to a lower number using + :func:`sys.setcheckinterval(N)`. 
The limit can be retrieved with the new + :func:`sys.getcheckinterval` function. + +* One minor but far-reaching change is that the names of extension types defined + by the modules included with Python now contain the module and a ``'.'`` in + front of the type name. For example, in Python 2.2, if you created a socket and + printed its :attr:`__class__`, you'd get this output:: + + >>> s = socket.socket() + >>> s.__class__ + <type 'socket'> + + In 2.3, you get this:: + + >>> s.__class__ + <type '_socket.socket'> + +* One of the noted incompatibilities between old- and new-style classes has been + removed: you can now assign to the :attr:`__name__` and :attr:`__bases__` + attributes of new-style classes. There are some restrictions on what can be + assigned to :attr:`__bases__` along the lines of those relating to assigning to + an instance's :attr:`__class__` attribute. + +.. % ====================================================================== + + +String Changes +-------------- + +* The :keyword:`in` operator now works differently for strings. Previously, when + evaluating ``X in Y`` where *X* and *Y* are strings, *X* could only be a single + character. That's now changed; *X* can be a string of any length, and ``X in Y`` + will return :const:`True` if *X* is a substring of *Y*. If *X* is the empty + string, the result is always :const:`True`. :: + + >>> 'ab' in 'abcd' + True + >>> 'ad' in 'abcd' + False + >>> '' in 'abcd' + True + + Note that this doesn't tell you where the substring starts; if you need that + information, use the :meth:`find` string method. + +* The :meth:`strip`, :meth:`lstrip`, and :meth:`rstrip` string methods now have + an optional argument for specifying the characters to strip. 
The default is + still to remove all whitespace characters:: + + >>> ' abc '.strip() + 'abc' + >>> '><><abc<><><>'.strip('<>') + 'abc' + >>> '><><abc<><><>\n'.strip('<>') + 'abc<><><>\n' + >>> u'\u4000\u4001abc\u4000'.strip(u'\u4000') + u'\u4001abc' + >>> + + (Suggested by Simon Brunning and implemented by Walter Dörwald.) + +* The :meth:`startswith` and :meth:`endswith` string methods now accept negative + numbers for the *start* and *end* parameters. + +* Another new string method is :meth:`zfill`, originally a function in the + :mod:`string` module. :meth:`zfill` pads a numeric string with zeros on the + left until it's the specified width. Note that the ``%`` operator is still more + flexible and powerful than :meth:`zfill`. :: + + >>> '45'.zfill(4) + '0045' + >>> '12345'.zfill(4) + '12345' + >>> 'goofy'.zfill(6) + '0goofy' + + (Contributed by Walter Dörwald.) + +* A new type object, :class:`basestring`, has been added. Both 8-bit strings and + Unicode strings inherit from this type, so ``isinstance(obj, basestring)`` will + return :const:`True` for either kind of string. It's a completely abstract + type, so you can't create :class:`basestring` instances. + +* Interned strings are no longer immortal and will now be garbage-collected in + the usual way when the only reference to them is from the internal dictionary of + interned strings. (Implemented by Oren Tirosh.) + +.. % ====================================================================== + + +Optimizations +------------- + +* The creation of new-style class instances has been made much faster; they're + now faster than classic classes! + +* The :meth:`sort` method of list objects has been extensively rewritten by Tim + Peters, and the implementation is significantly faster. + +* Multiplication of large long integers is now much faster thanks to an + implementation of Karatsuba multiplication, an algorithm that scales better than + the O(n\*n) required for the grade-school multiplication algorithm. 
(Original + patch by Christopher A. Craig, and significantly reworked by Tim Peters.) + +* The ``SET_LINENO`` opcode is now gone. This may provide a small speed + increase, depending on your compiler's idiosyncrasies. See section + :ref:`section-other` for a longer explanation. (Removed by Michael Hudson.) + +* :func:`xrange` objects now have their own iterator, making ``for i in + xrange(n)`` slightly faster than ``for i in range(n)``. (Patch by Raymond + Hettinger.) + +* A number of small rearrangements have been made in various hotspots to improve + performance, such as inlining a function or removing some code. (Implemented + mostly by GvR, but lots of people have contributed single changes.) + +The net result of the 2.3 optimizations is that Python 2.3 runs the pystone +benchmark around 25% faster than Python 2.2. + +.. % ====================================================================== + + +New, Improved, and Deprecated Modules +===================================== + +As usual, Python's standard library received a number of enhancements and bug +fixes. Here's a partial list of the most notable changes, sorted alphabetically +by module name. Consult the :file:`Misc/NEWS` file in the source tree for a more +complete list of changes, or look through the CVS logs for all the details. + +* The :mod:`array` module now supports arrays of Unicode characters using the + ``'u'`` format character. Arrays also now support using the ``+=`` assignment + operator to add another array's contents, and the ``*=`` assignment operator to + repeat an array. (Contributed by Jason Orendorff.) + +* The :mod:`bsddb` module has been replaced by version 4.1.6 of the `PyBSDDB + <http://pybsddb.sourceforge.net>`_ package, providing a more complete interface + to the transactional features of the BerkeleyDB library. + + The old version of the module has been renamed to :mod:`bsddb185` and is no + longer built automatically; you'll have to edit :file:`Modules/Setup` to enable + it. 
Note that the new :mod:`bsddb` package is intended to be compatible with + the old module, so be sure to file bugs if you discover any incompatibilities. + When upgrading to Python 2.3, if the new interpreter is compiled with a new + version of the underlying BerkeleyDB library, you will almost certainly have to + convert your database files to the new version. You can do this fairly easily + with the new scripts :file:`db2pickle.py` and :file:`pickle2db.py` which you + will find in the distribution's :file:`Tools/scripts` directory. If you've + already been using the PyBSDDB package and importing it as :mod:`bsddb3`, you + will have to change your ``import`` statements to import it as :mod:`bsddb`. + +* The new :mod:`bz2` module is an interface to the bz2 data compression library. + bz2-compressed data is usually smaller than corresponding :mod:`zlib`\ + -compressed data. (Contributed by Gustavo Niemeyer.) + +* A set of standard date/time types has been added in the new :mod:`datetime` + module. See the following section for more details. + +* The Distutils :class:`Extension` class now supports an extra constructor + argument named *depends* for listing additional source files that an extension + depends on. This lets Distutils recompile the module if any of the dependency + files are modified. For example, if :file:`sampmodule.c` includes the header + file :file:`sample.h`, you would create the :class:`Extension` object like + this:: + + ext = Extension("samp", + sources=["sampmodule.c"], + depends=["sample.h"]) + + Modifying :file:`sample.h` would then cause the module to be recompiled. + (Contributed by Jeremy Hylton.) + +* Other minor changes to Distutils: it now checks for the :envvar:`CC`, + :envvar:`CFLAGS`, :envvar:`CPP`, :envvar:`LDFLAGS`, and :envvar:`CPPFLAGS` + environment variables, using them to override the settings in Python's + configuration (contributed by Robert Weber). 
+ +* Previously the :mod:`doctest` module would only search the docstrings of + public methods and functions for test cases, but it now examines private + ones as well. The :func:`DocTestSuite` function creates a + :class:`unittest.TestSuite` object from a set of :mod:`doctest` tests. + +* The new :func:`gc.get_referents(object)` function returns a list of all the + objects referenced by *object*. + +* The :mod:`getopt` module gained a new function, :func:`gnu_getopt`, that + supports the same arguments as the existing :func:`getopt` function but uses + GNU-style scanning mode. The existing :func:`getopt` stops processing options as + soon as a non-option argument is encountered, but in GNU-style mode processing + continues, meaning that options and arguments can be mixed. For example:: + + >>> getopt.getopt(['-f', 'filename', 'output', '-v'], 'f:v') + ([('-f', 'filename')], ['output', '-v']) + >>> getopt.gnu_getopt(['-f', 'filename', 'output', '-v'], 'f:v') + ([('-f', 'filename'), ('-v', '')], ['output']) + + (Contributed by Peter Åstrand.) + +* The :mod:`grp`, :mod:`pwd`, and :mod:`resource` modules now return enhanced + tuples:: + + >>> import grp + >>> g = grp.getgrnam('amk') + >>> g.gr_name, g.gr_gid + ('amk', 500) + +* The :mod:`gzip` module can now handle files exceeding 2 GiB. + +* The new :mod:`heapq` module contains an implementation of a heap queue + algorithm. A heap is an array-like data structure that keeps items in a + partially sorted order such that, for every index *k*, ``heap[k] <= + heap[2*k+1]`` and ``heap[k] <= heap[2*k+2]``. This makes it quick to remove the + smallest item, and inserting a new item while maintaining the heap property is + O(lg n). (See http://www.nist.gov/dads/HTML/priorityque.html for more + information about the priority queue data structure.)
+ + The :mod:`heapq` module provides :func:`heappush` and :func:`heappop` functions + for adding and removing items while maintaining the heap property on top of some + other mutable Python sequence type. Here's an example that uses a Python list:: + + >>> import heapq + >>> heap = [] + >>> for item in [3, 7, 5, 11, 1]: + ... heapq.heappush(heap, item) + ... + >>> heap + [1, 3, 5, 11, 7] + >>> heapq.heappop(heap) + 1 + >>> heapq.heappop(heap) + 3 + >>> heap + [5, 7, 11] + + (Contributed by Kevin O'Connor.) + +* The IDLE integrated development environment has been updated using the code + from the IDLEfork project (http://idlefork.sf.net). The most notable feature is + that the code being developed is now executed in a subprocess, meaning that + there's no longer any need for manual ``reload()`` operations. IDLE's core code + has been incorporated into the standard library as the :mod:`idlelib` package. + +* The :mod:`imaplib` module now supports IMAP over SSL. (Contributed by Piers + Lauder and Tino Lange.) + +* The new :mod:`itertools` module contains a number of useful functions for use with + iterators, inspired by various functions provided by the ML and Haskell + languages. For example, ``itertools.ifilter(predicate, iterator)`` returns all + elements in the iterator for which the function :func:`predicate` returns + :const:`True`, and ``itertools.repeat(obj, N)`` returns ``obj`` *N* times. + There are a number of other functions in the module; see the module's reference + documentation for details. + (Contributed by Raymond Hettinger.) + +* Two new functions in the :mod:`math` module, :func:`degrees(rads)` and + :func:`radians(degs)`, convert between radians and degrees. Other functions in + the :mod:`math` module such as :func:`math.sin` and :func:`math.cos` have always + required input values measured in radians. Also, an optional *base* argument + was added to :func:`math.log` to make it easier to compute logarithms for bases + other than ``e`` and ``10``.
(Contributed by Raymond Hettinger.) + +* Several new POSIX functions (:func:`getpgid`, :func:`killpg`, :func:`lchown`, + :func:`loadavg`, :func:`major`, :func:`makedev`, :func:`minor`, and + :func:`mknod`) were added to the :mod:`posix` module that underlies the + :mod:`os` module. (Contributed by Gustavo Niemeyer, Geert Jansen, and Denis S. + Otkidach.) + +* In the :mod:`os` module, the :func:`\*stat` family of functions can now report + fractions of a second in a timestamp. Such time stamps are represented as + floats, similar to the value returned by :func:`time.time`. + + During testing, it was found that some applications will break if time stamps + are floats. For compatibility, when using the tuple interface of the + :class:`stat_result` time stamps will be represented as integers. When using + named fields (a feature first introduced in Python 2.2), time stamps are still + represented as integers, unless :func:`os.stat_float_times` is invoked to enable + float return values:: + + >>> os.stat("/tmp").st_mtime + 1034791200 + >>> os.stat_float_times(True) + >>> os.stat("/tmp").st_mtime + 1034791200.6335014 + + In Python 2.4, the default will change to always returning floats. + + Application developers should enable this feature only if all their libraries + work properly when confronted with floating point time stamps, or if they use + the tuple API. If used, the feature should be activated on an application level + instead of trying to enable it on a per-use basis. + +* The :mod:`optparse` module contains a new parser for command-line arguments + that can convert option values to a particular Python type and will + automatically generate a usage message. See the following section for more + details. + +* The old and never-documented :mod:`linuxaudiodev` module has been deprecated, + and a new version named :mod:`ossaudiodev` has been added. 
The module was + renamed because the OSS sound drivers can be used on platforms other than Linux, + and the interface has also been tidied and brought up to date in various ways. + (Contributed by Greg Ward and Nicholas FitzRoy-Dale.) + +* The new :mod:`platform` module contains a number of functions that try to + determine various properties of the platform you're running on. There are + functions for getting the architecture, CPU type, the Windows OS version, and + even the Linux distribution version. (Contributed by Marc-André Lemburg.) + +* The parser objects provided by the :mod:`pyexpat` module can now optionally + buffer character data, resulting in fewer calls to your character data handler + and therefore faster performance. Setting the parser object's + :attr:`buffer_text` attribute to :const:`True` will enable buffering. + +* The :func:`sample(population, k)` function was added to the :mod:`random` + module. *population* is a sequence or :class:`xrange` object containing the + elements of a population, and :func:`sample` chooses *k* elements from the + population without replacing chosen elements. *k* can be any value up to + ``len(population)``. For example:: + + >>> days = ['Mo', 'Tu', 'We', 'Th', 'Fr', 'St', 'Sn'] + >>> random.sample(days, 3) # Choose 3 elements + ['St', 'Sn', 'Th'] + >>> random.sample(days, 7) # Choose 7 elements + ['Tu', 'Th', 'Mo', 'We', 'St', 'Fr', 'Sn'] + >>> random.sample(days, 7) # Choose 7 again + ['We', 'Mo', 'Sn', 'Fr', 'Tu', 'St', 'Th'] + >>> random.sample(days, 8) # Can't choose eight + Traceback (most recent call last): + File "<stdin>", line 1, in ? + File "random.py", line 414, in sample + raise ValueError, "sample larger than population" + ValueError: sample larger than population + >>> random.sample(xrange(1,10000,2), 10) # Choose ten odd nos. under 10000 + [3407, 3805, 1505, 7023, 2401, 2267, 9733, 3151, 8083, 9195] + + The :mod:`random` module now uses a new algorithm, the Mersenne Twister, + implemented in C. 
It's faster and more extensively studied than the previous + algorithm. + + (All changes contributed by Raymond Hettinger.) + +* The :mod:`readline` module also gained a number of new functions: + :func:`get_history_item`, :func:`get_current_history_length`, and + :func:`redisplay`. + +* The :mod:`rexec` and :mod:`Bastion` modules have been declared dead, and + attempts to import them will fail with a :exc:`RuntimeError`. New-style classes + provide new ways to break out of the restricted execution environment provided + by :mod:`rexec`, and no one has interest in fixing them or time to do so. If + you have applications using :mod:`rexec`, rewrite them to use something else. + + (Sticking with Python 2.2 or 2.1 will not make your applications any safer + because there are known bugs in the :mod:`rexec` module in those versions. To + repeat: if you're using :mod:`rexec`, stop using it immediately.) + +* The :mod:`rotor` module has been deprecated because the algorithm it uses for + encryption is not believed to be secure. If you need encryption, use one of the + several AES Python modules that are available separately. + +* The :mod:`shutil` module gained a :func:`move(src, dest)` function that + recursively moves a file or directory to a new location. + +* Support for more advanced POSIX signal handling was added to the :mod:`signal` + module but then removed again as it proved impossible to make it work reliably across + platforms. + +* The :mod:`socket` module now supports timeouts. You can call the + :meth:`settimeout(t)` method on a socket object to set a timeout of *t* seconds. + Subsequent socket operations that take longer than *t* seconds to complete will + abort and raise a :exc:`socket.timeout` exception. + + The original timeout implementation was by Tim O'Malley. Michael Gilfix + integrated it into the Python :mod:`socket` module and shepherded it through a + lengthy review. After the code was checked in, Guido van Rossum rewrote parts + of it.
(This is a good example of a collaborative development process in + action.) + +* On Windows, the :mod:`socket` module now ships with Secure Sockets Layer + (SSL) support. + +* The value of the C :const:`PYTHON_API_VERSION` macro is now exposed at the + Python level as ``sys.api_version``. The current exception can be cleared by + calling the new :func:`sys.exc_clear` function. + +* The new :mod:`tarfile` module allows reading from and writing to + :program:`tar`\ -format archive files. (Contributed by Lars Gustäbel.) + +* The new :mod:`textwrap` module contains functions for wrapping strings + containing paragraphs of text. The :func:`wrap(text, width)` function takes a + string and returns a list containing the text split into lines of no more than + the chosen width. The :func:`fill(text, width)` function returns a single + string, reformatted to fit into lines no longer than the chosen width. (As you + can guess, :func:`fill` is built on top of :func:`wrap`.) For example:: + + >>> import textwrap + >>> paragraph = "Not a whit, we defy augury: ... more text ..." + >>> textwrap.wrap(paragraph, 60) + ["Not a whit, we defy augury: there's a special providence in", + "the fall of a sparrow. If it be now, 'tis not to come; if it", + ...] + >>> print textwrap.fill(paragraph, 35) + Not a whit, we defy augury: there's + a special providence in the fall of + a sparrow. If it be now, 'tis not + to come; if it be not to come, it + will be now; if it be not now, yet + it will come: the readiness is all. + >>> + + The module also contains a :class:`TextWrapper` class that actually implements + the text wrapping strategy. Both the :class:`TextWrapper` class and the + :func:`wrap` and :func:`fill` functions support a number of additional keyword + arguments for fine-tuning the formatting; consult the module's documentation + for details. (Contributed by Greg Ward.)
+ +* The :mod:`thread` and :mod:`threading` modules now have companion modules, + :mod:`dummy_thread` and :mod:`dummy_threading`, that provide a do-nothing + implementation of the :mod:`thread` module's interface for platforms where + threads are not supported. The intention is to simplify thread-aware modules + (ones that *don't* rely on threads to run) by putting the following code at the + top:: + + try: + import threading as _threading + except ImportError: + import dummy_threading as _threading + + In this example, :mod:`_threading` is used as the module name to make it clear + that the module being used is not necessarily the actual :mod:`threading` + module. Code can call functions and use classes in :mod:`_threading` whether or + not threads are supported, avoiding an :keyword:`if` statement and making the + code slightly clearer. This module will not magically make multithreaded code + run without threads; code that waits for another thread to return or to do + something will simply hang forever. + +* The :mod:`time` module's :func:`strptime` function has long been an annoyance + because it uses the platform C library's :func:`strptime` implementation, and + different platforms sometimes have odd bugs. Brett Cannon contributed a + portable implementation that's written in pure Python and should behave + identically on all platforms. + +* The new :mod:`timeit` module helps measure how long snippets of Python code + take to execute. The :file:`timeit.py` file can be run directly from the + command line, or the module's :class:`Timer` class can be imported and used + directly. 
Here's a short example that figures out whether it's faster to + convert an 8-bit string to Unicode by appending an empty Unicode string to it or + by using the :func:`unicode` function:: + + import timeit + + timer1 = timeit.Timer('unicode("abc")') + timer2 = timeit.Timer('"abc" + u""') + + # Run three trials + print timer1.repeat(repeat=3, number=100000) + print timer2.repeat(repeat=3, number=100000) + + # On my laptop this outputs: + # [0.36831796169281006, 0.37441694736480713, 0.35304892063140869] + # [0.17574405670166016, 0.18193507194519043, 0.17565798759460449] + +* The :mod:`Tix` module has received various bug fixes and updates for the + current version of the Tix package. + +* The :mod:`Tkinter` module now works with a thread-enabled version of Tcl. + Tcl's threading model requires that widgets only be accessed from the thread in + which they're created; accesses from another thread can cause Tcl to panic. For + certain Tcl interfaces, :mod:`Tkinter` will now automatically avoid this when a + widget is accessed from a different thread by marshalling a command, passing it + to the correct thread, and waiting for the results. Other interfaces can't be + handled automatically but :mod:`Tkinter` will now raise an exception on such an + access so that you can at least find out about the problem. See + http://mail.python.org/pipermail/python-dev/2002-December/031107.html for a more + detailed explanation of this change. (Implemented by Martin von Löwis.) + + .. % + +* Calling Tcl methods through :mod:`_tkinter` no longer returns only strings. + Instead, if Tcl returns other objects those objects are converted to their + Python equivalent, if one exists, or wrapped with a :class:`_tkinter.Tcl_Obj` + object if no Python equivalent exists. This behavior can be controlled through + the :meth:`wantobjects` method of :class:`tkapp` objects. 
+ + When using :mod:`_tkinter` through the :mod:`Tkinter` module (as most Tkinter + applications will), this feature is always activated. It should not cause + compatibility problems, since Tkinter would always convert string results to + Python types where possible. + + If any incompatibilities are found, the old behavior can be restored by setting + the :attr:`wantobjects` variable in the :mod:`Tkinter` module to false before + creating the first :class:`tkapp` object. :: + + import Tkinter + Tkinter.wantobjects = 0 + + Any breakage caused by this change should be reported as a bug. + +* The :mod:`UserDict` module has a new :class:`DictMixin` class which defines + all dictionary methods for classes that already have a minimum mapping + interface. This greatly simplifies writing classes that need to be + substitutable for dictionaries, such as the classes in the :mod:`shelve` + module. + + Adding the mix-in as a superclass provides the full dictionary interface + whenever the class defines :meth:`__getitem__`, :meth:`__setitem__`, + :meth:`__delitem__`, and :meth:`keys`. For example:: + + >>> import UserDict + >>> class SeqDict(UserDict.DictMixin): + ... """Dictionary lookalike implemented with lists.""" + ... def __init__(self): + ... self.keylist = [] + ... self.valuelist = [] + ... def __getitem__(self, key): + ... try: + ... i = self.keylist.index(key) + ... except ValueError: + ... raise KeyError + ... return self.valuelist[i] + ... def __setitem__(self, key, value): + ... try: + ... i = self.keylist.index(key) + ... self.valuelist[i] = value + ... except ValueError: + ... self.keylist.append(key) + ... self.valuelist.append(value) + ... def __delitem__(self, key): + ... try: + ... i = self.keylist.index(key) + ... except ValueError: + ... raise KeyError + ... self.keylist.pop(i) + ... self.valuelist.pop(i) + ... def keys(self): + ... return list(self.keylist) + ... 
+ >>> s = SeqDict() + >>> dir(s) # See that other dictionary methods are implemented + ['__cmp__', '__contains__', '__delitem__', '__doc__', '__getitem__', + '__init__', '__iter__', '__len__', '__module__', '__repr__', + '__setitem__', 'clear', 'get', 'has_key', 'items', 'iteritems', + 'iterkeys', 'itervalues', 'keylist', 'keys', 'pop', 'popitem', + 'setdefault', 'update', 'valuelist', 'values'] + + (Contributed by Raymond Hettinger.) + +* The DOM implementation in :mod:`xml.dom.minidom` can now generate XML output + in a particular encoding by providing an optional encoding argument to the + :meth:`toxml` and :meth:`toprettyxml` methods of DOM nodes. + +* The :mod:`xmlrpclib` module now supports an XML-RPC extension for handling nil + data values such as Python's ``None``. Nil values are always supported on + unmarshalling an XML-RPC response. To generate requests containing ``None``, + you must supply a true value for the *allow_none* parameter when creating a + :class:`Marshaller` instance. + +* The new :mod:`DocXMLRPCServer` module allows writing self-documenting XML-RPC + servers. Run it in demo mode (as a program) to see it in action. Pointing the + Web browser to the RPC server produces pydoc-style documentation; pointing + xmlrpclib to the server allows invoking the actual methods. (Contributed by + Brian Quinlan.) + +* Support for internationalized domain names (RFCs 3454, 3490, 3491, and 3492) + has been added. The "idna" encoding can be used to convert between a Unicode + domain name and the ASCII-compatible encoding (ACE) of that name. :: + + >>> u"www.Alliancefrançaise.nu".encode("idna") + 'www.xn--alliancefranaise-npb.nu' + + The :mod:`socket` module has also been extended to transparently convert + Unicode hostnames to the ACE version before passing them to the C library.
+ Modules that deal with hostnames (such as :mod:`httplib` and :mod:`ftplib`) + also support Unicode host names; :mod:`httplib` also sends HTTP ``Host`` + headers using the ACE version of the domain name. :mod:`urllib` supports + Unicode URLs with non-ASCII host names as long as the ``path`` part of the URL + is ASCII only. + + To implement this change, the :mod:`stringprep` module, the ``mkstringprep`` + tool and the ``punycode`` encoding have been added. + +.. % ====================================================================== + + +Date/Time Type +-------------- + +Date and time types suitable for expressing timestamps were added as the +:mod:`datetime` module. The types don't support different calendars or many +fancy features, and just stick to the basics of representing time. + +The three primary types are: :class:`date`, representing a day, month, and year; +:class:`time`, consisting of hour, minute, and second; and :class:`datetime`, +which contains all the attributes of both :class:`date` and :class:`time`. +There's also a :class:`timedelta` class representing differences between two +points in time, and time zone logic is implemented by classes inheriting from +the abstract :class:`tzinfo` class. + +You can create instances of :class:`date` and :class:`time` by either supplying +keyword arguments to the appropriate constructor, e.g. +``datetime.date(year=1972, month=10, day=15)``, or by using one of a number of +class methods. For example, the :meth:`date.today` class method returns the +current local date. + +Once created, instances of the date/time classes are all immutable.
There are a +number of methods for producing formatted strings from objects:: + + >>> import datetime + >>> now = datetime.datetime.now() + >>> now.isoformat() + '2002-12-30T21:27:03.994956' + >>> now.ctime() # Only available on date, datetime + 'Mon Dec 30 21:27:03 2002' + >>> now.strftime('%Y %d %b') + '2002 30 Dec' + +The :meth:`replace` method allows modifying one or more fields of a +:class:`date` or :class:`datetime` instance, returning a new instance:: + + >>> d = datetime.datetime.now() + >>> d + datetime.datetime(2002, 12, 30, 22, 15, 38, 827738) + >>> d.replace(year=2001, hour = 12) + datetime.datetime(2001, 12, 30, 12, 15, 38, 827738) + >>> + +Instances can be compared, hashed, and converted to strings (the result is the +same as that of :meth:`isoformat`). :class:`date` and :class:`datetime` +instances can be subtracted from each other, and added to :class:`timedelta` +instances. The largest missing feature is that there's no standard library +support for parsing strings and getting back a :class:`date` or +:class:`datetime`. + +For more information, refer to the module's reference documentation. +(Contributed by Tim Peters.) + +.. % ====================================================================== + + +The optparse Module +------------------- + +The :mod:`getopt` module provides simple parsing of command-line arguments. The +new :mod:`optparse` module (originally named Optik) provides more elaborate +command-line parsing that follows the Unix conventions, automatically creates +the output for :option:`--help`, and can perform different actions for different +options. + +You start by creating an instance of :class:`OptionParser` and telling it what +your program's options are. 
:: + + import sys + from optparse import OptionParser + + op = OptionParser() + op.add_option('-i', '--input', + action='store', type='string', dest='input', + help='set input filename') + op.add_option('-l', '--length', + action='store', type='int', dest='length', + help='set maximum length of output') + +Parsing a command line is then done by calling the :meth:`parse_args` method. :: + + options, args = op.parse_args(sys.argv[1:]) + print options + print args + +This returns an object containing all of the option values, and a list of +strings containing the remaining arguments. + +Invoking the script with the various arguments now works as you'd expect it to. +Note that the length argument is automatically converted to an integer. :: + + $ ./python opt.py -i data arg1 + <Values at 0x400cad4c: {'input': 'data', 'length': None}> + ['arg1'] + $ ./python opt.py --input=data --length=4 + <Values at 0x400cad2c: {'input': 'data', 'length': 4}> + [] + $ + +The help message is automatically generated for you:: + + $ ./python opt.py --help + usage: opt.py [options] + + options: + -h, --help show this help message and exit + -iINPUT, --input=INPUT + set input filename + -lLENGTH, --length=LENGTH + set maximum length of output + $ + +See the module's documentation for more details. + + +Optik was written by Greg Ward, with suggestions from the readers of the Getopt +SIG. + +.. % ====================================================================== + + +.. _section-pymalloc: + +Pymalloc: A Specialized Object Allocator +======================================== + +Pymalloc, a specialized object allocator written by Vladimir Marangozov, was a +feature added to Python 2.1. Pymalloc is intended to be faster than the system +:cfunc:`malloc` and to have less memory overhead for allocation patterns typical +of Python programs. The allocator uses C's :cfunc:`malloc` function to get large +pools of memory and then fulfills smaller memory requests from these pools. 
+ +In 2.1 and 2.2, pymalloc was an experimental feature and wasn't enabled by +default; you had to explicitly enable it when compiling Python by providing the +:option:`--with-pymalloc` option to the :program:`configure` script. In 2.3, +pymalloc has had further enhancements and is now enabled by default; you'll have +to supply :option:`--without-pymalloc` to disable it. + +This change is transparent to code written in Python; however, pymalloc may +expose bugs in C extensions. Authors of C extension modules should test their +code with pymalloc enabled, because some incorrect code may cause core dumps at +runtime. + +There's one particularly common error that causes problems. There are a number +of memory allocation functions in Python's C API that have previously just been +aliases for the C library's :cfunc:`malloc` and :cfunc:`free`, meaning that if +you accidentally called mismatched functions the error wouldn't be noticeable. +When the object allocator is enabled, these functions aren't aliases of +:cfunc:`malloc` and :cfunc:`free` any more, and calling the wrong function to +free memory may get you a core dump. For example, if memory was allocated using +:cfunc:`PyObject_Malloc`, it has to be freed using :cfunc:`PyObject_Free`, not +:cfunc:`free`. A few modules included with Python fell afoul of this and had to +be fixed; doubtless there are more third-party modules that will have the same +problem. + +As part of this change, the confusing multiple interfaces for allocating memory +have been consolidated down into two API families. Memory allocated with one +family must not be manipulated with functions from the other family. There is +one family for allocating chunks of memory and another family of functions +specifically for allocating Python objects. + +* To allocate and free an undistinguished chunk of memory use the "raw memory" + family: :cfunc:`PyMem_Malloc`, :cfunc:`PyMem_Realloc`, and :cfunc:`PyMem_Free`. 
+ +* The "object memory" family is the interface to the pymalloc facility described + above and is biased towards a large number of "small" allocations: + :cfunc:`PyObject_Malloc`, :cfunc:`PyObject_Realloc`, and :cfunc:`PyObject_Free`. + +* To allocate and free Python objects, use the "object" family + :cfunc:`PyObject_New`, :cfunc:`PyObject_NewVar`, and :cfunc:`PyObject_Del`. + +Thanks to lots of work by Tim Peters, pymalloc in 2.3 also provides debugging +features to catch memory overwrites and doubled frees in both extension modules +and in the interpreter itself. To enable this support, compile a debugging +version of the Python interpreter by running :program:`configure` with +:option:`--with-pydebug`. + +To aid extension writers, a header file :file:`Misc/pymemcompat.h` is +distributed with the source to Python 2.3 that allows Python extensions to use +the 2.3 interfaces to memory allocation while compiling against any version of +Python since 1.5.2. You would copy the file from Python's source distribution +and bundle it with the source of your extension. + + +.. seealso:: + + http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/python/python/dist/src/Objects/obmalloc.c + For the full details of the pymalloc implementation, see the comments at the top + of the file :file:`Objects/obmalloc.c` in the Python source code. The above + link points to the file within the SourceForge CVS browser. + +.. % ====================================================================== + + +Build and C API Changes +======================= + +Changes to Python's build process and to the C API include: + +* The cycle detection implementation used by the garbage collection has proven + to be stable, so it's now been made mandatory. You can no longer compile Python + without it, and the :option:`--with-cycle-gc` switch to :program:`configure` has + been removed. 
+ +* Python can now optionally be built as a shared library + (:file:`libpython2.3.so`) by supplying :option:`--enable-shared` when running + Python's :program:`configure` script. (Contributed by Ondrej Palkovsky.) + +* The :cmacro:`DL_EXPORT` and :cmacro:`DL_IMPORT` macros are now deprecated. + Initialization functions for Python extension modules should now be declared + using the new macro :cmacro:`PyMODINIT_FUNC`, while the Python core will + generally use the :cmacro:`PyAPI_FUNC` and :cmacro:`PyAPI_DATA` macros. + +* The interpreter can be compiled without any docstrings for the built-in + functions and modules by supplying :option:`--without-doc-strings` to the + :program:`configure` script. This makes the Python executable about 10% smaller, + but will also mean that you can't get help for Python's built-ins. (Contributed + by Gustavo Niemeyer.) + +* The :cfunc:`PyArg_NoArgs` macro is now deprecated, and code that uses it + should be changed. For Python 2.2 and later, the method definition table can + specify the :const:`METH_NOARGS` flag, signalling that there are no arguments, + and the argument checking can then be removed. If compatibility with pre-2.2 + versions of Python is important, the code could use ``PyArg_ParseTuple(args, + "")`` instead, but this will be slower than using :const:`METH_NOARGS`. + +* :cfunc:`PyArg_ParseTuple` accepts new format characters for various sizes of + unsigned integers: ``B`` for :ctype:`unsigned char`, ``H`` for :ctype:`unsigned + short int`, ``I`` for :ctype:`unsigned int`, and ``K`` for :ctype:`unsigned + long long`. + +* A new function, :cfunc:`PyObject_DelItemString(mapping, char \*key)` was added + as shorthand for ``PyObject_DelItem(mapping, PyString_New(key))``. + +* File objects now manage their internal string buffer differently, increasing + it exponentially when needed. 
This results in the benchmark tests in + :file:`Lib/test/test_bufio.py` speeding up considerably (from 57 seconds to 1.7 + seconds, according to one measurement). + +* It's now possible to define class and static methods for a C extension type by + setting either the :const:`METH_CLASS` or :const:`METH_STATIC` flags in a + method's :ctype:`PyMethodDef` structure. + +* Python now includes a copy of the Expat XML parser's source code, removing any + dependence on a system version or local installation of Expat. + +* If you dynamically allocate type objects in your extension, you should be + aware of a change in the rules relating to the :attr:`__module__` and + :attr:`__name__` attributes. In summary, you will want to ensure the type's + dictionary contains a ``'__module__'`` key; making the module name the part of + the type name leading up to the final period will no longer have the desired + effect. For more detail, read the API reference documentation or the source. + +.. % ====================================================================== + + +Port-Specific Changes +--------------------- + +Support for a port to IBM's OS/2 using the EMX runtime environment was merged +into the main Python source tree. EMX is a POSIX emulation layer over the OS/2 +system APIs. The Python port for EMX tries to support all the POSIX-like +capability exposed by the EMX runtime, and mostly succeeds; :func:`fork` and +:func:`fcntl` are restricted by the limitations of the underlying emulation +layer. The standard OS/2 port, which uses IBM's Visual Age compiler, also +gained support for case-sensitive import semantics as part of the integration of +the EMX port into CVS. (Contributed by Andrew MacIntyre.) + +On MacOS, most toolbox modules have been weaklinked to improve backward +compatibility. This means that modules will no longer fail to load if a single +routine is missing on the current OS version. Instead calling the missing +routine will raise an exception. 
(Contributed by Jack Jansen.) + +The RPM spec files, found in the :file:`Misc/RPM/` directory in the Python +source distribution, were updated for 2.3. (Contributed by Sean Reifschneider.) + +Other new platforms now supported by Python include AtheOS +(http://www.atheos.cx/), GNU/Hurd, and OpenVMS. + +.. % ====================================================================== + + +.. _section-other: + +Other Changes and Fixes +======================= + +As usual, there were a bunch of other improvements and bugfixes scattered +throughout the source tree. A search through the CVS change logs finds there +were 523 patches applied and 514 bugs fixed between Python 2.2 and 2.3. Both +figures are likely to be underestimates. + +Some of the more notable changes are: + +* If the :envvar:`PYTHONINSPECT` environment variable is set, the Python + interpreter will enter the interactive prompt after running a Python program, as + if Python had been invoked with the :option:`-i` option. The environment + variable can be set before running the Python interpreter, or it can be set by + the Python program as part of its execution. + +* The :file:`regrtest.py` script now provides a way to allow "all resources + except *foo*." A resource name passed to the :option:`-u` option can now be + prefixed with a hyphen (``'-'``) to mean "remove this resource." For example, + the option '``-uall,-bsddb``' could be used to enable the use of all resources + except ``bsddb``. + +* The tools used to build the documentation now work under Cygwin as well as + Unix. + +* The ``SET_LINENO`` opcode has been removed. Back in the mists of time, this + opcode was needed to produce line numbers in tracebacks and support trace + functions (for, e.g., :mod:`pdb`). Since Python 1.5, the line numbers in + tracebacks have been computed using a different mechanism that works with + "python -O". 
For Python 2.3 Michael Hudson implemented a similar scheme to + determine when to call the trace function, removing the need for ``SET_LINENO`` + entirely. + + It would be difficult to detect any resulting difference from Python code, apart + from a slight speed up when Python is run without :option:`-O`. + + C extensions that access the :attr:`f_lineno` field of frame objects should + instead call ``PyCode_Addr2Line(f->f_code, f->f_lasti)``. This will have the + added effect of making the code work as desired under "python -O" in earlier + versions of Python. + + A nifty new feature is that trace functions can now assign to the + :attr:`f_lineno` attribute of frame objects, changing the line that will be + executed next. A ``jump`` command has been added to the :mod:`pdb` debugger + taking advantage of this new feature. (Implemented by Richie Hindle.) + +.. % ====================================================================== + + +Porting to Python 2.3 +===================== + +This section lists previously described changes that may require changes to your +code: + +* :keyword:`yield` is now always a keyword; if it's used as a variable name in + your code, a different name must be chosen. + +* For strings *X* and *Y*, ``X in Y`` now works if *X* is more than one + character long. + +* The :func:`int` type constructor will now return a long integer instead of + raising an :exc:`OverflowError` when a string or floating-point number is too + large to fit into an integer. + +* If you have Unicode strings that contain 8-bit characters, you must declare + the file's encoding (UTF-8, Latin-1, or whatever) by adding a comment to the top + of the file. See section :ref:`section-encodings` for more information. + +* Calling Tcl methods through :mod:`_tkinter` no longer returns only strings. 
+ Instead, if Tcl returns other objects those objects are converted to their + Python equivalent, if one exists, or wrapped with a :class:`_tkinter.Tcl_Obj` + object if no Python equivalent exists. + +* Large octal and hex literals such as ``0xffffffff`` now trigger a + :exc:`FutureWarning`. Currently they're stored as 32-bit numbers and result in a + negative value, but in Python 2.4 they'll become positive long integers. + + There are a few ways to fix this warning. If you really need a positive number, + just add an ``L`` to the end of the literal. If you're trying to get a 32-bit + integer with low bits set and have previously used an expression such as ``~(1 + << 31)``, it's probably clearest to start with all bits set and clear the + desired upper bits. For example, to clear just the top bit (bit 31), you could + write ``0xffffffffL &~(1L<<31)``. + + .. % The empty groups below prevent conversion to guillemets. + +* You can no longer disable assertions by assigning to ``__debug__``. + +* The Distutils :func:`setup` function has gained various new keyword arguments + such as *depends*. Old versions of the Distutils will abort if passed unknown + keywords. A solution is to check for the presence of the new + :func:`get_distutil_options` function in your :file:`setup.py` and only use the + new keywords with a version of the Distutils that supports them:: + + from distutils import core + + kw = {'sources': 'foo.c', ...} + if hasattr(core, 'get_distutil_options'): + kw['depends'] = ['foo.h'] + ext = Extension(**kw) + +* Using ``None`` as a variable name will now result in a :exc:`SyntaxWarning` + warning. + +* Names of extension types defined by the modules included with Python now + contain the module and a ``'.'`` in front of the type name. + +.. % ====================================================================== + + +.. 
_acks: + +Acknowledgements +================ + +The author would like to thank the following people for offering suggestions, +corrections and assistance with various drafts of this article: Jeff Bauer, +Simon Brunning, Brett Cannon, Michael Chermside, Andrew Dalke, Scott David +Daniels, Fred L. Drake, Jr., David Fraser, Kelly Gerber, Raymond Hettinger, +Michael Hudson, Chris Lambert, Detlef Lannert, Martin von Löwis, Andrew +MacIntyre, Lalo Martins, Chad Netzer, Gustavo Niemeyer, Neal Norwitz, Hans +Nowak, Chris Reedy, Francesco Ricciardi, Vinay Sajip, Neil Schemenauer, Roman +Suzi, Jason Tishler, Just van Rossum. + diff --git a/Doc/whatsnew/2.4.rst b/Doc/whatsnew/2.4.rst new file mode 100644 index 0000000..d782f5d --- /dev/null +++ b/Doc/whatsnew/2.4.rst @@ -0,0 +1,1571 @@ +**************************** + What's New in Python 2.4 +**************************** + +:Author: A.M. Kuchling + +.. |release| replace:: 1.02 + +.. % $Id: whatsnew24.tex 55005 2007-04-27 19:54:29Z guido.van.rossum $ +.. % Don't write extensive text for new sections; I'll do that. +.. % Feel free to add commented-out reminders of things that need +.. % to be covered. --amk + +This article explains the new features in Python 2.4.1, released on March 30, +2005. + +Python 2.4 is a medium-sized release. It doesn't introduce as many changes as +the radical Python 2.2, but introduces more features than the conservative 2.3 +release. The most significant new language features are function decorators and +generator expressions; most other changes are to the standard library. + +According to the CVS change logs, there were 481 patches applied and 502 bugs +fixed between Python 2.3 and 2.4. Both figures are likely to be underestimates. + +This article doesn't attempt to provide a complete specification of every single +new feature, but instead provides a brief introduction to each feature. 
For +full details, you should refer to the documentation for Python 2.4, such as the +Python Library Reference and the Python Reference Manual. Often you will be +referred to the PEP for a particular new feature for explanations of the +implementation and design rationale. + +.. % ====================================================================== + + +PEP 218: Built-In Set Objects +============================= + +Python 2.3 introduced the :mod:`sets` module. C implementations of set data +types have now been added to the Python core as two new built-in types, +:func:`set(iterable)` and :func:`frozenset(iterable)`. They provide high speed +operations for membership testing, for eliminating duplicates from sequences, +and for mathematical operations like unions, intersections, differences, and +symmetric differences. :: + + >>> a = set('abracadabra') # form a set from a string + >>> 'z' in a # fast membership testing + False + >>> a # unique letters in a + set(['a', 'r', 'b', 'c', 'd']) + >>> ''.join(a) # convert back into a string + 'arbcd' + + >>> b = set('alacazam') # form a second set + >>> a - b # letters in a but not in b + set(['r', 'd', 'b']) + >>> a | b # letters in either a or b + set(['a', 'c', 'r', 'd', 'b', 'm', 'z', 'l']) + >>> a & b # letters in both a and b + set(['a', 'c']) + >>> a ^ b # letters in a or b but not both + set(['r', 'd', 'b', 'm', 'z', 'l']) + + >>> a.add('z') # add a new element + >>> a.update('wxy') # add multiple new elements + >>> a + set(['a', 'c', 'b', 'd', 'r', 'w', 'y', 'x', 'z']) + >>> a.remove('x') # take one element out + >>> a + set(['a', 'c', 'b', 'd', 'r', 'w', 'y', 'z']) + +The :func:`frozenset` type is an immutable version of :func:`set`. Since it is +immutable and hashable, it may be used as a dictionary key or as a member of +another set. + +The :mod:`sets` module remains in the standard library, and may be useful if you +wish to subclass the :class:`Set` or :class:`ImmutableSet` classes. 
There are +currently no plans to deprecate the module. + + +.. seealso:: + + :pep:`218` - Adding a Built-In Set Object Type + Originally proposed by Greg Wilson and ultimately implemented by Raymond + Hettinger. + +.. % ====================================================================== + + +PEP 237: Unifying Long Integers and Integers +============================================ + +The lengthy transition process for this PEP, begun in Python 2.2, takes another +step forward in Python 2.4. In 2.3, certain integer operations that would +behave differently after int/long unification triggered :exc:`FutureWarning` +warnings and returned values limited to 32 or 64 bits (depending on your +platform). In 2.4, these expressions no longer produce a warning and instead +produce a different result that's usually a long integer. + +The problematic expressions are primarily left shifts and lengthy hexadecimal +and octal constants. For example, ``2 << 32`` results in a warning in 2.3, +evaluating to 0 on 32-bit platforms. In Python 2.4, this expression now returns +the correct answer, 8589934592. + + +.. seealso:: + + :pep:`237` - Unifying Long Integers and Integers + Original PEP written by Moshe Zadka and GvR. The changes for 2.4 were + implemented by Kalle Svensson. + +.. % ====================================================================== + + +PEP 289: Generator Expressions +============================== + +The iterator feature introduced in Python 2.2 and the :mod:`itertools` module +make it easier to write programs that loop through large data sets without +having the entire data set in memory at one time. List comprehensions don't fit +into this picture very well because they produce a Python list object containing +all of the items. This unavoidably pulls all of the objects into memory, which +can be a problem if your data set is very large. 
When trying to write a +functionally-styled program, it would be natural to write something like:: + + links = [link for link in get_all_links() if not link.followed] + for link in links: + ... + +instead of :: + + for link in get_all_links(): + if link.followed: + continue + ... + +The first form is more concise and perhaps more readable, but if you're dealing +with a large number of link objects you'd have to write the second form to avoid +having all link objects in memory at the same time. + +Generator expressions work similarly to list comprehensions but don't +materialize the entire list; instead they create a generator that will return +elements one by one. The above example could be written as:: + + links = (link for link in get_all_links() if not link.followed) + for link in links: + ... + +Generator expressions always have to be written inside parentheses, as in the +above example. The parentheses signalling a function call also count, so if you +want to create an iterator that will be immediately passed to a function you +could write:: + + print sum(obj.count for obj in list_all_objects()) + +Generator expressions differ from list comprehensions in various small ways. +Most notably, the loop variable (*obj* in the above example) is not accessible +outside of the generator expression. List comprehensions leave the variable +assigned to its last value; future versions of Python will change this, making +list comprehensions match generator expressions in this respect. + + +.. seealso:: + + :pep:`289` - Generator Expressions + Proposed by Raymond Hettinger and implemented by Jiwon Seo with early efforts + steered by Hye-Shik Chang. + +.. 
% ====================================================================== + + +PEP 292: Simpler String Substitutions +===================================== + +Some new classes in the standard library provide an alternative mechanism for +substituting variables into strings; this style of substitution may be better +for applications where untrained users need to edit templates. + +The usual way of substituting variables by name is the ``%`` operator:: + + >>> '%(page)i: %(title)s' % {'page':2, 'title': 'The Best of Times'} + '2: The Best of Times' + +When writing the template string, it can be easy to forget the ``i`` or ``s`` +after the closing parenthesis. This isn't a big problem if the template is in a +Python module, because you run the code, get an "Unsupported format character" +:exc:`ValueError`, and fix the problem. However, consider an application such +as Mailman where template strings or translations are being edited by users who +aren't aware of the Python language. The format string's syntax is complicated +to explain to such users, and if they make a mistake, it's difficult to provide +helpful feedback to them. + +PEP 292 adds a :class:`Template` class to the :mod:`string` module that uses +``$`` to indicate a substitution:: + + >>> import string + >>> t = string.Template('$page: $title') + >>> t.substitute({'page':2, 'title': 'The Best of Times'}) + '2: The Best of Times' + +If a key is missing from the dictionary, the :meth:`substitute` method will +raise a :exc:`KeyError`. There's also a :meth:`safe_substitute` method that +ignores missing keys: + +.. % $ Terminate $-mode for Emacs + +:: + + >>> t = string.Template('$page: $title') + >>> t.safe_substitute({'page':3}) + '3: $title' + +.. % $ Terminate math-mode for Emacs + + +.. seealso:: + + :pep:`292` - Simpler String Substitutions + Written and implemented by Barry Warsaw. + +.. 
% ====================================================================== + + +PEP 318: Decorators for Functions and Methods +============================================= + +Python 2.2 extended Python's object model by adding static methods and class +methods, but it didn't extend Python's syntax to provide any new way of defining +static or class methods. Instead, you had to write a :keyword:`def` statement +in the usual way, and pass the resulting method to a :func:`staticmethod` or +:func:`classmethod` function that would wrap up the function as a method of the +new type. Your code would look like this:: + + class C: + def meth (cls): + ... + + meth = classmethod(meth) # Rebind name to wrapped-up class method + +If the method was very long, it would be easy to miss or forget the +:func:`classmethod` invocation after the function body. + +The intention was always to add some syntax to make such definitions more +readable, but at the time of 2.2's release a good syntax was not obvious. Today +a good syntax *still* isn't obvious but users are asking for easier access to +the feature; a new syntactic feature has been added to meet this need. + +The new feature is called "function decorators". The name comes from the idea +that :func:`classmethod`, :func:`staticmethod`, and friends are storing +additional information on a function object; they're *decorating* functions with +more details. + +The notation borrows from Java and uses the ``'@'`` character as an indicator. +Using the new syntax, the example above would be written:: + + class C: + + @classmethod + def meth (cls): + ... + + +The ``@classmethod`` is shorthand for the ``meth=classmethod(meth)`` assignment. +More generally, if you have the following:: + + @A + @B + @C + def f (): + ... + +It's equivalent to the following pre-decorator code:: + + def f(): ... 
+ f = A(B(C(f))) + +Decorators must come on the line before a function definition, one decorator per +line, and can't be on the same line as the def statement, meaning that ``@A def +f(): ...`` is illegal. You can only decorate function definitions, either at +the module level or inside a class; you can't decorate class definitions. + +A decorator is just a function that takes the function to be decorated as an +argument and returns either the same function or some new object. The return +value of the decorator need not be callable (though it typically is), unless +further decorators will be applied to the result. It's easy to write your own +decorators. The following simple example just sets an attribute on the function +object:: + + >>> def deco(func): + ... func.attr = 'decorated' + ... return func + ... + >>> @deco + ... def f(): pass + ... + >>> f + <function f at 0x402ef0d4> + >>> f.attr + 'decorated' + >>> + +As a slightly more realistic example, the following decorator checks that the +supplied argument is an integer:: + + def require_int (func): + def wrapper (arg): + assert isinstance(arg, int) + return func(arg) + + return wrapper + + @require_int + def p1 (arg): + print arg + + @require_int + def p2(arg): + print arg*2 + +An example in :pep:`318` contains a fancier version of this idea that lets you +both specify the required type and check the returned type. + +Decorator functions can take arguments. If arguments are supplied, your +decorator function is called with only those arguments and must return a new +decorator function; this function must take a single function and return a +function, as previously described. In other words, ``@A @B @C(args)`` becomes:: + + def f(): ... + _deco = C(args) + f = A(B(_deco(f))) + +Getting this right can be slightly brain-bending, but it's not too difficult. + +A small related change makes the :attr:`func_name` attribute of functions +writable. 
This attribute is used to display function names in tracebacks, so +decorators should change the name of any new function that's constructed and +returned. + + +.. seealso:: + + :pep:`318` - Decorators for Functions, Methods and Classes + Written by Kevin D. Smith, Jim Jewett, and Skip Montanaro. Several people + wrote patches implementing function decorators, but the one that was actually + checked in was patch #979728, written by Mark Russell. + + http://www.python.org/moin/PythonDecoratorLibrary + This Wiki page contains several examples of decorators. + +.. % ====================================================================== + + +PEP 322: Reverse Iteration +========================== + +A new built-in function, :func:`reversed(seq)`, takes a sequence and returns an +iterator that loops over the elements of the sequence in reverse order. :: + + >>> for i in reversed(xrange(1,4)): + ... print i + ... + 3 + 2 + 1 + +Compared to extended slicing, such as ``range(1,4)[::-1]``, :func:`reversed` is +easier to read, runs faster, and uses substantially less memory. + +Note that :func:`reversed` only accepts sequences, not arbitrary iterators. If +you want to reverse an iterator, first convert it to a list with :func:`list`. +:: + + >>> input = open('/etc/passwd', 'r') + >>> for line in reversed(list(input)): + ... print line + ... + root:*:0:0:System Administrator:/var/root:/bin/tcsh + ... + + +.. seealso:: + + :pep:`322` - Reverse Iteration + Written and implemented by Raymond Hettinger. + +.. % ====================================================================== + + +PEP 324: New subprocess Module +============================== + +The standard library provides a number of ways to execute a subprocess, offering +different features and different levels of complexity. +:func:`os.system(command)` is easy to use, but slow (it runs a shell process +which executes the command) and dangerous (you have to be careful about escaping +the shell's metacharacters). 
The :mod:`popen2` module offers classes that can +capture standard output and standard error from the subprocess, but the naming +is confusing. The :mod:`subprocess` module cleans this up, providing a unified +interface that offers all the features you might need. + +Instead of :mod:`popen2`'s collection of classes, :mod:`subprocess` contains a +single class called :class:`Popen` whose constructor supports a number of +different keyword arguments. :: + + class Popen(args, bufsize=0, executable=None, + stdin=None, stdout=None, stderr=None, + preexec_fn=None, close_fds=False, shell=False, + cwd=None, env=None, universal_newlines=False, + startupinfo=None, creationflags=0): + +*args* is commonly a sequence of strings that will be the arguments to the +program executed as the subprocess. (If the *shell* argument is true, *args* +can be a string which will then be passed on to the shell for interpretation, +just as :func:`os.system` does.) + +*stdin*, *stdout*, and *stderr* specify what the subprocess's input, output, and +error streams will be. You can provide a file object or a file descriptor, or +you can use the constant ``subprocess.PIPE`` to create a pipe between the +subprocess and the parent. + +The constructor has a number of handy options: + +* *close_fds* requests that all file descriptors be closed before running the + subprocess. + +* *cwd* specifies the working directory in which the subprocess will be executed + (defaulting to whatever the parent's working directory is). + +* *env* is a dictionary specifying environment variables. + +* *preexec_fn* is a function that gets called before the child is started. + +* *universal_newlines* opens the child's input and output using Python's + universal newline feature. 
+ +Once you've created the :class:`Popen` instance, you can call its :meth:`wait` +method to pause until the subprocess has exited, :meth:`poll` to check if it's +exited without pausing, or :meth:`communicate(data)` to send the string *data* +to the subprocess's standard input. :meth:`communicate(data)` then reads any +data that the subprocess has sent to its standard output or standard error, +returning a tuple ``(stdout_data, stderr_data)``. + +:func:`call` is a shortcut that passes its arguments along to the :class:`Popen` +constructor, waits for the command to complete, and returns the status code of +the subprocess. It can serve as a safer analog to :func:`os.system`:: + + sts = subprocess.call(['dpkg', '-i', '/tmp/new-package.deb']) + if sts == 0: + # Success + ... + else: + # dpkg returned an error + ... + +The command is invoked without use of the shell. If you really do want to use +the shell, you can add ``shell=True`` as a keyword argument and provide a string +instead of a sequence:: + + sts = subprocess.call('dpkg -i /tmp/new-package.deb', shell=True) + +The PEP takes various examples of shell and Python code and shows how they'd be +translated into Python code that uses :mod:`subprocess`. Reading this section +of the PEP is highly recommended. + + +.. seealso:: + + :pep:`324` - subprocess - New process module + Written and implemented by Peter Åstrand, with assistance from Fredrik Lundh and + others. + +.. % ====================================================================== + + +PEP 327: Decimal Data Type +========================== + +Python has always supported floating-point (FP) numbers, based on the underlying +C :ctype:`double` type, as a data type. However, while most programming +languages provide a floating-point type, many people (even programmers) are +unaware that floating-point numbers don't represent certain decimal fractions +accurately. 
The new :class:`Decimal` type can represent these fractions +accurately, up to a user-specified precision limit. + + +Why is Decimal needed? +---------------------- + +The limitations arise from the representation used for floating-point numbers. +FP numbers are made up of three components: + +* The sign, which is positive or negative. + +* The mantissa, which is a single-digit binary number followed by a fractional + part. For example, ``1.01`` in base-2 notation is ``1 + 0/2 + 1/4``, or 1.25 in + decimal notation. + +* The exponent, which tells where the decimal point is located in the number + represented. + +For example, the number 1.25 has positive sign, a mantissa value of 1.01 (in +binary), and an exponent of 0 (the decimal point doesn't need to be shifted). +The number 5 has the same sign and mantissa, but the exponent is 2 because the +mantissa is multiplied by 4 (2 to the power of the exponent 2); 1.25 \* 4 equals +5. + +Modern systems usually provide floating-point support that conforms to a +standard called IEEE 754. C's :ctype:`double` type is usually implemented as a +64-bit IEEE 754 number, which uses 52 bits of space for the mantissa. This +means that numbers can only be specified to 52 bits of precision. If you're +trying to represent numbers whose expansion repeats endlessly, the expansion is +cut off after 52 bits. Unfortunately, most software needs to produce output in +base 10, and common fractions in base 10 are often repeating decimals in binary. +For example, 1.1 decimal is binary ``1.0001100110011 ...``; .1 = 1/16 + 1/32 + +1/256 plus an infinite number of additional terms. IEEE 754 has to chop off +that infinitely repeated decimal after 52 digits, so the representation is +slightly inaccurate. 
+ +Sometimes you can see this inaccuracy when the number is printed:: + + >>> 1.1 + 1.1000000000000001 + +The inaccuracy isn't always visible when you print the number because the FP-to- +decimal-string conversion is provided by the C library, and most C libraries try +to produce sensible output. Even if it's not displayed, however, the inaccuracy +is still there and subsequent operations can magnify the error. + +For many applications this doesn't matter. If I'm plotting points and +displaying them on my monitor, the difference between 1.1 and 1.1000000000000001 +is too small to be visible. Reports often limit output to a certain number of +decimal places, and if you round the number to two or three or even eight +decimal places, the error is never apparent. However, for applications where it +does matter, it's a lot of work to implement your own custom arithmetic +routines. + +Hence, the :class:`Decimal` type was created. + + +The :class:`Decimal` type +------------------------- + +A new module, :mod:`decimal`, was added to Python's standard library. It +contains two classes, :class:`Decimal` and :class:`Context`. :class:`Decimal` +instances represent numbers, and :class:`Context` instances are used to wrap up +various settings such as the precision and default rounding mode. + +:class:`Decimal` instances are immutable, like regular Python integers and FP +numbers; once it's been created, you can't change the value an instance +represents. :class:`Decimal` instances can be created from integers or +strings:: + + >>> import decimal + >>> decimal.Decimal(1972) + Decimal("1972") + >>> decimal.Decimal("1.1") + Decimal("1.1") + +You can also provide tuples containing the sign, the mantissa represented as a +tuple of decimal digits, and the exponent:: + + >>> decimal.Decimal((1, (1, 4, 7, 5), -2)) + Decimal("-14.75") + +Cautionary note: the sign bit is a Boolean value, so 0 is positive and 1 is +negative. 
+ +Converting from floating-point numbers poses a bit of a problem: should the FP +number representing 1.1 turn into the decimal number for exactly 1.1, or for 1.1 +plus whatever inaccuracies are introduced? The decision was to dodge the issue +and leave such a conversion out of the API. Instead, you should convert the +floating-point number into a string using the desired precision and pass the +string to the :class:`Decimal` constructor:: + + >>> f = 1.1 + >>> decimal.Decimal(str(f)) + Decimal("1.1") + >>> decimal.Decimal('%.12f' % f) + Decimal("1.100000000000") + +Once you have :class:`Decimal` instances, you can perform the usual mathematical +operations on them. One limitation: exponentiation requires an integer +exponent:: + + >>> a = decimal.Decimal('35.72') + >>> b = decimal.Decimal('1.73') + >>> a+b + Decimal("37.45") + >>> a-b + Decimal("33.99") + >>> a*b + Decimal("61.7956") + >>> a/b + Decimal("20.64739884393063583815028902") + >>> a ** 2 + Decimal("1275.9184") + >>> a**b + Traceback (most recent call last): + ... + decimal.InvalidOperation: x ** (non-integer) + +You can combine :class:`Decimal` instances with integers, but not with floating- +point numbers:: + + >>> a + 4 + Decimal("39.72") + >>> a + 4.5 + Traceback (most recent call last): + ... + TypeError: You can interact Decimal only with int, long or Decimal data types. + >>> + +:class:`Decimal` numbers can be used with the :mod:`math` and :mod:`cmath` +modules, but note that they'll be immediately converted to floating-point +numbers before the operation is performed, resulting in a possible loss of +precision and accuracy. You'll also get back a regular floating-point number +and not a :class:`Decimal`. 
:: + + >>> import math, cmath + >>> d = decimal.Decimal('123456789012.345') + >>> math.sqrt(d) + 351364.18288201344 + >>> cmath.sqrt(-d) + 351364.18288201344j + +:class:`Decimal` instances have a :meth:`sqrt` method that returns a +:class:`Decimal`, but if you need other things such as trigonometric functions +you'll have to implement them. :: + + >>> d.sqrt() + Decimal("351364.1828820134592177245001") + + +The :class:`Context` type +------------------------- + +Instances of the :class:`Context` class encapsulate several settings for +decimal operations: + +* :attr:`prec` is the precision, the number of decimal places. + +* :attr:`rounding` specifies the rounding mode. The :mod:`decimal` module has + constants for the various possibilities: :const:`ROUND_DOWN`, + :const:`ROUND_CEILING`, :const:`ROUND_HALF_EVEN`, and various others. + +* :attr:`traps` is a dictionary specifying what happens on encountering certain + error conditions: either an exception is raised or a value is returned. Some + examples of error conditions are division by zero, loss of precision, and + overflow. + +There's a thread-local default context available by calling :func:`getcontext`; +you can change the properties of this context to alter the default precision, +rounding, or trap handling. The following example shows the effect of changing +the precision of the default context:: + + >>> decimal.getcontext().prec + 28 + >>> decimal.Decimal(1) / decimal.Decimal(7) + Decimal("0.1428571428571428571428571429") + >>> decimal.getcontext().prec = 9 + >>> decimal.Decimal(1) / decimal.Decimal(7) + Decimal("0.142857143") + +The default action for error conditions is selectable; the module can either +return a special value such as infinity or not-a-number, or exceptions can be +raised:: + + >>> decimal.Decimal(1) / decimal.Decimal(0) + Traceback (most recent call last): + ... 
+ decimal.DivisionByZero: x / 0 + >>> decimal.getcontext().traps[decimal.DivisionByZero] = False + >>> decimal.Decimal(1) / decimal.Decimal(0) + Decimal("Infinity") + >>> + +The :class:`Context` instance also has various methods for formatting numbers +such as :meth:`to_eng_string` and :meth:`to_sci_string`. + +For more information, see the documentation for the :mod:`decimal` module, which +includes a quick-start tutorial and a reference. + + +.. seealso:: + + :pep:`327` - Decimal Data Type + Written by Facundo Batista and implemented by Facundo Batista, Eric Price, + Raymond Hettinger, Aahz, and Tim Peters. + + http://research.microsoft.com/~hollasch/cgindex/coding/ieeefloat.html + A more detailed overview of the IEEE-754 representation. + + http://www.lahey.com/float.htm + The article uses Fortran code to illustrate many of the problems that floating- + point inaccuracy can cause. + + http://www2.hursley.ibm.com/decimal/ + A description of a decimal-based representation. This representation is being + proposed as a standard, and underlies the new Python decimal type. Much of this + material was written by Mike Cowlishaw, designer of the Rexx language. + +.. % ====================================================================== + + +PEP 328: Multi-line Imports +=========================== + +One language change is a small syntactic tweak aimed at making it easier to +import many names from a module. In a ``from module import names`` statement, +*names* is a sequence of names separated by commas. If the sequence is very +long, you can either write multiple imports from the same module, or you can use +backslashes to escape the line endings like this:: + + from SimpleXMLRPCServer import SimpleXMLRPCServer,\ + SimpleXMLRPCRequestHandler,\ + CGIXMLRPCRequestHandler,\ + resolve_dotted_attribute + +The syntactic change in Python 2.4 simply allows putting the names within +parentheses. 
Python ignores newlines within a parenthesized expression, so the +backslashes are no longer needed:: + + from SimpleXMLRPCServer import (SimpleXMLRPCServer, + SimpleXMLRPCRequestHandler, + CGIXMLRPCRequestHandler, + resolve_dotted_attribute) + +The PEP also proposes that all :keyword:`import` statements be absolute imports, +with a leading ``.`` character to indicate a relative import. This part of the +PEP was not implemented for Python 2.4, but was completed for Python 2.5. + + +.. seealso:: + + :pep:`328` - Imports: Multi-Line and Absolute/Relative + Written by Aahz. Multi-line imports were implemented by Dima Dorfman. + +.. % ====================================================================== + + +PEP 331: Locale-Independent Float/String Conversions +==================================================== + +The :mod:`locale` module lets Python software select various conversions and +display conventions that are localized to a particular country or language. +However, the module was careful to not change the numeric locale because various +functions in Python's implementation required that the numeric locale remain set +to the ``'C'`` locale. Often this was because the code was using the C +library's :cfunc:`atof` function. + +Not setting the numeric locale caused trouble for extensions that used third- +party C libraries, however, because they wouldn't have the correct locale set. +The motivating example was GTK+, whose user interface widgets weren't displaying +numbers in the current locale. + +The solution described in the PEP is to add three new functions to the Python +API that perform ASCII-only conversions, ignoring the locale setting: + +* :cfunc:`PyOS_ascii_strtod(str, ptr)` and :cfunc:`PyOS_ascii_atof(str, ptr)` + both convert a string to a C :ctype:`double`. + +* :cfunc:`PyOS_ascii_formatd(buffer, buf_len, format, d)` converts a + :ctype:`double` to an ASCII string.
+ +The code for these functions came from the GLib library +(http://developer.gnome.org/arch/gtk/glib.html), whose developers kindly +relicensed the relevant functions and donated them to the Python Software +Foundation. The :mod:`locale` module can now change the numeric locale, +letting extensions such as GTK+ produce the correct results. + + +.. seealso:: + + :pep:`331` - Locale-Independent Float/String Conversions + Written by Christian R. Reis, and implemented by Gustavo Carneiro. + +.. % ====================================================================== + + +Other Language Changes +====================== + +Here are all of the changes that Python 2.4 makes to the core Python language. + +* Decorators for functions and methods were added (:pep:`318`). + +* Built-in :func:`set` and :func:`frozenset` types were added (:pep:`218`). + Other new built-ins include the :func:`reversed(seq)` function (:pep:`322`). + +* Generator expressions were added (:pep:`289`). + +* Certain numeric expressions no longer return values restricted to 32 or 64 + bits (:pep:`237`). + +* You can now put parentheses around the list of names in a ``from module import + names`` statement (:pep:`328`). + +* The :meth:`dict.update` method now accepts the same argument forms as the + :class:`dict` constructor. This includes any mapping, any iterable of key/value + pairs, and keyword arguments. (Contributed by Raymond Hettinger.) + +* The string methods :meth:`ljust`, :meth:`rjust`, and :meth:`center` now take + an optional argument for specifying a fill character other than a space. + (Contributed by Raymond Hettinger.) + +* Strings also gained an :meth:`rsplit` method that works like the :meth:`split` + method but splits from the end of the string. (Contributed by Sean + Reifschneider.) 
:: + + >>> 'www.python.org'.split('.', 1) + ['www', 'python.org'] + >>> 'www.python.org'.rsplit('.', 1) + ['www.python', 'org'] + +* Three keyword parameters, *cmp*, *key*, and *reverse*, were added to the + :meth:`sort` method of lists. These parameters make some common usages of + :meth:`sort` simpler. All of these parameters are optional. + + For the *cmp* parameter, the value should be a comparison function that takes + two parameters and returns -1, 0, or +1 depending on how the parameters compare. + This function will then be used to sort the list. Previously this was the only + parameter that could be provided to :meth:`sort`. + + *key* should be a single-parameter function that takes a list element and + returns a comparison key for the element. The list is then sorted using the + comparison keys. The following example sorts a list case-insensitively:: + + >>> L = ['A', 'b', 'c', 'D'] + >>> L.sort() # Case-sensitive sort + >>> L + ['A', 'D', 'b', 'c'] + >>> # Using 'key' parameter to sort list + >>> L.sort(key=lambda x: x.lower()) + >>> L + ['A', 'b', 'c', 'D'] + >>> # Old-fashioned way + >>> L.sort(cmp=lambda x,y: cmp(x.lower(), y.lower())) + >>> L + ['A', 'b', 'c', 'D'] + + The last example, which uses the *cmp* parameter, is the old way to perform a + case-insensitive sort. It works but is slower than using a *key* parameter. + Using *key* calls the :meth:`lower` method once for each element in the list while + using *cmp* will call it twice for each comparison, so using *key* saves on + invocations of the :meth:`lower` method. + + For simple key functions and comparison functions, it is often possible to avoid + a :keyword:`lambda` expression by using an unbound method instead. For example, + the above case-insensitive sort is best written as:: + + >>> L.sort(key=str.lower) + >>> L + ['A', 'b', 'c', 'D'] + + Finally, the *reverse* parameter takes a Boolean value. If the value is true, + the list will be sorted into reverse order.
Instead of ``L.sort() ; + L.reverse()``, you can now write ``L.sort(reverse=True)``. + + The results of sorting are now guaranteed to be stable. This means that two + entries with equal keys will be returned in the same order as they were input. + For example, you can sort a list of people by name, and then sort the list by + age, resulting in a list sorted by age where people with the same age are in + name-sorted order. + + (All changes to :meth:`sort` contributed by Raymond Hettinger.) + +* There is a new built-in function :func:`sorted(iterable)` that works like the + in-place :meth:`list.sort` method but can be used in expressions. The + differences are: + +* the input may be any iterable; + +* a newly formed copy is sorted, leaving the original intact; and + +* the expression returns the new sorted copy + + :: + + >>> L = [9,7,8,3,2,4,1,6,5] + >>> [10+i for i in sorted(L)] # usable in a list comprehension + [11, 12, 13, 14, 15, 16, 17, 18, 19] + >>> L # original is left unchanged + [9,7,8,3,2,4,1,6,5] + >>> sorted('Monty Python') # any iterable may be an input + [' ', 'M', 'P', 'h', 'n', 'n', 'o', 'o', 't', 't', 'y', 'y'] + + >>> # List the contents of a dict sorted by key values + >>> colormap = dict(red=1, blue=2, green=3, black=4, yellow=5) + >>> for k, v in sorted(colormap.iteritems()): + ... print k, v + ... + black 4 + blue 2 + green 3 + red 1 + yellow 5 + + (Contributed by Raymond Hettinger.) + +* Integer operations will no longer trigger an :exc:`OverflowWarning`. The + :exc:`OverflowWarning` warning will disappear in Python 2.5. + +* The interpreter gained a new switch, :option:`-m`, that takes a name, searches + for the corresponding module on ``sys.path``, and runs the module as a script. + For example, you can now run the Python profiler with ``python -m profile``. + (Contributed by Nick Coghlan.) 
+ +* The :func:`eval(expr, globals, locals)` and :func:`execfile(filename, globals, + locals)` functions and the :keyword:`exec` statement now accept any mapping type + for the *locals* parameter. Previously this had to be a regular Python + dictionary. (Contributed by Raymond Hettinger.) + +* The :func:`zip` built-in function and :func:`itertools.izip` now return an + empty list if called with no arguments. Previously they raised a + :exc:`TypeError` exception. This makes them more suitable for use with variable + length argument lists:: + + >>> def transpose(array): + ... return zip(*array) + ... + >>> transpose([(1,2,3), (4,5,6)]) + [(1, 4), (2, 5), (3, 6)] + >>> transpose([]) + [] + + (Contributed by Raymond Hettinger.) + +* Encountering a failure while importing a module no longer leaves a partially- + initialized module object in ``sys.modules``. The incomplete module object left + behind would fool further imports of the same module into succeeding, leading to + confusing errors. (Fixed by Tim Peters.) + +* :const:`None` is now a constant; code that binds a new value to the name + ``None`` is now a syntax error. (Contributed by Raymond Hettinger.) + +.. % ====================================================================== + + +Optimizations +------------- + +* The inner loops for list and tuple slicing were optimized and now run about + one-third faster. The inner loops for dictionaries were also optimized, + resulting in performance boosts for :meth:`keys`, :meth:`values`, :meth:`items`, + :meth:`iterkeys`, :meth:`itervalues`, and :meth:`iteritems`. (Contributed by + Raymond Hettinger.) + +* The machinery for growing and shrinking lists was optimized for speed and for + space efficiency. Appending and popping from lists now runs faster due to more + efficient code paths and less frequent use of the underlying system + :cfunc:`realloc`. List comprehensions also benefit. 
:meth:`list.extend` was + also optimized and no longer converts its argument into a temporary list before + extending the base list. (Contributed by Raymond Hettinger.) + +* :func:`list`, :func:`tuple`, :func:`map`, :func:`filter`, and :func:`zip` now + run several times faster with non-sequence arguments that supply a + :meth:`__len__` method. (Contributed by Raymond Hettinger.) + +* The methods :meth:`list.__getitem__`, :meth:`dict.__getitem__`, and + :meth:`dict.__contains__` are now implemented as :class:`method_descriptor` + objects rather than :class:`wrapper_descriptor` objects. This form of access + doubles their performance and makes them more suitable for use as arguments to + functionals: ``map(mydict.__getitem__, keylist)``. (Contributed by Raymond + Hettinger.) + +* Added a new opcode, ``LIST_APPEND``, that simplifies the generated bytecode + for list comprehensions and speeds them up by about a third. (Contributed by + Raymond Hettinger.) + +* The peephole bytecode optimizer has been improved to produce shorter, faster + bytecode; remarkably, the resulting bytecode is more readable. (Enhanced by + Raymond Hettinger.) + +* String concatenations in statements of the form ``s = s + "abc"`` and ``s += + "abc"`` are now performed more efficiently in certain circumstances. This + optimization won't be present in other Python implementations such as Jython, so + you shouldn't rely on it; using the :meth:`join` method of strings is still + recommended when you want to efficiently glue a large number of strings + together. (Contributed by Armin Rigo.) + +The net result of the 2.4 optimizations is that Python 2.4 runs the pystone +benchmark around 5% faster than Python 2.3 and 35% faster than Python 2.2. +(pystone is not a particularly good benchmark, but it's the most commonly used +measurement of Python's performance. Your own applications may show greater or +smaller benefits from Python 2.4.) + +..
% pystone is almost useless for comparing different versions of Python; +.. % instead, it excels at predicting relative Python performance on +.. % different machines. +.. % So, this section would be more informative if it used other tools +.. % such as pybench and parrotbench. For a more application oriented +.. % benchmark, try comparing the timings of test_decimal.py under 2.3 +.. % and 2.4. + +.. % ====================================================================== + + +New, Improved, and Deprecated Modules +===================================== + +As usual, Python's standard library received a number of enhancements and bug +fixes. Here's a partial list of the most notable changes, sorted alphabetically +by module name. Consult the :file:`Misc/NEWS` file in the source tree for a more +complete list of changes, or look through the CVS logs for all the details. + +* The :mod:`asyncore` module's :func:`loop` function now has a *count* parameter + that lets you perform a limited number of passes through the polling loop. The + default is still to loop forever. + +* The :mod:`base64` module now has more complete RFC 3548 support for Base64, + Base32, and Base16 encoding and decoding, including optional case folding and + optional alternative alphabets. (Contributed by Barry Warsaw.) + +* The :mod:`bisect` module now has an underlying C implementation for improved + performance. (Contributed by Dmitry Vasiliev.) + +* The CJKCodecs collections of East Asian codecs, maintained by Hye-Shik Chang, + was integrated into 2.4. 
The new encodings are: + +* Chinese (PRC): gb2312, gbk, gb18030, big5hkscs, hz + +* Chinese (ROC): big5, cp950 + +* Japanese: cp932, euc-jis-2004, euc-jp, euc-jisx0213, iso-2022-jp, + iso-2022-jp-1, iso-2022-jp-2, iso-2022-jp-3, iso-2022-jp-ext, iso-2022-jp-2004, + shift-jis, shift-jisx0213, shift-jis-2004 + +* Korean: cp949, euc-kr, johab, iso-2022-kr + +* Some other new encodings were added: HP Roman8, ISO_8859-11, ISO_8859-16, + PTCP-154, and TIS-620. + +* The UTF-8 and UTF-16 codecs now cope better with receiving partial input. + Previously the :class:`StreamReader` class would try to read more data, making + it impossible to resume decoding from the stream. The :meth:`read` method will + now return as much data as it can and future calls will resume decoding where + previous ones left off. (Implemented by Walter Dörwald.) + +* There is a new :mod:`collections` module for various specialized collection + datatypes. Currently it contains just one type, :class:`deque`, a double- + ended queue that supports efficiently adding and removing elements from either + end:: + + >>> from collections import deque + >>> d = deque('ghi') # make a new deque with three items + >>> d.append('j') # add a new entry to the right side + >>> d.appendleft('f') # add a new entry to the left side + >>> d # show the representation of the deque + deque(['f', 'g', 'h', 'i', 'j']) + >>> d.pop() # return and remove the rightmost item + 'j' + >>> d.popleft() # return and remove the leftmost item + 'f' + >>> list(d) # list the contents of the deque + ['g', 'h', 'i'] + >>> 'h' in d # search the deque + True + + Several modules, such as the :mod:`Queue` and :mod:`threading` modules, now take + advantage of :class:`collections.deque` for improved performance. (Contributed + by Raymond Hettinger.) + +* The :mod:`ConfigParser` classes have been enhanced slightly.
The :meth:`read` + method now returns a list of the files that were successfully parsed, and the + :meth:`set` method raises :exc:`TypeError` if passed a *value* argument that + isn't a string. (Contributed by John Belmonte and David Goodger.) + +* The :mod:`curses` module now supports the ncurses extension + :func:`use_default_colors`. On platforms where the terminal supports + transparency, this makes it possible to use a transparent background. + (Contributed by Jörg Lehmann.) + +* The :mod:`difflib` module now includes an :class:`HtmlDiff` class that creates + an HTML table showing a side by side comparison of two versions of a text. + (Contributed by Dan Gass.) + +* The :mod:`email` package was updated to version 3.0, which dropped various + deprecated APIs and removed support for Python versions earlier than 2.3. The + 3.0 version of the package uses a new incremental parser for MIME messages, + available in the :mod:`email.FeedParser` module. The new parser doesn't require + reading the entire message into memory, and doesn't throw exceptions if a + message is malformed; instead it records any problems in the :attr:`defects` + attribute of the message. (Developed by Anthony Baxter, Barry Warsaw, Thomas + Wouters, and others.) + +* The :mod:`heapq` module has been converted to C. The resulting tenfold + improvement in speed makes the module suitable for handling high volumes of + data. In addition, the module has two new functions :func:`nlargest` and + :func:`nsmallest` that use heaps to find the N largest or smallest values in a + dataset without the expense of a full sort. (Contributed by Raymond Hettinger.) + +* The :mod:`httplib` module now contains constants for HTTP status codes defined + in various HTTP-related RFC documents. Constants have names such as + :const:`OK`, :const:`CREATED`, :const:`CONTINUE`, and + :const:`MOVED_PERMANENTLY`; use pydoc to get a full list. (Contributed by + Andrew Eland.)
+ +* The :mod:`imaplib` module now supports IMAP's THREAD command (contributed by + Yves Dionne) and new :meth:`deleteacl` and :meth:`myrights` methods (contributed + by Arnaud Mazin). + +* The :mod:`itertools` module gained a :func:`groupby(iterable[, *func*])` + function. *iterable* is something that can be iterated over to return a stream + of elements, and the optional *func* parameter is a function that takes an + element and returns a key value; if omitted, the key is simply the element + itself. :func:`groupby` then groups the elements into subsequences which have + matching values of the key, and returns a series of 2-tuples containing the key + value and an iterator over the subsequence. + + Here's an example to make this clearer. The *key* function simply returns + whether a number is even or odd, so the result of :func:`groupby` is to return + consecutive runs of odd or even numbers. :: + + >>> import itertools + >>> L = [2, 4, 6, 7, 8, 9, 11, 12, 14] + >>> for key_val, it in itertools.groupby(L, lambda x: x % 2): + ... print key_val, list(it) + ... + 0 [2, 4, 6] + 1 [7] + 0 [8] + 1 [9, 11] + 0 [12, 14] + >>> + + :func:`groupby` is typically used with sorted input. The logic for + :func:`groupby` is similar to the Unix ``uniq`` filter which makes it handy for + eliminating, counting, or identifying duplicate elements:: + + >>> word = 'abracadabra' + >>> letters = sorted(word) # Turn string into a sorted list of letters + >>> letters + ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'c', 'd', 'r', 'r'] + >>> for k, g in itertools.groupby(letters): + ... print k, list(g) + ... + a ['a', 'a', 'a', 'a', 'a'] + b ['b', 'b'] + c ['c'] + d ['d'] + r ['r', 'r'] + >>> # List unique letters + >>> [k for k, g in itertools.groupby(letters)] + ['a', 'b', 'c', 'd', 'r'] + >>> # Count letter occurrences + >>> [(k, len(list(g))) for k, g in itertools.groupby(letters)] + [('a', 5), ('b', 2), ('c', 1), ('d', 1), ('r', 2)] + + (Contributed by Hye-Shik Chang.)
+ +* :mod:`itertools` also gained a function named :func:`tee(iterator, N)` that + returns *N* independent iterators that replicate *iterator*. If *N* is omitted, + the default is 2. :: + + >>> L = [1,2,3] + >>> i1, i2 = itertools.tee(L) + >>> i1,i2 + (<itertools.tee object at 0x402c2080>, <itertools.tee object at 0x402c2090>) + >>> list(i1) # Run the first iterator to exhaustion + [1, 2, 3] + >>> list(i2) # Run the second iterator to exhaustion + [1, 2, 3] + + Note that :func:`tee` has to keep copies of the values returned by the + iterator; in the worst case, it may need to keep all of them. This should + therefore be used carefully if the leading iterator can run far ahead of the + trailing iterator in a long stream of inputs. If the separation is large, then + you might as well use :func:`list` instead. When the iterators track closely + with one another, :func:`tee` is ideal. Possible applications include + bookmarking, windowing, or lookahead iterators. (Contributed by Raymond + Hettinger.) + +* A number of functions were added to the :mod:`locale` module, such as + :func:`bind_textdomain_codeset` to specify a particular encoding and a family of + :func:`l\*gettext` functions that return messages in the chosen encoding. + (Contributed by Gustavo Niemeyer.) + +* Some keyword arguments were added to the :mod:`logging` package's + :func:`basicConfig` function to simplify log configuration. The default + behavior is to log messages to standard error, but various keyword arguments can + be specified to log to a particular file, change the logging format, or set the + logging level. For example:: + + import logging + logging.basicConfig(filename='/var/log/application.log', + level=0, # Log all messages + format='%(levelname)s:%(process)d:%(thread)d:%(message)s') + + Other additions to the :mod:`logging` package include a :meth:`log(level, msg)` + convenience method, as well as a :class:`TimedRotatingFileHandler` class that + rotates its log files at a timed interval.
The module already had + :class:`RotatingFileHandler`, which rotated logs once the file exceeded a + certain size. Both classes derive from a new :class:`BaseRotatingHandler` class + that can be used to implement other rotating handlers. + + (Changes implemented by Vinay Sajip.) + +* The :mod:`marshal` module now shares interned strings on unpacking a data + structure. This may shrink the size of certain pickle strings, but the primary + effect is to make :file:`.pyc` files significantly smaller. (Contributed by + Martin von Löwis.) + +* The :mod:`nntplib` module's :class:`NNTP` class gained :meth:`description` and + :meth:`descriptions` methods to retrieve newsgroup descriptions for a single + group or for a range of groups. (Contributed by Jürgen A. Erhard.) + +* Two new functions were added to the :mod:`operator` module, + :func:`attrgetter(attr)` and :func:`itemgetter(index)`. Both functions return + callables that take a single argument and return the corresponding attribute or + item; these callables make excellent data extractors when used with :func:`map` + or :func:`sorted`. For example:: + + >>> L = [('c', 2), ('d', 1), ('a', 4), ('b', 3)] + >>> map(operator.itemgetter(0), L) + ['c', 'd', 'a', 'b'] + >>> map(operator.itemgetter(1), L) + [2, 1, 4, 3] + >>> sorted(L, key=operator.itemgetter(1)) # Sort list by second tuple item + [('d', 1), ('c', 2), ('b', 3), ('a', 4)] + + (Contributed by Raymond Hettinger.) + +* The :mod:`optparse` module was updated in various ways. The module now passes + its messages through :func:`gettext.gettext`, making it possible to + internationalize Optik's help and error messages. Help messages for options can + now include the string ``'%default'``, which will be replaced by the option's + default value. (Contributed by Greg Ward.) + +* The long-term plan is to deprecate the :mod:`rfc822` module in some future + Python release in favor of the :mod:`email` package. 
To this end, the + :func:`email.Utils.formatdate` function has been changed to make it usable as a + replacement for :func:`rfc822.formatdate`. You may want to write new e-mail + processing code with this in mind. (Change implemented by Anthony Baxter.) + +* A new :func:`urandom(n)` function was added to the :mod:`os` module, returning + a string containing *n* bytes of random data. This function provides access to + platform-specific sources of randomness such as :file:`/dev/urandom` on Linux or + the Windows CryptoAPI. (Contributed by Trevor Perrin.) + +* Another new function: :func:`os.path.lexists(path)` returns true if the file + specified by *path* exists, whether or not it's a symbolic link. This differs + from the existing :func:`os.path.exists(path)` function, which returns false if + *path* is a symlink that points to a destination that doesn't exist. + (Contributed by Beni Cherniavsky.) + +* A new :func:`getsid` function was added to the :mod:`posix` module that + underlies the :mod:`os` module. (Contributed by J. Raynor.) + +* The :mod:`poplib` module now supports POP over SSL. (Contributed by Hector + Urtubia.) + +* The :mod:`profile` module can now profile C extension functions. (Contributed + by Nick Bastin.) + +* The :mod:`random` module has a new method called :meth:`getrandbits(N)` that + returns a long integer *N* bits in length. The existing :meth:`randrange` + method now uses :meth:`getrandbits` where appropriate, making generation of + arbitrarily large random numbers more efficient. (Contributed by Raymond + Hettinger.) + +* The regular expression language accepted by the :mod:`re` module was extended + with simple conditional expressions, written as ``(?(group)A|B)``. *group* is + either a numeric group ID or a group name defined with ``(?P<group>...)`` + earlier in the expression. 
If the specified group matched, the regular + expression pattern *A* will be tested against the string; if the group didn't + match, the pattern *B* will be used instead. (Contributed by Gustavo Niemeyer.) + +* The :mod:`re` module is also no longer recursive, thanks to a massive amount + of work by Gustavo Niemeyer. In a recursive regular expression engine, certain + patterns result in a large amount of C stack space being consumed, and it was + possible to overflow the stack. For example, if you matched a 30000-byte string + of ``a`` characters against the expression ``(a|b)+``, one stack frame was + consumed per character. Python 2.3 tried to check for stack overflow and raise + a :exc:`RuntimeError` exception, but certain patterns could sidestep the + checking and if you were unlucky Python could segfault. Python 2.4's regular + expression engine can match this pattern without problems. + +* The :mod:`signal` module now performs tighter error-checking on the parameters + to the :func:`signal.signal` function. For example, you can't set a handler on + the :const:`SIGKILL` signal; previous versions of Python would quietly accept + this, but 2.4 will raise a :exc:`RuntimeError` exception. + +* Two new functions were added to the :mod:`socket` module. :func:`socketpair` + returns a pair of connected sockets and :func:`getservbyport(port)` looks up the + service name for a given port number. (Contributed by Dave Cole and Barry + Warsaw.) + +* The :func:`sys.exitfunc` function has been deprecated. Code should be using + the existing :mod:`atexit` module, which correctly handles calling multiple exit + functions. Eventually :func:`sys.exitfunc` will become a purely internal + interface, accessed only by :mod:`atexit`. + +* The :mod:`tarfile` module now generates GNU-format tar files by default. + (Contributed by Lars Gustaebel.) + +* The :mod:`threading` module now has an elegantly simple way to support + thread-local data. 
The module contains a :class:`local` class whose attribute + values are local to different threads. :: + + import threading + + data = threading.local() + data.number = 42 + data.url = ('www.python.org', 80) + + Other threads can assign and retrieve their own values for the :attr:`number` + and :attr:`url` attributes. You can subclass :class:`local` to initialize + attributes or to add methods. (Contributed by Jim Fulton.) + +* The :mod:`timeit` module now automatically disables periodic garbage + collection during the timing loop. This change makes consecutive timings more + comparable. (Contributed by Raymond Hettinger.) + +* The :mod:`weakref` module now supports a wider variety of objects including + Python functions, class instances, sets, frozensets, deques, arrays, files, + sockets, and regular expression pattern objects. (Contributed by Raymond + Hettinger.) + +* The :mod:`xmlrpclib` module now supports a multi-call extension for + transmitting multiple XML-RPC calls in a single HTTP operation. (Contributed by + Brian Quinlan.) + +* The :mod:`mpz`, :mod:`rotor`, and :mod:`xreadlines` modules have been + removed. + +.. % ====================================================================== +.. % whole new modules get described in subsections here +.. % ===================== + + +cookielib +--------- + +The :mod:`cookielib` library supports client-side handling for HTTP cookies, +mirroring the :mod:`Cookie` module's server-side cookie support. Cookies are +stored in cookie jars; the library transparently stores cookies offered by the +web server in the cookie jar, and fetches the cookie from the jar when +connecting to the server. As in web browsers, policy objects control whether +cookies are accepted or not. 
+ +In order to store cookies across sessions, two implementations of cookie jars +are provided: one that stores cookies in the Netscape format so applications can +use the Mozilla or Lynx cookie files, and one that stores cookies in the same +format as the Perl libwww library. + +:mod:`urllib2` has been changed to interact with :mod:`cookielib`: +:class:`HTTPCookieProcessor` manages a cookie jar that is used when accessing +URLs. + +This module was contributed by John J. Lee. + +.. % ================== + + +doctest +------- + +The :mod:`doctest` module underwent considerable refactoring thanks to Edward +Loper and Tim Peters. Testing can still be as simple as running +:func:`doctest.testmod`, but the refactorings allow customizing the module's +operation in various ways. + +The new :class:`DocTestFinder` class extracts the tests from a given object's +docstrings:: + + def f (x, y): + """>>> f(2,2) + 4 + >>> f(3,2) + 6 + """ + return x*y + + finder = doctest.DocTestFinder() + + # Get list of DocTest instances + tests = finder.find(f) + +The new :class:`DocTestRunner` class then runs individual tests and can produce +a summary of the results:: + + runner = doctest.DocTestRunner() + for t in tests: + tried, failed = runner.run(t) + + runner.summarize(verbose=1) + +The above example produces the following output:: + + 1 items passed all tests: + 2 tests in f + 2 tests in 1 items. + 2 passed and 0 failed. + Test passed. + +:class:`DocTestRunner` uses an instance of the :class:`OutputChecker` class to +compare the expected output with the actual output. This class takes a number +of different flags that customize its behaviour; ambitious users can also write +a completely new subclass of :class:`OutputChecker`. + +The default output checker provides a number of handy features.
For example, +with the :const:`doctest.ELLIPSIS` option flag, an ellipsis (``...``) in the +expected output matches any substring, making it easier to accommodate outputs +that vary in minor ways:: + + def o (n): + """>>> o(1) + <__main__.C instance at 0x...> + >>> + """ + +Another special string, ``<BLANKLINE>``, matches a blank line:: + + def p (n): + """>>> p(1) + <BLANKLINE> + >>> + """ + +Another new capability is producing a diff-style display of the output by +specifying the :const:`doctest.REPORT_UDIFF` (unified diffs), +:const:`doctest.REPORT_CDIFF` (context diffs), or :const:`doctest.REPORT_NDIFF` +(delta-style) option flags. For example:: + + def g (n): + """>>> g(4) + here + is + a + lengthy + >>>""" + L = 'here is a rather lengthy list of words'.split() + for word in L[:n]: + print word + +Running the above function's tests with :const:`doctest.REPORT_UDIFF` specified, +you get the following output:: + + ********************************************************************** + File "t.py", line 15, in g + Failed example: + g(4) + Differences (unified diff with -expected +actual): + @@ -2,3 +2,3 @@ + is + a + -lengthy + +rather + ********************************************************************** + +.. % ====================================================================== + + +Build and C API Changes +======================= + +Some of the changes to Python's build process and to the C API are: + +* Three new convenience macros were added for common return values from + extension functions: :cmacro:`Py_RETURN_NONE`, :cmacro:`Py_RETURN_TRUE`, and + :cmacro:`Py_RETURN_FALSE`. (Contributed by Brett Cannon.) + +* Another new macro, :cmacro:`Py_CLEAR(obj)`, decreases the reference count of + *obj* and sets *obj* to the null pointer. (Contributed by Jim Fulton.) + +* A new function, :cfunc:`PyTuple_Pack(N, obj1, obj2, ..., objN)`, constructs + tuples from a variable length argument list of Python objects. (Contributed by + Raymond Hettinger.)
+ +* A new function, :cfunc:`PyDict_Contains(d, k)`, implements fast dictionary + lookups without masking exceptions raised during the look-up process. + (Contributed by Raymond Hettinger.) + +* The :cmacro:`Py_IS_NAN(X)` macro returns 1 if its float or double argument + *X* is a NaN. (Contributed by Tim Peters.) + +* C code can avoid unnecessary locking by using the new + :cfunc:`PyEval_ThreadsInitialized` function to tell if any thread operations + have been performed. If this function returns false, no lock operations are + needed. (Contributed by Nick Coghlan.) + +* A new function, :cfunc:`PyArg_VaParseTupleAndKeywords`, is the same as + :cfunc:`PyArg_ParseTupleAndKeywords` but takes a :ctype:`va_list` instead of a + number of arguments. (Contributed by Greg Chapman.) + +* A new method flag, :const:`METH_COEXISTS`, allows a function defined in slots + to co-exist with a :ctype:`PyCFunction` having the same name. This can halve + the access time for a method such as :meth:`set.__contains__`. (Contributed by + Raymond Hettinger.) + +* Python can now be built with additional profiling for the interpreter itself, + intended as an aid to people developing the Python core. Providing + :option:`--enable-profiling` to the :program:`configure` script will let you + profile the interpreter with :program:`gprof`, and providing the + :option:`--with-tsc` switch enables profiling using the Pentium's + Time-Stamp-Counter register. Note that the :option:`--with-tsc` switch is slightly + misnamed, because the profiling feature also works on the PowerPC platform, + though that processor architecture doesn't call that register "the TSC + register". (Contributed by Jeremy Hylton.) + +* The :ctype:`tracebackobject` type has been renamed to + :ctype:`PyTracebackObject`. + +.. % ====================================================================== + + +Port-Specific Changes +--------------------- + +* The Windows port now builds under MSVC++ 7.1 as well as version 6.
+ (Contributed by Martin von Löwis.) + +.. % ====================================================================== + + +Porting to Python 2.4 +===================== + +This section lists previously described changes that may require changes to your +code: + +* Left shifts and hexadecimal/octal constants that are too large no longer + trigger a :exc:`FutureWarning` and return a value limited to 32 or 64 bits; + instead they return a long integer. + +* Integer operations will no longer trigger an :exc:`OverflowWarning`. The + :exc:`OverflowWarning` warning will disappear in Python 2.5. + +* The :func:`zip` built-in function and :func:`itertools.izip` now return an + empty list instead of raising a :exc:`TypeError` exception if called with no + arguments. + +* You can no longer compare the :class:`date` and :class:`datetime` instances + provided by the :mod:`datetime` module. Two instances of different classes + will now always be unequal, and relative comparisons (``<``, ``>``) will raise + a :exc:`TypeError`. + +* :func:`dircache.listdir` now passes exceptions to the caller instead of + returning empty lists. + +* :func:`LexicalHandler.startDTD` used to receive the public and system IDs in + the wrong order. This has been corrected; applications relying on the wrong + order need to be fixed. + +* :func:`fcntl.ioctl` now warns if the *mutate* argument is omitted and + relevant. + +* The :mod:`tarfile` module now generates GNU-format tar files by default. + +* Encountering a failure while importing a module no longer leaves a + partially-initialized module object in ``sys.modules``. + +* :const:`None` is now a constant; code that binds a new value to the name + ``None`` is now a syntax error. + +* The :func:`signal.signal` function now raises a :exc:`RuntimeError` exception + for certain illegal values; previously these errors would pass silently. For + example, you can no longer set a handler on the :const:`SIGKILL` signal. + +..
% ====================================================================== + + +.. _acks: + +Acknowledgements +================ + +The author would like to thank the following people for offering suggestions, +corrections and assistance with various drafts of this article: Koray Can, Hye- +Shik Chang, Michael Dyck, Raymond Hettinger, Brian Hurt, Hamish Lawson, Fredrik +Lundh, Sean Reifschneider, Sadruddin Rejeb. + diff --git a/Doc/whatsnew/2.5.rst b/Doc/whatsnew/2.5.rst new file mode 100644 index 0000000..f0429ec --- /dev/null +++ b/Doc/whatsnew/2.5.rst @@ -0,0 +1,2286 @@ +**************************** + What's New in Python 2.5 +**************************** + +:Author: A.M. Kuchling + +.. |release| replace:: 1.01 + +.. % $Id: whatsnew25.tex 56611 2007-07-29 08:26:10Z georg.brandl $ +.. % Fix XXX comments + +This article explains the new features in Python 2.5. The final release of +Python 2.5 is scheduled for August 2006; :pep:`356` describes the planned +release schedule. + +The changes in Python 2.5 are an interesting mix of language and library +improvements. The library enhancements will be more important to Python's user +community, I think, because several widely-useful packages were added. New +modules include ElementTree for XML processing (section :ref:`module-etree`), +the SQLite database module (section :ref:`module-sqlite`), and the :mod:`ctypes` +module for calling C functions (section :ref:`module-ctypes`). + +The language changes are of middling significance. Some pleasant new features +were added, but most of them aren't features that you'll use every day. +Conditional expressions were finally added to the language using a novel syntax; +see section :ref:`pep-308`. The new ':keyword:`with`' statement will make +writing cleanup code easier (section :ref:`pep-343`). Values can now be passed +into generators (section :ref:`pep-342`). Imports are now visible as either +absolute or relative (section :ref:`pep-328`). 
Some corner cases of exception +handling are handled better (section :ref:`pep-341`). All these improvements +are worthwhile, but they're improvements to one specific language feature or +another; none of them are broad modifications to Python's semantics. + +As well as the language and library additions, other improvements and bugfixes +were made throughout the source tree. A search through the SVN change logs +finds there were 353 patches applied and 458 bugs fixed between Python 2.4 and +2.5. (Both figures are likely to be underestimates.) + +This article doesn't try to be a complete specification of the new features; +instead changes are briefly introduced using helpful examples. For full +details, you should always refer to the documentation for Python 2.5 at +http://docs.python.org. If you want to understand the complete implementation +and design rationale, refer to the PEP for a particular new feature. + +Comments, suggestions, and error reports for this document are welcome; please +e-mail them to the author or open a bug in the Python bug tracker. + +.. % ====================================================================== + + +.. _pep-308: + +PEP 308: Conditional Expressions +================================ + +For a long time, people have been requesting a way to write conditional +expressions, which are expressions that return value A or value B depending on +whether a Boolean value is true or false. A conditional expression lets you +write a single assignment statement that has the same effect as the following:: + + if condition: + x = true_value + else: + x = false_value + +There have been endless tedious discussions of syntax on both python-dev and +comp.lang.python. A vote was even held that found the majority of voters wanted +conditional expressions in some form, but there was no syntax that was preferred +by a clear majority. Candidates included C's ``cond ? true_v : false_v``, ``if +cond then true_v else false_v``, and 16 other variations. 
+ +Guido van Rossum eventually chose a surprising syntax:: + + x = true_value if condition else false_value + +Evaluation is still lazy as in existing Boolean expressions, so the order of +evaluation jumps around a bit. The *condition* expression in the middle is +evaluated first, and the *true_value* expression is evaluated only if the +condition was true. Similarly, the *false_value* expression is only evaluated +when the condition is false. + +This syntax may seem strange and backwards; why does the condition go in the +*middle* of the expression, and not in the front as in C's ``c ? x : y``? The +decision was checked by applying the new syntax to the modules in the standard +library and seeing how the resulting code read. In many cases where a +conditional expression is used, one value seems to be the 'common case' and one +value is an 'exceptional case', used only on rarer occasions when the condition +isn't met. The conditional syntax makes this pattern a bit more obvious:: + + contents = ((doc + '\n') if doc else '') + +I read the above statement as meaning "here *contents* is usually assigned a +value of ``doc+'\n'``; sometimes *doc* is empty, in which special case an empty +string is returned." I doubt I will use conditional expressions very often +where there isn't a clear common and uncommon case. + +There was some discussion of whether the language should require surrounding +conditional expressions with parentheses. The decision was made to *not* +require parentheses in the Python language's grammar, but as a matter of style I +think you should always use them. Consider these two statements:: + + # First version -- no parens + level = 1 if logging else 0 + + # Second version -- with parens + level = (1 if logging else 0) + +In the first version, I think a reader's eye might group the statement into +'level = 1', 'if logging', 'else 0', and think that the condition decides +whether the assignment to *level* is performed. 
The second version reads +better, in my opinion, because it makes it clear that the assignment is always +performed and the choice is being made between two values. + +Another reason for including the brackets: a few odd combinations of list +comprehensions and lambdas could look like incorrect conditional expressions. +See :pep:`308` for some examples. If you put parentheses around your +conditional expressions, you won't run into this case. + + +.. seealso:: + + :pep:`308` - Conditional Expressions + PEP written by Guido van Rossum and Raymond D. Hettinger; implemented by Thomas + Wouters. + +.. % ====================================================================== + + +.. _pep-309: + +PEP 309: Partial Function Application +===================================== + +The :mod:`functools` module is intended to contain tools for functional-style +programming. + +One useful tool in this module is the :func:`partial` function. For programs +written in a functional style, you'll sometimes want to construct variants of +existing functions that have some of the parameters filled in. Consider a +Python function ``f(a, b, c)``; you could create a new function ``g(b, c)`` that +was equivalent to ``f(1, b, c)``. This is called "partial function +application". + +:func:`partial` takes the arguments ``(function, arg1, arg2, ... kwarg1=value1, +kwarg2=value2)``. The resulting object is callable, so you can just call it to +invoke *function* with the filled-in arguments. + +Here's a small but realistic example:: + + import functools + + def log (message, subsystem): + "Write the contents of 'message' to the specified subsystem." + print '%s: %s' % (subsystem, message) + ... + + server_log = functools.partial(log, subsystem='server') + server_log('Unable to open socket') + +Here's another example, from a program that uses PyGTK. Here a context- +sensitive pop-up menu is being constructed dynamically. 
The callback provided +for the menu option is a partially applied version of the :meth:`open_item` +method, where the first argument has been provided. :: + + ... + class Application: + def open_item(self, path): + ... + def init (self): + open_func = functools.partial(self.open_item, item_path) + popup_menu.append( ("Open", open_func, 1) ) + +Another function in the :mod:`functools` module is the +:func:`update_wrapper(wrapper, wrapped)` function that helps you write well- +behaved decorators. :func:`update_wrapper` copies the name, module, and +docstring attribute to a wrapper function so that tracebacks inside the wrapped +function are easier to understand. For example, you might write:: + + def my_decorator(f): + def wrapper(*args, **kwds): + print 'Calling decorated function' + return f(*args, **kwds) + functools.update_wrapper(wrapper, f) + return wrapper + +:func:`wraps` is a decorator that can be used inside your own decorators to copy +the wrapped function's information. An alternate version of the previous +example would be:: + + def my_decorator(f): + @functools.wraps(f) + def wrapper(*args, **kwds): + print 'Calling decorated function' + return f(*args, **kwds) + return wrapper + + +.. seealso:: + + :pep:`309` - Partial Function Application + PEP proposed and written by Peter Harris; implemented by Hye-Shik Chang and Nick + Coghlan, with adaptations by Raymond Hettinger. + +.. % ====================================================================== + + +.. _pep-314: + +PEP 314: Metadata for Python Software Packages v1.1 +=================================================== + +Some simple dependency support was added to Distutils. The :func:`setup` +function now has ``requires``, ``provides``, and ``obsoletes`` keyword +parameters. When you build a source distribution using the ``sdist`` command, +the dependency information will be recorded in the :file:`PKG-INFO` file. 
+ +Another new keyword parameter is ``download_url``, which should be set to a URL +for the package's source code. This means it's now possible to look up an entry +in the package index, determine the dependencies for a package, and download the +required packages. :: + + VERSION = '1.0' + setup(name='PyPackage', + version=VERSION, + requires=['numarray', 'zlib (>=1.1.4)'], + obsoletes=['OldPackage'], + download_url=('http://www.example.com/pypackage/dist/pkg-%s.tar.gz' + % VERSION), + ) + +Another new enhancement to the Python package index at +http://cheeseshop.python.org is storing source and binary archives for a +package. The new :command:`upload` Distutils command will upload a package to +the repository. + +Before a package can be uploaded, you must be able to build a distribution using +the :command:`sdist` Distutils command. Once that works, you can run ``python +setup.py upload`` to add your package to the PyPI archive. Optionally you can +GPG-sign the package by supplying the :option:`--sign` and :option:`--identity` +options. + +Package uploading was implemented by Martin von Löwis and Richard Jones. + + +.. seealso:: + + :pep:`314` - Metadata for Python Software Packages v1.1 + PEP proposed and written by A.M. Kuchling, Richard Jones, and Fred Drake; + implemented by Richard Jones and Fred Drake. + +.. % ====================================================================== + + +.. _pep-328: + +PEP 328: Absolute and Relative Imports +====================================== + +The simpler part of PEP 328 was implemented in Python 2.4: parentheses could now +be used to enclose the names imported from a module using the ``from ... import +...`` statement, making it easier to import many different names. + +The more complicated part has been implemented in Python 2.5: importing a module +can be specified to use absolute or package-relative imports. The plan is to +move toward making absolute imports the default in future versions of Python.
+ +Let's say you have a package directory like this:: + + pkg/ + pkg/__init__.py + pkg/main.py + pkg/string.py + +This defines a package named :mod:`pkg` containing the :mod:`pkg.main` and +:mod:`pkg.string` submodules. + +Consider the code in the :file:`main.py` module. What happens if it executes +the statement ``import string``? In Python 2.4 and earlier, it will first look +in the package's directory to perform a relative import, finds +:file:`pkg/string.py`, imports the contents of that file as the +:mod:`pkg.string` module, and that module is bound to the name ``string`` in the +:mod:`pkg.main` module's namespace. + +That's fine if :mod:`pkg.string` was what you wanted. But what if you wanted +Python's standard :mod:`string` module? There's no clean way to ignore +:mod:`pkg.string` and look for the standard module; generally you had to look at +the contents of ``sys.modules``, which is slightly unclean. Holger Krekel's +:mod:`py.std` package provides a tidier way to perform imports from the standard +library, ``import py ; py.std.string.join()``, but that package isn't available +on all Python installations. + +Reading code which relies on relative imports is also less clear, because a +reader may be confused about which module, :mod:`string` or :mod:`pkg.string`, +is intended to be used. Python users soon learned not to duplicate the names of +standard library modules in the names of their packages' submodules, but you +can't protect against having your submodule's name being used for a new module +added in a future version of Python. + +In Python 2.5, you can switch :keyword:`import`'s behaviour to absolute imports +using a ``from __future__ import absolute_import`` directive. This absolute- +import behaviour will become the default in a future version (probably Python +2.7). Once absolute imports are the default, ``import string`` will always +find the standard library's version. 
It's suggested that users should begin +using absolute imports as much as possible, so it's preferable to begin writing +``from pkg import string`` in your code. + +Relative imports are still possible by adding a leading period to the module +name when using the ``from ... import`` form:: + + # Import names from pkg.string + from .string import name1, name2 + # Import pkg.string + from . import string + +This imports the :mod:`string` module relative to the current package, so in +:mod:`pkg.main` this will import *name1* and *name2* from :mod:`pkg.string`. +Additional leading periods perform the relative import starting from the parent +of the current package. For example, code in the :mod:`A.B.C` module can do:: + + from . import D # Imports A.B.D + from .. import E # Imports A.E + from ..F import G # Imports A.F.G + +Leading periods cannot be used with the ``import modname`` form of the import +statement, only the ``from ... import`` form. + + +.. seealso:: + + :pep:`328` - Imports: Multi-Line and Absolute/Relative + PEP written by Aahz; implemented by Thomas Wouters. + + http://codespeak.net/py/current/doc/index.html + The py library by Holger Krekel, which contains the :mod:`py.std` package. + +.. % ====================================================================== + + +.. _pep-338: + +PEP 338: Executing Modules as Scripts +===================================== + +The :option:`-m` switch added in Python 2.4 to execute a module as a script +gained a few more abilities. Instead of being implemented in C code inside the +Python interpreter, the switch now uses an implementation in a new module, +:mod:`runpy`. + +The :mod:`runpy` module implements a more sophisticated import mechanism so that +it's now possible to run modules in a package such as :mod:`pychecker.checker`. +The module also supports alternative import mechanisms such as the +:mod:`zipimport` module. 
This means you can add a .zip archive's path to +``sys.path`` and then use the :option:`-m` switch to execute code from the +archive. + + +.. seealso:: + + :pep:`338` - Executing modules as scripts + PEP written and implemented by Nick Coghlan. + +.. % ====================================================================== + + +.. _pep-341: + +PEP 341: Unified try/except/finally +=================================== + +Until Python 2.5, the :keyword:`try` statement came in two flavours. You could +use a :keyword:`finally` block to ensure that code is always executed, or one or +more :keyword:`except` blocks to catch specific exceptions. You couldn't +combine both :keyword:`except` blocks and a :keyword:`finally` block, because +generating the right bytecode for the combined version was complicated and it +wasn't clear what the semantics of the combined statement should be. + +Guido van Rossum spent some time working with Java, which does support the +equivalent of combining :keyword:`except` blocks and a :keyword:`finally` block, +and this clarified what the statement should mean. In Python 2.5, you can now +write:: + + try: + block-1 ... + except Exception1: + handler-1 ... + except Exception2: + handler-2 ... + else: + else-block + finally: + final-block + +The code in *block-1* is executed. If the code raises an exception, the various +:keyword:`except` blocks are tested: if the exception is of class +:class:`Exception1`, *handler-1* is executed; otherwise if it's of class +:class:`Exception2`, *handler-2* is executed, and so forth. If no exception is +raised, the *else-block* is executed. + +No matter what happened previously, the *final-block* is executed once the code +block is complete and any raised exceptions handled. Even if there's an error in +an exception handler or the *else-block* and a new exception is raised, the code +in the *final-block* is still run. + + +.. 
seealso:: + + :pep:`341` - Unifying try-except and try-finally + PEP written by Georg Brandl; implementation by Thomas Lee. + +.. % ====================================================================== + + +.. _pep-342: + +PEP 342: New Generator Features +=============================== + +Python 2.5 adds a simple way to pass values *into* a generator. As introduced in +Python 2.3, generators only produce output; once a generator's code was invoked +to create an iterator, there was no way to pass any new information into the +function when its execution is resumed. Sometimes the ability to pass in some +information would be useful. Hackish solutions to this include making the +generator's code look at a global variable and then changing the global +variable's value, or passing in some mutable object that callers then modify. + +To refresh your memory of basic generators, here's a simple example:: + + def counter (maximum): + i = 0 + while i < maximum: + yield i + i += 1 + +When you call ``counter(10)``, the result is an iterator that returns the values +from 0 up to 9. On encountering the :keyword:`yield` statement, the iterator +returns the provided value and suspends the function's execution, preserving the +local variables. Execution resumes on the following call to the iterator's +:meth:`next` method, picking up after the :keyword:`yield` statement. + +In Python 2.3, :keyword:`yield` was a statement; it didn't return any value. In +2.5, :keyword:`yield` is now an expression, returning a value that can be +assigned to a variable or otherwise operated on:: + + val = (yield i) + +I recommend that you always put parentheses around a :keyword:`yield` expression +when you're doing something with the returned value, as in the above example. +The parentheses aren't always necessary, but it's easier to always add them +instead of having to remember when they're needed. 
+ +(:pep:`342` explains the exact rules, which are that a :keyword:`yield`\ +-expression must always be parenthesized except when it occurs at the top-level +expression on the right-hand side of an assignment. This means you can write +``val = yield i`` but have to use parentheses when there's an operation, as in +``val = (yield i) + 12``.) + +Values are sent into a generator by calling its :meth:`send(value)` method. The +generator's code is then resumed and the :keyword:`yield` expression returns the +specified *value*. If the regular :meth:`next` method is called, the +:keyword:`yield` returns :const:`None`. + +Here's the previous example, modified to allow changing the value of the +internal counter. :: + + def counter (maximum): + i = 0 + while i < maximum: + val = (yield i) + # If value provided, change counter + if val is not None: + i = val + else: + i += 1 + +And here's an example of changing the counter:: + + >>> it = counter(10) + >>> print it.next() + 0 + >>> print it.next() + 1 + >>> print it.send(8) + 8 + >>> print it.next() + 9 + >>> print it.next() + Traceback (most recent call last): + File "t.py", line 15, in ? + print it.next() + StopIteration + +:keyword:`yield` will usually return :const:`None`, so you should always check +for this case. Don't just use its value in expressions unless you're sure that +the :meth:`send` method will be the only method used to resume your generator +function. + +In addition to :meth:`send`, there are two other new methods on generators: + +* :meth:`throw(type, value=None, traceback=None)` is used to raise an exception + inside the generator; the exception is raised by the :keyword:`yield` expression + where the generator's execution is paused. + +* :meth:`close` raises a new :exc:`GeneratorExit` exception inside the generator + to terminate the iteration. On receiving this exception, the generator's code + must either raise :exc:`GeneratorExit` or :exc:`StopIteration`.
Catching the + :exc:`GeneratorExit` exception and returning a value is illegal and will trigger + a :exc:`RuntimeError`; if the function raises some other exception, that + exception is propagated to the caller. :meth:`close` will also be called by + Python's garbage collector when the generator is garbage-collected. + + If you need to run cleanup code when a :exc:`GeneratorExit` occurs, I suggest + using a ``try: ... finally:`` suite instead of catching :exc:`GeneratorExit`. + +The cumulative effect of these changes is to turn generators from one-way +producers of information into both producers and consumers. + +Generators also become *coroutines*, a more generalized form of subroutines. +Subroutines are entered at one point and exited at another point (the top of the +function, and a :keyword:`return` statement), but coroutines can be entered, +exited, and resumed at many different points (the :keyword:`yield` statements). +We'll have to figure out patterns for using coroutines effectively in Python. + +The addition of the :meth:`close` method has one side effect that isn't obvious. +:meth:`close` is called when a generator is garbage-collected, so this means the +generator's code gets one last chance to run before the generator is destroyed. +This last chance means that ``try...finally`` statements in generators can now +be guaranteed to work; the :keyword:`finally` clause will now always get a +chance to run. The syntactic restriction that you couldn't mix :keyword:`yield` +statements with a ``try...finally`` suite has therefore been removed. This +seems like a minor bit of language trivia, but using generators and +``try...finally`` is actually necessary in order to implement the +:keyword:`with` statement described by PEP 343. I'll look at this new statement +in the following section. + +Another even more esoteric effect of this change: previously, the +:attr:`gi_frame` attribute of a generator was always a frame object. 
It's now +possible for :attr:`gi_frame` to be ``None`` once the generator has been +exhausted. + + +.. seealso:: + + :pep:`342` - Coroutines via Enhanced Generators + PEP written by Guido van Rossum and Phillip J. Eby; implemented by Phillip J. + Eby. Includes examples of some fancier uses of generators as coroutines. + + Earlier versions of these features were proposed in :pep:`288` by Raymond + Hettinger and :pep:`325` by Samuele Pedroni. + + http://en.wikipedia.org/wiki/Coroutine + The Wikipedia entry for coroutines. + + http://www.sidhe.org/~dan/blog/archives/000178.html + An explanation of coroutines from a Perl point of view, written by Dan Sugalski. + +.. % ====================================================================== + + +.. _pep-343: + +PEP 343: The 'with' statement +============================= + +The ':keyword:`with`' statement clarifies code that previously would use +``try...finally`` blocks to ensure that clean-up code is executed. In this +section, I'll discuss the statement as it will commonly be used. In the next +section, I'll examine the implementation details and show how to write objects +for use with this statement. + +The ':keyword:`with`' statement is a new control-flow structure whose basic +structure is:: + + with expression [as variable]: + with-block + +The expression is evaluated, and it should result in an object that supports the +context management protocol (that is, has :meth:`__enter__` and :meth:`__exit__` +methods). + +The object's :meth:`__enter__` is called before *with-block* is executed and +therefore can run set-up code. It also may return a value that is bound to the +name *variable*, if given. (Note carefully that *variable* is *not* assigned +the result of *expression*.) + +After execution of the *with-block* is finished, the object's :meth:`__exit__` +method is called, even if the block raised an exception, and can therefore run +clean-up code.
+ +To enable the statement in Python 2.5, you need to add the following directive +to your module:: + + from __future__ import with_statement + +The statement will always be enabled in Python 2.6. + +Some standard Python objects now support the context management protocol and can +be used with the ':keyword:`with`' statement. File objects are one example:: + + with open('/etc/passwd', 'r') as f: + for line in f: + print line + ... more processing code ... + +After this statement has executed, the file object in *f* will have been +automatically closed, even if the :keyword:`for` loop raised an exception part- +way through the block. + +.. note:: + + In this case, *f* is the same object created by :func:`open`, because + :meth:`file.__enter__` returns *self*. + +The :mod:`threading` module's locks and condition variables also support the +':keyword:`with`' statement:: + + lock = threading.Lock() + with lock: + # Critical section of code + ... + +The lock is acquired before the block is executed and always released once the +block is complete. + +The new :func:`localcontext` function in the :mod:`decimal` module makes it easy +to save and restore the current decimal context, which encapsulates the desired +precision and rounding characteristics for computations:: + + from decimal import Decimal, Context, localcontext + + # Displays with default precision of 28 digits + v = Decimal('578') + print v.sqrt() + + with localcontext(Context(prec=16)): + # All code in this block uses a precision of 16 digits. + # The original context is restored on exiting the block. + print v.sqrt() + + +.. _context-managers: + +Writing Context Managers +------------------------ + +Under the hood, the ':keyword:`with`' statement is fairly complicated. Most +people will only use ':keyword:`with`' in company with existing objects and +don't need to know these details, so you can skip the rest of this section if +you like. 
Authors of new objects will need to understand the details of the +underlying implementation and should keep reading. + +A high-level explanation of the context management protocol is: + +* The expression is evaluated and should result in an object called a "context + manager". The context manager must have :meth:`__enter__` and :meth:`__exit__` + methods. + +* The context manager's :meth:`__enter__` method is called. The value returned + is assigned to *VAR*. If no ``'as VAR'`` clause is present, the value is simply + discarded. + +* The code in *BLOCK* is executed. + +* If *BLOCK* raises an exception, the :meth:`__exit__(type, value, traceback)` + is called with the exception details, the same values returned by + :func:`sys.exc_info`. The method's return value controls whether the exception + is re-raised: any false value re-raises the exception, and ``True`` will result + in suppressing it. You'll only rarely want to suppress the exception, because + if you do the author of the code containing the ':keyword:`with`' statement will + never realize anything went wrong. + +* If *BLOCK* didn't raise an exception, the :meth:`__exit__` method is still + called, but *type*, *value*, and *traceback* are all ``None``. + +Let's think through an example. I won't present detailed code but will only +sketch the methods necessary for a database that supports transactions. + +(For people unfamiliar with database terminology: a set of changes to the +database are grouped into a transaction. Transactions can be either committed, +meaning that all the changes are written into the database, or rolled back, +meaning that the changes are all discarded and the database is unchanged. See +any database textbook for more information.) + +Let's assume there's an object representing a database connection. 
Our goal will +be to let the user write code like this:: + + db_connection = DatabaseConnection() + with db_connection as cursor: + cursor.execute('insert into ...') + cursor.execute('delete from ...') + # ... more operations ... + +The transaction should be committed if the code in the block runs flawlessly or +rolled back if there's an exception. Here's the basic interface for +:class:`DatabaseConnection` that I'll assume:: + + class DatabaseConnection: + # Database interface + def cursor (self): + "Returns a cursor object and starts a new transaction" + def commit (self): + "Commits current transaction" + def rollback (self): + "Rolls back current transaction" + +The :meth:`__enter__` method is pretty easy, having only to start a new +transaction. For this application the resulting cursor object would be a useful +result, so the method will return it. The user can then add ``as cursor`` to +their ':keyword:`with`' statement to bind the cursor to a variable name. :: + + class DatabaseConnection: + ... + def __enter__ (self): + # Code to start a new transaction + cursor = self.cursor() + return cursor + +The :meth:`__exit__` method is the most complicated because it's where most of +the work has to be done. The method has to check if an exception occurred. If +there was no exception, the transaction is committed. The transaction is rolled +back if there was an exception. + +In the code below, execution will just fall off the end of the function, +returning the default value of ``None``. ``None`` is false, so the exception +will be re-raised automatically. If you wished, you could be more explicit and +add a :keyword:`return` statement at the marked location. :: + + class DatabaseConnection: + ... + def __exit__ (self, type, value, tb): + if tb is None: + # No exception, so commit + self.commit() + else: + # Exception occurred, so rollback. + self.rollback() + # return False + + +.. 
_module-contextlib: + +The contextlib module +--------------------- + +The new :mod:`contextlib` module provides some functions and a decorator that +are useful for writing objects for use with the ':keyword:`with`' statement. + +The decorator is called :func:`contextmanager`, and lets you write a single +generator function instead of defining a new class. The generator should yield +exactly one value. The code up to the :keyword:`yield` will be executed as the +:meth:`__enter__` method, and the value yielded will be the method's return +value that will get bound to the variable in the ':keyword:`with`' statement's +:keyword:`as` clause, if any. The code after the :keyword:`yield` will be +executed in the :meth:`__exit__` method. Any exception raised in the block will +be raised by the :keyword:`yield` statement. + +Our database example from the previous section could be written using this +decorator as:: + + from contextlib import contextmanager + + @contextmanager + def db_transaction (connection): + cursor = connection.cursor() + try: + yield cursor + except: + connection.rollback() + raise + else: + connection.commit() + + db = DatabaseConnection() + with db_transaction(db) as cursor: + ... + +The :mod:`contextlib` module also has a :func:`nested(mgr1, mgr2, ...)` function +that combines a number of context managers so you don't need to write nested +':keyword:`with`' statements. In this example, the single ':keyword:`with`' +statement both starts a database transaction and acquires a thread lock:: + + lock = threading.Lock() + with nested (db_transaction(db), lock) as (cursor, locked): + ... + +Finally, the :func:`closing(object)` function returns *object* so that it can be +bound to a variable, and calls ``object.close`` at the end of the block. :: + + import urllib, sys + from contextlib import closing + + with closing(urllib.urlopen('http://www.yahoo.com')) as f: + for line in f: + sys.stdout.write(line) + + +.. 
seealso:: + + :pep:`343` - The "with" statement + PEP written by Guido van Rossum and Nick Coghlan; implemented by Mike Bland, + Guido van Rossum, and Neal Norwitz. The PEP shows the code generated for a + ':keyword:`with`' statement, which can be helpful in learning how the statement + works. + + The documentation for the :mod:`contextlib` module. + +.. % ====================================================================== + + +.. _pep-352: + +PEP 352: Exceptions as New-Style Classes +======================================== + +Exception classes can now be new-style classes, not just classic classes, and +the built-in :exc:`Exception` class and all the standard built-in exceptions +(:exc:`NameError`, :exc:`ValueError`, etc.) are now new-style classes. + +The inheritance hierarchy for exceptions has been rearranged a bit. In 2.5, the +inheritance relationships are:: + + BaseException # New in Python 2.5 + |- KeyboardInterrupt + |- SystemExit + |- Exception + |- (all other current built-in exceptions) + +This rearrangement was done because people often want to catch all exceptions +that indicate program errors. :exc:`KeyboardInterrupt` and :exc:`SystemExit` +aren't errors, though, and usually represent an explicit action such as the user +hitting Control-C or code calling :func:`sys.exit`. A bare ``except:`` will +catch all exceptions, so you commonly need to list :exc:`KeyboardInterrupt` and +:exc:`SystemExit` in order to re-raise them. The usual pattern is:: + + try: + ... + except (KeyboardInterrupt, SystemExit): + raise + except: + # Log error... + # Continue running program... + +In Python 2.5, you can now write ``except Exception`` to achieve the same +result, catching all the exceptions that usually indicate errors but leaving +:exc:`KeyboardInterrupt` and :exc:`SystemExit` alone. As in previous versions, +a bare ``except:`` still catches all exceptions. 
+ +The goal for Python 3.0 is to require any class raised as an exception to derive +from :exc:`BaseException` or some descendant of :exc:`BaseException`, and future +releases in the Python 2.x series may begin to enforce this constraint. +Therefore, I suggest you begin making all your exception classes derive from +:exc:`Exception` now. It's been suggested that the bare ``except:`` form should +be removed in Python 3.0, but Guido van Rossum hasn't decided whether to do this +or not. + +Raising of strings as exceptions, as in the statement ``raise "Error +occurred"``, is deprecated in Python 2.5 and will trigger a warning. The aim is +to be able to remove the string-exception feature in a few releases. + + +.. seealso:: + + :pep:`352` - Required Superclass for Exceptions + PEP written by Brett Cannon and Guido van Rossum; implemented by Brett Cannon. + +.. % ====================================================================== + + +.. _pep-353: + +PEP 353: Using ssize_t as the index type +======================================== + +A wide-ranging change to Python's C API, using a new :ctype:`Py_ssize_t` type +definition instead of :ctype:`int`, will permit the interpreter to handle more +data on 64-bit platforms. This change doesn't affect Python's capacity on 32-bit +platforms. + +Various pieces of the Python interpreter used C's :ctype:`int` type to store +sizes or counts; for example, the number of items in a list or tuple was stored +in an :ctype:`int`. The C compilers for most 64-bit platforms still define +:ctype:`int` as a 32-bit type, so that meant that lists could only hold up to +``2**31 - 1`` = 2147483647 items. (There are actually a few different +programming models that 64-bit C compilers can use -- see +http://www.unix.org/version2/whatsnew/lp64_wp.html for a discussion -- but the +most commonly available model leaves :ctype:`int` as 32 bits.) 
+ +A limit of 2147483647 items doesn't really matter on a 32-bit platform because +you'll run out of memory before hitting the length limit. Each list item +requires space for a pointer, which is 4 bytes, plus space for a +:ctype:`PyObject` representing the item. 2147483647\*4 is already more bytes +than a 32-bit address space can contain. + +It's possible to address that much memory on a 64-bit platform, however. The +pointers for a list that size would only require 16 GiB of space, so it's not +unreasonable that Python programmers might construct lists that large. +Therefore, the Python interpreter had to be changed to use some type other than +:ctype:`int`, and this will be a 64-bit type on 64-bit platforms. The change +will cause incompatibilities on 64-bit machines, so it was deemed worth making +the transition now, while the number of 64-bit users is still relatively small. +(In 5 or 10 years, we may *all* be on 64-bit machines, and the transition would +be more painful then.) + +This change most strongly affects authors of C extension modules. Python +strings and container types such as lists and tuples now use +:ctype:`Py_ssize_t` to store their size. Functions such as +:cfunc:`PyList_Size` now return :ctype:`Py_ssize_t`. Code in extension modules +may therefore need to have some variables changed to :ctype:`Py_ssize_t`. + +The :cfunc:`PyArg_ParseTuple` and :cfunc:`Py_BuildValue` functions have a new +conversion code, ``n``, for :ctype:`Py_ssize_t`. :cfunc:`PyArg_ParseTuple`'s +``s#`` and ``t#`` still output :ctype:`int` by default, but you can define the +macro :cmacro:`PY_SSIZE_T_CLEAN` before including :file:`Python.h` to make +them return :ctype:`Py_ssize_t`. + +:pep:`353` has a section on conversion guidelines that extension authors should +read to learn about supporting 64-bit platforms. + + +.. seealso:: + + :pep:`353` - Using ssize_t as the index type + PEP written and implemented by Martin von Löwis. + +.. 
% ====================================================================== + + +.. _pep-357: + +PEP 357: The '__index__' method +=============================== + +The NumPy developers had a problem that could only be solved by adding a new +special method, :meth:`__index__`. When using slice notation, as in +``[start:stop:step]``, the values of the *start*, *stop*, and *step* indexes +must all be either integers or long integers. NumPy defines a variety of +specialized integer types corresponding to unsigned and signed integers of 8, +16, 32, and 64 bits, but there was no way to signal that these types could be +used as slice indexes. + +Slicing can't just use the existing :meth:`__int__` method because that method +is also used to implement coercion to integers. If slicing used +:meth:`__int__`, floating-point numbers would also become legal slice indexes +and that's clearly an undesirable behaviour. + +Instead, a new special method called :meth:`__index__` was added. It takes no +arguments and returns an integer giving the slice index to use. For example:: + + class C: + def __index__ (self): + return self.value + +The return value must be either a Python integer or long integer. The +interpreter will check that the type returned is correct, and raises a +:exc:`TypeError` if this requirement isn't met. + +A corresponding :attr:`nb_index` slot was added to the C-level +:ctype:`PyNumberMethods` structure to let C extensions implement this protocol. +:cfunc:`PyNumber_Index(obj)` can be used in extension code to call the +:meth:`__index__` function and retrieve its result. + + +.. seealso:: + + :pep:`357` - Allowing Any Object to be Used for Slicing + PEP written and implemented by Travis Oliphant. + +.. % ====================================================================== + + +.. _other-lang: + +Other Language Changes +====================== + +Here are all of the changes that Python 2.5 makes to the core Python language. 
+ +* The :class:`dict` type has a new hook for letting subclasses provide a default + value when a key isn't contained in the dictionary. When a key isn't found, the + dictionary's :meth:`__missing__(key)` method will be called. This hook is used + to implement the new :class:`defaultdict` class in the :mod:`collections` + module. The following example defines a dictionary that returns zero for any + missing key:: + + class zerodict (dict): + def __missing__ (self, key): + return 0 + + d = zerodict({1:1, 2:2}) + print d[1], d[2] # Prints 1, 2 + print d[3], d[4] # Prints 0, 0 + +* Both 8-bit and Unicode strings have new :meth:`partition(sep)` and + :meth:`rpartition(sep)` methods that simplify a common use case. + + The :meth:`find(S)` method is often used to get an index which is then used to + slice the string and obtain the pieces that are before and after the separator. + :meth:`partition(sep)` condenses this pattern into a single method call that + returns a 3-tuple containing the substring before the separator, the separator + itself, and the substring after the separator. If the separator isn't found, + the first element of the tuple is the entire string and the other two elements + are empty. :meth:`rpartition(sep)` also returns a 3-tuple but starts searching + from the end of the string; the ``r`` stands for 'reverse'. + + Some examples:: + + >>> ('http://www.python.org').partition('://') + ('http', '://', 'www.python.org') + >>> ('file:/usr/share/doc/index.html').partition('://') + ('file:/usr/share/doc/index.html', '', '') + >>> (u'Subject: a quick question').partition(':') + (u'Subject', u':', u' a quick question') + >>> 'www.python.org'.rpartition('.') + ('www.python', '.', 'org') + >>> 'www.python.org'.rpartition(':') + ('', '', 'www.python.org') + + (Implemented by Fredrik Lundh following a suggestion by Raymond Hettinger.) + +* The :meth:`startswith` and :meth:`endswith` methods of string types now accept + tuples of strings to check for. 
:: + + def is_image_file (filename): + return filename.endswith(('.gif', '.jpg', '.tiff')) + + (Implemented by Georg Brandl following a suggestion by Tom Lynn.) + + .. % RFE #1491485 + +* The :func:`min` and :func:`max` built-in functions gained a ``key`` keyword + parameter analogous to the ``key`` argument for :meth:`sort`. This parameter + supplies a function that takes a single argument and is called for every value + in the list; :func:`min`/:func:`max` will return the element with the + smallest/largest return value from this function. For example, to find the + longest string in a list, you can do:: + + L = ['medium', 'longest', 'short'] + # Prints 'longest' + print max(L, key=len) + # Prints 'short', because lexicographically 'short' has the largest value + print max(L) + + (Contributed by Steven Bethard and Raymond Hettinger.) + +* Two new built-in functions, :func:`any` and :func:`all`, evaluate whether an + iterator contains any true or false values. :func:`any` returns :const:`True` + if any value returned by the iterator is true; otherwise it will return + :const:`False`. :func:`all` returns :const:`True` only if all of the values + returned by the iterator evaluate as true. (Suggested by Guido van Rossum, and + implemented by Raymond Hettinger.) + +* The result of a class's :meth:`__hash__` method can now be either a long + integer or a regular integer. If a long integer is returned, the hash of that + value is taken. In earlier versions the hash value was required to be a + regular integer, but in 2.5 the :func:`id` built-in was changed to always + return non-negative numbers, and users often seem to use ``id(self)`` in + :meth:`__hash__` methods (though this is discouraged). + + .. % Bug #1536021 + +* ASCII is now the default encoding for modules. It's now a syntax error if a + module contains string literals with 8-bit characters but doesn't have an + encoding declaration. In Python 2.4 this triggered a warning, not a syntax + error. 
See :pep:`263` for how to declare a module's encoding; for example, you + might add a line like this near the top of the source file:: + + # -*- coding: latin1 -*- + +* A new warning, :class:`UnicodeWarning`, is triggered when you attempt to + compare a Unicode string and an 8-bit string that can't be converted to Unicode + using the default ASCII encoding. The result of the comparison is false:: + + >>> chr(128) == unichr(128) # Can't convert chr(128) to Unicode + __main__:1: UnicodeWarning: Unicode equal comparison failed + to convert both arguments to Unicode - interpreting them + as being unequal + False + >>> chr(127) == unichr(127) # chr(127) can be converted + True + + Previously this would raise a :class:`UnicodeDecodeError` exception, but in 2.5 + this could result in puzzling problems when accessing a dictionary. If you + looked up ``unichr(128)`` and ``chr(128)`` was being used as a key, you'd get a + :class:`UnicodeDecodeError` exception. Other changes in 2.5 resulted in this + exception being raised instead of suppressed by the code in :file:`dictobject.c` + that implements dictionaries. + + Raising an exception for such a comparison is strictly correct, but the change + might have broken code, so instead :class:`UnicodeWarning` was introduced. + + (Implemented by Marc-André Lemburg.) + +* One error that Python programmers sometimes make is forgetting to include an + :file:`__init__.py` module in a package directory. Debugging this mistake can be + confusing, and usually requires running Python with the :option:`-v` switch to + log all the paths searched. In Python 2.5, a new :exc:`ImportWarning` warning is + triggered when an import would have picked up a directory as a package but no + :file:`__init__.py` was found. This warning is silently ignored by default; + provide the :option:`-Wd` option when running the Python executable to display + the warning message. (Implemented by Thomas Wouters.) 
+ +* The list of base classes in a class definition can now be empty. As an + example, this is now legal:: + + class C(): + pass + + (Implemented by Brett Cannon.) + +.. % ====================================================================== + + +.. _interactive: + +Interactive Interpreter Changes +------------------------------- + +In the interactive interpreter, ``quit`` and ``exit`` have long been strings so +that new users get a somewhat helpful message when they try to quit:: + + >>> quit + 'Use Ctrl-D (i.e. EOF) to exit.' + +In Python 2.5, ``quit`` and ``exit`` are now objects that still produce string +representations of themselves, but are also callable. Newbies who try ``quit()`` +or ``exit()`` will now exit the interpreter as they expect. (Implemented by +Georg Brandl.) + +The Python executable now accepts the standard long options :option:`--help` +and :option:`--version`; on Windows, it also accepts the :option:`/?` option +for displaying a help message. (Implemented by Georg Brandl.) + +.. % ====================================================================== + + +.. _opts: + +Optimizations +------------- + +Several of the optimizations were developed at the NeedForSpeed sprint, an event +held in Reykjavik, Iceland, from May 21--28 2006. The sprint focused on speed +enhancements to the CPython implementation and was funded by EWT LLC with local +support from CCP Games. Those optimizations added at this sprint are specially +marked in the following list. + +* When they were introduced in Python 2.4, the built-in :class:`set` and + :class:`frozenset` types were built on top of Python's dictionary type. In 2.5 + the internal data structure has been customized for implementing sets, and as a + result sets will use a third less memory and are somewhat faster. (Implemented + by Raymond Hettinger.) + +* The speed of some Unicode operations, such as finding substrings, string + splitting, and character map encoding and decoding, has been improved. 
+ (Substring search and splitting improvements were added by Fredrik Lundh and + Andrew Dalke at the NeedForSpeed sprint. Character maps were improved by Walter + Dörwald and Martin von Löwis.) + + .. % Patch 1313939, 1359618 + +* The :func:`long(str, base)` function is now faster on long digit strings + because fewer intermediate results are calculated. The peak is for strings of + around 800--1000 digits where the function is 6 times faster. (Contributed by + Alan McIntyre and committed at the NeedForSpeed sprint.) + + .. % Patch 1442927 + +* It's now illegal to mix iterating over a file with ``for line in file`` and + calling the file object's :meth:`read`/:meth:`readline`/:meth:`readlines` + methods. Iteration uses an internal buffer and the :meth:`read\*` methods + don't use that buffer. Instead they would return the data following the + buffer, causing the data to appear out of order. Mixing iteration and these + methods will now trigger a :exc:`ValueError` from the :meth:`read\*` method. + (Implemented by Thomas Wouters.) + + .. % Patch 1397960 + +* The :mod:`struct` module now compiles structure format strings into an + internal representation and caches this representation, yielding a 20% speedup. + (Contributed by Bob Ippolito at the NeedForSpeed sprint.) + +* The :mod:`re` module got a 1 or 2% speedup by switching to Python's allocator + functions instead of the system's :cfunc:`malloc` and :cfunc:`free`. + (Contributed by Jack Diederich at the NeedForSpeed sprint.) + +* The code generator's peephole optimizer now performs simple constant folding + in expressions. If you write something like ``a = 2+3``, the code generator + will do the arithmetic and produce code corresponding to ``a = 5``. (Proposed + and implemented by Raymond Hettinger.) + +* Function calls are now faster because code objects now keep the most recently + finished frame (a "zombie frame") in an internal field of the code object, + reusing it the next time the code object is invoked. 
(Original patch by Michael + Hudson, modified by Armin Rigo and Richard Jones; committed at the NeedForSpeed + sprint.) Frame objects are also slightly smaller, which may improve cache + locality and reduce memory usage a bit. (Contributed by Neal Norwitz.) + + .. % Patch 876206 + .. % Patch 1337051 + +* Python's built-in exceptions are now new-style classes, a change that speeds + up instantiation considerably. Exception handling in Python 2.5 is therefore + about 30% faster than in 2.4. (Contributed by Richard Jones, Georg Brandl and + Sean Reifschneider at the NeedForSpeed sprint.) + +* Importing now caches the paths tried, recording whether they exist or not so + that the interpreter makes fewer :cfunc:`open` and :cfunc:`stat` calls on + startup. (Contributed by Martin von Löwis and Georg Brandl.) + + .. % Patch 921466 + +.. % ====================================================================== + + +.. _modules: + +New, Improved, and Removed Modules +================================== + +The standard library received many enhancements and bug fixes in Python 2.5. +Here's a partial list of the most notable changes, sorted alphabetically by +module name. Consult the :file:`Misc/NEWS` file in the source tree for a more +complete list of changes, or look through the SVN logs for all the details. + +* The :mod:`audioop` module now supports the a-LAW encoding, and the code for + u-LAW encoding has been improved. (Contributed by Lars Immisch.) + +* The :mod:`codecs` module gained support for incremental codecs. The + :func:`codecs.lookup` function now returns a :class:`CodecInfo` instance instead + of a tuple. :class:`CodecInfo` instances behave like a 4-tuple to preserve + backward compatibility but also have the attributes :attr:`encode`, + :attr:`decode`, :attr:`incrementalencoder`, :attr:`incrementaldecoder`, + :attr:`streamwriter`, and :attr:`streamreader`. 
Incremental codecs can receive + input and produce output in multiple chunks; the output is the same as if the + entire input was fed to the non-incremental codec. See the :mod:`codecs` module + documentation for details. (Designed and implemented by Walter Dörwald.) + + .. % Patch 1436130 + +* The :mod:`collections` module gained a new type, :class:`defaultdict`, that + subclasses the standard :class:`dict` type. The new type mostly behaves like a + dictionary but constructs a default value when a key isn't present, + automatically adding it to the dictionary for the requested key value. + + The first argument to :class:`defaultdict`'s constructor is a factory function + that gets called whenever a key is requested but not found. This factory + function receives no arguments, so you can use built-in type constructors such + as :func:`list` or :func:`int`. For example, you can make an index of words + based on their initial letter like this:: + + words = """Nel mezzo del cammin di nostra vita + mi ritrovai per una selva oscura + che la diritta via era smarrita""".lower().split() + + index = defaultdict(list) + + for w in words: + init_letter = w[0] + index[init_letter].append(w) + + Printing ``index`` results in the following output:: + + defaultdict(<type 'list'>, {'c': ['cammin', 'che'], 'e': ['era'], + 'd': ['del', 'di', 'diritta'], 'm': ['mezzo', 'mi'], + 'l': ['la'], 'o': ['oscura'], 'n': ['nel', 'nostra'], + 'p': ['per'], 's': ['selva', 'smarrita'], + 'r': ['ritrovai'], 'u': ['una'], 'v': ['vita', 'via']}) + + (Contributed by Guido van Rossum.) + +* The :class:`deque` double-ended queue type supplied by the :mod:`collections` + module now has a :meth:`remove(value)` method that removes the first occurrence + of *value* in the queue, raising :exc:`ValueError` if the value isn't found. + (Contributed by Raymond Hettinger.) + +* New module: The :mod:`contextlib` module contains helper functions for use + with the new ':keyword:`with`' statement. 
See section :ref:`module-contextlib` + for more about this module. + +* New module: The :mod:`cProfile` module is a C implementation of the existing + :mod:`profile` module that has much lower overhead. The module's interface is + the same as :mod:`profile`: you run ``cProfile.run('main()')`` to profile a + function, can save profile data to a file, etc. It's not yet known if the + Hotshot profiler, which is also written in C but doesn't match the + :mod:`profile` module's interface, will continue to be maintained in future + versions of Python. (Contributed by Armin Rigo.) + + Also, the :mod:`pstats` module for analyzing the data measured by the profiler + now supports directing the output to any file object by supplying a *stream* + argument to the :class:`Stats` constructor. (Contributed by Skip Montanaro.) + +* The :mod:`csv` module, which parses files in comma-separated value format, + received several enhancements and a number of bugfixes. You can now set the + maximum size in bytes of a field by calling the + :meth:`csv.field_size_limit(new_limit)` function; omitting the *new_limit* + argument will return the currently-set limit. The :class:`reader` class now has + a :attr:`line_num` attribute that counts the number of physical lines read from + the source; records can span multiple physical lines, so :attr:`line_num` is not + the same as the number of records read. + + The CSV parser is now stricter about multi-line quoted fields. Previously, if a + line ended within a quoted field without a terminating newline character, a + newline would be inserted into the returned field. This behavior caused problems + when reading files that contained carriage return characters within fields, so + the code was changed to return the field without inserting newlines. As a + consequence, if newlines embedded within fields are important, the input should + be split into lines in a manner that preserves the newline characters. 
+ + (Contributed by Skip Montanaro and Andrew McNamara.) + +* The :class:`datetime` class in the :mod:`datetime` module now has a + :meth:`strptime(string, format)` method for parsing date strings, contributed + by Josh Spoerri. It uses the same format characters as :func:`time.strptime` and + :func:`time.strftime`:: + + from datetime import datetime + + ts = datetime.strptime('10:13:15 2006-03-07', + '%H:%M:%S %Y-%m-%d') + +* The :meth:`SequenceMatcher.get_matching_blocks` method in the :mod:`difflib` + module now guarantees to return a minimal list of blocks describing matching + subsequences. Previously, the algorithm would occasionally break a block of + matching elements into two list entries. (Enhancement by Tim Peters.) + +* The :mod:`doctest` module gained a ``SKIP`` option that keeps an example from + being executed at all. This is intended for code snippets that are usage + examples intended for the reader and aren't actually test cases. + + An *encoding* parameter was added to the :func:`testfile` function and the + :class:`DocFileSuite` class to specify the file's encoding. This makes it + easier to use non-ASCII characters in tests contained within a docstring. + (Contributed by Bjorn Tillenius.) + + .. % Patch 1080727 + +* The :mod:`email` package has been updated to version 4.0. (Contributed by + Barry Warsaw.) + + .. % XXX need to provide some more detail here + +* The :mod:`fileinput` module was made more flexible. Unicode filenames are now + supported, and a *mode* parameter that defaults to ``"r"`` was added to the + :func:`input` function to allow opening files in binary or universal-newline + mode. Another new parameter, *openhook*, lets you use a function other than + :func:`open` to open the input files. Once you're iterating over the set of + files, the :class:`FileInput` object's new :meth:`fileno` returns the file + descriptor for the currently opened file. (Contributed by Georg Brandl.) 
+ +* In the :mod:`gc` module, the new :func:`get_count` function returns a 3-tuple + containing the current collection counts for the three GC generations. This is + accounting information for the garbage collector; when these counts reach a + specified threshold, a garbage collection sweep will be made. The existing + :func:`gc.collect` function now takes an optional *generation* argument of 0, 1, + or 2 to specify which generation to collect. (Contributed by Barry Warsaw.) + +* The :func:`nsmallest` and :func:`nlargest` functions in the :mod:`heapq` + module now support a ``key`` keyword parameter similar to the one provided by + the :func:`min`/:func:`max` functions and the :meth:`sort` methods. For + example:: + + >>> import heapq + >>> L = ["short", 'medium', 'longest', 'longer still'] + >>> heapq.nsmallest(2, L) # Return two lowest elements, lexicographically + ['longer still', 'longest'] + >>> heapq.nsmallest(2, L, key=len) # Return two shortest elements + ['short', 'medium'] + + (Contributed by Raymond Hettinger.) + +* The :func:`itertools.islice` function now accepts ``None`` for the start and + step arguments. This makes it more compatible with the attributes of slice + objects, so that you can now write the following:: + + s = slice(5) # Create slice object + itertools.islice(iterable, s.start, s.stop, s.step) + + (Contributed by Raymond Hettinger.) + +* The :func:`format` function in the :mod:`locale` module has been modified and + two new functions were added, :func:`format_string` and :func:`currency`. + + The :func:`format` function's *val* parameter could previously be a string as + long as no more than one %char specifier appeared; now the parameter must be + exactly one %char specifier with no surrounding text. An optional *monetary* + parameter was also added which, if ``True``, will use the locale's rules for + formatting currency in placing a separator between groups of three digits. 
+ + To format strings with multiple %char specifiers, use the new + :func:`format_string` function that works like :func:`format` but also supports + mixing %char specifiers with arbitrary text. + + A new :func:`currency` function was also added that formats a number according + to the current locale's settings. + + (Contributed by Georg Brandl.) + + .. % Patch 1180296 + +* The :mod:`mailbox` module underwent a massive rewrite to add the capability to + modify mailboxes in addition to reading them. A new set of classes that include + :class:`mbox`, :class:`MH`, and :class:`Maildir` are used to read mailboxes, and + have an :meth:`add(message)` method to add messages, :meth:`remove(key)` to + remove messages, and :meth:`lock`/:meth:`unlock` to lock/unlock the mailbox. + The following example converts a maildir-format mailbox into an mbox-format + one:: + + import mailbox + + # 'factory=None' uses email.Message.Message as the class representing + # individual messages. + src = mailbox.Maildir('maildir', factory=None) + dest = mailbox.mbox('/tmp/mbox') + + for msg in src: + dest.add(msg) + + (Contributed by Gregory K. Johnson. Funding was provided by Google's 2005 + Summer of Code.) + +* New module: the :mod:`msilib` module allows creating Microsoft Installer + :file:`.msi` files and CAB files. Some support for reading the :file:`.msi` + database is also included. (Contributed by Martin von Löwis.) + +* The :mod:`nis` module now supports accessing domains other than the system + default domain by supplying a *domain* argument to the :func:`nis.match` and + :func:`nis.maps` functions. (Contributed by Ben Bell.) + +* The :mod:`operator` module's :func:`itemgetter` and :func:`attrgetter` + functions now support multiple fields. A call such as + ``operator.attrgetter('a', 'b')`` will return a function that retrieves the + :attr:`a` and :attr:`b` attributes. 
Combining this new feature with the + :meth:`sort` method's ``key`` parameter lets you easily sort lists using + multiple fields. (Contributed by Raymond Hettinger.) + +* The :mod:`optparse` module was updated to version 1.5.1 of the Optik library. + The :class:`OptionParser` class gained an :attr:`epilog` attribute, a string + that will be printed after the help message, and a :meth:`destroy` method to + break reference cycles created by the object. (Contributed by Greg Ward.) + +* The :mod:`os` module underwent several changes. The :attr:`stat_float_times` + variable now defaults to true, meaning that :func:`os.stat` will now return time + values as floats. (This doesn't necessarily mean that :func:`os.stat` will + return times that are precise to fractions of a second; not all systems support + such precision.) + + Constants named :attr:`os.SEEK_SET`, :attr:`os.SEEK_CUR`, and + :attr:`os.SEEK_END` have been added; these are the parameters to the + :func:`os.lseek` function. Two new constants for locking are + :attr:`os.O_SHLOCK` and :attr:`os.O_EXLOCK`. + + Two new functions, :func:`wait3` and :func:`wait4`, were added. They're similar + to the :func:`waitpid` function which waits for a child process to exit and returns + a tuple of the process ID and its exit status, but :func:`wait3` and + :func:`wait4` return additional information. :func:`wait3` doesn't take a + process ID as input, so it waits for any child process to exit and returns a + 3-tuple of *process-id*, *exit-status*, *resource-usage* as returned from the + :func:`resource.getrusage` function. :func:`wait4(pid)` does take a process ID. + (Contributed by Chad J. Schroeder.) + + On FreeBSD, the :func:`os.stat` function now returns times with nanosecond + resolution, and the returned object now has :attr:`st_gen` and + :attr:`st_birthtime`. The :attr:`st_flags` member is also available, if the + platform supports it. (Contributed by Antti Louko and Diego Pettenò.) + + .. 
% (Patch 1180695, 1212117) + +* The Python debugger provided by the :mod:`pdb` module can now store lists of + commands to execute when a breakpoint is reached and execution stops. Once + breakpoint #1 has been created, enter ``commands 1`` and enter a series of + commands to be executed, finishing the list with ``end``. The command list can + include commands that resume execution, such as ``continue`` or ``next``. + (Contributed by Grégoire Dooms.) + + .. % Patch 790710 + +* The :mod:`pickle` and :mod:`cPickle` modules no longer accept a return value + of ``None`` from the :meth:`__reduce__` method; the method must return a tuple + of arguments instead. The ability to return ``None`` was deprecated in Python + 2.4, so this completes the removal of the feature. + +* The :mod:`pkgutil` module, containing various utility functions for finding + packages, was enhanced to support PEP 302's import hooks and now also works for + packages stored in ZIP-format archives. (Contributed by Phillip J. Eby.) + +* The pybench benchmark suite by Marc-André Lemburg is now included in the + :file:`Tools/pybench` directory. The pybench suite is an improvement on the + commonly used :file:`pystone.py` program because pybench provides a more + detailed measurement of the interpreter's speed. It times particular operations + such as function calls, tuple slicing, method lookups, and numeric operations, + instead of performing many different operations and reducing the result to a + single number as :file:`pystone.py` does. + +* The :mod:`pyexpat` module now uses version 2.0 of the Expat parser. + (Contributed by Trent Mick.) + +* The :class:`Queue` class provided by the :mod:`Queue` module gained two new + methods. :meth:`join` blocks until all items in the queue have been retrieved + and all processing work on the items has been completed. Worker threads call + the other new method, :meth:`task_done`, to signal that processing for an item + has been completed. 
(Contributed by Raymond Hettinger.) + +* The old :mod:`regex` and :mod:`regsub` modules, which have been deprecated + ever since Python 2.0, have finally been deleted. Other deleted modules: + :mod:`statcache`, :mod:`tzparse`, :mod:`whrandom`. + +* The :file:`lib-old` directory, which included ancient modules + such as :mod:`dircmp` and :mod:`ni`, was also removed. :file:`lib-old` wasn't on the + default ``sys.path``, so unless your programs explicitly added the directory to + ``sys.path``, this removal shouldn't affect your code. + +* The :mod:`rlcompleter` module is no longer dependent on importing the + :mod:`readline` module and therefore now works on non-Unix platforms. (Patch + from Robert Kiendl.) + + .. % Patch #1472854 + +* The :mod:`SimpleXMLRPCServer` and :mod:`DocXMLRPCServer` classes now have a + :attr:`rpc_paths` attribute that constrains XML-RPC operations to a limited set + of URL paths; the default is to allow only ``'/'`` and ``'/RPC2'``. Setting + :attr:`rpc_paths` to ``None`` or an empty tuple disables this path checking. + + .. % Bug #1473048 + +* The :mod:`socket` module now supports :const:`AF_NETLINK` sockets on Linux, + thanks to a patch from Philippe Biondi. Netlink sockets are a Linux-specific + mechanism for communications between a user-space process and kernel code; an + introductory article about them is at http://www.linuxjournal.com/article/7356. + In Python code, netlink addresses are represented as a tuple of 2 integers, + ``(pid, group_mask)``. + + Two new methods on socket objects, :meth:`recv_into(buffer)` and + :meth:`recvfrom_into(buffer)`, store the received data in an object that + supports the buffer protocol instead of returning the data as a string. This + means you can put the data directly into an array or a memory-mapped file. + + Socket objects also gained :meth:`getfamily`, :meth:`gettype`, and + :meth:`getproto` accessor methods to retrieve the family, type, and protocol + values for the socket. 
+ +* New module: the :mod:`spwd` module provides functions for accessing the shadow + password database on systems that support shadow passwords. + +* The :mod:`struct` module is now faster because it compiles format strings into + :class:`Struct` objects with :meth:`pack` and :meth:`unpack` methods. This is + similar to how the :mod:`re` module lets you create compiled regular expression + objects. You can still use the module-level :func:`pack` and :func:`unpack` + functions; they'll create :class:`Struct` objects and cache them. Or you can + use :class:`Struct` instances directly:: + + s = struct.Struct('ih3s') + + data = s.pack(1972, 187, 'abc') + year, number, name = s.unpack(data) + + You can also pack and unpack data to and from buffer objects directly using the + :meth:`pack_into(buffer, offset, v1, v2, ...)` and :meth:`unpack_from(buffer, + offset)` methods. This lets you store data directly into an array or a + memory-mapped file. + + (:class:`Struct` objects were implemented by Bob Ippolito at the NeedForSpeed + sprint. Support for buffer objects was added by Martin Blais, also at the + NeedForSpeed sprint.) + +* The Python developers switched from CVS to Subversion during the 2.5 + development process. Information about the exact build version is available as + the ``sys.subversion`` variable, a 3-tuple of ``(interpreter-name, branch-name, + revision-range)``. For example, at the time of writing my copy of 2.5 was + reporting ``('CPython', 'trunk', '45313:45315')``. + + This information is also available to C extensions via the + :cfunc:`Py_GetBuildInfo` function that returns a string of build information + like this: ``"trunk:45355:45356M, Apr 13 2006, 07:42:19"``. (Contributed by + Barry Warsaw.) + +* Another new function, :func:`sys._current_frames`, returns the current stack + frames for all running threads as a dictionary mapping thread identifiers to the + topmost stack frame currently active in that thread at the time the function is + called. 
(Contributed by Tim Peters.) + +* The :class:`TarFile` class in the :mod:`tarfile` module now has an + :meth:`extractall` method that extracts all members from the archive into the + current working directory. It's also possible to set a different directory as + the extraction target, and to unpack only a subset of the archive's members. + + The compression used for a tarfile opened in stream mode can now be autodetected + using the mode ``'r|*'``. (Contributed by Lars Gustäbel.) + + .. % patch 918101 + +* The :mod:`threading` module now lets you set the stack size used when new + threads are created. The :func:`stack_size([*size*])` function returns the + currently configured stack size, and supplying the optional *size* parameter + sets a new value. Not all platforms support changing the stack size, but + Windows, POSIX threading, and OS/2 all do. (Contributed by Andrew MacIntyre.) + + .. % Patch 1454481 + +* The :mod:`unicodedata` module has been updated to use version 4.1.0 of the + Unicode character database. Version 3.2.0 is required by some specifications, + so it's still available as :attr:`unicodedata.ucd_3_2_0`. + +* New module: the :mod:`uuid` module generates universally unique identifiers + (UUIDs) according to :rfc:`4122`. The RFC defines several different UUID + versions that are generated from a starting string, from system properties, or + purely randomly. This module contains a :class:`UUID` class and functions + named :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, and :func:`uuid5` to + generate different versions of UUID. (Version 2 UUIDs are not specified in + :rfc:`4122` and are not supported by this module.) 
:: + + >>> import uuid + >>> # make a UUID based on the host ID and current time + >>> uuid.uuid1() + UUID('a8098c1a-f86e-11da-bd1a-00112444be1e') + + >>> # make a UUID using an MD5 hash of a namespace UUID and a name + >>> uuid.uuid3(uuid.NAMESPACE_DNS, 'python.org') + UUID('6fa459ea-ee8a-3ca4-894e-db77e160355e') + + >>> # make a random UUID + >>> uuid.uuid4() + UUID('16fd2706-8baf-433b-82eb-8c7fada847da') + + >>> # make a UUID using a SHA-1 hash of a namespace UUID and a name + >>> uuid.uuid5(uuid.NAMESPACE_DNS, 'python.org') + UUID('886313e1-3b8a-5372-9b90-0c9aee199e5d') + + (Contributed by Ka-Ping Yee.) + +* The :mod:`weakref` module's :class:`WeakKeyDictionary` and + :class:`WeakValueDictionary` types gained new methods for iterating over the + weak references contained in the dictionary. :meth:`iterkeyrefs` and + :meth:`keyrefs` methods were added to :class:`WeakKeyDictionary`, and + :meth:`itervaluerefs` and :meth:`valuerefs` were added to + :class:`WeakValueDictionary`. (Contributed by Fred L. Drake, Jr.) + +* The :mod:`webbrowser` module received a number of enhancements. It's now + usable as a script with ``python -m webbrowser``, taking a URL as the argument; + there are a number of switches to control the behaviour (:option:`-n` for a new + browser window, :option:`-t` for a new tab). New module-level functions, + :func:`open_new` and :func:`open_new_tab`, were added to support this. The + module's :func:`open` function supports an additional feature, an *autoraise* + parameter that signals whether to raise the open window when possible. A number + of additional browsers were added to the supported list such as Firefox, Opera, + Konqueror, and elinks. (Contributed by Oleg Broytmann and Georg Brandl.) + + .. % Patch #754022 + +* The :mod:`xmlrpclib` module now supports returning :class:`datetime` objects + for the XML-RPC date type. Supply ``use_datetime=True`` to the :func:`loads` + function or the :class:`Unmarshaller` class to enable this feature. 
(Contributed + by Skip Montanaro.) + + .. % Patch 1120353 + +* The :mod:`zipfile` module now supports the ZIP64 version of the format, + meaning that a .zip archive can now be larger than 4 GiB and can contain + individual files larger than 4 GiB. (Contributed by Ronald Oussoren.) + + .. % Patch 1446489 + +* The :mod:`zlib` module's :class:`Compress` and :class:`Decompress` objects now + support a :meth:`copy` method that makes a copy of the object's internal state + and returns a new :class:`Compress` or :class:`Decompress` object. + (Contributed by Chris AtLee.) + + .. % Patch 1435422 + +.. % ====================================================================== + + +.. _module-ctypes: + +The ctypes package +------------------ + +The :mod:`ctypes` package, written by Thomas Heller, has been added to the +standard library. :mod:`ctypes` lets you call arbitrary functions in shared +libraries or DLLs. Long-time users may remember the :mod:`dl` module, which +provides functions for loading shared libraries and calling functions in them. +The :mod:`ctypes` package is much fancier. + +To load a shared library or DLL, you must create an instance of the +:class:`CDLL` class and provide the name or path of the shared library or DLL. +Once that's done, you can call arbitrary functions by accessing them as +attributes of the :class:`CDLL` object. :: + + import ctypes + + libc = ctypes.CDLL('libc.so.6') + result = libc.printf("Line of output\n") + +Type constructors for the various C types are provided: :func:`c_int`, +:func:`c_float`, :func:`c_double`, :func:`c_char_p` (equivalent to :ctype:`char +\*`), and so forth. Unlike Python's types, the C versions are all mutable; you +can assign to their :attr:`value` attribute to change the wrapped value. Python +integers and strings will be automatically converted to the corresponding C +types, but for other types you must call the correct type constructor. 
(And I +mean *must*; getting it wrong will often result in the interpreter crashing +with a segmentation fault.) + +You shouldn't use :func:`c_char_p` with a Python string when the C function will +be modifying the memory area, because Python strings are supposed to be +immutable; breaking this rule will cause puzzling bugs. When you need a +modifiable memory area, use :func:`create_string_buffer`:: + + s = "this is a string" + buf = ctypes.create_string_buffer(s) + libc.strfry(buf) + +C functions are assumed to return integers, but you can set the :attr:`restype` +attribute of the function object to change this:: + + >>> libc.atof('2.71828') + -1783957616 + >>> libc.atof.restype = ctypes.c_double + >>> libc.atof('2.71828') + 2.71828 + +:mod:`ctypes` also provides a wrapper for Python's C API as the +``ctypes.pythonapi`` object. This object does *not* release the global +interpreter lock before calling a function, because the lock must be held when +calling into the interpreter's code. There's a :class:`py_object()` type +constructor that will create a :ctype:`PyObject \*` pointer. A simple usage:: + + import ctypes + + d = {} + ctypes.pythonapi.PyObject_SetItem(ctypes.py_object(d), + ctypes.py_object("abc"), ctypes.py_object(1)) + # d is now {'abc': 1}. + +Don't forget to use :class:`py_object()`; if it's omitted you end up with a +segmentation fault. + +:mod:`ctypes` has been around for a while, but people still write and +distribute hand-coded extension modules because you can't rely on +:mod:`ctypes` being present. Perhaps developers will begin to write Python +wrappers atop a library accessed through :mod:`ctypes` instead of extension +modules, now that :mod:`ctypes` is included with core Python. + + +.. seealso:: + + http://starship.python.net/crew/theller/ctypes/ + The ctypes web page, with a tutorial, reference, and FAQ. + + The documentation for the :mod:`ctypes` module. + +.. 
% ====================================================================== + + +.. _module-etree: + +The ElementTree package +----------------------- + +A subset of Fredrik Lundh's ElementTree library for processing XML has been +added to the standard library as :mod:`xml.etree`. The available modules are +:mod:`ElementTree`, :mod:`ElementPath`, and :mod:`ElementInclude` from +ElementTree 1.2.6. The :mod:`cElementTree` accelerator module is also +included. + +The rest of this section will provide a brief overview of using ElementTree. +Full documentation for ElementTree is available at +http://effbot.org/zone/element-index.htm. + +ElementTree represents an XML document as a tree of element nodes. The text +content of the document is stored as the :attr:`.text` and :attr:`.tail` +attributes of each element node. (This is one of the major differences between ElementTree and +the Document Object Model; in the DOM there are many different types of node, +including :class:`TextNode`.) + +The most commonly used parsing function is :func:`parse`, which takes either a +string (assumed to contain a filename) or a file-like object and returns an +:class:`ElementTree` instance:: + + from xml.etree import ElementTree as ET + + tree = ET.parse('ex-1.xml') + + feed = urllib.urlopen( + 'http://planet.python.org/rss10.xml') + tree = ET.parse(feed) + +Once you have an :class:`ElementTree` instance, you can call its :meth:`getroot` +method to get the root :class:`Element` node. + +There's also an :func:`XML` function that takes a string literal and returns an +:class:`Element` node (not an :class:`ElementTree`). This function provides a +tidy way to incorporate XML fragments, approaching the convenience of an XML +literal:: + + svg = ET.XML("""<svg width="10px" version="1.0"> + </svg>""") + svg.set('height', '320px') + svg.append(elem1) + +Each XML element supports some dictionary-like and some list-like access +methods. 
Dictionary-like operations are used to access attribute values, and +list-like operations are used to access child nodes. + ++-------------------------------+--------------------------------------------+ +| Operation | Result | ++===============================+============================================+ +| ``elem[n]`` | Returns n'th child element. | ++-------------------------------+--------------------------------------------+ +| ``elem[m:n]`` | Returns list of m'th through n'th child | +| | elements. | ++-------------------------------+--------------------------------------------+ +| ``len(elem)`` | Returns number of child elements. | ++-------------------------------+--------------------------------------------+ +| ``list(elem)`` | Returns list of child elements. | ++-------------------------------+--------------------------------------------+ +| ``elem.append(elem2)`` | Adds *elem2* as a child. | ++-------------------------------+--------------------------------------------+ +| ``elem.insert(index, elem2)`` | Inserts *elem2* at the specified location. | ++-------------------------------+--------------------------------------------+ +| ``del elem[n]`` | Deletes n'th child element. | ++-------------------------------+--------------------------------------------+ +| ``elem.keys()`` | Returns list of attribute names. | ++-------------------------------+--------------------------------------------+ +| ``elem.get(name)`` | Returns value of attribute *name*. | ++-------------------------------+--------------------------------------------+ +| ``elem.set(name, value)`` | Sets new value for attribute *name*. | ++-------------------------------+--------------------------------------------+ +| ``elem.attrib`` | Retrieves the dictionary containing | +| | attributes. | ++-------------------------------+--------------------------------------------+ +| ``del elem.attrib[name]`` | Deletes attribute *name*. 
| ++-------------------------------+--------------------------------------------+ + +Comments and processing instructions are also represented as :class:`Element` +nodes. To check if a node is a comment or processing instructions:: + + if elem.tag is ET.Comment: + ... + elif elem.tag is ET.ProcessingInstruction: + ... + +To generate XML output, you should call the :meth:`ElementTree.write` method. +Like :func:`parse`, it can take either a string or a file-like object:: + + # Encoding is US-ASCII + tree.write('output.xml') + + # Encoding is UTF-8 + f = open('output.xml', 'w') + tree.write(f, encoding='utf-8') + +(Caution: the default encoding used for output is ASCII. For general XML work, +where an element's name may contain arbitrary Unicode characters, ASCII isn't a +very useful encoding because it will raise an exception if an element's name +contains any characters with values greater than 127. Therefore, it's best to +specify a different encoding such as UTF-8 that can handle any Unicode +character.) + +This section is only a partial description of the ElementTree interfaces. Please +read the package's official documentation for more details. + + +.. seealso:: + + http://effbot.org/zone/element-index.htm + Official documentation for ElementTree. + +.. % ====================================================================== + + +.. _module-hashlib: + +The hashlib package +------------------- + +A new :mod:`hashlib` module, written by Gregory P. Smith, has been added to +replace the :mod:`md5` and :mod:`sha` modules. :mod:`hashlib` adds support for +additional secure hashes (SHA-224, SHA-256, SHA-384, and SHA-512). When +available, the module uses OpenSSL for fast platform optimized implementations +of algorithms. + +The old :mod:`md5` and :mod:`sha` modules still exist as wrappers around hashlib +to preserve backwards compatibility. The new module's interface is very close +to that of the old modules, but not identical. 
The most significant difference +is that the constructor functions for creating new hashing objects are named +differently. :: + + # Old versions + h = md5.md5() + h = md5.new() + + # New version + h = hashlib.md5() + + # Old versions + h = sha.sha() + h = sha.new() + + # New version + h = hashlib.sha1() + + # Hashes that weren't previously available + h = hashlib.sha224() + h = hashlib.sha256() + h = hashlib.sha384() + h = hashlib.sha512() + + # Alternative form + h = hashlib.new('md5') # Provide algorithm as a string + +Once a hash object has been created, its methods are the same as before: +:meth:`update(string)` hashes the specified string into the current digest +state, :meth:`digest` and :meth:`hexdigest` return the digest value as a binary +string or a string of hex digits, and :meth:`copy` returns a new hashing object +with the same digest state. + + +.. seealso:: + + The documentation for the :mod:`hashlib` module. + +.. % ====================================================================== + + +.. _module-sqlite: + +The sqlite3 package +------------------- + +The pysqlite module (http://www.pysqlite.org), a wrapper for the SQLite embedded +database, has been added to the standard library under the package name +:mod:`sqlite3`. + +SQLite is a C library that provides a lightweight disk-based database that +doesn't require a separate server process and allows accessing the database +using a nonstandard variant of the SQL query language. Some applications can use +SQLite for internal data storage. It's also possible to prototype an +application using SQLite and then port the code to a larger database such as +PostgreSQL or Oracle. + +pysqlite was written by Gerhard Häring and provides a SQL interface compliant +with the DB-API 2.0 specification described by :pep:`249`. + +If you're compiling the Python source yourself, note that the source tree +doesn't include the SQLite code, only the wrapper module. 
You'll need to have +the SQLite libraries and headers installed before compiling Python, and the +build process will compile the module when the necessary headers are available. + +To use the module, you must first create a :class:`Connection` object that +represents the database. Here the data will be stored in the +:file:`/tmp/example` file:: + + conn = sqlite3.connect('/tmp/example') + +You can also supply the special name ``:memory:`` to create a database in RAM. + +Once you have a :class:`Connection`, you can create a :class:`Cursor` object +and call its :meth:`execute` method to perform SQL commands:: + + c = conn.cursor() + + # Create table + c.execute('''create table stocks + (date text, trans text, symbol text, + qty real, price real)''') + + # Insert a row of data + c.execute("""insert into stocks + values ('2006-01-05','BUY','RHAT',100,35.14)""") + +Usually your SQL operations will need to use values from Python variables. You +shouldn't assemble your query using Python's string operations because doing so +is insecure; it makes your program vulnerable to an SQL injection attack. + +Instead, use the DB-API's parameter substitution. Put ``?`` as a placeholder +wherever you want to use a value, and then provide a tuple of values as the +second argument to the cursor's :meth:`execute` method. (Other database modules +may use a different placeholder, such as ``%s`` or ``:1``.) For example:: + + # Never do this -- insecure! + symbol = 'IBM' + c.execute("... 
where symbol = '%s'" % symbol) + + # Do this instead + t = (symbol,) + c.execute('select * from stocks where symbol=?', t) + + # Larger example + for t in (('2006-03-28', 'BUY', 'IBM', 1000, 45.00), + ('2006-04-05', 'BUY', 'MSOFT', 1000, 72.00), + ('2006-04-06', 'SELL', 'IBM', 500, 53.00), + ): + c.execute('insert into stocks values (?,?,?,?,?)', t) + +To retrieve data after executing a SELECT statement, you can either treat the +cursor as an iterator, call the cursor's :meth:`fetchone` method to retrieve a +single matching row, or call :meth:`fetchall` to get a list of the matching +rows. + +This example uses the iterator form:: + + >>> c = conn.cursor() + >>> c.execute('select * from stocks order by price') + >>> for row in c: + ... print row + ... + (u'2006-01-05', u'BUY', u'RHAT', 100, 35.140000000000001) + (u'2006-03-28', u'BUY', u'IBM', 1000, 45.0) + (u'2006-04-06', u'SELL', u'IBM', 500, 53.0) + (u'2006-04-05', u'BUY', u'MSOFT', 1000, 72.0) + >>> + +For more information about the SQL dialect supported by SQLite, see +http://www.sqlite.org. + + +.. seealso:: + + http://www.pysqlite.org + The pysqlite web page. + + http://www.sqlite.org + The SQLite web page; the documentation describes the syntax and the available + data types for the supported SQL dialect. + + The documentation for the :mod:`sqlite3` module. + + :pep:`249` - Database API Specification 2.0 + PEP written by Marc-André Lemburg. + +.. % ====================================================================== + + +.. _module-wsgiref: + +The wsgiref package +------------------- + +The Web Server Gateway Interface (WSGI) v1.0 defines a standard interface +between web servers and Python web applications and is described in :pep:`333`. +The :mod:`wsgiref` package is a reference implementation of the WSGI +specification. + +.. % XXX should this be in a PEP 333 section instead? 
+ +The package includes a basic HTTP server that will run a WSGI application; this +server is useful for debugging but isn't intended for production use. Setting +up a server takes only a few lines of code:: + + from wsgiref import simple_server + + wsgi_app = ... + + host = '' + port = 8000 + httpd = simple_server.make_server(host, port, wsgi_app) + httpd.serve_forever() + +.. % XXX discuss structure of WSGI applications? +.. % XXX provide an example using Django or some other framework? + + +.. seealso:: + + http://www.wsgi.org + A central web site for WSGI-related resources. + + :pep:`333` - Python Web Server Gateway Interface v1.0 + PEP written by Phillip J. Eby. + +.. % ====================================================================== + + +.. _build-api: + +Build and C API Changes +======================= + +Changes to Python's build process and to the C API include: + +* The Python source tree was converted from CVS to Subversion, in a complex + migration procedure that was supervised and flawlessly carried out by Martin von + Löwis. The procedure was developed as :pep:`347`. + +* Coverity, a company that markets a source code analysis tool called Prevent, + provided the results of their examination of the Python source code. The + analysis found about 60 bugs that were quickly fixed. Many of the bugs were + refcounting problems, often occurring in error-handling code. See + http://scan.coverity.com for the statistics. + +* The largest change to the C API came from :pep:`353`, which modifies the + interpreter to use a :ctype:`Py_ssize_t` type definition instead of + :ctype:`int`. See the earlier section :ref:`pep-353` for a discussion of this + change. + +* The design of the bytecode compiler has changed a great deal, no longer + generating bytecode by traversing the parse tree. Instead the parse tree is + converted to an abstract syntax tree (or AST), and it is the abstract syntax + tree that's traversed to produce the bytecode. 
+ + It's possible for Python code to obtain AST objects by using the + :func:`compile` built-in and specifying ``_ast.PyCF_ONLY_AST`` as the value of + the *flags* parameter:: + + from _ast import PyCF_ONLY_AST + ast = compile("""a=0 + for i in range(10): + a += i + """, "<string>", 'exec', PyCF_ONLY_AST) + + assignment = ast.body[0] + for_loop = ast.body[1] + + No official documentation has been written for the AST code yet, but :pep:`339` + discusses the design. To start learning about the code, read the definition of + the various AST nodes in :file:`Parser/Python.asdl`. A Python script reads this + file and generates a set of C structure definitions in + :file:`Include/Python-ast.h`. The :cfunc:`PyParser_ASTFromString` and + :cfunc:`PyParser_ASTFromFile` functions, defined in :file:`Include/pythonrun.h`, take + Python source as input and return the root of an AST representing the contents. + This AST can then be turned into a code object by :cfunc:`PyAST_Compile`. For + more information, read the source code, and then ask questions on python-dev. + + The AST code was developed under Jeremy Hylton's management, and implemented by + (in alphabetical order) Brett Cannon, Nick Coghlan, Grant Edwards, John + Ehresman, Kurt Kaiser, Neal Norwitz, Tim Peters, Armin Rigo, and Neil + Schemenauer, plus the participants in a number of AST sprints at conferences + such as PyCon. + + .. % List of names taken from Jeremy's python-dev post at + .. % http://mail.python.org/pipermail/python-dev/2005-October/057500.html + +* Evan Jones's patch to obmalloc, first described in a talk at PyCon DC 2005, + was applied. Python 2.4 allocated small objects in 256K-sized arenas, but never + freed arenas. With this patch, Python will free arenas when they're empty. The + net effect is that on some platforms, when you allocate many objects, Python's + memory usage may actually drop when you delete them and the memory may be + returned to the operating system. 
(Implemented by Evan Jones, and reworked by + Tim Peters.) + + Note that this change means extension modules must be more careful when + allocating memory. Python's API has many different functions for allocating + memory that are grouped into families. For example, :cfunc:`PyMem_Malloc`, + :cfunc:`PyMem_Realloc`, and :cfunc:`PyMem_Free` are one family that allocates + raw memory, while :cfunc:`PyObject_Malloc`, :cfunc:`PyObject_Realloc`, and + :cfunc:`PyObject_Free` are another family that's supposed to be used for + creating Python objects. + + Previously these different families all reduced to the platform's + :cfunc:`malloc` and :cfunc:`free` functions. This meant it didn't matter if + you got things wrong and allocated memory with the :cfunc:`PyMem` function but + freed it with the :cfunc:`PyObject` function. With 2.5's changes to obmalloc, + these families now do different things and mismatches will probably result in a + segfault. You should carefully test your C extension modules with Python 2.5. + +* The built-in set types now have an official C API. Call :cfunc:`PySet_New` + and :cfunc:`PyFrozenSet_New` to create a new set, :cfunc:`PySet_Add` and + :cfunc:`PySet_Discard` to add and remove elements, and :cfunc:`PySet_Contains` + and :cfunc:`PySet_Size` to examine the set's state. (Contributed by Raymond + Hettinger.) + +* C code can now obtain information about the exact revision of the Python + interpreter by calling the :cfunc:`Py_GetBuildInfo` function that returns a + string of build information like this: ``"trunk:45355:45356M, Apr 13 2006, + 07:42:19"``. (Contributed by Barry Warsaw.) + +* Two new macros can be used to indicate C functions that are local to the + current file so that a faster calling convention can be used. + :cfunc:`Py_LOCAL(type)` declares the function as returning a value of the + specified *type* and uses a fast-calling qualifier. + :cfunc:`Py_LOCAL_INLINE(type)` does the same thing and also requests the + function be inlined. 
If :cfunc:`PY_LOCAL_AGGRESSIVE` is defined before + :file:`Python.h` is included, a set of more aggressive optimizations is enabled + for the module; you should benchmark the results to find out if these + optimizations actually make the code faster. (Contributed by Fredrik Lundh at + the NeedForSpeed sprint.) + +* :cfunc:`PyErr_NewException(name, base, dict)` can now accept a tuple of base + classes as its *base* argument. (Contributed by Georg Brandl.) + +* The :cfunc:`PyErr_Warn` function for issuing warnings is now deprecated in + favour of :cfunc:`PyErr_WarnEx(category, message, stacklevel)` which lets you + specify the number of stack frames separating this function and the caller. A + *stacklevel* of 1 is the function calling :cfunc:`PyErr_WarnEx`, 2 is the + function above that, and so forth. (Added by Neal Norwitz.) + +* The CPython interpreter is still written in C, but the code can now be + compiled with a C++ compiler without errors. (Implemented by Anthony Baxter, + Martin von Löwis, Skip Montanaro.) + +* The :cfunc:`PyRange_New` function was removed. It was never documented, never + used in the core code, and had dangerously lax error checking. In the unlikely + case that your extensions were using it, you can replace it by something like + the following:: + + range = PyObject_CallFunction((PyObject*) &PyRange_Type, "lll", + start, stop, step); + +.. % ====================================================================== + + +.. _ports: + +Port-Specific Changes +--------------------- + +* MacOS X (10.3 and higher): dynamic loading of modules now uses the + :cfunc:`dlopen` function instead of MacOS-specific functions. + +* MacOS X: a :option:`--enable-universalsdk` switch was added to the + :program:`configure` script that compiles the interpreter as a universal binary + able to run on both PowerPC and Intel processors. (Contributed by Ronald + Oussoren.)
+ +* Windows: :file:`.dll` is no longer supported as a filename extension for + extension modules. :file:`.pyd` is now the only filename extension that will be + searched for. + +.. % ====================================================================== + + +.. _porting: + +Porting to Python 2.5 +===================== + +This section lists previously described changes that may require changes to your +code: + +* ASCII is now the default encoding for modules. It's now a syntax error if a + module contains string literals with 8-bit characters but doesn't have an + encoding declaration. In Python 2.4 this triggered a warning, not a syntax + error. + +* Previously, the :attr:`gi_frame` attribute of a generator was always a frame + object. Because of the :pep:`342` changes described in section :ref:`pep-342`, + it's now possible for :attr:`gi_frame` to be ``None``. + +* A new warning, :class:`UnicodeWarning`, is triggered when you attempt to + compare a Unicode string and an 8-bit string that can't be converted to Unicode + using the default ASCII encoding. Previously such comparisons would raise a + :class:`UnicodeDecodeError` exception. + +* Library: the :mod:`csv` module is now stricter about multi-line quoted fields. + If your files contain newlines embedded within fields, the input should be split + into lines in a manner which preserves the newline characters. + +* Library: the :mod:`locale` module's :func:`format` function would + previously accept any string as long as no more than one %char specifier + appeared. In Python 2.5, the argument must be exactly one %char specifier with + no surrounding text. + +* Library: The :mod:`pickle` and :mod:`cPickle` modules no longer accept a + return value of ``None`` from the :meth:`__reduce__` method; the method must + return a tuple of arguments instead. The modules also no longer accept the + deprecated *bin* keyword parameter.
+ +* Library: The :mod:`SimpleXMLRPCServer` and :mod:`DocXMLRPCServer` classes now + have a :attr:`rpc_paths` attribute that constrains XML-RPC operations to a + limited set of URL paths; the default is to allow only ``'/'`` and ``'/RPC2'``. + Setting :attr:`rpc_paths` to ``None`` or an empty tuple disables this path + checking. + +* C API: Many functions now use :ctype:`Py_ssize_t` instead of :ctype:`int` to + allow processing more data on 64-bit machines. Extension code may need to make + the same change to avoid warnings and to support 64-bit machines. See the + earlier section :ref:`pep-353` for a discussion of this change. + +* C API: The obmalloc changes mean that you must be careful to not mix usage + of the :cfunc:`PyMem_\*` and :cfunc:`PyObject_\*` families of functions. Memory + allocated with one family's :cfunc:`\*_Malloc` must be freed with the + corresponding family's :cfunc:`\*_Free` function. + +.. % ====================================================================== + + +.. _acks: + +Acknowledgements +================ + +The author would like to thank the following people for offering suggestions, +corrections and assistance with various drafts of this article: Georg Brandl, +Nick Coghlan, Phillip J. Eby, Lars Gustäbel, Raymond Hettinger, Ralf W. Grosse- +Kunstleve, Kent Johnson, Iain Lowe, Martin von Löwis, Fredrik Lundh, Andrew +McNamara, Skip Montanaro, Gustavo Niemeyer, Paul Prescod, James Pryor, Mike +Rovner, Scott Weikart, Barry Warsaw, Thomas Wouters. + diff --git a/Doc/whatsnew/2.6.rst b/Doc/whatsnew/2.6.rst new file mode 100644 index 0000000..b0e731a --- /dev/null +++ b/Doc/whatsnew/2.6.rst @@ -0,0 +1,236 @@ +**************************** + What's New in Python 2.6 +**************************** + +:Author: A.M. Kuchling +:Release: |release| +:Date: |today| + +.. % $Id: whatsnew26.tex 55963 2007-06-13 18:07:49Z guido.van.rossum $ +.. % Rules for maintenance: +.. % +.. % * Anyone can add text to this document. 
Do not spend very much time +.. % on the wording of your changes, because your text will probably +.. % get rewritten to some degree. +.. % +.. % * The maintainer will go through Misc/NEWS periodically and add +.. % changes; it's therefore more important to add your changes to +.. % Misc/NEWS than to this file. +.. % +.. % * This is not a complete list of every single change; completeness +.. % is the purpose of Misc/NEWS. Some changes I consider too small +.. % or esoteric to include. If such a change is added to the text, +.. % I'll just remove it. (This is another reason you shouldn't spend +.. % too much time on writing your addition.) +.. % +.. % * If you want to draw your new text to the attention of the +.. % maintainer, add 'XXX' to the beginning of the paragraph or +.. % section. +.. % +.. % * It's OK to just add a fragmentary note about a change. For +.. % example: "XXX Describe the transmogrify() function added to the +.. % socket module." The maintainer will research the change and +.. % write the necessary text. +.. % +.. % * You can comment out your additions if you like, but it's not +.. % necessary (especially when a final release is some months away). +.. % +.. % * Credit the author of a patch or bugfix. Just the name is +.. % sufficient; the e-mail address isn't necessary. +.. % +.. % * It's helpful to add the bug/patch number as a comment: +.. % +.. % % Patch 12345 +.. % XXX Describe the transmogrify() function added to the socket +.. % module. +.. % (Contributed by P.Y. Developer.) +.. % +.. % This saves the maintainer the effort of going through the SVN log +.. % when researching a change. + +This article explains the new features in Python 2.6. No release date for +Python 2.6 has been set; it will probably be released in mid 2008. + +This article doesn't attempt to provide a complete specification of the new +features, but instead provides a convenient overview. For full details, you +should refer to the documentation for Python 2.6. 
If you want to understand the +complete implementation and design rationale, refer to the PEP for a particular +new feature. + +.. % Compare with previous release in 2 - 3 sentences here. +.. % add hyperlink when the documentation becomes available online. + +.. % ====================================================================== +.. % Large, PEP-level features and changes should be described here. +.. % Should there be a new section here for 3k migration? +.. % Or perhaps a more general section describing module changes/deprecation? +.. % sets module deprecated +.. % ====================================================================== + + +Other Language Changes +====================== + +Here are all of the changes that Python 2.6 makes to the core Python language. + +* An obscure change: when you use the :func:`locals` function inside a + :keyword:`class` statement, the resulting dictionary no longer returns free + variables. (Free variables, in this case, are variables referred to in the + :keyword:`class` statement that aren't attributes of the class.) + +.. % ====================================================================== + + +Optimizations +------------- + +* Internally, a bit is now set in type objects to indicate some of the standard + built-in types. This speeds up checking if an object is a subclass of one of + these types. (Contributed by Neal Norwitz.) + +The net result of the 2.6 optimizations is that Python 2.6 runs the pystone +benchmark around XX% faster than Python 2.5. + +.. % ====================================================================== + + +New, Improved, and Deprecated Modules +===================================== + +As usual, Python's standard library received a number of enhancements and bug +fixes. Here's a partial list of the most notable changes, sorted alphabetically +by module name.
Consult the :file:`Misc/NEWS` file in the source tree for a more +complete list of changes, or look through the CVS logs for all the details. + +* A new data type in the :mod:`collections` module: :class:`NamedTuple(typename, + fieldnames)` is a factory function that creates subclasses of the standard tuple + whose fields are accessible by name as well as index. For example:: + + var_type = collections.NamedTuple('variable', + 'id name type size') + var = var_type(1, 'frequency', 'int', 4) + + print var[0], var.id # Equivalent + print var[2], var.type # Equivalent + + (Contributed by Raymond Hettinger.) + +* A new method in the :mod:`curses` module: for a window, :meth:`chgat` changes + the display characters for a certain number of characters on a single line. :: + + # Boldface text starting at y=0,x=21 + # and affecting the rest of the line. + stdscr.chgat(0,21, curses.A_BOLD) + + (Contributed by Fabian Kreutz.) + +* The :func:`glob.glob` function can now return Unicode filenames if + a Unicode path was used and Unicode filenames are matched within the directory. + + .. % Patch #1001604 + +* The :mod:`gopherlib` module has been removed. + +* A new function in the :mod:`heapq` module: ``merge(iter1, iter2, ...)`` + takes any number of iterables that return data *in sorted order*, and returns + a new iterator that returns the contents of all the iterators, also in sorted + order. For example:: + + heapq.merge([1, 3, 5, 9], [2, 8, 16]) -> + [1, 2, 3, 5, 8, 9, 16] + + (Contributed by Raymond Hettinger.) + +* A new function in the :mod:`itertools` module: ``izip_longest(iter1, iter2, + ...[, fillvalue])`` makes tuples from each of the elements; if some of the + iterables are shorter than others, the missing values are set to *fillvalue*. + For example:: + + itertools.izip_longest([1,2,3], [1,2,3,4,5]) -> + [(1, 1), (2, 2), (3, 3), (None, 4), (None, 5)] + + (Contributed by Raymond Hettinger.) + +* The :mod:`macfs` module has been removed. 
This in turn required the + :func:`macostools.touched` function to be removed because it depended on the + :mod:`macfs` module. + + .. % Patch #1490190 + +* New functions in the :mod:`posix` module: :func:`chflags` and :func:`lchflags` + are wrappers for the corresponding system calls (where they're available). + Constants for the flag values are defined in the :mod:`stat` module; some + possible values include :const:`UF_IMMUTABLE` to signal the file may not be + changed and :const:`UF_APPEND` to indicate that data can only be appended to the + file. (Contributed by M. Levinson.) + +* The :mod:`rgbimg` module has been removed. + +* The :mod:`smtplib` module now supports SMTP over SSL thanks to the addition + of the :class:`SMTP_SSL` class. This class supports an interface identical to + the existing :class:`SMTP` class. (Contributed by Monty Taylor.) + +* The :mod:`test.test_support` module now contains a :func:`EnvironmentVarGuard` + context manager that supports temporarily changing environment variables and + automatically restores them to their old values. (Contributed by Brett Cannon.) + +.. % ====================================================================== +.. % whole new modules get described in \subsections here + +.. % ====================================================================== + + +Build and C API Changes +======================= + +Changes to Python's build process and to the C API include: + +* Detailed changes are listed here. + +.. % ====================================================================== + + +Port-Specific Changes +--------------------- + +Platform-specific changes go here. + +.. % ====================================================================== + + +.. _section-other: + +Other Changes and Fixes +======================= + +As usual, there were a bunch of other improvements and bugfixes scattered +throughout the source tree. 
A search through the change logs finds there were +XXX patches applied and YYY bugs fixed between Python 2.5 and 2.6. Both figures +are likely to be underestimates. + +Some of the more notable changes are: + +* Details go here. + +.. % ====================================================================== + + +Porting to Python 2.6 +===================== + +This section lists previously described changes that may require changes to your +code: + +* Everything is all in the details! + +.. % ====================================================================== + + +.. _acks: + +Acknowledgements +================ + +The author would like to thank the following people for offering suggestions, +corrections and assistance with various drafts of this article: . + diff --git a/Doc/whatsnew/3.0.rst b/Doc/whatsnew/3.0.rst new file mode 100644 index 0000000..ac82317 --- /dev/null +++ b/Doc/whatsnew/3.0.rst @@ -0,0 +1,161 @@ +**************************** + What's New in Python 3.0 +**************************** + +:Author: A.M. Kuchling + +.. |release| replace:: 0.0 + +.. % $Id: whatsnew26.tex 55506 2007-05-22 07:43:29Z neal.norwitz $ +.. % Rules for maintenance: +.. % +.. % * Anyone can add text to this document. Do not spend very much time +.. % on the wording of your changes, because your text will probably +.. % get rewritten to some degree. +.. % +.. % * The maintainer will go through Misc/NEWS periodically and add +.. % changes; it's therefore more important to add your changes to +.. % Misc/NEWS than to this file. +.. % +.. % * This is not a complete list of every single change; completeness +.. % is the purpose of Misc/NEWS. Some changes I consider too small +.. % or esoteric to include. If such a change is added to the text, +.. % I'll just remove it. (This is another reason you shouldn't spend +.. % too much time on writing your addition.) +.. % +.. % * If you want to draw your new text to the attention of the +.. 
% maintainer, add 'XXX' to the beginning of the paragraph or +.. % section. +.. % +.. % * It's OK to just add a fragmentary note about a change. For +.. % example: "XXX Describe the transmogrify() function added to the +.. % socket module." The maintainer will research the change and +.. % write the necessary text. +.. % +.. % * You can comment out your additions if you like, but it's not +.. % necessary (especially when a final release is some months away). +.. % +.. % * Credit the author of a patch or bugfix. Just the name is +.. % sufficient; the e-mail address isn't necessary. +.. % +.. % * It's helpful to add the bug/patch number as a comment: +.. % +.. % % Patch 12345 +.. % XXX Describe the transmogrify() function added to the socket +.. % module. +.. % (Contributed by P.Y. Developer.) +.. % +.. % This saves the maintainer the effort of going through the SVN log +.. % when researching a change. + +This article explains the new features in Python 3.0. No release date for +Python 3.0 has been set; it will probably be released in mid 2008. + +This article doesn't attempt to provide a complete specification of the new +features, but instead provides a convenient overview. For full details, you +should refer to the documentation for Python 3.0. If you want to understand the +complete implementation and design rationale, refer to the PEP for a particular +new feature. + +.. % Compare with previous release in 2 - 3 sentences here. +.. % add hyperlink when the documentation becomes available online. + +.. % ====================================================================== +.. % Large, PEP-level features and changes should be described here. +.. % Should there be a new section here for 3k migration? +.. % Or perhaps a more general section describing module changes/deprecation? +.. % sets module deprecated +.. 
% ====================================================================== + + +Other Language Changes +====================== + +Here are all of the changes that Python 3.0 makes to the core Python language. + +* Detailed changes are listed here. + +.. % ====================================================================== + + +Optimizations +------------- + +* Detailed changes are listed here. + +The net result of the 3.0 optimizations is that Python 3.0 runs the pystone +benchmark around XX% slower than Python 2.6. + +.. % ====================================================================== + + +New, Improved, and Deprecated Modules +===================================== + +As usual, Python's standard library received a number of enhancements and bug +fixes. Here's a partial list of the most notable changes, sorted alphabetically +by module name. Consult the :file:`Misc/NEWS` file in the source tree for a more +complete list of changes, or look through the CVS logs for all the details. + +* Detailed changes are listed here. + +.. % ====================================================================== +.. % whole new modules get described in \subsections here + +.. % ====================================================================== + + +Build and C API Changes +======================= + +Changes to Python's build process and to the C API include: + +* Detailed changes are listed here. + +.. % ====================================================================== + + +Port-Specific Changes +--------------------- + +Platform-specific changes go here. + +.. % ====================================================================== + + +.. _section-other: + +Other Changes and Fixes +======================= + +As usual, there were a bunch of other improvements and bugfixes scattered +throughout the source tree. A search through the change logs finds there were +XXX patches applied and YYY bugs fixed between Python 2.6 and 3.0.
Both figures +are likely to be underestimates. + +Some of the more notable changes are: + +* Details go here. + +.. % ====================================================================== + + +Porting to Python 3.0 +===================== + +This section lists previously described changes that may require changes to your +code: + +* Everything is all in the details! + +.. % ====================================================================== + + +.. _acks: + +Acknowledgements +================ + +The author would like to thank the following people for offering suggestions, +corrections and assistance with various drafts of this article: . + -- cgit v0.12