From c524cff3d33666a5aad5988b33c928c7c729927a Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Fri, 26 Nov 2010 08:42:45 +0000 Subject: Merged revisions 85530,85532-85534,85538-85543,85546-85548 via svnmerge from svn+ssh://svn.python.org/python/branches/py3k ........ r85530 | georg.brandl | 2010-10-15 17:32:05 +0200 (Fr, 15 Okt 2010) | 1 line Refrain from using inline suites. ........ r85532 | georg.brandl | 2010-10-15 18:03:02 +0200 (Fr, 15 Okt 2010) | 1 line #7771: reference to documentation of dictview methods and operations. ........ r85533 | georg.brandl | 2010-10-15 18:07:41 +0200 (Fr, 15 Okt 2010) | 1 line #9683: remove broken dead code dealing with nested arguments removed from Py3k, and update the docs and docstrings accordingly. ........ r85534 | georg.brandl | 2010-10-15 18:19:43 +0200 (Fr, 15 Okt 2010) | 1 line #9801: document how list and dict proxies created by Managers behave w.r.t. mutable items. ........ r85538 | georg.brandl | 2010-10-15 18:35:46 +0200 (Fr, 15 Okt 2010) | 1 line #7303: add documentation for useful pkgutil functions and classes. ........ r85539 | georg.brandl | 2010-10-15 18:42:14 +0200 (Fr, 15 Okt 2010) | 1 line Fix issue references. ........ r85540 | georg.brandl | 2010-10-15 18:42:37 +0200 (Fr, 15 Okt 2010) | 1 line #6798: fix wrong docs for the arguments to several trace events. ........ r85541 | georg.brandl | 2010-10-15 18:53:24 +0200 (Fr, 15 Okt 2010) | 1 line #4968: updates to inspect.is* function docs. ........ r85542 | georg.brandl | 2010-10-15 19:01:15 +0200 (Fr, 15 Okt 2010) | 1 line #7790: move table of struct_time members to the actual description of struct_time. ........ r85543 | georg.brandl | 2010-10-15 19:03:02 +0200 (Fr, 15 Okt 2010) | 1 line #4785: document strict argument of JSONDecoder, plus add object_pairs_hook in the docstrings. ........ 
r85546 | georg.brandl | 2010-10-15 19:58:45 +0200 (Fr, 15 Okt 2010) | 1 line #5762: fix handling of empty namespace in minidom, which would result in AttributeError on toxml(). ........ r85547 | georg.brandl | 2010-10-15 20:00:35 +0200 (Fr, 15 Okt 2010) | 1 line #6098: Refrain from claiming DOM level 3 conformance in minidom. ........ r85548 | georg.brandl | 2010-10-15 21:46:19 +0200 (Fr, 15 Okt 2010) | 1 line #10072: assume a bit less knowledge of the FTP protocol in the ftplib docs. ........ --- Doc/c-api/init.rst | 9 ++- Doc/library/ftplib.rst | 40 +++++---- Doc/library/inspect.rst | 27 ++++--- Doc/library/json.rst | 11 ++- Doc/library/multiprocessing.rst | 18 +++++ Doc/library/pkgutil.rst | 174 ++++++++++++++++++++++++++++++++++------ Doc/library/stdtypes.rst | 29 ++----- Doc/library/sys.rst | 9 ++- Doc/library/time.rst | 93 +++++++++++---------- Doc/tutorial/controlflow.rst | 6 +- Lib/inspect.py | 40 +++------ Lib/json/__init__.py | 24 +++++- Lib/json/decoder.py | 14 ++++ Lib/test/test_minidom.py | 7 ++ Lib/xml/dom/minidom.py | 9 +-- Misc/NEWS | 5 ++ 16 files changed, 347 insertions(+), 168 deletions(-) diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index ae5d028..a176e5a 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -908,13 +908,14 @@ Python-level trace functions in previous versions. +------------------------------+--------------------------------------+ | :const:`PyTrace_LINE` | Always *NULL*. | +------------------------------+--------------------------------------+ - | :const:`PyTrace_RETURN` | Value being returned to the caller. | + | :const:`PyTrace_RETURN` | Value being returned to the caller, | + | | or *NULL* if caused by an exception. | +------------------------------+--------------------------------------+ - | :const:`PyTrace_C_CALL` | Name of function being called. | + | :const:`PyTrace_C_CALL` | Function object being called. 
| +------------------------------+--------------------------------------+ - | :const:`PyTrace_C_EXCEPTION` | Always *NULL*. | + | :const:`PyTrace_C_EXCEPTION` | Function object being called. | +------------------------------+--------------------------------------+ - | :const:`PyTrace_C_RETURN` | Always *NULL*. | + | :const:`PyTrace_C_RETURN` | Function object being called. | +------------------------------+--------------------------------------+ diff --git a/Doc/library/ftplib.rst b/Doc/library/ftplib.rst index 75a8fb1..5545505 100644 --- a/Doc/library/ftplib.rst +++ b/Doc/library/ftplib.rst @@ -54,18 +54,21 @@ The module defines the following items: .. exception:: error_temp - Exception raised when an error code in the range 400--499 is received. + Exception raised when an error code signifying a temporary error (response + codes in the range 400--499) is received. .. exception:: error_perm - Exception raised when an error code in the range 500--599 is received. + Exception raised when an error code signifying a permanent error (response + codes in the range 500--599) is received. .. exception:: error_proto - Exception raised when a reply is received from the server that does not begin - with a digit in the range 1--5. + Exception raised when a reply is received from the server that does not fit + the response specifications of the File Transfer Protocol, i.e. begin with a + digit in the range 1--5. .. data:: all_errors @@ -158,9 +161,9 @@ followed by ``lines`` for the text version or ``binary`` for the binary version. .. method:: FTP.voidcmd(cmd) - Send a simple command string to the server and handle the response. Return - nothing if a response code in the range 200--299 is received. Raise an exception - otherwise. + Send a simple command string to the server and handle the response. Return + nothing if a response code corresponding to success (codes in the range + 200--299) is received. Raise :exc:`error_reply` otherwise. .. 
method:: FTP.retrbinary(cmd, callback, blocksize=8192, rest=None) @@ -177,12 +180,15 @@ followed by ``lines`` for the text version or ``binary`` for the binary version. .. method:: FTP.retrlines(cmd, callback=None) - Retrieve a file or directory listing in ASCII transfer mode. *cmd* - should be an appropriate ``RETR`` command (see :meth:`retrbinary`) or a - command such as ``LIST``, ``NLST`` or ``MLSD`` (usually just the string - ``'LIST'``). The *callback* function is called for each line with a - string argument containing the line with the trailing CRLF stripped. - The default *callback* prints the line to ``sys.stdout``. + Retrieve a file or directory listing in ASCII transfer mode. *cmd* should be + an appropriate ``RETR`` command (see :meth:`retrbinary`) or a command such as + ``LIST``, ``NLST`` or ``MLSD`` (usually just the string ``'LIST'``). + ``LIST`` retrieves a list of files and information about those files. + ``NLST`` retrieves a list of file names. On some servers, ``MLSD`` retrieves + a machine readable list of files and information about those files. The + *callback* function is called for each line with a string argument containing + the line with the trailing CRLF stripped. The default *callback* prints the + line to ``sys.stdout``. .. method:: FTP.set_pasv(boolean) @@ -240,10 +246,10 @@ followed by ``lines`` for the text version or ``binary`` for the binary version. .. method:: FTP.nlst(argument[, ...]) - Return a list of files as returned by the ``NLST`` command. The optional - *argument* is a directory to list (default is the current server directory). - Multiple arguments can be used to pass non-standard options to the ``NLST`` - command. + Return a list of file names as returned by the ``NLST`` command. The + optional *argument* is a directory to list (default is the current server + directory). Multiple arguments can be used to pass non-standard options to + the ``NLST`` command. .. 
method:: FTP.dir(argument[, ...]) diff --git a/Doc/library/inspect.rst b/Doc/library/inspect.rst index cc88acf..7bb3e71 100644 --- a/Doc/library/inspect.rst +++ b/Doc/library/inspect.rst @@ -204,18 +204,19 @@ attributes: .. function:: isclass(object) - Return true if the object is a class. + Return true if the object is a class, whether built-in or created in Python + code. .. function:: ismethod(object) - Return true if the object is a method. + Return true if the object is a bound method written in Python. .. function:: isfunction(object) - Return true if the object is a Python function or unnamed (:term:`lambda`) - function. + Return true if the object is a Python function, which includes functions + created by a :term:`lambda` expression. .. function:: isgeneratorfunction(object) @@ -245,13 +246,14 @@ attributes: .. function:: isbuiltin(object) - Return true if the object is a built-in function. + Return true if the object is a built-in function or a bound built-in method. .. function:: isroutine(object) Return true if the object is a user-defined or built-in function or method. + .. function:: isabstract(object) Return true if the object is an abstract base class. @@ -259,8 +261,9 @@ attributes: .. function:: ismethoddescriptor(object) - Return true if the object is a method descriptor, but not if :func:`ismethod` - or :func:`isclass` or :func:`isfunction` are true. + Return true if the object is a method descriptor, but not if + :func:`ismethod`, :func:`isclass`, :func:`isfunction` or :func:`isbuiltin` + are true. This, for example, is true of ``int.__add__``. An object passing this test has a :attr:`__get__` attribute but not a :attr:`__set__` attribute, but @@ -422,19 +425,19 @@ Classes and functions Get information about arguments passed into a particular frame. A :term:`named tuple` ``ArgInfo(args, varargs, keywords, locals)`` is - returned. *args* is a list of the argument names (it may contain nested - lists). 
*varargs* and *varkw* are the names of the ``*`` and ``**`` arguments - or ``None``. *locals* is the locals dictionary of the given frame. + returned. *args* is a list of the argument names. *varargs* and *varkw* are + the names of the ``*`` and ``**`` arguments or ``None``. *locals* is the + locals dictionary of the given frame. -.. function:: formatargspec(args[, varargs, varkw, defaults, formatarg, formatvarargs, formatvarkw, formatvalue, join]) +.. function:: formatargspec(args[, varargs, varkw, defaults, formatarg, formatvarargs, formatvarkw, formatvalue]) Format a pretty argument spec from the four values returned by :func:`getargspec`. The format\* arguments are the corresponding optional formatting functions that are called to turn names and values into strings. -.. function:: formatargvalues(args[, varargs, varkw, locals, formatarg, formatvarargs, formatvarkw, formatvalue, join]) +.. function:: formatargvalues(args[, varargs, varkw, locals, formatarg, formatvarargs, formatvarkw, formatvalue]) Format a pretty argument spec from the four values returned by :func:`getargvalues`. The format\* arguments are the corresponding optional diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 3b203a2..a26001d 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -149,7 +149,7 @@ Basic Usage To use a custom :class:`JSONEncoder` subclass (e.g. one that overrides the :meth:`default` method to serialize additional types), specify it with the - *cls* kwarg. + *cls* kwarg; otherwise :class:`JSONEncoder` is used. .. function:: dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, cls=None, indent=None, separators=None, default=None, **kw) @@ -195,8 +195,8 @@ Basic Usage are encountered. To use a custom :class:`JSONDecoder` subclass, specify it with the ``cls`` - kwarg. Additional keyword arguments will be passed to the constructor of the - class. + kwarg; otherwise :class:`JSONDecoder` is used. 
Additional keyword arguments + will be passed to the constructor of the class. .. function:: loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw) @@ -275,6 +275,11 @@ Encoders and decoders ``'false'``. This can be used to raise an exception if invalid JSON numbers are encountered. + If *strict* is ``False`` (``True`` is the default), then control characters + will be allowed inside strings. Control characters in this context are + those with character codes in the 0-31 range, including ``'\t'`` (tab), + ``'\n'``, ``'\r'`` and ``'\0'``. + .. method:: decode(s) diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index 9fa2d81..264d432 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -1288,6 +1288,24 @@ their parent process exits. The manager classes are defined in the Create a shared ``list`` object and return a proxy for it. + .. note:: + + Modifications to mutable values or items in dict and list proxies will not + be propagated through the manager, because the proxy has no way of knowing + when its values or items are modified. To modify such an item, you can + re-assign the modified object to the container proxy:: + + # create a list proxy and append a mutable object (a dictionary) + lproxy = manager.list() + lproxy.append({}) + # now mutate the dictionary + d = lproxy[0] + d['a'] = 1 + d['b'] = 2 + # at this point, the changes to d are not yet synced, but by + # reassigning the dictionary, the proxy is notified of the change + lproxy[0] = d + Namespace objects >>>>>>>>>>>>>>>>> diff --git a/Doc/library/pkgutil.rst b/Doc/library/pkgutil.rst index 48d53e3..f9f5e86 100644 --- a/Doc/library/pkgutil.rst +++ b/Doc/library/pkgutil.rst @@ -3,40 +3,166 @@ ============================================ .. module:: pkgutil - :synopsis: Utilities to support extension of packages. + :synopsis: Utilities for the import system. 
- -This module provides functions to manipulate packages: +This module provides utilities for the import system, in particular package +support. .. function:: extend_path(path, name) - Extend the search path for the modules which comprise a package. Intended use is - to place the following code in a package's :file:`__init__.py`:: + Extend the search path for the modules which comprise a package. Intended + use is to place the following code in a package's :file:`__init__.py`:: from pkgutil import extend_path __path__ = extend_path(__path__, __name__) - This will add to the package's ``__path__`` all subdirectories of directories on - ``sys.path`` named after the package. This is useful if one wants to distribute - different parts of a single logical package as multiple directories. + This will add to the package's ``__path__`` all subdirectories of directories + on ``sys.path`` named after the package. This is useful if one wants to + distribute different parts of a single logical package as multiple + directories. - It also looks for :file:`\*.pkg` files beginning where ``*`` matches the *name* - argument. This feature is similar to :file:`\*.pth` files (see the :mod:`site` - module for more information), except that it doesn't special-case lines starting - with ``import``. A :file:`\*.pkg` file is trusted at face value: apart from - checking for duplicates, all entries found in a :file:`\*.pkg` file are added to - the path, regardless of whether they exist on the filesystem. (This is a - feature.) + It also looks for :file:`\*.pkg` files beginning where ``*`` matches the + *name* argument. This feature is similar to :file:`\*.pth` files (see the + :mod:`site` module for more information), except that it doesn't special-case + lines starting with ``import``. A :file:`\*.pkg` file is trusted at face + value: apart from checking for duplicates, all entries found in a + :file:`\*.pkg` file are added to the path, regardless of whether they exist + on the filesystem. 
(This is a feature.) If the input path is not a list (as is the case for frozen packages) it is returned unchanged. The input path is not modified; an extended copy is returned. Items are only appended to the copy at the end. - It is assumed that ``sys.path`` is a sequence. Items of ``sys.path`` that are - not strings referring to existing directories are ignored. Unicode items on - ``sys.path`` that cause errors when used as filenames may cause this function - to raise an exception (in line with :func:`os.path.isdir` behavior). + It is assumed that :data:`sys.path` is a sequence. Items of :data:`sys.path` + that are not strings referring to existing directories are ignored. Unicode + items on :data:`sys.path` that cause errors when used as filenames may cause + this function to raise an exception (in line with :func:`os.path.isdir` + behavior). + + +.. class:: ImpImporter(dirname=None) + + :pep:`302` Importer that wraps Python's "classic" import algorithm. + + If *dirname* is a string, a :pep:`302` importer is created that searches that + directory. If *dirname* is ``None``, a :pep:`302` importer is created that + searches the current :data:`sys.path`, plus any modules that are frozen or + built-in. + + Note that :class:`ImpImporter` does not currently support being used by + placement on :data:`sys.meta_path`. + + +.. class:: ImpLoader(fullname, file, filename, etc) + + :pep:`302` Loader that wraps Python's "classic" import algorithm. + + +.. function:: find_loader(fullname) + + Find a :pep:`302` "loader" object for *fullname*. + + If *fullname* contains dots, path must be the containing package's + ``__path__``. Returns ``None`` if the module cannot be found or imported. + This function uses :func:`iter_importers`, and is thus subject to the same + limitations regarding platform-specific special import locations such as the + Windows registry. + + +.. function:: get_importer(path_item) + + Retrieve a :pep:`302` importer for the given *path_item*. 
+ + The returned importer is cached in :data:`sys.path_importer_cache` if it was + newly created by a path hook. + + If there is no importer, a wrapper around the basic import machinery is + returned. This wrapper is never inserted into the importer cache (None is + inserted instead). + + The cache (or part of it) can be cleared manually if a rescan of + :data:`sys.path_hooks` is necessary. + + +.. function:: get_loader(module_or_name) + + Get a :pep:`302` "loader" object for *module_or_name*. + + If the module or package is accessible via the normal import mechanism, a + wrapper around the relevant part of that machinery is returned. Returns + ``None`` if the module cannot be found or imported. If the named module is + not already imported, its containing package (if any) is imported, in order + to establish the package ``__path__``. + + This function uses :func:`iter_importers`, and is thus subject to the same + limitations regarding platform-specific special import locations such as the + Windows registry. + + +.. function:: iter_importers(fullname='') + + Yield :pep:`302` importers for the given module name. + + If fullname contains a '.', the importers will be for the package containing + fullname, otherwise they will be importers for :data:`sys.meta_path`, + :data:`sys.path`, and Python's "classic" import machinery, in that order. If + the named module is in a package, that package is imported as a side effect + of invoking this function. + + Non-:pep:`302` mechanisms (e.g. the Windows registry) used by the standard + import machinery to find files in alternative locations are partially + supported, but are searched *after* :data:`sys.path`. Normally, these + locations are searched *before* :data:`sys.path`, preventing :data:`sys.path` + entries from shadowing them. 
+ + For this to cause a visible difference in behaviour, there must be a module + or package name that is accessible via both :data:`sys.path` and one of the + non-:pep:`302` file system mechanisms. In this case, the emulation will find + the former version, while the builtin import mechanism will find the latter. + + Items of the following types can be affected by this discrepancy: + ``imp.C_EXTENSION``, ``imp.PY_SOURCE``, ``imp.PY_COMPILED``, + ``imp.PKG_DIRECTORY``. + + +.. function:: iter_modules(path=None, prefix='') + + Yields ``(module_loader, name, ispkg)`` for all submodules on *path*, or, if + path is ``None``, all top-level modules on ``sys.path``. + + *path* should be either ``None`` or a list of paths to look for modules in. + + *prefix* is a string to output on the front of every module name on output. + + +.. function:: walk_packages(path=None, prefix='', onerror=None) + + Yields ``(module_loader, name, ispkg)`` for all modules recursively on + *path*, or, if path is ``None``, all accessible modules. + + *path* should be either ``None`` or a list of paths to look for modules in. + + *prefix* is a string to output on the front of every module name on output. + + Note that this function must import all *packages* (*not* all modules!) on + the given *path*, in order to access the ``__path__`` attribute to find + submodules. + + *onerror* is a function which gets called with one argument (the name of the + package which was being imported) if any exception occurs while trying to + import a package. If no *onerror* function is supplied, :exc:`ImportError`\s + are caught and ignored, while all other exceptions are propagated, + terminating the search. + + Examples:: + + # list all modules python can access + walk_packages() + + # list all submodules of ctypes + walk_packages(ctypes.__path__, ctypes.__name__ + '.') + .. 
function:: get_data(package, resource) @@ -48,14 +174,14 @@ This module provides functions to manipulate packages: filename, using ``/`` as the path separator. The parent directory name ``..`` is not allowed, and nor is a rooted name (starting with a ``/``). - The function returns a binary string that is the contents of the - specified resource. + The function returns a binary string that is the contents of the specified + resource. For packages located in the filesystem, which have already been imported, this is the rough equivalent of:: - d = os.path.dirname(sys.modules[package].__file__) - data = open(os.path.join(d, resource), 'rb').read() + d = os.path.dirname(sys.modules[package].__file__) + data = open(os.path.join(d, resource), 'rb').read() If the package cannot be located or loaded, or it uses a PEP 302 loader - which does not support :func:`get_data`, then None is returned. + which does not support :func:`get_data`, then ``None`` is returned. diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 7641e63..5693ed5 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -2038,28 +2038,11 @@ support membership tests: Keys views are set-like since their entries are unique and hashable. If all -values are hashable, so that (key, value) pairs are unique and hashable, then -the items view is also set-like. (Values views are not treated as set-like -since the entries are generally not unique.) Then these set operations are -available ("other" refers either to another view or a set): - -.. describe:: dictview & other - - Return the intersection of the dictview and the other object as a new set. - -.. describe:: dictview | other - - Return the union of the dictview and the other object as a new set. - -.. describe:: dictview - other - - Return the difference between the dictview and the other object (all elements - in *dictview* that aren't in *other*) as a new set. - -.. 
describe:: dictview ^ other - - Return the symmetric difference (all elements either in *dictview* or - *other*, but not in both) of the dictview and the other object as a new set. +values are hashable, so that ``(key, value)`` pairs are unique and hashable, +then the items view is also set-like. (Values views are not treated as set-like +since the entries are generally not unique.) For set-like views, all of the +operations defined for the abstract base class :class:`collections.Set` are +available (for example, ``==``, ``<``, or ``^``). An example of dictionary view usage:: @@ -2090,6 +2073,8 @@ An example of dictionary view usage:: >>> # set operations >>> keys & {'eggs', 'bacon', 'salad'} {'bacon'} + >>> keys ^ {'sausage', 'juice'} + {'juice', 'eggs', 'bacon', 'spam'} .. _typememoryview: diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index bf3fd47..3af5b5f 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -746,8 +746,9 @@ always available. ``'return'`` A function (or other code block) is about to return. The local trace - function is called; *arg* is the value that will be returned. The trace - function's return value is ignored. + function is called; *arg* is the value that will be returned, or ``None`` + if the event is caused by an exception being raised. The trace function's + return value is ignored. ``'exception'`` An exception has occurred. The local trace function is called; *arg* is a @@ -759,10 +760,10 @@ always available. a built-in. *arg* is the C function object. ``'c_return'`` - A C function has returned. *arg* is ``None``. + A C function has returned. *arg* is the C function object. ``'c_exception'`` - A C function has raised an exception. *arg* is ``None``. + A C function has raised an exception. *arg* is the C function object. Note that as an exception is propagated down the chain of callers, an ``'exception'`` event is generated at each level. 
diff --git a/Doc/library/time.rst b/Doc/library/time.rst index ceae8fa..b91aa53 100644 --- a/Doc/library/time.rst +++ b/Doc/library/time.rst @@ -16,21 +16,23 @@ semantics of these functions varies among platforms. An explanation of some terminology and conventions is in order. - .. index:: single: epoch +.. index:: single: epoch * The :dfn:`epoch` is the point where the time starts. On January 1st of that year, at 0 hours, the "time since the epoch" is zero. For Unix, the epoch is 1970. To find out what the epoch is, look at ``gmtime(0)``. - .. index:: single: Year 2038 +.. index:: single: Year 2038 * The functions in this module do not handle dates and times before the epoch or far in the future. The cut-off point in the future is determined by the C library; for Unix, it is typically in 2038. - .. index:: - single: Year 2000 - single: Y2K +.. index:: + single: Year 2000 + single: Y2K + +.. _time-y2kissues: * **Year 2000 (Y2K) issues**: Python depends on the platform's C library, which generally doesn't have year 2000 issues, since all dates and times are @@ -47,16 +49,16 @@ An explanation of some terminology and conventions is in order. Note that this is new as of Python 1.5.2(a2); earlier versions, up to Python 1.5.1 and 1.5.2a1, would add 1900 to year values below 1900. - .. index:: - single: UTC - single: Coordinated Universal Time - single: Greenwich Mean Time +.. index:: + single: UTC + single: Coordinated Universal Time + single: Greenwich Mean Time * UTC is Coordinated Universal Time (formerly known as Greenwich Mean Time, or GMT). The acronym UTC is not a mistake but a compromise between English and French. - .. index:: single: Daylight Saving Time +.. index:: single: Daylight Saving Time * DST is Daylight Saving Time, an adjustment of the timezone by (usually) one hour during part of the year. DST rules are magic (determined by local law) and @@ -81,37 +83,7 @@ An explanation of some terminology and conventions is in order. 
:func:`gmtime`, :func:`localtime`, and :func:`strptime` also offer attribute names for individual fields. - +-------+------------------+------------------------------+ - | Index | Attribute | Values | - +=======+==================+==============================+ - | 0 | :attr:`tm_year` | (for example, 1993) | - +-------+------------------+------------------------------+ - | 1 | :attr:`tm_mon` | range [1,12] | - +-------+------------------+------------------------------+ - | 2 | :attr:`tm_mday` | range [1,31] | - +-------+------------------+------------------------------+ - | 3 | :attr:`tm_hour` | range [0,23] | - +-------+------------------+------------------------------+ - | 4 | :attr:`tm_min` | range [0,59] | - +-------+------------------+------------------------------+ - | 5 | :attr:`tm_sec` | range [0,61]; see **(1)** in | - | | | :func:`strftime` description | - +-------+------------------+------------------------------+ - | 6 | :attr:`tm_wday` | range [0,6], Monday is 0 | - +-------+------------------+------------------------------+ - | 7 | :attr:`tm_yday` | range [1,366] | - +-------+------------------+------------------------------+ - | 8 | :attr:`tm_isdst` | 0, 1 or -1; see below | - +-------+------------------+------------------------------+ - - Note that unlike the C structure, the month value is a range of 1-12, not 0-11. - A year value will be handled as described under "Year 2000 (Y2K) issues" above. - A ``-1`` argument as the daylight savings flag, passed to :func:`mktime` will - usually result in the correct daylight savings state to be filled in. - - When a tuple with an incorrect length is passed to a function expecting a - :class:`struct_time`, or having elements of the wrong type, a :exc:`TypeError` - is raised. + See :class:`struct_time` for a description of these objects. 
* Use the following functions to convert between time representations: @@ -388,10 +360,45 @@ The module defines the following functions and data items: documented as supported. -.. data:: struct_time +.. class:: struct_time The type of the time value sequence returned by :func:`gmtime`, - :func:`localtime`, and :func:`strptime`. + :func:`localtime`, and :func:`strptime`. It is an object with a :term:`named + tuple` interface: values can be accessed by index and by attribute name. The + following values are present: + + +-------+-------------------+---------------------------------+ + | Index | Attribute | Values | + +=======+===================+=================================+ + | 0 | :attr:`tm_year` | (for example, 1993) | + +-------+-------------------+---------------------------------+ + | 1 | :attr:`tm_mon` | range [1, 12] | + +-------+-------------------+---------------------------------+ + | 2 | :attr:`tm_mday` | range [1, 31] | + +-------+-------------------+---------------------------------+ + | 3 | :attr:`tm_hour` | range [0, 23] | + +-------+-------------------+---------------------------------+ + | 4 | :attr:`tm_min` | range [0, 59] | + +-------+-------------------+---------------------------------+ + | 5 | :attr:`tm_sec` | range [0, 61]; see **(1)** in | + | | | :func:`strftime` description | + +-------+-------------------+---------------------------------+ + | 6 | :attr:`tm_wday` | range [0, 6], Monday is 0 | + +-------+-------------------+---------------------------------+ + | 7 | :attr:`tm_yday` | range [1, 366] | + +-------+-------------------+---------------------------------+ + | 8 | :attr:`tm_isdst` | 0, 1 or -1; see below | + +-------+-------------------+---------------------------------+ + + Note that unlike the C structure, the month value is a range of [1, 12], not + [0, 11]. A year value will be handled as described under :ref:`Year 2000 + (Y2K) issues <time-y2kissues>` above. 
A ``-1`` argument as the daylight + savings flag, passed to :func:`mktime` will usually result in the correct + daylight savings state to be filled in. + + When a tuple with an incorrect length is passed to a function expecting a + :class:`struct_time`, or having elements of the wrong type, a + :exc:`TypeError` is raised. .. function:: time() diff --git a/Doc/tutorial/controlflow.rst b/Doc/tutorial/controlflow.rst index bd88ad6..e33a596 100644 --- a/Doc/tutorial/controlflow.rst +++ b/Doc/tutorial/controlflow.rst @@ -458,10 +458,12 @@ function like this:: def cheeseshop(kind, *arguments, **keywords): print("-- Do you have any", kind, "?") print("-- I'm sorry, we're all out of", kind) - for arg in arguments: print(arg) + for arg in arguments: + print(arg) print("-" * 40) keys = sorted(keywords.keys()) - for kw in keys: print(kw, ":", keywords[kw]) + for kw in keys: + print(kw, ":", keywords[kw]) It could be called like this:: diff --git a/Lib/inspect.py b/Lib/inspect.py index 5c7cfb4..ffe05b7 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -737,9 +737,9 @@ def getargs(co): """Get information about the arguments accepted by a code object. Three things are returned: (args, varargs, varkw), where - 'args' is the list of argument names, possibly containing nested - lists. Keyword-only arguments are appended. 'varargs' and 'varkw' - are the names of the * and ** arguments or None.""" + 'args' is the list of argument names. Keyword-only arguments are + appended. 'varargs' and 'varkw' are the names of the * and ** + arguments or None.""" args, varargs, kwonlyargs, varkw = _getfullargs(co) return Arguments(args + kwonlyargs, varargs, varkw) @@ -747,9 +747,8 @@ def _getfullargs(co): """Get information about the arguments accepted by a code object. 
Four things are returned: (args, varargs, kwonlyargs, varkw), where - 'args' and 'kwonlyargs' are lists of argument names (with 'args' - possibly containing nested lists), and 'varargs' and 'varkw' are the - names of the * and ** arguments or None.""" + 'args' and 'kwonlyargs' are lists of argument names, and 'varargs' + and 'varkw' are the names of the * and ** arguments or None.""" if not iscode(co): raise TypeError('{!r} is not a code object'.format(co)) @@ -778,7 +777,7 @@ def getargspec(func): """Get the names and default values of a function's arguments. A tuple of four things is returned: (args, varargs, varkw, defaults). - 'args' is a list of the argument names (it may contain nested lists). + 'args' is a list of the argument names. 'args' will include keyword-only argument names. 'varargs' and 'varkw' are the names of the * and ** arguments or None. 'defaults' is an n-tuple of the default values of the last n arguments. @@ -803,7 +802,7 @@ def getfullargspec(func): A tuple of seven things is returned: (args, varargs, varkw, defaults, kwonlyargs, kwonlydefaults annotations). - 'args' is a list of the argument names (it may contain nested lists). + 'args' is a list of the argument names. 'varargs' and 'varkw' are the names of the * and ** arguments or None. 'defaults' is an n-tuple of the default values of the last n arguments. 'kwonlyargs' is a list of keyword-only argument names. @@ -827,25 +826,12 @@ def getargvalues(frame): """Get information about arguments passed into a particular frame. A tuple of four things is returned: (args, varargs, varkw, locals). - 'args' is a list of the argument names (it may contain nested lists). + 'args' is a list of the argument names. 'varargs' and 'varkw' are the names of the * and ** arguments or None. 
'locals' is the locals dictionary of the given frame.""" args, varargs, varkw = getargs(frame.f_code) return ArgInfo(args, varargs, varkw, frame.f_locals) -def joinseq(seq): - if len(seq) == 1: - return '(' + seq[0] + ',)' - else: - return '(' + ', '.join(seq) + ')' - -def strseq(object, convert, join=joinseq): - """Recursively walk a sequence, stringifying each element.""" - if type(object) in (list, tuple): - return join(map(lambda o, c=convert, j=join: strseq(o, c, j), object)) - else: - return convert(object) - def formatannotation(annotation, base_module=None): if isinstance(annotation, type): if annotation.__module__ in ('builtins', base_module): @@ -866,8 +852,7 @@ def formatargspec(args, varargs=None, varkw=None, defaults=None, formatvarkw=lambda name: '**' + name, formatvalue=lambda value: '=' + repr(value), formatreturns=lambda text: ' -> ' + text, - formatannotation=formatannotation, - join=joinseq): + formatannotation=formatannotation): """Format an argument spec from the values returned by getargspec or getfullargspec. @@ -885,7 +870,7 @@ def formatargspec(args, varargs=None, varkw=None, defaults=None, if defaults: firstdefault = len(args) - len(defaults) for i, arg in enumerate(args): - spec = strseq(arg, formatargandannotation, join) + spec = formatargandannotation(arg) if defaults and i >= firstdefault: spec = spec + formatvalue(defaults[i - firstdefault]) specs.append(spec) @@ -911,8 +896,7 @@ def formatargvalues(args, varargs, varkw, locals, formatarg=str, formatvarargs=lambda name: '*' + name, formatvarkw=lambda name: '**' + name, - formatvalue=lambda value: '=' + repr(value), - join=joinseq): + formatvalue=lambda value: '=' + repr(value)): """Format an argument spec from the 4 values returned by getargvalues. The first four arguments are (args, varargs, varkw, locals). 
The @@ -924,7 +908,7 @@ def formatargvalues(args, varargs, varkw, locals, return formatarg(name) + formatvalue(locals[name]) specs = [] for i in range(len(args)): - specs.append(strseq(args[i], convert, join)) + specs.append(convert(args[i])) if varargs: specs.append(formatvarargs(varargs) + formatvalue(locals[varargs])) if varkw: diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py index 5d8cb19..d71c2ce 100644 --- a/Lib/json/__init__.py +++ b/Lib/json/__init__.py @@ -155,7 +155,7 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. + the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. """ # cached encoder @@ -213,7 +213,7 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. + the ``cls`` kwarg; otherwise ``JSONEncoder`` is used. """ # cached encoder @@ -244,8 +244,16 @@ def load(fp, cls=None, object_hook=None, parse_float=None, ``object_hook`` will be used instead of the ``dict``. This feature can be used to implement custom decoders (e.g. JSON-RPC class hinting). + ``object_pairs_hook`` is an optional function that will be called with the + result of any object literal decoded with an ordered list of pairs. The + return value of ``object_pairs_hook`` will be used instead of the ``dict``. + This feature can be used to implement custom decoders that rely on the + order that the key and value pairs are decoded (for example, + collections.OrderedDict will remember the order of insertion). If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. + kwarg; otherwise ``JSONDecoder`` is used. 
""" return loads(fp.read(), @@ -264,6 +272,14 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, ``object_hook`` will be used instead of the ``dict``. This feature can be used to implement custom decoders (e.g. JSON-RPC class hinting). + ``object_pairs_hook`` is an optional function that will be called with the + result of any object literal decoded with an ordered list of pairs. The + return value of ``object_pairs_hook`` will be used instead of the ``dict``. + This feature can be used to implement custom decoders that rely on the + order that the key and value pairs are decoded (for example, + collections.OrderedDict will remember the order of insertion). If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. + ``parse_float``, if specified, will be called with the string of every JSON float to be decoded. By default this is equivalent to float(num_str). This can be used to use another datatype or parser @@ -280,7 +296,7 @@ def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, are encountered. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. + kwarg; otherwise ``JSONDecoder`` is used. """ if (cls is None and object_hook is None and diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py index 3e7405b..d606cbd 100644 --- a/Lib/json/decoder.py +++ b/Lib/json/decoder.py @@ -289,6 +289,15 @@ class JSONDecoder(object): place of the given ``dict``. This can be used to provide custom deserializations (e.g. to support JSON-RPC class hinting). + ``object_pairs_hook``, if specified will be called with the result of + every JSON object decoded with an ordered list of pairs. The return + value of ``object_pairs_hook`` will be used instead of the ``dict``. + This feature can be used to implement custom decoders that rely on the + order that the key and value pairs are decoded (for example, + collections.OrderedDict will remember the order of insertion). 
If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes + priority. + ``parse_float``, if specified, will be called with the string of every JSON float to be decoded. By default this is equivalent to float(num_str). This can be used to use another datatype or parser @@ -304,6 +313,11 @@ class JSONDecoder(object): This can be used to raise an exception if invalid JSON numbers are encountered. + If ``strict`` is false (true is the default), then control + characters will be allowed inside strings. Control characters in + this context are those with character codes in the 0-31 range, + including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``. + """ self.object_hook = object_hook self.parse_float = parse_float or float diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index 461ac65..c58d17d 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -1479,6 +1479,13 @@ class MinidomTest(unittest.TestCase): doc.appendChild(doc.createComment("foo--bar")) self.assertRaises(ValueError, doc.toxml) + def testEmptyXMLNSValue(self): + doc = parseString("\n" + "\n") + doc2 = parseString(doc.toxml()) + self.confirm(doc2.namespaceURI == xml.dom.EMPTY_NAMESPACE) + + def test_main(): run_unittest(MinidomTest) diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index f4f4400..0e62e73 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -293,9 +293,10 @@ def _in_document(node): def _write_data(writer, data): "Writes datachars to writer." - data = data.replace("&", "&").replace("<", "<") - data = data.replace("\"", """).replace(">", ">") - writer.write(data) + if data: + data = data.replace("&", "&").replace("<", "<"). 
\ + replace("\"", """).replace(">", ">") + writer.write(data) def _get_elements_by_tagName_helper(parent, name, rc): for node in parent.childNodes: @@ -1358,11 +1359,9 @@ class Notation(Identified, Childless, Node): class DOMImplementation(DOMImplementationLS): _features = [("core", "1.0"), ("core", "2.0"), - ("core", "3.0"), ("core", None), ("xml", "1.0"), ("xml", "2.0"), - ("xml", "3.0"), ("xml", None), ("ls-load", "3.0"), ("ls-load", None), diff --git a/Misc/NEWS b/Misc/NEWS index b77b789..d167151 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -18,6 +18,11 @@ Library - Issue #10459: Update CJK character names to Unicode 5.1. +- Issue #6098: Don't claim DOM level 3 conformance in minidom. + +- Issue #5762: Fix AttributeError raised by ``xml.dom.minidom`` when an empty + XML namespace attribute is encountered. + - Issue #1710703: Write structures for an empty ZIP archive when a ZipFile is created in modes 'a' or 'w' and then closed without adding any files. Raise BadZipfile (rather than IOError) when opening small non-ZIP files. -- cgit v0.12 pps">"longintrepr.h" #include "code.h" #include "marshal.h" /* High water mark to determine when the marshalled object is dangerously deep * and risks coring the interpreter. When the object stack gets this deep, * raise an exception instead of continuing. */ #define MAX_MARSHAL_STACK_DEPTH 2000 #define TYPE_NULL '0' #define TYPE_NONE 'N' #define TYPE_FALSE 'F' #define TYPE_TRUE 'T' #define TYPE_STOPITER 'S' #define TYPE_ELLIPSIS '.' #define TYPE_INT 'i' #define TYPE_INT64 'I' #define TYPE_FLOAT 'f' #define TYPE_BINARY_FLOAT 'g' #define TYPE_COMPLEX 'x' #define TYPE_BINARY_COMPLEX 'y' #define TYPE_LONG 'l' #define TYPE_STRING 's' #define TYPE_INTERNED 't' #define TYPE_STRINGREF 'R' #define TYPE_TUPLE '(' #define TYPE_LIST '[' #define TYPE_DICT '{' #define TYPE_CODE 'c' #define TYPE_UNICODE 'u' #define TYPE_UNKNOWN '?' 
#define TYPE_SET '<' #define TYPE_FROZENSET '>' typedef struct { FILE *fp; int error; int depth; /* If fp == NULL, the following are valid: */ PyObject *str; char *ptr; char *end; PyObject *strings; /* dict on marshal, list on unmarshal */ int version; } WFILE; #define w_byte(c, p) if (((p)->fp)) putc((c), (p)->fp); \ else if ((p)->ptr != (p)->end) *(p)->ptr++ = (c); \ else w_more(c, p) static void w_more(int c, WFILE *p) { Py_ssize_t size, newsize; if (p->str == NULL) return; /* An error already occurred */ size = PyString_Size(p->str); newsize = size + 1024; if (_PyString_Resize(&p->str, newsize) != 0) { p->ptr = p->end = NULL; } else { p->ptr = PyString_AS_STRING((PyStringObject *)p->str) + size; p->end = PyString_AS_STRING((PyStringObject *)p->str) + newsize; *p->ptr++ = Py_SAFE_DOWNCAST(c, int, char); } } static void w_string(char *s, int n, WFILE *p) { if (p->fp != NULL) { fwrite(s, 1, n, p->fp); } else { while (--n >= 0) { w_byte(*s, p); s++; } } } static void w_short(int x, WFILE *p) { w_byte((char)( x & 0xff), p); w_byte((char)((x>> 8) & 0xff), p); } static void w_long(long x, WFILE *p) { w_byte((char)( x & 0xff), p); w_byte((char)((x>> 8) & 0xff), p); w_byte((char)((x>>16) & 0xff), p); w_byte((char)((x>>24) & 0xff), p); } #if SIZEOF_LONG > 4 static void w_long64(long x, WFILE *p) { w_long(x, p); w_long(x>>32, p); } #endif static void w_object(PyObject *v, WFILE *p) { Py_ssize_t i, n; p->depth++; if (p->depth > MAX_MARSHAL_STACK_DEPTH) { p->error = 2; } else if (v == NULL) { w_byte(TYPE_NULL, p); } else if (v == Py_None) { w_byte(TYPE_NONE, p); } else if (v == PyExc_StopIteration) { w_byte(TYPE_STOPITER, p); } else if (v == Py_Ellipsis) { w_byte(TYPE_ELLIPSIS, p); } else if (v == Py_False) { w_byte(TYPE_FALSE, p); } else if (v == Py_True) { w_byte(TYPE_TRUE, p); } else if (PyInt_CheckExact(v)) { long x = PyInt_AS_LONG((PyIntObject *)v); #if SIZEOF_LONG > 4 long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31); if (y && y != -1) { w_byte(TYPE_INT64, p); 
w_long64(x, p); } else #endif { w_byte(TYPE_INT, p); w_long(x, p); } } else if (PyLong_CheckExact(v)) { PyLongObject *ob = (PyLongObject *)v; w_byte(TYPE_LONG, p); n = ob->ob_size; w_long((long)n, p); if (n < 0) n = -n; for (i = 0; i < n; i++) w_short(ob->ob_digit[i], p); } else if (PyFloat_CheckExact(v)) { if (p->version > 1) { unsigned char buf[8]; if (_PyFloat_Pack8(PyFloat_AsDouble(v), buf, 1) < 0) { p->error = 1; return; } w_byte(TYPE_BINARY_FLOAT, p); w_string((char*)buf, 8, p); } else { char buf[256]; /* Plenty to format any double */ PyFloat_AsReprString(buf, (PyFloatObject *)v); n = strlen(buf); w_byte(TYPE_FLOAT, p); w_byte((int)n, p); w_string(buf, (int)n, p); } } #ifndef WITHOUT_COMPLEX else if (PyComplex_CheckExact(v)) { if (p->version > 1) { unsigned char buf[8]; if (_PyFloat_Pack8(PyComplex_RealAsDouble(v), buf, 1) < 0) { p->error = 1; return; } w_byte(TYPE_BINARY_COMPLEX, p); w_string((char*)buf, 8, p); if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v), buf, 1) < 0) { p->error = 1; return; } w_string((char*)buf, 8, p); } else { char buf[256]; /* Plenty to format any double */ PyFloatObject *temp; w_byte(TYPE_COMPLEX, p); temp = (PyFloatObject*)PyFloat_FromDouble( PyComplex_RealAsDouble(v)); if (!temp) { p->error = 1; return; } PyFloat_AsReprString(buf, temp); Py_DECREF(temp); n = strlen(buf); w_byte((int)n, p); w_string(buf, (int)n, p); temp = (PyFloatObject*)PyFloat_FromDouble( PyComplex_ImagAsDouble(v)); if (!temp) { p->error = 1; return; } PyFloat_AsReprString(buf, temp); Py_DECREF(temp); n = strlen(buf); w_byte((int)n, p); w_string(buf, (int)n, p); } } #endif else if (PyString_CheckExact(v)) { if (p->strings && PyString_CHECK_INTERNED(v)) { PyObject *o = PyDict_GetItem(p->strings, v); if (o) { long w = PyInt_AsLong(o); w_byte(TYPE_STRINGREF, p); w_long(w, p); goto exit; } else { int ok; o = PyInt_FromSsize_t(PyDict_Size(p->strings)); ok = o && PyDict_SetItem(p->strings, v, o) >= 0; Py_XDECREF(o); if (!ok) { p->depth--; p->error = 1; return; } 
w_byte(TYPE_INTERNED, p); } } else { w_byte(TYPE_STRING, p); } n = PyString_GET_SIZE(v); if (n > INT_MAX) { /* huge strings are not supported */ p->depth--; p->error = 1; return; } w_long((long)n, p); w_string(PyString_AS_STRING(v), (int)n, p); } #ifdef Py_USING_UNICODE else if (PyUnicode_CheckExact(v)) { PyObject *utf8; utf8 = PyUnicode_AsUTF8String(v); if (utf8 == NULL) { p->depth--; p->error = 1; return; } w_byte(TYPE_UNICODE, p); n = PyString_GET_SIZE(utf8); if (n > INT_MAX) { p->depth--; p->error = 1; return; } w_long((long)n, p); w_string(PyString_AS_STRING(utf8), (int)n, p); Py_DECREF(utf8); } #endif else if (PyTuple_CheckExact(v)) { w_byte(TYPE_TUPLE, p); n = PyTuple_Size(v); w_long((long)n, p); for (i = 0; i < n; i++) { w_object(PyTuple_GET_ITEM(v, i), p); } } else if (PyList_CheckExact(v)) { w_byte(TYPE_LIST, p); n = PyList_GET_SIZE(v); w_long((long)n, p); for (i = 0; i < n; i++) { w_object(PyList_GET_ITEM(v, i), p); } } else if (PyDict_CheckExact(v)) { Py_ssize_t pos; PyObject *key, *value; w_byte(TYPE_DICT, p); /* This one is NULL object terminated! 
*/ pos = 0; while (PyDict_Next(v, &pos, &key, &value)) { w_object(key, p); w_object(value, p); } w_object((PyObject *)NULL, p); } else if (PyAnySet_CheckExact(v)) { PyObject *value, *it; if (PyObject_TypeCheck(v, &PySet_Type)) w_byte(TYPE_SET, p); else w_byte(TYPE_FROZENSET, p); n = PyObject_Size(v); if (n == -1) { p->depth--; p->error = 1; return; } w_long((long)n, p); it = PyObject_GetIter(v); if (it == NULL) { p->depth--; p->error = 1; return; } while ((value = PyIter_Next(it)) != NULL) { w_object(value, p); Py_DECREF(value); } Py_DECREF(it); if (PyErr_Occurred()) { p->depth--; p->error = 1; return; } } else if (PyCode_Check(v)) { PyCodeObject *co = (PyCodeObject *)v; w_byte(TYPE_CODE, p); w_long(co->co_argcount, p); w_long(co->co_nlocals, p); w_long(co->co_stacksize, p); w_long(co->co_flags, p); w_object(co->co_code, p); w_object(co->co_consts, p); w_object(co->co_names, p); w_object(co->co_varnames, p); w_object(co->co_freevars, p); w_object(co->co_cellvars, p); w_object(co->co_filename, p); w_object(co->co_name, p); w_long(co->co_firstlineno, p); w_object(co->co_lnotab, p); } else if (PyObject_CheckReadBuffer(v)) { /* Write unknown buffer-style objects as a string */ char *s; PyBufferProcs *pb = v->ob_type->tp_as_buffer; w_byte(TYPE_STRING, p); n = (*pb->bf_getreadbuffer)(v, 0, (void **)&s); if (n > INT_MAX) { p->depth--; p->error = 1; return; } w_long((long)n, p); w_string(s, (int)n, p); } else { w_byte(TYPE_UNKNOWN, p); p->error = 1; } exit: p->depth--; } /* version currently has no effect for writing longs. */ void PyMarshal_WriteLongToFile(long x, FILE *fp, int version) { WFILE wf; wf.fp = fp; wf.error = 0; wf.depth = 0; wf.strings = NULL; wf.version = version; w_long(x, &wf); } void PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version) { WFILE wf; wf.fp = fp; wf.error = 0; wf.depth = 0; wf.strings = (version > 0) ? 
PyDict_New() : NULL; wf.version = version; w_object(x, &wf); Py_XDECREF(wf.strings); } typedef WFILE RFILE; /* Same struct with different invariants */ #define rs_byte(p) (((p)->ptr < (p)->end) ? (unsigned char)*(p)->ptr++ : EOF) #define r_byte(p) ((p)->fp ? getc((p)->fp) : rs_byte(p)) static int r_string(char *s, int n, RFILE *p) { if (p->fp != NULL) /* The result fits into int because it must be <=n. */ return (int)fread(s, 1, n, p->fp); if (p->end - p->ptr < n) n = (int)(p->end - p->ptr); memcpy(s, p->ptr, n); p->ptr += n; return n; } static int r_short(RFILE *p) { register short x; x = r_byte(p); x |= r_byte(p) << 8; /* Sign-extension, in case short greater than 16 bits */ x |= -(x & 0x8000); return x; } static long r_long(RFILE *p) { register long x; register FILE *fp = p->fp; if (fp) { x = getc(fp); x |= (long)getc(fp) << 8; x |= (long)getc(fp) << 16; x |= (long)getc(fp) << 24; } else { x = rs_byte(p); x |= (long)rs_byte(p) << 8; x |= (long)rs_byte(p) << 16; x |= (long)rs_byte(p) << 24; } #if SIZEOF_LONG > 4 /* Sign extension for 64-bit machines */ x |= -(x & 0x80000000L); #endif return x; } /* r_long64 deals with the TYPE_INT64 code. On a machine with sizeof(long) > 4, it returns a Python int object, else a Python long object. Note that w_long64 writes out TYPE_INT if 32 bits is enough, so there's no inefficiency here in returning a PyLong on 32-bit boxes for everything written via TYPE_INT64 (i.e., if an int is written via TYPE_INT64, it *needs* more than 32 bits). 
*/ static PyObject * r_long64(RFILE *p) { long lo4 = r_long(p); long hi4 = r_long(p); #if SIZEOF_LONG > 4 long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL); return PyInt_FromLong(x); #else unsigned char buf[8]; int one = 1; int is_little_endian = (int)*(char*)&one; if (is_little_endian) { memcpy(buf, &lo4, 4); memcpy(buf+4, &hi4, 4); } else { memcpy(buf, &hi4, 4); memcpy(buf+4, &lo4, 4); } return _PyLong_FromByteArray(buf, 8, is_little_endian, 1); #endif } static PyObject * r_object(RFILE *p) { /* NULL is a valid return value, it does not necessarily means that an exception is set. */ PyObject *v, *v2; long i, n; int type = r_byte(p); PyObject *retval; p->depth++; if (p->depth > MAX_MARSHAL_STACK_DEPTH) { p->depth--; PyErr_SetString(PyExc_ValueError, "recursion limit exceeded"); return NULL; } switch (type) { case EOF: PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); retval = NULL; break; case TYPE_NULL: retval = NULL; break; case TYPE_NONE: Py_INCREF(Py_None); retval = Py_None; break; case TYPE_STOPITER: Py_INCREF(PyExc_StopIteration); retval = PyExc_StopIteration; break; case TYPE_ELLIPSIS: Py_INCREF(Py_Ellipsis); retval = Py_Ellipsis; break; case TYPE_FALSE: Py_INCREF(Py_False); retval = Py_False; break; case TYPE_TRUE: Py_INCREF(Py_True); retval = Py_True; break; case TYPE_INT: retval = PyInt_FromLong(r_long(p)); break; case TYPE_INT64: retval = r_long64(p); break; case TYPE_LONG: { int size; PyLongObject *ob; n = r_long(p); if (n < -INT_MAX || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data"); retval = NULL; break; } size = n<0 ? 
-n : n; ob = _PyLong_New(size); if (ob == NULL) { retval = NULL; break; } ob->ob_size = n; for (i = 0; i < size; i++) { int digit = r_short(p); if (digit < 0) { Py_DECREF(ob); PyErr_SetString(PyExc_ValueError, "bad marshal data"); ob = NULL; break; } if (ob != NULL) ob->ob_digit[i] = digit; } retval = (PyObject *)ob; break; } case TYPE_FLOAT: { char buf[256]; double dx; n = r_byte(p); if (n == EOF || r_string(buf, (int)n, p) != n) { PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); retval = NULL; break; } buf[n] = '\0'; retval = NULL; PyFPE_START_PROTECT("atof", break) dx = PyOS_ascii_atof(buf); PyFPE_END_PROTECT(dx) retval = PyFloat_FromDouble(dx); break; } case TYPE_BINARY_FLOAT: { unsigned char buf[8]; double x; if (r_string((char*)buf, 8, p) != 8) { PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); retval = NULL; break; } x = _PyFloat_Unpack8(buf, 1); if (x == -1.0 && PyErr_Occurred()) { retval = NULL; break; } retval = PyFloat_FromDouble(x); break; } #ifndef WITHOUT_COMPLEX case TYPE_COMPLEX: { char buf[256]; Py_complex c; n = r_byte(p); if (n == EOF || r_string(buf, (int)n, p) != n) { PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); retval = NULL; break; } buf[n] = '\0'; retval = NULL; PyFPE_START_PROTECT("atof", break;) c.real = PyOS_ascii_atof(buf); PyFPE_END_PROTECT(c) n = r_byte(p); if (n == EOF || r_string(buf, (int)n, p) != n) { PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); retval = NULL; break; } buf[n] = '\0'; PyFPE_START_PROTECT("atof", break) c.imag = PyOS_ascii_atof(buf); PyFPE_END_PROTECT(c) retval = PyComplex_FromCComplex(c); break; } case TYPE_BINARY_COMPLEX: { unsigned char buf[8]; Py_complex c; if (r_string((char*)buf, 8, p) != 8) { PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); retval = NULL; break; } c.real = _PyFloat_Unpack8(buf, 1); if (c.real == -1.0 && PyErr_Occurred()) { retval = NULL; break; } if (r_string((char*)buf, 8, p) != 8) { 
PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); retval = NULL; break; } c.imag = _PyFloat_Unpack8(buf, 1); if (c.imag == -1.0 && PyErr_Occurred()) { retval = NULL; break; } retval = PyComplex_FromCComplex(c); break; } #endif case TYPE_INTERNED: case TYPE_STRING: n = r_long(p); if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data"); retval = NULL; break; } v = PyString_FromStringAndSize((char *)NULL, n); if (v == NULL) { retval = NULL; break; } if (r_string(PyString_AS_STRING(v), (int)n, p) != n) { Py_DECREF(v); PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); retval = NULL; break; } if (type == TYPE_INTERNED) { PyString_InternInPlace(&v); if (PyList_Append(p->strings, v) < 0) { retval = NULL; break; } } retval = v; break; case TYPE_STRINGREF: n = r_long(p); if (n < 0 || n >= PyList_GET_SIZE(p->strings)) { PyErr_SetString(PyExc_ValueError, "bad marshal data"); retval = NULL; break; } v = PyList_GET_ITEM(p->strings, n); Py_INCREF(v); retval = v; break; #ifdef Py_USING_UNICODE case TYPE_UNICODE: { char *buffer; n = r_long(p); if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data"); retval = NULL; break; } buffer = PyMem_NEW(char, n); if (buffer == NULL) { retval = PyErr_NoMemory(); break; } if (r_string(buffer, (int)n, p) != n) { PyMem_DEL(buffer); PyErr_SetString(PyExc_EOFError, "EOF read where object expected"); retval = NULL; break; } v = PyUnicode_DecodeUTF8(buffer, n, NULL); PyMem_DEL(buffer); retval = v; break; } #endif case TYPE_TUPLE: n = r_long(p); if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data"); retval = NULL; break; } v = PyTuple_New((int)n); if (v == NULL) { retval = NULL; break; } for (i = 0; i < n; i++) { v2 = r_object(p); if ( v2 == NULL ) { if (!PyErr_Occurred()) PyErr_SetString(PyExc_TypeError, "NULL object in marshal data"); Py_DECREF(v); v = NULL; break; } PyTuple_SET_ITEM(v, (int)i, v2); } retval = v; break; case TYPE_LIST: n 
= r_long(p); if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data"); retval = NULL; break; } v = PyList_New((int)n); if (v == NULL) { retval = NULL; break; } for (i = 0; i < n; i++) { v2 = r_object(p); if ( v2 == NULL ) { if (!PyErr_Occurred()) PyErr_SetString(PyExc_TypeError, "NULL object in marshal data"); Py_DECREF(v); v = NULL; break; } PyList_SET_ITEM(v, (int)i, v2); } retval = v; break; case TYPE_DICT: v = PyDict_New(); if (v == NULL) { retval = NULL; break; } for (;;) { PyObject *key, *val; key = r_object(p); if (key == NULL) break; val = r_object(p); if (val != NULL) PyDict_SetItem(v, key, val); Py_DECREF(key); Py_XDECREF(val); } if (PyErr_Occurred()) { Py_DECREF(v); v = NULL; } retval = v; break; case TYPE_SET: case TYPE_FROZENSET: n = r_long(p); if (n < 0 || n > INT_MAX) { PyErr_SetString(PyExc_ValueError, "bad marshal data"); retval = NULL; break; } v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL); if (v == NULL) { retval = NULL; break; } for (i = 0; i < n; i++) { v2 = r_object(p); if ( v2 == NULL ) { if (!PyErr_Occurred()) PyErr_SetString(PyExc_TypeError, "NULL object in marshal data"); Py_DECREF(v); v = NULL; break; } if (PySet_Add(v, v2) == -1) { Py_DECREF(v); Py_DECREF(v2); v = NULL; break; } Py_DECREF(v2); } retval = v; break; case TYPE_CODE: if (PyEval_GetRestricted()) { PyErr_SetString(PyExc_RuntimeError, "cannot unmarshal code objects in " "restricted execution mode"); retval = NULL; break; } else { int argcount; int nlocals; int stacksize; int flags; PyObject *code = NULL; PyObject *consts = NULL; PyObject *names = NULL; PyObject *varnames = NULL; PyObject *freevars = NULL; PyObject *cellvars = NULL; PyObject *filename = NULL; PyObject *name = NULL; int firstlineno; PyObject *lnotab = NULL; v = NULL; /* XXX ignore long->int overflows for now */ argcount = (int)r_long(p); nlocals = (int)r_long(p); stacksize = (int)r_long(p); flags = (int)r_long(p); code = r_object(p); if (code == NULL) goto code_error; 
consts = r_object(p); if (consts == NULL) goto code_error; names = r_object(p); if (names == NULL) goto code_error; varnames = r_object(p); if (varnames == NULL) goto code_error; freevars = r_object(p); if (freevars == NULL) goto code_error; cellvars = r_object(p); if (cellvars == NULL) goto code_error; filename = r_object(p); if (filename == NULL) goto code_error; name = r_object(p); if (name == NULL) goto code_error; firstlineno = (int)r_long(p); lnotab = r_object(p); if (lnotab == NULL) goto code_error; v = (PyObject *) PyCode_New( argcount, nlocals, stacksize, flags, code, consts, names, varnames, freevars, cellvars, filename, name, firstlineno, lnotab); code_error: Py_XDECREF(code); Py_XDECREF(consts); Py_XDECREF(names); Py_XDECREF(varnames); Py_XDECREF(freevars); Py_XDECREF(cellvars); Py_XDECREF(filename); Py_XDECREF(name); Py_XDECREF(lnotab); } retval = v; break; default: /* Bogus data got written, which isn't ideal. This will let you keep working and recover. */ PyErr_SetString(PyExc_ValueError, "bad marshal data"); retval = NULL; break; } p->depth--; return retval; } static PyObject * read_object(RFILE *p) { PyObject *v; if (PyErr_Occurred()) { fprintf(stderr, "XXX readobject called with exception set\n"); return NULL; } v = r_object(p); if (v == NULL && !PyErr_Occurred()) PyErr_SetString(PyExc_TypeError, "NULL object in marshal data"); return v; } int PyMarshal_ReadShortFromFile(FILE *fp) { RFILE rf; assert(fp); rf.fp = fp; rf.strings = NULL; rf.end = rf.ptr = NULL; return r_short(&rf); } long PyMarshal_ReadLongFromFile(FILE *fp) { RFILE rf; rf.fp = fp; rf.strings = NULL; rf.ptr = rf.end = NULL; return r_long(&rf); } #ifdef HAVE_FSTAT /* Return size of file in bytes; < 0 if unknown. */ static off_t getfilesize(FILE *fp) { struct stat st; if (fstat(fileno(fp), &st) != 0) return -1; else return st.st_size; } #endif /* If we can get the size of the file up-front, and it's reasonably small, * read it in one gulp and delegate to ...FromString() instead. 
Much quicker * than reading a byte at a time from file; speeds .pyc imports. * CAUTION: since this may read the entire remainder of the file, don't * call it unless you know you're done with the file. */ PyObject * PyMarshal_ReadLastObjectFromFile(FILE *fp) { /* 75% of 2.1's .pyc files can exploit SMALL_FILE_LIMIT. * REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc. */ #define SMALL_FILE_LIMIT (1L << 14) #define REASONABLE_FILE_LIMIT (1L << 18) #ifdef HAVE_FSTAT off_t filesize; #endif #ifdef HAVE_FSTAT filesize = getfilesize(fp); if (filesize > 0) { char buf[SMALL_FILE_LIMIT]; char* pBuf = NULL; if (filesize <= SMALL_FILE_LIMIT) pBuf = buf; else if (filesize <= REASONABLE_FILE_LIMIT) pBuf = (char *)PyMem_MALLOC(filesize); if (pBuf != NULL) { PyObject* v; size_t n; /* filesize must fit into an int, because it is smaller than REASONABLE_FILE_LIMIT */ n = fread(pBuf, 1, (int)filesize, fp); v = PyMarshal_ReadObjectFromString(pBuf, n); if (pBuf != buf) PyMem_FREE(pBuf); return v; } } #endif /* We don't have fstat, or we do but the file is larger than * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time. 
*/ return PyMarshal_ReadObjectFromFile(fp); #undef SMALL_FILE_LIMIT #undef REASONABLE_FILE_LIMIT } PyObject * PyMarshal_ReadObjectFromFile(FILE *fp) { RFILE rf; PyObject *result; rf.fp = fp; rf.strings = PyList_New(0); rf.depth = 0; rf.ptr = rf.end = NULL; result = r_object(&rf); Py_DECREF(rf.strings); return result; } PyObject * PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len) { RFILE rf; PyObject *result; rf.fp = NULL; rf.ptr = str; rf.end = str + len; rf.strings = PyList_New(0); rf.depth = 0; result = r_object(&rf); Py_DECREF(rf.strings); return result; } PyObject * PyMarshal_WriteObjectToString(PyObject *x, int version) { WFILE wf; wf.fp = NULL; wf.str = PyString_FromStringAndSize((char *)NULL, 50); if (wf.str == NULL) return NULL; wf.ptr = PyString_AS_STRING((PyStringObject *)wf.str); wf.end = wf.ptr + PyString_Size(wf.str); wf.error = 0; wf.depth = 0; wf.version = version; wf.strings = (version > 0) ? PyDict_New() : NULL; w_object(x, &wf); Py_XDECREF(wf.strings); if (wf.str != NULL) { char *base = PyString_AS_STRING((PyStringObject *)wf.str); if (wf.ptr - base > PY_SSIZE_T_MAX) { Py_DECREF(wf.str); PyErr_SetString(PyExc_OverflowError, "too much marshall data for a string"); return NULL; } _PyString_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)); } if (wf.error) { Py_XDECREF(wf.str); PyErr_SetString(PyExc_ValueError, (wf.error==1)?"unmarshallable object" :"object too deeply nested to marshal"); return NULL; } return wf.str; } /* And an interface for Python programs... */ static PyObject * marshal_dump(PyObject *self, PyObject *args) { WFILE wf; PyObject *x; PyObject *f; int version = Py_MARSHAL_VERSION; if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version)) return NULL; if (!PyFile_Check(f)) { PyErr_SetString(PyExc_TypeError, "marshal.dump() 2nd arg must be file"); return NULL; } wf.fp = PyFile_AsFile(f); wf.str = NULL; wf.ptr = wf.end = NULL; wf.error = 0; wf.depth = 0; wf.strings = (version > 0) ? 
PyDict_New() : 0; wf.version = version; w_object(x, &wf); Py_XDECREF(wf.strings); if (wf.error) { PyErr_SetString(PyExc_ValueError, (wf.error==1)?"unmarshallable object" :"object too deeply nested to marshal"); return NULL; } Py_INCREF(Py_None); return Py_None; } static PyObject * marshal_load(PyObject *self, PyObject *f) { RFILE rf; PyObject *result; if (!PyFile_Check(f)) { PyErr_SetString(PyExc_TypeError, "marshal.load() arg must be file"); return NULL; } rf.fp = PyFile_AsFile(f); rf.strings = PyList_New(0); rf.depth = 0; result = read_object(&rf); Py_DECREF(rf.strings); return result; } static PyObject * marshal_dumps(PyObject *self, PyObject *args) { PyObject *x; int version = Py_MARSHAL_VERSION; if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version)) return NULL; return PyMarshal_WriteObjectToString(x, version); } static PyObject * marshal_loads(PyObject *self, PyObject *args) { RFILE rf; char *s; Py_ssize_t n; PyObject* result; if (!PyArg_ParseTuple(args, "s#:loads", &s, &n)) return NULL; rf.fp = NULL; rf.ptr = s; rf.end = s + n; rf.strings = PyList_New(0); rf.depth = 0; result = read_object(&rf); Py_DECREF(rf.strings); return result; } static PyMethodDef marshal_methods[] = { {"dump", marshal_dump, METH_VARARGS}, {"load", marshal_load, METH_O}, {"dumps", marshal_dumps, METH_VARARGS}, {"loads", marshal_loads, METH_VARARGS}, {NULL, NULL} /* sentinel */ }; PyMODINIT_FUNC PyMarshal_Init(void) { PyObject *mod = Py_InitModule("marshal", marshal_methods); if (mod == NULL) return; PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION); }