From 765531d2d083c7a4e9478fcd960eebe04ac6b192 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 26 Mar 2013 01:11:54 +0100 Subject: Issue #17516: use comment syntax for comments, instead of multiline string --- Lib/ctypes/__init__.py | 20 +- Lib/ctypes/test/test_internals.py | 19 +- Lib/ctypes/test/test_macholib.py | 56 +++--- Lib/datetime.py | 390 +++++++++++++++++++------------------- Lib/email/_header_value_parser.py | 34 ++-- Lib/importlib/_bootstrap.py | 187 +++++++++--------- Lib/logging/handlers.py | 7 +- Lib/pickletools.py | 225 +++++++++++----------- Lib/test/test_getargs2.py | 60 +++--- Lib/xml/etree/ElementTree.py | 2 +- Tools/scripts/reindent.py | 2 +- 11 files changed, 494 insertions(+), 508 deletions(-) diff --git a/Lib/ctypes/__init__.py b/Lib/ctypes/__init__.py index e2f75c5..e34c646 100644 --- a/Lib/ctypes/__init__.py +++ b/Lib/ctypes/__init__.py @@ -34,17 +34,15 @@ from _ctypes import FUNCFLAG_CDECL as _FUNCFLAG_CDECL, \ FUNCFLAG_USE_ERRNO as _FUNCFLAG_USE_ERRNO, \ FUNCFLAG_USE_LASTERROR as _FUNCFLAG_USE_LASTERROR -""" -WINOLEAPI -> HRESULT -WINOLEAPI_(type) - -STDMETHODCALLTYPE - -STDMETHOD(name) -STDMETHOD_(type, name) - -STDAPICALLTYPE -""" +# WINOLEAPI -> HRESULT +# WINOLEAPI_(type) +# +# STDMETHODCALLTYPE +# +# STDMETHOD(name) +# STDMETHOD_(type, name) +# +# STDAPICALLTYPE def create_string_buffer(init, size=None): """create_string_buffer(aBytes) -> character array diff --git a/Lib/ctypes/test/test_internals.py b/Lib/ctypes/test/test_internals.py index cbf2e05..271e3f5 100644 --- a/Lib/ctypes/test/test_internals.py +++ b/Lib/ctypes/test/test_internals.py @@ -5,17 +5,14 @@ from sys import getrefcount as grc # XXX This test must be reviewed for correctness!!! -""" -ctypes' types are container types. - -They have an internal memory block, which only consists of some bytes, -but it has to keep references to other objects as well. This is not -really needed for trivial C types like int or char, but it is important -for aggregate types like strings or pointers in particular. - -What about pointers? - -""" +# ctypes' types are container types. +# +# They have an internal memory block, which only consists of some bytes, +# but it has to keep references to other objects as well. This is not +# really needed for trivial C types like int or char, but it is important +# for aggregate types like strings or pointers in particular. +# +# What about pointers? class ObjectsTestCase(unittest.TestCase): def assertSame(self, a, b): diff --git a/Lib/ctypes/test/test_macholib.py b/Lib/ctypes/test/test_macholib.py index eda846d..fd26837 100644 --- a/Lib/ctypes/test/test_macholib.py +++ b/Lib/ctypes/test/test_macholib.py @@ -3,35 +3,33 @@ import sys import unittest # Bob Ippolito: -""" -Ok.. the code to find the filename for __getattr__ should look -something like: - -import os -from macholib.dyld import dyld_find - -def find_lib(name): - possible = ['lib'+name+'.dylib', name+'.dylib', - name+'.framework/'+name] - for dylib in possible: - try: - return os.path.realpath(dyld_find(dylib)) - except ValueError: - pass - raise ValueError, "%s not found" % (name,) - -It'll have output like this: - - >>> find_lib('pthread') -'/usr/lib/libSystem.B.dylib' - >>> find_lib('z') -'/usr/lib/libz.1.dylib' - >>> find_lib('IOKit') -'/System/Library/Frameworks/IOKit.framework/Versions/A/IOKit' - --bob - -""" +# +# Ok.. 
the code to find the filename for __getattr__ should look +# something like: +# +# import os +# from macholib.dyld import dyld_find +# +# def find_lib(name): +# possible = ['lib'+name+'.dylib', name+'.dylib', +# name+'.framework/'+name] +# for dylib in possible: +# try: +# return os.path.realpath(dyld_find(dylib)) +# except ValueError: +# pass +# raise ValueError, "%s not found" % (name,) +# +# It'll have output like this: +# +# >>> find_lib('pthread') +# '/usr/lib/libSystem.B.dylib' +# >>> find_lib('z') +# '/usr/lib/libz.1.dylib' +# >>> find_lib('IOKit') +# '/System/Library/Frameworks/IOKit.framework/Versions/A/IOKit' +# +# -bob from ctypes.macholib.dyld import dyld_find diff --git a/Lib/datetime.py b/Lib/datetime.py index f506e9a..8753015 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -1929,203 +1929,203 @@ timezone.utc = timezone._create(timedelta(0)) timezone.min = timezone._create(timezone._minoffset) timezone.max = timezone._create(timezone._maxoffset) _EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc) -""" -Some time zone algebra. For a datetime x, let - x.n = x stripped of its timezone -- its naive time. - x.o = x.utcoffset(), and assuming that doesn't raise an exception or - return None - x.d = x.dst(), and assuming that doesn't raise an exception or - return None - x.s = x's standard offset, x.o - x.d - -Now some derived rules, where k is a duration (timedelta). - -1. x.o = x.s + x.d - This follows from the definition of x.s. - -2. If x and y have the same tzinfo member, x.s = y.s. - This is actually a requirement, an assumption we need to make about - sane tzinfo classes. - -3. The naive UTC time corresponding to x is x.n - x.o. - This is again a requirement for a sane tzinfo class. - -4. (x+k).s = x.s - This follows from #2, and that datimetimetz+timedelta preserves tzinfo. - -5. (x+k).n = x.n + k - Again follows from how arithmetic is defined. - -Now we can explain tz.fromutc(x). Let's assume it's an interesting case -(meaning that the various tzinfo methods exist, and don't blow up or return -None when called). - -The function wants to return a datetime y with timezone tz, equivalent to x. -x is already in UTC. - -By #3, we want - - y.n - y.o = x.n [1] - -The algorithm starts by attaching tz to x.n, and calling that y. So -x.n = y.n at the start. Then it wants to add a duration k to y, so that [1] -becomes true; in effect, we want to solve [2] for k: - - (y+k).n - (y+k).o = x.n [2] - -By #1, this is the same as - - (y+k).n - ((y+k).s + (y+k).d) = x.n [3] - -By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start. -Substituting that into [3], - - x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving - k - (y+k).s - (y+k).d = 0; rearranging, - k = (y+k).s - (y+k).d; by #4, (y+k).s == y.s, so - k = y.s - (y+k).d - -On the RHS, (y+k).d can't be computed directly, but y.s can be, and we -approximate k by ignoring the (y+k).d term at first. Note that k can't be -very large, since all offset-returning methods return a duration of magnitude -less than 24 hours. For that reason, if y is firmly in std time, (y+k).d must -be 0, so ignoring it has no consequence then. - -In any case, the new value is - z = y + y.s [4] +# Some time zone algebra. For a datetime x, let +# x.n = x stripped of its timezone -- its naive time. 
+#     x.o = x.utcoffset(), and assuming that doesn't raise an exception or
+#           return None
+#     x.d = x.dst(), and assuming that doesn't raise an exception or
+#           return None
+#     x.s = x's standard offset, x.o - x.d
+#
+# Now some derived rules, where k is a duration (timedelta).
+#
+# 1. x.o = x.s + x.d
+#    This follows from the definition of x.s.
+#
+# 2. If x and y have the same tzinfo member, x.s = y.s.
+#    This is actually a requirement, an assumption we need to make about
+#    sane tzinfo classes.
+#
+# 3. The naive UTC time corresponding to x is x.n - x.o.
+#    This is again a requirement for a sane tzinfo class.
+#
+# 4. (x+k).s = x.s
+#    This follows from #2, and that datetimetz+timedelta preserves tzinfo.
+#
+# 5. (x+k).n = x.n + k
+#    Again follows from how arithmetic is defined.
+#
+# Now we can explain tz.fromutc(x).  Let's assume it's an interesting case
+# (meaning that the various tzinfo methods exist, and don't blow up or return
+# None when called).
+#
+# The function wants to return a datetime y with timezone tz, equivalent to x.
+# x is already in UTC.
+#
+# By #3, we want
+#
+#     y.n - y.o = x.n                             [1]
+#
+# The algorithm starts by attaching tz to x.n, and calling that y.  So
+# x.n = y.n at the start.  Then it wants to add a duration k to y, so that [1]
+# becomes true; in effect, we want to solve [2] for k:
+#
+#     (y+k).n - (y+k).o = x.n                     [2]
+#
+# By #1, this is the same as
+#
+#     (y+k).n - ((y+k).s + (y+k).d) = x.n         [3]
+#
+# By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start.
+# Substituting that into [3],
+#
+#     x.n + k - (y+k).s - (y+k).d = x.n; the x.n terms cancel, leaving
+#     k - (y+k).s - (y+k).d = 0; rearranging,
+#     k = (y+k).s - (y+k).d; by #4, (y+k).s == y.s, so
+#     k = y.s - (y+k).d
+#
+# On the RHS, (y+k).d can't be computed directly, but y.s can be, and we
+# approximate k by ignoring the (y+k).d term at first.  Note that k can't be
+# very large, since all offset-returning methods return a duration of magnitude
+# less than 24 hours.  For that reason, if y is firmly in std time, (y+k).d must
+# be 0, so ignoring it has no consequence then.
+#
+# In any case, the new value is
+#
+#     z = y + y.s                                 [4]
+#
+# It's helpful to step back and look at [4] from a higher level: it's simply
+# mapping from UTC to tz's standard time.
+#
+# At this point, if
+#
+#     z.n - z.o = x.n                             [5]
+#
+# we have an equivalent time, and are almost done.  The insecurity here is
+# at the start of daylight time.  Picture US Eastern for concreteness.  The wall
+# time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good
+# sense then.  The docs ask that an Eastern tzinfo class consider such a time to
+# be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST
+# on the day DST starts.  We want to return the 1:MM EST spelling because that's
+# the only spelling that makes sense on the local wall clock.
+#
+# In fact, if [5] holds at this point, we do have the standard-time spelling,
+# but that takes a bit of proof.  We first prove a stronger result.  What's the
+# difference between the LHS and RHS of [5]?  Let
+#
+#     diff = x.n - (z.n - z.o)                    [6]
+#
+# Now
+#     z.n =                       by [4]
+#     (y + y.s).n =               by #5
+#     y.n + y.s =                 since y.n = x.n
+#     x.n + y.s =                 since z and y have the same tzinfo member,
+#                                     y.s = z.s by #2
+#     x.n + z.s
+#
+# Plugging that back into [6] gives
+#
+#     diff =
+#     x.n - ((x.n + z.s) - z.o) =     expanding
+#     x.n - x.n - z.s + z.o =         cancelling
+#     - z.s + z.o =                   by #2
+#     z.d
+#
+# So diff = z.d.
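+#
+# This derivation is essentially what the default tzinfo.fromutc() in this
+# module implements.  A trimmed sketch, with the argument and None checks
+# omitted:
+#
+#     def fromutc(self, dt):        # dt has tzinfo=tz but holds UTC time x.n
+#         dtoff = dt.utcoffset()    # y.o
+#         dtdst = dt.dst()          # y.d
+#         delta = dtoff - dtdst     # y.s, the standard offset
+#         if delta:
+#             dt += delta           # z = y + y.s                      [4]
+#             dtdst = dt.dst()      # z.d
+#         if dtdst:
+#             return dt + dtdst     # z' = z + z.d, see [7] below
+#         return dt                 # z.d == 0, so z is the answer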
+#
+# If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time
+# spelling we wanted in the endcase described above.  We're done.  Contrarily,
+# if z.d = 0, then we have a UTC equivalent, and are also done.
+#
+# If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to
+# add to z (in effect, z is in tz's standard time, and we need to shift the
+# local clock into tz's daylight time).
+#
+# Let
+#
+#     z' = z + z.d = z + diff                     [7]
+#
+# and we can again ask whether
+#
+#     z'.n - z'.o = x.n                           [8]
+#
+# If so, we're done.  If not, the tzinfo class is insane, according to the
+# assumptions we've made.  This also requires a bit of proof.  As before, let's
+# compute the difference between the LHS and RHS of [8] (and skipping some of
+# the justifications for the kinds of substitutions we've done several times
+# already):
+#
+#     diff' = x.n - (z'.n - z'.o) =               replacing z'.n via [7]
+#             x.n - (z.n + diff - z'.o) =         replacing diff via [6]
+#             x.n - (z.n + x.n - (z.n - z.o) - z'.o) =
+#             x.n - z.n - x.n + z.n - z.o + z'.o =    cancel x.n
+#             - z.n + z.n - z.o + z'.o =              cancel z.n
+#             - z.o + z'.o =                          #1 twice
+#             -z.s - z.d + z'.s + z'.d =          z and z' have same tzinfo
+#             z'.d - z.d
+#
+# So z' is UTC-equivalent to x iff z'.d = z.d at this point.  If they are equal,
+# we've found the UTC-equivalent so are done.  In fact, we stop with [7] and
+# return z', not bothering to compute z'.d.
+#
+# How could z.d and z'.d differ?  z' = z + z.d [7], so merely moving z' by
+# a dst() offset, and starting *from* a time already in DST (we know z.d != 0),
+# would have to change the result dst() returns: we start in DST, and moving
+# a little further into it takes us out of DST.
+#
+# There isn't a sane case where this can happen.  The closest it gets is at
+# the end of DST, where there's an hour in UTC with no spelling in a hybrid
+# tzinfo class.  In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT.  During
+# that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM
+# UTC) because the docs insist on that, but 0:MM is taken as being in daylight
+# time (4:MM UTC).  There is no local time mapping to 5:MM UTC.  The local
+# clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in
+# standard time.  Since that's what the local clock *does*, we want to map both
+# UTC hours 5:MM and 6:MM to 1:MM Eastern.  The result is ambiguous
+# in local time, but so it goes -- it's the way the local clock works.
+#
+# When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0,
+# so z=0:MM.  z.d=60 (minutes) then, so [5] doesn't hold and we keep going.
+# z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8]
+# (correctly) concludes that z' is not UTC-equivalent to x.
+#
+# Because we know z.d said z was in daylight time (else [5] would have held and
+# we would have stopped then), and we know z.d != z'.d (else [8] would have held
+# and we would have stopped then), and there are only 2 possible values dst() can
+# return in Eastern, it follows that z'.d must be 0 (which it is in the example,
+# but the reasoning doesn't depend on the example -- it depends on there being
+# two possible dst() outcomes, one zero and the other non-zero).  Therefore
+# z' must be in standard time, and is the spelling we want in this case.
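+#
+# To see the 5:MM case concretely, take a hypothetical hybrid tzinfo Eastern
+# (in the spirit of the USTimeZone example in the tzinfo docs) on the morning
+# US DST ends; the exact repr shown is illustrative only:
+#
+#     >>> u = datetime(2002, 10, 27, 5, 30, tzinfo=timezone.utc)  # 5:MM UTC
+#     >>> u.astimezone(Eastern)     # astimezone() ends by calling fromutc()
+#     datetime.datetime(2002, 10, 27, 1, 30, tzinfo=Eastern)      # 1:MM EST
+#
+# The same call with 6:30 UTC also lands on 1:30 Eastern -- the repeated hour.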
+# +# Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is +# concerned (because it takes z' as being in standard time rather than the +# daylight time we intend here), but returning it gives the real-life "local +# clock repeats an hour" behavior when mapping the "unspellable" UTC hour into +# tz. +# +# When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with +# the 1:MM standard time spelling we want. +# +# So how can this break? One of the assumptions must be violated. Two +# possibilities: +# +# 1) [2] effectively says that y.s is invariant across all y belong to a given +# time zone. This isn't true if, for political reasons or continental drift, +# a region decides to change its base offset from UTC. +# +# 2) There may be versions of "double daylight" time where the tail end of +# the analysis gives up a step too early. I haven't thought about that +# enough to say. +# +# In any case, it's clear that the default fromutc() is strong enough to handle +# "almost all" time zones: so long as the standard offset is invariant, it +# doesn't matter if daylight time transition points change from year to year, or +# if daylight time is skipped in some years; it doesn't matter how large or +# small dst() may get within its bounds; and it doesn't even matter if some +# perverse time zone returns a negative dst()). So a breaking case must be +# pretty bizarre, and a tzinfo subclass can override fromutc() if it is. -It's helpful to step back at look at [4] from a higher level: it's simply -mapping from UTC to tz's standard time. - -At this point, if - - z.n - z.o = x.n [5] - -we have an equivalent time, and are almost done. The insecurity here is -at the start of daylight time. Picture US Eastern for concreteness. The wall -time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good -sense then. The docs ask that an Eastern tzinfo class consider such a time to -be EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST -on the day DST starts. We want to return the 1:MM EST spelling because that's -the only spelling that makes sense on the local wall clock. - -In fact, if [5] holds at this point, we do have the standard-time spelling, -but that takes a bit of proof. We first prove a stronger result. What's the -difference between the LHS and RHS of [5]? Let - - diff = x.n - (z.n - z.o) [6] - -Now - z.n = by [4] - (y + y.s).n = by #5 - y.n + y.s = since y.n = x.n - x.n + y.s = since z and y are have the same tzinfo member, - y.s = z.s by #2 - x.n + z.s - -Plugging that back into [6] gives - - diff = - x.n - ((x.n + z.s) - z.o) = expanding - x.n - x.n - z.s + z.o = cancelling - - z.s + z.o = by #2 - z.d - -So diff = z.d. - -If [5] is true now, diff = 0, so z.d = 0 too, and we have the standard-time -spelling we wanted in the endcase described above. We're done. Contrarily, -if z.d = 0, then we have a UTC equivalent, and are also done. - -If [5] is not true now, diff = z.d != 0, and z.d is the offset we need to -add to z (in effect, z is in tz's standard time, and we need to shift the -local clock into tz's daylight time). - -Let - - z' = z + z.d = z + diff [7] - -and we can again ask whether - - z'.n - z'.o = x.n [8] - -If so, we're done. If not, the tzinfo class is insane, according to the -assumptions we've made. This also requires a bit of proof. 
As before, let's -compute the difference between the LHS and RHS of [8] (and skipping some of -the justifications for the kinds of substitutions we've done several times -already): - - diff' = x.n - (z'.n - z'.o) = replacing z'.n via [7] - x.n - (z.n + diff - z'.o) = replacing diff via [6] - x.n - (z.n + x.n - (z.n - z.o) - z'.o) = - x.n - z.n - x.n + z.n - z.o + z'.o = cancel x.n - - z.n + z.n - z.o + z'.o = cancel z.n - - z.o + z'.o = #1 twice - -z.s - z.d + z'.s + z'.d = z and z' have same tzinfo - z'.d - z.d - -So z' is UTC-equivalent to x iff z'.d = z.d at this point. If they are equal, -we've found the UTC-equivalent so are done. In fact, we stop with [7] and -return z', not bothering to compute z'.d. - -How could z.d and z'd differ? z' = z + z.d [7], so merely moving z' by -a dst() offset, and starting *from* a time already in DST (we know z.d != 0), -would have to change the result dst() returns: we start in DST, and moving -a little further into it takes us out of DST. - -There isn't a sane case where this can happen. The closest it gets is at -the end of DST, where there's an hour in UTC with no spelling in a hybrid -tzinfo class. In US Eastern, that's 5:MM UTC = 0:MM EST = 1:MM EDT. During -that hour, on an Eastern clock 1:MM is taken as being in standard time (6:MM -UTC) because the docs insist on that, but 0:MM is taken as being in daylight -time (4:MM UTC). There is no local time mapping to 5:MM UTC. The local -clock jumps from 1:59 back to 1:00 again, and repeats the 1:MM hour in -standard time. Since that's what the local clock *does*, we want to map both -UTC hours 5:MM and 6:MM to 1:MM Eastern. The result is ambiguous -in local time, but so it goes -- it's the way the local clock works. - -When x = 5:MM UTC is the input to this algorithm, x.o=0, y.o=-5 and y.d=0, -so z=0:MM. z.d=60 (minutes) then, so [5] doesn't hold and we keep going. -z' = z + z.d = 1:MM then, and z'.d=0, and z'.d - z.d = -60 != 0 so [8] -(correctly) concludes that z' is not UTC-equivalent to x. - -Because we know z.d said z was in daylight time (else [5] would have held and -we would have stopped then), and we know z.d != z'.d (else [8] would have held -and we have stopped then), and there are only 2 possible values dst() can -return in Eastern, it follows that z'.d must be 0 (which it is in the example, -but the reasoning doesn't depend on the example -- it depends on there being -two possible dst() outcomes, one zero and the other non-zero). Therefore -z' must be in standard time, and is the spelling we want in this case. - -Note again that z' is not UTC-equivalent as far as the hybrid tzinfo class is -concerned (because it takes z' as being in standard time rather than the -daylight time we intend here), but returning it gives the real-life "local -clock repeats an hour" behavior when mapping the "unspellable" UTC hour into -tz. - -When the input is 6:MM, z=1:MM and z.d=0, and we stop at once, again with -the 1:MM standard time spelling we want. - -So how can this break? One of the assumptions must be violated. Two -possibilities: - -1) [2] effectively says that y.s is invariant across all y belong to a given - time zone. This isn't true if, for political reasons or continental drift, - a region decides to change its base offset from UTC. - -2) There may be versions of "double daylight" time where the tail end of - the analysis gives up a step too early. I haven't thought about that - enough to say. 
- -In any case, it's clear that the default fromutc() is strong enough to handle -"almost all" time zones: so long as the standard offset is invariant, it -doesn't matter if daylight time transition points change from year to year, or -if daylight time is skipped in some years; it doesn't matter how large or -small dst() may get within its bounds; and it doesn't even matter if some -perverse time zone returns a negative dst()). So a breaking case must be -pretty bizarre, and a tzinfo subclass can override fromutc() if it is. -""" try: from _datetime import * except ImportError: diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 1928505..eb31558 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1317,24 +1317,22 @@ RouteComponentMarker = ValueTerminal('@', 'route-component-marker') # Parser # -"""Parse strings according to RFC822/2047/2822/5322 rules. - -This is a stateless parser. Each get_XXX function accepts a string and -returns either a Terminal or a TokenList representing the RFC object named -by the method and a string containing the remaining unparsed characters -from the input. Thus a parser method consumes the next syntactic construct -of a given type and returns a token representing the construct plus the -unparsed remainder of the input string. - -For example, if the first element of a structured header is a 'phrase', -then: - - phrase, value = get_phrase(value) - -returns the complete phrase from the start of the string value, plus any -characters left in the string after the phrase is removed. - -""" +# Parse strings according to RFC822/2047/2822/5322 rules. +# +# This is a stateless parser. Each get_XXX function accepts a string and +# returns either a Terminal or a TokenList representing the RFC object named +# by the method and a string containing the remaining unparsed characters +# from the input. Thus a parser method consumes the next syntactic construct +# of a given type and returns a token representing the construct plus the +# unparsed remainder of the input string. +# +# For example, if the first element of a structured header is a 'phrase', +# then: +# +# phrase, value = get_phrase(value) +# +# returns the complete phrase from the start of the string value, plus any +# characters left in the string after the phrase is removed. _wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split _non_atom_end_matcher = re.compile(r"[^{}]+".format( diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py index 9a48c16..d80c2c0 100644 --- a/Lib/importlib/_bootstrap.py +++ b/Lib/importlib/_bootstrap.py @@ -299,101 +299,100 @@ def _call_with_frames_removed(f, *args, **kwds): # Finder/loader utility code ############################################### -"""Magic word to reject .pyc files generated by other Python versions. -It should change for each incompatible change to the bytecode. - -The value of CR and LF is incorporated so if you ever read or write -a .pyc file in text mode the magic number will be wrong; also, the -Apple MPW compiler swaps their values, botching string constants. - -The magic numbers must be spaced apart at least 2 values, as the --U interpeter flag will cause MAGIC+1 being used. They have been -odd numbers for some time now. - -There were a variety of old schemes for setting the magic number. -The current working scheme is to increment the previous value by -10. 
- -Starting with the adoption of PEP 3147 in Python 3.2, every bump in magic -number also includes a new "magic tag", i.e. a human readable string used -to represent the magic number in __pycache__ directories. When you change -the magic number, you must also set a new unique magic tag. Generally this -can be named after the Python major version of the magic number bump, but -it can really be anything, as long as it's different than anything else -that's come before. The tags are included in the following table, starting -with Python 3.2a0. - -Known values: - Python 1.5: 20121 - Python 1.5.1: 20121 - Python 1.5.2: 20121 - Python 1.6: 50428 - Python 2.0: 50823 - Python 2.0.1: 50823 - Python 2.1: 60202 - Python 2.1.1: 60202 - Python 2.1.2: 60202 - Python 2.2: 60717 - Python 2.3a0: 62011 - Python 2.3a0: 62021 - Python 2.3a0: 62011 (!) - Python 2.4a0: 62041 - Python 2.4a3: 62051 - Python 2.4b1: 62061 - Python 2.5a0: 62071 - Python 2.5a0: 62081 (ast-branch) - Python 2.5a0: 62091 (with) - Python 2.5a0: 62092 (changed WITH_CLEANUP opcode) - Python 2.5b3: 62101 (fix wrong code: for x, in ...) - Python 2.5b3: 62111 (fix wrong code: x += yield) - Python 2.5c1: 62121 (fix wrong lnotab with for loops and - storing constants that should have been removed) - Python 2.5c2: 62131 (fix wrong code: for x, in ... in listcomp/genexp) - Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode) - Python 2.6a1: 62161 (WITH_CLEANUP optimization) - Python 2.7a0: 62171 (optimize list comprehensions/change LIST_APPEND) - Python 2.7a0: 62181 (optimize conditional branches: - introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE) - Python 2.7a0 62191 (introduce SETUP_WITH) - Python 2.7a0 62201 (introduce BUILD_SET) - Python 2.7a0 62211 (introduce MAP_ADD and SET_ADD) - Python 3000: 3000 - 3010 (removed UNARY_CONVERT) - 3020 (added BUILD_SET) - 3030 (added keyword-only parameters) - 3040 (added signature annotations) - 3050 (print becomes a function) - 3060 (PEP 3115 metaclass syntax) - 3061 (string literals become unicode) - 3071 (PEP 3109 raise changes) - 3081 (PEP 3137 make __file__ and __name__ unicode) - 3091 (kill str8 interning) - 3101 (merge from 2.6a0, see 62151) - 3103 (__file__ points to source file) - Python 3.0a4: 3111 (WITH_CLEANUP optimization). - Python 3.0a5: 3131 (lexical exception stacking, including POP_EXCEPT) - Python 3.1a0: 3141 (optimize list, set and dict comprehensions: - change LIST_APPEND and SET_ADD, add MAP_ADD) - Python 3.1a0: 3151 (optimize conditional branches: - introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE) - Python 3.2a0: 3160 (add SETUP_WITH) - tag: cpython-32 - Python 3.2a1: 3170 (add DUP_TOP_TWO, remove DUP_TOPX and ROT_FOUR) - tag: cpython-32 - Python 3.2a2 3180 (add DELETE_DEREF) - Python 3.3a0 3190 __class__ super closure changed - Python 3.3a0 3200 (__qualname__ added) - 3210 (added size modulo 2**32 to the pyc header) - Python 3.3a1 3220 (changed PEP 380 implementation) - Python 3.3a4 3230 (revert changes to implicit __class__ closure) - Python 3.4a1 3250 (evaluate positional default arguments before - keyword-only defaults) - -MAGIC must change whenever the bytecode emitted by the compiler may no -longer be understood by older implementations of the eval loop (usually -due to the addition of new opcodes). +# Magic word to reject .pyc files generated by other Python versions. +# It should change for each incompatible change to the bytecode. 
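+#
+# In concrete terms, the magic word is the first 4 bytes of every .pyc file,
+# and a loader rejects the file when those bytes don't match.  A rough sketch
+# of that check, where pyc_path stands in for any compiled file's path and the
+# value mirrors _MAGIC_BYTES below:
+#
+#     MAGIC = (3250).to_bytes(2, 'little') + b'\r\n'
+#     with open(pyc_path, 'rb') as f:
+#         if f.read(4) != MAGIC:
+#             raise ImportError('bad magic number in ' + pyc_path)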
+#
+# The value of CR and LF is incorporated so if you ever read or write
+# a .pyc file in text mode the magic number will be wrong; also, the
+# Apple MPW compiler swaps their values, botching string constants.
+#
+# The magic numbers must be spaced apart at least 2 values, as the
+# -U interpreter flag will cause MAGIC+1 to be used.  They have been
+# odd numbers for some time now.
+#
+# There were a variety of old schemes for setting the magic number.
+# The current working scheme is to increment the previous value by
+# 10.
+#
+# Starting with the adoption of PEP 3147 in Python 3.2, every bump in magic
+# number also includes a new "magic tag", i.e. a human readable string used
+# to represent the magic number in __pycache__ directories.  When you change
+# the magic number, you must also set a new unique magic tag.  Generally this
+# can be named after the Python major version of the magic number bump, but
+# it can really be anything, as long as it's different than anything else
+# that's come before.  The tags are included in the following table, starting
+# with Python 3.2a0.
+#
+# Known values:
+#     Python 1.5:   20121
+#     Python 1.5.1: 20121
+#     Python 1.5.2: 20121
+#     Python 1.6:   50428
+#     Python 2.0:   50823
+#     Python 2.0.1: 50823
+#     Python 2.1:   60202
+#     Python 2.1.1: 60202
+#     Python 2.1.2: 60202
+#     Python 2.2:   60717
+#     Python 2.3a0: 62011
+#     Python 2.3a0: 62021
+#     Python 2.3a0: 62011 (!)
+#     Python 2.4a0: 62041
+#     Python 2.4a3: 62051
+#     Python 2.4b1: 62061
+#     Python 2.5a0: 62071
+#     Python 2.5a0: 62081 (ast-branch)
+#     Python 2.5a0: 62091 (with)
+#     Python 2.5a0: 62092 (changed WITH_CLEANUP opcode)
+#     Python 2.5b3: 62101 (fix wrong code: for x, in ...)
+#     Python 2.5b3: 62111 (fix wrong code: x += yield)
+#     Python 2.5c1: 62121 (fix wrong lnotab with for loops and
+#                          storing constants that should have been removed)
+#     Python 2.5c2: 62131 (fix wrong code: for x, in ... in listcomp/genexp)
+#     Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode)
+#     Python 2.6a1: 62161 (WITH_CLEANUP optimization)
+#     Python 2.7a0: 62171 (optimize list comprehensions/change LIST_APPEND)
+#     Python 2.7a0: 62181 (optimize conditional branches:
+#                          introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
+#     Python 2.7a0  62191 (introduce SETUP_WITH)
+#     Python 2.7a0  62201 (introduce BUILD_SET)
+#     Python 2.7a0  62211 (introduce MAP_ADD and SET_ADD)
+#     Python 3000:   3000
+#                    3010 (removed UNARY_CONVERT)
+#                    3020 (added BUILD_SET)
+#                    3030 (added keyword-only parameters)
+#                    3040 (added signature annotations)
+#                    3050 (print becomes a function)
+#                    3060 (PEP 3115 metaclass syntax)
+#                    3061 (string literals become unicode)
+#                    3071 (PEP 3109 raise changes)
+#                    3081 (PEP 3137 make __file__ and __name__ unicode)
+#                    3091 (kill str8 interning)
+#                    3101 (merge from 2.6a0, see 62151)
+#                    3103 (__file__ points to source file)
+#     Python 3.0a4: 3111 (WITH_CLEANUP optimization).
+# Python 3.0a5: 3131 (lexical exception stacking, including POP_EXCEPT) +# Python 3.1a0: 3141 (optimize list, set and dict comprehensions: +# change LIST_APPEND and SET_ADD, add MAP_ADD) +# Python 3.1a0: 3151 (optimize conditional branches: +# introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE) +# Python 3.2a0: 3160 (add SETUP_WITH) +# tag: cpython-32 +# Python 3.2a1: 3170 (add DUP_TOP_TWO, remove DUP_TOPX and ROT_FOUR) +# tag: cpython-32 +# Python 3.2a2 3180 (add DELETE_DEREF) +# Python 3.3a0 3190 __class__ super closure changed +# Python 3.3a0 3200 (__qualname__ added) +# 3210 (added size modulo 2**32 to the pyc header) +# Python 3.3a1 3220 (changed PEP 380 implementation) +# Python 3.3a4 3230 (revert changes to implicit __class__ closure) +# Python 3.4a1 3250 (evaluate positional default arguments before +# keyword-only defaults) +# +# MAGIC must change whenever the bytecode emitted by the compiler may no +# longer be understood by older implementations of the eval loop (usually +# due to the addition of new opcodes). -""" _MAGIC_BYTES = (3250).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(_MAGIC_BYTES, 'little') diff --git a/Lib/logging/handlers.py b/Lib/logging/handlers.py index 263acc9..8e7bb1b 100644 --- a/Lib/logging/handlers.py +++ b/Lib/logging/handlers.py @@ -825,10 +825,9 @@ class SysLogHandler(logging.Handler): msg = self.ident + msg if self.append_nul: msg += '\000' - """ - We need to convert record level to lowercase, maybe this will - change in the future. - """ + + # We need to convert record level to lowercase, maybe this will + # change in the future. prio = '<%d>' % self.encodePriority(self.facility, self.mapPriority(record.levelname)) prio = prio.encode('utf-8') diff --git a/Lib/pickletools.py b/Lib/pickletools.py index 66f4edd..9f90e3e 100644 --- a/Lib/pickletools.py +++ b/Lib/pickletools.py @@ -33,119 +33,118 @@ bytes_types = pickle.bytes_types # by a later GET. -""" -"A pickle" is a program for a virtual pickle machine (PM, but more accurately -called an unpickling machine). It's a sequence of opcodes, interpreted by the -PM, building an arbitrarily complex Python object. - -For the most part, the PM is very simple: there are no looping, testing, or -conditional instructions, no arithmetic and no function calls. Opcodes are -executed once each, from first to last, until a STOP opcode is reached. - -The PM has two data areas, "the stack" and "the memo". - -Many opcodes push Python objects onto the stack; e.g., INT pushes a Python -integer object on the stack, whose value is gotten from a decimal string -literal immediately following the INT opcode in the pickle bytestream. Other -opcodes take Python objects off the stack. The result of unpickling is -whatever object is left on the stack when the final STOP opcode is executed. - -The memo is simply an array of objects, or it can be implemented as a dict -mapping little integers to objects. The memo serves as the PM's "long term -memory", and the little integers indexing the memo are akin to variable -names. Some opcodes pop a stack object into the memo at a given index, -and others push a memo object at a given index onto the stack again. - -At heart, that's all the PM has. Subtleties arise for these reasons: - -+ Object identity. Objects can be arbitrarily complex, and subobjects - may be shared (for example, the list [a, a] refers to the same object a - twice). It can be vital that unpickling recreate an isomorphic object - graph, faithfully reproducing sharing. - -+ Recursive objects. 
For example, after "L = []; L.append(L)", L is a - list, and L[0] is the same list. This is related to the object identity - point, and some sequences of pickle opcodes are subtle in order to - get the right result in all cases. - -+ Things pickle doesn't know everything about. Examples of things pickle - does know everything about are Python's builtin scalar and container - types, like ints and tuples. They generally have opcodes dedicated to - them. For things like module references and instances of user-defined - classes, pickle's knowledge is limited. Historically, many enhancements - have been made to the pickle protocol in order to do a better (faster, - and/or more compact) job on those. - -+ Backward compatibility and micro-optimization. As explained below, - pickle opcodes never go away, not even when better ways to do a thing - get invented. The repertoire of the PM just keeps growing over time. - For example, protocol 0 had two opcodes for building Python integers (INT - and LONG), protocol 1 added three more for more-efficient pickling of short - integers, and protocol 2 added two more for more-efficient pickling of - long integers (before protocol 2, the only ways to pickle a Python long - took time quadratic in the number of digits, for both pickling and - unpickling). "Opcode bloat" isn't so much a subtlety as a source of - wearying complication. - - -Pickle protocols: - -For compatibility, the meaning of a pickle opcode never changes. Instead new -pickle opcodes get added, and each version's unpickler can handle all the -pickle opcodes in all protocol versions to date. So old pickles continue to -be readable forever. The pickler can generally be told to restrict itself to -the subset of opcodes available under previous protocol versions too, so that -users can create pickles under the current version readable by older -versions. However, a pickle does not contain its version number embedded -within it. If an older unpickler tries to read a pickle using a later -protocol, the result is most likely an exception due to seeing an unknown (in -the older unpickler) opcode. - -The original pickle used what's now called "protocol 0", and what was called -"text mode" before Python 2.3. The entire pickle bytestream is made up of -printable 7-bit ASCII characters, plus the newline character, in protocol 0. -That's why it was called text mode. Protocol 0 is small and elegant, but -sometimes painfully inefficient. - -The second major set of additions is now called "protocol 1", and was called -"binary mode" before Python 2.3. This added many opcodes with arguments -consisting of arbitrary bytes, including NUL bytes and unprintable "high bit" -bytes. Binary mode pickles can be substantially smaller than equivalent -text mode pickles, and sometimes faster too; e.g., BININT represents a 4-byte -int as 4 bytes following the opcode, which is cheaper to unpickle than the -(perhaps) 11-character decimal string attached to INT. Protocol 1 also added -a number of opcodes that operate on many stack elements at once (like APPENDS -and SETITEMS), and "shortcut" opcodes (like EMPTY_DICT and EMPTY_TUPLE). - -The third major set of additions came in Python 2.3, and is called "protocol -2". This added: - -- A better way to pickle instances of new-style classes (NEWOBJ). - -- A way for a pickle to identify its protocol (PROTO). - -- Time- and space- efficient pickling of long ints (LONG{1,4}). - -- Shortcuts for small tuples (TUPLE{1,2,3}}. - -- Dedicated opcodes for bools (NEWTRUE, NEWFALSE). 
- -- The "extension registry", a vector of popular objects that can be pushed - efficiently by index (EXT{1,2,4}). This is akin to the memo and GET, but - the registry contents are predefined (there's nothing akin to the memo's - PUT). - -Another independent change with Python 2.3 is the abandonment of any -pretense that it might be safe to load pickles received from untrusted -parties -- no sufficient security analysis has been done to guarantee -this and there isn't a use case that warrants the expense of such an -analysis. - -To this end, all tests for __safe_for_unpickling__ or for -copyreg.safe_constructors are removed from the unpickling code. -References to these variables in the descriptions below are to be seen -as describing unpickling in Python 2.2 and before. -""" +# "A pickle" is a program for a virtual pickle machine (PM, but more accurately +# called an unpickling machine). It's a sequence of opcodes, interpreted by the +# PM, building an arbitrarily complex Python object. +# +# For the most part, the PM is very simple: there are no looping, testing, or +# conditional instructions, no arithmetic and no function calls. Opcodes are +# executed once each, from first to last, until a STOP opcode is reached. +# +# The PM has two data areas, "the stack" and "the memo". +# +# Many opcodes push Python objects onto the stack; e.g., INT pushes a Python +# integer object on the stack, whose value is gotten from a decimal string +# literal immediately following the INT opcode in the pickle bytestream. Other +# opcodes take Python objects off the stack. The result of unpickling is +# whatever object is left on the stack when the final STOP opcode is executed. +# +# The memo is simply an array of objects, or it can be implemented as a dict +# mapping little integers to objects. The memo serves as the PM's "long term +# memory", and the little integers indexing the memo are akin to variable +# names. Some opcodes pop a stack object into the memo at a given index, +# and others push a memo object at a given index onto the stack again. +# +# At heart, that's all the PM has. Subtleties arise for these reasons: +# +# + Object identity. Objects can be arbitrarily complex, and subobjects +# may be shared (for example, the list [a, a] refers to the same object a +# twice). It can be vital that unpickling recreate an isomorphic object +# graph, faithfully reproducing sharing. +# +# + Recursive objects. For example, after "L = []; L.append(L)", L is a +# list, and L[0] is the same list. This is related to the object identity +# point, and some sequences of pickle opcodes are subtle in order to +# get the right result in all cases. +# +# + Things pickle doesn't know everything about. Examples of things pickle +# does know everything about are Python's builtin scalar and container +# types, like ints and tuples. They generally have opcodes dedicated to +# them. For things like module references and instances of user-defined +# classes, pickle's knowledge is limited. Historically, many enhancements +# have been made to the pickle protocol in order to do a better (faster, +# and/or more compact) job on those. +# +# + Backward compatibility and micro-optimization. As explained below, +# pickle opcodes never go away, not even when better ways to do a thing +# get invented. The repertoire of the PM just keeps growing over time. 
+#   For example, protocol 0 had two opcodes for building Python integers (INT
+#   and LONG), protocol 1 added three more for more-efficient pickling of short
+#   integers, and protocol 2 added two more for more-efficient pickling of
+#   long integers (before protocol 2, the only ways to pickle a Python long
+#   took time quadratic in the number of digits, for both pickling and
+#   unpickling).  "Opcode bloat" isn't so much a subtlety as a source of
+#   wearying complication.
+#
+#
+# Pickle protocols:
+#
+# For compatibility, the meaning of a pickle opcode never changes.  Instead,
+# new pickle opcodes get added, and each version's unpickler can handle all the
+# pickle opcodes in all protocol versions to date.  So old pickles continue to
+# be readable forever.  The pickler can generally be told to restrict itself to
+# the subset of opcodes available under previous protocol versions too, so that
+# users can create pickles under the current version readable by older
+# versions.  However, a pickle does not contain its version number embedded
+# within it.  If an older unpickler tries to read a pickle using a later
+# protocol, the result is most likely an exception due to seeing an unknown (in
+# the older unpickler) opcode.
+#
+# The original pickle used what's now called "protocol 0", and what was called
+# "text mode" before Python 2.3.  The entire pickle bytestream is made up of
+# printable 7-bit ASCII characters, plus the newline character, in protocol 0.
+# That's why it was called text mode.  Protocol 0 is small and elegant, but
+# sometimes painfully inefficient.
+#
+# The second major set of additions is now called "protocol 1", and was called
+# "binary mode" before Python 2.3.  This added many opcodes with arguments
+# consisting of arbitrary bytes, including NUL bytes and unprintable "high bit"
+# bytes.  Binary mode pickles can be substantially smaller than equivalent
+# text mode pickles, and sometimes faster too; e.g., BININT represents a 4-byte
+# int as 4 bytes following the opcode, which is cheaper to unpickle than the
+# (perhaps) 11-character decimal string attached to INT.  Protocol 1 also added
+# a number of opcodes that operate on many stack elements at once (like APPENDS
+# and SETITEMS), and "shortcut" opcodes (like EMPTY_DICT and EMPTY_TUPLE).
+#
+# The third major set of additions came in Python 2.3, and is called "protocol
+# 2".  This added:
+#
+# - A better way to pickle instances of new-style classes (NEWOBJ).
+#
+# - A way for a pickle to identify its protocol (PROTO).
+#
+# - Time- and space-efficient pickling of long ints (LONG{1,4}).
+#
+# - Shortcuts for small tuples (TUPLE{1,2,3}).
+#
+# - Dedicated opcodes for bools (NEWTRUE, NEWFALSE).
+#
+# - The "extension registry", a vector of popular objects that can be pushed
+#   efficiently by index (EXT{1,2,4}).  This is akin to the memo and GET, but
+#   the registry contents are predefined (there's nothing akin to the memo's
+#   PUT).
+#
+# Another independent change with Python 2.3 is the abandonment of any
+# pretense that it might be safe to load pickles received from untrusted
+# parties -- no sufficient security analysis has been done to guarantee
+# this and there isn't a use case that warrants the expense of such an
+# analysis.
+#
+# To this end, all tests for __safe_for_unpickling__ or for
+# copyreg.safe_constructors are removed from the unpickling code.
+# References to these variables in the descriptions below are to be seen
+# as describing unpickling in Python 2.2 and before.
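+#
+# A quick way to watch a pickle program opcode by opcode is the dis() function
+# defined later in this module; e.g. for a small protocol-2 pickle (the exact
+# opcode sequence may vary with the pickler version):
+#
+#     >>> import pickle, pickletools
+#     >>> pickletools.dis(pickle.dumps([1, 2], 2))
+#
+# This lists one opcode per line -- PROTO, EMPTY_LIST, BININT1 and friends,
+# each with its stream offset, ending with STOP.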
+ # Meta-rule: Descriptions are stored in instances of descriptor objects, # with plain constructors. No meta-language is defined from which diff --git a/Lib/test/test_getargs2.py b/Lib/test/test_getargs2.py index 48ca94e..d75ad30 100644 --- a/Lib/test/test_getargs2.py +++ b/Lib/test/test_getargs2.py @@ -2,37 +2,35 @@ import unittest from test import support from _testcapi import getargs_keywords, getargs_keyword_only -""" -> How about the following counterproposal. This also changes some of -> the other format codes to be a little more regular. -> -> Code C type Range check -> -> b unsigned char 0..UCHAR_MAX -> h signed short SHRT_MIN..SHRT_MAX -> B unsigned char none ** -> H unsigned short none ** -> k * unsigned long none -> I * unsigned int 0..UINT_MAX - - -> i int INT_MIN..INT_MAX -> l long LONG_MIN..LONG_MAX - -> K * unsigned long long none -> L long long LLONG_MIN..LLONG_MAX - -> Notes: -> -> * New format codes. -> -> ** Changed from previous "range-and-a-half" to "none"; the -> range-and-a-half checking wasn't particularly useful. - -Plus a C API or two, e.g. PyInt_AsLongMask() -> -unsigned long and PyInt_AsLongLongMask() -> unsigned -long long (if that exists). -""" +# > How about the following counterproposal. This also changes some of +# > the other format codes to be a little more regular. +# > +# > Code C type Range check +# > +# > b unsigned char 0..UCHAR_MAX +# > h signed short SHRT_MIN..SHRT_MAX +# > B unsigned char none ** +# > H unsigned short none ** +# > k * unsigned long none +# > I * unsigned int 0..UINT_MAX +# +# +# > i int INT_MIN..INT_MAX +# > l long LONG_MIN..LONG_MAX +# +# > K * unsigned long long none +# > L long long LLONG_MIN..LLONG_MAX +# +# > Notes: +# > +# > * New format codes. +# > +# > ** Changed from previous "range-and-a-half" to "none"; the +# > range-and-a-half checking wasn't particularly useful. +# +# Plus a C API or two, e.g. PyInt_AsLongMask() -> +# unsigned long and PyInt_AsLongLongMask() -> unsigned +# long long (if that exists). LARGE = 0x7FFFFFFF VERY_LARGE = 0xFF0000121212121212121242 diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 67e4d1d..9fd6e5e 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1337,8 +1337,8 @@ def XMLID(text, parser=None): ids[id] = elem return tree, ids +# Parse XML document from string constant. Alias for XML(). fromstring = XML -"""Parse XML document from string constant. Alias for XML().""" def fromstringlist(sequence, parser=None): """Parse XML document from sequence of string fragments. diff --git a/Tools/scripts/reindent.py b/Tools/scripts/reindent.py index 4a916ea..18424de 100755 --- a/Tools/scripts/reindent.py +++ b/Tools/scripts/reindent.py @@ -52,8 +52,8 @@ verbose = False recurse = False dryrun = False makebackup = True +# A specified newline to be used in the output (set by --newline option) spec_newline = None -"""A specified newline to be used in the output (set by --newline option)""" def usage(msg=None): -- cgit v0.12