From 63065d761e6c545216b9621982d16dd459abb1f8 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 15 May 2012 23:48:04 +0200 Subject: Issue #14624: UTF-16 decoding is now 3x to 4x faster on various inputs. Patch by Serhiy Storchaka. --- Misc/NEWS | 3 + Objects/stringlib/codecs.h | 149 +++++++++++++++++++++++- Objects/unicodeobject.c | 277 +++++++++++++-------------------------------- 3 files changed, 230 insertions(+), 199 deletions(-) diff --git a/Misc/NEWS b/Misc/NEWS index 4870979..2ff55e8 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,9 @@ What's New in Python 3.3.0 Alpha 4? Core and Builtins ----------------- +- Issue #14624: UTF-16 decoding is now 3x to 4x faster on various inputs. + Patch by Serhiy Storchaka. + - asdl_seq and asdl_int_seq are now Py_ssize_t sized. - Issue #14133 (PEP 415): Implement suppression of __context__ display with an diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index 366011c..07627d6 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -215,7 +215,6 @@ InvalidContinuation: goto Return; } -#undef LONG_PTR_MASK #undef ASCII_CHAR_MASK @@ -415,4 +414,152 @@ STRINGLIB(utf8_encoder)(PyObject *unicode, #undef MAX_SHORT_UNICHARS } +/* The pattern for constructing UCS2-repeated masks. */ +#if SIZEOF_LONG == 8 +# define UCS2_REPEAT_MASK 0x0001000100010001ul +#elif SIZEOF_LONG == 4 +# define UCS2_REPEAT_MASK 0x00010001ul +#else +# error C 'long' size should be either 4 or 8! +#endif + +/* The mask for fast checking. */ +#if STRINGLIB_SIZEOF_CHAR == 1 +/* The mask for fast checking of whether a C 'long' contains a + non-ASCII or non-Latin1 UTF16-encoded characters. */ +# define FAST_CHAR_MASK (UCS2_REPEAT_MASK * (0xFFFFu & ~STRINGLIB_MAX_CHAR)) +#else +/* The mask for fast checking of whether a C 'long' may contain + UTF16-encoded surrogate characters. This is an efficient heuristic, + assuming that non-surrogate characters with a code point >= 0x8000 are + rare in most input. +*/ +# define FAST_CHAR_MASK (UCS2_REPEAT_MASK * 0x8000u) +#endif +/* The mask for fast byte-swapping. */ +#define STRIPPED_MASK (UCS2_REPEAT_MASK * 0x00FFu) +/* Swap bytes. */ +#define SWAB(value) ((((value) >> 8) & STRIPPED_MASK) | \ + (((value) & STRIPPED_MASK) << 8)) + +Py_LOCAL_INLINE(Py_UCS4) +STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e, + STRINGLIB_CHAR *dest, Py_ssize_t *outpos, + int native_ordering) +{ + Py_UCS4 ch; + const unsigned char *aligned_end = + (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK); + const unsigned char *q = *inptr; + STRINGLIB_CHAR *p = dest + *outpos; + /* Offsets from q for retrieving byte pairs in the right order. */ +#ifdef BYTEORDER_IS_LITTLE_ENDIAN + int ihi = !!native_ordering, ilo = !native_ordering; +#else + int ihi = !native_ordering, ilo = !!native_ordering; +#endif + --e; + + while (q < e) { + Py_UCS4 ch2; + /* First check for possible aligned read of a C 'long'. Unaligned + reads are more expensive, better to defer to another iteration. */ + if (!((size_t) q & LONG_PTR_MASK)) { + /* Fast path for runs of in-range non-surrogate chars. */ + register const unsigned char *_q = q; + while (_q < aligned_end) { + unsigned long block = * (unsigned long *) _q; + if (native_ordering) { + /* Can use buffer directly */ + if (block & FAST_CHAR_MASK) + break; + } + else { + /* Need to byte-swap */ + if (block & SWAB(FAST_CHAR_MASK)) + break; +#if STRINGLIB_SIZEOF_CHAR == 1 + block >>= 8; +#else + block = SWAB(block); +#endif + } +#ifdef BYTEORDER_IS_LITTLE_ENDIAN +# if SIZEOF_LONG == 4 + p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu); + p[1] = (STRINGLIB_CHAR)(block >> 16); +# elif SIZEOF_LONG == 8 + p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu); + p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu); + p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu); + p[3] = (STRINGLIB_CHAR)(block >> 48); +# endif +#else +# if SIZEOF_LONG == 4 + p[0] = (STRINGLIB_CHAR)(block >> 16); + p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu); +# elif SIZEOF_LONG == 8 + p[0] = (STRINGLIB_CHAR)(block >> 48); + p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu); + p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu); + p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu); +# endif +#endif + _q += SIZEOF_LONG; + p += SIZEOF_LONG / 2; + } + q = _q; + if (q >= e) + break; + } + + ch = (q[ihi] << 8) | q[ilo]; + q += 2; + if (!Py_UNICODE_IS_SURROGATE(ch)) { +#if STRINGLIB_SIZEOF_CHAR < 2 + if (ch > STRINGLIB_MAX_CHAR) + /* Out-of-range */ + goto Return; +#endif + *p++ = (STRINGLIB_CHAR)ch; + continue; + } + + /* UTF-16 code pair: */ + if (q >= e) + goto UnexpectedEnd; + if (!Py_UNICODE_IS_HIGH_SURROGATE(ch)) + goto IllegalEncoding; + ch2 = (q[ihi] << 8) | q[ilo]; + q += 2; + if (!Py_UNICODE_IS_LOW_SURROGATE(ch2)) + goto IllegalSurrogate; + ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2); +#if STRINGLIB_SIZEOF_CHAR < 4 + /* Out-of-range */ + goto Return; +#else + *p++ = (STRINGLIB_CHAR)ch; +#endif + } + ch = 0; +Return: + *inptr = q; + *outpos = p - dest; + return ch; +UnexpectedEnd: + ch = 1; + goto Return; +IllegalEncoding: + ch = 2; + goto Return; +IllegalSurrogate: + ch = 3; + goto Return; +} +#undef UCS2_REPEAT_MASK +#undef FAST_CHAR_MASK +#undef STRIPPED_MASK +#undef SWAB +#undef LONG_PTR_MASK #endif /* STRINGLIB_IS_UNICODE */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2e1e0bd..8fbc203 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5195,25 +5195,6 @@ PyUnicode_DecodeUTF16(const char *s, return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL); } -/* Two masks for fast checking of whether a C 'long' may contain - UTF16-encoded surrogate characters. This is an efficient heuristic, - assuming that non-surrogate characters with a code point >= 0x8000 are - rare in most input. - FAST_CHAR_MASK is used when the input is in native byte ordering, - SWAPPED_FAST_CHAR_MASK when the input is in byteswapped ordering. -*/ -#if (SIZEOF_LONG == 8) -# define FAST_CHAR_MASK 0x8000800080008000L -# define SWAPPED_FAST_CHAR_MASK 0x0080008000800080L -# define STRIPPED_MASK 0x00FF00FF00FF00FFL -#elif (SIZEOF_LONG == 4) -# define FAST_CHAR_MASK 0x80008000L -# define SWAPPED_FAST_CHAR_MASK 0x00800080L -# define STRIPPED_MASK 0x00FF00FFL -#else -# error C 'long' size should be either 4 or 8! -#endif - PyObject * PyUnicode_DecodeUTF16Stateful(const char *s, Py_ssize_t size, @@ -5226,30 +5207,15 @@ PyUnicode_DecodeUTF16Stateful(const char *s, Py_ssize_t endinpos; Py_ssize_t outpos; PyObject *unicode; - const unsigned char *q, *e, *aligned_end; + const unsigned char *q, *e; int bo = 0; /* assume native ordering by default */ - int native_ordering = 0; + int native_ordering; const char *errmsg = ""; - /* Offsets from q for retrieving byte pairs in the right order. */ -#ifdef BYTEORDER_IS_LITTLE_ENDIAN - int ihi = 1, ilo = 0; -#else - int ihi = 0, ilo = 1; -#endif PyObject *errorHandler = NULL; PyObject *exc = NULL; - /* Note: size will always be longer than the resulting Unicode - character count */ - unicode = PyUnicode_New(size, 127); - if (!unicode) - return NULL; - if (size == 0) - return unicode; - outpos = 0; - q = (unsigned char *)s; - e = q + size - 1; + e = q + size; if (byteorder) bo = *byteorder; @@ -5258,155 +5224,98 @@ PyUnicode_DecodeUTF16Stateful(const char *s, byte order setting accordingly. In native mode, the leading BOM mark is skipped, in all other modes, it is copied to the output stream as-is (giving a ZWNBSP character). */ - if (bo == 0) { - if (size >= 2) { - const Py_UCS4 bom = (q[ihi] << 8) | q[ilo]; -#ifdef BYTEORDER_IS_LITTLE_ENDIAN - if (bom == 0xFEFF) { - q += 2; - bo = -1; - } - else if (bom == 0xFFFE) { - q += 2; - bo = 1; - } -#else - if (bom == 0xFEFF) { - q += 2; - bo = 1; - } - else if (bom == 0xFFFE) { - q += 2; - bo = -1; - } -#endif + if (bo == 0 && size >= 2) { + const Py_UCS4 bom = (q[1] << 8) | q[0]; + if (bom == 0xFEFF) { + q += 2; + bo = -1; + } + else if (bom == 0xFFFE) { + q += 2; + bo = 1; } + if (byteorder) + *byteorder = bo; } - if (bo == -1) { - /* force LE */ - ihi = 1; - ilo = 0; - } - else if (bo == 1) { - /* force BE */ - ihi = 0; - ilo = 1; + if (q == e) { + if (consumed) + *consumed = size; + Py_INCREF(unicode_empty); + return unicode_empty; } + #ifdef BYTEORDER_IS_LITTLE_ENDIAN - native_ordering = ilo < ihi; + native_ordering = bo <= 0; #else - native_ordering = ilo > ihi; + native_ordering = bo >= 0; #endif - aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK); - while (q < e) { - Py_UCS4 ch; - /* First check for possible aligned read of a C 'long'. Unaligned - reads are more expensive, better to defer to another iteration. */ - if (!((size_t) q & LONG_PTR_MASK)) { - /* Fast path for runs of non-surrogate chars. */ - register const unsigned char *_q = q; + /* Note: size will always be longer than the resulting Unicode + character count */ + unicode = PyUnicode_New((e - q + 1) / 2, 127); + if (!unicode) + return NULL; + + outpos = 0; + while (1) { + Py_UCS4 ch = 0; + if (e - q >= 2) { int kind = PyUnicode_KIND(unicode); - void *data = PyUnicode_DATA(unicode); - while (_q < aligned_end) { - unsigned long block = * (unsigned long *) _q; - Py_UCS4 maxch; - if (native_ordering) { - /* Can use buffer directly */ - if (block & FAST_CHAR_MASK) - break; - } - else { - /* Need to byte-swap */ - if (block & SWAPPED_FAST_CHAR_MASK) - break; - block = ((block >> 8) & STRIPPED_MASK) | - ((block & STRIPPED_MASK) << 8); - } - maxch = (Py_UCS2)(block & 0xFFFF); -#if SIZEOF_LONG == 8 - ch = (Py_UCS2)((block >> 16) & 0xFFFF); - maxch = MAX_MAXCHAR(maxch, ch); - ch = (Py_UCS2)((block >> 32) & 0xFFFF); - maxch = MAX_MAXCHAR(maxch, ch); - ch = (Py_UCS2)(block >> 48); - maxch = MAX_MAXCHAR(maxch, ch); -#else - ch = (Py_UCS2)(block >> 16); - maxch = MAX_MAXCHAR(maxch, ch); -#endif - if (maxch > PyUnicode_MAX_CHAR_VALUE(unicode)) { - if (unicode_widen(&unicode, outpos, maxch) < 0) - goto onError; - kind = PyUnicode_KIND(unicode); - data = PyUnicode_DATA(unicode); - } -#ifdef BYTEORDER_IS_LITTLE_ENDIAN - PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block & 0xFFFF)); -#if SIZEOF_LONG == 8 - PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 16) & 0xFFFF)); - PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 32) & 0xFFFF)); - PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 48))); -#else - PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block >> 16)); -#endif -#else -#if SIZEOF_LONG == 8 - PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 48))); - PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 32) & 0xFFFF)); - PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 16) & 0xFFFF)); -#else - PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block >> 16)); -#endif - PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block & 0xFFFF)); -#endif - _q += SIZEOF_LONG; + if (kind == PyUnicode_1BYTE_KIND) { + if (PyUnicode_IS_ASCII(unicode)) + ch = asciilib_utf16_decode(&q, e, + PyUnicode_1BYTE_DATA(unicode), &outpos, + native_ordering); + else + ch = ucs1lib_utf16_decode(&q, e, + PyUnicode_1BYTE_DATA(unicode), &outpos, + native_ordering); + } else if (kind == PyUnicode_2BYTE_KIND) { + ch = ucs2lib_utf16_decode(&q, e, + PyUnicode_2BYTE_DATA(unicode), &outpos, + native_ordering); + } else { + assert(kind == PyUnicode_4BYTE_KIND); + ch = ucs4lib_utf16_decode(&q, e, + PyUnicode_4BYTE_DATA(unicode), &outpos, + native_ordering); } - q = _q; - if (q >= e) - break; } - ch = (q[ihi] << 8) | q[ilo]; - q += 2; - - if (!Py_UNICODE_IS_SURROGATE(ch)) { + switch (ch) + { + case 0: + /* remaining byte at the end? (size should be even) */ + if (q == e || consumed) + goto End; + errmsg = "truncated data"; + startinpos = ((const char *)q) - starts; + endinpos = ((const char *)e) - starts; + break; + /* The remaining input chars are ignored if the callback + chooses to skip the input */ + case 1: + errmsg = "unexpected end of data"; + startinpos = ((const char *)q) - 2 - starts; + endinpos = ((const char *)e) - starts; + break; + case 2: + errmsg = "illegal encoding"; + startinpos = ((const char *)q) - 2 - starts; + endinpos = startinpos + 2; + break; + case 3: + errmsg = "illegal UTF-16 surrogate"; + startinpos = ((const char *)q) - 4 - starts; + endinpos = startinpos + 2; + break; + default: if (unicode_putchar(&unicode, &outpos, ch) < 0) goto onError; continue; } - /* UTF-16 code pair: */ - if (q > e) { - errmsg = "unexpected end of data"; - startinpos = (((const char *)q) - 2) - starts; - endinpos = ((const char *)e) + 1 - starts; - goto utf16Error; - } - if (Py_UNICODE_IS_HIGH_SURROGATE(ch)) { - Py_UCS4 ch2 = (q[ihi] << 8) | q[ilo]; - q += 2; - if (Py_UNICODE_IS_LOW_SURROGATE(ch2)) { - if (unicode_putchar(&unicode, &outpos, - Py_UNICODE_JOIN_SURROGATES(ch, ch2)) < 0) - goto onError; - continue; - } - else { - errmsg = "illegal UTF-16 surrogate"; - startinpos = (((const char *)q)-4)-starts; - endinpos = startinpos+2; - goto utf16Error; - } - - } - errmsg = "illegal encoding"; - startinpos = (((const char *)q)-2)-starts; - endinpos = startinpos+2; - /* Fall through to report the error */ - - utf16Error: if (unicode_decode_call_errorhandler( errors, &errorHandler, @@ -5421,33 +5330,8 @@ PyUnicode_DecodeUTF16Stateful(const char *s, &outpos)) goto onError; } - /* remaining byte at the end? (size should be even) */ - if (e == q) { - if (!consumed) { - errmsg = "truncated data"; - startinpos = ((const char *)q) - starts; - endinpos = ((const char *)e) + 1 - starts; - if (unicode_decode_call_errorhandler( - errors, - &errorHandler, - "utf16", errmsg, - &starts, - (const char **)&e, - &startinpos, - &endinpos, - &exc, - (const char **)&q, - &unicode, - &outpos)) - goto onError; - /* The remaining input chars are ignored if the callback - chooses to skip the input */ - } - } - - if (byteorder) - *byteorder = bo; +End: if (consumed) *consumed = (const char *)q-starts; @@ -5466,9 +5350,6 @@ PyUnicode_DecodeUTF16Stateful(const char *s, return NULL; } -#undef FAST_CHAR_MASK -#undef SWAPPED_FAST_CHAR_MASK - PyObject * _PyUnicode_EncodeUTF16(PyObject *str, const char *errors, -- cgit v0.12 From a790c9b6d6b5b706f1b29be316f2e41d24eeb295 Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Tue, 15 May 2012 20:44:06 -0400 Subject: Issue #14817: Add rudimentary tests for pkgutil.extend_path. --- Lib/test/test_pkgutil.py | 48 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py index f4e0323..d5d7448 100644 --- a/Lib/test/test_pkgutil.py +++ b/Lib/test/test_pkgutil.py @@ -137,8 +137,54 @@ class PkgutilPEP302Tests(unittest.TestCase): self.assertEqual(foo.loads, 1) del sys.modules['foo'] + +class ExtendPathTests(unittest.TestCase): + def create_init(self, pkgname): + dirname = tempfile.mkdtemp() + self.addCleanup(shutil.rmtree, dirname) + sys.path.insert(0, dirname) + + pkgdir = os.path.join(dirname, pkgname) + os.mkdir(pkgdir) + with open(os.path.join(pkgdir, '__init__.py'), 'w') as fl: + fl.write('from pkgutil import extend_path\n__path__ = extend_path(__path__, __name__)\n') + + return dirname + + def create_submodule(self, dirname, pkgname, submodule_name, value): + module_name = os.path.join(dirname, pkgname, submodule_name + '.py') + with open(module_name, 'w') as fl: + print('value={}'.format(value), file=fl) + + def setUp(self): + # Create 2 directories on sys.path + self.pkgname = 'foo' + self.dirname_0 = self.create_init(self.pkgname) + self.dirname_1 = self.create_init(self.pkgname) + + def tearDown(self): + del sys.path[0] + del sys.path[0] + + def test_simple(self): + self.create_submodule(self.dirname_0, self.pkgname, 'bar', 0) + self.create_submodule(self.dirname_1, self.pkgname, 'baz', 1) + import foo.bar + import foo.baz + # Ensure we read the expected values + self.assertEqual(foo.bar.value, 0) + self.assertEqual(foo.baz.value, 1) + + # Ensure the path is set up correctly + self.assertEqual(sorted(foo.__path__), + sorted([os.path.join(self.dirname_0, self.pkgname), + os.path.join(self.dirname_1, self.pkgname)])) + + # XXX: test .pkg files + + def test_main(): - run_unittest(PkgutilTests, PkgutilPEP302Tests) + run_unittest(PkgutilTests, PkgutilPEP302Tests, ExtendPathTests) # this is necessary if test is run repeated (like when finding leaks) import zipimport zipimport._zip_directory_cache.clear() -- cgit v0.12 From 4d377d98a1b4ab115bcd816553600e603e388831 Mon Sep 17 00:00:00 2001 From: Ned Deily Date: Tue, 15 May 2012 18:08:11 -0700 Subject: Issue #14777: In an X11 windowing environment, tkinter may return undecoded UTF-8 bytes as a string when accessing the Tk clipboard. Modify clipboad_get() to first request type UTF8_STRING when no specific type is requested in an X11 windowing environment, falling back to the current default type STRING if that fails. Original patch by Thomas Kluyver. --- Lib/tkinter/__init__.py | 28 ++++++++++++++++++++++++++-- Misc/NEWS | 6 ++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/Lib/tkinter/__init__.py b/Lib/tkinter/__init__.py index 8af5065..de85bf9 100644 --- a/Lib/tkinter/__init__.py +++ b/Lib/tkinter/__init__.py @@ -526,12 +526,19 @@ class Misc: The type keyword specifies the form in which the data is to be returned and should be an atom name such as STRING - or FILE_NAME. Type defaults to STRING. + or FILE_NAME. Type defaults to STRING, except on X11, where the default + is to try UTF8_STRING and fall back to STRING. This command is equivalent to: selection_get(CLIPBOARD) """ + if 'type' not in kw and self._windowingsystem == 'x11': + try: + kw['type'] = 'UTF8_STRING' + return self.tk.call(('clipboard', 'get') + self._options(kw)) + except TclError: + del kw['type'] return self.tk.call(('clipboard', 'get') + self._options(kw)) def clipboard_clear(self, **kw): @@ -613,8 +620,16 @@ class Misc: A keyword parameter selection specifies the name of the selection and defaults to PRIMARY. A keyword parameter displayof specifies a widget on the display - to use.""" + to use. A keyword parameter type specifies the form of data to be + fetched, defaulting to STRING except on X11, where UTF8_STRING is tried + before STRING.""" if 'displayof' not in kw: kw['displayof'] = self._w + if 'type' not in kw and self._windowingsystem == 'x11': + try: + kw['type'] = 'UTF8_STRING' + return self.tk.call(('selection', 'get') + self._options(kw)) + except TclError: + del kw['type'] return self.tk.call(('selection', 'get') + self._options(kw)) def selection_handle(self, command, **kw): """Specify a function COMMAND to call if the X @@ -1029,6 +1044,15 @@ class Misc: if displayof is None: return ('-displayof', self._w) return () + @property + def _windowingsystem(self): + """Internal function.""" + try: + return self._root()._windowingsystem_cached + except AttributeError: + ws = self._root()._windowingsystem_cached = \ + self.tk.call('tk', 'windowingsystem') + return ws def _options(self, cnf, kw = None): """Internal function.""" if kw: diff --git a/Misc/NEWS b/Misc/NEWS index f3b14b3..9b9ca67 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -63,6 +63,12 @@ Core and Builtins Library ------- +- Issue #14777: tkinter may return undecoded UTF-8 bytes as a string when + accessing the Tk clipboard. Modify clipboad_get() to first request type + UTF8_STRING when no specific type is requested in an X11 windowing + environment, falling back to the current default type STRING if that fails. + Original patch by Thomas Kluyver. + - Issue #12541: Be lenient with quotes around Realm field of HTTP Basic Authentation in urllib2. -- cgit v0.12 From ea1badbfef340c63d4f821662c887e46ed62a6ec Mon Sep 17 00:00:00 2001 From: R David Murray Date: Tue, 15 May 2012 22:07:52 -0400 Subject: #1440472: Explain that email parser/generator isn't *quite* "idempotent" --- Doc/library/email.generator.rst | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Doc/library/email.generator.rst b/Doc/library/email.generator.rst index 85b32fe..033dcf1 100644 --- a/Doc/library/email.generator.rst +++ b/Doc/library/email.generator.rst @@ -17,7 +17,8 @@ yourself. However the bundled generator knows how to generate most email in a standards-compliant way, should handle MIME and non-MIME email messages just fine, and is designed so that the transformation from flat text, to a message structure via the :class:`~email.parser.Parser` class, and back to flat text, -is idempotent (the input is identical to the output). On the other hand, using +is idempotent (the input is identical to the output) [#]_. On the other hand, +using the Generator on a :class:`~email.message.Message` constructed by program may result in changes to the :class:`~email.message.Message` object as defaults are filled in. @@ -204,3 +205,12 @@ representing the part. The default value for *fmt* is ``None``, meaning :: [Non-text (%(type)s) part of message omitted, filename %(filename)s] + + +.. rubric:: Footnotes + +.. [#] This statement assumes that you use the appropriate setting for the + ``unixfrom`` argument, and that you set maxheaderlen=0 (which will + preserve whatever the input line lengths were). It is also not strictly + true, since in many cases runs of whitespace in headers are collapsed + into single blanks. The latter is a bug that will eventually be fixed. -- cgit v0.12 From 28e68ea6eafbc7d2bac25ce2be3c046992cc73b6 Mon Sep 17 00:00:00 2001 From: R David Murray Date: Tue, 15 May 2012 22:13:29 -0400 Subject: #1440472: reflow --- Doc/library/email.generator.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Doc/library/email.generator.rst b/Doc/library/email.generator.rst index 033dcf1..f40869c 100644 --- a/Doc/library/email.generator.rst +++ b/Doc/library/email.generator.rst @@ -18,10 +18,9 @@ standards-compliant way, should handle MIME and non-MIME email messages just fine, and is designed so that the transformation from flat text, to a message structure via the :class:`~email.parser.Parser` class, and back to flat text, is idempotent (the input is identical to the output) [#]_. On the other hand, -using -the Generator on a :class:`~email.message.Message` constructed by program may -result in changes to the :class:`~email.message.Message` object as defaults are -filled in. +using the Generator on a :class:`~email.message.Message` constructed by program +may result in changes to the :class:`~email.message.Message` object as defaults +are filled in. :class:`bytes` output can be generated using the :class:`BytesGenerator` class. If the message object structure contains non-ASCII bytes, this generator's -- cgit v0.12 From 51b2ed51f0cc18a69a1d53eb9f0f088c99681afc Mon Sep 17 00:00:00 2001 From: Hynek Schlawack Date: Wed, 16 May 2012 09:51:07 +0200 Subject: #14809: Add HTTP status codes from RFC 6585 to http.server and http.client Patch by EungJun Yi. --- Doc/library/http.client.rst | 15 +++++++++++++++ Lib/http/client.py | 8 ++++++++ Lib/http/server.py | 10 +++++++++- Misc/ACKS | 1 + Misc/NEWS | 3 +++ 5 files changed, 36 insertions(+), 1 deletion(-) diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst index f0da8ee..ae6c91f 100644 --- a/Doc/library/http.client.rst +++ b/Doc/library/http.client.rst @@ -339,6 +339,15 @@ and also the following constants for integer status codes: | :const:`UPGRADE_REQUIRED` | ``426`` | HTTP Upgrade to TLS, | | | | :rfc:`2817`, Section 6 | +------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`PRECONDITION_REQUIRED` | ``428`` | Additional HTTP Status Codes, | +| | | :rfc:`6585`, Section 3 | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`TOO_MANY_REQUESTS` | ``429`` | Additional HTTP Status Codes, | +| | | :rfc:`6585`, Section 4 | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`REQUEST_HEADER_FIELDS_TOO_LARGE` | ``431`` | Additional HTTP Status Codes, | +| | | :rfc:`6585`, Section 5 | ++------------------------------------------+---------+-----------------------------------------------------------------------+ | :const:`INTERNAL_SERVER_ERROR` | ``500`` | HTTP/1.1, `RFC 2616, Section | | | | 10.5.1 | | | | `_ | @@ -369,6 +378,12 @@ and also the following constants for integer status codes: | :const:`NOT_EXTENDED` | ``510`` | An HTTP Extension Framework, | | | | :rfc:`2774`, Section 7 | +------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`NETWORK_AUTHENTICATION_REQUIRED` | ``511`` | Additional HTTP Status Codes, | +| | | :rfc:`6585`, Section 6 | ++------------------------------------------+---------+-----------------------------------------------------------------------+ + + .. versionchanged:: 3.3 + Added codes ``428``, ``429``, ``431`` and ``511`` from :rfc:`6585`. .. data:: responses diff --git a/Lib/http/client.py b/Lib/http/client.py index 9c110d5..6089192 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -141,6 +141,9 @@ UNPROCESSABLE_ENTITY = 422 LOCKED = 423 FAILED_DEPENDENCY = 424 UPGRADE_REQUIRED = 426 +PRECONDITION_REQUIRED = 428 +TOO_MANY_REQUESTS = 429 +REQUEST_HEADER_FIELDS_TOO_LARGE = 431 # server error INTERNAL_SERVER_ERROR = 500 @@ -151,6 +154,7 @@ GATEWAY_TIMEOUT = 504 HTTP_VERSION_NOT_SUPPORTED = 505 INSUFFICIENT_STORAGE = 507 NOT_EXTENDED = 510 +NETWORK_AUTHENTICATION_REQUIRED = 511 # Mapping status codes to official W3C names responses = { @@ -192,6 +196,9 @@ responses = { 415: 'Unsupported Media Type', 416: 'Requested Range Not Satisfiable', 417: 'Expectation Failed', + 428: 'Precondition Required', + 429: 'Too Many Requests', + 431: 'Request Header Fields Too Large', 500: 'Internal Server Error', 501: 'Not Implemented', @@ -199,6 +206,7 @@ responses = { 503: 'Service Unavailable', 504: 'Gateway Timeout', 505: 'HTTP Version Not Supported', + 511: 'Network Authentication Required', } # maximal amount of data to read at one time in _safe_read diff --git a/Lib/http/server.py b/Lib/http/server.py index c1b0596..cb66f2b 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -573,7 +573,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): # Table mapping response codes to messages; entries have the # form {code: (shortmessage, longmessage)}. - # See RFC 2616. + # See RFC 2616 and 6585. responses = { 100: ('Continue', 'Request received, please continue'), 101: ('Switching Protocols', @@ -628,6 +628,12 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): 'Cannot satisfy request range.'), 417: ('Expectation Failed', 'Expect condition could not be satisfied.'), + 428: ('Precondition Required', + 'The origin server requires the request to be conditional.'), + 429: ('Too Many Requests', 'The user has sent too many requests ' + 'in a given amount of time ("rate limiting").'), + 431: ('Request Header Fields Too Large', 'The server is unwilling to ' + 'process the request because its header fields are too large.'), 500: ('Internal Server Error', 'Server got itself in trouble'), 501: ('Not Implemented', @@ -638,6 +644,8 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): 504: ('Gateway Timeout', 'The gateway server did not receive a timely response'), 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'), + 511: ('Network Authentication Required', + 'The client needs to authenticate to gain network access.'), } diff --git a/Misc/ACKS b/Misc/ACKS index 2be4bf7..75b544e 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1129,6 +1129,7 @@ Florent Xicluna Hirokazu Yamamoto Ka-Ping Yee Jason Yeo +EungJun Yi Bob Yodlowski Danny Yoo George Yoshida diff --git a/Misc/NEWS b/Misc/NEWS index 418bfc3..d27640d 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -34,6 +34,9 @@ Core and Builtins Library ------- +- Issue #14809: Add HTTP status codes introduced by RFC 6585 to http.server + and http.client. Patch by EungJun Yi. + - Issue #14777: tkinter may return undecoded UTF-8 bytes as a string when accessing the Tk clipboard. Modify clipboad_get() to first request type UTF8_STRING when no specific type is requested in an X11 windowing -- cgit v0.12 From 5cdc6308b6ac134ab65f2001e37b0c067dadef7d Mon Sep 17 00:00:00 2001 From: "Eric V. Smith" Date: Wed, 16 May 2012 04:48:04 -0400 Subject: Cleanup so subsequent tests won't fail. Needs to be moved into a support routine (see 14715). --- Lib/test/test_pkgutil.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py index d5d7448..6025bcd 100644 --- a/Lib/test/test_pkgutil.py +++ b/Lib/test/test_pkgutil.py @@ -165,6 +165,9 @@ class ExtendPathTests(unittest.TestCase): def tearDown(self): del sys.path[0] del sys.path[0] + del sys.modules['foo'] + del sys.modules['foo.bar'] + del sys.modules['foo.baz'] def test_simple(self): self.create_submodule(self.dirname_0, self.pkgname, 'bar', 0) -- cgit v0.12 From adbda1766025ff31ee20565988ca41885c9a3543 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 16 May 2012 11:31:13 +0200 Subject: Update .hgignore for new MSVC files --- .hgignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.hgignore b/.hgignore index 1eb60c0..34fd18b 100644 --- a/.hgignore +++ b/.hgignore @@ -56,6 +56,9 @@ PC/python_nt*.h PC/pythonnt_rc*.h PC/*.obj PC/*.exe +PC/*/*.user +PC/*/*.ncb +PC/*/*.suo PCbuild/*.exe PCbuild/*.dll PCbuild/*.pdb @@ -69,6 +72,7 @@ PCbuild/*.suo PCbuild/*.*sdf PCbuild/Win32-temp-* PCbuild/x64-temp-* +BuildLog.htm __pycache__ Modules/_testembed .coverage -- cgit v0.12 From e7672d38dc430036539a2b1a279757d1cc819af7 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 16 May 2012 11:33:08 +0200 Subject: Issue #14732: The _csv module now uses PEP 3121 module initialization. Patch by Robin Schreiber. --- Misc/ACKS | 1 + Misc/NEWS | 3 ++ Modules/_csv.c | 105 ++++++++++++++++++++++++++++++++++++++------------------- 3 files changed, 74 insertions(+), 35 deletions(-) diff --git a/Misc/ACKS b/Misc/ACKS index 75b544e..acf8a34 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -919,6 +919,7 @@ Ralf Schmitt Michael Schneider Peter Schneider-Kamp Arvin Schnell +Robin Schreiber Chad J. Schroeder Sam Schulenburg Stefan Schwarzer diff --git a/Misc/NEWS b/Misc/NEWS index d27640d..a80238a 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -34,6 +34,9 @@ Core and Builtins Library ------- +- Issue #14732: The _csv module now uses PEP 3121 module initialization. + Patch by Robin Schreiber. + - Issue #14809: Add HTTP status codes introduced by RFC 6585 to http.server and http.client. Patch by EungJun Yi. diff --git a/Modules/_csv.c b/Modules/_csv.c index 443309c..89ce122 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -16,9 +16,39 @@ module instead. #define IS_BASESTRING(o) \ PyUnicode_Check(o) -static PyObject *error_obj; /* CSV exception */ -static PyObject *dialects; /* Dialect registry */ -static long field_limit = 128 * 1024; /* max parsed field size */ +typedef struct { + PyObject *error_obj; /* CSV exception */ + PyObject *dialects; /* Dialect registry */ + long field_limit; /* max parsed field size */ +} _csvstate; + +#define _csvstate(o) ((_csvstate *)PyModule_GetState(o)) + +static int +_csv_clear(PyObject *m) +{ + Py_CLEAR(_csvstate(m)->error_obj); + Py_CLEAR(_csvstate(m)->dialects); + return 0; +} + +static int +_csv_traverse(PyObject *m, visitproc visit, void *arg) +{ + Py_VISIT(_csvstate(m)->error_obj); + Py_VISIT(_csvstate(m)->dialects); + return 0; +} + +static void +_csv_free(void *m) +{ + _csv_clear((PyObject *)m); +} + +static struct PyModuleDef _csvmodule; + +#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule))) typedef enum { START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD, @@ -103,10 +133,10 @@ get_dialect_from_registry(PyObject * name_obj) { PyObject *dialect_obj; - dialect_obj = PyDict_GetItem(dialects, name_obj); + dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj); if (dialect_obj == NULL) { if (!PyErr_Occurred()) - PyErr_Format(error_obj, "unknown dialect"); + PyErr_Format(_csvstate_global->error_obj, "unknown dialect"); } else Py_INCREF(dialect_obj); @@ -544,9 +574,9 @@ parse_grow_buff(ReaderObj *self) static int parse_add_char(ReaderObj *self, Py_UCS4 c) { - if (self->field_len >= field_limit) { - PyErr_Format(error_obj, "field larger than field limit (%ld)", - field_limit); + if (self->field_len >= _csvstate_global->field_limit) { + PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)", + _csvstate_global->field_limit); return -1; } if (self->field_len == self->field_size && !parse_grow_buff(self)) @@ -703,7 +733,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) } else { /* illegal */ - PyErr_Format(error_obj, "'%c' expected after '%c'", + PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'", dialect->delimiter, dialect->quotechar); return -1; @@ -716,7 +746,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) else if (c == '\0') self->state = START_RECORD; else { - PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?"); + PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?"); return -1; } break; @@ -755,12 +785,12 @@ Reader_iternext(ReaderObj *self) if (lineobj == NULL) { /* End of input OR exception */ if (!PyErr_Occurred() && self->field_len != 0) - PyErr_Format(error_obj, + PyErr_Format(_csvstate_global->error_obj, "newline inside string"); return NULL; } if (!PyUnicode_Check(lineobj)) { - PyErr_Format(error_obj, + PyErr_Format(_csvstate_global->error_obj, "iterator should return strings, " "not %.200s " "(did you open the file in text mode?)", @@ -778,7 +808,7 @@ Reader_iternext(ReaderObj *self) c = PyUnicode_READ(kind, data, pos); if (c == '\0') { Py_DECREF(lineobj); - PyErr_Format(error_obj, + PyErr_Format(_csvstate_global->error_obj, "line contains NULL byte"); goto err; } @@ -994,7 +1024,7 @@ join_append_data(WriterObj *self, unsigned int field_kind, void *field_data, } if (want_escape) { if (!dialect->escapechar) { - PyErr_Format(error_obj, + PyErr_Format(_csvstate_global->error_obj, "need to escape, but no escapechar set"); return -1; } @@ -1010,7 +1040,7 @@ join_append_data(WriterObj *self, unsigned int field_kind, void *field_data, */ if (i == 0 && quote_empty) { if (dialect->quoting == QUOTE_NONE) { - PyErr_Format(error_obj, + PyErr_Format(_csvstate_global->error_obj, "single empty field record must be quoted"); return -1; } @@ -1127,7 +1157,7 @@ csv_writerow(WriterObj *self, PyObject *seq) PyObject *line, *result; if (!PySequence_Check(seq)) - return PyErr_Format(error_obj, "sequence expected"); + return PyErr_Format(_csvstate_global->error_obj, "sequence expected"); len = PySequence_Length(seq); if (len < 0) @@ -1353,7 +1383,7 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) static PyObject * csv_list_dialects(PyObject *module, PyObject *args) { - return PyDict_Keys(dialects); + return PyDict_Keys(_csvstate_global->dialects); } static PyObject * @@ -1372,7 +1402,7 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) dialect = _call_dialect(dialect_obj, kwargs); if (dialect == NULL) return NULL; - if (PyDict_SetItem(dialects, name_obj, dialect) < 0) { + if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) { Py_DECREF(dialect); return NULL; } @@ -1384,8 +1414,8 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) static PyObject * csv_unregister_dialect(PyObject *module, PyObject *name_obj) { - if (PyDict_DelItem(dialects, name_obj) < 0) - return PyErr_Format(error_obj, "unknown dialect"); + if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0) + return PyErr_Format(_csvstate_global->error_obj, "unknown dialect"); Py_INCREF(Py_None); return Py_None; } @@ -1400,7 +1430,7 @@ static PyObject * csv_field_size_limit(PyObject *module, PyObject *args) { PyObject *new_limit = NULL; - long old_limit = field_limit; + long old_limit = _csvstate_global->field_limit; if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit)) return NULL; @@ -1410,9 +1440,9 @@ csv_field_size_limit(PyObject *module, PyObject *args) "limit must be an integer"); return NULL; } - field_limit = PyLong_AsLong(new_limit); - if (field_limit == -1 && PyErr_Occurred()) { - field_limit = old_limit; + _csvstate_global->field_limit = PyLong_AsLong(new_limit); + if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) { + _csvstate_global->field_limit = old_limit; return NULL; } } @@ -1551,17 +1581,16 @@ static struct PyMethodDef csv_methods[] = { { NULL, NULL } }; - static struct PyModuleDef _csvmodule = { PyModuleDef_HEAD_INIT, "_csv", csv_module_doc, - -1, + sizeof(_csvstate), csv_methods, NULL, - NULL, - NULL, - NULL + _csv_traverse, + _csv_clear, + _csv_free }; PyMODINIT_FUNC @@ -1589,11 +1618,16 @@ PyInit__csv(void) MODULE_VERSION) == -1) return NULL; + /* Set the field limit */ + _csvstate(module)->field_limit = 128 * 1024; + /* Do I still need to add this var to the Module Dict? */ + /* Add _dialects dictionary */ - dialects = PyDict_New(); - if (dialects == NULL) + _csvstate(module)->dialects = PyDict_New(); + if (_csvstate(module)->dialects == NULL) return NULL; - if (PyModule_AddObject(module, "_dialects", dialects)) + Py_INCREF(_csvstate(module)->dialects); + if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects)) return NULL; /* Add quote styles into dictionary */ @@ -1609,9 +1643,10 @@ PyInit__csv(void) return NULL; /* Add the CSV exception object to the module. */ - error_obj = PyErr_NewException("_csv.Error", NULL, NULL); - if (error_obj == NULL) + _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL); + if (_csvstate(module)->error_obj == NULL) return NULL; - PyModule_AddObject(module, "Error", error_obj); + Py_INCREF(_csvstate(module)->error_obj); + PyModule_AddObject(module, "Error", _csvstate(module)->error_obj); return module; } -- cgit v0.12 From 32bc80c523ed31d86d3deda99e0c18d4fc6c2e93 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 16 May 2012 12:51:55 +0200 Subject: Fix build failure. --- Objects/exceptions.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index b994862..9e10b7e 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -349,7 +349,8 @@ PyException_SetContext(PyObject *self, PyObject *context) { static struct PyMemberDef BaseException_members[] = { {"__suppress_context__", T_BOOL, - offsetof(PyBaseExceptionObject, suppress_context)} + offsetof(PyBaseExceptionObject, suppress_context)}, + {NULL} }; -- cgit v0.12 From b84bc7a7ce6649f786b5b3f80e944cb51b332faf Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 16 May 2012 12:58:04 +0200 Subject: Avoid "warning: no newline at end of file" in importlib.h. --- Python/freeze_importlib.py | 2 ++ Python/importlib.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Python/freeze_importlib.py b/Python/freeze_importlib.py index c012722..b96e7f2 100644 --- a/Python/freeze_importlib.py +++ b/Python/freeze_importlib.py @@ -25,6 +25,8 @@ def main(input_path, output_path): with open(output_path, 'w', encoding='utf-8') as output_file: output_file.write('\n'.join(lines)) output_file.write('/* Mercurial binary marker: \x00 */') + # Avoid a compiler warning for lack of EOL + output_file.write('\n') if __name__ == '__main__': diff --git a/Python/importlib.h b/Python/importlib.h index 0beeb59..cf5619a 100644 --- a/Python/importlib.h +++ b/Python/importlib.h @@ -3029,4 +3029,4 @@ unsigned char _Py_M__importlib[] = { 12,17,6,2,12,47,18,25,12,23,12,15,24,30,6,1, 6,3,12,54, }; -/* Mercurial binary marker: */ \ No newline at end of file +/* Mercurial binary marker: */ -- cgit v0.12 From a103b96a80f049f68ccf2dd3d5d7858b26a27e94 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 16 May 2012 14:37:54 +0200 Subject: Issue #14829: Fix bisect and range() indexing with large indices (>= 2 ** 32) under 64-bit Windows. --- Misc/NEWS | 3 +++ Modules/_bisectmodule.c | 3 ++- Objects/rangeobject.c | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS b/Misc/NEWS index 9b9ca67..08d21f7 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -63,6 +63,9 @@ Core and Builtins Library ------- +- Issue #14829: Fix bisect and range() indexing with large indices + (>= 2 ** 32) under 64-bit Windows. + - Issue #14777: tkinter may return undecoded UTF-8 bytes as a string when accessing the Tk clipboard. Modify clipboad_get() to first request type UTF8_STRING when no specific type is requested in an X11 windowing diff --git a/Modules/_bisectmodule.c b/Modules/_bisectmodule.c index 93d0eed..eae2978 100644 --- a/Modules/_bisectmodule.c +++ b/Modules/_bisectmodule.c @@ -3,6 +3,7 @@ Converted to C by Dmitry Vasiliev (dima at hlabs.spb.ru). */ +#define PY_SSIZE_T_CLEAN #include "Python.h" static Py_ssize_t @@ -192,7 +193,7 @@ insort_left(PyObject *self, PyObject *args, PyObject *kw) if (PyList_Insert(list, index, item) < 0) return NULL; } else { - result = PyObject_CallMethod(list, "insert", "iO", index, item); + result = PyObject_CallMethod(list, "insert", "nO", index, item); if (result == NULL) return NULL; Py_DECREF(result); diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c index 58d373c..935b205 100644 --- a/Objects/rangeobject.c +++ b/Objects/rangeobject.c @@ -307,7 +307,7 @@ compute_range_item(rangeobject *r, PyObject *arg) static PyObject * range_item(rangeobject *r, Py_ssize_t i) { - PyObject *res, *arg = PyLong_FromLong(i); + PyObject *res, *arg = PyLong_FromSsize_t(i); if (!arg) { return NULL; } -- cgit v0.12 From b3c16fb040bcc1785486047153b977950c2b8186 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 16 May 2012 14:42:38 +0200 Subject: More .hgignore additions for new VS build files --- .hgignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.hgignore b/.hgignore index 34fd18b..4e4c2ce 100644 --- a/.hgignore +++ b/.hgignore @@ -32,7 +32,6 @@ Modules/Setup.local Modules/config.c Modules/ld_so_aix$ Parser/pgen$ -PCbuild/amd64/ ^core ^python-gdb.py ^python.exe-gdb.py @@ -59,6 +58,9 @@ PC/*.exe PC/*/*.user PC/*/*.ncb PC/*/*.suo +PC/*/Win32-temp-* +PC/*/x64-temp-* +PC/*/amd64 PCbuild/*.exe PCbuild/*.dll PCbuild/*.pdb @@ -72,6 +74,7 @@ PCbuild/*.suo PCbuild/*.*sdf PCbuild/Win32-temp-* PCbuild/x64-temp-* +PCbuild/amd64 BuildLog.htm __pycache__ Modules/_testembed -- cgit v0.12 From fb5b954ee0c389b7cba4b07a3411ca6f1795cb94 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 16 May 2012 15:01:40 +0200 Subject: Forward port additional tests from 2.7 (issue #14829). --- Lib/test/test_bisect.py | 47 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_bisect.py b/Lib/test/test_bisect.py index c24a1a2..2ac3a68 100644 --- a/Lib/test/test_bisect.py +++ b/Lib/test/test_bisect.py @@ -23,6 +23,28 @@ del sys.modules['bisect'] import bisect as c_bisect +class Range(object): + """A trivial range()-like object without any integer width limitations.""" + def __init__(self, start, stop): + self.start = start + self.stop = stop + self.last_insert = None + + def __len__(self): + return self.stop - self.start + + def __getitem__(self, idx): + n = self.stop - self.start + if idx < 0: + idx += n + if idx >= n: + raise IndexError(idx) + return self.start + idx + + def insert(self, idx, item): + self.last_insert = idx, item + + class TestBisect(unittest.TestCase): module = None @@ -125,9 +147,28 @@ class TestBisect(unittest.TestCase): def test_large_range(self): # Issue 13496 mod = self.module - data = range(sys.maxsize-1) - self.assertEqual(mod.bisect_left(data, sys.maxsize-3), sys.maxsize-3) - self.assertEqual(mod.bisect_right(data, sys.maxsize-3), sys.maxsize-2) + n = sys.maxsize + data = range(n-1) + self.assertEqual(mod.bisect_left(data, n-3), n-3) + self.assertEqual(mod.bisect_right(data, n-3), n-2) + self.assertEqual(mod.bisect_left(data, n-3, n-10, n), n-3) + self.assertEqual(mod.bisect_right(data, n-3, n-10, n), n-2) + + def test_large_pyrange(self): + # Same as above, but without C-imposed limits on range() parameters + mod = self.module + n = sys.maxsize + data = Range(0, n-1) + self.assertEqual(mod.bisect_left(data, n-3), n-3) + self.assertEqual(mod.bisect_right(data, n-3), n-2) + self.assertEqual(mod.bisect_left(data, n-3, n-10, n), n-3) + self.assertEqual(mod.bisect_right(data, n-3, n-10, n), n-2) + x = n - 100 + mod.insort_left(data, x, x - 50, x + 50) + self.assertEqual(data.last_insert, (x, x)) + x = n - 200 + mod.insort_right(data, x, x - 50, x + 50) + self.assertEqual(data.last_insert, (x + 1, x)) def test_random(self, n=25): from random import randrange -- cgit v0.12