From a5cd255a7cb15c7920171f0796cf163a00a5ecaf Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 15 Oct 2013 23:36:56 +0200 Subject: Close #19267: Fix support of multibyte encoding (ex: UTF-16) in the logging module. --- Lib/logging/__init__.py | 2 +- Lib/test/test_logging.py | 18 ++++++++++++++++++ Misc/NEWS | 3 +++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/Lib/logging/__init__.py b/Lib/logging/__init__.py index 0e8e3dd..cda781a 100644 --- a/Lib/logging/__init__.py +++ b/Lib/logging/__init__.py @@ -857,7 +857,7 @@ class StreamHandler(Handler): try: if (isinstance(msg, unicode) and getattr(stream, 'encoding', None)): - ufs = fs.decode(stream.encoding) + ufs = u'%s\n' try: stream.write(ufs % msg) except UnicodeEncodeError: diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index 31bc48e..0c10580 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -1060,6 +1060,24 @@ class EncodingTest(BaseTest): #Compare against what the data should be when encoded in CP-1251 self.assertEqual(s, '\xe4\xee \xf1\xe2\xe8\xe4\xe0\xed\xe8\xff\n') + def test_encoding_utf16_unicode(self): + # Issue #19267 + log = logging.getLogger("test") + message = u'b\u0142\u0105d' + writer_class = codecs.getwriter('utf-16-le') + writer_class.encoding = 'utf-16-le' + stream = cStringIO.StringIO() + writer = writer_class(stream, 'strict') + handler = logging.StreamHandler(writer) + log.addHandler(handler) + try: + log.warning(message) + finally: + log.removeHandler(handler) + handler.close() + s = stream.getvalue() + self.assertEqual(s, 'b\x00B\x01\x05\x01d\x00\n\x00') + class WarningsTest(BaseTest): diff --git a/Misc/NEWS b/Misc/NEWS index 816bd4d..6e4b7fb 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -370,6 +370,9 @@ Library - Issue #17926: Fix dbm.__contains__ on 64-bit big-endian machines. +- Issue #19267: Fix support of multibyte encoding (ex: UTF-16) in the logging + module. 
+ - Issue #17918: When using SSLSocket.accept(), if the SSL handshake failed on the new socket, the socket would linger indefinitely. Thanks to Peter Saveliev for reporting. -- cgit v0.12 From 2235648b86304d359da23b5d2954729c82d4c132 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 17 Oct 2013 12:45:45 +0300 Subject: Issue 19276: Fix tests for wave files on big-endian platforms. Skip tests for 24-bit wave file on big-endian platforms. --- Lib/test/test_wave.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_wave.py b/Lib/test/test_wave.py index b7ba891..5b48cef 100644 --- a/Lib/test/test_wave.py +++ b/Lib/test/test_wave.py @@ -1,6 +1,7 @@ from test.test_support import TESTFN, run_unittest import unittest from test import audiotests +import sys import wave @@ -44,9 +45,13 @@ class WavePCM16Test(audiotests.AudioWriteTests, EEDF1755 82061666 7FFF1446 80001296 499C0EB2 52BA0DB9 EFB70F5C CE400FBC \ E4B50CEB 63440A5A 08CA0A1F 2BBA0B0B 51460E47 8BCB113C B6F50EEA 44150A59 \ """) - frames = audiotests.byteswap2(frames) + if sys.byteorder != 'big': + frames = audiotests.byteswap2(frames) +@unittest.skipIf(sys.byteorder == 'big', + '24-bit wave files are supported only on little-endian ' + 'platforms') class WavePCM24Test(audiotests.AudioWriteTests, audiotests.AudioTestsWithSourceFile, unittest.TestCase): @@ -73,7 +78,8 @@ class WavePCM24Test(audiotests.AudioWriteTests, E4B49C0CEA2D 6344A80A5A7C 08C8FE0A1FFE 2BB9860B0A0E \ 51486F0E44E1 8BCC64113B05 B6F4EC0EEB36 4413170A5B48 \ """) - frames = audiotests.byteswap3(frames) + if sys.byteorder != 'big': + frames = audiotests.byteswap3(frames) class WavePCM32Test(audiotests.AudioWriteTests, @@ -102,7 +108,8 @@ class WavePCM32Test(audiotests.AudioWriteTests, E4B49CC00CEA2D90 6344A8800A5A7CA0 08C8FE800A1FFEE0 2BB986C00B0A0E00 \ 51486F800E44E190 8BCC6480113B0580 B6F4EC000EEB3630 441317800A5B48A0 \ """) - frames = audiotests.byteswap4(frames) + if sys.byteorder != 'big': + frames 
= audiotests.byteswap4(frames) def test_main(): -- cgit v0.12 From cea38084ebf10af90907396ce062674d0b933be1 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Thu, 17 Oct 2013 19:51:00 +0200 Subject: Closes #19277: document all parameters of compressobj(). --- Doc/library/zlib.rst | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst index a3a27c5..192bd4d 100644 --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -68,7 +68,7 @@ The available exception and functions in this module are: Raises the :exc:`error` exception if any error occurs. -.. function:: compressobj([level]) +.. function:: compressobj([level[, method[, wbits[, memlevel[, strategy]]]]]) Returns a compression object, to be used for compressing data streams that won't fit into memory at once. *level* is an integer from ``0`` to ``9`` controlling @@ -76,6 +76,21 @@ The available exception and functions in this module are: ``9`` is slowest and produces the most. ``0`` is no compression. The default value is ``6``. + *method* is the compression algorithm. Currently, the only supported value is + ``DEFLATED``. + + *wbits* is the base two logarithm of the size of the window buffer. This + should be an integer from ``8`` to ``15``. Higher values give better + compression, but use more memory. The default is 15. + + *memlevel* controls the amount of memory used for internal compression state. + Valid values range from ``1`` to ``9``. Higher values use more memory, + but are faster and produce smaller output. The default is 8. + + *strategy* is used to tune the compression algorithm. Possible values are + ``Z_DEFAULT_STRATEGY``, ``Z_FILTERED``, and ``Z_HUFFMAN_ONLY``. The default + is ``Z_DEFAULT_STRATEGY``. + .. 
function:: crc32(data[, value]) -- cgit v0.12 From 7137803238ff54ae339e09ff676f6631000cfb28 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 17 Oct 2013 23:03:48 +0300 Subject: Issue #19276: Fixed the wave module on 64-bit big-endian platforms. --- Lib/wave.py | 4 +++- Misc/NEWS | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Lib/wave.py b/Lib/wave.py index 04a1a6d..d6eef78 100644 --- a/Lib/wave.py +++ b/Lib/wave.py @@ -80,7 +80,7 @@ class Error(Exception): WAVE_FORMAT_PCM = 0x0001 -_array_fmts = None, 'b', 'h', None, 'l' +_array_fmts = None, 'b', 'h', None, 'i' # Determine endian-ness import struct @@ -238,6 +238,7 @@ class Wave_read: import array chunk = self._data_chunk data = array.array(_array_fmts[self._sampwidth]) + assert data.itemsize == self._sampwidth nitems = nframes * self._nchannels if nitems * self._sampwidth > chunk.chunksize - chunk.size_read: nitems = (chunk.chunksize - chunk.size_read) / self._sampwidth @@ -421,6 +422,7 @@ class Wave_write: if self._sampwidth > 1 and big_endian: import array data = array.array(_array_fmts[self._sampwidth], data) + assert data.itemsize == self._sampwidth data.byteswap() data.tofile(self._file) self._datawritten = self._datawritten + len(data) * self._sampwidth diff --git a/Misc/NEWS b/Misc/NEWS index 6e4b7fb..8af9753 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -35,6 +35,8 @@ Core and Builtins Library ------- +- Issue #19276: Fixed the wave module on 64-bit big-endian platforms. + - Issue #18458: Prevent crashes with newer versions of libedit. Its readline emulation has changed from 0-based indexing to 1-based like gnu readline. Original patch by Ronald Oussoren. -- cgit v0.12 From de2f8fadcc05b87c33bd5f2e27cab9daf1e821ce Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 18 Oct 2013 11:54:52 +0300 Subject: Fix markup. 
--- Doc/library/aifc.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/aifc.rst b/Doc/library/aifc.rst index 1265423..de4144c 100644 --- a/Doc/library/aifc.rst +++ b/Doc/library/aifc.rst @@ -30,8 +30,8 @@ sampling rate or frame rate is the number of times per second the sound is sampled. The number of channels indicate if the audio is mono, stereo, or quadro. Each frame consists of one sample per channel. The sample size is the size in bytes of each sample. Thus a frame consists of -*nchannels*\**samplesize* bytes, and a second's worth of audio consists of -*nchannels*\**samplesize*\**framerate* bytes. +*nchannels*\*\ *samplesize* bytes, and a second's worth of audio consists of +*nchannels*\*\ *samplesize*\*\ *framerate* bytes. For example, CD quality audio has a sample size of two bytes (16 bits), uses two channels (stereo) and has a frame rate of 44,100 frames/second. This gives a -- cgit v0.12 From 2f4e7648b4dd188a2f0550f0b9297ce86db87148 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 18 Oct 2013 17:16:04 +0300 Subject: Remove redundant empty lines. --- Misc/NEWS | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/Misc/NEWS b/Misc/NEWS index 8af9753..e56e3b8 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -230,7 +230,6 @@ Library existing directory caused mkstemp and related APIs to fail instead of retrying. Report and fix by Vlad Shcherbina. - Tools/Demos ----------- @@ -3301,7 +3300,6 @@ Extension Modules - Issue #7567: Don't call `setupterm' twice. - Tools/Demos ----------- @@ -4539,7 +4537,6 @@ Core and Builtins - Issue #7466: Segmentation fault when the garbage collector is called in the middle of populating a tuple. Patch by Florent Xicluna. - Library ------- @@ -7013,7 +7010,6 @@ Build - Issue #3215: Build sqlite3 as sqlite3.dll, not sqlite3.pyd. 
- Documentation ------------- @@ -7079,7 +7075,6 @@ Core and Builtins only available if asserts are left in the code, in cases where they can't be triggered from Python code. - Extension Modules ----------------- - Issue #1179: [CVE-2007-4965] Integer overflow in imageop module. @@ -7387,7 +7382,6 @@ Build NOTE: 64-bit and 4-way builds are only suppported on Mac OS X 10.5 (or later). - C API ----- @@ -8263,7 +8257,6 @@ Core and builtins threading.enumerate() list after the join() for a brief period until it actually exited. - Library ------- @@ -9356,7 +9349,6 @@ Tools platform.python_implementation(); this will now be saved in the benchmark pickle. - Documentation ------------- @@ -9407,7 +9399,6 @@ Documentation applied to a newly created list object and add notes that this isn't a good idea. - Tools/Demos ----------- @@ -9420,7 +9411,6 @@ Tools/Demos - Bug #1546372: Fixed small bugglet in pybench that caused a missing file not to get reported properly. - Build ----- @@ -9503,7 +9493,6 @@ Build pybuildbot.identify to include some information about the build environment. - C API ----- @@ -9566,7 +9555,6 @@ C API - Bug #1542693: remove semi-colon at end of PyImport_ImportModuleEx macro so it can be used as an expression. - Windows ------- @@ -9580,7 +9568,6 @@ Windows - Bug #1216: Restore support for Visual Studio 2002. - Mac --- -- cgit v0.12 From 97b36181dbbdc16619421a9d5ce5e71a5084a36c Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Fri, 18 Oct 2013 12:57:55 -0400 Subject: fix description of super() behavior on descriptors --- Doc/howto/descriptor.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/howto/descriptor.rst b/Doc/howto/descriptor.rst index f793562..7828188 100644 --- a/Doc/howto/descriptor.rst +++ b/Doc/howto/descriptor.rst @@ -124,7 +124,7 @@ The important points to remember are: The object returned by ``super()`` also has a custom :meth:`__getattribute__` method for invoking descriptors. 
The call ``super(B, obj).m()`` searches ``obj.__class__.__mro__`` for the base class ``A`` immediately following ``B`` -and then returns ``A.__dict__['m'].__get__(obj, A)``. If not a descriptor, +and then returns ``A.__dict__['m'].__get__(obj, B)``. If not a descriptor, ``m`` is returned unchanged. If not in the dictionary, ``m`` reverts to a search using :meth:`object.__getattribute__`. -- cgit v0.12 From 11f880a73b71e6843b5d697a2a181fda37c4983f Mon Sep 17 00:00:00 2001 From: Ned Deily Date: Fri, 18 Oct 2013 20:40:23 -0700 Subject: Update OS X installer for building on 10.9. --- Mac/BuildScript/build-installer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Mac/BuildScript/build-installer.py b/Mac/BuildScript/build-installer.py index ef822d5..a9d4091 100755 --- a/Mac/BuildScript/build-installer.py +++ b/Mac/BuildScript/build-installer.py @@ -155,6 +155,7 @@ target_cc_map = { '10.6': ('gcc-4.2', 'g++-4.2'), '10.7': ('clang', 'clang++'), '10.8': ('clang', 'clang++'), + '10.9': ('clang', 'clang++'), } CC, CXX = target_cc_map[DEPTARGET] -- cgit v0.12 From 6ea3c9b22a3e81e6484a30c1d64c6a859a40da6a Mon Sep 17 00:00:00 2001 From: Ned Deily Date: Fri, 18 Oct 2013 21:33:57 -0700 Subject: Ensure setup.py looks for zlib.h in an OS X SDK. --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index bb7f295..de4392f 100644 --- a/setup.py +++ b/setup.py @@ -1389,6 +1389,8 @@ class PyBuildExt(build_ext): zlib_h = zlib_inc[0] + '/zlib.h' version = '"0.0.0"' version_req = '"1.1.3"' + if host_platform == 'darwin' and is_macosx_sdk_path(zlib_h): + zlib_h = os.path.join(macosx_sdk_root(), zlib_h[1:]) fp = open(zlib_h) while 1: line = fp.readline() -- cgit v0.12 From f10567233237673e8b96aff8a9753a6426fa4474 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 19 Oct 2013 20:37:49 +0300 Subject: Issue #19279: UTF-7 decoder no more produces illegal unicode strings. 
--- Lib/test/test_codecs.py | 29 +++++++++++++++++++++++++++++ Misc/NEWS | 2 ++ Objects/unicodeobject.c | 2 ++ 3 files changed, 33 insertions(+) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 0ccf818..c9a2515 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -611,6 +611,35 @@ class UTF7Test(ReadTest): ] ) + def test_errors(self): + tests = [ + ('a\xffb', u'a\ufffdb'), + ('a+IK', u'a\ufffd'), + ('a+IK-b', u'a\ufffdb'), + ('a+IK,b', u'a\ufffdb'), + ('a+IKx', u'a\u20ac\ufffd'), + ('a+IKx-b', u'a\u20ac\ufffdb'), + ('a+IKwgr', u'a\u20ac\ufffd'), + ('a+IKwgr-b', u'a\u20ac\ufffdb'), + ('a+IKwgr,', u'a\u20ac\ufffd'), + ('a+IKwgr,-b', u'a\u20ac\ufffd-b'), + ('a+IKwgrB', u'a\u20ac\u20ac\ufffd'), + ('a+IKwgrB-b', u'a\u20ac\u20ac\ufffdb'), + ('a+/,+IKw-b', u'a\ufffd\u20acb'), + ('a+//,+IKw-b', u'a\ufffd\u20acb'), + ('a+///,+IKw-b', u'a\uffff\ufffd\u20acb'), + ('a+////,+IKw-b', u'a\uffff\ufffd\u20acb'), + ] + for raw, expected in tests: + self.assertRaises(UnicodeDecodeError, codecs.utf_7_decode, + raw, 'strict', True) + self.assertEqual(raw.decode('utf-7', 'replace'), expected) + + def test_nonbmp(self): + self.assertEqual(u'\U000104A0'.encode(self.encoding), '+2AHcoA-') + self.assertEqual(u'\ud801\udca0'.encode(self.encoding), '+2AHcoA-') + self.assertEqual('+2AHcoA-'.decode(self.encoding), u'\U000104A0') + class UTF16ExTest(unittest.TestCase): def test_errors(self): diff --git a/Misc/NEWS b/Misc/NEWS index e56e3b8..b0b4f10 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -9,6 +9,8 @@ What's New in Python 2.7.6? Core and Builtins ----------------- +- Issue #19279: UTF-7 decoder no more produces illegal unicode strings. + - Issue #18739: Fix an inconsistency between math.log(n) and math.log(long(n)); the results could be off from one another by a ulp or two. 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 866eb9b..5ce9c88 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1671,6 +1671,7 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s, (base64buffer >> (base64bits-16)); base64bits -= 16; base64buffer &= (1 << base64bits) - 1; /* clear high bits */ + assert(outCh <= 0xffff); if (surrogate) { /* expecting a second surrogate */ if (outCh >= 0xDC00 && outCh <= 0xDFFF) { @@ -1737,6 +1738,7 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s, inShift = 1; shiftOutStart = p; base64bits = 0; + base64buffer = 0; } } else if (DECODE_DIRECT(ch)) { /* character decodes as itself */ -- cgit v0.12