From 0bec35d2d07de46a904a9862a9bc6ac4d9d161b9 Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Thu, 7 Jul 2011 12:59:31 +0100 Subject: Closes #12391: temporary files are now cleaned up. --- Lib/packaging/install.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Lib/packaging/install.py b/Lib/packaging/install.py index 551ece1..51e70df 100644 --- a/Lib/packaging/install.py +++ b/Lib/packaging/install.py @@ -42,10 +42,7 @@ def _move_files(files, destination): :param files: a list of files to move. :param destination: the destination directory to put on the files. - if not defined, create a new one, using mkdtemp """ - if not destination: - destination = tempfile.mkdtemp() for old in files: filename = os.path.split(old)[-1] @@ -126,8 +123,11 @@ def install_local_project(path): elif _is_archive_file(path): logger.info('Installing from archive: %s', path) _unpacked_dir = tempfile.mkdtemp() - shutil.unpack_archive(path, _unpacked_dir) - return _run_install_from_archive(_unpacked_dir) + try: + shutil.unpack_archive(path, _unpacked_dir) + return _run_install_from_archive(_unpacked_dir) + finally: + shutil.rmtree(_unpacked_dir) else: logger.warning('No projects to install.') return False @@ -179,8 +179,6 @@ def install_dists(dists, path, paths=None): :param path: base path to install distribution in :param paths: list of paths (defaults to sys.path) to look for info """ - if not path: - path = tempfile.mkdtemp() installed_dists = [] for dist in dists: -- cgit v0.12 From 081fe46ff96bccb1a256c356443b625b467814c8 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 8 Jul 2011 01:10:28 +0200 Subject: Issue #9566: cast unsigned int to Py_ssize_t in md5 and sha1 modules Fix a compiler warning on Windows 64 bits. --- Modules/md5module.c | 2 +- Modules/sha1module.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/md5module.c b/Modules/md5module.c index 208930d..de43f1c 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -243,7 +243,7 @@ void md5_process(struct md5_state *md5, in += MD5_BLOCKSIZE; inlen -= MD5_BLOCKSIZE; } else { - n = MIN(inlen, (MD5_BLOCKSIZE - md5->curlen)); + n = MIN(inlen, (Py_ssize_t)(MD5_BLOCKSIZE - md5->curlen)); memcpy(md5->buf + md5->curlen, in, (size_t)n); md5->curlen += n; in += n; diff --git a/Modules/sha1module.c b/Modules/sha1module.c index b25bd44..1cace54 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -218,7 +218,7 @@ void sha1_process(struct sha1_state *sha1, in += SHA1_BLOCKSIZE; inlen -= SHA1_BLOCKSIZE; } else { - n = MIN(inlen, (SHA1_BLOCKSIZE - sha1->curlen)); + n = MIN(inlen, (Py_ssize_t)(SHA1_BLOCKSIZE - sha1->curlen)); memcpy(sha1->buf + sha1->curlen, in, (size_t)n); sha1->curlen += n; in += n; -- cgit v0.12 From 2cded9c3f31d2fea4b033f44eaa828e508f03391 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 8 Jul 2011 01:45:13 +0200 Subject: Issue #12016: Multibyte CJK decoders now resynchronize faster They only ignore the first byte of an invalid byte sequence. For example, b'\xff\n'.decode('gb2312', 'replace') gives '\ufffd\n' instead of '\ufffd'. --- Doc/whatsnew/3.3.rst | 23 +++++++++ Lib/test/test_codecencodings_cn.py | 21 +++++---- Lib/test/test_codecencodings_hk.py | 4 +- Lib/test/test_codecencodings_jp.py | 96 ++++++++++++++++++++++++-------------- Lib/test/test_codecencodings_kr.py | 25 ++++++---- Lib/test/test_codecencodings_tw.py | 4 +- Lib/test/test_codecmaps_tw.py | 3 ++ Misc/NEWS | 4 ++ Modules/cjkcodecs/_codecs_cn.c | 14 +++--- Modules/cjkcodecs/_codecs_hk.c | 2 +- Modules/cjkcodecs/_codecs_jp.c | 34 +++++++------- Modules/cjkcodecs/_codecs_kr.c | 18 +++---- Modules/cjkcodecs/_codecs_tw.c | 4 +- 13 files changed, 159 insertions(+), 93 deletions(-) diff --git a/Doc/whatsnew/3.3.rst b/Doc/whatsnew/3.3.rst index e5e1805..990085e 100644 --- a/Doc/whatsnew/3.3.rst +++ b/Doc/whatsnew/3.3.rst @@ -68,6 +68,29 @@ New, Improved, and Deprecated Modules * Stub +codecs +------ + +Multibyte CJK decoders now resynchronize faster. They only ignore the first +byte of an invalid byte sequence. For example, b'\xff\n'.decode('gb2312', +'replace') gives '�\n' instead of '�'. + +(http://bugs.python.org/issue12016) + +Don't reset incremental encoders of CJK codecs at each call to their encode() +method anymore. For example: :: + + $ ./python -q + >>> import codecs + >>> encoder = codecs.getincrementalencoder('hz')('strict') + >>> b''.join(encoder.encode(x) for x in '\u52ff\u65bd\u65bc\u4eba\u3002 Bye.') + b'~{NpJ)l6HK!#~} Bye.' + +This example gives b'~{Np~}~{J)~}~{l6~}~{HK~}~{!#~} Bye.' with older Python +versions. + +(http://bugs.python.org/issue12100) + faulthandler ------------ diff --git a/Lib/test/test_codecencodings_cn.py b/Lib/test/test_codecencodings_cn.py index dca9f10..ee3d165 100644 --- a/Lib/test/test_codecencodings_cn.py +++ b/Lib/test/test_codecencodings_cn.py @@ -15,8 +15,8 @@ class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase): # invalid bytes (b"abc\x81\x81\xc1\xc4", "strict", None), (b"abc\xc8", "strict", None), - (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\u804a"), - (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"), + (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"), + (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"), (b"abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"), (b"\xc1\x64", "strict", None), ) @@ -28,8 +28,8 @@ class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase): # invalid bytes (b"abc\x80\x80\xc1\xc4", "strict", None), (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"), + (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"), + (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"), (b"\x83\x34\x83\x31", "strict", None), ("\u30fb", "strict", None), @@ -42,11 +42,14 @@ class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase): # invalid bytes (b"abc\x80\x80\xc1\xc4", "strict", None), (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"), + (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"), + (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"), - (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd\u804a"), + (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd9\ufffd9\u804a"), ("\u30fb", "strict", b"\x819\xa79"), + (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'), + (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'), + (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'), ) has_iso10646 = True @@ -74,9 +77,11 @@ class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase): '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002' 'Bye.\n'), # invalid bytes - (b'ab~cd', 'replace', 'ab\uFFFDd'), + (b'ab~cd', 'replace', 'ab\uFFFDcd'), (b'ab\xffcd', 'replace', 'ab\uFFFDcd'), (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'), + (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'), + (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"), ) def test_main(): diff --git a/Lib/test/test_codecencodings_hk.py b/Lib/test/test_codecencodings_hk.py index ccdc0b4..520df43 100644 --- a/Lib/test/test_codecencodings_hk.py +++ b/Lib/test/test_codecencodings_hk.py @@ -15,8 +15,8 @@ class Test_Big5HKSCS(test_multibytecodec_support.TestBase, unittest.TestCase): # invalid bytes (b"abc\x80\x80\xc1\xc4", "strict", None), (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"), + (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u8b10"), + (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u8b10\ufffd"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"), ) diff --git a/Lib/test/test_codecencodings_jp.py b/Lib/test/test_codecencodings_jp.py index f56a373..87e4812 100644 --- a/Lib/test/test_codecencodings_jp.py +++ b/Lib/test/test_codecencodings_jp.py @@ -15,50 +15,57 @@ class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase): # invalid bytes (b"abc\x81\x00\x81\x00\x82\x84", "strict", None), (b"abc\xf8", "strict", None), - (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\uff44"), - (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"), - (b"abc\x81\x00\x82\x84", "ignore", "abc\uff44"), + (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"), + (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"), + (b"abc\x81\x00\x82\x84", "ignore", "abc\x00\uff44"), + (b"ab\xEBxy", "replace", "ab\uFFFDxy"), + (b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"), + (b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'), # sjis vs cp932 (b"\\\x7e", "replace", "\\\x7e"), (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"), ) +euc_commontests = ( + # invalid bytes + (b"abc\x80\x80\xc1\xc4", "strict", None), + (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u7956"), + (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u7956\ufffd"), + (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"), + (b"abc\xc8", "strict", None), + (b"abc\x8f\x83\x83", "replace", "abc\ufffd\ufffd\ufffd"), + (b"\x82\xFCxy", "replace", "\ufffd\ufffdxy"), + (b"\xc1\x64", "strict", None), + (b"\xa1\xc0", "strict", "\uff3c"), + (b"\xa1\xc0\\", "strict", "\uff3c\\"), + (b"\x8eXY", "replace", "\ufffdXY"), +) + +class Test_EUC_JIS_2004(test_multibytecodec_support.TestBase, + unittest.TestCase): + encoding = 'euc_jis_2004' + tstring = test_multibytecodec_support.load_teststring('euc_jisx0213') + codectests = euc_commontests + xmlcharnametest = ( + "\xab\u211c\xbb = \u2329\u1234\u232a", + b"\xa9\xa8ℜ\xa9\xb2 = ⟨ሴ⟩" + ) + class Test_EUC_JISX0213(test_multibytecodec_support.TestBase, unittest.TestCase): encoding = 'euc_jisx0213' tstring = test_multibytecodec_support.load_teststring('euc_jisx0213') - codectests = ( - # invalid bytes - (b"abc\x80\x80\xc1\xc4", "strict", None), - (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"), - (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"), - (b"abc\x8f\x83\x83", "replace", "abc\ufffd"), - (b"\xc1\x64", "strict", None), - (b"\xa1\xc0", "strict", "\uff3c"), - ) + codectests = euc_commontests xmlcharnametest = ( "\xab\u211c\xbb = \u2329\u1234\u232a", b"\xa9\xa8ℜ\xa9\xb2 = ⟨ሴ⟩" ) -eucjp_commontests = ( - (b"abc\x80\x80\xc1\xc4", "strict", None), - (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"), - (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"), - (b"abc\x8f\x83\x83", "replace", "abc\ufffd"), - (b"\xc1\x64", "strict", None), -) - class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase): encoding = 'euc_jp' tstring = test_multibytecodec_support.load_teststring('euc_jp') - codectests = eucjp_commontests + ( - (b"\xa1\xc0\\", "strict", "\uff3c\\"), + codectests = euc_commontests + ( ("\xa5", "strict", b"\x5c"), ("\u203e", "strict", b"\x7e"), ) @@ -66,8 +73,6 @@ class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase, shiftjis_commonenctests = ( (b"abc\x80\x80\x82\x84", "strict", None), (b"abc\xf8", "strict", None), - (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"), - (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"), (b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"), ) @@ -75,20 +80,41 @@ class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase): encoding = 'shift_jis' tstring = test_multibytecodec_support.load_teststring('shift_jis') codectests = shiftjis_commonenctests + ( + (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"), + (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"), + (b"\\\x7e", "strict", "\\\x7e"), (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"), + (b"abc\x81\x39", "replace", "abc\ufffd9"), + (b"abc\xEA\xFC", "replace", "abc\ufffd\ufffd"), + (b"abc\xFF\x58", "replace", "abc\ufffdX"), + ) + +class Test_SJIS_2004(test_multibytecodec_support.TestBase, unittest.TestCase): + encoding = 'shift_jis_2004' + tstring = test_multibytecodec_support.load_teststring('shift_jis') + codectests = shiftjis_commonenctests + ( + (b"\\\x7e", "strict", "\xa5\u203e"), + (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"), + (b"abc\xEA\xFC", "strict", "abc\u64bf"), + (b"\x81\x39xy", "replace", "\ufffd9xy"), + (b"\xFF\x58xy", "replace", "\ufffdXxy"), + (b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"), + (b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"), + (b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'), + ) + xmlcharnametest = ( + "\xab\u211c\xbb = \u2329\u1234\u232a", + b"\x85Gℜ\x85Q = ⟨ሴ⟩" ) class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase): encoding = 'shift_jisx0213' tstring = test_multibytecodec_support.load_teststring('shift_jisx0213') - codectests = ( - # invalid bytes - (b"abc\x80\x80\x82\x84", "strict", None), - (b"abc\xf8", "strict", None), - (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"), - (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"), - (b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"), + codectests = shiftjis_commonenctests + ( + (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"), + (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"), + # sjis vs cp932 (b"\\\x7e", "replace", "\xa5\u203e"), (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"), diff --git a/Lib/test/test_codecencodings_kr.py b/Lib/test/test_codecencodings_kr.py index de4da7f..4997e83 100644 --- a/Lib/test/test_codecencodings_kr.py +++ b/Lib/test/test_codecencodings_kr.py @@ -15,8 +15,8 @@ class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase): # invalid bytes (b"abc\x80\x80\xc1\xc4", "strict", None), (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"), + (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\uc894"), + (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"), ) @@ -27,8 +27,8 @@ class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase): # invalid bytes (b"abc\x80\x80\xc1\xc4", "strict", None), (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"), + (b"abc\x80\x80\xc1\xc4", "replace", 'abc\ufffd\ufffd\uc894'), + (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"), # composed make-up sequence errors @@ -40,13 +40,14 @@ class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase): (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4", "strict", None), (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "strict", "\uc4d4"), (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4x", "strict", "\uc4d4x"), - (b"a\xa4\xd4\xa4\xb6\xa4", "replace", "a\ufffd"), + (b"a\xa4\xd4\xa4\xb6\xa4", "replace", 'a\ufffd'), (b"\xa4\xd4\xa3\xb6\xa4\xd0\xa4\xd4", "strict", None), (b"\xa4\xd4\xa4\xb6\xa3\xd0\xa4\xd4", "strict", None), (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa3\xd4", "strict", None), - (b"\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", "\ufffd"), - (b"\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", "\ufffd"), - (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", "\ufffd"), + (b"\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", '\ufffd\u6e21\ufffd\u3160\ufffd'), + (b"\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", '\ufffd\u6e21\ub544\ufffd\ufffd'), + (b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", '\ufffd\u6e21\ub544\u572d\ufffd'), + (b"\xa4\xd4\xff\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "replace", '\ufffd\ufffd\ufffd\uc4d4'), (b"\xc1\xc4", "strict", "\uc894"), ) @@ -57,9 +58,13 @@ class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase): # invalid bytes (b"abc\x80\x80\xc1\xc4", "strict", None), (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ucd27"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ucd27\ufffd"), + (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\ucd27"), + (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\ucd27\ufffd"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\ucd27"), + (b"\xD8abc", "replace", "\uFFFDabc"), + (b"\xD8\xFFabc", "replace", "\uFFFD\uFFFDabc"), + (b"\x84bxy", "replace", "\uFFFDbxy"), + (b"\x8CBxy", "replace", "\uFFFDBxy"), ) def test_main(): diff --git a/Lib/test/test_codecencodings_tw.py b/Lib/test/test_codecencodings_tw.py index 12d3c9f..f2f3c18 100644 --- a/Lib/test/test_codecencodings_tw.py +++ b/Lib/test/test_codecencodings_tw.py @@ -15,8 +15,8 @@ class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase): # invalid bytes (b"abc\x80\x80\xc1\xc4", "strict", None), (b"abc\xc8", "strict", None), - (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"), - (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"), + (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u8b10"), + (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u8b10\ufffd"), (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"), ) diff --git a/Lib/test/test_codecmaps_tw.py b/Lib/test/test_codecmaps_tw.py index 6db5091..412b9de 100644 --- a/Lib/test/test_codecmaps_tw.py +++ b/Lib/test/test_codecmaps_tw.py @@ -23,6 +23,9 @@ class TestCP950Map(test_multibytecodec_support.TestBase_Mapping, (b'\xa2\xcc', '\u5341'), (b'\xa2\xce', '\u5345'), ] + codectests = ( + (b"\xFFxy", "replace", "\ufffdxy"), + ) def test_main(): support.run_unittest(__name__) diff --git a/Misc/NEWS b/Misc/NEWS index f742f8a..404185d 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -219,6 +219,10 @@ Core and Builtins Library ------- +- Issue #12016: Multibyte CJK decoders now resynchronize faster. They only + ignore the first byte of an invalid byte sequence. For example, + b'\xff\n'.decode('gb2312', 'replace') gives '\ufffd\n' instead of '\ufffd'. + - Issue #12459: time.sleep() now raises a ValueError if the sleep length is negative, instead of an infinite sleep on Windows or raising an IOError on Linux for example, to have the same behaviour on all platforms. diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c index ab4e659..9e9e96c 100644 --- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -85,7 +85,7 @@ DECODER(gb2312) TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) { NEXT(2, 1) } - else return 2; + else return 1; } return 0; @@ -141,7 +141,7 @@ DECODER(gbk) REQUIRE_INBUF(2) GBK_DECODE(c, IN2, **outbuf) - else return 2; + else return 1; NEXT(2, 1) } @@ -267,7 +267,7 @@ DECODER(gb18030) c3 = IN3; c4 = IN4; if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) - return 4; + return 1; c -= 0x81; c2 -= 0x30; c3 -= 0x81; c4 -= 0x30; @@ -292,12 +292,12 @@ DECODER(gb18030) continue; } } - return 4; + return 1; } GBK_DECODE(c, c2, **outbuf) else TRYMAP_DEC(gb18030ext, **outbuf, c, c2); - else return 2; + else return 1; NEXT(2, 1) } @@ -400,7 +400,7 @@ DECODER(hz) else if (c2 == '\n') ; /* line-continuation */ else - return 2; + return 1; NEXT(2, 0); continue; } @@ -419,7 +419,7 @@ DECODER(hz) NEXT(2, 1) } else - return 2; + return 1; } } diff --git a/Modules/cjkcodecs/_codecs_hk.c b/Modules/cjkcodecs/_codecs_hk.c index 558a42f..d3ad04b 100644 --- a/Modules/cjkcodecs/_codecs_hk.c +++ b/Modules/cjkcodecs/_codecs_hk.c @@ -161,7 +161,7 @@ DECODER(big5hkscs) case 0x8864: WRITE2(0x00ca, 0x030c); break; case 0x88a3: WRITE2(0x00ea, 0x0304); break; case 0x88a5: WRITE2(0x00ea, 0x030c); break; - default: return 2; + default: return 1; } NEXT(2, 2) /* all decoded codepoints are pairs, above. */ diff --git a/Modules/cjkcodecs/_codecs_jp.c b/Modules/cjkcodecs/_codecs_jp.c index a05e01b..a500696 100644 --- a/Modules/cjkcodecs/_codecs_jp.c +++ b/Modules/cjkcodecs/_codecs_jp.c @@ -112,7 +112,7 @@ DECODER(cp932) TRYMAP_DEC(cp932ext, **outbuf, c, c2); else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){ if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; + return 1; c = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); @@ -120,7 +120,7 @@ DECODER(cp932) c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21; TRYMAP_DEC(jisx0208, **outbuf, c, c2); - else return 2; + else return 1; } else if (c >= 0xf0 && c <= 0xf9) { if ((c2 >= 0x40 && c2 <= 0x7e) || @@ -128,10 +128,10 @@ DECODER(cp932) OUT1(0xe000 + 188 * (c - 0xf0) + (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41)) else - return 2; + return 1; } else - return 2; + return 1; NEXT(2, 1) } @@ -256,7 +256,7 @@ DECODER(euc_jis_2004) NEXT(2, 1) } else - return 2; + return 1; } else if (c == 0x8f) { unsigned char c2, c3; @@ -274,7 +274,7 @@ DECODER(euc_jis_2004) continue; } else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ; - else return 3; + else return 1; NEXT(3, 1) } else { @@ -300,7 +300,7 @@ DECODER(euc_jis_2004) NEXT(2, 2) continue; } - else return 2; + else return 1; NEXT(2, 1) } } @@ -388,7 +388,7 @@ DECODER(euc_jp) NEXT(2, 1) } else - return 2; + return 1; } else if (c == 0x8f) { unsigned char c2, c3; @@ -401,7 +401,7 @@ DECODER(euc_jp) NEXT(3, 1) } else - return 3; + return 1; } else { unsigned char c2; @@ -417,7 +417,7 @@ DECODER(euc_jp) #endif TRYMAP_DEC(jisx0208, **outbuf, c ^ 0x80, c2 ^ 0x80) ; - else return 2; + else return 1; NEXT(2, 1) } } @@ -502,7 +502,7 @@ DECODER(shift_jis) REQUIRE_INBUF(2) c2 = IN2; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; + return 1; c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); @@ -522,10 +522,10 @@ DECODER(shift_jis) continue; } else - return 2; + return 1; } else - return 2; + return 1; NEXT(1, 1) /* JIS X 0201 */ } @@ -645,7 +645,7 @@ DECODER(shift_jis_2004) REQUIRE_INBUF(2) c2 = IN2; if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc) - return 2; + return 1; c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1); c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41); @@ -671,7 +671,7 @@ DECODER(shift_jis_2004) NEXT_OUT(2) } else - return 2; + return 1; NEXT_IN(2) } else { /* Plane 2 */ @@ -689,13 +689,13 @@ DECODER(shift_jis_2004) continue; } else - return 2; + return 1; NEXT(2, 1) } continue; } else - return 2; + return 1; NEXT(1, 1) /* JIS X 0201 */ } diff --git a/Modules/cjkcodecs/_codecs_kr.c b/Modules/cjkcodecs/_codecs_kr.c index 9272e36..f5697dd 100644 --- a/Modules/cjkcodecs/_codecs_kr.c +++ b/Modules/cjkcodecs/_codecs_kr.c @@ -123,7 +123,7 @@ DECODER(euc_kr) if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE || (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE || (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE) - return 8; + return 1; c = (*inbuf)[3]; if (0xa1 <= c && c <= 0xbe) @@ -143,7 +143,7 @@ DECODER(euc_kr) jong = NONE; if (cho == NONE || jung == NONE || jong == NONE) - return 8; + return 1; OUT1(0xac00 + cho*588 + jung*28 + jong); NEXT(8, 1) @@ -152,7 +152,7 @@ DECODER(euc_kr) NEXT(2, 1) } else - return 2; + return 1; } return 0; @@ -208,7 +208,7 @@ DECODER(cp949) REQUIRE_INBUF(2) TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80); else TRYMAP_DEC(cp949ext, **outbuf, c, IN2); - else return 2; + else return 1; NEXT(2, 1) } @@ -375,7 +375,7 @@ DECODER(johab) i_jong = johabidx_jongseong[c_jong]; if (i_cho == NONE || i_jung == NONE || i_jong == NONE) - return 2; + return 1; /* we don't use U+1100 hangul jamo yet. */ if (i_cho == FILL) { @@ -391,7 +391,7 @@ DECODER(johab) OUT1(0x3100 | johabjamo_jungseong[c_jung]) else - return 2; + return 1; } } else { if (i_jung == FILL) { @@ -399,7 +399,7 @@ DECODER(johab) OUT1(0x3100 | johabjamo_choseong[c_cho]) else - return 2; + return 1; } else OUT1(0xac00 + @@ -414,7 +414,7 @@ DECODER(johab) c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) || (c2 & 0x7f) == 0x7f || (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3))) - return 2; + return 1; else { unsigned char t1, t2; @@ -425,7 +425,7 @@ DECODER(johab) t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21; TRYMAP_DEC(ksx1001, **outbuf, t1, t2); - else return 2; + else return 1; NEXT(2, 1) } } diff --git a/Modules/cjkcodecs/_codecs_tw.c b/Modules/cjkcodecs/_codecs_tw.c index 38cf723..916298d 100644 --- a/Modules/cjkcodecs/_codecs_tw.c +++ b/Modules/cjkcodecs/_codecs_tw.c @@ -55,7 +55,7 @@ DECODER(big5) TRYMAP_DEC(big5, **outbuf, c, IN2) { NEXT(2, 1) } - else return 2; + else return 1; } return 0; @@ -109,7 +109,7 @@ DECODER(cp950) TRYMAP_DEC(cp950ext, **outbuf, c, IN2); else TRYMAP_DEC(big5, **outbuf, c, IN2); - else return 2; + else return 1; NEXT(2, 1) } -- cgit v0.12 From f8bebf8566bc31e4fe90c5c5f10bfa72f3b90c91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Araujo?= Date: Sat, 2 Jul 2011 16:58:25 +0200 Subject: Improve reST target --- Doc/install/install.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/install/install.rst b/Doc/install/install.rst index 8067544..2f100c4 100644 --- a/Doc/install/install.rst +++ b/Doc/install/install.rst @@ -4,7 +4,7 @@ Installing Python projects: overwiew ==================================== -.. _packaging_packaging-intro: +.. _packaging-install-intro: Introduction ============ -- cgit v0.12 From 6e2e3b9e8188acca0d40df365f3d9595fad59991 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 8 Jul 2011 02:26:39 +0200 Subject: Issue #12423: Fix os.abort() documentation The Python signal handler for SIGABRT is not called on os.abort() (only if the signal is raised manually or sent by another process). Patch by Kamil Kisiel. --- Doc/ACKS.txt | 3 ++- Doc/library/os.rst | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Doc/ACKS.txt b/Doc/ACKS.txt index 7f67d36..1ad18f1 100644 --- a/Doc/ACKS.txt +++ b/Doc/ACKS.txt @@ -105,6 +105,7 @@ docs@python.org), and we'll be glad to correct the problem. * Robert Kern * Jim Kerr * Jan Kim + * Kamil Kisiel * Greg Kochanski * Guido Kollerie * Peter A. Koren @@ -142,7 +143,7 @@ docs@python.org), and we'll be glad to correct the problem. * Ross Moore * Sjoerd Mullender * Dale Nagata - * Michal Nowikowski + * Michal Nowikowski * Ng Pheng Siong * Koray Oner * Tomas Oppelstrup diff --git a/Doc/library/os.rst b/Doc/library/os.rst index d712181..c412ee1 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -1582,8 +1582,9 @@ to be ignored. Generate a :const:`SIGABRT` signal to the current process. On Unix, the default behavior is to produce a core dump; on Windows, the process immediately returns - an exit code of ``3``. Be aware that programs which use :func:`signal.signal` - to register a handler for :const:`SIGABRT` will behave differently. + an exit code of ``3``. Be aware that calling this function will not call the + Python signal handler registered for :const:`SIGABRT` with + :func:`signal.signal`. Availability: Unix, Windows. -- cgit v0.12 From ce5fe83878c56ac17007e88148714ee523e64c94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Araujo?= Date: Fri, 8 Jul 2011 16:27:12 +0200 Subject: Factor out code used by packaging commands for HTTP requests (#12169). We now have one function to prepare multipart POST requests, and we use CRLF, as recommended by the HTTP spec (#10150). Initial patch by John Edmonds. --- Lib/packaging/command/register.py | 24 ++---------- Lib/packaging/command/upload.py | 52 +++++-------------------- Lib/packaging/command/upload_docs.py | 46 +--------------------- Lib/packaging/tests/test_command_register.py | 10 ++--- Lib/packaging/tests/test_command_upload_docs.py | 27 +------------ Lib/packaging/tests/test_util.py | 26 ++++++++++++- Lib/packaging/util.py | 47 ++++++++++++++++++++++ Misc/ACKS | 1 + Misc/NEWS | 3 ++ 9 files changed, 97 insertions(+), 139 deletions(-) diff --git a/Lib/packaging/command/register.py b/Lib/packaging/command/register.py index 962afdcc..006dfdf 100644 --- a/Lib/packaging/command/register.py +++ b/Lib/packaging/command/register.py @@ -10,7 +10,7 @@ import urllib.request from packaging import logger from packaging.util import (read_pypirc, generate_pypirc, DEFAULT_REPOSITORY, - DEFAULT_REALM, get_pypirc_path) + DEFAULT_REALM, get_pypirc_path, encode_multipart) from packaging.command.cmd import Command class register(Command): @@ -231,29 +231,11 @@ Your selection [default 1]: ''') if 'name' in data: logger.info('Registering %s to %s', data['name'], self.repository) # Build up the MIME payload for the urllib2 POST data - boundary = '--------------GHSKFJDLGDS7543FJKLFHRE75642756743254' - sep_boundary = '\n--' + boundary - end_boundary = sep_boundary + '--' - body = io.StringIO() - for key, value in data.items(): - # handle multiple entries for the same name - if not isinstance(value, (tuple, list)): - value = [value] - - for value in value: - body.write(sep_boundary) - body.write('\nContent-Disposition: form-data; name="%s"'%key) - body.write("\n\n") - body.write(value) - if value and value[-1] == '\r': - body.write('\n') # write an extra newline (lurve Macs) - body.write(end_boundary) - body.write("\n") - body = body.getvalue() + content_type, body = encode_multipart(data.items(), []) # build the Request headers = { - 'Content-type': 'multipart/form-data; boundary=%s; charset=utf-8'%boundary, + 'Content-type': content_type, 'Content-length': str(len(body)) } req = urllib.request.Request(self.repository, body, headers) diff --git a/Lib/packaging/command/upload.py b/Lib/packaging/command/upload.py index df265c9..e39016c 100644 --- a/Lib/packaging/command/upload.py +++ b/Lib/packaging/command/upload.py @@ -14,7 +14,7 @@ from urllib.request import urlopen, Request from packaging import logger from packaging.errors import PackagingOptionError from packaging.util import (spawn, read_pypirc, DEFAULT_REPOSITORY, - DEFAULT_REALM) + DEFAULT_REALM, encode_multipart) from packaging.command.cmd import Command @@ -131,54 +131,22 @@ class upload(Command): auth = b"Basic " + standard_b64encode(user_pass) # Build up the MIME payload for the POST data - boundary = b'--------------GHSKFJDLGDS7543FJKLFHRE75642756743254' - sep_boundary = b'\n--' + boundary - end_boundary = sep_boundary + b'--' - body = BytesIO() - - file_fields = ('content', 'gpg_signature') - - for key, value in data.items(): - # handle multiple entries for the same name - if not isinstance(value, tuple): - value = [value] - - content_dispo = '\nContent-Disposition: form-data; name="%s"' % key - - if key in file_fields: - filename_, content = value - filename_ = ';filename="%s"' % filename_ - body.write(sep_boundary) - body.write(content_dispo.encode('utf-8')) - body.write(filename_.encode('utf-8')) - body.write(b"\n\n") - body.write(content) - else: - for value in value: - value = str(value).encode('utf-8') - body.write(sep_boundary) - body.write(content_dispo.encode('utf-8')) - body.write(b"\n\n") - body.write(value) - if value and value.endswith(b'\r'): - # write an extra newline (lurve Macs) - body.write(b'\n') - - body.write(end_boundary) - body.write(b"\n") - body = body.getvalue() + files = [] + for key in ('content', 'gpg_signature'): + if key in data: + filename_, value = data.pop(key) + files.append((key, filename_, value)) + + content_type, body = encode_multipart(data.items(), files) logger.info("Submitting %s to %s", filename, self.repository) # build the Request - headers = {'Content-type': - 'multipart/form-data; boundary=%s' % - boundary.decode('ascii'), + headers = {'Content-type': content_type, 'Content-length': str(len(body)), 'Authorization': auth} - request = Request(self.repository, data=body, - headers=headers) + request = Request(self.repository, body, headers) # send the data try: result = urlopen(request) diff --git a/Lib/packaging/command/upload_docs.py b/Lib/packaging/command/upload_docs.py index 47e6217..03dd3ec 100644 --- a/Lib/packaging/command/upload_docs.py +++ b/Lib/packaging/command/upload_docs.py @@ -10,7 +10,8 @@ import urllib.parse from io import BytesIO from packaging import logger -from packaging.util import read_pypirc, DEFAULT_REPOSITORY, DEFAULT_REALM +from packaging.util import (read_pypirc, DEFAULT_REPOSITORY, DEFAULT_REALM, + encode_multipart) from packaging.errors import PackagingFileError from packaging.command.cmd import Command @@ -28,49 +29,6 @@ def zip_dir(directory): return destination -# grabbed from -# http://code.activestate.com/recipes/ -# 146306-http-client-to-post-using-multipartform-data/ -# TODO factor this out for use by install and command/upload - -def encode_multipart(fields, files, boundary=None): - """ - *fields* is a sequence of (name: str, value: str) elements for regular - form fields, *files* is a sequence of (name: str, filename: str, value: - bytes) elements for data to be uploaded as files. - - Returns (content_type: bytes, body: bytes) ready for http.client.HTTP. - """ - if boundary is None: - boundary = b'--------------GHSKFJDLGDS7543FJKLFHRE75642756743254' - elif not isinstance(boundary, bytes): - raise TypeError('boundary is not bytes but %r' % type(boundary)) - - l = [] - for key, value in fields: - l.extend(( - b'--' + boundary, - ('Content-Disposition: form-data; name="%s"' % - key).encode('utf-8'), - b'', - value.encode('utf-8'))) - - for key, filename, value in files: - l.extend(( - b'--' + boundary, - ('Content-Disposition: form-data; name="%s"; filename="%s"' % - (key, filename)).encode('utf-8'), - b'', - value)) - l.append(b'--' + boundary + b'--') - l.append(b'') - - body = b'\r\n'.join(l) - - content_type = b'multipart/form-data; boundary=' + boundary - return content_type, body - - class upload_docs(Command): description = "upload HTML documentation to PyPI" diff --git a/Lib/packaging/tests/test_command_register.py b/Lib/packaging/tests/test_command_register.py index 7aa487a..183f84e 100644 --- a/Lib/packaging/tests/test_command_register.py +++ b/Lib/packaging/tests/test_command_register.py @@ -152,7 +152,7 @@ class RegisterTestCase(support.TempdirManager, req1 = dict(self.conn.reqs[0].headers) req2 = dict(self.conn.reqs[1].headers) self.assertEqual(req2['Content-length'], req1['Content-length']) - self.assertIn('xxx', self.conn.reqs[1].data) + self.assertIn(b'xxx', self.conn.reqs[1].data) def test_password_not_in_file(self): @@ -180,8 +180,8 @@ class RegisterTestCase(support.TempdirManager, self.assertEqual(len(self.conn.reqs), 1) req = self.conn.reqs[0] headers = dict(req.headers) - self.assertEqual(headers['Content-length'], '608') - self.assertIn('tarek', req.data) + self.assertEqual(headers['Content-length'], '628') + self.assertIn(b'tarek', req.data) def test_password_reset(self): # this test runs choice 3 @@ -195,8 +195,8 @@ class RegisterTestCase(support.TempdirManager, self.assertEqual(len(self.conn.reqs), 1) req = self.conn.reqs[0] headers = dict(req.headers) - self.assertEqual(headers['Content-length'], '290') - self.assertIn('tarek', req.data) + self.assertEqual(headers['Content-length'], '298') + self.assertIn(b'tarek', req.data) @unittest.skipUnless(DOCUTILS_SUPPORT, 'needs docutils') def test_strict(self): diff --git a/Lib/packaging/tests/test_command_upload_docs.py b/Lib/packaging/tests/test_command_upload_docs.py index 9876d18..4070279 100644 --- a/Lib/packaging/tests/test_command_upload_docs.py +++ b/Lib/packaging/tests/test_command_upload_docs.py @@ -9,8 +9,7 @@ except ImportError: _ssl = None from packaging.command import upload_docs as upload_docs_mod -from packaging.command.upload_docs import (upload_docs, zip_dir, - encode_multipart) +from packaging.command.upload_docs import upload_docs, zip_dir from packaging.dist import Distribution from packaging.errors import PackagingFileError, PackagingOptionError @@ -23,23 +22,6 @@ except ImportError: PyPIServerTestCase = object -EXPECTED_MULTIPART_OUTPUT = [ - b'---x', - b'Content-Disposition: form-data; name="username"', - b'', - b'wok', - b'---x', - b'Content-Disposition: form-data; name="password"', - b'', - b'secret', - b'---x', - b'Content-Disposition: form-data; name="picture"; filename="wok.png"', - b'', - b'PNG89', - b'---x--', - b'', -] - PYPIRC = """\ [distutils] index-servers = server1 @@ -108,13 +90,6 @@ class UploadDocsTestCase(support.TempdirManager, zip_f = zipfile.ZipFile(compressed) self.assertEqual(zip_f.namelist(), ['index.html', 'docs/index.html']) - def test_encode_multipart(self): - fields = [('username', 'wok'), ('password', 'secret')] - files = [('picture', 'wok.png', b'PNG89')] - content_type, body = encode_multipart(fields, files, b'-x') - self.assertEqual(b'multipart/form-data; boundary=-x', content_type) - self.assertEqual(EXPECTED_MULTIPART_OUTPUT, body.split(b'\r\n')) - def prepare_command(self): self.cmd.upload_dir = self.prepare_sample_dir() self.cmd.ensure_finalized() diff --git a/Lib/packaging/tests/test_util.py b/Lib/packaging/tests/test_util.py index e3ccfd5..21ac4e2 100644 --- a/Lib/packaging/tests/test_util.py +++ b/Lib/packaging/tests/test_util.py @@ -19,7 +19,7 @@ from packaging.util import ( get_compiler_versions, _MAC_OS_X_LD_VERSION, byte_compile, find_packages, spawn, get_pypirc_path, generate_pypirc, read_pypirc, resolve_name, iglob, RICH_GLOB, egginfo_to_distinfo, is_setuptools, is_distutils, is_packaging, - get_install_method, cfg_to_args) + get_install_method, cfg_to_args, encode_multipart) PYPIRC = """\ @@ -54,6 +54,23 @@ username:tarek password:xxx """ +EXPECTED_MULTIPART_OUTPUT = [ + b'---x', + b'Content-Disposition: form-data; name="username"', + b'', + b'wok', + b'---x', + b'Content-Disposition: form-data; name="password"', + b'', + b'secret', + b'---x', + b'Content-Disposition: form-data; name="picture"; filename="wok.png"', + b'', + b'PNG89', + b'---x--', + b'', +] + class FakePopen: test_class = None @@ -525,6 +542,13 @@ class UtilTestCase(support.EnvironRestorer, self.assertEqual(args['scripts'], dist.scripts) self.assertEqual(args['py_modules'], dist.py_modules) + def test_encode_multipart(self): + fields = [('username', 'wok'), ('password', 'secret')] + files = [('picture', 'wok.png', b'PNG89')] + content_type, body = encode_multipart(fields, files, b'-x') + self.assertEqual(b'multipart/form-data; boundary=-x', content_type) + self.assertEqual(EXPECTED_MULTIPART_OUTPUT, body.split(b'\r\n')) + class GlobTestCaseBase(support.TempdirManager, support.LoggingCatcher, diff --git a/Lib/packaging/util.py b/Lib/packaging/util.py index 76db89a..5637e80 100644 --- a/Lib/packaging/util.py +++ b/Lib/packaging/util.py @@ -1487,3 +1487,50 @@ def _mkpath(name, mode=0o777, verbose=True, dry_run=False): _path_created.add(abs_head) return created_dirs + + +def encode_multipart(fields, files, boundary=None): + """Prepare a multipart HTTP request. + + *fields* is a sequence of (name: str, value: str) elements for regular + form fields, *files* is a sequence of (name: str, filename: str, value: + bytes) elements for data to be uploaded as files. + + Returns (content_type: bytes, body: bytes) ready for http.client.HTTP. + """ + # Taken from + # http://code.activestate.com/recipes/146306-http-client-to-post-using-multipartform-data/ + + if boundary is None: + boundary = b'--------------GHSKFJDLGDS7543FJKLFHRE75642756743254' + elif not isinstance(boundary, bytes): + raise TypeError('boundary must be bytes, not %r' % type(boundary)) + + l = [] + for key, values in fields: + # handle multiple entries for the same name + if not isinstance(values, (tuple, list)): + values=[values] + + for value in values: + l.extend(( + b'--' + boundary, + ('Content-Disposition: form-data; name="%s"' % + key).encode('utf-8'), + b'', + value.encode('utf-8'))) + + for key, filename, value in files: + l.extend(( + b'--' + boundary, + ('Content-Disposition: form-data; name="%s"; filename="%s"' % + (key, filename)).encode('utf-8'), + b'', + value)) + + l.append(b'--' + boundary + b'--') + l.append(b'') + + body = b'\r\n'.join(l) + content_type = b'multipart/form-data; boundary=' + boundary + return content_type, body diff --git a/Misc/ACKS b/Misc/ACKS index 378b60e..0662bae 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -263,6 +263,7 @@ Maxim Dzumanenko Walter Dörwald Hans Eckardt Rodolpho Eckhardt +John Edmonds Grant Edwards John Ehresman Eric Eisner diff --git a/Misc/NEWS b/Misc/NEWS index 404185d..c45a725 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -219,6 +219,9 @@ Core and Builtins Library ------- +- Issues #12169 and #10510: Factor out code used by various packaging commands + to make HTTP POST requests, and make sure it uses CRLF. + - Issue #12016: Multibyte CJK decoders now resynchronize faster. They only ignore the first byte of an invalid byte sequence. For example, b'\xff\n'.decode('gb2312', 'replace') gives '\ufffd\n' instead of '\ufffd'. -- cgit v0.12 From 4468e55d4bd2a9ac4c6aeaffeb99653b7737ebd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Araujo?= Date: Fri, 8 Jul 2011 17:22:19 +0200 Subject: Close file handles in a timely manner in packaging.database (#12504). This fixes a bug with the remove (uninstall) feature on Windows. Patch by Thomas Holmes. --- Lib/packaging/database.py | 12 +++++++----- Lib/packaging/tests/test_uninstall.py | 2 -- Misc/ACKS | 1 + Misc/NEWS | 3 +++ 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Lib/packaging/database.py b/Lib/packaging/database.py index 67946a2..c71d608 100644 --- a/Lib/packaging/database.py +++ b/Lib/packaging/database.py @@ -158,17 +158,18 @@ class Distribution: self.name, self.version, self.path) def _get_records(self, local=False): + results = [] with self.get_distinfo_file('RECORD') as record: record_reader = csv.reader(record, delimiter=',', lineterminator='\n') - # XXX needs an explaining comment for row in record_reader: - path, checksum, size = (row[:] + - [None for i in range(len(row), 3)]) + missing = [None for i in range(len(row), 3)] + path, checksum, size = row + missing if local: path = path.replace('/', os.sep) path = os.path.join(sys.prefix, path) - yield path, checksum, size + results.append((path, checksum, size)) + return results def get_resource_path(self, relative_path): with self.get_distinfo_file('RESOURCES') as resources_file: @@ -197,7 +198,8 @@ class Distribution: :type local: boolean :returns: iterator of (path, md5, size) """ - return self._get_records(local) + for result in self._get_records(local): + yield result def uses(self, path): """ diff --git a/Lib/packaging/tests/test_uninstall.py b/Lib/packaging/tests/test_uninstall.py index 0ef7f3b..c7702de 100644 --- a/Lib/packaging/tests/test_uninstall.py +++ b/Lib/packaging/tests/test_uninstall.py @@ -93,7 +93,6 @@ class UninstallTestCase(support.TempdirManager, self.assertRaises(PackagingError, remove, 'Foo', paths=[self.root_dir]) - @unittest.skipIf(sys.platform == 'win32', 'deactivated for now') def test_uninstall(self): dist, install_lib = self.install_dist() self.assertIsFile(install_lib, 'foo', '__init__.py') @@ -103,7 +102,6 @@ class UninstallTestCase(support.TempdirManager, self.assertIsNotFile(install_lib, 'foo', 'sub', '__init__.py') self.assertIsNotFile(install_lib, 'Foo-0.1.dist-info', 'RECORD') - @unittest.skipIf(sys.platform == 'win32', 'deactivated for now') def test_remove_issue(self): # makes sure if there are OSErrors (like permission denied) # remove() stops and display a clean error diff --git a/Misc/ACKS b/Misc/ACKS index 0662bae..6879c98 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -419,6 +419,7 @@ Jonathan Hogg Gerrit Holl Shane Holloway Rune Holm +Thomas Holmes Philip Homburg Naofumi Honda Jeffrey Honig diff --git a/Misc/NEWS b/Misc/NEWS index c45a725..cd0eab6 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -219,6 +219,9 @@ Core and Builtins Library ------- +- Issue #12504: Close file handles in a timely manner in packaging.database. + This fixes a bug with the remove (uninstall) feature on Windows. + - Issues #12169 and #10510: Factor out code used by various packaging commands to make HTTP POST requests, and make sure it uses CRLF. -- cgit v0.12 From b9ac25d1c394714f0565845b274e7eebb402f1e7 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 8 Jul 2011 18:47:06 +0200 Subject: Issue #12440: When testing whether some bits in SSLContext.options can be reset, check the version of the OpenSSL headers Python was compiled against, rather than the runtime version of the OpenSSL library. --- Lib/ssl.py | 2 ++ Lib/test/test_ssl.py | 2 +- Misc/NEWS | 4 ++++ Modules/_ssl.c | 34 +++++++++++++++++++++++++--------- 4 files changed, 32 insertions(+), 10 deletions(-) diff --git a/Lib/ssl.py b/Lib/ssl.py index e9e9aa8..ce9ebdf 100644 --- a/Lib/ssl.py +++ b/Lib/ssl.py @@ -77,6 +77,8 @@ from _ssl import ( ) from _ssl import HAS_SNI from _ssl import PROTOCOL_SSLv3, PROTOCOL_SSLv23, PROTOCOL_TLSv1 +from _ssl import _OPENSSL_API_VERSION + _PROTOCOL_NAMES = { PROTOCOL_TLSv1: "TLSv1", PROTOCOL_SSLv23: "SSLv23", diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py index 7edf5b2..869381a 100644 --- a/Lib/test/test_ssl.py +++ b/Lib/test/test_ssl.py @@ -60,7 +60,7 @@ def handle_error(prefix): def can_clear_options(): # 0.9.8m or higher - return ssl.OPENSSL_VERSION_INFO >= (0, 9, 8, 13, 15) + return ssl._OPENSSL_API_VERSION >= (0, 9, 8, 13, 15) def no_sslv2_implies_sslv3_hello(): # 0.9.7h or higher diff --git a/Misc/NEWS b/Misc/NEWS index 0bc7cd7..018d799 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -47,6 +47,10 @@ C-API Tests ----- +- Issue #12440: When testing whether some bits in SSLContext.options can be + reset, check the version of the OpenSSL headers Python was compiled against, + rather than the runtime version of the OpenSSL library. + - Issue #12497: Install test/data to prevent failures of the various codecmaps tests. diff --git a/Modules/_ssl.c b/Modules/_ssl.c index a813d5f..27dcdbc 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -2037,6 +2037,24 @@ static struct PyModuleDef _sslmodule = { NULL }; + +static void +parse_openssl_version(unsigned long libver, + unsigned int *major, unsigned int *minor, + unsigned int *fix, unsigned int *patch, + unsigned int *status) +{ + *status = libver & 0xF; + libver >>= 4; + *patch = libver & 0xFF; + libver >>= 8; + *fix = libver & 0xFF; + libver >>= 8; + *minor = libver & 0xFF; + libver >>= 8; + *major = libver & 0xFF; +} + PyMODINIT_FUNC PyInit__ssl(void) { @@ -2149,15 +2167,7 @@ PyInit__ssl(void) return NULL; if (PyModule_AddObject(m, "OPENSSL_VERSION_NUMBER", r)) return NULL; - status = libver & 0xF; - libver >>= 4; - patch = libver & 0xFF; - libver >>= 8; - fix = libver & 0xFF; - libver >>= 8; - minor = libver & 0xFF; - libver >>= 8; - major = libver & 0xFF; + parse_openssl_version(libver, &major, &minor, &fix, &patch, &status); r = Py_BuildValue("IIIII", major, minor, fix, patch, status); if (r == NULL || PyModule_AddObject(m, "OPENSSL_VERSION_INFO", r)) return NULL; @@ -2165,5 +2175,11 @@ PyInit__ssl(void) if (r == NULL || PyModule_AddObject(m, "OPENSSL_VERSION", r)) return NULL; + libver = OPENSSL_VERSION_NUMBER; + parse_openssl_version(libver, &major, &minor, &fix, &patch, &status); + r = Py_BuildValue("IIIII", major, minor, fix, patch, status); + if (r == NULL || PyModule_AddObject(m, "_OPENSSL_API_VERSION", r)) + return NULL; + return m; } -- cgit v0.12 From 72fff046a6d96131d2a929699e6fc8875f57e452 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 8 Jul 2011 19:19:57 +0200 Subject: Avoid failing in test_urllibnet.test_bad_address when some overzealous DNS service (e.g. OpenDNS) resolves a non-existent domain name. The test is now skipped instead. --- Lib/test/test_urllibnet.py | 8 ++++++++ Misc/NEWS | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/Lib/test/test_urllibnet.py b/Lib/test/test_urllibnet.py index 03d1708..383b2af 100644 --- a/Lib/test/test_urllibnet.py +++ b/Lib/test/test_urllibnet.py @@ -113,6 +113,14 @@ class urlopenNetworkTests(unittest.TestCase): def test_bad_address(self): # Make sure proper exception is raised when connecting to a bogus # address. + bogus_domain = "sadflkjsasf.i.nvali.d" + try: + socket.gethostbyname(bogus_domain) + except socket.gaierror: + pass + else: + # This happens with some overzealous DNS providers such as OpenDNS + self.skipTest("%r should not resolve for test to work" % bogus_domain) self.assertRaises(IOError, # SF patch 809915: In Sep 2003, VeriSign started # highjacking invalid .com and .net addresses to diff --git a/Misc/NEWS b/Misc/NEWS index 018d799..e415b4a 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -47,6 +47,10 @@ C-API Tests ----- +- Avoid failing in test_urllibnet.test_bad_address when some overzealous + DNS service (e.g. OpenDNS) resolves a non-existent domain name. The test + is now skipped instead. + - Issue #12440: When testing whether some bits in SSLContext.options can be reset, check the version of the OpenSSL headers Python was compiled against, rather than the runtime version of the OpenSSL library. -- cgit v0.12 From 95531ea2f1c69eed6b8a5d964f0fd47c124ccca8 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 8 Jul 2011 19:43:51 +0200 Subject: Avoid failing in test_robotparser when mueblesmoraleda.com is flaky and an overzealous DNS service (e.g. OpenDNS) redirects to a placeholder Web site. --- Lib/test/test_robotparser.py | 19 +++++++++++++++++-- Misc/NEWS | 4 ++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py index 2a6d047..178761d 100644 --- a/Lib/test/test_robotparser.py +++ b/Lib/test/test_robotparser.py @@ -1,7 +1,8 @@ import io import unittest import urllib.robotparser -from urllib.error import URLError +from urllib.error import URLError, HTTPError +from urllib.request import urlopen from test import support class RobotTestCase(unittest.TestCase): @@ -237,13 +238,27 @@ class NetworkTestCase(unittest.TestCase): support.requires('network') with support.transient_internet('mueblesmoraleda.com'): url = 'http://mueblesmoraleda.com' + robots_url = url + "/robots.txt" + # First check the URL is usable for our purposes, since the + # test site is a bit flaky. + try: + urlopen(robots_url) + except HTTPError as e: + if e.code not in {401, 403}: + self.skipTest( + "%r should return a 401 or 403 HTTP error, not %r" + % (robots_url, e.code)) + else: + self.skipTest( + "%r should return a 401 or 403 HTTP error, not succeed" + % (robots_url)) parser = urllib.robotparser.RobotFileParser() parser.set_url(url) try: parser.read() except URLError: self.skipTest('%s is unavailable' % url) - self.assertEqual(parser.can_fetch("*", url+"/robots.txt"), False) + self.assertEqual(parser.can_fetch("*", robots_url), False) def testPythonOrg(self): support.requires('network') diff --git a/Misc/NEWS b/Misc/NEWS index e415b4a..53b7562 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -47,6 +47,10 @@ C-API Tests ----- +- Avoid failing in test_robotparser when mueblesmoraleda.com is flaky and + an overzealous DNS service (e.g. OpenDNS) redirects to a placeholder + Web site. + - Avoid failing in test_urllibnet.test_bad_address when some overzealous DNS service (e.g. OpenDNS) resolves a non-existent domain name. The test is now skipped instead. -- cgit v0.12