1 files changed, 226 insertions, 16 deletions
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 06789cd..4d491b0 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -2,10 +2,14 @@ import io
 import unittest
 import pickle
 import pickletools
+import sys
 import copyreg
 from http.cookies import SimpleCookie
 
-from test.support import TestFailed, TESTFN, run_with_locale
+from test.support import (
+    TestFailed, TESTFN, run_with_locale,
+    _2G, _4G, bigmemtest,
+    )
 
 from pickle import bytes_types
 
@@ -14,6 +18,8 @@ from pickle import bytes_types
 # kind of outer loop.
 protocols = range(pickle.HIGHEST_PROTOCOL + 1)
 
+character_size = 4 if sys.maxunicode > 0xFFFF else 2
+
 
 # Return True if opcode code appears in the pickle, else False.
 def opcode_in_pickle(code, pickle):
@@ -30,6 +36,21 @@ def count_opcode(code, pickle):
             n += 1
     return n
 
+
+class UnseekableIO(io.BytesIO):  # BytesIO that reports itself as unseekable
+    def peek(self, *args):  # peeking is not offered by this stream
+        raise NotImplementedError
+
+    def seekable(self):
+        return False
+
+    def seek(self, *args):  # repositioning is refused
+        raise io.UnsupportedOperation
+
+    def tell(self):  # position queries are refused as well
+        raise io.UnsupportedOperation
+
+
 # We can't very well test the extension registry without putting known stuff
 # in it, but we have to be careful to restore its original state.  Code
 # should do this:
@@ -100,6 +121,19 @@ class metaclass(type):
 class use_metaclass(object, metaclass=metaclass):
     pass
 
+class pickling_metaclass(type):  # its classes pickle through __reduce__
+    def __eq__(self, other):  # equal when metaclass and reduce_args match
+        return (type(self) == type(other) and
+                self.reduce_args == other.reduce_args)
+
+    def __reduce__(self):  # unpickling calls create_dynamic_class(*args)
+        return (create_dynamic_class, self.reduce_args)
+
+def create_dynamic_class(name, bases):
+    cls = pickling_metaclass(name, bases, {})  # class with empty namespace
+    cls.reduce_args = name, bases  # consumed by __eq__ and __reduce__
+    return cls
+
 # DATA0 .. DATA2 are the pickles we expect under the various protocols, for
 # the object returned by create_data().
 
@@ -602,9 +636,15 @@ class AbstractPickleTests(unittest.TestCase):
 
     def test_bytes(self):
         for proto in protocols:
-            for u in b'', b'xyz', b'xyz'*100:
-                p = self.dumps(u)
-                self.assertEqual(self.loads(p), u)
+            # Empty, short and long payloads, then every 1-byte value and
+            # every doubled byte value.  proto is passed explicitly so that
+            # each protocol is exercised, not just the default one.
+            samples = [b'', b'xyz', b'xyz'*100]
+            samples += [bytes([i]) for i in range(256)]
+            samples += [bytes([i, i]) for i in range(256)]
+            for s in samples:
+                p = self.dumps(s, proto)
+                self.assertEqual(self.loads(p), s)
 
     def test_ints(self):
         import sys
@@ -667,6 +707,11 @@ class AbstractPickleTests(unittest.TestCase):
     def test_getinitargs(self):
         pass
 
+    def test_pop_empty_stack(self):
+        # Issue #7455: POP (b'0') with nothing on the stack must raise.
+        with self.assertRaises((pickle.UnpicklingError, IndexError)):
+            self.loads(b'0')
+
     def test_metaclass(self):
         a = use_metaclass()
         for proto in protocols:
@@ -674,6 +719,14 @@ class AbstractPickleTests(unittest.TestCase):
             b = self.loads(s)
             self.assertEqual(a.__class__, b.__class__)
 
+    def test_dynamic_class(self):
+        a = create_dynamic_class("my_dynamic_class", (object,))
+        copyreg.pickle(pickling_metaclass, pickling_metaclass.__reduce__)
+        # Undo the global registration so it cannot leak into other tests.
+        self.addCleanup(copyreg.dispatch_table.pop, pickling_metaclass, None)
+        for proto in protocols:
+            self.assertEqual(a, self.loads(self.dumps(a, proto)))
+
     def test_structseq(self):
         import time
         import os
@@ -841,8 +894,8 @@ class AbstractPickleTests(unittest.TestCase):
 
             # Dump using protocol 1 for comparison.
             s1 = self.dumps(x, 1)
-            self.assertTrue(__name__.encode("utf-8") in s1)
-            self.assertTrue(b"MyList" in s1)
+            self.assertIn(__name__.encode("utf-8"), s1)
+            self.assertIn(b"MyList", s1)
             self.assertEqual(opcode_in_pickle(opcode, s1), False)
 
             y = self.loads(s1)
@@ -851,8 +904,8 @@ class AbstractPickleTests(unittest.TestCase):
 
             # Dump using protocol 2 for test.
             s2 = self.dumps(x, 2)
-            self.assertTrue(__name__.encode("utf-8") not in s2)
-            self.assertTrue(b"MyList" not in s2)
+            self.assertNotIn(__name__.encode("utf-8"), s2)
+            self.assertNotIn(b"MyList", s2)
             self.assertEqual(opcode_in_pickle(opcode, s2), True, repr(s2))
 
             y = self.loads(s2)
@@ -903,7 +956,7 @@ class AbstractPickleTests(unittest.TestCase):
         x = dict.fromkeys(range(n))
         for proto in protocols:
             s = self.dumps(x, proto)
-            assert isinstance(s, bytes_types)
+            self.assertIsInstance(s, bytes_types)
             y = self.loads(s)
             self.assertEqual(x, y)
             num_setitems = count_opcode(pickle.SETITEMS, s)
@@ -1068,6 +1121,131 @@ class AbstractPickleTests(unittest.TestCase):
         dumped = self.dumps(set([3]), 2)
         self.assertEqual(dumped, DATA6)
 
+    def test_large_pickles(self):
+        # Round-trip a tuple holding a 60 KiB bytes payload to exercise the
+        # internal buffering routines on large data.
+        payload = (1, min, b'xy' * (30 * 1024), len)
+        expected_len = len(payload)
+        for proto in protocols:
+            result = self.loads(self.dumps(payload, proto))
+            self.assertEqual(len(result), expected_len)
+            self.assertEqual(result, payload)
+
+    def test_empty_bytestring(self):
+        # Issue #11286: an empty SHORT_BINSTRING decodes to the empty str.
+        pickled = b'\x80\x03U\x00q\x00.'
+        self.assertEqual(self.loads(pickled, encoding='koi8-r'), '')
+
+    def check_negative_32b_binXXX(self, dumped):
+        if sys.maxsize > 2**32:
+            self.skipTest("test is only meaningful on 32-bit builds")
+        # XXX The pure Python unpickler reads the length as a signed value
+        # and hands it straight to read(), which is why EOFError is accepted.
+        refused = (pickle.UnpicklingError, EOFError,
+                   ValueError, OverflowError)
+        self.assertRaises(refused, self.loads, dumped)
+
+    def test_negative_32b_binbytes(self):
+        # 32-bit builds must refuse a BINBYTES whose length field is >= 2**31
+        self.check_negative_32b_binXXX(b'\x80\x03B\xff\xff\xff\xffxyzq\x00.')
+
+    def test_negative_32b_binunicode(self):
+        # 32-bit builds must refuse a BINUNICODE whose length field is >= 2**31
+        self.check_negative_32b_binXXX(b'\x80\x03X\xff\xff\xff\xffxyzq\x00.')
+
+    def test_negative_put(self):
+        # Issue #12847: a PUT opcode with a negative memo index is rejected.
+        with self.assertRaises(ValueError):
+            self.loads(b'Va\np-1\n.')
+
+    def test_negative_32b_binput(self):
+        # Issue #12847: LONG_BINPUT with memo index 0xffffffff (32-bit only).
+        if sys.maxsize > 2**32:
+            self.skipTest("test is only meaningful on 32-bit builds")
+        with self.assertRaises(ValueError):
+            self.loads(b'\x80\x03X\x01\x00\x00\x00ar\xff\xff\xff\xff.')
+
+
+class BigmemPickleTests(unittest.TestCase):
+
+    # Binary protocols can serialize longs of up to 2GB-1; larger must fail
+
+    @bigmemtest(size=_2G, memuse=1 + 1, dry_run=False)
+    def test_huge_long_32b(self, size):
+        data = 1 << (8 * size)
+        try:
+            for proto in protocols:
+                if proto < 2:
+                    continue
+                with self.assertRaises((ValueError, OverflowError)):
+                    self.dumps(data, protocol=proto)
+        finally:
+            data = None
+
+    # Protocol 3 can serialize up to 4GB-1 as a bytes object
+    # (older protocols don't have a dedicated opcode for bytes and are
+    # too inefficient)
+
+    @bigmemtest(size=_2G, memuse=1 + 1, dry_run=False)
+    def test_huge_bytes_32b(self, size):
+        data = b"abcd" * (size // 4)
+        try:
+            for proto in protocols:
+                if proto < 3:
+                    continue
+                try:
+                    pickled = self.dumps(data, protocol=proto)
+                    self.assertIn(b"abcd", pickled[:15])
+                    self.assertIn(b"abcd", pickled[-15:])
+                finally:
+                    pickled = None
+        finally:
+            data = None
+
+    @bigmemtest(size=_4G, memuse=1 + 1, dry_run=False)
+    def test_huge_bytes_64b(self, size):
+        data = b"a" * size
+        try:
+            for proto in protocols:
+                if proto < 3:
+                    continue
+                with self.assertRaises((ValueError, OverflowError)):
+                    self.dumps(data, protocol=proto)
+        finally:
+            data = None
+
+    # All protocols use 1-byte per printable ASCII character; we add another
+    # byte because the encoded form has to be copied into the internal buffer.
+
+    @bigmemtest(size=_2G, memuse=2 + character_size, dry_run=False)
+    def test_huge_str_32b(self, size):
+        data = "abcd" * (size // 4)
+        try:
+            for proto in protocols:
+                try:
+                    pickled = self.dumps(data, protocol=proto)
+                    self.assertIn(b"abcd", pickled[:15])
+                    self.assertIn(b"abcd", pickled[-15:])
+                finally:
+                    pickled = None
+        finally:
+            data = None
+
+    # BINUNICODE (protocols 1, 2 and 3) cannot carry more than
+    # 2**32 - 1 bytes of utf-8 encoded unicode.
+
+    @bigmemtest(size=_4G, memuse=1 + character_size, dry_run=False)
+    def test_huge_str_64b(self, size):
+        data = "a" * size
+        try:
+            for proto in protocols:
+                if proto == 0:
+                    continue
+                with self.assertRaises((ValueError, OverflowError)):
+                    self.dumps(data, protocol=proto)
+        finally:
+            data = None
+
 
 # Test classes for reduce_ex
 
@@ -1112,9 +1290,6 @@ class REX_five(object):
 class MyInt(int):
     sample = 1
 
-class MyLong(int):
-    sample = 1
-
 class MyFloat(float):
     sample = 1.0
 
@@ -1136,7 +1311,7 @@ class MyList(list):
 class MyDict(dict):
     sample = {"a": 1, "b": 2}
 
-myclasses = [MyInt, MyLong, MyFloat,
+myclasses = [MyInt, MyFloat,
              MyComplex,
              MyStr, MyUnicode,
              MyTuple, MyList, MyDict]
@@ -1213,9 +1388,6 @@ class AbstractPickleModuleTests(unittest.TestCase):
         # Test issue4298
         s = bytes([0x58, 0, 0, 0, 0x54])
         self.assertRaises(EOFError, pickle.loads, s)
-        # Test issue7455
-        s = b'0'
-        self.assertRaises(pickle.UnpicklingError, pickle.loads, s)
 
 
 class AbstractPersistentPicklerTests(unittest.TestCase):
@@ -1367,6 +1539,44 @@ class AbstractPicklerUnpicklerObjectTests(unittest.TestCase):
         f.seek(0)
         self.assertEqual(unpickler.load(), data2)
 
+    def _check_multiple_unpicklings(self, ioclass):  # ioclass: stream factory
+        for proto in protocols:
+            data1 = [(x, str(x)) for x in range(2000)] + [b"abcde", len]
+            f = ioclass()
+            pickler = self.pickler_class(f, protocol=proto)
+            pickler.dump(data1)
+            pickled = f.getvalue()  # bytes of a single pickle of data1
+
+            N = 5
+            f = ioclass(pickled * N)  # N copies of the pickle, back to back
+            unpickler = self.unpickler_class(f)
+            for i in range(N):
+                if f.seekable():  # position is only tracked when seekable
+                    pos = f.tell()
+                self.assertEqual(unpickler.load(), data1)
+                if f.seekable():  # each load() must consume one pickle exactly
+                    self.assertEqual(f.tell(), pos + len(pickled))
+            self.assertRaises(EOFError, unpickler.load)  # stream is exhausted
+
+    def test_multiple_unpicklings_seekable(self):  # plain io.BytesIO stream
+        self._check_multiple_unpicklings(io.BytesIO)
+
+    def test_multiple_unpicklings_unseekable(self):  # seek()/tell() refused
+        self._check_multiple_unpicklings(UnseekableIO)
+
+    def test_unpickling_buffering_readline(self):
+        # Issue #12687: the unpickler's buffering logic could fail with
+        # text mode opcodes.  Round-trip a small list through buffered
+        # streams whose buffer sizes run from 1 to 10 bytes.
+        expected = list(range(10))
+        for proto in protocols:
+            for size in range(1, 11):
+                raw = io.BytesIO()
+                stream = io.BufferedRandom(raw, buffer_size=size)
+                self.pickler_class(stream, protocol=proto).dump(expected)
+                stream.seek(0)
+                self.assertEqual(self.unpickler_class(stream).load(), expected)
+
 
 if __name__ == "__main__":
     # Print some stuff that can be used to rewrite DATA{0,1,2}