diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2017-11-16 07:44:43 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-11-16 07:44:43 (GMT) |
commit | 3daaafb700df45716bb55f3a293f88773baf3463 (patch) | |
tree | 72e6fe022b82ba7660e82f3561ef991e33a345dd | |
parent | 0a2abdfca2495291809855cf7dfc6721c9c962e3 (diff) | |
download | cpython-3daaafb700df45716bb55f3a293f88773baf3463.zip cpython-3daaafb700df45716bb55f3a293f88773baf3463.tar.gz cpython-3daaafb700df45716bb55f3a293f88773baf3463.tar.bz2 |
bpo-32037: Use the INT opcode for 32-bit integers in protocol 0 pickles. (#4407)
-rw-r--r-- | Lib/pickle.py | 5 | ||||
-rw-r--r-- | Lib/pickletools.py | 72 | ||||
-rw-r--r-- | Lib/test/pickletester.py | 6 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2017-11-15-19-04-22.bpo-32037.r8-5Nk.rst | 4 | ||||
-rw-r--r-- | Modules/_pickle.c | 46 |
5 files changed, 70 insertions, 63 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
index faa8fd7..350d4a4 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -674,7 +674,10 @@ class _Pickler:
             else:
                 self.write(LONG4 + pack("<i", n) + encoded)
             return
-        self.write(LONG + repr(obj).encode("ascii") + b'L\n')
+        if -0x80000000 <= obj <= 0x7fffffff:
+            self.write(INT + repr(obj).encode("ascii") + b'\n')
+        else:
+            self.write(LONG + repr(obj).encode("ascii") + b'L\n')
     dispatch[int] = save_long
 
     def save_float(self, obj):
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index 0c8dddc..408c2ac 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -2480,35 +2480,35 @@ _dis_test = r"""
     0: (    MARK
     1: l        LIST       (MARK at 0)
     2: p    PUT        0
-    5: L    LONG       1
-    9: a    APPEND
-   10: L    LONG       2
-   14: a    APPEND
-   15: (    MARK
-   16: L        LONG       3
-   20: L        LONG       4
-   24: t        TUPLE      (MARK at 15)
-   25: p    PUT        1
-   28: a    APPEND
-   29: (    MARK
-   30: d        DICT       (MARK at 29)
-   31: p    PUT        2
-   34: c    GLOBAL     '_codecs encode'
-   50: p    PUT        3
-   53: (    MARK
-   54: V        UNICODE    'abc'
-   59: p        PUT        4
-   62: V        UNICODE    'latin1'
-   70: p        PUT        5
-   73: t        TUPLE      (MARK at 53)
-   74: p    PUT        6
-   77: R    REDUCE
-   78: p    PUT        7
-   81: V    UNICODE    'def'
-   86: p    PUT        8
-   89: s    SETITEM
-   90: a    APPEND
-   91: .    STOP
+    5: I    INT        1
+    8: a    APPEND
+    9: I    INT        2
+   12: a    APPEND
+   13: (    MARK
+   14: I        INT        3
+   17: I        INT        4
+   20: t        TUPLE      (MARK at 13)
+   21: p    PUT        1
+   24: a    APPEND
+   25: (    MARK
+   26: d        DICT       (MARK at 25)
+   27: p    PUT        2
+   30: c    GLOBAL     '_codecs encode'
+   46: p    PUT        3
+   49: (    MARK
+   50: V        UNICODE    'abc'
+   55: p        PUT        4
+   58: V        UNICODE    'latin1'
+   66: p        PUT        5
+   69: t        TUPLE      (MARK at 49)
+   70: p    PUT        6
+   73: R    REDUCE
+   74: p    PUT        7
+   77: V    UNICODE    'def'
+   82: p    PUT        8
+   85: s    SETITEM
+   86: a    APPEND
+   87: .    STOP
 highest protocol among opcodes = 0
 
 Try again with a "binary" pickle.
@@ -2577,13 +2577,13 @@ highest protocol among opcodes = 0
    93: p    PUT        6
    96: V    UNICODE    'value'
   103: p    PUT        7
-  106: L    LONG       42
-  111: s    SETITEM
-  112: b    BUILD
-  113: a    APPEND
-  114: g    GET        5
-  117: a    APPEND
-  118: .    STOP
+  106: I    INT        42
+  110: s    SETITEM
+  111: b    BUILD
+  112: a    APPEND
+  113: g    GET        5
+  116: a    APPEND
+  117: .    STOP
 highest protocol among opcodes = 0
 
 >>> dis(pickle.dumps(x, 1))
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 243bc94..bf6116b 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -1821,7 +1821,7 @@ class AbstractPickleTests(unittest.TestCase):
             with self.subTest(proto=proto):
                 s = self.dumps(x, proto)
                 if proto < 1:
-                    self.assertIn(b'\nL64206', s) # LONG
+                    self.assertIn(b'\nI64206', s) # INT
                 else:
                     self.assertIn(b'M\xce\xfa', s) # BININT2
                 self.assertEqual(opcode_in_pickle(pickle.NEWOBJ, s),
@@ -1837,7 +1837,7 @@ class AbstractPickleTests(unittest.TestCase):
             with self.subTest(proto=proto):
                 s = self.dumps(x, proto)
                 if proto < 1:
-                    self.assertIn(b'\nL64206', s) # LONG
+                    self.assertIn(b'\nI64206', s) # INT
                 elif proto < 2:
                     self.assertIn(b'M\xce\xfa', s) # BININT2
                 elif proto < 4:
@@ -1857,7 +1857,7 @@ class AbstractPickleTests(unittest.TestCase):
             with self.subTest(proto=proto):
                 s = self.dumps(x, proto)
                 if proto < 1:
-                    self.assertIn(b'\nL64206', s) # LONG
+                    self.assertIn(b'\nI64206', s) # INT
                 elif proto < 2:
                     self.assertIn(b'M\xce\xfa', s) # BININT2
                 elif proto < 4:
diff --git a/Misc/NEWS.d/next/Library/2017-11-15-19-04-22.bpo-32037.r8-5Nk.rst b/Misc/NEWS.d/next/Library/2017-11-15-19-04-22.bpo-32037.r8-5Nk.rst
new file mode 100644
index 0000000..d077d0e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-11-15-19-04-22.bpo-32037.r8-5Nk.rst
@@ -0,0 +1,4 @@
+Integers that fit in a signed 32-bit integer will be now pickled with
+protocol 0 using the INT opcode. This will decrease the size of a pickle,
+speed up pickling and unpickling, and make these integers be unpickled as
+int instances in Python 2.
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index 4b7f1ed..943c701 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -1858,18 +1858,13 @@ save_long(PicklerObject *self, PyObject *obj)
     PyObject *repr = NULL;
     Py_ssize_t size;
     long val;
+    int overflow;
     int status = 0;
 
-    const char long_op = LONG;
-
-    val= PyLong_AsLong(obj);
-    if (val == -1 && PyErr_Occurred()) {
-        /* out of range for int pickling */
-        PyErr_Clear();
-    }
-    else if (self->bin &&
-             (sizeof(long) <= 4 ||
-              (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1)))) {
+    val= PyLong_AsLongAndOverflow(obj, &overflow);
+    if (!overflow && (sizeof(long) <= 4 ||
+            (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
+    {
         /* result fits in a signed 4-byte integer.
 
            Note: we can't use -0x80000000L in the above condition because some
@@ -1882,31 +1877,35 @@ save_long(PicklerObject *self, PyObject *obj)
         char pdata[32];
         Py_ssize_t len = 0;
 
-        pdata[1] = (unsigned char)(val & 0xff);
-        pdata[2] = (unsigned char)((val >> 8) & 0xff);
-        pdata[3] = (unsigned char)((val >> 16) & 0xff);
-        pdata[4] = (unsigned char)((val >> 24) & 0xff);
-
-        if ((pdata[4] == 0) && (pdata[3] == 0)) {
-            if (pdata[2] == 0) {
-                pdata[0] = BININT1;
-                len = 2;
+        if (self->bin) {
+            pdata[1] = (unsigned char)(val & 0xff);
+            pdata[2] = (unsigned char)((val >> 8) & 0xff);
+            pdata[3] = (unsigned char)((val >> 16) & 0xff);
+            pdata[4] = (unsigned char)((val >> 24) & 0xff);
+
+            if ((pdata[4] != 0) || (pdata[3] != 0)) {
+                pdata[0] = BININT;
+                len = 5;
             }
-            else {
+            else if (pdata[2] != 0) {
                 pdata[0] = BININT2;
                 len = 3;
             }
+            else {
+                pdata[0] = BININT1;
+                len = 2;
+            }
         }
         else {
-            pdata[0] = BININT;
-            len = 5;
+            sprintf(pdata, "%c%ld\n", INT, val);
+            len = strlen(pdata);
         }
-
         if (_Pickler_Write(self, pdata, len) < 0)
             return -1;
 
         return 0;
     }
+    assert(!PyErr_Occurred());
 
     if (self->proto >= 2) {
         /* Linear-time pickling. */
@@ -1986,6 +1985,7 @@ save_long(PicklerObject *self, PyObject *obj)
             goto error;
     }
     else {
+        const char long_op = LONG;
         const char *string;
 
         /* proto < 2: write the repr and newline. This is quadratic-time (in |