summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Mark Dickinson <dickinsm@gmail.com> 2009-01-20 20:43:58 (GMT)
committer Mark Dickinson <dickinsm@gmail.com> 2009-01-20 20:43:58 (GMT)
commit 8dd05147d6224f4982ef6b14d904bb600ef33ea3 (patch)
tree f65fc6569e9fdb3ef82719d268531676f2b2743d
parent 6dc4396708010c001bc4ac1a95550f806f654408 (diff)
download cpython-8dd05147d6224f4982ef6b14d904bb600ef33ea3.zip
cpython-8dd05147d6224f4982ef6b14d904bb600ef33ea3.tar.gz
cpython-8dd05147d6224f4982ef6b14d904bb600ef33ea3.tar.bz2
Issue #4842, patch 1/2: fix pickle in Python 3.x so that pickling with the
'L' opcode always appends an 'L' on output, just as 2.x does. When unpickling, remove the trailing 'L' (if present) before passing the result to PyLong_FromString.
-rw-r--r-- Lib/pickle.py 4
-rw-r--r-- Lib/pickletools.py 78
-rw-r--r-- Lib/test/pickletester.py 162
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Modules/_pickle.c 36
5 files changed, 155 insertions, 128 deletions
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 2947bd4..409d4b2 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -470,7 +470,7 @@ class _Pickler:
else:
self.write(LONG4 + pack("<i", n) + encoded)
return
- self.write(LONG + repr(obj).encode("ascii") + b'\n')
+ self.write(LONG + repr(obj).encode("ascii") + b'L\n')
dispatch[int] = save_long
def save_float(self, obj, pack=struct.pack):
@@ -890,6 +890,8 @@ class _Unpickler:
def load_long(self):
val = self.readline()[:-1].decode("ascii")
+ if val and val[-1] == 'L':
+ val = val[:-1]
self.append(int(val, 0))
dispatch[LONG[0]] = load_long
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index 1dd45da..6614df7 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -527,6 +527,8 @@ def read_decimalnl_long(f):
"""
s = read_stringnl(f, decode=False, stripquotes=False)
+ if s[-1:] == b'L':
+ s = s[:-1]
return int(s)
@@ -2052,39 +2054,39 @@ _dis_test = r"""
1: l LIST (MARK at 0)
2: p PUT 0
5: L LONG 1
- 8: a APPEND
- 9: L LONG 2
- 12: a APPEND
- 13: ( MARK
- 14: L LONG 3
- 17: L LONG 4
- 20: t TUPLE (MARK at 13)
- 21: p PUT 1
- 24: a APPEND
- 25: ( MARK
- 26: d DICT (MARK at 25)
- 27: p PUT 2
- 30: c GLOBAL 'builtins bytes'
- 46: p PUT 3
- 49: ( MARK
- 50: ( MARK
- 51: l LIST (MARK at 50)
- 52: p PUT 4
- 55: L LONG 97
- 59: a APPEND
- 60: L LONG 98
+ 9: a APPEND
+ 10: L LONG 2
+ 14: a APPEND
+ 15: ( MARK
+ 16: L LONG 3
+ 20: L LONG 4
+ 24: t TUPLE (MARK at 15)
+ 25: p PUT 1
+ 28: a APPEND
+ 29: ( MARK
+ 30: d DICT (MARK at 29)
+ 31: p PUT 2
+ 34: c GLOBAL 'builtins bytes'
+ 50: p PUT 3
+ 53: ( MARK
+ 54: ( MARK
+ 55: l LIST (MARK at 54)
+ 56: p PUT 4
+ 59: L LONG 97
64: a APPEND
- 65: L LONG 99
- 69: a APPEND
- 70: t TUPLE (MARK at 49)
- 71: p PUT 5
- 74: R REDUCE
- 75: p PUT 6
- 78: V UNICODE 'def'
- 83: p PUT 7
- 86: s SETITEM
- 87: a APPEND
- 88: . STOP
+ 65: L LONG 98
+ 70: a APPEND
+ 71: L LONG 99
+ 76: a APPEND
+ 77: t TUPLE (MARK at 53)
+ 78: p PUT 5
+ 81: R REDUCE
+ 82: p PUT 6
+ 85: V UNICODE 'def'
+ 90: p PUT 7
+ 93: s SETITEM
+ 94: a APPEND
+ 95: . STOP
highest protocol among opcodes = 0
Try again with a "binary" pickle.
@@ -2157,12 +2159,12 @@ highest protocol among opcodes = 0
92: V UNICODE 'value'
99: p PUT 7
102: L LONG 42
- 106: s SETITEM
- 107: b BUILD
- 108: a APPEND
- 109: g GET 5
- 112: a APPEND
- 113: . STOP
+ 107: s SETITEM
+ 108: b BUILD
+ 109: a APPEND
+ 110: g GET 5
+ 113: a APPEND
+ 114: . STOP
highest protocol among opcodes = 0
>>> dis(pickle.dumps(x, 1))
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 887b512..3ed84fb 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -90,21 +90,21 @@ class use_metaclass(object, metaclass=metaclass):
# the object returned by create_data().
DATA0 = (
- b'(lp0\nL0\naL1\naF2.0\nac'
+ b'(lp0\nL0L\naL1L\naF2.0\nac'
b'builtins\ncomplex\n'
b'p1\n(F3.0\nF0.0\ntp2\nRp'
- b'3\naL1\naL-1\naL255\naL-'
- b'255\naL-256\naL65535\na'
- b'L-65535\naL-65536\naL2'
- b'147483647\naL-2147483'
- b'647\naL-2147483648\na('
+ b'3\naL1L\naL-1L\naL255L\naL-'
+ b'255L\naL-256L\naL65535L\na'
+ b'L-65535L\naL-65536L\naL2'
+ b'147483647L\naL-2147483'
+ b'647L\naL-2147483648L\na('
b'Vabc\np4\ng4\nccopyreg'
b'\n_reconstructor\np5\n('
b'c__main__\nC\np6\ncbu'
b'iltins\nobject\np7\nNt'
b'p8\nRp9\n(dp10\nVfoo\np1'
- b'1\nL1\nsVbar\np12\nL2\nsb'
- b'g9\ntp13\nag13\naL5\na.'
+ b'1\nL1L\nsVbar\np12\nL2L\nsb'
+ b'g9\ntp13\nag13\naL5L\na.'
)
# Disassembly of DATA0
@@ -113,80 +113,80 @@ DATA0_DIS = """\
1: l LIST (MARK at 0)
2: p PUT 0
5: L LONG 0
- 8: a APPEND
- 9: L LONG 1
- 12: a APPEND
- 13: F FLOAT 2.0
- 18: a APPEND
- 19: c GLOBAL 'builtins complex'
- 37: p PUT 1
- 40: ( MARK
- 41: F FLOAT 3.0
- 46: F FLOAT 0.0
- 51: t TUPLE (MARK at 40)
- 52: p PUT 2
- 55: R REDUCE
- 56: p PUT 3
- 59: a APPEND
- 60: L LONG 1
- 63: a APPEND
- 64: L LONG -1
- 68: a APPEND
- 69: L LONG 255
- 74: a APPEND
- 75: L LONG -255
- 81: a APPEND
- 82: L LONG -256
- 88: a APPEND
- 89: L LONG 65535
- 96: a APPEND
- 97: L LONG -65535
- 105: a APPEND
- 106: L LONG -65536
+ 9: a APPEND
+ 10: L LONG 1
+ 14: a APPEND
+ 15: F FLOAT 2.0
+ 20: a APPEND
+ 21: c GLOBAL 'builtins complex'
+ 39: p PUT 1
+ 42: ( MARK
+ 43: F FLOAT 3.0
+ 48: F FLOAT 0.0
+ 53: t TUPLE (MARK at 42)
+ 54: p PUT 2
+ 57: R REDUCE
+ 58: p PUT 3
+ 61: a APPEND
+ 62: L LONG 1
+ 66: a APPEND
+ 67: L LONG -1
+ 72: a APPEND
+ 73: L LONG 255
+ 79: a APPEND
+ 80: L LONG -255
+ 87: a APPEND
+ 88: L LONG -256
+ 95: a APPEND
+ 96: L LONG 65535
+ 104: a APPEND
+ 105: L LONG -65535
114: a APPEND
- 115: L LONG 2147483647
- 127: a APPEND
- 128: L LONG -2147483647
- 141: a APPEND
- 142: L LONG -2147483648
- 155: a APPEND
- 156: ( MARK
- 157: V UNICODE 'abc'
- 162: p PUT 4
- 165: g GET 4
- 168: c GLOBAL 'copyreg _reconstructor'
- 192: p PUT 5
- 195: ( MARK
- 196: c GLOBAL '__main__ C'
- 208: p PUT 6
- 211: c GLOBAL 'builtins object'
- 228: p PUT 7
- 231: N NONE
- 232: t TUPLE (MARK at 195)
- 233: p PUT 8
- 236: R REDUCE
- 237: p PUT 9
- 240: ( MARK
- 241: d DICT (MARK at 240)
- 242: p PUT 10
- 246: V UNICODE 'foo'
- 251: p PUT 11
- 255: L LONG 1
- 258: s SETITEM
- 259: V UNICODE 'bar'
- 264: p PUT 12
- 268: L LONG 2
- 271: s SETITEM
- 272: b BUILD
- 273: g GET 9
- 276: t TUPLE (MARK at 156)
- 277: p PUT 13
- 281: a APPEND
- 282: g GET 13
- 286: a APPEND
- 287: L LONG 5
- 290: a APPEND
- 291: . STOP
+ 115: L LONG -65536
+ 124: a APPEND
+ 125: L LONG 2147483647
+ 138: a APPEND
+ 139: L LONG -2147483647
+ 153: a APPEND
+ 154: L LONG -2147483648
+ 168: a APPEND
+ 169: ( MARK
+ 170: V UNICODE 'abc'
+ 175: p PUT 4
+ 178: g GET 4
+ 181: c GLOBAL 'copyreg _reconstructor'
+ 205: p PUT 5
+ 208: ( MARK
+ 209: c GLOBAL '__main__ C'
+ 221: p PUT 6
+ 224: c GLOBAL 'builtins object'
+ 241: p PUT 7
+ 244: N NONE
+ 245: t TUPLE (MARK at 208)
+ 246: p PUT 8
+ 249: R REDUCE
+ 250: p PUT 9
+ 253: ( MARK
+ 254: d DICT (MARK at 253)
+ 255: p PUT 10
+ 259: V UNICODE 'foo'
+ 264: p PUT 11
+ 268: L LONG 1
+ 272: s SETITEM
+ 273: V UNICODE 'bar'
+ 278: p PUT 12
+ 282: L LONG 2
+ 286: s SETITEM
+ 287: b BUILD
+ 288: g GET 9
+ 291: t TUPLE (MARK at 169)
+ 292: p PUT 13
+ 296: a APPEND
+ 297: g GET 13
+ 301: a APPEND
+ 302: L LONG 5
+ 306: a APPEND
+ 307: . STOP
highest protocol among opcodes = 0
"""
diff --git a/Misc/NEWS b/Misc/NEWS
index 1050ae4..d20d20b 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -134,6 +134,9 @@ Core and Builtins
Library
-------
+- Issue #4842: Always append a trailing 'L' when pickling longs using
+ pickle protocol 0. When reading, the 'L' is optional.
+
- Add the importlib package.
- Issue #4301: Patch the logging module to add processName support, remove
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index 6cc90b3..02a3e44 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -846,8 +846,8 @@ save_int(PicklerObject *self, long x)
/* Text-mode pickle, or long too big to fit in the 4-byte
* signed BININT format: store as a string.
*/
- pdata[0] = LONG; /* use LONG for consistence with pickle.py */
- PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
+ pdata[0] = LONG; /* use LONG for consistency with pickle.py */
+ PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
if (pickler_write(self, pdata, strlen(pdata)) < 0)
return -1;
}
@@ -977,8 +977,9 @@ save_long(PicklerObject *self, PyObject *obj)
else {
char *string;
- /* proto < 2: write the repr and newline. This is quadratic-time
- (in the number of digits), in both directions. */
+ /* proto < 2: write the repr and newline. This is quadratic-time (in
+ the number of digits), in both directions. We add a trailing 'L'
+ to the repr, for compatibility with Python 2.x. */
repr = PyObject_Repr(obj);
if (repr == NULL)
@@ -990,7 +991,7 @@ save_long(PicklerObject *self, PyObject *obj)
if (pickler_write(self, &long_op, 1) < 0 ||
pickler_write(self, string, size) < 0 ||
- pickler_write(self, "\n", 1) < 0)
+ pickler_write(self, "L\n", 2) < 0)
goto error;
}
@@ -2880,7 +2881,7 @@ static int
load_long(UnpicklerObject *self)
{
PyObject *value;
- char *s;
+ char *s, *ss;
Py_ssize_t len;
if ((len = unpickler_readline(self, &s)) < 0)
@@ -2888,8 +2889,27 @@ load_long(UnpicklerObject *self)
if (len < 2)
return bad_readline();
- /* XXX: Should the base argument explicitly set to 10? */
- if ((value = PyLong_FromString(s, NULL, 0)) == NULL)
+ /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
+ the 'L' before calling PyLong_FromString. In order to maintain
+ compatibility with Python 3.0.0, we don't actually *require*
+ the 'L' to be present. */
+ if (s[len-2] == 'L') {
+ ss = (char *)PyMem_Malloc(len-1);
+ if (ss == NULL) {
+ PyErr_NoMemory();
+ return -1;
+ }
+ strncpy(ss, s, len-2);
+ ss[len-2] = '\0';
+
+ /* XXX: Should the base argument explicitly set to 10? */
+ value = PyLong_FromString(ss, NULL, 0);
+ PyMem_Free(ss);
+ }
+ else {
+ value = PyLong_FromString(s, NULL, 0);
+ }
+ if (value == NULL)
return -1;
PDATA_PUSH(self->stack, value, -1);