From a897aeeef647259a938a36cb5eb6680c86021c6a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 30 Nov 2017 23:26:11 +0200 Subject: bpo-32072: Fix issues with binary plists. (#4455) * Fixed saving bytearrays. * Identical objects will be saved only once. * Equal references will be load as identical objects. * Added support for saving and loading recursive data structures. --- Lib/plistlib.py | 87 +++++++++++++--------- Lib/test/test_plistlib.py | 55 ++++++++++++++ .../2017-11-18-21-13-52.bpo-32072.nwDV8L.rst | 6 ++ 3 files changed, 112 insertions(+), 36 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-11-18-21-13-52.bpo-32072.nwDV8L.rst diff --git a/Lib/plistlib.py b/Lib/plistlib.py index 2113a2d..21ebec3 100644 --- a/Lib/plistlib.py +++ b/Lib/plistlib.py @@ -525,6 +525,8 @@ class InvalidFileException (ValueError): _BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'} +_undefined = object() + class _BinaryPlistParser: """ Read or write a binary plist file, following the description of the binary @@ -555,7 +557,8 @@ class _BinaryPlistParser: ) = struct.unpack('>6xBBQQQ', trailer) self._fp.seek(offset_table_offset) self._object_offsets = self._read_ints(num_objects, offset_size) - return self._read_object(self._object_offsets[top_object]) + self._objects = [_undefined] * num_objects + return self._read_object(top_object) except (OSError, IndexError, struct.error, OverflowError, UnicodeDecodeError): @@ -584,62 +587,68 @@ class _BinaryPlistParser: def _read_refs(self, n): return self._read_ints(n, self._ref_size) - def _read_object(self, offset): + def _read_object(self, ref): """ - read the object at offset. + read the object by reference. May recursively read sub-objects (content of an array/dict/set) """ + result = self._objects[ref] + if result is not _undefined: + return result + + offset = self._object_offsets[ref] self._fp.seek(offset) token = self._fp.read(1)[0] tokenH, tokenL = token & 0xF0, token & 0x0F if token == 0x00: - return None + result = None elif token == 0x08: - return False + result = False elif token == 0x09: - return True + result = True # The referenced source code also mentions URL (0x0c, 0x0d) and # UUID (0x0e), but neither can be generated using the Cocoa libraries. elif token == 0x0f: - return b'' + result = b'' elif tokenH == 0x10: # int - return int.from_bytes(self._fp.read(1 << tokenL), - 'big', signed=tokenL >= 3) + result = int.from_bytes(self._fp.read(1 << tokenL), + 'big', signed=tokenL >= 3) elif token == 0x22: # real - return struct.unpack('>f', self._fp.read(4))[0] + result = struct.unpack('>f', self._fp.read(4))[0] elif token == 0x23: # real - return struct.unpack('>d', self._fp.read(8))[0] + result = struct.unpack('>d', self._fp.read(8))[0] elif token == 0x33: # date f = struct.unpack('>d', self._fp.read(8))[0] # timestamp 0 of binary plists corresponds to 1/1/2001 # (year of Mac OS X 10.0), instead of 1/1/1970. - return datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=f) + result = (datetime.datetime(2001, 1, 1) + + datetime.timedelta(seconds=f)) elif tokenH == 0x40: # data s = self._get_size(tokenL) if self._use_builtin_types: - return self._fp.read(s) + result = self._fp.read(s) else: - return Data(self._fp.read(s)) + result = Data(self._fp.read(s)) elif tokenH == 0x50: # ascii string s = self._get_size(tokenL) result = self._fp.read(s).decode('ascii') - return result + result = result elif tokenH == 0x60: # unicode string s = self._get_size(tokenL) - return self._fp.read(s * 2).decode('utf-16be') + result = self._fp.read(s * 2).decode('utf-16be') # tokenH == 0x80 is documented as 'UID' and appears to be used for # keyed-archiving, not in plists. @@ -647,8 +656,9 @@ class _BinaryPlistParser: elif tokenH == 0xA0: # array s = self._get_size(tokenL) obj_refs = self._read_refs(s) - return [self._read_object(self._object_offsets[x]) - for x in obj_refs] + result = [] + self._objects[ref] = result + result.extend(self._read_object(x) for x in obj_refs) # tokenH == 0xB0 is documented as 'ordset', but is not actually # implemented in the Apple reference code. @@ -661,12 +671,15 @@ class _BinaryPlistParser: key_refs = self._read_refs(s) obj_refs = self._read_refs(s) result = self._dict_type() + self._objects[ref] = result for k, o in zip(key_refs, obj_refs): - result[self._read_object(self._object_offsets[k]) - ] = self._read_object(self._object_offsets[o]) - return result + result[self._read_object(k)] = self._read_object(o) - raise InvalidFileException() + else: + raise InvalidFileException() + + self._objects[ref] = result + return result def _count_to_size(count): if count < 1 << 8: @@ -681,6 +694,8 @@ def _count_to_size(count): else: return 8 +_scalars = (str, int, float, datetime.datetime, bytes) + class _BinaryPlistWriter (object): def __init__(self, fp, sort_keys, skipkeys): self._fp = fp @@ -736,8 +751,7 @@ class _BinaryPlistWriter (object): # First check if the object is in the object table, not used for # containers to ensure that two subcontainers with the same contents # will be serialized as distinct values. - if isinstance(value, ( - str, int, float, datetime.datetime, bytes, bytearray)): + if isinstance(value, _scalars): if (type(value), value) in self._objtable: return @@ -745,15 +759,17 @@ class _BinaryPlistWriter (object): if (type(value.data), value.data) in self._objtable: return + elif id(value) in self._objidtable: + return + # Add to objectreference map refnum = len(self._objlist) self._objlist.append(value) - try: - if isinstance(value, Data): - self._objtable[(type(value.data), value.data)] = refnum - else: - self._objtable[(type(value), value)] = refnum - except TypeError: + if isinstance(value, _scalars): + self._objtable[(type(value), value)] = refnum + elif isinstance(value, Data): + self._objtable[(type(value.data), value.data)] = refnum + else: self._objidtable[id(value)] = refnum # And finally recurse into containers @@ -780,12 +796,11 @@ class _BinaryPlistWriter (object): self._flatten(o) def _getrefnum(self, value): - try: - if isinstance(value, Data): - return self._objtable[(type(value.data), value.data)] - else: - return self._objtable[(type(value), value)] - except TypeError: + if isinstance(value, _scalars): + return self._objtable[(type(value), value)] + elif isinstance(value, Data): + return self._objtable[(type(value.data), value.data)] + else: return self._objidtable[id(value)] def _write_size(self, token, size): diff --git a/Lib/test/test_plistlib.py b/Lib/test/test_plistlib.py index 7dd179a..ae8b623 100644 --- a/Lib/test/test_plistlib.py +++ b/Lib/test/test_plistlib.py @@ -169,6 +169,17 @@ class TestPlistlib(unittest.TestCase): self.assertRaises(OverflowError, plistlib.dumps, pl, fmt=fmt) + def test_bytearray(self): + for pl in (b'', b"\0\1\2\3" * 10): + for fmt in ALL_FORMATS: + with self.subTest(pl=pl, fmt=fmt): + data = plistlib.dumps(bytearray(pl), fmt=fmt) + pl2 = plistlib.loads(data) + self.assertIsInstance(pl2, bytes) + self.assertEqual(pl2, pl) + data2 = plistlib.dumps(pl2, fmt=fmt) + self.assertEqual(data, data2) + def test_bytes(self): pl = self._create() data = plistlib.dumps(pl) @@ -431,6 +442,9 @@ class TestPlistlib(unittest.TestCase): pl2 = plistlib.loads(data) self.assertEqual(dict(pl), dict(pl2)) + +class TestBinaryPlistlib(unittest.TestCase): + def test_nonstandard_refs_size(self): # Issue #21538: Refs and offsets are 24-bit integers data = (b'bplist00' @@ -443,6 +457,47 @@ class TestPlistlib(unittest.TestCase): b'\x00\x00\x00\x00\x00\x00\x00\x13') self.assertEqual(plistlib.loads(data), {'a': 'b'}) + def test_dump_duplicates(self): + # Test effectiveness of saving duplicated objects + for x in (None, False, True, 12345, 123.45, 'abcde', b'abcde', + datetime.datetime(2004, 10, 26, 10, 33, 33), + plistlib.Data(b'abcde'), bytearray(b'abcde'), + [12, 345], (12, 345), {'12': 345}): + with self.subTest(x=x): + data = plistlib.dumps([x]*1000, fmt=plistlib.FMT_BINARY) + self.assertLess(len(data), 1100, repr(data)) + + def test_identity(self): + for x in (None, False, True, 12345, 123.45, 'abcde', b'abcde', + datetime.datetime(2004, 10, 26, 10, 33, 33), + plistlib.Data(b'abcde'), bytearray(b'abcde'), + [12, 345], (12, 345), {'12': 345}): + with self.subTest(x=x): + data = plistlib.dumps([x]*2, fmt=plistlib.FMT_BINARY) + a, b = plistlib.loads(data) + if isinstance(x, tuple): + x = list(x) + self.assertEqual(a, x) + self.assertEqual(b, x) + self.assertIs(a, b) + + def test_cycles(self): + # recursive list + a = [] + a.append(a) + b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY)) + self.assertIs(b[0], b) + # recursive tuple + a = ([],) + a[0].append(a) + b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY)) + self.assertIs(b[0][0], b) + # recursive dict + a = {} + a['x'] = a + b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY)) + self.assertIs(b['x'], b) + def test_large_timestamp(self): # Issue #26709: 32-bit timestamp out of range for ts in -2**31-1, 2**31: diff --git a/Misc/NEWS.d/next/Library/2017-11-18-21-13-52.bpo-32072.nwDV8L.rst b/Misc/NEWS.d/next/Library/2017-11-18-21-13-52.bpo-32072.nwDV8L.rst new file mode 100644 index 0000000..6da5bb4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-11-18-21-13-52.bpo-32072.nwDV8L.rst @@ -0,0 +1,6 @@ +Fixed issues with binary plists: + +* Fixed saving bytearrays. +* Identical objects will be saved only once. +* Equal references will be load as identical objects. +* Added support for saving and loading recursive data structures. -- cgit v0.12