diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2017-11-30 21:26:11 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-11-30 21:26:11 (GMT) |
commit | a897aeeef647259a938a36cb5eb6680c86021c6a (patch) | |
tree | 2659012d46b7222259d37a81a5f3d5fb330310eb /Lib/plistlib.py | |
parent | b4d1e1f7c1af6ae33f0e371576c8bcafedb099db (diff) | |
download | cpython-a897aeeef647259a938a36cb5eb6680c86021c6a.zip cpython-a897aeeef647259a938a36cb5eb6680c86021c6a.tar.gz cpython-a897aeeef647259a938a36cb5eb6680c86021c6a.tar.bz2 |
bpo-32072: Fix issues with binary plists. (#4455)
* Fixed saving bytearrays.
* Identical objects will be saved only once.
* Equal references will be load as identical objects.
* Added support for saving and loading recursive data structures.
Diffstat (limited to 'Lib/plistlib.py')
-rw-r--r-- | Lib/plistlib.py | 87 |
1 files changed, 51 insertions, 36 deletions
diff --git a/Lib/plistlib.py b/Lib/plistlib.py index 2113a2d..21ebec3 100644 --- a/Lib/plistlib.py +++ b/Lib/plistlib.py @@ -525,6 +525,8 @@ class InvalidFileException (ValueError): _BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'} +_undefined = object() + class _BinaryPlistParser: """ Read or write a binary plist file, following the description of the binary @@ -555,7 +557,8 @@ class _BinaryPlistParser: ) = struct.unpack('>6xBBQQQ', trailer) self._fp.seek(offset_table_offset) self._object_offsets = self._read_ints(num_objects, offset_size) - return self._read_object(self._object_offsets[top_object]) + self._objects = [_undefined] * num_objects + return self._read_object(top_object) except (OSError, IndexError, struct.error, OverflowError, UnicodeDecodeError): @@ -584,62 +587,68 @@ class _BinaryPlistParser: def _read_refs(self, n): return self._read_ints(n, self._ref_size) - def _read_object(self, offset): + def _read_object(self, ref): """ - read the object at offset. + read the object by reference. May recursively read sub-objects (content of an array/dict/set) """ + result = self._objects[ref] + if result is not _undefined: + return result + + offset = self._object_offsets[ref] self._fp.seek(offset) token = self._fp.read(1)[0] tokenH, tokenL = token & 0xF0, token & 0x0F if token == 0x00: - return None + result = None elif token == 0x08: - return False + result = False elif token == 0x09: - return True + result = True # The referenced source code also mentions URL (0x0c, 0x0d) and # UUID (0x0e), but neither can be generated using the Cocoa libraries. elif token == 0x0f: - return b'' + result = b'' elif tokenH == 0x10: # int - return int.from_bytes(self._fp.read(1 << tokenL), - 'big', signed=tokenL >= 3) + result = int.from_bytes(self._fp.read(1 << tokenL), + 'big', signed=tokenL >= 3) elif token == 0x22: # real - return struct.unpack('>f', self._fp.read(4))[0] + result = struct.unpack('>f', self._fp.read(4))[0] elif token == 0x23: # real - return struct.unpack('>d', self._fp.read(8))[0] + result = struct.unpack('>d', self._fp.read(8))[0] elif token == 0x33: # date f = struct.unpack('>d', self._fp.read(8))[0] # timestamp 0 of binary plists corresponds to 1/1/2001 # (year of Mac OS X 10.0), instead of 1/1/1970. - return datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=f) + result = (datetime.datetime(2001, 1, 1) + + datetime.timedelta(seconds=f)) elif tokenH == 0x40: # data s = self._get_size(tokenL) if self._use_builtin_types: - return self._fp.read(s) + result = self._fp.read(s) else: - return Data(self._fp.read(s)) + result = Data(self._fp.read(s)) elif tokenH == 0x50: # ascii string s = self._get_size(tokenL) result = self._fp.read(s).decode('ascii') - return result + result = result elif tokenH == 0x60: # unicode string s = self._get_size(tokenL) - return self._fp.read(s * 2).decode('utf-16be') + result = self._fp.read(s * 2).decode('utf-16be') # tokenH == 0x80 is documented as 'UID' and appears to be used for # keyed-archiving, not in plists. @@ -647,8 +656,9 @@ class _BinaryPlistParser: elif tokenH == 0xA0: # array s = self._get_size(tokenL) obj_refs = self._read_refs(s) - return [self._read_object(self._object_offsets[x]) - for x in obj_refs] + result = [] + self._objects[ref] = result + result.extend(self._read_object(x) for x in obj_refs) # tokenH == 0xB0 is documented as 'ordset', but is not actually # implemented in the Apple reference code. @@ -661,12 +671,15 @@ class _BinaryPlistParser: key_refs = self._read_refs(s) obj_refs = self._read_refs(s) result = self._dict_type() + self._objects[ref] = result for k, o in zip(key_refs, obj_refs): - result[self._read_object(self._object_offsets[k]) - ] = self._read_object(self._object_offsets[o]) - return result + result[self._read_object(k)] = self._read_object(o) - raise InvalidFileException() + else: + raise InvalidFileException() + + self._objects[ref] = result + return result def _count_to_size(count): if count < 1 << 8: @@ -681,6 +694,8 @@ def _count_to_size(count): else: return 8 +_scalars = (str, int, float, datetime.datetime, bytes) + class _BinaryPlistWriter (object): def __init__(self, fp, sort_keys, skipkeys): self._fp = fp @@ -736,8 +751,7 @@ class _BinaryPlistWriter (object): # First check if the object is in the object table, not used for # containers to ensure that two subcontainers with the same contents # will be serialized as distinct values. - if isinstance(value, ( - str, int, float, datetime.datetime, bytes, bytearray)): + if isinstance(value, _scalars): if (type(value), value) in self._objtable: return @@ -745,15 +759,17 @@ class _BinaryPlistWriter (object): if (type(value.data), value.data) in self._objtable: return + elif id(value) in self._objidtable: + return + # Add to objectreference map refnum = len(self._objlist) self._objlist.append(value) - try: - if isinstance(value, Data): - self._objtable[(type(value.data), value.data)] = refnum - else: - self._objtable[(type(value), value)] = refnum - except TypeError: + if isinstance(value, _scalars): + self._objtable[(type(value), value)] = refnum + elif isinstance(value, Data): + self._objtable[(type(value.data), value.data)] = refnum + else: self._objidtable[id(value)] = refnum # And finally recurse into containers @@ -780,12 +796,11 @@ class _BinaryPlistWriter (object): self._flatten(o) def _getrefnum(self, value): - try: - if isinstance(value, Data): - return self._objtable[(type(value.data), value.data)] - else: - return self._objtable[(type(value), value)] - except TypeError: + if isinstance(value, _scalars): + return self._objtable[(type(value), value)] + elif isinstance(value, Data): + return self._objtable[(type(value.data), value.data)] + else: return self._objidtable[id(value)] def _write_size(self, token, size): |