diff options
author | Jason R. Coombs <jaraco@jaraco.com> | 2023-09-25 23:46:58 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-25 23:46:58 (GMT) |
commit | e9791ba35175171170ff09094ea46b91fc18c654 (patch) | |
tree | 699b43373fb8353ed638bc7c70fb326e8007d33f /Lib/zipfile | |
parent | 25bb266fc876b344e31e0b5634a4db94912c1aba (diff) | |
download | cpython-e9791ba35175171170ff09094ea46b91fc18c654.zip cpython-e9791ba35175171170ff09094ea46b91fc18c654.tar.gz cpython-e9791ba35175171170ff09094ea46b91fc18c654.tar.bz2 |
gh-88233: zipfile: refactor _strip_extra (#102084)
* Refactor zipfile._strip_extra to use higher level abstractions for extras instead of a heavy-state loop.
* Add blurb
* Remove _strip_extra and use _Extra.strip directly.
* Use memoryview to avoid unnecessary copies while splitting Extras.
Diffstat (limited to 'Lib/zipfile')
-rw-r--r-- | Lib/zipfile/__init__.py | 60 |
1 files changed, 37 insertions, 23 deletions
diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py
index 9fc1840..2c963de 100644
--- a/Lib/zipfile/__init__.py
+++ b/Lib/zipfile/__init__.py
@@ -188,28 +188,42 @@ _CD64_OFFSET_START_CENTDIR = 9
 
 _DD_SIGNATURE = 0x08074b50
 
-_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
-
-def _strip_extra(extra, xids):
-    # Remove Extra Fields with specified IDs.
-    unpack = _EXTRA_FIELD_STRUCT.unpack
-    modified = False
-    buffer = []
-    start = i = 0
-    while i + 4 <= len(extra):
-        xid, xlen = unpack(extra[i : i + 4])
-        j = i + 4 + xlen
-        if xid in xids:
-            if i != start:
-                buffer.append(extra[start : i])
-            start = j
-            modified = True
-        i = j
-    if not modified:
-        return extra
-    if start != len(extra):
-        buffer.append(extra[start:])
-    return b''.join(buffer)
+
+class _Extra(bytes):
+    FIELD_STRUCT = struct.Struct('<HH')
+
+    def __new__(cls, val, id=None):
+        return super().__new__(cls, val)
+
+    def __init__(self, val, id=None):
+        self.id = id
+
+    @classmethod
+    def read_one(cls, raw):
+        try:
+            xid, xlen = cls.FIELD_STRUCT.unpack(raw[:4])
+        except struct.error:
+            xid = None
+            xlen = 0
+        return cls(raw[:4+xlen], xid), raw[4+xlen:]
+
+    @classmethod
+    def split(cls, data):
+        # use memoryview for zero-copy slices
+        rest = memoryview(data)
+        while rest:
+            extra, rest = _Extra.read_one(rest)
+            yield extra
+
+    @classmethod
+    def strip(cls, data, xids):
+        """Remove Extra fields with specified IDs."""
+        return b''.join(
+            ex
+            for ex in cls.split(data)
+            if ex.id not in xids
+        )
+
 
 def _check_zipfile(fp):
     try:
@@ -1963,7 +1977,7 @@ class ZipFile:
             min_version = 0
             if extra:
                 # Append a ZIP64 field to the extra's
-                extra_data = _strip_extra(extra_data, (1,))
+                extra_data = _Extra.strip(extra_data, (1,))
                 extra_data = struct.pack(
                     '<HH' + 'Q'*len(extra),
                     1, 8*len(extra), *extra) + extra_data