summary | refs | log | tree | commit | diff | stats
path: root/Lib/zipfile
diff options
context:
space:
mode:
author: Jason R. Coombs <jaraco@jaraco.com>, 2023-09-25 23:46:58 (GMT)
committer: GitHub <noreply@github.com>, 2023-09-25 23:46:58 (GMT)
commit: e9791ba35175171170ff09094ea46b91fc18c654 (patch)
tree: 699b43373fb8353ed638bc7c70fb326e8007d33f /Lib/zipfile
parent: 25bb266fc876b344e31e0b5634a4db94912c1aba (diff)
download: cpython-e9791ba35175171170ff09094ea46b91fc18c654.zip
          cpython-e9791ba35175171170ff09094ea46b91fc18c654.tar.gz
          cpython-e9791ba35175171170ff09094ea46b91fc18c654.tar.bz2
gh-88233: zipfile: refactor _strip_extra (#102084)
* Refactor zipfile._strip_extra to use higher level abstractions for extras instead of a heavy-state loop.
* Add blurb
* Remove _strip_extra and use _Extra.strip directly.
* Use memoryview to avoid unnecessary copies while splitting Extras.
Diffstat (limited to 'Lib/zipfile')
-rw-r--r-- Lib/zipfile/__init__.py | 60
1 file changed, 37 insertions, 23 deletions
diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py
index 9fc1840..2c963de 100644
--- a/Lib/zipfile/__init__.py
+++ b/Lib/zipfile/__init__.py
@@ -188,28 +188,42 @@ _CD64_OFFSET_START_CENTDIR = 9
 _DD_SIGNATURE = 0x08074b50
-_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
-
-def _strip_extra(extra, xids):
-    # Remove Extra Fields with specified IDs.
-    unpack = _EXTRA_FIELD_STRUCT.unpack
-    modified = False
-    buffer = []
-    start = i = 0
-    while i + 4 <= len(extra):
-        xid, xlen = unpack(extra[i : i + 4])
-        j = i + 4 + xlen
-        if xid in xids:
-            if i != start:
-                buffer.append(extra[start : i])
-            start = j
-            modified = True
-        i = j
-    if not modified:
-        return extra
-    if start != len(extra):
-        buffer.append(extra[start:])
-    return b''.join(buffer)
+
+class _Extra(bytes):
+    FIELD_STRUCT = struct.Struct('<HH')
+
+    def __new__(cls, val, id=None):
+        return super().__new__(cls, val)
+
+    def __init__(self, val, id=None):
+        self.id = id
+
+    @classmethod
+    def read_one(cls, raw):
+        try:
+            xid, xlen = cls.FIELD_STRUCT.unpack(raw[:4])
+        except struct.error:
+            xid = None
+            xlen = 0
+        return cls(raw[:4+xlen], xid), raw[4+xlen:]
+
+    @classmethod
+    def split(cls, data):
+        # use memoryview for zero-copy slices
+        rest = memoryview(data)
+        while rest:
+            extra, rest = _Extra.read_one(rest)
+            yield extra
+
+    @classmethod
+    def strip(cls, data, xids):
+        """Remove Extra fields with specified IDs."""
+        return b''.join(
+            ex
+            for ex in cls.split(data)
+            if ex.id not in xids
+        )
+
 def _check_zipfile(fp):
     try:
@@ -1963,7 +1977,7 @@ class ZipFile:
             min_version = 0
             if extra:
                 # Append a ZIP64 field to the extra's
-                extra_data = _strip_extra(extra_data, (1,))
+                extra_data = _Extra.strip(extra_data, (1,))
                 extra_data = struct.pack(
                     '<HH' + 'Q'*len(extra),
                     1, 8*len(extra), *extra) + extra_data