diff options
author | Lars Gustäbel <lars@gustaebel.de> | 2011-10-14 10:53:10 (GMT) |
---|---|---|
committer | Lars Gustäbel <lars@gustaebel.de> | 2011-10-14 10:53:10 (GMT) |
commit | 01277d166a993742814c772d01987fbaafb528d4 (patch) | |
tree | 1313156ef053cbd757ce804db6387c05b925f234 /Lib/tarfile.py | |
parent | 0e7e715a117ca5f29d53890550ff00447f0c2dc5 (diff) | |
parent | ac3d137a303d579d7b02af083fda90309ab9378a (diff) | |
download | cpython-01277d166a993742814c772d01987fbaafb528d4.zip cpython-01277d166a993742814c772d01987fbaafb528d4.tar.gz cpython-01277d166a993742814c772d01987fbaafb528d4.tar.bz2 |
Merge with 3.2: Issue #13158: Fix decoding and encoding of base-256 number fields in tarfile.
The nti() function that converts a number field from a tar header to a number
failed to decode GNU tar-specific base-256 fields. I also added support for
decoding and encoding negative base-256 number fields.
Diffstat (limited to 'Lib/tarfile.py')
-rw-r--r-- | Lib/tarfile.py | 43 |
1 file changed, 23 insertions, 20 deletions
```diff
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 2560562..39fe635 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -194,16 +194,18 @@ def nti(s):
     """
     # There are two possible encodings for a number field, see
     # itn() below.
-    if s[0] != chr(0o200):
+    if s[0] in (0o200, 0o377):
+        n = 0
+        for i in range(len(s) - 1):
+            n <<= 8
+            n += s[i + 1]
+        if s[0] == 0o377:
+            n = -(256 ** (len(s) - 1) - n)
+    else:
         try:
             n = int(nts(s, "ascii", "strict") or "0", 8)
         except ValueError:
             raise InvalidHeaderError("invalid header")
-    else:
-        n = 0
-        for i in range(len(s) - 1):
-            n <<= 8
-            n += ord(s[i + 1])
     return n
 
 def itn(n, digits=8, format=DEFAULT_FORMAT):
@@ -212,25 +214,26 @@ def itn(n, digits=8, format=DEFAULT_FORMAT):
     # POSIX 1003.1-1988 requires numbers to be encoded as a string of
     # octal digits followed by a null-byte, this allows values up to
     # (8**(digits-1))-1. GNU tar allows storing numbers greater than
-    # that if necessary. A leading 0o200 byte indicates this particular
-    # encoding, the following digits-1 bytes are a big-endian
-    # representation. This allows values up to (256**(digits-1))-1.
+    # that if necessary. A leading 0o200 or 0o377 byte indicate this
+    # particular encoding, the following digits-1 bytes are a big-endian
+    # base-256 representation. This allows values up to (256**(digits-1))-1.
+    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
+    # number.
     if 0 <= n < 8 ** (digits - 1):
         s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
-    else:
-        if format != GNU_FORMAT or n >= 256 ** (digits - 1):
-            raise ValueError("overflow in number field")
-
-        if n < 0:
-            # XXX We mimic GNU tar's behaviour with negative numbers,
-            # this could raise OverflowError.
-            n = struct.unpack("L", struct.pack("l", n))[0]
+    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
+        if n >= 0:
+            s = bytearray([0o200])
+        else:
+            s = bytearray([0o377])
+            n = 256 ** digits + n
 
-        s = bytearray()
         for i in range(digits - 1):
-            s.insert(0, n & 0o377)
+            s.insert(1, n & 0o377)
             n >>= 8
-        s.insert(0, 0o200)
+    else:
+        raise ValueError("overflow in number field")
+
     return s
 
 def calc_chksums(buf):
```