From cdde8e653babf5d705b5e56b8662b7bb842b1fce Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 20 May 2026 12:28:56 +0300 Subject: [PATCH] gh-150144: Speed up reading gzip files with long filename or comment --- Lib/gzip.py | 16 ++++++- Lib/test/test_gzip.py | 42 +++++++++++++++++++ ...-05-20-12-27-06.gh-issue-150144.m6dAoT.rst | 1 + 3 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2026-05-20-12-27-06.gh-issue-150144.m6dAoT.rst diff --git a/Lib/gzip.py b/Lib/gzip.py index 1e05f43c0c9e24..c354270f308120 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -488,7 +488,21 @@ def _read_until_null(fp, crc=None): '''Read until the first encountered null byte in fp. If crc is not None, update and return the CRC. ''' - if crc is None: + if isinstance(fp, _PaddedFile): + size = 1 + while True: + s = fp.read(size) + if not s: + break + i = s.find(0) + 1 + if crc is not None: + crc = zlib.crc32(s[:i] if i else s, crc) + if i: + fp.prepend(s[i:]) + break + if size < 2**20: + size *= 2 + elif crc is None: while True: s = fp.read(1) if not s or s == b'\000': diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index b3b7c8f87e4f9f..6adaaafa614704 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -10,6 +10,7 @@ import sys import unittest from subprocess import PIPE, Popen +from test import support from test.support import catch_unraisable_exception from test.support import force_not_colorized_test_class, import_helper from test.support import os_helper @@ -824,6 +825,47 @@ def test_corrupted_gzip_header(self): f"Corrupted gzip header. Checksums do not " f"match: {true_crc:04x} != {corrupted_crc:04x}") + def _test_long_header(self, flags): + with_crc = flags & 0x02 + prefix = b'\x1f\x8b\x08' + bytes([flags]) + b'\x00\x00\x00\x00\x00\xff' + suffix = b'\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00' + with open(self.filename, 'wb') as f: + f.write(prefix) + if with_crc: + crc = zlib.crc32(prefix) + block = b'ABCDEFGHIJKLMNOP' * 2**16 # 1 MiB + for i in range(1024): + f.write(block) + if with_crc: + crc = zlib.crc32(block, crc) + f.write(b'\x00') + if with_crc: + crc = zlib.crc32(b'\x00', crc) + f.write(struct.pack("