From 232a73de31d4867a6c0b4d66a33d47eb8c4ff7ff Mon Sep 17 00:00:00 2001 From: Robie Basak Date: Wed, 5 Jul 2023 15:32:12 +0100 Subject: [PATCH] ubuntutools/misc: swap iter_content for raw stream This is a partial revert of 1e20363. When downloading a .diff.gz source package file, we do expect it to be written to disk still compressed. If we were to uncompress it, then we would get a size mismatch and even if we were to ignore that, we'd get a hash mismatch. On the other hand when downloading a changes file we need to make sure that is written to disk uncompressed. To make this work in both cases we can ask the HTTP server for no special content encoding using "Accept-Encoding: identity". This is what wget requests, for example. Then we can write the output to the file without performing any decoding at our end by using the raw response object again. This fixes both cases. LP: #2025748 --- ubuntutools/misc.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/ubuntutools/misc.py b/ubuntutools/misc.py index 3f8f70e..0b31e42 100644 --- a/ubuntutools/misc.py +++ b/ubuntutools/misc.py @@ -348,7 +348,11 @@ def download(src, dst, size=0, *, blocksize=DOWNLOAD_BLOCKSIZE_DEFAULT): with tempfile.TemporaryDirectory() as tmpdir: tmpdst = Path(tmpdir) / "dst" try: - with requests.get(src, stream=True, timeout=60, auth=auth) as fsrc: + # We must use "Accept-Encoding: identity" so that Launchpad doesn't + # compress changes files. See LP: #2025748. + with requests.get( + src, stream=True, timeout=60, auth=auth, headers={"accept-encoding": "identity"} + ) as fsrc: with tmpdst.open("wb") as fdst: fsrc.raise_for_status() _download(fsrc, fdst, size, blocksize=blocksize) @@ -433,7 +437,16 @@ def _download(fsrc, fdst, size, *, blocksize): downloaded = 0 try: - for block in fsrc.iter_content(blocksize): + while True: + # We use fsrc.raw so that compressed files stay compressed as we + # write them to disk. For example, if this is a .diff.gz, then it + # needs to remain compressed and unmodified to remain valid as part + # of a source package later, even though Launchpad sends + # "Content-Encoding: gzip" and the requests library therefore would + # want to decompress it. See LP: #2025748. + block = fsrc.raw.read(blocksize) + if not block: + break fdst.write(block) downloaded += len(block) progress_bar.update(downloaded, size)