From ffe99d40087d119ebd5641da93f33cecd67ee03a Mon Sep 17 00:00:00 2001 From: Michael Hudson-Doyle Date: Thu, 25 Jan 2024 07:30:41 +1300 Subject: [PATCH] Remove magic-proxy and all references to it as it is not used by any builds of any release newer than 18.04. --- debian/changelog | 7 + debian/install | 2 - live-build/auto/build | 81 -- live-build/ubuntu-cpc/hooks.d/make-hooks | 4 - lp-in-release | 1 - magic-proxy | 1046 ---------------------- 6 files changed, 7 insertions(+), 1134 deletions(-) delete mode 120000 lp-in-release delete mode 100755 magic-proxy diff --git a/debian/changelog b/debian/changelog index 25412e22..d378c1e7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +livecd-rootfs (24.04.23) UNRELEASED; urgency=medium + + * Remove magic-proxy and all references to it as it is not used by any + builds of any release newer than 18.04. + + -- Michael Hudson-Doyle Thu, 25 Jan 2024 07:30:03 +1300 + livecd-rootfs (24.04.22) noble; urgency=medium * Add a largemem subarch for ubuntu-server that ships a 64k kernel variant diff --git a/debian/install b/debian/install index d2e56bb2..665a5443 100644 --- a/debian/install +++ b/debian/install @@ -2,7 +2,5 @@ auto-markable-pkgs usr/share/livecd-rootfs live-build usr/share/livecd-rootfs get-ppa-fingerprint usr/share/livecd-rootfs minimize-manual usr/share/livecd-rootfs -magic-proxy usr/share/livecd-rootfs -lp-in-release usr/share/livecd-rootfs checkout-translations-branch usr/share/livecd-rootfs update-source-catalog usr/share/livecd-rootfs diff --git a/live-build/auto/build b/live-build/auto/build index 5429a2fd..0c07a772 100755 --- a/live-build/auto/build +++ b/live-build/auto/build @@ -17,72 +17,6 @@ fi . config/functions -# New nf_tables-based versions of iptables don't work well on old kernels. -# We aren't sure exactly how old is a problem: 4.15 works, but with 4.4 new -# rules are added to all chains in the requested table rather than just one, -# and the new rules seem to have no useful effect. In such cases, -# iptables-legacy works better. -# -# We can simplify this once livecd-rootfs no longer needs to support running -# on Ubuntu 16.04 (that is, once Launchpad's build VMs are upgraded to -# Ubuntu 18.04). -run_iptables () { - local kver kver_major kver_minor - - kver="$(uname -r)" - kver="${kver%%-*}" - kver_major="${kver%%.*}" - kver="${kver#*.}" - kver_minor="${kver%%.*}" - - - # LP: #1917920 - # I'm seeing issues after iptables got upgraded from 1.8.5 to - # 1.8.7 Somehow installing our nat rule doesn't get activated, and - # no networking is happening at all. - - # But somehow calling both iptables -S makes things start working. - # Maybe no default chains are installed in our network namespace?! - # Or 1.8.7 is somehow broken? - iptables -v -t nat -S - iptables-legacy -v -t nat -S - - if [ "$kver_major" -lt 4 ] || \ - ([ "$kver_major" = 4 ] && [ "$kver_minor" -lt 15 ]); then - iptables-legacy "$@" - else - iptables "$@" - fi -} - -if [ -n "$REPO_SNAPSHOT_STAMP" ]; then - if [ "`whoami`" != "root" ]; then - echo "Magic repo snapshots only work when running as root." >&2 - exit 1 - fi - - apt-get -qyy install iptables - - # Redirect all outgoing traffic to port 80 to proxy instead. - run_iptables -v -t nat -A OUTPUT -p tcp --dport 80 \ - -m owner ! --uid-owner daemon -j REDIRECT --to 8080 - - # Run proxy as "daemon" to avoid infinite loop. - LB_PARENT_MIRROR_BOOTSTRAP=$LB_PARENT_MIRROR_BOOTSTRAP \ - /usr/share/livecd-rootfs/magic-proxy \ - --address="127.0.0.1" \ - --port=8080 \ - --run-as=daemon \ - --cutoff-time="$REPO_SNAPSHOT_STAMP" \ - --log-file=/build/livecd.magic-proxy.log \ - --pid-file=config/magic-proxy.pid \ - --background \ - --setsid - - # Quick check that magic proxy & iptables chains are working - timeout 3m apt-get update -fi - # Link output files somewhere launchpad-buildd will be able to find them. PREFIX="livecd.$PROJECT${SUBARCH:+-$SUBARCH}" @@ -546,12 +480,6 @@ EOF if [ -e binary.success ]; then rm -f binary.success else - # Dump the magic-proxy log to stdout on failure to aid debugging - if [ -f /build/livecd.magic-proxy.log ] ; then - echo "================= Magic proxy log (start) =================" - cat /build/livecd.magic-proxy.log - echo "================== Magic proxy log (end) ==================" - fi exit 1 fi @@ -712,15 +640,6 @@ case $SUBARCH in ;; esac -if [ -f "config/magic-proxy.pid" ]; then - kill -TERM $(cat config/magic-proxy.pid) - rm -f config/magic-proxy.pid - - # Remove previously-inserted iptables rule. - run_iptables -t nat -D OUTPUT -p tcp --dport 80 \ - -m owner ! --uid-owner daemon -j REDIRECT --to 8080 -fi - case $PROJECT in ubuntu-cpc) config/hooks.d/remove-implicit-artifacts diff --git a/live-build/ubuntu-cpc/hooks.d/make-hooks b/live-build/ubuntu-cpc/hooks.d/make-hooks index 25c8e0dd..d612ca8c 100755 --- a/live-build/ubuntu-cpc/hooks.d/make-hooks +++ b/live-build/ubuntu-cpc/hooks.d/make-hooks @@ -237,14 +237,10 @@ class MakeHooks: """ with open(os.path.join(self._script_dir, "explicit_provides"), "w", encoding="utf-8") as fp: - empty = True for provides in self._provides: if not self._quiet: print("[PROVIDES] %s" % provides) fp.write("%s\n" % provides) - empty = False - if not empty: - fp.write('livecd.magic-proxy.log\n') def cli(self, args): """Command line interface to the hooks generator.""" diff --git a/lp-in-release b/lp-in-release deleted file mode 120000 index fbc6dd86..00000000 --- a/lp-in-release +++ /dev/null @@ -1 +0,0 @@ -magic-proxy \ No newline at end of file diff --git a/magic-proxy b/magic-proxy deleted file mode 100755 index af765c32..00000000 --- a/magic-proxy +++ /dev/null @@ -1,1046 +0,0 @@ -#!/usr/bin/python3 -u -#-*- encoding: utf-8 -*- -""" -This script can be called as "lp-in-release" or as "magic-proxy". When called -under the former name, it acts as a CLI tool, when called under the latter name -it will act as a transparent HTTP proxy. - -The CLI tool parses the directory listing of - - http:///dists/suite/by-hash/SHA256 - -and figures out which hashes belong to an InRelease file. For example, to list -all available hashes for "cosmic" run - - ./lp-in-release list --suite cosmic - -Per default the script scans archive.ubuntu.com, but you can tell it to use a -different mirror with the --mirror-url command line parameter. Analogously, you -can list the hashes for "cosmic-updates" or "cosmic-security". The script can -also find the hash that was valid at a given timestamp via - - ./lp-in-release select --suite cosmic --cutoff-time - -Finally, you can use the script to inject inrelease-path settings into a -sources.list file via - - ./lp-in-release inject --cutoff-time /etc/apt/sources.list - -The proxy is just an extension to this functionality. Whenever a URL points at -an InRelease file or a path listed in an InRelease file, the proxy will -automatically inject the by hash URL for the resource according to the timestamp -it was configured for. The proxy works in transparent and non-transparent mode. -""" -from datetime import datetime, timedelta, tzinfo - -import argparse -import copy -import fcntl -import getopt -import hashlib -import http.client -import http.server -import json -import os -import pwd -import re -import shutil -import socketserver -import sys -import threading -import time -import urllib.error -import urllib.parse -import urllib.request - -EXIT_OK = 0 -EXIT_ERR = 1 - -class LPInReleaseBaseError(Exception): - pass - -class LPInReleaseIndexError(LPInReleaseBaseError): - pass - -class LPInReleaseCacheError(LPInReleaseBaseError): - pass - -class LPInReleaseProxyError(LPInReleaseBaseError): - pass - -IN_LP = "http://ftpmaster.internal/ubuntu" in os.environ.get("LB_PARENT_MIRROR_BOOTSTRAP", "") - -# We cannot proxy & rewrite https requests Thus apt will talk to us -# over http But we must upgrade to https for private-ppas, outside of -# launchpad hence use this helper to re-write urls. -def get_uri(host, path): - if host in ("private-ppa.launchpad.net", "private-ppa.buildd"): - if IN_LP: - return "http://private-ppa.buildd" + path - else: - return "https://private-ppa.launchpad.net" + path - # TODO add split mirror handling for ftpmaster.internal => - # (ports|archive).ubuntu.com - return "http://" + host + path - -def initialize_auth(): - auth_handler = urllib.request.HTTPBasicAuthHandler() - with open('/etc/apt/sources.list') as f: - for line in f.readlines(): - for word in line.split(): - if not word.startswith('http'): - continue - parse=urllib.parse.urlparse(word) - if not parse.username: - continue - if parse.hostname not in ("private-ppa.launchpad.net", "private-ppa.buildd"): - continue - auth_handler.add_password( - "Token Required", "https://private-ppa.launchpad.net" + parse.path, - parse.username, parse.password) - auth_handler.add_password( - "Token Required", "http://private-ppa.buildd" + parse.path, - parse.username, parse.password) - print("add password for", parse.path) - opener = urllib.request.build_opener(auth_handler) - urllib.request.install_opener(opener) - -initialize_auth() - -class InRelease: - """This class represents an InRelease file.""" - - def __init__(self, mirror, suite, data, hash_=None, last_modified=None): - """mirror must contain the proper URL of the package repository up to - the "dists" folder, e.g. - - http://archive.ubuntu.com/ubuntu - - suite is the name of the suite this InRelease file belongs to, e.g. - , -updates or -security. - - data must contain the full contents of the InReleaes file as a unicode - string. - - If supplied, then hash_ will be used as the sha256 hexdigest of the - binary encoding of the InRelease file. If not supplied, the hash will - be calculated. This is just used as a time-saver, when cache contents - are read back in. - - last_modified must be a string of format - - Thu, 26 Apr 2018 23:37:48 UTC - - representing the publication time of the InRelease file. If not given, - the generation time stored in the InRelease file will be used. Below, - this is set explicitly to correspond to the Last-Modified header spat - out by the Web server. - """ - parsed = urllib.parse.urlparse(mirror) - self.mirror = get_uri(parsed.hostname, parsed.path) - self.suite = suite - self.data = data - self.dict = {} - - if hash_: - self.hash = hash_ - else: - h = hashlib.sha256() - h.update(data.encode("utf-8")) - self.hash = h.hexdigest() - - if last_modified: - self.published = self._parse_datetime(last_modified) - else: - self.published = self._extract_timestamp(data) - - @property - def datetime(self): - """Return the publication time of this InRelease file as a string in - YYYY-MM-DD HH:MM:SS ISO format. The result is always in GMT.""" - return datetime \ - .utcfromtimestamp(self.published) \ - .strftime('%Y-%m-%d %H:%M:%S') - - @property - def normalized_address(self): - """Return the "normalized" address of the mirror URL, consisting of - only the hostname and the path. This may be used as an index into an - InReleaseCache.""" - result = urllib.parse.urlparse(self.mirror) - address = result.hostname + result.path.rstrip("/") - return address - - @property - def contents(self): - """Return the pure contents of the InRelease file with the signature - stripped off.""" - return self._split_release_and_sig(self.data)[0] - - @property - def signature(self): - """Return the ASCII-armored PGP signature of the InRelease file.""" - return self._split_release_and_sig(self.data)[1] - - def serialize(self): - """Serializes the InRelease object into Python structures to be stored - in an InReleaseCache.""" - month_names = [ "_ignore_", - "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", - ] - - wkday_names = [ - "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun", - ] - - dt = datetime.utcfromtimestamp(self.published) - - published = "{}, {:02} {} {} {:02}:{:02}:{:02} GMT".format( - wkday_names[dt.weekday()], - dt.day, - month_names[dt.month], - dt.year, - dt.hour, - dt.minute, - dt.second - ) - - return { - "mirror": self.mirror, - "suite": self.suite, - "hash": self.hash, - "published": published, - "data": self.data, - } - - def get_hash_for(self, path): - """Check if the given path is listed in this InRelease file and if so - return the corresponding hash in hexdigest format. If the path is not - listed, None is returned.""" - if not self.dict: - self._parse_contents() - return self.dict.get(path) - - def _parse_contents(self): - """This method parses out all lines containing SHA256 hashes and creates - an internal dict, mapping resources to hashes.""" - regex = re.compile( - r" (?P[0-9a-f]{64})\s+(?P\d+)\s+(?P\S+)") - - for line in self.contents.splitlines(): - m = regex.match(line) - if not m: - continue - self.dict[m.group("path")] = m.group("hash") - - def _parse_datetime(self, datetime_string): - """Because the behavior of Python's strptime's would be - locale-dependent, we parse datetime strings of the format found in - Last-Modified HTTP headers ourselves. This returns an integer - representing a posix timestamp or None, if the parsing failed.""" - class UTC(tzinfo): - def utcoffset(self, dt): - return timedelta(0) - - # we need a map, because strptime would be locale-dependent - month_name_to_number = { - "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6, - "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12 - } - - rexpr = r"""^\s*\w+,\s+ - (?P\d+) \s+ - (?P\w+) \s+ - (?P\d+) \s+ - (?P\d+) : - (?P\d+) : - (?P\d+) .*$""" - - m = re.match(rexpr, datetime_string, flags=re.VERBOSE) - if not m: - return None - - parts = list(m.group("year", "month", "day", "hour", "min", "sec")) - parts[1] = month_name_to_number[m.group("month")] - parts = [int(s) for s in parts] - dt = datetime(*parts, tzinfo=UTC()) - epoch = datetime(1970, 1, 1, tzinfo=UTC()) - posix = (dt - epoch).total_seconds() - - return int(posix) - - def _extract_timestamp(self, data): - """Parse the contents of the InRelease file to find the time it was - generated. Returns a POSIX timestamp if found or None otherwise.""" - for line in data.splitlines(): - if line.startswith("Date:"): - return self._parse_datetime(line.split(":", 1)[1]) - - return None - - def _split_release_and_sig(self, data): - """Split the InRelease file into content and signature parts and return - a tuple of unicode strings (content, signature).""" - rexpr = re.escape("-----BEGIN PGP SIGNED MESSAGE-----") + r"\r?\n|" + \ - re.escape("-----BEGIN PGP SIGNATURE-----" ) + r"\r?\n|" + \ - re.escape("-----END PGP SIGNATURE-----" ) - - # returns content and signature - return re.split(rexpr, data)[1:3] - - -class LPInReleaseCache: - """A cache for InRelease files that can optionally be saved to and - loaded from disk.""" - - def __init__(self, filename=None): - """If filename is given, it is the name of the file that cache contents - will be saved to or loaded from when the save and load methods are - called, respectively.""" - self._filename = filename - self._data = {} - self._lock = threading.Lock() - - self.load() - - def load(self): - """Load the cache contents from disk performing some rudimentary file - locking to prevent corruption.""" - if not self._filename: - return - - buf = [] - fd = None - try: - fd = os.open(self._filename, os.O_CREAT | os.O_RDWR) - - fcntl.flock(fd, fcntl.LOCK_EX) - - while True: - tmp = os.read(fd, 4096) - if not tmp: - break - buf.append(tmp) - - fcntl.flock(fd, fcntl.LOCK_UN) - except OSError as e: - raise LPInReleaseCacheError("Failed to load cache file: {}" - .format(str(e))) - finally: - if fd: - os.close(fd) - - cache_data = {} if not buf else json.loads( - b"".join(buf).decode("utf-8")) - - with self._lock: - self._data = cache_data - - def save(self): - """Save the cache contents to disk performing some rudimentary file - locking to prevent corruption.""" - if not self._filename: - return - - with self._lock: - buf = json \ - .dumps(self._data, ensure_ascii=False, indent=4, - sort_keys=True) \ - .encode("utf-8") - - fd = None - try: - fd = os.open(self._filename, os.O_CREAT | os.O_RDWR) - - fcntl.flock(fd, fcntl.LOCK_EX) - - os.ftruncate(fd, 0) - os.write(fd, buf) - - fcntl.flock(fd, fcntl.LOCK_UN) - except OSError as e: - raise LPInReleaseCacheError("Failed to store cache file: {}" - .format(str(e))) - finally: - if fd: - os.close(fd) - - def add(self, inrelease): - """Add the given InRelease object to the cache.""" - with self._lock: - self._data \ - .setdefault(inrelease.normalized_address, {}) \ - .setdefault(inrelease.suite, {}) \ - .setdefault(inrelease.hash, inrelease.serialize()) - - def get_one(self, mirror, suite, hash_): - """Return a single InRelease object for the given mirror and suite, - corresponding to the hash or None if such an entry does not exist.""" - with self._lock: - url_obj = urllib.parse.urlparse(mirror) - address = url_obj.hostname + url_obj.path.rstrip("/") - - inrel = self._data\ - .get(address, {})\ - .get(suite, {})\ - .get(hash_) - - if not inrel: - return None - - return InRelease( - inrel["mirror"], - inrel["suite"], - inrel["data"], - hash_=inrel["hash"], - last_modified=inrel["published"] - ) - - def get_all(self, mirror, suite): - """Retrieve a list of InRelease objects for the given mirror and suite. - Return a list of all known InRelease objects for the given mirror and - suite.""" - with self._lock: - url_obj = urllib.parse.urlparse(mirror) - address = url_obj.scheme + url_obj.hostname + url_obj.path.rstrip("/") - - inrel_by_hash = self._data\ - .get(address, {})\ - .get(suite, {}) - - inrelease_list = [] - - for hash_, inrel in inrel_by_hash.items(): - inrelease_list.append( - InRelease( - inrel["mirror"], - inrel["suite"], - inrel["data"], - hash_=inrel["hash"], - last_modified=inrel["published"] - ) - ) - - return inrelease_list - - -class LPInReleaseIndex: - """Abstraction to the build system's view of the "by hash" database. - Currently, that interface is the by-hash directory listing of the Web - server.""" - - def __init__(self, mirror, suite, cache=None): - """The mirror is the base URL of the repository up to the "dists" - folder, e.g. - - http://archive.ubuntu.com/ubuntu - - suite is the name of the suite this InReleaseIndex object operates on, - e.g. , -updates or -security. - - Optionally, cache can be initialized to a LPInReleaseCache object, in - which case all look-ups will first go to the cache and only cache - misses will result in requests to the Web server. - """ - parsed = urllib.parse.urlparse(mirror) - self._mirror = get_uri(parsed.hostname, parsed.path) - self._suite = suite - self._cache = cache - - self._base_url = "/".join([self._mirror, "dists", self._suite, - "by-hash/SHA256"]) - - def inrelease_files(self): - """Iterate over all InRelease files found in the archive for the mirror - and suite this index has been configured to operate on.""" - hashes = self._retrieve_hashes() - - for h in hashes: - inrelease = None - - if self._cache: - inrelease = self._cache.get_one(self._mirror, - self._suite, hash_=h) - if not inrelease: - inrelease = self._retrieve_inrelease(h) - if not inrelease: - continue - - yield inrelease - - def get_inrelease_for_timestamp(self, time_gmt): - """Find and return the InRelease file that was valid at the given Posix - timestamp.""" - candidate = None - - for inrelease in self.inrelease_files(): - if inrelease.published > time_gmt: - continue - if not candidate or inrelease.published > candidate.published: - candidate = inrelease - - return candidate - - def _retrieve_inrelease(self, hash_): - """Retrieve the contents of the file identified by hash_. Check if the - file is an InRelease file and return a corresponding InRelease object. - If the hash_ does not belong to an InRelease file, None is returned.""" - _500KB = 500 * 1024 - - buf = b"" - inrelease = None - url = self._base_url + "/" + hash_ - - try: - with urllib.request.urlopen(url) as response: - - # InRelease files seem to be around 200-300KB - - content_length = response.headers.get("Content-Length") - last_modified = response.headers.get("Last-Modified") - - if not content_length: - buf = response.read(_500KB + 1) - content_length = len(buf) - else: - content_length = int(content_length) - - # Slightly silly heuristic, but does the job - - if content_length > _500KB or content_length < 1024: - return None - - buf += response.read() - - content_encoding = self \ - ._guess_content_encoding_for_response(response) - - # few additional checks to see if this is an InRelease file - - try: - buf = buf.decode(content_encoding) - except UnicodeError: - return None - - if not buf.startswith("-----BEGIN PGP SIGNED MESSAGE-----"): - return None - - for kw in ["Origin:", "Label:", "Suite:", "Acquire-By-Hash:"]: - if not kw in buf: - return None - - inrelease = InRelease(self._mirror, self._suite, buf, - hash_=hash_, last_modified=last_modified) - - if self._cache: - self._cache.add(inrelease) - except urllib.error.HTTPError as e: - if not e.code in [404,]: - raise LPInReleaseIndexError("Error retrieving {}: {}" - .format(url, str(e))) - - return inrelease - - def _guess_content_encoding_for_response(self, response): - """Guess the content encoding of the given HTTPResponse object.""" - content_encoding = response.headers.get("Content-Encoding") - content_type = response.headers.get("Content-Type", - "text/html;charset=UTF-8") - - if not content_encoding: - m = re.match(r"^.*charset=(\S+)$", content_type) - - if m: - content_encoding = m.group(1) - else: - content_encoding = "UTF-8" - - return content_encoding - - def _retrieve_hashes(self): - """Retrieve all available by-hashes for the mirror and suite that this - index is configured to operate on.""" - hashes = [] - - if self._cache: - cache_entry = self._cache.get_all(self._mirror, self._suite) - if cache_entry: - return [inrel.hash for inrel in cache_entry] - - try: - request=urllib.request.Request(self._base_url) - with urllib.request.urlopen(request) as response: - content_encoding = self._guess_content_encoding_for_response( - response) - - body = response.read().decode(content_encoding) - hashes = list(set(re.findall(r"[a-z0-9]{64}", body))) - except urllib.error.URLError as e: - raise LPInReleaseIndexError("Could not retrieve hash listing: {}" - .format(str(e))) - - return hashes - - -class LPInReleaseIndexCli: - """A CLI interface for LPInReleaseIndex.""" - - def __init__(self, name): - self._name = name - self._mirror = None - self._suite = None - self._timestamp = None - self._cachefile = None - self._cache = None - self._infile = None - self._outfile = None - - def __call__(self, args): - options = vars(self._parse_opts(args)) - - # Copy settings to object attributes - for key, value in options.items(): - if hasattr(self, "_" + key): - setattr(self, "_" + key, value) - - if self._cachefile: - self._cache = LPInReleaseCache(self._cachefile) - - try: - options["func"]() - except LPInReleaseIndexError as e: - sys.stderr.write("{}: {}\n".format(self._name, str(e))) - sys.exit(EXIT_ERR) - - if self._cache: - self._cache.save() - - def list(self): - """List all InRelease hashes for a given mirror and suite.""" - for inrelease in self._list(self._mirror, self._suite): - if self._timestamp and inrelease.published > self._timestamp: - continue - - print("{} {} ({})".format( - inrelease.hash, - inrelease.datetime, - inrelease.published, - )) - - def select(self): - """Find the hash of the InRelease file valid at a given timestamp.""" - candidate = self._select(self._mirror, self._suite) - - if candidate: - print("{} {} ({})".format( - candidate.hash, - candidate.datetime, - candidate.published, - )) - - def inject(self): - """Inject by-hash and inrelease-path settings into a sources.list.""" - sources_list = self._infile - - if not os.path.exists(sources_list): - sys.stderr.write("{}: No such file: {}.\n" - .format(self._name, sources_list)) - sys.exit(EXIT_ERR) - - with open(sources_list, "r", encoding="utf-8") as fp: - buf = fp.read() - - rexpr = re.compile(r"""^ - (?Pdeb(?:-src)?)\s+ - (?P\[[^\]]+\]\s+)? - (?P(?P\S+):\S+)\s+ - (?P\S+)\s+ - (?P.*)$""", flags=re.VERBOSE) - - lines = buf.splitlines(True) - - for i, line in enumerate(lines): - line = lines[i] - m = rexpr.match(line) - - if not m: - continue - if m.group("scheme") not in ["http", "https", "ftp"]: - continue - - opts = {} - if m.group("opts"): - for entry in m.group("opts").strip().strip("[]").split(): - k, v = entry.split("=") - opts[k] = v - - inrelease = self._select(m.group("mirror"), m.group("suite")) - if inrelease: - opts["by-hash"] = "yes" - opts["inrelease-path"] = "by-hash/SHA256/" + inrelease.hash - - groupdict = m.groupdict() - groupdict["opts"] = " ".join(["{0}={1}".format(*o) for o in - opts.items()]) - - lines[i] = "{type} [{opts}] {mirror} {suite} {comps}\n"\ - .format(**groupdict) - - outfile = None - try: - if not self._outfile or self._outfile == "-": - outfile = sys.stdout - else: - outfile = open(self._outfile, "w+", encoding="utf-8") - outfile.write("".join(lines)) - finally: - if outfile and outfile != sys.stdout: - outfile.close() - - def _parse_opts(self, args): - """Parse command line arguments and initialize the CLI object.""" - main_parser = argparse.ArgumentParser() - subparsers = main_parser.add_subparsers(dest="command") - - parser_inject = subparsers.add_parser("inject", - help="Rewrite a sources.list file injecting appropriate hashes.") - parser_list = subparsers.add_parser("list", - help="List InRelease hashes for a given release and suite.") - parser_select = subparsers.add_parser("select", - help="Select hash to use for a given timestamp, release, suite.") - - parser_inject.set_defaults(func=self.inject) - parser_list.set_defaults(func=self.list) - parser_select.set_defaults(func=self.select) - - # Options common to all commands - for parser in [parser_inject, parser_list, parser_select]: - cutoff_time_required = True if parser != parser_list else False - - parser.add_argument("-t", "--cutoff-time", dest="timestamp", - type=int, required=cutoff_time_required, - help="A POSIX timestamp to pin the repo to.") - parser.add_argument("--cache-file", dest="cachefile", type=str, - help="A file where to cache intermediate results (optional).") - - mirror = "http://archive.ubuntu.com/ubuntu" - - # Options common to list, select commands - for parser in [parser_list, parser_select]: - parser.add_argument("-m", "--mirror", dest="mirror", type=str, - default=mirror, help="The URL of the mirror to use.") - parser.add_argument("-s", "--suite", - dest="suite", type=str, required=True, - help="The suite to scan (e.g. 'bionic', 'bionic-updates').") - - # Extra option for inject command - parser_inject.add_argument("-o", "--output-file", dest="outfile", - type=str, help="") - parser_inject.add_argument("infile", type=str, - help="The sources.list file to modify.") - - if not args: - main_parser.print_help() - sys.exit(EXIT_ERR) - - return main_parser.parse_args(args) - - def _list(self, mirror, suite): - """Internal helper for the list command. This is also used - implicitly by the _select method.""" - index = LPInReleaseIndex(mirror, suite, cache=self._cache) - - inrelease_files = \ - reversed( - sorted( - list(index.inrelease_files()), - key=lambda x: x.published - ) - ) - - return inrelease_files - - def _select(self, mirror, suite): - """Internal helper for the select command.""" - candidate = None - - for inrelease in self._list(mirror, suite): - if inrelease.published > self._timestamp: - continue - if not candidate or inrelease.published > candidate.published: - candidate = inrelease - - return candidate - - -class ProxyingHTTPRequestHandler(http.server.BaseHTTPRequestHandler): - """Request handler providing a virtual snapshot of the package - repositories.""" - - def do_HEAD(self): - """Process a HEAD request.""" - self.__get_request(verb="HEAD") - - def do_GET(self): - """Process a GET request.""" - self.__get_request() - - def sanitize_requestline(self): - requestline = [] - for word in self.requestline.split(): - if word.startswith('http'): - parse = urllib.parse.urlparse(word) - parse = urllib.parse.ParseResult( - parse.scheme, - parse.hostname, # not netloc, to sanitize username/password - parse.path, - parse.params, - parse.query, - parse.fragment) - requestline.append(urllib.parse.urlunparse(parse)) - else: - requestline.append(word) - self.requestline = ' '.join(requestline) - - def __get_request(self, verb="GET"): - """Pass all requests on to the destination server 1:1 except when the - target is an InRelease file or a resource listed in an InRelease files. - - In that case we silently download the resource via the by-hash URL - which was most recent at the cutoff (or repo snapshot) time and inject - it into the response. - - It is important to understand that there is no status 3xx HTTP redirect - happening here, the client does not know that what it receives is not - exactly what it requested.""" - host = self.headers.get("host") - - # the host does not start with http(s):// which result in urlparse - # to not detect the host & path correctly (LP:#1944906) - if not host.startswith("http"): - host = "http://{}".format(host) - uri = host + self.path - - parsed = urllib.parse.urlparse(uri) - - self.sanitize_requestline() - - m = re.match( - r"^(?P.*?)/dists/(?P[^/]+)/(?P.*)$", - parsed.path - ) - - if m: - mirror = get_uri(parsed.hostname, m.group("base")) - base = m.group("base") - suite = m.group("suite") - target = m.group("target") - - index = LPInReleaseIndex(mirror, suite, - cache=self.server.inrelease_cache) - inrelease = index.get_inrelease_for_timestamp( - self.server.snapshot_stamp) - - if inrelease is None: - self.log_message( - "InRelease not found for {}/{}".format(parsed.hostname, parsed.path)) - self.send_error(404, "No InRelease file found for given " - "mirror, suite and timestamp.") - return - - hash_ = None - - if target == "InRelease": - hash_ = inrelease.hash - else: - hash_ = inrelease.get_hash_for(target) - - if hash_: - self.log_message( - "Inject {} for {}".format(hash_, target)) - - target_path = target.rsplit("/", 1)[0] - - uri = "{}/dists/{}/by-hash/SHA256/{}"\ - .format(mirror, suite, hash_) - else: - uri = get_uri(parsed.hostname, parsed.path) - - ## use requests such that authentication via password database happens - ## reuse all the headers that we got asked to provide - try: - with urllib.request.urlopen( - urllib.request.Request( - uri, - method=verb, - headers=self.headers)) as response: - self.__send_response(response) - except urllib.error.HTTPError as e: - if e.code not in (304,): - self.log_message( - "urlopen() failed for {} with {}".format(uri, e.reason)) - self.__send_response(e) - except urllib.error.URLError as e: - self.log_message( - "urlopen() failed for {} with {}".format(uri, str(e.reason))) - # URLError.reason can either be a string or another Exception - # So do convert it to a string before sending the error (LP: #1946520) - self.send_error(501, str(e.reason)) - - - def __get_host_path(self): - """Figure out the host to contact and the path of the resource that is - being requested.""" - host = self.headers.get("host") - url = urllib.parse.urlparse(self.path) - path = url.path - - return host, path - - def __send_response(self, response): - """Pass on upstream response headers and body to the client.""" - if hasattr(response, "status"): - status = response.status - elif hassattr(response, "code"): - status = response.code - elif hasattr(response, "getstatus"): - status = response.getstatus() - - if hasattr(response, "headers"): - headers = response.headers - elif hasattr(response, "info"): - headers = response.info() - - self.send_response(status) - - for name, value in headers.items(): - self.send_header(name, value) - - self.end_headers() - if hasattr(response, "read"): - shutil.copyfileobj(response, self.wfile) - - -class MagicHTTPProxy(socketserver.ThreadingMixIn, http.server.HTTPServer): - """Tiny HTTP server using ProxyingHTTPRequestHandler instances to provide - a snapshot view of the package repositories.""" - - def __init__(self, server_address, server_port, cache_file=None, - repo_snapshot_stamp=time.time(), run_as=None): - - try: - super(http.server.HTTPServer, self).__init__( - (server_address, server_port), ProxyingHTTPRequestHandler) - except OSError as e: - raise LPInReleaseProxyError( - "Could not initialize proxy: {}".format(str(e))) - - self.inrelease_cache = LPInReleaseCache(filename=cache_file) - self.snapshot_stamp = repo_snapshot_stamp - - -class MagicHTTPProxyCli: - """A CLI interface for the MagicHTTPProxy.""" - - def __init__(self, name): - self._name = name - self._address = "127.0.0.1" - self._port = 8080 - self._timestamp = time.time() - self._run_as = None - self._pid_file = None - self._log_file = None - self._background = False - self._setsid = False - - def __call__(self, args): - options = self._parse_opts(args) - - proxy = MagicHTTPProxy( - options.address, - options.port, - cache_file=None, - repo_snapshot_stamp=options.timestamp - ) - - # Detach, but keep all streams open. - if options.background: - pid = os.fork() - if pid: - os._exit(EXIT_OK) - - if options.log_file: - fd = open(options.log_file, "wb+") - os.dup2(fd.fileno(), sys.stdout.fileno()) - os.dup2(fd.fileno(), sys.stderr.fileno()) - - # Become session leader and give up controlling terminal. - if options.setsid: - if not options.log_file: - fd = open(os.devnull, "wb+") - os.dup2(fd.fileno(), sys.stdout.fileno()) - os.dup2(fd.fileno(), sys.stderr.fileno()) - os.setsid() - - if options.pid_file: - with open(options.pid_file, "w+", encoding="utf-8") as fp: - fp.write(str(os.getpid())) - - if options.run_as is not None: - try: - uid = pwd.getpwnam(options.run_as).pw_uid - os.setuid(uid) - except KeyError as e: - sys.stderr.write("Failed to lookup {}: {}\n" - .format(options.run_as, str(e))) - sys.exit(EXIT_ERR) - except PermissionError as e: - sys.stderr.write("Cannot setuid: {}\n".format(str(e))) - sys.exit(EXIT_ERR) - - proxy.serve_forever() - - def _parse_opts(self, args): - """Parse command line arguments and initialize the CLI object.""" - parser = argparse.ArgumentParser() - - parser.add_argument("--address", dest="address", type=str, - default="127.0.0.1", help="The address of the interface to " - "bind to (default: 127.0.0.1)") - parser.add_argument("--port", dest="port", type=int, default=8080, - help="The port to listen on (default: 8080)") - parser.add_argument("-t", "--cutoff-time", dest="timestamp", type=int, - required=True, help="A POSIX timestamp to pin the repo to.") - parser.add_argument("--run-as", dest="run_as", type=str, - help="Drop privileges and run as this user.") - parser.add_argument("--pid-file", dest="pid_file", type=str, - help="Store the PID to this file.") - parser.add_argument("--log-file", dest="log_file", type=str, - help="Re-direct all streams to this file.") - parser.add_argument("--background", dest="background", - action="store_true", - help="Whether to go into the background.") - parser.add_argument("--setsid", dest="setsid", - action="store_true", - help="Become session leader and drop controlling TTY.") - - return parser.parse_args(args) - -if __name__ == "__main__": - name = os.path.basename(sys.argv[0]) - - try: - if name == "lp-in-release": - cli = LPInReleaseIndexCli(name) - else: - cli = MagicHTTPProxyCli(name) - - cli(sys.argv[1:]) - except LPInReleaseBaseError as e: - sys.stderr.write("{}: {}\n".format(name, str(e))) - sys.exit(EXIT_ERR) - except KeyboardInterrupt: - sys.stderr.write("{}: Caught keyboard interrupt, exiting...\n" - .format(name)) - sys.exit(EXIT_ERR)