# -*- coding: utf-8 -*- # Copyright (C) 2012 Niels Thykier # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from collections import defaultdict from functools import partial import logging from itertools import chain, filterfalse from britney2.utils import iter_except class InstallabilityTester(object): def __init__(self, universe, revuniverse, testing, broken, essentials, safe_set, eqv_table): """Create a new installability tester universe is a dict mapping package ids to their dependencies and conflicts. revuniverse is a table containing all packages with reverse relations mapping them to their reverse relations. testing is a (mutable) set of package ids that determines which of the packages in universe are currently in testing. broken is a (mutable) set of package ids that are known to be uninstallable. essentials is a set of packages with "Essential: yes". safe_set is a set of all packages which have no conflicts and either have no dependencies or only depends on other "safe" packages. Package id: (pkg_name, pkg_version, pkg_arch) - NB: arch:all packages are "re-mapped" to given architecture. (simplifies caches and dependency checking) """ self._universe = universe self._testing = testing self._broken = broken self._essentials = essentials self._revuniverse = revuniverse self._safe_set = safe_set self._eqv_table = eqv_table self._stats = InstallabilityStats() logger_name = ".".join((self.__class__.__module__, self.__class__.__name__)) self.logger = logging.getLogger(logger_name) # Cache of packages known to be broken - we deliberately do not # include "broken" in it. See _optimize for more info. self._cache_broken = set() # Cache of packages known to be installable self._cache_inst = set() # Per "arch" cache of the "minimal" (possibly incomplete) # pseudo-essential set. This includes all the packages that # are essential and packages that will always follow. # # It may not be a complete essential set, since alternatives # are not always resolved. Noticably cases like "awk" may be # left out (since it could be either gawk, mawk or # original-awk) unless something in this sets depends strictly # on one of them self._cache_ess = {} def compute_testing_installability(self): """Computes the installability of packages in testing This method computes the installability of all packages in testing and caches the result. This has the advantage of making "is_installable" queries very fast for all packages in testing. """ check_inst = self._check_inst cbroken = self._cache_broken cache_inst = self._cache_inst eqv_table = self._eqv_table testing = self._testing tcopy = [x for x in testing] for t in filterfalse(cache_inst.__contains__, tcopy): if t in cbroken: continue res = check_inst(t) if t in eqv_table: eqv = (x for x in eqv_table[t] if x in testing) if res: cache_inst.update(eqv) else: eqv_set = frozenset(eqv) testing -= eqv_set cbroken |= eqv_set @property def stats(self): return self._stats def are_equivalent(self, pkg_id1, pkg_id2): """Test if pkg_id1 and pkg_id2 are equivalent :param pkg_id1 The id of the first package :param pkg_id2 The id of the second package :return: True if pkg_id1 and pkg_id2 have the same "signature" in the package dependency graph (i.e. relations can not tell them apart semantically except for their name). Otherwise False """ eqv_table = self._eqv_table return pkg_id1 in eqv_table and pkg_id2 in eqv_table[pkg_id1] def reverse_dependencies_of(self, pkg_id): """Returns the set of reverse dependencies of a given package :param pkg_id: The package id as defined in the constructor. :return: A set containing the package ids all of the reverse dependencies of the input package. The result is suite agnostic. """ revuniverse = self._revuniverse if pkg_id not in revuniverse: return frozenset() return revuniverse[pkg_id][0] def negative_dependencies_of(self, pkg_id): """Returns the set of negative dependencies of a given package Note that there is no "reverse_negative_dependencies_of" method, since negative dependencies have no "direction" unlike positive dependencies. :param pkg_id: The package id as defined in the constructor. :return: A set containing the package ids all of the negative dependencies of the input package. The result is suite agnostic. """ return self._universe[pkg_id][1] def dependencies_of(self, pkg_id): """Returns the set of dependencies of a given package :param pkg_id: The package id as defined in the constructor. :return: A set containing the package ids all of the dependencies of the input package. The result is suite agnostic. """ return self._universe[pkg_id][0] def any_of_these_are_in_testing(self, pkgs): """Test if at least one package of a given set is in testing :param pkgs: A set of package ids (as defined in the constructor) :return: True if any of the packages in pkgs are currently in testing """ return not self._testing.isdisjoint(pkgs) def is_pkg_in_testing(self, pkg): """Test if the package of is in testing :param pkgs: A package id (as defined in the constructor) :return: True if the pkg is currently in testing """ return pkg in self._testing def add_testing_binary(self, pkg_id): """Add a binary package to "testing" If the package is not known, this method will throw an KeyError. :param pkg_id The id of the package """ if pkg_id not in self._universe: # pragma: no cover raise KeyError(str(pkg_id)) if pkg_id in self._broken: self._testing.add(pkg_id) elif pkg_id not in self._testing: self._testing.add(pkg_id) if self._cache_inst: self._stats.cache_drops += 1 self._cache_inst = set() if self._cache_broken: # Re-add broken packages as some of them may now be installable self._testing |= self._cache_broken self._cache_broken = set() if pkg_id in self._essentials and pkg_id.architecture in self._cache_ess: # Adds new essential => "pseudo-essential" set needs to be # recomputed del self._cache_ess[pkg_id.architecture] return True def remove_testing_binary(self, pkg_id): """Remove a binary from "testing" :param pkg_id The id of the package If the package is not known, this method will throw an KeyError. """ if pkg_id not in self._universe: # pragma: no cover raise KeyError(str(pkg_id)) self._cache_broken.discard(pkg_id) if pkg_id in self._testing: self._testing.remove(pkg_id) if pkg_id.architecture in self._cache_ess and pkg_id in self._cache_ess[pkg_id.architecture][0]: # Removes a package from the "pseudo-essential set" del self._cache_ess[pkg_id.architecture] if pkg_id not in self._revuniverse: # no reverse relations - safe return True if pkg_id not in self._broken and pkg_id in self._cache_inst: # It is in our cache (and not guaranteed to be broken) - throw out the cache self._cache_inst = set() self._stats.cache_drops += 1 return True def is_installable(self, pkg_id): """Test if a package is installable in this package set The package is assumed to be in "testing" and only packages in "testing" can be used to satisfy relations. :param pkg_id The id of the package Returns True iff the package is installable. Returns False otherwise. """ self._stats.is_installable_calls += 1 if pkg_id not in self._universe: # pragma: no cover raise KeyError(str(pkg_id)) if pkg_id not in self._testing or pkg_id in self._broken: self._stats.cache_hits += 1 return False if pkg_id in self._cache_inst: self._stats.cache_hits += 1 return True self._stats.cache_misses += 1 return self._check_inst(pkg_id) def _check_inst(self, t, musts=None, never=None, choices=None): # See the explanation of musts, never and choices below. cache_inst = self._cache_inst stats = self._stats if musts and t in cache_inst and not never: # use the inst cache only for direct queries/simple queries. cache = True if choices: # This is a recursive call, where there is no "never" so far. # We know t satisfies at least one of the remaining choices. # If it satisfies all remaining choices, we can use the cache # in this case (since never is empty). # # Otherwise, a later choice may be incompatible with t. for choice in choices: if t in choice: continue cache = False break if cache: return True universe = self._universe testing = self._testing cbroken = self._cache_broken safe_set = self._safe_set eqv_table = self._eqv_table # Our installability verdict - start with "yes" and change if # prove otherwise. verdict = True # set of packages that must be installed with this package if musts is None: musts = set() musts.add(t) # set of packages we can *never* choose (e.g. due to conflicts) if never is None: never = set() # set of relations were we have a choice, but where we have not # committed ourselves yet. Hopefully some choices may be taken # for us (if one of the alternatives appear in "musts") if choices is None: choices = set() # The subset of musts we haven't checked yet. check = {t} if len(musts) == 1: # Include the essential packages in testing as a starting point. if t.architecture not in self._cache_ess: # The minimal essential set cache is not present - # compute it now. (start, ess_never, ess_choices) = self._get_min_pseudo_ess_set(t.architecture) else: (start, ess_never, ess_choices) = self._cache_ess[t.architecture] if t in ess_never: # t conflicts with something in the essential set or the essential # set conflicts with t - either way, t is f***ed cbroken.add(t) testing.remove(t) stats.conflicts_essential += 1 return False musts.update(start) never.update(ess_never) choices.update(ess_choices) # curry check_loop check_loop = partial(self._check_loop, universe, testing, eqv_table, stats, musts, never, cbroken) # Useful things to remember: # # * musts and never are disjointed at all times # - if not, t cannot be installable. Either t, or one of # its dependencies conflict with t or one of its (other) # dependencies. # # * choices should generally be avoided as much as possible. # - picking a bad choice requires backtracking # - sometimes musts/never will eventually "solve" the choice. # # * check never includes choices (these are always in choices) # # * A package is installable if never and musts are disjointed # and both check and choices are empty. # - exception: resolve_choices may determine the installability # of t via recursion (calls _check_inst). In this case # check and choices are not (always) empty. def _prune_choices(rebuild, len=len): """Picks a choice from choices and updates rebuild. Prunes the choices and updates "rebuild" to reflect the pruned choices. Returns True if t is installable (determined via recursion). Returns False if a choice was picked and added to check. Returns None if t is uninstallable (no choice can be picked). NB: If this returns False, choices should be replaced by rebuild. """ # We already satisfied/chosen at least one of the literals # in the choice, so the choice is gone for choice in filter(musts.isdisjoint, choices): # cbroken is needed here because (in theory) it could # have changed since the choice was discovered and it # is smaller than testing (so presumably faster) remain = choice - never - cbroken if len(remain) > 1 and not remain.isdisjoint(safe_set): first = None for r in filter(safe_set.__contains__, remain): # don't bother giving extra arguments to _check_inst. "safe" packages are # usually trivial to satisfy on their own and will not involve conflicts # (so never will not help) if r in cache_inst or self._check_inst(r): first = r break if first: musts.add(first) check.add(first) stats.choice_resolved_using_safe_set += 1 continue # None of the safe set choices are installable, so drop them remain -= safe_set if len(remain) == 1: # the choice was reduced to one package we haven't checked - check that check.update(remain) musts.update(remain) stats.choice_presolved += 1 continue if not remain: # all alternatives would violate the conflicts or are uninstallable # => package is not installable stats.choice_presolved += 1 return False # The choice is still deferred rebuild.add(frozenset(remain)) return True # END _prune_choices while check: if not check_loop(choices, check): verdict = False break if choices: rebuild = set() if not _prune_choices(rebuild): verdict = False break if not check and rebuild: # We have to "guess" now, which is always fun, but not cheap. We # stop guessing: # - once we run out of choices to make (obviously), OR # - if one of the choices exhaust all but one option if self.resolve_choices(check, musts, never, rebuild): # The recursive call have already updated the # cache so there is not point in doing it again. return True choices = rebuild if verdict: # if t is installable, then so are all packages in musts self._cache_inst.update(musts) stats.solved_installable += 1 else: stats.solved_uninstallable += 1 return verdict def resolve_choices(self, check, musts, never, choices): universe = self._universe testing = self._testing eqv_table = self._eqv_table stats = self._stats cbroken = self._cache_broken while choices: choice_options = choices.pop() choice = iter(choice_options) last = next(choice) # pick one to go last solved = False for p in choice: musts_copy = musts.copy() never_tmp = set() choices_tmp = set() check_tmp = {p} if not self._check_loop(universe, testing, eqv_table, stats, musts_copy, never_tmp, cbroken, choices_tmp, check_tmp): # p cannot be chosen/is broken (unlikely, but ...) continue # Test if we can pick p without any consequences. # - when we can, we avoid a backtrack point. if never_tmp <= never and choices_tmp <= choices: # we can pick p without picking up new conflicts # or unresolved choices. Therefore we commit to # using p. musts.update(musts_copy) stats.choice_resolved_without_restore_point += 1 solved = True break if not musts.isdisjoint(never_tmp): # If we pick p, we will definitely end up making # t uninstallable, so p is a no-go. continue stats.backtrace_restore_point_created += 1 # We are not sure that p is safe, setup a backtrack # point and recurse. never_tmp |= never choices_tmp |= choices if self._check_inst(p, musts_copy, never_tmp, choices_tmp): # Success, p was a valid choice and made it all # installable return True # If we get here, we failed to find something that # would satisfy choice (without breaking the # installability of t). This means p cannot be used # to satisfy the dependencies, so pretend to conflict # with it - hopefully it will reduce future choices. never.add(p) stats.backtrace_restore_point_used += 1 if not solved: # Optimization for the last case; avoid the recursive call # and just assume the last will lead to a solution. If it # doesn't there is no solution and if it does, we don't # have to back-track anyway. check.add(last) musts.add(last) stats.backtrace_last_option += 1 return False def _check_loop(self, universe, testing, eqv_table, stats, musts, never, cbroken, choices, check, len=len, frozenset=frozenset): """Finds all guaranteed dependencies via "check". If it returns False, t is not installable. If it returns True then "check" is exhausted. If "choices" are empty and this returns True, then t is installable. """ # Local variables for faster access... not_satisfied = partial(filter, musts.isdisjoint) # While we have guaranteed dependencies (in check), examine all # of them. for cur in iter_except(check.pop, KeyError): (deps, cons) = universe[cur] if cons: # Conflicts? if cur in never: # cur adds a (reverse) conflict, so check if cur # is in never. # # - there is a window where two conflicting # packages can be in check. Example "A" depends # on "B" and "C". If "B" conflicts with "C", # then both "B" and "C" could end in "check". return False # We must install cur for the package to be installable, # so "obviously" we can never choose any of its conflicts never.update(cons & testing) # depgroup can be satisfied by picking something that is # already in musts - lets pick that (again). :) for depgroup in not_satisfied(deps): # Of all the packages listed in the relation remove those that # are either: # - not in testing # - known to be broken (by cache) # - in never candidates = (depgroup & testing) - never if not candidates: # We got no candidates to satisfy it - this # package cannot be installed with the current # testing if cur not in cbroken and depgroup.isdisjoint(never): # cur's dependency cannot be satisfied even if never was empty. # This means that cur itself is broken (as well). cbroken.add(cur) testing.remove(cur) return False if len(candidates) == 1: # only one possible solution to this choice and we # haven't seen it before check.update(candidates) musts.update(candidates) else: possible_eqv = set(x for x in candidates if x in eqv_table) if len(possible_eqv) > 1: # Exploit equivalency to reduce the number of # candidates if possible. Basically, this # code maps "similar" candidates into a single # candidate that will give a identical result # to any other candidate it eliminates. # # See InstallabilityTesterBuilder's # _build_eqv_packages_table method for more # information on how this works. new_cand = set(x for x in candidates if x not in possible_eqv) stats.eqv_table_times_used += 1 for chosen in iter_except(possible_eqv.pop, KeyError): new_cand.add(chosen) possible_eqv -= eqv_table[chosen] stats.eqv_table_total_number_of_alternatives_eliminated += len(candidates) - len(new_cand) if len(new_cand) == 1: check.update(new_cand) musts.update(new_cand) stats.eqv_table_reduced_to_one += 1 continue elif len(candidates) == len(new_cand): stats.eqv_table_reduced_by_zero += 1 candidates = frozenset(new_cand) else: # Candidates have to be a frozenset to be added to choices candidates = frozenset(candidates) # defer this choice till later choices.add(candidates) return True def _get_min_pseudo_ess_set(self, arch): if arch not in self._cache_ess: # The minimal essential set cache is not present - # compute it now. testing = self._testing eqv_table = self._eqv_table cbroken = self._cache_broken universe = self._universe stats = self._stats ess_base = set(x for x in self._essentials if x.architecture == arch and x in testing) start = set(ess_base) ess_never = set() ess_choices = set() not_satisfied = partial(filter, start.isdisjoint) while ess_base: self._check_loop(universe, testing, eqv_table, stats, start, ess_never, cbroken, ess_choices, ess_base) if ess_choices: # Try to break choices where possible nchoice = set() for choice in not_satisfied(ess_choices): b = False for c in choice: if universe[c][1] <= ess_never and \ not any(not_satisfied(universe[c][0])): ess_base.add(c) b = True break if not b: nchoice.add(choice) ess_choices = nchoice else: break for x in start: ess_never.update(universe[x][1]) self._cache_ess[arch] = (frozenset(start), frozenset(ess_never), frozenset(ess_choices)) return self._cache_ess[arch] def compute_stats(self): universe = self._universe eqv_table = self._eqv_table graph_stats = defaultdict(ArchStats) seen_eqv = defaultdict(set) for pkg in universe: (pkg_name, pkg_version, pkg_arch) = pkg deps, con = universe[pkg] arch_stats = graph_stats[pkg_arch] arch_stats.nodes += 1 if pkg in eqv_table and pkg not in seen_eqv[pkg_arch]: eqv = [e for e in eqv_table[pkg] if e.architecture == pkg_arch] arch_stats.eqv_nodes += len(eqv) arch_stats.add_dep_edges(deps) arch_stats.add_con_edges(con) for stat in graph_stats.values(): stat.compute_all() return graph_stats class InstallabilityStats(object): def __init__(self): self.cache_hits = 0 self.cache_misses = 0 self.cache_drops = 0 self.backtrace_restore_point_created = 0 self.backtrace_restore_point_used = 0 self.backtrace_last_option = 0 self.choice_presolved = 0 self.choice_resolved_using_safe_set = 0 self.choice_resolved_without_restore_point = 0 self.is_installable_calls = 0 self.solved_installable = 0 self.solved_uninstallable = 0 self.conflicts_essential = 0 self.eqv_table_times_used = 0 self.eqv_table_reduced_to_one = 0 self.eqv_table_reduced_by_zero = 0 self.eqv_table_total_number_of_alternatives_eliminated = 0 def stats(self): formats = [ "Requests - is_installable: {is_installable_calls}", "Cache - hits: {cache_hits}, misses: {cache_misses}, drops: {cache_drops}", "Choices - pre-solved: {choice_presolved}, safe-set: {choice_resolved_using_safe_set}, No RP: {choice_resolved_without_restore_point}", "Backtrace - RP created: {backtrace_restore_point_created}, RP used: {backtrace_restore_point_used}, reached last option: {backtrace_last_option}", "Solved - installable: {solved_installable}, uninstallable: {solved_uninstallable}, conflicts essential: {conflicts_essential}", "Eqv - times used: {eqv_table_times_used}, perfect reductions: {eqv_table_reduced_to_one}, failed reductions: {eqv_table_reduced_by_zero}, total no. of alternatives pruned: {eqv_table_total_number_of_alternatives_eliminated}", ] return [x.format(**self.__dict__) for x in formats] class ArchStats(object): def __init__(self): self.nodes = 0 self.eqv_nodes = 0 self.dep_edges = [] self.con_edges = [] self.stats = defaultdict(lambda: defaultdict(int)) def stat(self, statname): return self.stats[statname] def stat_summary(self): text = [] for statname in ['nodes', 'dependency-clauses', 'dependency-clause-alternatives', 'negative-dependency-clauses']: stat = self.stats[statname] if statname != 'nodes': format_str = "%s, max: %d, min: %d, median: %d, average: %f (%d/%d)" values = [statname, stat['max'], stat['min'], stat['median'], stat['average'], stat['sum'], stat['size']] if 'average-per-node' in stat: format_str += ", average-per-node: %f" values.append(stat['average-per-node']) else: format_str = "nodes: %d, eqv-nodes: %d" values = (self.nodes, self.eqv_nodes) text.append(format_str % tuple(values)) return text def add_dep_edges(self, edges): self.dep_edges.append(edges) def add_con_edges(self, edges): self.con_edges.append(edges) def _list_stats(self, stat_name, sorted_list, average_per_node=False): if sorted_list: stats = self.stats[stat_name] stats['max'] = sorted_list[-1] stats['min'] = sorted_list[0] stats['sum'] = sum(sorted_list) stats['size'] = len(sorted_list) stats['average'] = float(stats['sum'])/len(sorted_list) stats['median'] = sorted_list[len(sorted_list)//2] if average_per_node: stats['average-per-node'] = float(stats['sum'])/self.nodes def compute_all(self): dep_edges = self.dep_edges con_edges = self.con_edges sorted_no_dep_edges = sorted(len(x) for x in dep_edges) sorted_size_dep_edges = sorted(len(x) for x in chain.from_iterable(dep_edges)) sorted_no_con_edges = sorted(len(x) for x in con_edges) self._list_stats('dependency-clauses', sorted_no_dep_edges) self._list_stats('dependency-clause-alternatives', sorted_size_dep_edges, average_per_node=True) self._list_stats('negative-dependency-clauses', sorted_no_con_edges)