You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
britney2-ubuntu/britney2/installability/tester.py

764 lines
31 KiB

# -*- coding: utf-8 -*-
# Copyright (C) 2012 Niels Thykier <niels@thykier.net>
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
from collections import defaultdict
from functools import partial
import logging
from itertools import chain, filterfalse
from britney2.utils import iter_except
class InstallabilityTester(object):
def __init__(self, universe, revuniverse, testing, broken, essentials,
safe_set, eqv_table):
"""Create a new installability tester
universe is a dict mapping package ids to their
dependencies and conflicts.
revuniverse is a table containing all packages with reverse
relations mapping them to their reverse relations.
testing is a (mutable) set of package ids that determines
which of the packages in universe are currently in testing.
broken is a (mutable) set of package ids that are known to
be uninstallable.
essentials is a set of packages with "Essential: yes".
safe_set is a set of all packages which have no conflicts and
either have no dependencies or only depends on other "safe"
packages.
Package id: (pkg_name, pkg_version, pkg_arch)
- NB: arch:all packages are "re-mapped" to given architecture.
(simplifies caches and dependency checking)
"""
self._universe = universe
self._testing = testing
self._broken = broken
self._essentials = essentials
self._revuniverse = revuniverse
self._safe_set = safe_set
self._eqv_table = eqv_table
self._stats = InstallabilityStats()
logger_name = ".".join((self.__class__.__module__, self.__class__.__name__))
self.logger = logging.getLogger(logger_name)
# Cache of packages known to be broken - we deliberately do not
# include "broken" in it. See _optimize for more info.
self._cache_broken = set()
# Cache of packages known to be installable
self._cache_inst = set()
# Per "arch" cache of the "minimal" (possibly incomplete)
# pseudo-essential set. This includes all the packages that
# are essential and packages that will always follow.
#
# It may not be a complete essential set, since alternatives
# are not always resolved. Noticably cases like "awk" may be
# left out (since it could be either gawk, mawk or
# original-awk) unless something in this sets depends strictly
# on one of them
self._cache_ess = {}
def compute_testing_installability(self):
"""Computes the installability of packages in testing
This method computes the installability of all packages in
testing and caches the result. This has the advantage of
making "is_installable" queries very fast for all packages
in testing.
"""
check_inst = self._check_inst
cbroken = self._cache_broken
cache_inst = self._cache_inst
eqv_table = self._eqv_table
testing = self._testing
tcopy = [x for x in testing]
for t in filterfalse(cache_inst.__contains__, tcopy):
if t in cbroken:
continue
res = check_inst(t)
if t in eqv_table:
eqv = (x for x in eqv_table[t] if x in testing)
if res:
cache_inst.update(eqv)
else:
eqv_set = frozenset(eqv)
testing -= eqv_set
cbroken |= eqv_set
@property
def stats(self):
return self._stats
def are_equivalent(self, pkg_id1, pkg_id2):
"""Test if pkg_id1 and pkg_id2 are equivalent
:param pkg_id1 The id of the first package
:param pkg_id2 The id of the second package
:return: True if pkg_id1 and pkg_id2 have the same "signature" in
the package dependency graph (i.e. relations can not tell
them apart semantically except for their name). Otherwise False
"""
eqv_table = self._eqv_table
return pkg_id1 in eqv_table and pkg_id2 in eqv_table[pkg_id1]
def reverse_dependencies_of(self, pkg_id):
"""Returns the set of reverse dependencies of a given package
:param pkg_id: The package id as defined in the constructor.
:return: A set containing the package ids all of the reverse
dependencies of the input package. The result is suite agnostic.
"""
revuniverse = self._revuniverse
if pkg_id not in revuniverse:
return frozenset()
return revuniverse[pkg_id][0]
def negative_dependencies_of(self, pkg_id):
"""Returns the set of negative dependencies of a given package
Note that there is no "reverse_negative_dependencies_of" method,
since negative dependencies have no "direction" unlike positive
dependencies.
:param pkg_id: The package id as defined in the constructor.
:return: A set containing the package ids all of the negative
dependencies of the input package. The result is suite agnostic.
"""
return self._universe[pkg_id][1]
def dependencies_of(self, pkg_id):
"""Returns the set of dependencies of a given package
:param pkg_id: The package id as defined in the constructor.
:return: A set containing the package ids all of the dependencies
of the input package. The result is suite agnostic.
"""
return self._universe[pkg_id][0]
def any_of_these_are_in_testing(self, pkgs):
"""Test if at least one package of a given set is in testing
:param pkgs: A set of package ids (as defined in the constructor)
:return: True if any of the packages in pkgs are currently in testing
"""
return not self._testing.isdisjoint(pkgs)
def is_pkg_in_testing(self, pkg):
"""Test if the package of is in testing
:param pkgs: A package id (as defined in the constructor)
:return: True if the pkg is currently in testing
"""
return pkg in self._testing
def add_testing_binary(self, pkg_id):
"""Add a binary package to "testing"
If the package is not known, this method will throw an
KeyError.
:param pkg_id The id of the package
"""
if pkg_id not in self._universe: # pragma: no cover
raise KeyError(str(pkg_id))
if pkg_id in self._broken:
self._testing.add(pkg_id)
elif pkg_id not in self._testing:
self._testing.add(pkg_id)
if self._cache_inst:
self._stats.cache_drops += 1
self._cache_inst = set()
if self._cache_broken:
# Re-add broken packages as some of them may now be installable
self._testing |= self._cache_broken
self._cache_broken = set()
if pkg_id in self._essentials and pkg_id.architecture in self._cache_ess:
# Adds new essential => "pseudo-essential" set needs to be
# recomputed
del self._cache_ess[pkg_id.architecture]
return True
def remove_testing_binary(self, pkg_id):
"""Remove a binary from "testing"
:param pkg_id The id of the package
If the package is not known, this method will throw an
KeyError.
"""
if pkg_id not in self._universe: # pragma: no cover
raise KeyError(str(pkg_id))
self._cache_broken.discard(pkg_id)
if pkg_id in self._testing:
self._testing.remove(pkg_id)
if pkg_id.architecture in self._cache_ess and pkg_id in self._cache_ess[pkg_id.architecture][0]:
# Removes a package from the "pseudo-essential set"
del self._cache_ess[pkg_id.architecture]
if pkg_id not in self._revuniverse:
# no reverse relations - safe
return True
if pkg_id not in self._broken and pkg_id in self._cache_inst:
# It is in our cache (and not guaranteed to be broken) - throw out the cache
self._cache_inst = set()
self._stats.cache_drops += 1
return True
def is_installable(self, pkg_id):
"""Test if a package is installable in this package set
The package is assumed to be in "testing" and only packages in
"testing" can be used to satisfy relations.
:param pkg_id The id of the package
Returns True iff the package is installable.
Returns False otherwise.
"""
self._stats.is_installable_calls += 1
if pkg_id not in self._universe: # pragma: no cover
raise KeyError(str(pkg_id))
if pkg_id not in self._testing or pkg_id in self._broken:
self._stats.cache_hits += 1
return False
if pkg_id in self._cache_inst:
self._stats.cache_hits += 1
return True
self._stats.cache_misses += 1
return self._check_inst(pkg_id)
def _check_inst(self, t, musts=None, never=None, choices=None):
# See the explanation of musts, never and choices below.
cache_inst = self._cache_inst
stats = self._stats
if musts and t in cache_inst and not never:
# use the inst cache only for direct queries/simple queries.
cache = True
if choices:
# This is a recursive call, where there is no "never" so far.
# We know t satisfies at least one of the remaining choices.
# If it satisfies all remaining choices, we can use the cache
# in this case (since never is empty).
#
# Otherwise, a later choice may be incompatible with t.
for choice in choices:
if t in choice:
continue
cache = False
break
if cache:
return True
universe = self._universe
testing = self._testing
cbroken = self._cache_broken
safe_set = self._safe_set
eqv_table = self._eqv_table
# Our installability verdict - start with "yes" and change if
# prove otherwise.
verdict = True
# set of packages that must be installed with this package
if musts is None:
musts = set()
musts.add(t)
# set of packages we can *never* choose (e.g. due to conflicts)
if never is None:
never = set()
# set of relations were we have a choice, but where we have not
# committed ourselves yet. Hopefully some choices may be taken
# for us (if one of the alternatives appear in "musts")
if choices is None:
choices = set()
# The subset of musts we haven't checked yet.
check = {t}
if len(musts) == 1:
# Include the essential packages in testing as a starting point.
if t.architecture not in self._cache_ess:
# The minimal essential set cache is not present -
# compute it now.
(start, ess_never, ess_choices) = self._get_min_pseudo_ess_set(t.architecture)
else:
(start, ess_never, ess_choices) = self._cache_ess[t.architecture]
if t in ess_never:
# t conflicts with something in the essential set or the essential
# set conflicts with t - either way, t is f***ed
cbroken.add(t)
testing.remove(t)
stats.conflicts_essential += 1
return False
musts.update(start)
never.update(ess_never)
choices.update(ess_choices)
# curry check_loop
check_loop = partial(self._check_loop, universe, testing,
eqv_table, stats, musts, never, cbroken)
# Useful things to remember:
#
# * musts and never are disjointed at all times
# - if not, t cannot be installable. Either t, or one of
# its dependencies conflict with t or one of its (other)
# dependencies.
#
# * choices should generally be avoided as much as possible.
# - picking a bad choice requires backtracking
# - sometimes musts/never will eventually "solve" the choice.
#
# * check never includes choices (these are always in choices)
#
# * A package is installable if never and musts are disjointed
# and both check and choices are empty.
# - exception: resolve_choices may determine the installability
# of t via recursion (calls _check_inst). In this case
# check and choices are not (always) empty.
def _prune_choices(rebuild, len=len):
"""Picks a choice from choices and updates rebuild.
Prunes the choices and updates "rebuild" to reflect the
pruned choices.
Returns True if t is installable (determined via recursion).
Returns False if a choice was picked and added to check.
Returns None if t is uninstallable (no choice can be picked).
NB: If this returns False, choices should be replaced by
rebuild.
"""
# We already satisfied/chosen at least one of the literals
# in the choice, so the choice is gone
for choice in filter(musts.isdisjoint, choices):
# cbroken is needed here because (in theory) it could
# have changed since the choice was discovered and it
# is smaller than testing (so presumably faster)
remain = choice - never - cbroken
if len(remain) > 1 and not remain.isdisjoint(safe_set):
first = None
for r in filter(safe_set.__contains__, remain):
# don't bother giving extra arguments to _check_inst. "safe" packages are
# usually trivial to satisfy on their own and will not involve conflicts
# (so never will not help)
if r in cache_inst or self._check_inst(r):
first = r
break
if first:
musts.add(first)
check.add(first)
stats.choice_resolved_using_safe_set += 1
continue
# None of the safe set choices are installable, so drop them
remain -= safe_set
if len(remain) == 1:
# the choice was reduced to one package we haven't checked - check that
check.update(remain)
musts.update(remain)
stats.choice_presolved += 1
continue
if not remain:
# all alternatives would violate the conflicts or are uninstallable
# => package is not installable
stats.choice_presolved += 1
return False
# The choice is still deferred
rebuild.add(frozenset(remain))
return True
# END _prune_choices
while check:
if not check_loop(choices, check):
verdict = False
break
if choices:
rebuild = set()
if not _prune_choices(rebuild):
verdict = False
break
if not check and rebuild:
# We have to "guess" now, which is always fun, but not cheap. We
# stop guessing:
# - once we run out of choices to make (obviously), OR
# - if one of the choices exhaust all but one option
if self.resolve_choices(check, musts, never, rebuild):
# The recursive call have already updated the
# cache so there is not point in doing it again.
return True
choices = rebuild
if verdict:
# if t is installable, then so are all packages in musts
self._cache_inst.update(musts)
stats.solved_installable += 1
else:
stats.solved_uninstallable += 1
return verdict
def resolve_choices(self, check, musts, never, choices):
universe = self._universe
testing = self._testing
eqv_table = self._eqv_table
stats = self._stats
cbroken = self._cache_broken
while choices:
choice_options = choices.pop()
choice = iter(choice_options)
last = next(choice) # pick one to go last
solved = False
for p in choice:
musts_copy = musts.copy()
never_tmp = set()
choices_tmp = set()
check_tmp = {p}
if not self._check_loop(universe, testing, eqv_table,
stats, musts_copy, never_tmp,
cbroken, choices_tmp,
check_tmp):
# p cannot be chosen/is broken (unlikely, but ...)
continue
# Test if we can pick p without any consequences.
# - when we can, we avoid a backtrack point.
if never_tmp <= never and choices_tmp <= choices:
# we can pick p without picking up new conflicts
# or unresolved choices. Therefore we commit to
# using p.
musts.update(musts_copy)
stats.choice_resolved_without_restore_point += 1
solved = True
break
if not musts.isdisjoint(never_tmp):
# If we pick p, we will definitely end up making
# t uninstallable, so p is a no-go.
continue
stats.backtrace_restore_point_created += 1
# We are not sure that p is safe, setup a backtrack
# point and recurse.
never_tmp |= never
choices_tmp |= choices
if self._check_inst(p, musts_copy, never_tmp,
choices_tmp):
# Success, p was a valid choice and made it all
# installable
return True
# If we get here, we failed to find something that
# would satisfy choice (without breaking the
# installability of t). This means p cannot be used
# to satisfy the dependencies, so pretend to conflict
# with it - hopefully it will reduce future choices.
never.add(p)
stats.backtrace_restore_point_used += 1
if not solved:
# Optimization for the last case; avoid the recursive call
# and just assume the last will lead to a solution. If it
# doesn't there is no solution and if it does, we don't
# have to back-track anyway.
check.add(last)
musts.add(last)
stats.backtrace_last_option += 1
return False
def _check_loop(self, universe, testing, eqv_table, stats, musts, never,
cbroken, choices, check, len=len,
frozenset=frozenset):
"""Finds all guaranteed dependencies via "check".
If it returns False, t is not installable. If it returns True
then "check" is exhausted. If "choices" are empty and this
returns True, then t is installable.
"""
# Local variables for faster access...
not_satisfied = partial(filter, musts.isdisjoint)
# While we have guaranteed dependencies (in check), examine all
# of them.
for cur in iter_except(check.pop, KeyError):
(deps, cons) = universe[cur]
if cons:
# Conflicts?
if cur in never:
# cur adds a (reverse) conflict, so check if cur
# is in never.
#
# - there is a window where two conflicting
# packages can be in check. Example "A" depends
# on "B" and "C". If "B" conflicts with "C",
# then both "B" and "C" could end in "check".
return False
# We must install cur for the package to be installable,
# so "obviously" we can never choose any of its conflicts
never.update(cons & testing)
# depgroup can be satisfied by picking something that is
# already in musts - lets pick that (again). :)
for depgroup in not_satisfied(deps):
# Of all the packages listed in the relation remove those that
# are either:
# - not in testing
# - known to be broken (by cache)
# - in never
candidates = (depgroup & testing) - never
if not candidates:
# We got no candidates to satisfy it - this
# package cannot be installed with the current
# testing
if cur not in cbroken and depgroup.isdisjoint(never):
# cur's dependency cannot be satisfied even if never was empty.
# This means that cur itself is broken (as well).
cbroken.add(cur)
testing.remove(cur)
return False
if len(candidates) == 1:
# only one possible solution to this choice and we
# haven't seen it before
check.update(candidates)
musts.update(candidates)
else:
possible_eqv = set(x for x in candidates if x in eqv_table)
if len(possible_eqv) > 1:
# Exploit equivalency to reduce the number of
# candidates if possible. Basically, this
# code maps "similar" candidates into a single
# candidate that will give a identical result
# to any other candidate it eliminates.
#
# See InstallabilityTesterBuilder's
# _build_eqv_packages_table method for more
# information on how this works.
new_cand = set(x for x in candidates if x not in possible_eqv)
stats.eqv_table_times_used += 1
for chosen in iter_except(possible_eqv.pop, KeyError):
new_cand.add(chosen)
possible_eqv -= eqv_table[chosen]
stats.eqv_table_total_number_of_alternatives_eliminated += len(candidates) - len(new_cand)
if len(new_cand) == 1:
check.update(new_cand)
musts.update(new_cand)
stats.eqv_table_reduced_to_one += 1
continue
elif len(candidates) == len(new_cand):
stats.eqv_table_reduced_by_zero += 1
candidates = frozenset(new_cand)
else:
# Candidates have to be a frozenset to be added to choices
candidates = frozenset(candidates)
# defer this choice till later
choices.add(candidates)
return True
def _get_min_pseudo_ess_set(self, arch):
if arch not in self._cache_ess:
# The minimal essential set cache is not present -
# compute it now.
testing = self._testing
eqv_table = self._eqv_table
cbroken = self._cache_broken
universe = self._universe
stats = self._stats
ess_base = set(x for x in self._essentials if x.architecture == arch and x in testing)
start = set(ess_base)
ess_never = set()
ess_choices = set()
not_satisfied = partial(filter, start.isdisjoint)
while ess_base:
self._check_loop(universe, testing, eqv_table, stats,
start, ess_never, cbroken,
ess_choices, ess_base)
if ess_choices:
# Try to break choices where possible
nchoice = set()
for choice in not_satisfied(ess_choices):
b = False
for c in choice:
if universe[c][1] <= ess_never and \
not any(not_satisfied(universe[c][0])):
ess_base.add(c)
b = True
break
if not b:
nchoice.add(choice)
ess_choices = nchoice
else:
break
for x in start:
ess_never.update(universe[x][1])
self._cache_ess[arch] = (frozenset(start), frozenset(ess_never), frozenset(ess_choices))
return self._cache_ess[arch]
def compute_stats(self):
universe = self._universe
eqv_table = self._eqv_table
graph_stats = defaultdict(ArchStats)
seen_eqv = defaultdict(set)
for pkg in universe:
(pkg_name, pkg_version, pkg_arch) = pkg
deps, con = universe[pkg]
arch_stats = graph_stats[pkg_arch]
arch_stats.nodes += 1
if pkg in eqv_table and pkg not in seen_eqv[pkg_arch]:
eqv = [e for e in eqv_table[pkg] if e.architecture == pkg_arch]
arch_stats.eqv_nodes += len(eqv)
arch_stats.add_dep_edges(deps)
arch_stats.add_con_edges(con)
for stat in graph_stats.values():
stat.compute_all()
return graph_stats
class InstallabilityStats(object):
def __init__(self):
self.cache_hits = 0
self.cache_misses = 0
self.cache_drops = 0
self.backtrace_restore_point_created = 0
self.backtrace_restore_point_used = 0
self.backtrace_last_option = 0
self.choice_presolved = 0
self.choice_resolved_using_safe_set = 0
self.choice_resolved_without_restore_point = 0
self.is_installable_calls = 0
self.solved_installable = 0
self.solved_uninstallable = 0
self.conflicts_essential = 0
self.eqv_table_times_used = 0
self.eqv_table_reduced_to_one = 0
self.eqv_table_reduced_by_zero = 0
self.eqv_table_total_number_of_alternatives_eliminated = 0
def stats(self):
formats = [
"Requests - is_installable: {is_installable_calls}",
"Cache - hits: {cache_hits}, misses: {cache_misses}, drops: {cache_drops}",
"Choices - pre-solved: {choice_presolved}, safe-set: {choice_resolved_using_safe_set}, No RP: {choice_resolved_without_restore_point}",
"Backtrace - RP created: {backtrace_restore_point_created}, RP used: {backtrace_restore_point_used}, reached last option: {backtrace_last_option}",
"Solved - installable: {solved_installable}, uninstallable: {solved_uninstallable}, conflicts essential: {conflicts_essential}",
"Eqv - times used: {eqv_table_times_used}, perfect reductions: {eqv_table_reduced_to_one}, failed reductions: {eqv_table_reduced_by_zero}, total no. of alternatives pruned: {eqv_table_total_number_of_alternatives_eliminated}",
]
return [x.format(**self.__dict__) for x in formats]
class ArchStats(object):
def __init__(self):
self.nodes = 0
self.eqv_nodes = 0
self.dep_edges = []
self.con_edges = []
self.stats = defaultdict(lambda: defaultdict(int))
def stat(self, statname):
return self.stats[statname]
def stat_summary(self):
text = []
for statname in ['nodes', 'dependency-clauses', 'dependency-clause-alternatives', 'negative-dependency-clauses']:
stat = self.stats[statname]
if statname != 'nodes':
format_str = "%s, max: %d, min: %d, median: %d, average: %f (%d/%d)"
values = [statname, stat['max'], stat['min'], stat['median'], stat['average'], stat['sum'], stat['size']]
if 'average-per-node' in stat:
format_str += ", average-per-node: %f"
values.append(stat['average-per-node'])
else:
format_str = "nodes: %d, eqv-nodes: %d"
values = (self.nodes, self.eqv_nodes)
text.append(format_str % tuple(values))
return text
def add_dep_edges(self, edges):
self.dep_edges.append(edges)
def add_con_edges(self, edges):
self.con_edges.append(edges)
def _list_stats(self, stat_name, sorted_list, average_per_node=False):
if sorted_list:
stats = self.stats[stat_name]
stats['max'] = sorted_list[-1]
stats['min'] = sorted_list[0]
stats['sum'] = sum(sorted_list)
stats['size'] = len(sorted_list)
stats['average'] = float(stats['sum'])/len(sorted_list)
stats['median'] = sorted_list[len(sorted_list)//2]
if average_per_node:
stats['average-per-node'] = float(stats['sum'])/self.nodes
def compute_all(self):
dep_edges = self.dep_edges
con_edges = self.con_edges
sorted_no_dep_edges = sorted(len(x) for x in dep_edges)
sorted_size_dep_edges = sorted(len(x) for x in chain.from_iterable(dep_edges))
sorted_no_con_edges = sorted(len(x) for x in con_edges)
self._list_stats('dependency-clauses', sorted_no_dep_edges)
self._list_stats('dependency-clause-alternatives', sorted_size_dep_edges, average_per_node=True)
self._list_stats('negative-dependency-clauses', sorted_no_con_edges)