installability: Exploit equivalency to reduce choices

For some cases, like aspell-dictionary, a number of packages can satisfy the dependency (e.g. all aspell-*). In this particular example, most (all?) of the aspell-* packages look so similar that reverse dependencies cannot tell two aspell-* packages apart (with regard to installability and co-installability). This patch attempts to help the installability tester by detecting such cases and reducing the number of candidates for a given choice. Reported-In: <20140716134823.GA11795@x230-buxy.home.ouaza.com> Signed-off-by: Niels Thykier <niels@thykier.net>
2025-08-11 15:44:08 +00:00 · 2014-07-19 20:05:23 +02:00 · 2014-07-19 20:05:23 +02:00 · 72daebd67c
commit 72daebd67c
parent e9a7a07856
3 changed files with 143 additions and 28 deletions
--- a/installability/builder.py
+++ b/installability/builder.py
@ -12,6 +12,7 @@
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.

+from collections import defaultdict
 from contextlib import contextmanager

 from britney_util import ifilter_except, iter_except
@ -28,7 +29,7 @@ class _RelationBuilder(object):
        self._new_breaks = set(binary_data[1])


-    def add_dependency_clause(self, or_clause):
+    def add_dependency_clause(self, or_clause, frozenset=frozenset):
        """Add a dependency clause

        The clause must be a sequence of (name, version, architecture)
@ -48,12 +49,12 @@ class _RelationBuilder(object):
        binary = self._binary
        itbuilder = self._itbuilder
        package_table = itbuilder._package_table
-        reverse_package_table = itbuilder._reverse_package_table
        okay = False
        for dep_tuple in clause:
            okay = True
-            reverse_relations = itbuilder._reverse_relations(dep_tuple)
-            reverse_relations[0].add(binary)
+            rdeps, _, rdep_relations = itbuilder._reverse_relations(dep_tuple)
+            rdeps.add(binary)
+            rdep_relations.add(clause)

        self._new_deps.add(clause)
        if not okay:
@ -193,7 +194,7 @@ class InstallabilityTesterBuilder(object):

        if binary in self._reverse_package_table:
            return self._reverse_package_table[binary]
-        rel = [set(), set()]
+        rel = [set(), set(), set()]
        self._reverse_package_table[binary] = rel
        return rel

@ -227,18 +228,21 @@ class InstallabilityTesterBuilder(object):
        # operations in _check_loop since we only have to check one
        # set (instead of two) and we remove a few duplicates here
        # and there.
+        #
+        # At the same time, intern the rdep sets
        for pkg in reverse_package_table:
            if pkg not in package_table:
                raise RuntimeError("%s/%s/%s referenced but not added!" % pkg)
-            if not reverse_package_table[pkg][1]:
-                # no rconflicts - ignore
-                continue
            deps, con = package_table[pkg]
-            if not con:
-                con = intern_set(reverse_package_table[pkg][1])
-            else:
-                con = intern_set(con | reverse_package_table[pkg][1])
-            package_table[pkg] = (deps, con)
+            rdeps, rcon, rdep_relations = reverse_package_table[pkg]
+            if rcon:
+                if not con:
+                    con = intern_set(rcon)
+                else:
+                    con = intern_set(con | rcon)
+                package_table[pkg] = (deps, con)
+            reverse_package_table[pkg] = (intern_set(rdeps), con,
+                                          intern_set(rdep_relations))

        # Check if we can expand broken.
        for t in not_broken(iter_except(check.pop, KeyError)):
@ -308,8 +312,95 @@ class InstallabilityTesterBuilder(object):
                    # add all rdeps (except those already in the safe_set)
                    check.update(reverse_package_table[pkg][0] - safe_set)

+        eqv_table = self._build_eqv_packages_table(package_table,
+                                       reverse_package_table)

        return InstallabilitySolver(package_table,
                                    reverse_package_table,
                                    self._testing, self._broken,
-                                    self._essentials, safe_set)
+                                    self._essentials, safe_set,
+                                    eqv_table)
+
+
+    def _build_eqv_packages_table(self, package_table,
+                                  reverse_package_table,
+                                  frozenset=frozenset):
+        """Attempt to build a table of equivalent packages
+
+        This method attempts to create a table of packages that are
+        equivalent (in terms of installability).  If two packages (A
+        and B) are equivalent then testing the installability of A is
+        the same as testing the installability of B.  This equivalency
+        also applies to co-installability.
+
+        The example cases:
+        * aspell-*
+        * ispell-*
+
+        Cases that do *not* apply:
+        * MTA's
+
+        The theory:
+
+        The packages A and B are equivalent iff:
+
+          reverse_depends(A) == reverse_depends(B) AND
+                conflicts(A) == conflicts(B)       AND
+                  depends(A) == depends(B)
+
+        Where "reverse_depends(X)" is the set of reverse dependencies
+        of X, "conflicts(X)" is the set of negative dependencies of X
+        (Breaks and Conflicts plus the reverse ones of those combined)
+        and "depends(X)" is the set of strong dependencies of X
+        (Depends and Pre-Depends combined).
+
+        To be honest, we are actually equally interested in another
+        property as well, namely substitutability.  The package A can
+        always be used instead of B, iff:
+
+          reverse_depends(A) >= reverse_depends(B) AND
+                conflicts(A) <= conflicts(B)       AND
+                  depends(A) == depends(B)
+
+        (With the same definitions as above).  Note that equivalency
+        is just a special-case of substitutability, where A and B can
+        substitute each other (i.e. a two-way substitution).
+
+        Finally, note that the "depends(A) == depends(B)" for
+        substitutability is actually not a strict requirement.  There
+        are cases where those sets are different without affecting the
+        property.
+        """
+        # Despite talking about substitutability, the method currently
+        # only finds the equivalence cases.  Let's leave
+        # substitutability for a future version.
+
+        find_eqv_table = defaultdict(list)
+        eqv_table = {}
+
+        for pkg in reverse_package_table:
+            rdeps = reverse_package_table[pkg][2]
+            if not rdeps:
+                # we don't care for things without rdeps (because
+                # it is not worth it)
+                continue
+            deps, con = package_table[pkg]
+            ekey = (deps, con, rdeps)
+            find_eqv_table[ekey].append(pkg)
+
+        for pkg_list in find_eqv_table.itervalues():
+            if len(pkg_list) < 2:
+                continue
+            if (len(pkg_list) == 2 and pkg_list[0][0] == pkg_list[1][0]
+               and pkg_list[0][2] == pkg_list[1][2]):
+                # This is a (most likely) common and boring case.  It
+                # is when pkgA depends on pkgB and is satisfied with
+                # any version available.  However, at most one version
+                # of pkgB will be available in testing, so other
+                # filters will make this case redundant.
+                continue
+            eqv_set = frozenset(pkg_list)
+            for pkg in pkg_list:
+                eqv_table[pkg] = eqv_set
+
+        return eqv_table
--- a/installability/solver.py
+++ b/installability/solver.py
@ -24,7 +24,7 @@ from britney_util import (ifilter_only, iter_except)
 class InstallabilitySolver(InstallabilityTester):

    def __init__(self, universe, revuniverse, testing, broken, essentials,
-                 safe_set):
+                 safe_set, eqv_table):
        """Create a new installability solver

        universe is a dict mapping package tuples to their
@ -44,7 +44,7 @@ class InstallabilitySolver(InstallabilityTester):
            (simplifies caches and dependency checking)
        """
        InstallabilityTester.__init__(self, universe, revuniverse, testing,
-                                      broken, essentials, safe_set)
+                                      broken, essentials, safe_set, eqv_table)


    def solve_groups(self, groups):
--- a/installability/tester.py
+++ b/installability/tester.py
@ -20,7 +20,7 @@ from britney_util import iter_except
 class InstallabilityTester(object):

    def __init__(self, universe, revuniverse, testing, broken, essentials,
-                 safe_set):
+                 safe_set, eqv_table):
        """Create a new installability tester

        universe is a dict mapping package tuples to their
@ -51,6 +51,7 @@ class InstallabilityTester(object):
        self._essentials = essentials
        self._revuniverse = revuniverse
        self._safe_set = safe_set
+        self._eqv_table = eqv_table

        # Cache of packages known to be broken - we deliberately do not
        # include "broken" in it.  See _optimize for more info.
@ -235,8 +236,9 @@ class InstallabilityTester(object):
            never.update(ess_never)

        # curry check_loop
-        check_loop = partial(self._check_loop, universe, testing, musts,
-                             never, choices, cbroken)
+        check_loop = partial(self._check_loop, universe, testing,
+                             self._eqv_table, musts, never, choices,
+                             cbroken)


        # Useful things to remember:
@ -359,8 +361,9 @@ class InstallabilityTester(object):
        return verdict


-    def _check_loop(self, universe, testing, musts, never,
-                    choices, cbroken, check):
+    def _check_loop(self, universe, testing, eqv_table, musts, never,
+                    choices, cbroken, check, len=len,
+                    frozenset=frozenset):
        """Finds all guaranteed dependencies via "check".

        If it returns False, t is not installable.  If it returns True
@ -368,8 +371,6 @@ class InstallabilityTester(object):
        returns True, then t is installable.
        """
        # Local variables for faster access...
-        l = len
-        fset = frozenset
        not_satisfied = partial(ifilter, musts.isdisjoint)

        # While we have guaranteed dependencies (in check), examine all
@ -401,9 +402,9 @@ class InstallabilityTester(object):
                #  - not in testing
                #  - known to be broken (by cache)
                #  - in never
-                candidates = fset((depgroup & testing) - never)
+                candidates = frozenset((depgroup & testing) - never)

-                if l(candidates) == 0:
+                if len(candidates) == 0:
                    # We got no candidates to satisfy it - this
                    # package cannot be installed with the current
                    # testing
@ -413,21 +414,43 @@ class InstallabilityTester(object):
                        cbroken.add(cur)
                        testing.remove(cur)
                    return False
-                if l(candidates) == 1:
+                if len(candidates) == 1:
                    # only one possible solution to this choice and we
                    # haven't seen it before
                    check.update(candidates)
                    musts.update(candidates)
                else:
+                    possible_eqv = set(x for x in candidates if x in eqv_table)
+                    if len(possible_eqv) > 1:
+                        # Exploit equivalency to reduce the number of
+                        # candidates if possible.  Basically, this
+                        # code maps "similar" candidates into a single
+                        # candidate that will give an identical result
+                        # to any other candidate it eliminates.
+                        #
+                        # See InstallabilityTesterBuilder's
+                        # _build_eqv_packages_table method for more
+                        # information on how this works.
+                        new_cand = set(x for x in candidates if x not in possible_eqv)
+                        for chosen in iter_except(possible_eqv.pop, KeyError):
+                            new_cand.add(chosen)
+                            possible_eqv -= eqv_table[chosen]
+                        if len(new_cand) == 1:
+                            check.update(new_cand)
+                            musts.update(new_cand)
+                            continue
+                        candidates = frozenset(new_cand)
                    # defer this choice till later
                    choices.add(candidates)
        return True

+
    def _get_min_pseudo_ess_set(self, arch):
        if arch not in self._cache_ess:
            # The minimal essential set cache is not present -
            # compute it now.
            testing = self._testing
+            eqv_table = self._eqv_table
            cbroken = self._cache_broken
            universe = self._universe
            safe_set = self._safe_set
@ -439,8 +462,9 @@ class InstallabilityTester(object):
            not_satisified = partial(ifilter, start.isdisjoint)

            while ess_base:
-                self._check_loop(universe, testing, start, ess_never,\
-                                     ess_choices, cbroken, ess_base)
+                self._check_loop(universe, testing, eqv_table,
+                                 start, ess_never, ess_choices,
+                                 cbroken, ess_base)
                if ess_choices:
                    # Try to break choices where possible
                    nchoice = set()