mirror of
https://git.launchpad.net/~ubuntu-release/britney/+git/britney2-ubuntu
synced 2025-02-12 15:07:04 +00:00
When we're removing a choice, we need to look into all the choice groups and invalidate the cache if the removed package is in any of them.
672 lines
28 KiB
Python
672 lines
28 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright (C) 2012 Niels Thykier <niels@thykier.net>
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
|
|
from collections import defaultdict
|
|
from functools import partial
|
|
import logging
|
|
from itertools import chain, filterfalse
|
|
|
|
from britney2.utils import iter_except, add_transitive_dependencies_flatten
|
|
|
|
|
|
class InstallabilityTester(object):
    """Answer installability queries for the packages of a mutable suite.

    The tester keeps three caches (known-installable packages,
    known-broken packages and a per-architecture "pseudo-essential"
    set).  add_binary and remove_binary drop the caches that a change
    to the suite could have invalidated.
    """

    def __init__(self, universe, suite_contents):
        """Create a new installability tester

        universe is a BinaryPackageUniverse

        suite_contents is a (mutable) set of package ids that determines
        which of the packages in universe are currently in the suite.

        Package id: (pkg_name, pkg_version, pkg_arch)
          - NB: arch:all packages are "re-mapped" to given architecture.
            (simplifies caches and dependency checking)
        """

        self._universe = universe
        # FIXME: Move this field to TargetSuite
        self._suite_contents = suite_contents
        self._stats = InstallabilityStats()
        logger_name = ".".join((self.__class__.__module__, self.__class__.__name__))
        self.logger = logging.getLogger(logger_name)

        # Cache of packages known to be broken - we deliberately do not
        # include "broken" in it.  See _optimize for more info.
        self._cache_broken = set()
        # Cache of packages known to be installable
        self._cache_inst = set()
        # Per "arch" cache of the "minimal" (possibly incomplete)
        # pseudo-essential set.  This includes all the packages that
        # are essential and packages that will always follow.
        #
        # It may not be a complete essential set, since alternatives
        # are not always resolved.  Noticeably cases like "awk" may be
        # left out (since it could be either gawk, mawk or
        # original-awk) unless something in this sets depends strictly
        # on one of them
        self._cache_ess = {}

        essential_w_transitive_deps = set(universe.essential_packages)
        add_transitive_dependencies_flatten(universe, essential_w_transitive_deps)
        self._cache_essential_transitive_dependencies = essential_w_transitive_deps

    def compute_installability(self):
        """Computes the installability of all the packages in the suite

        This method computes the installability of all packages in
        the suite and caches the result.  This has the advantage of
        making "is_installable" queries very fast for all packages
        in the suite.
        """

        universe = self._universe
        check_inst = self._check_inst
        cbroken = self._cache_broken
        cache_inst = self._cache_inst
        suite_contents = self._suite_contents
        # Iterate over a snapshot: _check_inst (and the code below) may
        # remove broken packages from suite_contents while we loop.
        tcopy = list(suite_contents)
        # filterfalse is lazy, so packages that an earlier iteration
        # proved installable are skipped when we reach them.
        for t in filterfalse(cache_inst.__contains__, tcopy):
            if t in cbroken:
                continue
            res = check_inst(t)
            if t in universe.equivalent_packages:
                # The verdict for t is applied to every equivalent
                # package that is currently in the suite.
                eqv = (x for x in universe.packages_equivalent_to(t) if x in suite_contents)
                if res:
                    cache_inst.update(eqv)
                else:
                    eqv_set = frozenset(eqv)
                    suite_contents -= eqv_set
                    cbroken |= eqv_set

    @property
    def stats(self):
        """The InstallabilityStats counters maintained by this tester"""
        return self._stats

    def any_of_these_are_in_the_suite(self, pkgs):
        """Test if at least one package of a given set is in the suite

        :param pkgs: A set of package ids (as defined in the constructor)
        :return: True if any of the packages in pkgs are currently in the suite
        """
        return not self._suite_contents.isdisjoint(pkgs)

    def is_pkg_in_the_suite(self, pkg_id):
        """Test if the package is in the suite

        :param pkg_id: A package id (as defined in the constructor)
        :return: True if the pkg is currently in the suite
        """
        return pkg_id in self._suite_contents

    def which_of_these_are_in_the_suite(self, pkgs):
        """Iterate over all packages that are in the suite

        :param pkgs: An iterable of package ids
        :return: An iterable of package ids that are in the suite
        """
        yield from (x for x in pkgs if x in self._suite_contents)

    def add_binary(self, pkg_id):
        """Add a binary package to the suite

        If the package is not known, this method will throw an
        KeyError.

        Adding a package that was not already in the suite (and is not
        listed in the universe's broken_packages) drops the
        installability cache, puts the cached-broken packages back in
        the suite for re-evaluation and, when the package is among the
        transitive dependencies of the essential packages, drops the
        pseudo-essential cache of its architecture.

        :param pkg_id The id of the package
        :return: Always True
        """

        if pkg_id not in self._universe:  # pragma: no cover
            raise KeyError(str(pkg_id))

        if pkg_id in self._universe.broken_packages:
            self._suite_contents.add(pkg_id)
        elif pkg_id not in self._suite_contents:
            self._suite_contents.add(pkg_id)
            if self._cache_inst:
                self._stats.cache_drops += 1
            self._cache_inst = set()
            if self._cache_broken:
                # Re-add broken packages as some of them may now be installable
                self._suite_contents |= self._cache_broken
                self._cache_broken = set()
            if pkg_id in self._cache_essential_transitive_dependencies and pkg_id.architecture in self._cache_ess:
                # Adds new possibly pseudo-essential => "pseudo-essential" set needs to be
                # recomputed
                del self._cache_ess[pkg_id.architecture]

        return True

    def remove_binary(self, pkg_id):
        """Remove a binary from the suite

        If the package is not known, this method will throw an
        KeyError.

        The pseudo-essential cache of the package's architecture is
        dropped when the package is part of its start set or of any of
        its unresolved choice groups.  The installability cache is
        dropped when the package has reverse dependencies and was
        cached as installable.

        :param pkg_id The id of the package
        :return: Always True
        """

        if pkg_id not in self._universe:  # pragma: no cover
            raise KeyError(str(pkg_id))

        self._cache_broken.discard(pkg_id)

        if pkg_id in self._suite_contents:
            self._suite_contents.remove(pkg_id)
            if pkg_id.architecture in self._cache_ess:
                (start, ess_never, ess_choices) = self._cache_ess[pkg_id.architecture]
                if pkg_id in start or any(pkg_id in choices for choices in ess_choices):
                    # Removes a package from the "pseudo-essential set"
                    # (either a definite member or one alternative of
                    # a still-open choice group)
                    del self._cache_ess[pkg_id.architecture]

            if not self._universe.reverse_dependencies_of(pkg_id):
                # no reverse relations - safe
                return True
            if pkg_id not in self._universe.broken_packages and pkg_id in self._cache_inst:
                # It is in our cache (and not guaranteed to be broken) - throw out the cache
                self._cache_inst = set()
                self._stats.cache_drops += 1

        return True

    def is_installable(self, pkg_id):
        """Test if a package is installable in this package set

        The package is assumed to be in the suite and only packages in
        the suite can be used to satisfy relations.

        If the package is not known, this method will throw an
        KeyError.

        :param pkg_id The id of the package
        Returns True iff the package is installable.
        Returns False otherwise.
        """

        self._stats.is_installable_calls += 1

        if pkg_id not in self._universe:  # pragma: no cover
            raise KeyError(str(pkg_id))

        if pkg_id not in self._suite_contents or pkg_id in self._universe.broken_packages:
            self._stats.cache_hits += 1
            return False

        if pkg_id in self._cache_inst:
            self._stats.cache_hits += 1
            return True

        self._stats.cache_misses += 1
        return self._check_inst(pkg_id)

    def _check_inst(self, t, musts=None, never=None, choices=None):
        """Determine whether t is installable in the current suite

        :param t: The package id to test
        :param musts: Set of packages that must be installed with t
          (mutated; created when omitted)
        :param never: Set of packages that can never be chosen
          (mutated; created when omitted)
        :param choices: Set of frozensets, each an unresolved choice
          between alternatives (created when omitted)
        :return: True if t was found to be installable, otherwise False

        On success every package in musts is added to the
        installability cache.  Packages found to be broken along the
        way are moved from the suite contents to the broken cache.
        """
        # See the explanation of musts, never and choices below.
        stats = self._stats
        universe = self._universe
        suite_contents = self._suite_contents
        cbroken = self._cache_broken

        # Our installability verdict - start with "yes" and change if
        # prove otherwise.
        verdict = True

        # set of packages that must be installed with this package
        if musts is None:
            musts = set()
        musts.add(t)
        # set of packages we can *never* choose (e.g. due to conflicts)
        if never is None:
            never = set()
        # set of relations were we have a choice, but where we have not
        # committed ourselves yet.  Hopefully some choices may be taken
        # for us (if one of the alternatives appear in "musts")
        if choices is None:
            choices = set()

        # The subset of musts we haven't checked yet.
        check = [t]

        if len(musts) == 1:
            # Include the essential packages in the suite as a starting point.
            if t.architecture not in self._cache_ess:
                # The minimal essential set cache is not present -
                # compute it now.
                (start, ess_never, ess_choices) = self._get_min_pseudo_ess_set(t.architecture)
            else:
                (start, ess_never, ess_choices) = self._cache_ess[t.architecture]

            if t in ess_never:
                # t conflicts with something in the essential set or the essential
                # set conflicts with t - either way, t is f***ed
                cbroken.add(t)
                suite_contents.remove(t)
                stats.conflicts_essential += 1
                return False
            musts.update(start)
            never.update(ess_never)
            choices.update(ess_choices)

        # curry check_loop
        check_loop = partial(self._check_loop, universe, suite_contents,
                             stats, musts, never, cbroken)

        # Useful things to remember:
        #
        # * musts and never are disjointed at all times
        #   - if not, t cannot be installable.  Either t, or one of
        #     its dependencies conflict with t or one of its (other)
        #     dependencies.
        #
        # * choices should generally be avoided as much as possible.
        #   - picking a bad choice requires backtracking
        #   - sometimes musts/never will eventually "solve" the choice.
        #
        # * check never includes choices (these are always in choices)
        #
        # * A package is installable if never and musts are disjointed
        #   and both check and choices are empty.
        #   - exception: resolve_choices may determine the installability
        #     of t via recursion (calls _check_inst).  In this case
        #     check and choices are not (always) empty.

        def _prune_choices(rebuild, len=len):
            """Prunes the pending choices and updates rebuild.

            Choices that are already satisfied by musts are dropped.
            A choice with exactly one remaining alternative is
            committed (the alternative is added to check and musts).
            Choices that still have several alternatives are added to
            "rebuild" (reduced to the remaining alternatives).

            Returns False if some choice has no alternative left (t is
            uninstallable).
            Returns True otherwise.

            NB: If this returns True, choices should be replaced by
            rebuild.
            """

            # We already satisfied/chosen at least one of the literals
            # in the choice, so the choice is gone
            for choice in filter(musts.isdisjoint, choices):
                # cbroken is needed here because (in theory) it could
                # have changed since the choice was discovered and it
                # is smaller than suite_contents (so presumably faster)
                remain = choice - never - cbroken

                if len(remain) == 1:
                    # the choice was reduced to one package we haven't checked - check that
                    check.extend(remain)
                    musts.update(remain)
                    stats.choice_presolved += 1
                    continue

                if not remain:
                    # all alternatives would violate the conflicts or are uninstallable
                    # => package is not installable
                    stats.choice_presolved += 1
                    return False

                # The choice is still deferred
                rebuild.add(frozenset(remain))

            return True

        # END _prune_choices

        while check:
            if not check_loop(choices, check):
                verdict = False
                break

            if choices:
                rebuild = set()

                if not _prune_choices(rebuild):
                    verdict = False
                    break

                if not check and rebuild:
                    # We have to "guess" now, which is always fun, but not cheap.  We
                    # stop guessing:
                    # - once we run out of choices to make (obviously), OR
                    # - if one of the choices exhaust all but one option
                    if self.resolve_choices(check, musts, never, rebuild):
                        # The recursive call have already updated the
                        # cache so there is not point in doing it again.
                        return True
                choices = rebuild

        if verdict:
            # if t is installable, then so are all packages in musts
            self._cache_inst.update(musts)
            stats.solved_installable += 1
        else:
            stats.solved_uninstallable += 1

        return verdict

    def resolve_choices(self, check, musts, never, choices):
        """Try to settle the pending choices by guessing alternatives

        :param check: List of packages still to be examined (the last
          alternative of an unsolved choice is appended to it)
        :param musts: Set of packages that must be installed (mutated)
        :param never: Set of packages that cannot be chosen (mutated)
        :param choices: Set of frozensets of alternatives; consumed
          via pop()
        :return: True if a recursive _check_inst call succeeded.
          False as soon as one choice could not be settled and its
          last alternative was queued in check.  None if all choices
          were consumed without either of those happening.
        """
        universe = self._universe
        suite_contents = self._suite_contents
        stats = self._stats
        cbroken = self._cache_broken

        while choices:
            choice_options = choices.pop()

            choice = iter(choice_options)
            last = next(choice)  # pick one to go last
            solved = False
            for p in choice:
                musts_copy = musts.copy()
                never_tmp = set()
                choices_tmp = set()
                check_tmp = [p]
                # _check_loop assumes that "musts" is up to date
                musts_copy.add(p)
                if not self._check_loop(universe, suite_contents,
                                        stats, musts_copy, never_tmp,
                                        cbroken, choices_tmp,
                                        check_tmp):
                    # p cannot be chosen/is broken (unlikely, but ...)
                    continue

                # Test if we can pick p without any consequences.
                # - when we can, we avoid a backtrack point.
                if never_tmp <= never and choices_tmp <= choices:
                    # we can pick p without picking up new conflicts
                    # or unresolved choices.  Therefore we commit to
                    # using p.
                    musts.update(musts_copy)
                    stats.choice_resolved_without_restore_point += 1
                    solved = True
                    break

                if not musts.isdisjoint(never_tmp):
                    # If we pick p, we will definitely end up making
                    # t uninstallable, so p is a no-go.
                    continue

                stats.backtrace_restore_point_created += 1
                # We are not sure that p is safe, setup a backtrack
                # point and recurse.
                never_tmp |= never
                choices_tmp |= choices
                if self._check_inst(p, musts_copy, never_tmp,
                                    choices_tmp):
                    # Success, p was a valid choice and made it all
                    # installable
                    return True

                # If we get here, we failed to find something that
                # would satisfy choice (without breaking the
                # installability of t).  This means p cannot be used
                # to satisfy the dependencies, so pretend to conflict
                # with it - hopefully it will reduce future choices.
                never.add(p)
                stats.backtrace_restore_point_used += 1

            if not solved:
                # Optimization for the last case; avoid the recursive call
                # and just assume the last will lead to a solution.  If it
                # doesn't there is no solution and if it does, we don't
                # have to back-track anyway.
                check.append(last)
                musts.add(last)
                stats.backtrace_last_option += 1
                return False

        # Every choice was settled without a restore point.
        return None

    def _check_loop(self, universe, suite_contents, stats, musts, never,
                    cbroken, choices, check, len=len,
                    frozenset=frozenset):
        """Finds all guaranteed dependencies via "check".

        If it returns False, t is not installable.  If it returns True
        then "check" is exhausted.  If "choices" are empty and this
        returns True, then t is installable.

        musts, never, choices and check are updated in place.  A
        package whose dependency cannot be satisfied even without the
        "never" restrictions is added to cbroken and removed from
        suite_contents.  The len and frozenset parameters only bind
        the builtins as locals.
        """
        # Local variables for faster access...
        not_satisfied = partial(filter, musts.isdisjoint)

        # While we have guaranteed dependencies (in check), examine all
        # of them.
        for cur in iter_except(check.pop, IndexError):
            relations = universe.relations_of(cur)

            if relations.negative_dependencies:
                # Conflicts?
                if cur in never:
                    # cur adds a (reverse) conflict, so check if cur
                    # is in never.
                    #
                    # - there is a window where two conflicting
                    #   packages can be in check.  Example "A" depends
                    #   on "B" and "C".  If "B" conflicts with "C",
                    #   then both "B" and "C" could end in "check".
                    return False
                # We must install cur for the package to be installable,
                # so "obviously" we can never choose any of its conflicts
                never.update(relations.negative_dependencies & suite_contents)

            # depgroup can be satisfied by picking something that is
            # already in musts - lets pick that (again).  :)
            for depgroup in not_satisfied(relations.dependencies):

                # Of all the packages listed in the relation remove those that
                # are either:
                #  - not in the suite
                #  - known to be broken (by cache)
                #  - in never
                candidates = (depgroup & suite_contents) - never

                if not candidates:
                    # We got no candidates to satisfy it - this
                    # package cannot be installed with the current
                    # (version of the) suite
                    if cur not in cbroken and depgroup.isdisjoint(never):
                        # cur's dependency cannot be satisfied even if never was empty.
                        # This means that cur itself is broken (as well).
                        cbroken.add(cur)
                        suite_contents.remove(cur)
                    return False
                if len(candidates) == 1:
                    # only one possible solution to this choice and we
                    # haven't seen it before
                    check.extend(candidates)
                    musts.update(candidates)
                else:
                    possible_eqv = {x for x in candidates if x in universe.equivalent_packages}
                    if len(possible_eqv) > 1:
                        # Exploit equivalency to reduce the number of
                        # candidates if possible.  Basically, this
                        # code maps "similar" candidates into a single
                        # candidate that will give a identical result
                        # to any other candidate it eliminates.
                        #
                        # See InstallabilityTesterBuilder's
                        # _build_eqv_packages_table method for more
                        # information on how this works.
                        new_cand = {x for x in candidates if x not in possible_eqv}
                        stats.eqv_table_times_used += 1

                        for chosen in iter_except(possible_eqv.pop, KeyError):
                            new_cand.add(chosen)
                            possible_eqv -= universe.packages_equivalent_to(chosen)
                        stats.eqv_table_total_number_of_alternatives_eliminated += len(candidates) - len(new_cand)
                        if len(new_cand) == 1:
                            check.extend(new_cand)
                            musts.update(new_cand)
                            stats.eqv_table_reduced_to_one += 1
                            continue
                        elif len(candidates) == len(new_cand):
                            stats.eqv_table_reduced_by_zero += 1

                        candidates = frozenset(new_cand)
                    else:
                        # Candidates have to be a frozenset to be added to choices
                        candidates = frozenset(candidates)
                    # defer this choice till later
                    choices.add(candidates)
        return True

    def _get_min_pseudo_ess_set(self, arch):
        """Return (and cache) the minimal pseudo-essential set for arch

        :param arch: The architecture to compute the set for
        :return: A tuple of three frozensets (start, ess_never,
          ess_choices): the packages always installed, the packages
          they rule out and the choice groups left unresolved.
        """
        if arch not in self._cache_ess:
            # The minimal essential set cache is not present -
            # compute it now.
            suite_contents = self._suite_contents
            cbroken = self._cache_broken
            universe = self._universe
            stats = self._stats

            ess_base = [x for x in self._universe.essential_packages if x.architecture == arch and x in suite_contents]
            start = set(ess_base)
            ess_never = set()
            ess_choices = set()
            not_satisfied = partial(filter, start.isdisjoint)

            while ess_base:
                self._check_loop(universe, suite_contents, stats,
                                 start, ess_never, cbroken,
                                 ess_choices, ess_base)
                if ess_choices:
                    # Try to break choices where possible
                    nchoice = set()
                    for choice in not_satisfied(ess_choices):
                        b = False
                        for c in choice:
                            relations = universe.relations_of(c)
                            # c is "free" when it adds no new conflicts
                            # and all its dependencies are already
                            # satisfied by the start set.
                            if relations.negative_dependencies <= ess_never and \
                                    not any(not_satisfied(relations.dependencies)):
                                ess_base.append(c)
                                b = True
                                break
                        if not b:
                            nchoice.add(choice)
                    ess_choices = nchoice
                else:
                    break

            for x in start:
                ess_never.update(universe.negative_dependencies_of(x))
            self._cache_ess[arch] = (frozenset(start), frozenset(ess_never), frozenset(ess_choices))

        return self._cache_ess[arch]

    def compute_stats(self):
        """Compute per-architecture statistics of the dependency graph

        :return: A dict-like mapping of architecture to ArchStats with
          compute_all() already called on each entry.
        """
        universe = self._universe
        graph_stats = defaultdict(ArchStats)
        seen_eqv = defaultdict(set)

        for pkg in universe:
            (_, _, pkg_arch) = pkg
            relations = universe.relations_of(pkg)
            arch_stats = graph_stats[pkg_arch]

            arch_stats.nodes += 1

            seen = seen_eqv[pkg_arch]
            if pkg in universe.equivalent_packages and pkg not in seen:
                eqv = [e for e in universe.packages_equivalent_to(pkg) if e.architecture == pkg_arch]
                arch_stats.eqv_nodes += len(eqv)
                # Remember the whole equivalence class so its members
                # are only counted once.
                seen.update(eqv)
                seen.add(pkg)

            arch_stats.add_dep_edges(relations.dependencies)
            arch_stats.add_con_edges(relations.negative_dependencies)

        for stat in graph_stats.values():
            stat.compute_all()

        return graph_stats
|
|
|
|
|
|
class InstallabilityStats(object):
    """Bag of integer counters describing the work done by the tester.

    Every counter is a plain instance attribute starting at zero; the
    code that uses the statistics increments them directly.
    """

    # Names of all counters, in the order they are initialised.
    _COUNTER_NAMES = (
        'cache_hits',
        'cache_misses',
        'cache_drops',
        'backtrace_restore_point_created',
        'backtrace_restore_point_used',
        'backtrace_last_option',
        'choice_presolved',
        'choice_resolved_without_restore_point',
        'is_installable_calls',
        'solved_installable',
        'solved_uninstallable',
        'conflicts_essential',
        'eqv_table_times_used',
        'eqv_table_reduced_to_one',
        'eqv_table_reduced_by_zero',
        'eqv_table_total_number_of_alternatives_eliminated',
    )

    def __init__(self):
        for counter_name in self._COUNTER_NAMES:
            setattr(self, counter_name, 0)

    def stats(self):
        """Render the counters as a list of human readable lines

        :return: A list of strings, one per group of counters
        """
        templates = (
            "Requests - is_installable: {is_installable_calls}",
            "Cache - hits: {cache_hits}, misses: {cache_misses}, drops: {cache_drops}",
            "Choices - pre-solved: {choice_presolved}, No RP: {choice_resolved_without_restore_point}",
            "Backtrace - RP created: {backtrace_restore_point_created}, RP used: {backtrace_restore_point_used}, reached last option: {backtrace_last_option}",  # nopep8
            "Solved - installable: {solved_installable}, uninstallable: {solved_uninstallable}, conflicts essential: {conflicts_essential}",  # nopep8
            "Eqv - times used: {eqv_table_times_used}, perfect reductions: {eqv_table_reduced_to_one}, failed reductions: {eqv_table_reduced_by_zero}, total no. of alternatives pruned: {eqv_table_total_number_of_alternatives_eliminated}",  # nopep8
        )
        # The templates only name instance attributes, so the instance
        # dictionary is the lookup table.
        values = vars(self)
        rendered = []
        for template in templates:
            rendered.append(template.format_map(values))
        return rendered
|
|
|
|
|
|
class ArchStats(object):
    """Collects dependency graph statistics for one architecture.

    nodes and eqv_nodes are counters that the caller increments
    directly.  Edges are recorded with add_dep_edges/add_con_edges and
    summarised by compute_all into the "stats" mapping (statistic name
    => mapping of figure name to value).
    """

    def __init__(self):
        self.nodes = 0
        self.eqv_nodes = 0
        # One entry per add_dep_edges/add_con_edges call.
        self.dep_edges = []
        self.con_edges = []
        # Unknown statistics/figures read as 0 rather than raising.
        self.stats = defaultdict(lambda: defaultdict(int))

    def stat(self, statname):
        """Return the mapping of figures for the statistic statname"""
        return self.stats[statname]

    def stat_summary(self):
        """Render the statistics as a list of human readable lines

        :return: A list of four strings: the node counts followed by
          one line per edge statistic.
        """
        text = []
        for statname in ['nodes', 'dependency-clauses', 'dependency-clause-alternatives', 'negative-dependency-clauses']:
            stat = self.stats[statname]
            if statname != 'nodes':
                format_str = "%s, max: %d, min: %d, median: %d, average: %f (%d/%d)"
                values = [statname, stat['max'], stat['min'], stat['median'], stat['average'], stat['sum'], stat['size']]
                if 'average-per-node' in stat:
                    format_str += ", average-per-node: %f"
                    values.append(stat['average-per-node'])
            else:
                format_str = "nodes: %d, eqv-nodes: %d"
                values = [self.nodes, self.eqv_nodes]
            text.append(format_str % tuple(values))
        return text

    def add_dep_edges(self, edges):
        """Record the dependency clauses of one node"""
        self.dep_edges.append(edges)

    def add_con_edges(self, edges):
        """Record the negative dependencies of one node"""
        self.con_edges.append(edges)

    def _list_stats(self, stat_name, sorted_list, average_per_node=False):
        """Store the figures for stat_name computed from sorted_list

        :param stat_name: Name of the statistic to fill in
        :param sorted_list: The values in ascending order; nothing is
          stored when it is empty
        :param average_per_node: Also store the sum divided by the
          number of nodes (skipped while no nodes have been counted)
        """
        if sorted_list:
            stats = self.stats[stat_name]
            stats['max'] = sorted_list[-1]
            stats['min'] = sorted_list[0]
            stats['sum'] = sum(sorted_list)
            stats['size'] = len(sorted_list)
            stats['average'] = float(stats['sum'])/len(sorted_list)
            # For an even number of values this is the upper of the
            # two middle values.
            stats['median'] = sorted_list[len(sorted_list)//2]
            # Guard against edges having been recorded without any
            # node being counted - that used to raise ZeroDivisionError.
            if average_per_node and self.nodes:
                stats['average-per-node'] = float(stats['sum'])/self.nodes

    def compute_all(self):
        """Compute all edge statistics from the recorded edges"""
        dep_edges = self.dep_edges
        con_edges = self.con_edges
        # Number of dependency clauses per node
        sorted_no_dep_edges = sorted(len(x) for x in dep_edges)
        # Number of alternatives per dependency clause
        sorted_size_dep_edges = sorted(len(x) for x in chain.from_iterable(dep_edges))
        # Number of negative dependencies per node
        sorted_no_con_edges = sorted(len(x) for x in con_edges)
        self._list_stats('dependency-clauses', sorted_no_dep_edges)
        self._list_stats('dependency-clause-alternatives', sorted_size_dep_edges, average_per_node=True)
        self._list_stats('negative-dependency-clauses', sorted_no_con_edges)
|