You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
britney2-ubuntu/britney2/inputs/suiteloader.py

562 lines
25 KiB

from abc import abstractmethod
from collections import defaultdict
import apt_pkg
import copy
import logging
import os
import sys
from britney2 import SuiteClass, Suite, TargetSuite, Suites, BinaryPackage, BinaryPackageId, SourcePackage
from britney2.utils import (
read_release_file, possibly_compressed, read_sources_file, create_provides_map, parse_provides, parse_builtusing,
UbuntuComponent
)
class MissingRequiredConfigurationError(RuntimeError):
pass
class SuiteContentLoader(object):
def __init__(self, base_config):
self._base_config = base_config
self._architectures = SuiteContentLoader.config_str_as_list(base_config.architectures)
self._nobreakall_arches = SuiteContentLoader.config_str_as_list(base_config.nobreakall_arches, [])
self._outofsync_arches = SuiteContentLoader.config_str_as_list(base_config.outofsync_arches, [])
self._break_arches = SuiteContentLoader.config_str_as_list(base_config.break_arches, [])
self._new_arches = SuiteContentLoader.config_str_as_list(base_config.new_arches, [])
self._components = []
self._all_binaries = {}
logger_name = ".".join((self.__class__.__module__, self.__class__.__name__))
self.logger = logging.getLogger(logger_name)
@staticmethod
def config_str_as_list(value, default_value=None):
if value is None:
return default_value
if isinstance(value, str):
return value.split()
return value
@property
def architectures(self):
return self._architectures
@property
def nobreakall_arches(self):
return self._nobreakall_arches
@property
def outofsync_arches(self):
return self._outofsync_arches
@property
def break_arches(self):
return self._break_arches
@property
def new_arches(self):
return self._new_arches
@property
def components(self):
return self._components
def all_binaries(self):
return self._all_binaries
@abstractmethod
def load_suites(self): # pragma: no cover
pass
class DebMirrorLikeSuiteContentLoader(SuiteContentLoader):
CHECK_FIELDS = [
'source',
'source_version',
'architecture',
'multi_arch',
'depends',
'conflicts',
'provides',
]
def load_suites(self):
suites = []
target_suite = None
missing_config_msg = "Configuration %s is not set in the config (and cannot be auto-detected)"
for suite in ('testing', 'unstable', 'pu', 'tpu'):
suffix = suite if suite in {'pu', 'tpu'} else ''
if hasattr(self._base_config, suite):
suite_path = getattr(self._base_config, suite)
suite_class = SuiteClass.TARGET_SUITE
if suite != 'testing':
suite_class = SuiteClass.ADDITIONAL_SOURCE_SUITE if suffix else SuiteClass.PRIMARY_SOURCE_SUITE
suites.append(Suite(suite_class, suite, suite_path, suite_short_name=suffix))
else:
target_suite = TargetSuite(suite_class, suite, suite_path, suite_short_name=suffix)
suites.append(target_suite)
else:
if suite in {'testing', 'unstable'}: # pragma: no cover
self.logger.error(missing_config_msg, suite.upper())
raise MissingRequiredConfigurationError(missing_config_msg % suite.upper())
# self.suite_info[suite] = SuiteInfo(name=suite, path=None, excuses_suffix=suffix)
self.logger.info("Optional suite %s is not defined (config option: %s) ", suite, suite.upper())
assert target_suite is not None
self._check_release_file(target_suite, missing_config_msg)
self._setup_architectures()
# read the source and binary packages for the involved distributions. Notes:
# - Load testing last as some live-data tests have more complete information in
# unstable
# - Load all sources before any of the binaries.
for suite in suites:
sources = self._read_sources(suite.path)
self._update_suite_name(suite)
suite.sources = sources
if hasattr(self._base_config, 'partial_unstable'):
testing = suites[0]
unstable = suites[1]
# We need the sources from the target suite available when reading
# Packages files, so the binaries for binary-only migrations can be
# added to the right SourcePackage.
self._merge_sources(testing, unstable)
for suite in suites:
(suite.binaries, suite.provides_table) = self._read_binaries(suite, self._architectures)
if hasattr(self._base_config, 'partial_unstable'):
# _read_binaries might have created 'faux' packages, and these need merging too.
self._merge_sources(testing, unstable)
self._merge_binaries(testing, unstable, self._architectures)
return Suites(suites[0], suites[1:])
def _setup_architectures(self):
allarches = self._architectures
# Re-order the architectures such as that the most important architectures are listed first
# (this is to make the log easier to read as most important architectures will be listed
# first)
arches = [x for x in allarches if x in self._nobreakall_arches]
arches += [x for x in allarches if x not in arches and x not in self._outofsync_arches]
arches += [x for x in allarches if x not in arches and x not in self._break_arches]
arches += [x for x in allarches if x not in arches and x not in self._new_arches]
arches += [x for x in allarches if x not in arches]
# Intern architectures for efficiency; items in this list will be used for lookups and
# building items/keys - by intern strings we reduce memory (considerably).
self._architectures = [sys.intern(arch) for arch in allarches]
assert 'all' not in self._architectures, "all not allowed in architectures"
def _get_suite_name(self, suite, release_file):
for name in ('Suite', 'Codename'):
try:
return release_file[name]
except KeyError:
pass
self.logger.warning("Either of the fields \"Suite\" or \"Codename\" should be present in a release file.")
self.logger.error("Release file for suite %s is missing both the \"Suite\" and the \"Codename\" fields.",
suite.name)
raise KeyError('Suite')
def _update_suite_name(self, suite):
try:
release_file = read_release_file(suite.path)
except FileNotFoundError:
self.logger.info("The %s suite does not have a Release file, unable to update the name",
suite.name)
release_file = None
if release_file is not None:
suite.name = self._get_suite_name(suite, release_file)
self.logger.info("Using suite name from Release file: %s", suite.name)
def _check_release_file(self, target_suite, missing_config_msg):
try:
release_file = read_release_file(target_suite.path)
self.logger.info("Found a Release file in %s - using that for defaults", target_suite.name)
except FileNotFoundError:
self.logger.info("The %s suite does not have a Release file.", target_suite.name)
release_file = None
if release_file is not None:
self._components = release_file['Components'].split()
self.logger.info("Using components listed in Release file: %s", ' '.join(self._components))
if self._architectures is None:
if release_file is None: # pragma: no cover
self.logger.error("No configured architectures and there is no release file in the %s suite.",
target_suite.name)
self.logger.error("Please check if there is a \"Release\" file in %s",
target_suite.path)
self.logger.error("or if the config file contains a non-empty \"ARCHITECTURES\" field")
raise MissingRequiredConfigurationError(missing_config_msg % "ARCHITECTURES")
self._architectures = sorted(release_file['Architectures'].split())
self.logger.info("Using architectures listed in Release file: %s", ' '.join(self._architectures))
def _read_sources(self, basedir):
"""Read the list of source packages from the specified directory
The source packages are read from the `Sources' file within the
directory specified as `basedir' parameter. Considering the
large amount of memory needed, not all the fields are loaded
in memory. The available fields are Version, Maintainer and Section.
The method returns a list where every item represents a source
package as a dictionary.
"""
if self._components:
sources = {}
for component in self._components:
filename = os.path.join(basedir, component, "source", "Sources")
filename = possibly_compressed(filename)
self.logger.info("Loading source packages from %s", filename)
read_sources_file(filename, sources)
else:
filename = os.path.join(basedir, "Sources")
self.logger.info("Loading source packages from %s", filename)
sources = read_sources_file(filename)
return sources
def _merge_sources(self, target, source):
"""Merge sources from `target' into partial suite `source'."""
target_sources = target.sources
source_sources = source.sources
# we need complete copies here, as we might later find some binaries
# which are only in unstable
for pkg, value in target_sources.items():
if pkg not in source_sources:
source_sources[pkg] = copy.deepcopy(value)
def _merge_binaries(self, target, source, arches):
"""Merge `arches' binaries from `target' into partial suite
`source'."""
target_sources = target.sources
target_binaries = target.binaries
source_sources = source.sources
source_binaries = source.binaries
source_provides = source.provides_table
oodsrcs = defaultdict(set)
try:
all_buildarch = self._base_config.all_buildarch
except AttributeError:
all_buildarch = None
def _merge_binaries_arch(arch):
for pkg, value in target_binaries[arch].items():
if pkg in source_binaries[arch]:
continue
# Don't merge binaries rendered stale by new sources in source
# that have built on this architecture.
if value.source not in oodsrcs[arch]:
target_version = target_sources[value.source].version
try:
source_version = source_sources[value.source].version
except KeyError:
self.logger.info("merge_binaries: pkg %s has no source, NBS?" % pkg)
continue
if target_version != source_version:
arches_built = set([source_binaries[b.architecture][b.package_name].architecture
for b in source_sources[value.source].binaries
if b.architecture in (arch, all_buildarch)])
only_builds_arch_all = arches_built == set(['all'])
# If we only build arch:all packages in the source suite,
# it will look like the package is unbuilt on the arches
# other than the all buildarch. But we don't expect a build
# there, so don't merge binaries from the target. If we
# were to, it would look like any dropped non-arch-all
# binaries had gone missing.
if only_builds_arch_all and arch != all_buildarch:
continue
current_arch = value.architecture
built = False
for b in source_sources[value.source].binaries:
if b.architecture == arch:
source_value = source_binaries[arch][b.package_name]
# arch:all packages show up in all Packages
# files, but they are only indicative of a
# build on the all_buildarch.
if current_arch in (
source_value.architecture, 'all') or \
(source_value.architecture == 'all' and arch == all_buildarch):
built = True
break
if built:
continue
oodsrcs[arch].add(value.source)
if pkg in source_binaries[arch]:
for p in source_binaries[arch][pkg].provides:
source_provides[arch][p].remove(pkg)
if not source_provides[arch][p]:
del source_provides[arch][p]
source_binaries[arch][pkg] = value
if value.pkg_id not in source_sources[value.source].binaries:
source_sources[value.source].binaries.add(value.pkg_id)
for p in value.provides:
if p not in source_provides[arch]:
source_provides[arch][p] = []
source_provides[arch][p].append(pkg)
for arch in arches:
_merge_binaries_arch(arch)
@staticmethod
def merge_fields(get_field, *field_names, separator=', '):
"""Merge two or more fields (filtering out empty fields; returning None if all are empty)
"""
return separator.join(filter(None, (get_field(x) for x in field_names))) or None
def _read_packages_file(self, filename, arch, srcdist, packages=None, intern=sys.intern):
self.logger.info("Loading binary packages from %s", filename)
if packages is None:
packages = {}
added_old_binaries = {}
all_binaries = self._all_binaries
tag_file = apt_pkg.TagFile(filename)
get_field = tag_file.section.get
step = tag_file.step
while step():
pkg = get_field('Package')
version = get_field('Version')
section = get_field('Section')
# There may be multiple versions of any arch:all packages
# (in unstable) if some architectures have out-of-date
# binaries. We only ever consider the package with the
# largest version for migration.
pkg = intern(pkg)
version = intern(version)
pkg_id = BinaryPackageId(pkg, version, arch)
if pkg in packages:
old_pkg_data = packages[pkg]
if apt_pkg.version_compare(old_pkg_data.version, version) > 0:
continue
old_pkg_id = old_pkg_data.pkg_id
old_src_binaries = srcdist[old_pkg_data.source].binaries
prev_src = added_old_binaries.get(old_pkg_id, old_pkg_data.source)
ps = srcdist[prev_src]
ps.binaries.remove(old_pkg_id)
try:
del added_old_binaries[old_pkg_id]
except KeyError:
pass
# Is this a take-over, i.e. is old_pkg_data pointing to the wrong source now?
cursrc_binaries = srcdist[source].binaries
if old_pkg_id in cursrc_binaries:
self.logger.info("Removing %s from %s, taken over & NBS?", old_pkg_id, source)
cursrc_binaries.remove(old_pkg_id)
# This may seem weird at first glance, but the current code rely
# on this behaviour to avoid issues like #709460. Admittedly it
# is a special case, but Britney will attempt to remove the
# arch:all packages without this. Even then, this particular
# stop-gap relies on the packages files being sorted by name
# and the version, so it is not particularly resilient.
if pkg_id not in old_src_binaries:
old_src_binaries.add(pkg_id)
added_old_binaries[pkg_id] = old_pkg_data.source
# Merge Pre-Depends with Depends and Conflicts with
# Breaks. Britney is not interested in the "finer
# semantic differences" of these fields anyway.
deps = DebMirrorLikeSuiteContentLoader.merge_fields(get_field, 'Pre-Depends', 'Depends')
conflicts = DebMirrorLikeSuiteContentLoader.merge_fields(get_field, 'Conflicts', 'Breaks')
ess = False
if get_field('Essential', 'no') == 'yes':
ess = True
source = pkg
source_version = version
# retrieve the name and the version of the source package
source_raw = get_field('Source')
if source_raw:
source = intern(source_raw.split(" ")[0])
if "(" in source_raw:
source_version = intern(source_raw[source_raw.find("(")+1:source_raw.find(")")])
provides_raw = get_field('Provides')
if provides_raw:
provides = parse_provides(provides_raw, pkg_id=pkg_id, logger=self.logger)
else:
provides = []
raw_arch = intern(get_field('Architecture'))
if raw_arch not in {'all', arch}: # pragma: no cover
raise AssertionError("%s has wrong architecture (%s) - should be either %s or all" % (
str(pkg_id), raw_arch, arch))
builtusing_raw = get_field('Built-Using')
if builtusing_raw:
builtusing = parse_builtusing(builtusing_raw, pkg_id=pkg_id, logger=self.logger)
else:
builtusing = []
# XXX: Do the get_component thing in a much nicer way that can be upstreamed
dpkg = BinaryPackage(version,
intern(get_field('Section')),
source,
source_version,
raw_arch,
get_field('Multi-Arch'),
deps,
conflicts,
provides,
ess,
pkg_id,
builtusing,
UbuntuComponent.get_component(section),
)
# if the source package is available in the distribution, then register this binary package
if source in srcdist:
# There may be multiple versions of any arch:all packages
# (in unstable) if some architectures have out-of-date
# binaries. We only want to include the package in the
# source -> binary mapping once. It doesn't matter which
# of the versions we include as only the package name and
# architecture are recorded.
srcdist[source].binaries.add(pkg_id)
# if the source package doesn't exist, create a fake one
else:
# XXX: Do the get_component thing in a much nicer way that can be upstreamed
srcdist[source] = SourcePackage(source,
source_version,
'faux',
{pkg_id},
None,
True,
None,
None,
[],
[],
UbuntuComponent.get_component(section))
# add the resulting dictionary to the package list
packages[pkg] = dpkg
if pkg_id in all_binaries:
self._merge_pkg_entries(pkg, arch, all_binaries[pkg_id], dpkg)
else:
all_binaries[pkg_id] = dpkg
# add the resulting dictionary to the package list
packages[pkg] = dpkg
return packages
def _read_binaries(self, suite, architectures):
"""Read the list of binary packages from the specified directory
This method reads all the binary packages for a given suite.
If the "components" config parameter is set, the directory should
be the "suite" directory of a local mirror (i.e. the one containing
the "Release" file). Otherwise, Britney will read the packages
information from all the "Packages_${arch}" files referenced by
the "architectures" parameter.
Considering the
large amount of memory needed, not all the fields are loaded
in memory. The available fields are Version, Source, Multi-Arch,
Depends, Conflicts, Provides and Architecture.
The `Provides' field is used to populate the virtual packages list.
The method returns a tuple of two dicts with architecture as key and
another dict as value. The value dicts of the first dict map
from binary package name to "BinaryPackage" objects; the other second
value dicts map a package name to the packages providing them.
"""
binaries = {}
provides_table = {}
basedir = suite.path
if self._components:
release_file = read_release_file(basedir)
listed_archs = set(release_file['Architectures'].split())
for arch in architectures:
packages = {}
if arch not in listed_archs:
self.logger.info("Skipping arch %s for %s: It is not listed in the Release file",
arch, suite.name)
binaries[arch] = {}
provides_table[arch] = {}
continue
for component in self._components:
binary_dir = "binary-%s" % arch
filename = os.path.join(basedir,
component,
binary_dir,
'Packages')
filename = possibly_compressed(filename)
self._read_packages_file(filename,
arch,
suite.sources,
packages)
if getattr(self._base_config, 'has_udebs', 'yes') == 'yes':
udeb_filename = os.path.join(basedir,
component,
"debian-installer",
binary_dir,
"Packages")
# We assume the udeb Packages file is present if the
# regular one is present
udeb_filename = possibly_compressed(udeb_filename)
self._read_packages_file(udeb_filename,
arch,
suite.sources,
packages)
# create provides
provides = create_provides_map(packages)
binaries[arch] = packages
provides_table[arch] = provides
else:
for arch in architectures:
filename = os.path.join(basedir, "Packages_%s" % arch)
packages = self._read_packages_file(filename,
arch,
suite.sources)
provides = create_provides_map(packages)
binaries[arch] = packages
provides_table[arch] = provides
return (binaries, provides_table)
def _merge_pkg_entries(self, package, parch, pkg_entry1, pkg_entry2):
bad = []
for f in self.CHECK_FIELDS:
v1 = getattr(pkg_entry1, f)
v2 = getattr(pkg_entry2, f)
if v1 != v2: # pragma: no cover
bad.append((f, v1, v2))
if bad: # pragma: no cover
self.logger.error("Mismatch found %s %s %s differs", package, pkg_entry1.version, parch)
for f, v1, v2 in bad:
self.logger.info(" ... %s %s != %s", f, v1, v2)
raise ValueError("Inconsistent / Unsupported data set")
# Merge ESSENTIAL if necessary
assert pkg_entry1.is_essential or not pkg_entry2.is_essential