lpapicache: find binaries from lp

Also update getBinaries() to allow retrieval by binary name

This significantly speeds up binary file lookup for non-published
package versions. For those versions we can get the list of binary
urls, but we then have to look up the BPPH for each of those urls,
which is slow. If the user only wants a specific binary package and/or
arch, filtering up front speeds up getting that, especially for
packages with a lot of binary files (like the kernel).
This commit is contained in:
Dan Streetman 2017-11-21 14:37:42 -05:00 committed by Dan Streetman
parent 3a413760f3
commit fb750e38bb
2 changed files with 127 additions and 47 deletions

View File

@ -317,9 +317,7 @@ class SourcePackage(object):
yield default_url yield default_url
def _binary_files_info(self, arch, name): def _binary_files_info(self, arch, name):
for bpph in self.lp_spph.getBinaries(arch): for bpph in self.lp_spph.getBinaries(arch=arch, name=name):
if name and not re.match(name, bpph.binary_package_name):
continue
yield (bpph.getFileName(), bpph.getUrl(), 0) yield (bpph.getFileName(), bpph.getUrl(), 0)
def pull_dsc(self): def pull_dsc(self):
@ -585,13 +583,11 @@ class DebianSPPH(SourcePackagePublishingHistory):
""" """
resource_type = 'source_package_publishing_history' resource_type = 'source_package_publishing_history'
def getBinaries(self, arch=None): def getBinaries(self, arch, name=None):
if not self._binaries:
Logger.normal('Using Snapshot to find binary packages') Logger.normal('Using Snapshot to find binary packages')
srcpkg = Snapshot.getSourcePackage(self.getPackageName(), srcpkg = Snapshot.getSourcePackage(self.getPackageName(),
version=self.getVersion()) version=self.getVersion())
self._binaries = [b.getBPPH() for b in srcpkg.getBinaryFiles()] return srcpkg.getSPPH().getBinaries(arch=arch, name=name)
return super(DebianSPPH, self).getBinaries(arch)
class DebianSourcePackage(SourcePackage): class DebianSourcePackage(SourcePackage):
@ -652,9 +648,7 @@ class DebianSourcePackage(SourcePackage):
yield self.snapshot_files[name] yield self.snapshot_files[name]
def _binary_files_info(self, arch, name): def _binary_files_info(self, arch, name):
for f in self.snapshot_package.getBinaryFiles(arch): for f in self.snapshot_package.getBinaryFiles(arch=arch, name=name):
if name and not re.match(name, f.package_name):
continue
yield (f.name, f.getUrl(), f.size) yield (f.name, f.getUrl(), f.size)
def pull_dsc(self): def pull_dsc(self):
@ -924,7 +918,7 @@ class SnapshotSourcePackage(SnapshotPackage):
def getAllFiles(self): def getAllFiles(self):
return self.getFiles() + self.getBinaryFiles() return self.getFiles() + self.getBinaryFiles()
def getBinaryFiles(self, arch=None): def getBinaryFiles(self, arch=None, name=None):
if not self._binary_files: if not self._binary_files:
url = "/mr/package/{}/{}/allfiles".format(self.name, self.version) url = "/mr/package/{}/{}/allfiles".format(self.name, self.version)
response = Snapshot.load("{}?fileinfo=1".format(url)) response = Snapshot.load("{}?fileinfo=1".format(url))
@ -934,9 +928,12 @@ class SnapshotSourcePackage(SnapshotPackage):
r['architecture'], self.name) r['architecture'], self.name)
for b in response['result']['binaries'] for r in b['files']] for b in response['result']['binaries'] for r in b['files']]
self._binary_files = files self._binary_files = files
if not arch: bins = self._binary_files.copy()
return list(self._binary_files) if arch:
return filter(lambda f: f.isArch(arch), self._binary_files) bins = filter(lambda b: b.isArch(arch), bins)
if name:
bins = filter(lambda b: re.match(name, b.name), bins)
return bins
def getFiles(self): def getFiles(self):
if not self._files: if not self._files:
@ -970,6 +967,8 @@ class SnapshotBinaryPackage(SnapshotPackage):
f = self.getFiles(arch) f = self.getFiles(arch)
if not f: if not f:
return None return None
if not arch:
raise RuntimeError("Must specify arch")
# Can only be 1 binary file for this pkg name/version/arch # Can only be 1 binary file for this pkg name/version/arch
return f[0].getBPPH() return f[0].getBPPH()
@ -983,7 +982,7 @@ class SnapshotBinaryPackage(SnapshotPackage):
r['architecture'], self.source) r['architecture'], self.source)
for r in response['result']] for r in response['result']]
if not arch: if not arch:
return list(self._files) return self._files.copy()
return filter(lambda f: f.isArch(arch), self._files) return filter(lambda f: f.isArch(arch), self._files)
@ -1145,8 +1144,9 @@ class SnapshotSPPH(object):
new_entries.append(str(block)) new_entries.append(str(block))
return ''.join(new_entries) return ''.join(new_entries)
def getBinaries(self, arch=None): def getBinaries(self, arch, name=None):
return [b.getBPPH() for b in self._pkg.getBinaryFiles(arch)] return [b.getBPPH()
for b in self._pkg.getBinaryFiles(arch=arch, name=name)]
class SnapshotBPPH(object): class SnapshotBPPH(object):

View File

@ -26,6 +26,7 @@
# httplib2.debuglevel = 1 # httplib2.debuglevel = 1
import collections import collections
import re
import sys import sys
from debian.changelog import Changelog from debian.changelog import Changelog
@ -364,6 +365,7 @@ class Archive(BaseWrapper):
series defaults to the current development series if not specified. series defaults to the current development series if not specified.
series must be either a series name string, or DistroArchSeries object. series must be either a series name string, or DistroArchSeries object.
series may be omitted if version is specified.
pocket may be a string or a list. If a list, the highest version pocket may be a string or a list. If a list, the highest version
will be returned. It defaults to all pockets except backports. will be returned. It defaults to all pockets except backports.
@ -423,16 +425,18 @@ class Archive(BaseWrapper):
pass pass
elif series: elif series:
series = dist.getSeries(series) series = dist.getSeries(series)
else: elif not version:
series = dist.getDevelopmentSeries() series = dist.getDevelopmentSeries()
if binary: if binary:
if arch_series is None: if arch_series is None and series:
arch_series = series.getArchSeries(archtag=archtag) arch_series = series.getArchSeries(archtag=archtag)
if archtag is None: if archtag is None and arch_series:
archtag = arch_series.architecture_tag archtag = arch_series.architecture_tag
if archtag is None:
archtag = host_architecture()
index = (name, series.name, archtag, pockets, status, version) index = (name, getattr(series, 'name', None), archtag, pockets, status, version)
if index not in cache: if index not in cache:
params = { params = {
@ -443,9 +447,9 @@ class Archive(BaseWrapper):
if status: if status:
params['status'] = status params['status'] = status
if binary: if arch_series:
params['distro_arch_series'] = arch_series() params['distro_arch_series'] = arch_series()
else: elif series:
params['distro_series'] = series() params['distro_series'] = series()
if len(pockets) == 1: if len(pockets) == 1:
@ -460,9 +464,11 @@ class Archive(BaseWrapper):
for record in records: for record in records:
if record.pocket not in pockets: if record.pocket not in pockets:
continue continue
if latest is None or (Version(latest.source_package_version) r = wrapper(record)
< Version(record.source_package_version)): if binary and archtag and archtag != r.arch:
latest = record continue
if latest is None or latest.getVersion() < r.getVersion():
latest = r
if latest is None: if latest is None:
if name_key == 'binary_name': if name_key == 'binary_name':
@ -471,10 +477,14 @@ class Archive(BaseWrapper):
package_type = "source package" package_type = "source package"
else: else:
package_type = "package" package_type = "package"
msg = ("The %s '%s' does not exist in the %s %s archive" % msg = "The %s '%s' " % (package_type, name)
(package_type, name, dist.display_name, self.name)) if version:
msg += "version %s " % version
msg += ("does not exist in the %s %s archive" %
(dist.display_name, self.name))
if binary: if binary:
msg += " for architecture %s" % archtag msg += " for architecture %s" % archtag
if series:
pockets = [series.name if pocket == 'Release' pockets = [series.name if pocket == 'Release'
else '%s-%s' % (series.name, pocket.lower()) else '%s-%s' % (series.name, pocket.lower())
for pocket in pockets] for pocket in pockets]
@ -483,7 +493,7 @@ class Archive(BaseWrapper):
msg += " in " + ', '.join(pockets) msg += " in " + ', '.join(pockets)
raise PackageNotFoundException(msg) raise PackageNotFoundException(msg)
cache[index] = wrapper(latest) cache[index] = latest
return cache[index] return cache[index]
def copyPackage(self, source_name, version, from_archive, to_pocket, def copyPackage(self, source_name, version, from_archive, to_pocket,
@ -557,7 +567,8 @@ class SourcePackagePublishingHistory(BaseWrapper):
def __init__(self, *args): def __init__(self, *args):
self._archive = None self._archive = None
self._changelog = None self._changelog = None
self._binaries = None self._binaries = {}
self._have_all_binaries = False
self._distro_series = None self._distro_series = None
# Don't share _builds between different # Don't share _builds between different
# SourcePackagePublishingHistory objects # SourcePackagePublishingHistory objects
@ -653,19 +664,88 @@ class SourcePackagePublishingHistory(BaseWrapper):
new_entries.append(str(block)) new_entries.append(str(block))
return ''.join(new_entries) return ''.join(new_entries)
def getBinaries(self, arch=None): def getBinaries(self, arch, name=None):
''' '''
Returns the resulting BinaryPackagePublishingHistorys. Returns the resulting BinaryPackagePublishingHistorys.
If arch is specified, only returns BPPH for that arch. Must specify arch, or use 'all' to get all archs.
If name is specified, only returns BPPH matching that (regex) name.
''' '''
if self._binaries is None:
self._binaries = [BinaryPackagePublishingHistory(bpph)
for bpph in
self._lpobject.getPublishedBinaries()]
if not arch: if not arch:
return list(self._binaries) raise RuntimeError("Must specify arch")
return [b for b in self._binaries if b.arch == arch] # debs with arch 'all' have to be categorized as a specific arch
# so use requested arch if not 'all', or system arch
fallback_arch = arch
if fallback_arch == 'all':
fallback_arch = host_architecture()
if self._have_all_binaries:
# Great!
pass
elif self.status in ["Pending", "Published"]:
# Published, great! Directly query the list of binaries
binaries = map(BinaryPackagePublishingHistory,
self._lpobject.getPublishedBinaries())
for b in binaries:
a = b.arch
if a == 'all':
a = fallback_arch
if a not in self._binaries:
self._binaries[a] = {}
self._binaries[a][b.binary_package_name] = b
self._have_all_binaries = True
else:
# Older version, so we have to go the long way :(
print("Please wait, this may take some time...")
archive = self.getArchive()
urls = self.binaryFileUrls()
for url in urls:
# strip out the URL leading text.
filename = url.rsplit('/', 1)[1]
# strip the file suffix
pkgname = filename.rsplit('.', 1)[0]
# split into name, version, arch
(n, v, a) = pkgname.rsplit('_', 2)
if a == 'all':
a = fallback_arch
# Only check the arch requested - saves time
if arch != 'all' and arch != a:
continue
# Only check the name requested - saves time
if name and not re.match(name, n):
continue
# If we already have this BPPH, keep going
if a in self._binaries and n in self._binaries[a]:
continue
# we ignore the version, as it may be missing epoch
# also we can't use series, as some package versions
# span multiple series! (e.g. for different archs)
params = {'name': n,
'archtag': a,
'version': self.getVersion()}
try:
bpph = archive.getBinaryPackage(**params)
except PackageNotFoundException:
print("Could not find pkg in archive: %s" % filename)
continue
if a not in self._binaries:
self._binaries[a] = {}
self._binaries[a][n] = bpph
if not name and arch == 'all':
# We must have got them all
self._have_all_binaries = True
bpphs = []
if arch == 'all':
for a in self._binaries.values():
bpphs += a.values()
elif arch in self._binaries:
bpphs = self._binaries[arch].copy().values()
if name:
bpphs = filter(lambda b: re.match(name, b.binary_package_name), bpphs)
return bpphs
def _fetch_builds(self): def _fetch_builds(self):
'''Populate self._builds with the build records.''' '''Populate self._builds with the build records.'''