diff options
Diffstat (limited to 'packages/utils.py')
-rw-r--r-- | packages/utils.py | 442 |
1 files changed, 411 insertions, 31 deletions
diff --git a/packages/utils.py b/packages/utils.py index 55b7acf9..c38aa840 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -1,17 +1,42 @@ +from collections import defaultdict +from itertools import chain +from operator import attrgetter, itemgetter +import re + +from django.core.serializers.json import DjangoJSONEncoder from django.db import connection -from django.db.models import Count, Max +from django.db.models import Count, Max, F +from django.db.models.query import QuerySet +from django.contrib.auth.models import User + +from main.models import Package, PackageFile, Arch, Repo +from main.utils import (database_vendor, + groupby_preserve_order, PackageStandin) +from .models import (PackageGroup, PackageRelation, + License, Depend, Conflict, Provision, Replacement, + SignoffSpecification, Signoff, fake_signoff_spec) + + +VERSION_RE = re.compile(r'^((\d+):)?(.+)-([^-]+)$') + -from operator import itemgetter +def parse_version(version): + match = VERSION_RE.match(version) + if not match: + return None, None, 0 + ver = match.group(3) + rel = match.group(4) + if match.group(2): + epoch = int(match.group(2)) + else: + epoch = 0 + return ver, rel, epoch -from main.models import Package -from main.utils import cache_function -from .models import PackageGroup -@cache_function(300) -def get_group_info(): +def get_group_info(include_arches=None): raw_groups = PackageGroup.objects.values_list( 'name', 'pkg__arch__name').order_by('name').annotate( - cnt=Count('pkg'), last_update=Max('pkg__last_update')) + cnt=Count('pkg'), last_update=Max('pkg__last_update')) # now for post_processing. we need to seperate things out and add # the count in for 'any' to all of the other architectures. group_mapping = {} @@ -38,12 +63,30 @@ def get_group_info(): new_g['arch'] = arch arch_groups[grp['name']] = new_g - # now transform it back into a sorted list + # now transform it back into a sorted list, including only the specified + # architectures if we got a list groups = [] - for val in group_mapping.itervalues(): - groups.extend(val.itervalues()) + for key, val in group_mapping.iteritems(): + if not include_arches or key in include_arches: + groups.extend(val.itervalues()) return sorted(groups, key=itemgetter('name', 'arch')) + +def get_split_packages_info(): + '''Return info on split packages that do not have an actual package name + matching the split pkgbase.''' + pkgnames = Package.objects.values('pkgname') + split_pkgs = Package.objects.exclude(pkgname=F('pkgbase')).exclude( + pkgbase__in=pkgnames).values('pkgbase', 'repo', 'arch').annotate( + last_update=Max('last_update')).order_by().distinct() + all_arches = Arch.objects.in_bulk({s['arch'] for s in split_pkgs}) + all_repos = Repo.objects.in_bulk({s['repo'] for s in split_pkgs}) + for split in split_pkgs: + split['arch'] = all_arches[split['arch']] + split['repo'] = all_repos[split['repo']] + return split_pkgs + + class Difference(object): def __init__(self, pkgname, repo, pkg_a, pkg_b): self.pkgname = pkgname @@ -65,12 +108,17 @@ class Difference(object): css_classes.append(self.pkg_b.arch.name) return ' '.join(css_classes) - def __cmp__(self, other): - if isinstance(other, Difference): - return cmp(self.__dict__, other.__dict__) - return False + def __key(self): + return (self.pkgname, hash(self.repo), + hash(self.pkg_a), hash(self.pkg_b)) + + def __eq__(self, other): + return self.__key() == other.__key() + + def __hash__(self): + return hash(self.__key()) + -@cache_function(300) def get_differences_info(arch_a, arch_b): # This is a monster. Join packages against itself, looking for packages in # our non-'any' architectures only, and not having a corresponding package @@ -89,24 +137,25 @@ SELECT p.id, q.id ) WHERE p.arch_id IN (%s, %s) AND ( + q.arch_id IN (%s, %s) + OR q.id IS NULL + ) + AND ( q.id IS NULL - OR - p.pkgver != q.pkgver - OR - p.pkgrel != q.pkgrel + OR p.pkgver != q.pkgver + OR p.pkgrel != q.pkgrel + OR p.epoch != q.epoch ) """ cursor = connection.cursor() - cursor.execute(sql, [arch_a.id, arch_b.id]) + cursor.execute(sql, [arch_a.id, arch_b.id, arch_a.id, arch_b.id]) results = cursor.fetchall() - to_fetch = [] - for row in results: - # column A will always have a value, column B might be NULL - to_fetch.append(row[0]) + # column A will always have a value, column B might be NULL + to_fetch = {row[0] for row in results} # fetch all of the necessary packages - pkgs = Package.objects.in_bulk(to_fetch) - # now build a list of tuples containing differences - differences = [] + pkgs = Package.objects.normal().in_bulk(to_fetch) + # now build a set containing differences + differences = set() for row in results: pkg_a = pkgs.get(row[0]) pkg_b = pkgs.get(row[1]) @@ -119,11 +168,342 @@ SELECT p.id, q.id name = pkg_a.pkgname if pkg_a else pkg_b.pkgname repo = pkg_a.repo if pkg_a else pkg_b.repo item = Difference(name, repo, pkg_b, pkg_a) - if item not in differences: - differences.append(item) + differences.add(item) # now sort our list by repository, package name - differences.sort(key=lambda a: (a.repo.name, a.pkgname)) + key_func = attrgetter('repo.name', 'pkgname') + differences = sorted(differences, key=key_func) return differences + +def multilib_differences(): + # Query for checking multilib out of date-ness + if database_vendor(Package) == 'sqlite': + pkgname_sql = """ + CASE WHEN ml.pkgname LIKE %s + THEN SUBSTR(ml.pkgname, 7) + WHEN ml.pkgname LIKE %s + THEN SUBSTR(ml.pkgname, 1, LENGTH(ml.pkgname) - 9) + ELSE + ml.pkgname + END + """ + else: + pkgname_sql = """ + CASE WHEN ml.pkgname LIKE %s + THEN SUBSTRING(ml.pkgname, 7) + WHEN ml.pkgname LIKE %s + THEN SUBSTRING(ml.pkgname FROM 1 FOR CHAR_LENGTH(ml.pkgname) - 9) + ELSE + ml.pkgname + END + """ + sql = """ +SELECT ml.id, reg.id + FROM packages ml + JOIN packages reg + ON ( + reg.pkgname = (""" + pkgname_sql + """) + AND reg.pkgver != ml.pkgver + ) + JOIN repos r ON reg.repo_id = r.id + WHERE ml.repo_id = %s + AND r.testing = %s + AND r.staging = %s + AND reg.arch_id = %s + ORDER BY ml.last_update + """ + multilib = Repo.objects.get(name__iexact='multilib') + i686 = Arch.objects.get(name='i686') + params = ['lib32-%', '%-multilib', multilib.id, False, False, i686.id] + + cursor = connection.cursor() + cursor.execute(sql, params) + results = cursor.fetchall() + + # fetch all of the necessary packages + to_fetch = set(chain.from_iterable(results)) + pkgs = Package.objects.normal().in_bulk(to_fetch) + + return [(pkgs[ml], pkgs[reg]) for ml, reg in results] + + +def get_wrong_permissions(): + sql = """ +SELECT DISTINCT id + FROM ( + SELECT pr.id, p.repo_id, pr.user_id + FROM packages p + JOIN packages_packagerelation pr ON p.pkgbase = pr.pkgbase + WHERE pr.type = %s + ) mp + LEFT JOIN ( + SELECT user_id, repo_id FROM user_profiles_allowed_repos ar + INNER JOIN user_profiles up ON ar.userprofile_id = up.id + ) ur + ON mp.user_id = ur.user_id AND mp.repo_id = ur.repo_id + WHERE ur.user_id IS NULL; +""" + cursor = connection.cursor() + cursor.execute(sql, [PackageRelation.MAINTAINER]) + to_fetch = [row[0] for row in cursor.fetchall()] + relations = PackageRelation.objects.select_related( + 'user', 'user__userprofile').filter( + id__in=to_fetch) + return relations + + +def attach_maintainers(packages): + '''Given a queryset or something resembling it of package objects, find all + the maintainers and attach them to the packages to prevent N+1 query + cascading.''' + if isinstance(packages, QuerySet): + pkgbases = packages.values('pkgbase') + else: + packages = list(packages) + pkgbases = {p.pkgbase for p in packages if p is not None} + rels = PackageRelation.objects.filter(type=PackageRelation.MAINTAINER, + pkgbase__in=pkgbases).values_list( + 'pkgbase', 'user_id').order_by().distinct() + + # get all the user objects we will need + user_ids = {rel[1] for rel in rels} + users = User.objects.in_bulk(user_ids) + + # now build a pkgbase -> [maintainers...] map + maintainers = defaultdict(list) + for rel in rels: + maintainers[rel[0]].append(users[rel[1]]) + + annotated = [] + # and finally, attach the maintainer lists on the original packages + for package in packages: + if package is None: + continue + package.maintainers = maintainers[package.pkgbase] + annotated.append(package) + + return annotated + + +def approved_by_signoffs(signoffs, spec): + if signoffs: + good_signoffs = sum(1 for s in signoffs if not s.revoked) + return good_signoffs >= spec.required + return False + + +class PackageSignoffGroup(object): + '''Encompasses all packages in testing with the same pkgbase.''' + def __init__(self, packages): + if len(packages) == 0: + raise Exception + self.packages = packages + self.user = None + self.target_repo = None + self.signoffs = set() + self.default_spec = True + + first = packages[0] + self.pkgbase = first.pkgbase + self.arch = first.arch + self.repo = first.repo + self.version = '' + self.last_update = first.last_update + self.packager = first.packager + self.maintainers = first.maintainers + self.specification = fake_signoff_spec(first.arch) + + version = first.full_version + if all(version == pkg.full_version for pkg in packages): + self.version = version + + @property + def package(self): + '''Try and return a relevant single package object representing this + group. Start by seeing if there is only one package, then look for the + matching package by name, finally falling back to a standin package + object.''' + if len(self.packages) == 1: + return self.packages[0] + + same_pkgs = [p for p in self.packages if p.pkgname == p.pkgbase] + if same_pkgs: + return same_pkgs[0] + + return PackageStandin(self.packages[0]) + + def find_signoffs(self, all_signoffs): + '''Look through a list of Signoff objects for ones matching this + particular group and store them on the object.''' + for s in all_signoffs: + if s.pkgbase != self.pkgbase: + continue + if self.version and not s.full_version == self.version: + continue + if s.arch_id == self.arch.id and s.repo_id == self.repo.id: + self.signoffs.add(s) + + def find_specification(self, specifications): + for spec in specifications: + if spec.pkgbase != self.pkgbase: + continue + if self.version and not spec.full_version == self.version: + continue + if spec.arch_id == self.arch.id and spec.repo_id == self.repo.id: + self.specification = spec + self.default_spec = False + return + + def approved(self): + return approved_by_signoffs(self.signoffs, self.specification) + + @property + def completed(self): + return sum(1 for s in self.signoffs if not s.revoked) + + @property + def required(self): + return self.specification.required + + def user_signed_off(self, user=None): + '''Did a given user signoff on this package? user can be passed as an + argument, or attached to the group object itself so this can be called + from a template.''' + if user is None: + user = self.user + return user in (s.user for s in self.signoffs if not s.revoked) + + def __unicode__(self): + return u'%s-%s (%s): %d' % ( + self.pkgbase, self.version, self.arch, len(self.signoffs)) + + +def signoffs_id_query(model, repos): + sql = """ +SELECT DISTINCT s.id + FROM %s s + JOIN packages p ON ( + s.pkgbase = p.pkgbase + AND s.pkgver = p.pkgver + AND s.pkgrel = p.pkgrel + AND s.epoch = p.epoch + AND s.arch_id = p.arch_id + AND s.repo_id = p.repo_id + ) + WHERE p.repo_id IN (%s) + AND s.repo_id IN (%s) + """ + cursor = connection.cursor() + # query pre-process- fill in table name and placeholders for IN + repo_sql = ','.join(['%s' for _ in repos]) + sql = sql % (model._meta.db_table, repo_sql, repo_sql) + repo_ids = [r.pk for r in repos] + # repo_ids are needed twice, so double the array + cursor.execute(sql, repo_ids * 2) + + results = cursor.fetchall() + return [row[0] for row in results] + + +def get_current_signoffs(repos): + '''Returns a list of signoff objects for the given repos.''' + to_fetch = signoffs_id_query(Signoff, repos) + return Signoff.objects.select_related('user').in_bulk(to_fetch).values() + + +def get_current_specifications(repos): + '''Returns a list of signoff specification objects for the given repos.''' + to_fetch = signoffs_id_query(SignoffSpecification, repos) + return SignoffSpecification.objects.select_related('arch').in_bulk( + to_fetch).values() + + +def get_target_repo_map(repos): + sql = """ +SELECT DISTINCT p1.pkgbase, r.name + FROM packages p1 + JOIN repos r ON p1.repo_id = r.id + JOIN packages p2 ON p1.pkgbase = p2.pkgbase + WHERE r.staging = %s + AND r.testing = %s + AND p2.repo_id IN ( + """ + sql += ','.join(['%s' for _ in repos]) + sql += ")" + + params = [False, False] + params.extend(r.pk for r in repos) + + cursor = connection.cursor() + cursor.execute(sql, params) + return dict(cursor.fetchall()) + + +def get_signoff_groups(repos=None, user=None): + if repos is None: + repos = Repo.objects.filter(testing=True) + repo_ids = [r.pk for r in repos] + + test_pkgs = Package.objects.select_related( + 'arch', 'repo', 'packager').filter(repo__in=repo_ids) + packages = test_pkgs.order_by('pkgname') + packages = attach_maintainers(packages) + + # Filter by user if asked to do so + if user is not None: + packages = [p for p in packages if user == p.packager + or user in p.maintainers] + + # Collect all pkgbase values in testing repos + pkgtorepo = get_target_repo_map(repos) + + # Collect all possible signoffs and specifications for these packages + signoffs = get_current_signoffs(repos) + specs = get_current_specifications(repos) + + same_pkgbase_key = lambda x: (x.repo.name, x.arch.name, x.pkgbase) + grouped = groupby_preserve_order(packages, same_pkgbase_key) + signoff_groups = [] + for group in grouped: + signoff_group = PackageSignoffGroup(group) + signoff_group.target_repo = pkgtorepo.get(signoff_group.pkgbase, + "Unknown") + signoff_group.find_signoffs(signoffs) + signoff_group.find_specification(specs) + signoff_groups.append(signoff_group) + + return signoff_groups + + +class PackageJSONEncoder(DjangoJSONEncoder): + pkg_attributes = ['pkgname', 'pkgbase', 'repo', 'arch', 'pkgver', + 'pkgrel', 'epoch', 'pkgdesc', 'url', 'filename', 'compressed_size', + 'installed_size', 'build_date', 'last_update', 'flag_date', + 'maintainers', 'packager'] + pkg_list_attributes = ['groups', 'licenses', 'conflicts', + 'provides', 'replaces', 'depends'] + + def default(self, obj): + if hasattr(obj, '__iter__'): + # mainly for queryset serialization + return list(obj) + if isinstance(obj, Package): + data = {attr: getattr(obj, attr) for attr in self.pkg_attributes} + for attr in self.pkg_list_attributes: + data[attr] = getattr(obj, attr).all() + return data + if isinstance(obj, PackageFile): + filename = obj.filename or '' + return obj.directory + filename + if isinstance(obj, (Repo, Arch)): + return obj.name.lower() + if isinstance(obj, (PackageGroup, License)): + return obj.name + if isinstance(obj, (Depend, Conflict, Provision, Replacement)): + return unicode(obj) + elif isinstance(obj, User): + return obj.username + return super(PackageJSONEncoder, self).default(obj) + # vim: set ts=4 sw=4 et: |