Diffstat (limited to 'devel')
-rw-r--r-- | devel/admin.py | 50
-rw-r--r-- | devel/fields.py | 28
-rw-r--r-- | devel/fixtures/staff_groups.json | 58
-rw-r--r-- | devel/forms.py | 104
-rw-r--r-- | devel/management/commands/generate_keyring.py | 89
-rw-r--r-- | devel/management/commands/pgp_import.py | 252
-rw-r--r-- | devel/management/commands/rematch_developers.py | 98
-rw-r--r-- | devel/management/commands/reporead.py | 647
-rw-r--r-- | devel/management/commands/reporead_inotify.py | 215
-rw-r--r-- | devel/management/commands/update_types_permissions.py | 23
-rw-r--r-- | devel/migrations/0001_initial.py | 101
-rw-r--r-- | devel/migrations/0002_staffgroup.py | 31
-rw-r--r-- | devel/migrations/__init__.py | 0
-rw-r--r-- | devel/models.py | 137
-rw-r--r-- | devel/reports.py | 198
-rw-r--r-- | devel/tests.py | 100
-rw-r--r-- | devel/urls.py | 15
-rw-r--r-- | devel/utils.py | 164
-rw-r--r-- | devel/views.py | 295
19 files changed, 2278 insertions, 327 deletions
diff --git a/devel/admin.py b/devel/admin.py new file mode 100644 index 00000000..d1729fe3 --- /dev/null +++ b/devel/admin.py @@ -0,0 +1,50 @@ +from django.contrib import admin +from django.contrib.auth.admin import UserAdmin +from django.contrib.auth.models import User + +from .models import UserProfile, StaffGroup, MasterKey, DeveloperKey, PGPSignature + + +class UserProfileInline(admin.StackedInline): + model = UserProfile + + +class UserProfileAdmin(UserAdmin): + inlines = [UserProfileInline] + list_display = ('username', 'email', 'first_name', 'last_name', 'is_staff', 'is_active') + list_filter = ('is_staff', 'is_superuser', 'is_active') + + +class StaffGroupAdmin(admin.ModelAdmin): + list_display = ('name', 'group', 'sort_order', 'member_title', 'slug') + prepopulated_fields = {'slug': ('name',)} + + +class MasterKeyAdmin(admin.ModelAdmin): + list_display = ('pgp_key', 'owner', 'created', 'revoker', 'revoked') + search_fields = ('pgp_key', 'owner__username', 'revoker__username') + date_hierarchy = 'created' + + +class DeveloperKeyAdmin(admin.ModelAdmin): + list_display = ('key', 'parent', 'owner', 'created', 'expires', 'revoked') + search_fields = ('key', 'owner__username') + list_filter = ('owner',) + date_hierarchy = 'created' + + +class PGPSignatureAdmin(admin.ModelAdmin): + list_display = ('signer', 'signee', 'created', 'expires', 'revoked') + search_fields = ('signer', 'signee') + date_hierarchy = 'created' + + +admin.site.unregister(User) +admin.site.register(User, UserProfileAdmin) +admin.site.register(StaffGroup, StaffGroupAdmin) + +admin.site.register(MasterKey, MasterKeyAdmin) +admin.site.register(DeveloperKey, DeveloperKeyAdmin) +admin.site.register(PGPSignature, PGPSignatureAdmin) + +# vim: set ts=4 sw=4 et: diff --git a/devel/fields.py b/devel/fields.py new file mode 100644 index 00000000..dd22a92e --- /dev/null +++ b/devel/fields.py @@ -0,0 +1,28 @@ +from django.db import models +from django.core.validators import RegexValidator + + +class PGPKeyField(models.CharField): + def __init__(self, *args, **kwargs): + super(PGPKeyField, self).__init__(*args, **kwargs) + self.validators.append(RegexValidator(r'^[0-9A-F]{40}$', + "Ensure this value consists of 40 hex characters.", 'hex_char')) + + def to_python(self, value): + if value == '' or value is None: + return None + value = super(PGPKeyField, self).to_python(value) + # remove all spaces + value = value.replace(' ', '') + # prune prefixes, either 0x or 2048R/ type + if value.startswith('0x'): + value = value[2:] + value = value.split('/')[-1] + # make all (hex letters) uppercase + return value.upper() + + def formfield(self, **kwargs): + # override so we don't set max_length form field attribute + return models.Field.formfield(self, **kwargs) + +# vim: set ts=4 sw=4 et: diff --git a/devel/fixtures/staff_groups.json b/devel/fixtures/staff_groups.json new file mode 100644 index 00000000..1f26b54e --- /dev/null +++ b/devel/fixtures/staff_groups.json @@ -0,0 +1,58 @@ +[ +{ + "fields": { + "group": [ + "Hackers" + ], + "description": "This is a list of the current Parabola Hackers. 
They maintain the [libre] package repository and keep the [core], [extra], and [community] repositories clean of non-Free software, in addition to doing any other developer duties.", + "sort_order": 1, + "member_title": "Hacker", + "slug": "hackers", + "name": "Hackers" + }, + "model": "devel.staffgroup", + "pk": 1 +}, +{ + "fields": { + "group": [ + "Retired Hackers" + ], + "description": "Below you can find a list of ex-hackers (aka project fellows). These folks helped make Parabola what it is today. Thanks!", + "sort_order": 11, + "member_title": "Fellow", + "slug": "hacker-fellows", + "name": "Hacker Fellows" + }, + "model": "devel.staffgroup", + "pk": 3 +}, +{ + "fields": { + "group": [ + "Support Staff" + ], + "description": "This is a list of the current Parabola forum moderators, wiki admins, IRC moderators, mirror maintenance, and everything else that keeps a GNU/Linux distro running smoothly.", + "sort_order": 5, + "member_title": "Staff", + "slug": "support-staff", + "name": "Support Staff" + }, + "model": "devel.staffgroup", + "pk": 5 +}, +{ + "fields": { + "group": [ + "Artists" + ], + "description": "This is a list of the current Parabola Artists. They maintain Parabola Artwork, including digital art, traditional art, skins for applications, customization utilities, and everything else that keeps a GNU/Linux distro for a better visualization and outreach through the free culture.", + "sort_order": 12, + "member_title": "Artist", + "slug": "artists", + "name": "Artists" + }, + "model": "devel.staffgroup", + "pk": 6 +} +] diff --git a/devel/forms.py b/devel/forms.py new file mode 100644 index 00000000..d953c614 --- /dev/null +++ b/devel/forms.py @@ -0,0 +1,104 @@ +import random +from collections import OrderedDict +from string import ascii_letters, digits + +from django import forms +from django.conf import settings +from django.contrib.auth.models import User, Group +from django.contrib.sites.models import Site +from django.core.mail import send_mail +from django.template import loader, Context + +from .models import UserProfile + + +class ProfileForm(forms.Form): + email = forms.EmailField(label='Private email (not shown publicly):', + help_text="Used for out-of-date notifications, etc.") + passwd1 = forms.CharField(label='New Password', required=False, + widget=forms.PasswordInput) + passwd2 = forms.CharField(label='Confirm Password', required=False, + widget=forms.PasswordInput) + + def clean(self): + if self.cleaned_data['passwd1'] != self.cleaned_data['passwd2']: + raise forms.ValidationError('Passwords do not match.') + return self.cleaned_data + + +class UserProfileForm(forms.ModelForm): + def clean_pgp_key(self): + data = self.cleaned_data['pgp_key'] + # strip 0x prefix if provided; store uppercase + if data.startswith('0x'): + data = data[2:] + return data.upper() + + class Meta: + model = UserProfile + exclude = ('allowed_repos', 'user', 'latin_name') + + +class NewUserForm(forms.ModelForm): + username = forms.CharField(max_length=30) + private_email = forms.EmailField() + first_name = forms.CharField(required=False) + last_name = forms.CharField(required=False) + groups = forms.ModelMultipleChoiceField(required=False, + queryset=Group.objects.all()) + + class Meta: + model = UserProfile + exclude = ('picture', 'user') + + def __init__(self, *args, **kwargs): + super(NewUserForm, self).__init__(*args, **kwargs) + # Hack ourself so certain fields appear first + old = self.fields + self.fields = OrderedDict() + keys = ('username', 'private_email', 'first_name', 
'last_name', + 'alias', 'public_email') + for key in keys: + self.fields[key] = old[key] + for key, val in old.items(): + if key not in keys: + self.fields[key] = old[key] + + def clean_username(self): + username = self.cleaned_data['username'] + if User.objects.filter(username=username).exists(): + raise forms.ValidationError( + "A user with that username already exists.") + return username + + def save(self, commit=True): + profile = super(NewUserForm, self).save(False) + pwletters = ascii_letters + digits + password = ''.join([random.choice(pwletters) for _ in xrange(8)]) + user = User.objects.create_user(username=self.cleaned_data['username'], + email=self.cleaned_data['private_email'], password=password) + user.first_name = self.cleaned_data['first_name'] + user.last_name = self.cleaned_data['last_name'] + user.save() + # sucks that the MRM.add() method can't take a list directly... we have + # to resort to dirty * magic. + user.groups.add(*self.cleaned_data['groups']) + profile.user = user + if commit: + profile.save() + self.save_m2m() + + template = loader.get_template('devel/new_account.txt') + ctx = Context({ + 'site': Site.objects.get_current(), + 'user': user, + 'password': password, + }) + + send_mail("Your new archweb account", + template.render(ctx), + settings.BRANDING_EMAIL, + [user.email], + fail_silently=False) + +# vim: set ts=4 sw=4 et: diff --git a/devel/management/commands/generate_keyring.py b/devel/management/commands/generate_keyring.py new file mode 100644 index 00000000..9c52dadc --- /dev/null +++ b/devel/management/commands/generate_keyring.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +""" +generate_keyring command + +Assemble a GPG keyring with all known developer keys. + +Usage: ./manage.py generate_keyring <keyserver> <keyring_path> +""" + +from django.core.management.base import BaseCommand, CommandError + +import logging +import subprocess +import sys + +from devel.models import MasterKey, UserProfile + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s -> %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + stream=sys.stderr) +logger = logging.getLogger() + +class Command(BaseCommand): + args = "<keyserver> <keyring_path> [ownertrust_path]" + help = "Assemble a GPG keyring with all known developer keys." + + def handle(self, *args, **options): + v = int(options.get('verbosity', None)) + if v == 0: + logger.level = logging.ERROR + elif v == 1: + logger.level = logging.INFO + elif v >= 2: + logger.level = logging.DEBUG + + if len(args) < 2: + raise CommandError("keyserver and keyring_path must be provided") + + generate_keyring(args[0], args[1]) + + if len(args) > 2: + generate_ownertrust(args[2]) + + +def generate_keyring(keyserver, keyring): + logger.info("getting all known key IDs") + + # Screw you Django, for not letting one natively do value != <empty string> + key_ids = UserProfile.objects.filter( + pgp_key__isnull=False).extra(where=["pgp_key != ''"]).values_list( + "pgp_key", flat=True) + logger.info("%d keys fetched from user profiles", len(key_ids)) + master_key_ids = MasterKey.objects.values_list("pgp_key", flat=True) + logger.info("%d keys fetched from master keys", len(master_key_ids)) + + # GPG is stupid and interprets any filename without path portion as being + # in ~/.gnupg/. Fake it out if we just get a bare filename. 
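    # Illustrative sketch, not part of this commit (arguments made up): a call
    # such as
    #   generate_keyring('hkp://keyserver.example.org', 'devkeys.gpg')
    # builds and runs roughly the following command, with every profile and
    # master key fingerprint appended after --recv-keys:
    #   gpg --no-default-keyring --keyring ./devkeys.gpg \
    #       --keyserver hkp://keyserver.example.org --recv-keys <KEYID>...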
+ if '/' not in keyring: + keyring = './%s' % keyring + gpg_cmd = ["gpg", "--no-default-keyring", "--keyring", keyring, + "--keyserver", keyserver, "--recv-keys"] + logger.info("running command: %r", gpg_cmd) + gpg_cmd.extend(key_ids) + gpg_cmd.extend(master_key_ids) + subprocess.check_call(gpg_cmd) + logger.info("keyring at %s successfully updated", keyring) + + +TRUST_LEVELS = { + 'unknown': 0, + 'expired': 1, + 'undefined': 2, + 'never': 3, + 'marginal': 4, + 'fully': 5, + 'ultimate': 6, +} + + +def generate_ownertrust(trust_path): + master_key_ids = MasterKey.objects.values_list("pgp_key", flat=True) + with open(trust_path, "w") as trustfile: + for key_id in master_key_ids: + trustfile.write("%s:%d:\n" % (key_id, TRUST_LEVELS['marginal'])) + logger.info("trust file at %s created or overwritten", trust_path) + +# vim: set ts=4 sw=4 et: diff --git a/devel/management/commands/pgp_import.py b/devel/management/commands/pgp_import.py new file mode 100644 index 00000000..7a124f77 --- /dev/null +++ b/devel/management/commands/pgp_import.py @@ -0,0 +1,252 @@ +# -*- coding: utf-8 -*- +""" +pgp_import command + +Import keys and signatures from a given GPG keyring. + +Usage: ./manage.py pgp_import <keyring_path> +""" + +from collections import namedtuple, OrderedDict +from datetime import datetime +import logging +from pytz import utc +import subprocess +import sys + +from django.core.management.base import BaseCommand, CommandError +from django.db import transaction + +from devel.models import DeveloperKey, PGPSignature +from devel.utils import UserFinder + + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s -> %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + stream=sys.stderr) +logger = logging.getLogger() + +class Command(BaseCommand): + args = "<keyring_path>" + help = "Import keys and signatures from a given GPG keyring." + + def handle(self, *args, **options): + v = int(options.get('verbosity', None)) + if v == 0: + logger.level = logging.ERROR + elif v == 1: + logger.level = logging.INFO + elif v >= 2: + logger.level = logging.DEBUG + + if len(args) < 1: + raise CommandError("keyring_path must be provided") + + import_keys(args[0]) + import_signatures(args[0]) + + +def get_date(epoch_string): + '''Convert a epoch string into a python 'date' object (not datetime).''' + if not epoch_string: + return None + return datetime.utcfromtimestamp(int(epoch_string)).date() + + +def get_datetime(epoch_string): + '''Convert a epoch string into a python 'datetime' object.''' + if not epoch_string: + return None + return datetime.utcfromtimestamp(int(epoch_string)).replace(tzinfo=utc) + + +def call_gpg(keyring, *args): + # GPG is stupid and interprets any filename without path portion as being + # in ~/.gnupg/. Fake it out if we just get a bare filename. 
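    # Illustrative sketch, not part of this commit: a call such as
    #   call_gpg('devkeys.gpg', '--list-sigs')
    # executes roughly
    #   gpg2 --no-default-keyring --keyring ./devkeys.gpg \
    #        --with-colons --fixed-list-mode --list-sigs
    # and returns the colon-delimited stdout consumed by the parsers below.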
+ if '/' not in keyring: + keyring = './%s' % keyring + gpg_cmd = ["gpg2", "--no-default-keyring", "--keyring", keyring, + "--with-colons", "--fixed-list-mode"] + gpg_cmd.extend(args) + logger.info("running command: %s", ' '.join(gpg_cmd)) + proc = subprocess.Popen(gpg_cmd, stdout=subprocess.PIPE) + outdata, errdata = proc.communicate() + if proc.returncode != 0: + logger.error(errdata) + raise subprocess.CalledProcessError(proc.returncode, gpg_cmd) + return outdata + + +class KeyData(object): + def __init__(self, key, created, expires): + self.key = key + self.created = get_datetime(created) + self.expires = get_datetime(expires) + self.parent = None + self.revoked = None + self.db_id = None + + +def parse_keydata(data): + keys = OrderedDict() + current_pubkey = None + + # parse all of the output from our successful GPG command + logger.info("parsing command output") + node = None + for line in data.split('\n'): + parts = line.split(':') + if parts[0] == 'pub': + key = parts[4] + current_pubkey = key + keys[key] = KeyData(key, parts[5], parts[6]) + node = parts[0] + elif parts[0] == 'sub': + key = parts[4] + keys[key] = KeyData(key, parts[5], parts[6]) + keys[key].parent = current_pubkey + node = parts[0] + elif parts[0] == 'uid': + node = parts[0] + elif parts[0] == 'rev' and node in ('pub', 'sub'): + keys[current_pubkey].revoked = get_datetime(parts[5]) + + return keys + + +def find_key_owner(key, keys, finder): + '''Recurse up the chain, looking for an owner.''' + if key is None: + return None + owner = finder.find_by_pgp_key(key.key) + if owner: + return owner + if key.parent: + return find_key_owner(keys[key.parent], keys, finder) + return None + + +def import_keys(keyring): + outdata = call_gpg(keyring, "--list-sigs") + keydata = parse_keydata(outdata) + + logger.info("creating or finding %d keys", len(keydata)) + created_ct = updated_ct = 0 + with transaction.atomic(): + finder = UserFinder() + # we are dependent on parents coming before children; parse_keydata + # uses an OrderedDict to ensure this is the case. 
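            # Illustrative sketch, not part of this commit, of the
            # --with-colons records behind keydata (values made up); field 5
            # is the key ID, fields 6 and 7 the creation/expiry timestamps:
            #   pub:u:4096:1:<PUBKEYID>:1292860930::
            #   sub:u:2048:1:<SUBKEYID>:1292861245:1450000000:
            # 'pub' records precede their 'sub' records, so a parent row is
            # saved (and gets a db_id) before any child that references it.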
+ for data in keydata.values(): + parent_id = None + if data.parent: + parent_data = keydata.get(data.parent, None) + if parent_data: + parent_id = parent_data.db_id + other = { + 'expires': data.expires, + 'revoked': data.revoked, + 'parent_id': parent_id, + } + dkey, created = DeveloperKey.objects.get_or_create( + key=data.key, created=data.created, defaults=other) + data.db_id = dkey.id + + # set or update any additional data we might need to + needs_save = False + if created: + created_ct += 1 + else: + for k, v in other.items(): + if getattr(dkey, k) != v: + setattr(dkey, k, v) + needs_save = True + if dkey.owner_id is None: + owner = find_key_owner(data, keydata, finder) + if owner is not None: + dkey.owner = owner + needs_save = True + if needs_save: + dkey.save() + updated_ct += 1 + + key_ct = DeveloperKey.objects.all().count() + logger.info("%d total keys in database", key_ct) + logger.info("created %d, updated %d keys", created_ct, updated_ct) + + +class SignatureData(object): + def __init__(self, signer, signee, created): + self.signer = signer + self.signee = signee + self.created = created + self.expires = None + self.revoked = None + + +def parse_sigdata(data): + nodes = {} + edges = [] + current_pubkey = None + + # parse all of the output from our successful GPG command + logger.info("parsing command output") + for line in data.split('\n'): + parts = line.split(':') + if parts[0] == 'pub': + current_pubkey = parts[4] + nodes[current_pubkey] = None + elif parts[0] == 'uid': + uid = parts[9] + # only set uid if this is the first one encountered + if nodes[current_pubkey] is None: + nodes[current_pubkey] = uid + elif parts[0] == 'sig': + signer = parts[4] + created = get_date(parts[5]) + edge = SignatureData(signer, current_pubkey, created) + if parts[6]: + edge.expires = get_date(parts[6]) + edges.append(edge) + elif parts[0] == 'rev': + signer = parts[4] + revoked = get_date(parts[5]) + # revoke any prior edges that match + matches = [e for e in edges if e.signer == signer + and e.signee == current_pubkey] + for edge in matches: + edge.revoked = revoked + + return nodes, edges + + +def import_signatures(keyring): + outdata = call_gpg(keyring, "--list-sigs") + nodes, edges = parse_sigdata(outdata) + + # now prune the data down to what we actually want. + # prune edges not in nodes, remove duplicates, and self-sigs + pruned_edges = {edge for edge in edges + if edge.signer in nodes and edge.signer != edge.signee} + + logger.info("creating or finding up to %d signatures", len(pruned_edges)) + created_ct = updated_ct = 0 + with transaction.atomic(): + for edge in pruned_edges: + sig, created = PGPSignature.objects.get_or_create( + signer=edge.signer, signee=edge.signee, + created=edge.created, expires=edge.expires, + defaults={ 'revoked': edge.revoked }) + if sig.revoked != edge.revoked: + sig.revoked = edge.revoked + sig.save() + updated_ct += 1 + if created: + created_ct += 1 + + sig_ct = PGPSignature.objects.all().count() + logger.info("%d total signatures in database", sig_ct) + logger.info("created %d, updated %d signatures", created_ct, updated_ct) + +# vim: set ts=4 sw=4 et: diff --git a/devel/management/commands/rematch_developers.py b/devel/management/commands/rematch_developers.py new file mode 100644 index 00000000..bbb43df0 --- /dev/null +++ b/devel/management/commands/rematch_developers.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- +""" +rematch_developers command + +Match all packages with a packager_str but NULL packager_id to a packager if we +can find one. 
+ +Also, match all flag requests with a NULL user_id that have a user_email +matching up to a developer if we can find one. + +Usage: ./manage.py rematch_developers +""" + +from django.core.management.base import NoArgsCommand +from django.db import transaction + +import sys +import logging + +from devel.utils import UserFinder +from main.models import Package +from packages.models import FlagRequest + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s -> %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + stream=sys.stderr) +logger = logging.getLogger() + +class Command(NoArgsCommand): + help = "Match and map objects in database to developer emails" + + def handle_noargs(self, **options): + v = int(options.get('verbosity', None)) + if v == 0: + logger.level = logging.ERROR + elif v == 1: + logger.level = logging.INFO + elif v >= 2: + logger.level = logging.DEBUG + + finder = UserFinder() + match_packager(finder) + match_flagrequest(finder) + +@transaction.atomic +def match_packager(finder): + logger.info("getting all unmatched packager strings") + package_count = matched_count = 0 + mapping = {} + + unmatched = Package.objects.filter(packager__isnull=True).values_list( + 'packager_str', flat=True).order_by().distinct() + + logger.info("%d packager strings retrieved", len(unmatched)) + for packager in unmatched: + logger.debug("packager string %s", packager) + user = finder.find(packager) + if user: + mapping[packager] = user + logger.debug(" found user %s" % user.username) + matched_count += 1 + + for packager_str, user in mapping.items(): + package_count += Package.objects.filter(packager__isnull=True, + packager_str=packager_str).update(packager=user) + + logger.info("%d packages updated, %d packager strings matched", + package_count, matched_count) + + +@transaction.atomic +def match_flagrequest(finder): + logger.info("getting all flag request email addresses from unknown users") + req_count = matched_count = 0 + mapping = {} + + unmatched = FlagRequest.objects.filter(user__isnull=True).values_list( + 'user_email', flat=True).order_by().distinct() + + logger.info("%d email addresses retrieved", len(unmatched)) + for user_email in unmatched: + logger.debug("email %s", user_email) + user = finder.find_by_email(user_email) + if user: + mapping[user_email] = user + logger.debug(" found user %s" % user.username) + matched_count += 1 + + for user_email, user in mapping.items(): + req_count += FlagRequest.objects.filter(user__isnull=True, + user_email=user_email).update(user=user) + + logger.info("%d request emails updated, %d emails matched", + req_count, matched_count) + +# vim: set ts=4 sw=4 et: diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 7c468001..c76b5011 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -13,12 +13,10 @@ Example: ./manage.py reporead i686 /tmp/core.db.tar.gz """ -from django.core.management.base import BaseCommand, CommandError -from django.contrib.auth.models import User -from django.db import transaction -from django.db.models import Q - -import codecs +from base64 import b64decode +from collections import defaultdict +from copy import copy +import io import os import re import sys @@ -26,16 +24,26 @@ import tarfile import logging from datetime import datetime from optparse import make_option +from pytz import utc -from logging import ERROR, WARNING, INFO, DEBUG +from django.core.management.base import BaseCommand, CommandError +from django.db 
import connections, router, transaction +from django.db.utils import IntegrityError +from django.utils.timezone import now + +from devel.utils import UserFinder +from main.models import Arch, Package, PackageFile, Repo +from packages.models import Depend, Conflict, Provision, Replacement, Update +from packages.utils import parse_version -from main.models import Arch, Package, Repo logging.basicConfig( - level=WARNING, + level=logging.WARNING, format='%(asctime)s -> %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S', stream=sys.stderr) +TRACE = 5 +logging.addLevelName(TRACE, 'TRACE') logger = logging.getLogger() class Command(BaseCommand): @@ -51,8 +59,6 @@ class Command(BaseCommand): def handle(self, arch=None, filename=None, **options): if not arch: raise CommandError('Architecture is required.') - if not validate_arch(arch): - raise CommandError('Specified architecture %s is not currently known.' % arch) if not filename: raise CommandError('Package database file is required.') filename = os.path.normpath(filename) @@ -61,257 +67,414 @@ class Command(BaseCommand): v = int(options.get('verbosity', 0)) if v == 0: - logger.level = ERROR + logger.level = logging.ERROR elif v == 1: - logger.level = INFO - elif v == 2: - logger.level = DEBUG - - import signal, traceback - handler = lambda sig, stack: traceback.print_stack(stack) - signal.signal(signal.SIGQUIT, handler) - signal.signal(signal.SIGUSR1, handler) + logger.level = logging.INFO + elif v >= 2: + logger.level = logging.DEBUG return read_repo(arch, filename, options) -class Pkg(object): +class RepoPackage(object): """An interim 'container' object for holding Arch package data.""" - bare = ( 'name', 'base', 'arch', 'desc', 'filename', - 'md5sum', 'url', 'builddate', 'packager' ) - squash = ( 'license', ) + bare = ( 'name', 'base', 'arch', 'filename', + 'md5sum', 'sha256sum', 'url', 'packager' ) number = ( 'csize', 'isize' ) + collections = ( 'depends', 'optdepends', 'makedepends', 'checkdepends', + 'conflicts', 'provides', 'replaces', 'groups', 'license') def __init__(self, repo): self.repo = repo self.ver = None self.rel = None - for k in self.bare + self.squash + self.number: + self.epoch = 0 + self.desc = None + self.pgpsig = None + for k in self.bare + self.number: setattr(self, k, None) + for k in self.collections: + setattr(self, k, ()) + self.builddate = None + self.files = None def populate(self, values): for k, v in values.iteritems(): # ensure we stay under our DB character limit if k in self.bare: setattr(self, k, v[0][:254]) - elif k in self.squash: - setattr(self, k, u', '.join(v)[:254]) elif k in self.number: setattr(self, k, long(v[0])) - elif k == 'force': - setattr(self, k, True) + elif k in ('desc', 'pgpsig'): + # do NOT prune these values at all + if v[0] == None: + v[0] = 'missing' + setattr(self, k, v[0]) elif k == 'version': - ver, rel = v[0].rsplit('-') - setattr(self, 'ver', ver) - setattr(self, 'rel', rel) + self.ver, self.rel, self.epoch = parse_version(v[0]) + elif k == 'builddate': + try: + builddate = datetime.utcfromtimestamp(int(v[0])) + self.builddate = builddate.replace(tzinfo=utc) + except ValueError: + logger.warning( + 'Package %s had unparsable build date %s', + self.name, v[0]) else: - # files, depends, etc. - setattr(self, k, v) - - -def find_user(userstring): - ''' - Attempt to find the corresponding User object for a standard - packager string, e.g. something like - 'A. U. Thor <author@example.com>'. 
- We start by searching for a matching email address; we then move onto - matching by first/last name. If we cannot find a user, then return None. - ''' - if userstring in find_user.cache: - return find_user.cache[userstring] - matches = re.match(r'^([^<]+)? ?<([^>]*)>', userstring) - if not matches: - return None + # anything left in collections + setattr(self, k, tuple(v)) - user = None - name = matches.group(1) - email = matches.group(2) - - def user_email(): - return User.objects.get(email=email) - def profile_email(): - return User.objects.get(userprofile_user__public_email=email) - def user_name(): - # yes, a bit odd but this is the easiest way since we can't always be - # sure how to split the name. Ensure every 'token' appears in at least - # one of the two name fields. - name_q = Q() - for token in name.split(): - name_q &= (Q(first_name__icontains=token) | - Q(last_name__icontains=token)) - return User.objects.get(name_q) - - for matcher in (user_email, profile_email, user_name): + @property + def files_list(self): + data_file = io.TextIOWrapper(io.BytesIO(self.files), encoding='UTF-8') try: - user = matcher() - break - except (User.DoesNotExist, User.MultipleObjectsReturned): - pass + info = parse_info(data_file) + except UnicodeDecodeError: + logger.warn("Could not correctly decode files list for %s", + self.name) + return None + return info['files'] + + @property + def full_version(self): + '''Very similar to the main.models.Package method.''' + if self.epoch > 0: + return u'%d:%s-%s' % (self.epoch, self.ver, self.rel) + return u'%s-%s' % (self.ver, self.rel) + + +DEPEND_RE = re.compile(r"^(.+?)((>=|<=|=|>|<)(.+))?$") + +def create_depend(package, dep_str, deptype='D'): + depend = Depend(pkg=package, deptype=deptype) + # lop off any description first, don't get confused by epoch + parts = dep_str.split(': ', 1) + if len(parts) > 1: + depend.description = parts[1].strip() + match = DEPEND_RE.match(parts[0].strip()) + if match: + depend.name = match.group(1) + if match.group(3): + depend.comparison = match.group(3) + if match.group(4): + depend.version = match.group(4) + else: + logger.warning('Package %s had unparsable depend string %s', + package.pkgname, dep_str) + return None + return depend + +def create_related(model, package, rel_str, equals_only=False): + related = model(pkg=package) + match = DEPEND_RE.match(rel_str) + if match: + related.name = match.group(1) + if match.group(3): + comp = match.group(3) + if not equals_only: + related.comparison = comp + elif comp != '=': + logger.warning( + 'Package %s had unexpected comparison operator %s for %s in %s', + package.pkgname, comp, model.__name__, rel_str) + if match.group(4): + related.version = match.group(4) + else: + logger.warning('Package %s had unparsable %s string %s', + package.pkgname, model.___name__, rel_str) + return None + return related + - find_user.cache[userstring] = user - return user +def create_multivalued(dbpkg, repopkg, db_attr, repo_attr): + '''Populate the simplest of multivalued attributes. These are those that + only deal with a 'name' attribute, such as licenses, groups, etc. 
The input + and output objects and attribute names are specified, and everything is + done via getattr().''' + collection = getattr(dbpkg, db_attr) + collection.all().delete() + model = collection.model + new_items = [] + for name in getattr(repopkg, repo_attr): + new_items.append(model(pkg=dbpkg, name=name)) + if new_items: + model.objects.bulk_create(new_items) -# cached mappings of user strings -> User objects so we don't have to do the -# lookup more than strictly necessary. -find_user.cache = {} +finder = UserFinder() def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): + # we reset the flag date only if the upstream version components change; + # e.g. epoch or pkgver, but not pkgrel + if dbpkg.epoch is None or dbpkg.epoch != repopkg.epoch: + dbpkg.flag_date = None + elif dbpkg.pkgver is None or dbpkg.pkgver != repopkg.ver: + dbpkg.flag_date = None + if repopkg.base: dbpkg.pkgbase = repopkg.base else: dbpkg.pkgbase = repopkg.name dbpkg.pkgver = repopkg.ver dbpkg.pkgrel = repopkg.rel - dbpkg.pkgdesc = repopkg.desc - dbpkg.license = repopkg.license + dbpkg.epoch = repopkg.epoch + try: + dbpkg.pkgdesc = repopkg.desc + except AttributeError: + dbpkg.pkgdesc = "missing" dbpkg.url = repopkg.url dbpkg.filename = repopkg.filename dbpkg.compressed_size = repopkg.csize dbpkg.installed_size = repopkg.isize - try: - dbpkg.build_date = datetime.utcfromtimestamp(int(repopkg.builddate)) - except ValueError: - try: - dbpkg.build_date = datetime.strptime(repopkg.builddate, - '%a %b %d %H:%M:%S %Y') - except ValueError: - logger.warning('Package %s had unparsable build date %s' % \ - (repopkg.name, repopkg.builddate)) + dbpkg.build_date = repopkg.builddate dbpkg.packager_str = repopkg.packager # attempt to find the corresponding django user for this string - dbpkg.packager = find_user(repopkg.packager) + dbpkg.packager = finder.find(repopkg.packager) + dbpkg.signature_bytes = b64decode(repopkg.pgpsig.encode('utf-8')) if timestamp: - dbpkg.flag_date = None dbpkg.last_update = timestamp dbpkg.save() populate_files(dbpkg, repopkg, force=force) - dbpkg.packagedepend_set.all().delete() - if 'depends' in repopkg.__dict__: - for y in repopkg.depends: - # make sure we aren't adding self depends.. 
- # yes *sigh* i have seen them in pkgbuilds - dpname, dpvcmp = re.match(r"([a-z0-9._+-]+)(.*)", y).groups() - if dpname == repopkg.name: - logger.warning('Package %s has a depend on itself' % repopkg.name) - continue - dbpkg.packagedepend_set.create(depname=dpname, depvcmp=dpvcmp) - logger.debug('Added %s as dep for pkg %s' % (dpname, repopkg.name)) + dbpkg.depends.all().delete() + deps = [create_depend(dbpkg, y) for y in repopkg.depends] + deps += [create_depend(dbpkg, y, 'O') for y in repopkg.optdepends] + deps += [create_depend(dbpkg, y, 'M') for y in repopkg.makedepends] + deps += [create_depend(dbpkg, y, 'C') for y in repopkg.checkdepends] + Depend.objects.bulk_create(deps) - dbpkg.packagegroup_set.all().delete() - if 'groups' in repopkg.__dict__: - for y in repopkg.groups: - dbpkg.packagegroup_set.create(name=y) + dbpkg.conflicts.all().delete() + conflicts = [create_related(Conflict, dbpkg, y) for y in repopkg.conflicts] + Conflict.objects.bulk_create(conflicts) + + dbpkg.provides.all().delete() + provides = [create_related(Provision, dbpkg, y, equals_only=True) + for y in repopkg.provides] + Provision.objects.bulk_create(provides) + + dbpkg.replaces.all().delete() + replaces = [create_related(Replacement, dbpkg, y) for y in repopkg.replaces] + Replacement.objects.bulk_create(replaces) + + create_multivalued(dbpkg, repopkg, 'groups', 'groups') + create_multivalued(dbpkg, repopkg, 'licenses', 'license') + + +pkg_same_version = lambda pkg, dbpkg: pkg.ver == dbpkg.pkgver \ + and pkg.rel == dbpkg.pkgrel and pkg.epoch == dbpkg.epoch + + +def delete_pkg_files(dbpkg): + database = router.db_for_write(Package, instance=dbpkg) + cursor = connections[database].cursor() + cursor.execute('DELETE FROM package_files WHERE pkg_id = %s', [dbpkg.id]) + + +def batched_bulk_create(model, all_objects): + cutoff = 10000 + length = len(all_objects) + if length < cutoff: + return model.objects.bulk_create(all_objects) + + def chunks(): + offset = 0 + while offset < length: + yield all_objects[offset:offset + cutoff] + offset += cutoff + + for items in chunks(): + ret = model.objects.bulk_create(items) + + return ret def populate_files(dbpkg, repopkg, force=False): if not force: + if not pkg_same_version(repopkg, dbpkg): + logger.info("DB version (%s) didn't match repo version " + "(%s) for package %s, skipping file list addition", + dbpkg.full_version, repopkg.full_version, dbpkg.pkgname) + return if not dbpkg.files_last_update or not dbpkg.last_update: pass - elif dbpkg.files_last_update > dbpkg.last_update: + elif dbpkg.files_last_update >= dbpkg.last_update: return + # only delete files if we are reading a DB that contains them - if 'files' in repopkg.__dict__: - dbpkg.packagefile_set.all().delete() - logger.info("adding %d files for package %s" % (len(repopkg.files), dbpkg.pkgname)) - for x in repopkg.files: - dbpkg.packagefile_set.create(path=x) - dbpkg.files_last_update = datetime.now() + if repopkg.files: + files = repopkg.files_list + # we had files data, but it couldn't be parsed, so skip + if not files: + return + delete_pkg_files(dbpkg) + logger.info("adding %d files for package %s", + len(files), dbpkg.pkgname) + pkg_files = [] + # sort in normal alpha-order that pacman uses, rather than makepkg's + # default breadth-first, directory-first ordering + for f in sorted(files): + if '/' in f: + dirname, filename = f.rsplit('/', 1) + dirname += '/' + else: + dirname, filename = '', f + if filename == '': + filename = None + pkgfile = PackageFile(pkg=dbpkg, + is_directory=(filename is None), + 
directory=dirname, + filename=filename) + pkg_files.append(pkgfile) + batched_bulk_create(PackageFile, pkg_files) + dbpkg.files_last_update = now() dbpkg.save() -def db_update(archname, reponame, pkgs, options): - """ - Parses a list and updates the Arch dev database accordingly. - Arguments: - pkgs -- A list of Pkg objects. +def update_common(archname, reponame, pkgs, sanity_check=True): + # If isolation level is repeatable-read, we need to ensure each package + # update starts a new transaction and re-queries the database as + # necessary to guard against simultaneous updates. + with transaction.atomic(): + # force the transaction dirty, even though we will only do reads + transaction.set_dirty() + + repository = Repo.objects.get(name__iexact=reponame) + architecture = Arch.objects.get(name=archname) + # no-arg order_by() removes even the default ordering; we don't need it + dbpkgs = Package.objects.filter( + arch=architecture, repo=repository).order_by() + + logger.info("%d packages in current web DB", len(dbpkgs)) + logger.info("%d packages in new updating DB", len(pkgs)) + if len(dbpkgs): + dbpercent = 100.0 * len(pkgs) / len(dbpkgs) + else: + dbpercent = 0.0 + logger.info("DB package ratio: %.1f%%", dbpercent) + + # Fewer than 20 packages makes the percentage check unreliable, but it + # also means we expect the repo to fluctuate a lot. + msg = "Package database %s (%s) has %.1f%% the number of packages " \ + "the web database" + if not sanity_check: + pass + elif repository.testing or repository.staging: + pass + elif len(dbpkgs) == 0 and len(pkgs) == 0: + pass + elif len(dbpkgs) > 20 and dbpercent < 50.0: + logger.error(msg, reponame, archname, dbpercent) + raise Exception(msg % (reponame, archname, dbpercent)) + elif dbpercent < 75.0: + logger.warning(msg, reponame, archname, dbpercent) + + return dbpkgs + +def db_update(archname, reponame, pkgs, force=False): """ - logger.info('Updating Arch: %s' % archname) - force = options.get('force', False) - filesonly = options.get('filesonly', False) + Parses a list of packages and updates the packages database accordingly. + """ + logger.info('Updating %s (%s)', reponame, archname) + dbpkgs = update_common(archname, reponame, pkgs, True) repository = Repo.objects.get(name__iexact=reponame) - architecture = Arch.objects.get(name__iexact=archname) - dbpkgs = Package.objects.filter(arch=architecture, repo=repository) - # It makes sense to fully evaluate our DB query now because we will - # be using 99% of the objects in our "in both sets" loop. Force eval - # by calling list() on the QuerySet. - list(dbpkgs) + architecture = Arch.objects.get(name=archname) + # This makes our inner loop where we find packages by name *way* more # efficient by not having to go to the database for each package to # SELECT them by name. - dbdict = dict([(pkg.pkgname, pkg) for pkg in dbpkgs]) - - # go go set theory! - # thank you python for having a set class <3 - logger.debug("Creating sets") - dbset = set([pkg.pkgname for pkg in dbpkgs]) - syncset = set([pkg.name for pkg in pkgs]) - logger.info("%d packages in current web DB" % len(dbset)) - logger.info("%d packages in new updating db" % len(syncset)) - # packages in syncdb and not in database (add to database) - logger.debug("Set theory: Packages in syncdb not in database") - in_sync_not_db = syncset - dbset - logger.info("%d packages in sync not db" % len(in_sync_not_db)) + dbdict = {dbpkg.pkgname: dbpkg for dbpkg in dbpkgs} - # Try to catch those random orphaning issues that make Eric so unhappy. 
- if len(dbset) > 20: - dbpercent = 100.0 * len(syncset) / len(dbset) - else: - # we don't have 20 packages in this repo/arch, so this check could - # produce a lot of false positives (or a div by zero). fake it - dbpercent = 100.0 - logger.info("DB package ratio: %.1f%%" % dbpercent) - if dbpercent < 50.0 and not repository.testing: - logger.error(".db.tar.gz has %.1f%% the number of packages in the web database" % dbpercent) - raise Exception( - 'It looks like the syncdb is less than half the size of the web db. WTF?') - - if dbpercent < 75.0: - logger.warning(".db.tar.gz has %.1f%% the number of packages in the web database." % dbpercent) - - if not filesonly: - # packages in syncdb and not in database (add to database) - logger.debug("Set theory: Packages in syncdb not in database") - for p in [x for x in pkgs if x.name in in_sync_not_db]: - logger.info("Adding package %s", p.name) - pkg = Package(pkgname = p.name, arch = architecture, repo = repository) - populate_pkg(pkg, p, timestamp=datetime.now()) - - # packages in database and not in syncdb (remove from database) - logger.debug("Set theory: Packages in database not in syncdb") - in_db_not_sync = dbset - syncset - for p in in_db_not_sync: - logger.info("Removing package %s from database", p) - Package.objects.get( - pkgname=p, arch=architecture, repo=repository).delete() + dbset = set(dbdict.keys()) + syncset = {pkg.name for pkg in pkgs} + + in_sync_not_db = syncset - dbset + logger.info("%d packages in sync not db", len(in_sync_not_db)) + # packages in syncdb and not in database (add to database) + for pkg in (pkg for pkg in pkgs if pkg.name in in_sync_not_db): + logger.info("Adding package %s", pkg.name) + timestamp = now() + dbpkg = Package(pkgname=pkg.name, arch=architecture, repo=repository, + created=timestamp) + try: + with transaction.atomic(): + populate_pkg(dbpkg, pkg, timestamp=timestamp) + Update.objects.log_update(None, dbpkg) + except IntegrityError: + if architecture.agnostic: + logger.warning("Could not add package %s; " + "not fatal if another thread beat us to it.", + pkg.name) + else: + logger.exception("Could not add package %s", pkg.name) + + # packages in database and not in syncdb (remove from database) + for pkgname in (dbset - syncset): + logger.info("Removing package %s", pkgname) + dbpkg = dbdict[pkgname] + with transaction.atomic(): + Update.objects.log_update(dbpkg, None) + # no race condition here as long as simultaneous threads both + # issue deletes; second delete will be a no-op + delete_pkg_files(dbpkg) + dbpkg.delete() # packages in both database and in syncdb (update in database) - logger.debug("Set theory: Packages in database and syncdb") pkg_in_both = syncset & dbset - for p in [x for x in pkgs if x.name in pkg_in_both]: - logger.debug("Looking for package updates") - dbp = dbdict[p.name] + for pkg in (x for x in pkgs if x.name in pkg_in_both): + logger.debug("Checking package %s", pkg.name) + dbpkg = dbdict[pkg.name] timestamp = None # for a force, we don't want to update the timestamp. # for a non-force, we don't want to do anything at all. - if filesonly: - pass - elif '-'.join((p.ver, p.rel)) == '-'.join((dbp.pkgver, dbp.pkgrel)): - if not force: + if not force and pkg_same_version(pkg, dbpkg): + continue + elif not force: + timestamp = now() + + # The odd select_for_update song and dance here are to ensure + # simultaneous updates don't happen on a package, causing + # files/depends/all related items to be double-imported. 
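            # Illustrative sketch, not part of this commit: at the SQL level
            # the select_for_update() call below amounts to
            #   BEGIN;
            #   SELECT ... FROM packages WHERE id = %s FOR UPDATE;
            #   -- a second importer blocks here until the first commits,
            #   -- then re-checks pkg_same_version() and skips the update
            #   COMMIT;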
+ with transaction.atomic(): + dbpkg = Package.objects.select_for_update().get(id=dbpkg.id) + if not force and pkg_same_version(pkg, dbpkg): + logger.debug("Package %s was already updated", pkg.name) continue - else: - timestamp = datetime.now() - if filesonly: - logger.debug("Checking files for package %s in database", p.name) - populate_files(dbp, p) - else: - logger.info("Updating package %s in database", p.name) - populate_pkg(dbp, p, force=force, timestamp=timestamp) + logger.info("Updating package %s", pkg.name) + prevpkg = copy(dbpkg) + populate_pkg(dbpkg, pkg, force=force, timestamp=timestamp) + Update.objects.log_update(prevpkg, dbpkg) + + logger.info('Finished updating arch: %s', archname) + + +def filesonly_update(archname, reponame, pkgs, force=False): + """ + Parses a list of packages and updates the packages database accordingly. + """ + logger.info('Updating files for %s (%s)', reponame, archname) + dbpkgs = update_common(archname, reponame, pkgs, False) + dbdict = {dbpkg.pkgname: dbpkg for dbpkg in dbpkgs} + dbset = set(dbdict.keys()) + + for pkg in (pkg for pkg in pkgs if pkg.name in dbset): + dbpkg = dbdict[pkg.name] + + # The odd select_for_update song and dance here are to ensure + # simultaneous updates don't happen on a package, causing + # files to be double-imported. + with transaction.atomic(): + if not dbpkg.files_last_update or not dbpkg.last_update: + pass + elif not force and dbpkg.files_last_update >= dbpkg.last_update: + logger.debug("Files for %s are up to date", pkg.name) + continue + dbpkg = Package.objects.select_for_update().get(id=dbpkg.id) + logger.debug("Checking files for package %s", pkg.name) + populate_files(dbpkg, pkg, force=force) - logger.info('Finished updating Arch: %s' % archname) + logger.info('Finished updating arch: %s', archname) def parse_info(iofile): @@ -326,7 +489,7 @@ def parse_info(iofile): continue elif line.startswith('%') and line.endswith('%'): blockname = line[1:-1].lower() - logger.debug("Parsing package block %s", blockname) + logger.log(TRACE, "Parsing package block %s", blockname) store[blockname] = [] elif blockname: store[blockname].append(line) @@ -337,7 +500,7 @@ def parse_info(iofile): def parse_repo(repopath): """ - Parses an Arch repo db file, and returns a list of Pkg objects. + Parses an Arch repo db file, and returns a list of RepoPackage objects. Arguments: repopath -- The path of a repository db file. 
@@ -349,71 +512,95 @@ def parse_repo(repopath): logger.info("Reading repo tarfile %s", repopath) filename = os.path.split(repopath)[1] - m = re.match(r"^(.*)\.(db|files)\.tar\.(.*)$", filename) + m = re.match(r"^(.*)\.(db|files)\.tar(\..*)?$", filename) if m: reponame = m.group(1) else: logger.error("File does not have the proper extension") raise Exception("File does not have the proper extension") - repodb = tarfile.open(repopath,"r:gz") - ## assuming well formed tar, with dir first then files after - ## repo-add enforces this + repodb = tarfile.open(repopath, "r") logger.debug("Starting package parsing") - dbfiles = ('desc', 'depends', 'files') - pkgs = {} + newpkg = lambda: RepoPackage(reponame) + pkgs = defaultdict(newpkg) for tarinfo in repodb.getmembers(): - if tarinfo.isdir(): - continue - elif tarinfo.isreg(): + if tarinfo.isreg(): pkgid, fname = os.path.split(tarinfo.name) - if fname not in dbfiles: - continue - data_file = repodb.extractfile(tarinfo) - data_file = codecs.EncodedFile(data_file, 'utf-8') - try: - data = parse_info(data_file) - p = pkgs.setdefault(pkgid, Pkg(reponame)) - p.populate(data) - except UnicodeDecodeError, e: - logger.warn("Could not correctly decode %s, skipping file" % \ - tarinfo.name) - data_file.close() - - logger.debug("Done parsing file %s", fname) + if fname == 'files': + # don't parse yet for speed and memory consumption reasons + files_data = repodb.extractfile(tarinfo) + pkgs[pkgid].files = files_data.read() + del files_data + elif fname in ('desc', 'depends'): + data_file = repodb.extractfile(tarinfo) + data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), + encoding='UTF-8') + try: + pkgs[pkgid].populate(parse_info(data_file)) + except UnicodeDecodeError: + logger.warn("Could not correctly decode %s, skipping file", + tarinfo.name) + data_file.close() + del data_file + + logger.debug("Done parsing file %s/%s", pkgid, fname) repodb.close() - logger.info("Finished repo parsing, %d total packages" % len(pkgs)) + logger.info("Finished repo parsing, %d total packages", len(pkgs)) return (reponame, pkgs.values()) -def validate_arch(arch): +def locate_arch(arch): "Check if arch is valid." - available_arches = [x.name for x in Arch.objects.all()] - return arch in available_arches + if isinstance(arch, Arch): + return arch + try: + return Arch.objects.get(name=arch) + except Arch.DoesNotExist: + raise CommandError( + 'Specified architecture %s is not currently known.' % arch) -@transaction.commit_on_success -def read_repo(primary_arch, file, options): + +def read_repo(primary_arch, repo_file, options): """ Parses repo.db.tar.gz file and returns exit status. 
""" - repo, packages = parse_repo(file) + # always returns an Arch object, regardless of what is passed in + primary_arch = locate_arch(primary_arch) + force = options.get('force', False) + filesonly = options.get('filesonly', False) + + repo, packages = parse_repo(repo_file) - # sort packages by arch -- to handle noarch stuff + # group packages by arch -- to handle noarch stuff packages_arches = {} - packages_arches['any'] = [] - packages_arches[primary_arch] = [] + for arch in Arch.objects.filter(agnostic=True): + packages_arches[arch.name] = [] + packages_arches[primary_arch.name] = [] for package in packages: - if package.arch in ('any', primary_arch): + if package.arch in packages_arches: packages_arches[package.arch].append(package) else: - # we don't include mis-arched packages - logger.warning("Package %s arch = %s" % ( - package.name,package.arch)) - logger.info('Starting database updates.') - for (arch, pkgs) in packages_arches.items(): - db_update(arch, repo, pkgs, options) - logger.info('Finished database updates.') + raise Exception( + "Package %s in database %s had wrong architecture %s" % ( + package.name, repo_file, package.arch)) + del packages + + database = router.db_for_write(Package) + connection = connections[database] + if connection.vendor == 'sqlite': + cursor = connection.cursor() + cursor.execute('PRAGMA synchronous = NORMAL') + + logger.info('Starting database updates for %s.', repo_file) + for arch in sorted(packages_arches.keys()): + if filesonly: + filesonly_update(arch, repo, packages_arches[arch], force) + else: + db_update(arch, repo, packages_arches[arch], force) + logger.info('Finished database updates for %s.', repo_file) + connection.commit() + connection.close() return 0 # vim: set ts=4 sw=4 et: diff --git a/devel/management/commands/reporead_inotify.py b/devel/management/commands/reporead_inotify.py new file mode 100644 index 00000000..1422ae26 --- /dev/null +++ b/devel/management/commands/reporead_inotify.py @@ -0,0 +1,215 @@ +# -*- coding: utf-8 -*- +""" +reporead_inotify command + +Watches repo.files.tar.gz files for updates and parses them after a short delay +in order to catch all updates in a single bulk update. + +Usage: ./manage.py reporead_inotify [path_template] + +Where 'path_template' is an optional path_template for finding the +repo.files.tar.gz files. The form is '/srv/ftp/%(repo)s/os/%(arch)s/', which is +also the default template if none is specified. While 'repo' is not required to +be present in the path_template, note that 'arch' is so reporead can function +correctly. +""" + +import logging +import multiprocessing +import os +import pyinotify +import sys +import threading +import time + +from django.core.management.base import BaseCommand, CommandError +from django.db import connection, transaction + +from main.models import Arch, Repo +from .reporead import read_repo + +logging.basicConfig( + level=logging.WARNING, + format='%(asctime)s -> %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + stream=sys.stderr) +logger = logging.getLogger() + +class Command(BaseCommand): + help = "Watch database files and run an update when necessary." 
+ args = "[path_template]" + + def handle(self, path_template=None, **options): + v = int(options.get('verbosity', 0)) + if v == 0: + logger.level = logging.ERROR + elif v == 1: + logger.level = logging.INFO + elif v >= 2: + logger.level = logging.DEBUG + + if not path_template: + path_template = '/srv/ftp/%(repo)s/os/%(arch)s/' + self.path_template = path_template + + notifier = self.setup_notifier() + # this thread is done using the database; all future access is done in + # the spawned read_repo() processes, so close the otherwise completely + # idle connection. + connection.close() + + logger.info('Entering notifier loop') + notifier.loop() + + logger.info('Cancelling remaining threads...') + for thread in threading.enumerate(): + if hasattr(thread, 'cancel'): + thread.cancel() + + @transaction.atomic + def setup_notifier(self): + '''Set up and configure the inotify machinery and logic. + This takes the provided or default path_template and builds a list of + directories we need to watch for database updates. It then validates + and passes these on to the various pyinotify pieces as necessary and + finally builds and returns a notifier object.''' + transaction.commit_manually() + arches = Arch.objects.filter(agnostic=False) + repos = Repo.objects.all() + transaction.set_dirty() + arch_path_map = {arch: None for arch in arches} + all_paths = set() + total_paths = 0 + for arch in arches: + combos = ({ 'repo': repo.name.lower(), 'arch': arch.name } + for repo in repos) + # take a python format string and generate all unique combinations + # of directories from it; using set() ensures we filter it down + paths = {self.path_template % values for values in combos} + total_paths += len(paths) + all_paths |= paths + arch_path_map[arch] = paths + + logger.info('Watching %d total paths', total_paths) + logger.debug(all_paths) + + # sanity check- basically ensure every path we created from the + # template mapped to only one architecture + if total_paths != len(all_paths): + raise CommandError('path template did not uniquely ' + 'determine architecture for each file') + + # A proper atomic replacement of the database as done by rsync is type + # IN_MOVED_TO. repo-add/remove will finish with a IN_CLOSE_WRITE. + mask = pyinotify.IN_CLOSE_WRITE | pyinotify.IN_MOVED_TO + + manager = pyinotify.WatchManager() + for name in all_paths: + manager.add_watch(name, mask) + + handler = EventHandler(arch_paths=arch_path_map) + return pyinotify.Notifier(manager, handler) + + +class Database(object): + '''A object representing a pacman database on the filesystem. 
It stores + various bits of metadata and state representing the file path, when we last + updated, how long our delay is before performing the update, whether we are + updating now, etc.''' + def __init__(self, arch, path, delay=60.0, nice=3): + self.arch = arch + self.path = path + self.delay = delay + self.nice = nice + self.mtime = None + self.last_import = None + self.update_thread = None + self.updating = False + self.run_again = False + self.lock = threading.Lock() + + def _start_update_countdown(self): + self.update_thread = threading.Timer(self.delay, self.update) + logger.info('Starting %.1f second countdown to update %s', + self.delay, self.path) + self.update_thread.start() + + def queue_for_update(self, mtime): + logger.debug('Queueing database %s...', self.path) + with self.lock: + self.mtime = mtime + if self.updating: + # store the fact that we will need to run it again + self.run_again = True + return + if self.update_thread: + self.update_thread.cancel() + self.update_thread = None + self._start_update_countdown() + + def update(self): + logger.debug('Updating database %s...', self.path) + with self.lock: + self.last_import = time.time() + self.updating = True + + try: + # invoke reporead's primary method. we do this in a separate + # process for memory conservation purposes; these processes grow + # rather large so it is best to free up the memory ASAP. + def run(): + if self.nice != 0: + os.nice(self.nice) + read_repo(self.arch, self.path, {}) + + process = multiprocessing.Process(target=run) + process.start() + process.join() + finally: + logger.debug('Done updating database %s.', self.path) + with self.lock: + self.update_thread = None + self.updating = False + if self.run_again: + self.run_again = False + self._start_update_countdown() + + +class EventHandler(pyinotify.ProcessEvent): + '''Our main event handler which listens for database change events. 
Because + we are watching the whole directory, we filter down and only look at those + events dealing with files databases.''' + + def my_init(self, **kwargs): + self.databases = {} + self.arch_lookup = {} + + # we really want a single path to arch mapping, so massage the data + arch_paths = kwargs['arch_paths'] + for arch, paths in arch_paths.items(): + self.arch_lookup.update((path.rstrip('/'), arch) for path in paths) + + def process_default(self, event): + '''Primary event processing function which kicks off reporead timer + threads if a files database was updated.''' + name = event.name + if not name: + return + # screen to only the files we care about, skipping temp files + if name.endswith('.files.tar.gz') and not name.startswith('.'): + path = event.pathname + stat = os.stat(path) + database = self.databases.get(path, None) + if database is None: + arch = self.arch_lookup.get(event.path, None) + if arch is None: + logger.warning( + 'Could not determine arch for %s, skipping update', + path) + return + database = Database(arch, path) + self.databases[path] = database + database.queue_for_update(stat.st_mtime) + + +# vim: set ts=4 sw=4 et: diff --git a/devel/management/commands/update_types_permissions.py b/devel/management/commands/update_types_permissions.py new file mode 100644 index 00000000..ac8fcfa5 --- /dev/null +++ b/devel/management/commands/update_types_permissions.py @@ -0,0 +1,23 @@ +from django.core.management.base import BaseCommand +from django.apps import apps +from django.contrib.auth.management import create_permissions +from django.contrib.contenttypes.management import update_contenttypes + + +class Command(BaseCommand): + args = '<app app ...>' + help = 'reloads permissions for specified apps, or all apps if no args are specified' + + def handle(self, *args, **options): + if not args: + app_configs = apps.get_app_configs() + else: + app_configs = [] + for arg in args: + apps.append(apps.get_app_config(arg)) + + for app_config in app_configs: + update_contenttypes(app_config, options.get('verbosity', 2)) + create_permissions(app_config, options.get('verbosity', 22)) + +# vim: set ts=4 sw=4 et: diff --git a/devel/migrations/0001_initial.py b/devel/migrations/0001_initial.py new file mode 100644 index 00000000..3dd3582b --- /dev/null +++ b/devel/migrations/0001_initial.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals +import pytz + +from django.db import models, migrations +import django_countries.fields +import django.db.models.deletion +from django.conf import settings +import devel.fields + + +class Migration(migrations.Migration): + + dependencies = [ + ('main', '0001_initial'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name='DeveloperKey', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('key', devel.fields.PGPKeyField(unique=True, max_length=40, verbose_name=b'PGP key fingerprint')), + ('created', models.DateTimeField()), + ('expires', models.DateTimeField(null=True, blank=True)), + ('revoked', models.DateTimeField(null=True, blank=True)), + ('owner', models.ForeignKey(related_name=b'all_keys', to=settings.AUTH_USER_MODEL, help_text=b'The developer this key belongs to', null=True)), + ('parent', models.ForeignKey(on_delete=django.db.models.deletion.SET_NULL, to='devel.DeveloperKey', null=True)), + ], + options={ + }, + bases=(models.Model,), + ), + migrations.CreateModel( + 
name='MasterKey', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('pgp_key', devel.fields.PGPKeyField(help_text=b'consists of 40 hex digits; use `gpg --fingerprint`', max_length=40, verbose_name=b'PGP key fingerprint')), + ('created', models.DateField()), + ('revoked', models.DateField(null=True, blank=True)), + ('owner', models.ForeignKey(related_name=b'masterkey_owner', to=settings.AUTH_USER_MODEL, help_text=b'The developer holding this master key')), + ('revoker', models.ForeignKey(related_name=b'masterkey_revoker', to=settings.AUTH_USER_MODEL, help_text=b'The developer holding the revocation certificate')), + ], + options={ + 'ordering': ('created',), + 'get_latest_by': 'created', + }, + bases=(models.Model,), + ), + migrations.CreateModel( + name='PGPSignature', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('signer', devel.fields.PGPKeyField(max_length=40, verbose_name=b'Signer key fingerprint', db_index=True)), + ('signee', devel.fields.PGPKeyField(max_length=40, verbose_name=b'Signee key fingerprint', db_index=True)), + ('created', models.DateField()), + ('expires', models.DateField(null=True, blank=True)), + ('revoked', models.DateField(null=True, blank=True)), + ], + options={ + 'ordering': ('signer', 'signee'), + 'get_latest_by': 'created', + 'verbose_name': 'PGP signature', + }, + bases=(models.Model,), + ), + migrations.CreateModel( + name='UserProfile', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('notify', models.BooleanField(default=True, help_text=b"When enabled, send user 'flag out-of-date' notifications", verbose_name=b'Send notifications')), + ('time_zone', models.CharField(default=b'UTC', help_text=b'Used for developer clock page', max_length=100, choices=[(z, z) for z in pytz.common_timezones])), + ('alias', models.CharField(help_text=b'Required field', max_length=50)), + ('public_email', models.CharField(help_text=b'Required field', max_length=50)), + ('other_contact', models.CharField(max_length=100, null=True, blank=True)), + ('pgp_key', devel.fields.PGPKeyField(help_text=b'consists of 40 hex digits; use `gpg --fingerprint`', max_length=40, null=True, verbose_name=b'PGP key fingerprint', blank=True)), + ('website', models.CharField(max_length=200, null=True, blank=True)), + ('yob', models.IntegerField(null=True, verbose_name=b'Year of birth', blank=True)), + ('country', django_countries.fields.CountryField(blank=True, max_length=2)), + ('location', models.CharField(max_length=50, null=True, blank=True)), + ('languages', models.CharField(max_length=50, null=True, blank=True)), + ('interests', models.CharField(max_length=255, null=True, blank=True)), + ('occupation', models.CharField(max_length=50, null=True, blank=True)), + ('roles', models.CharField(max_length=255, null=True, blank=True)), + ('favorite_distros', models.CharField(max_length=255, null=True, blank=True)), + ('picture', models.FileField(default=b'devs/silhouette.png', help_text=b'Ideally 125px by 125px', upload_to=b'devs')), + ('latin_name', models.CharField(help_text=b'Latin-form name; used only for non-Latin full names', max_length=255, null=True, blank=True)), + ('last_modified', models.DateTimeField(editable=False)), + ('allowed_repos', models.ManyToManyField(to='main.Repo', blank=True)), + ('user', models.OneToOneField(related_name=b'userprofile', to=settings.AUTH_USER_MODEL)), + ], + options={ + 
'get_latest_by': 'last_modified', + 'verbose_name': 'additional profile data', + 'verbose_name_plural': 'additional profile data', + 'db_table': 'user_profiles', + }, + bases=(models.Model,), + ), + ] diff --git a/devel/migrations/0002_staffgroup.py b/devel/migrations/0002_staffgroup.py new file mode 100644 index 00000000..5679e6a3 --- /dev/null +++ b/devel/migrations/0002_staffgroup.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import models, migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('auth', '0001_initial'), + ('devel', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='StaffGroup', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('name', models.CharField(max_length=100)), + ('slug', models.SlugField(unique=True, max_length=100)), + ('sort_order', models.PositiveIntegerField()), + ('member_title', models.CharField(max_length=100)), + ('description', models.TextField(blank=True)), + ('group', models.OneToOneField(to='auth.Group')), + ], + options={ + 'ordering': ('sort_order',), + }, + bases=(models.Model,), + ), + ] diff --git a/devel/migrations/__init__.py b/devel/migrations/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/devel/migrations/__init__.py diff --git a/devel/models.py b/devel/models.py index e69de29b..b05800a5 100644 --- a/devel/models.py +++ b/devel/models.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- +import pytz + +from django.core.urlresolvers import reverse +from django.db import models +from django.db.models.signals import pre_save +from django.contrib.auth.models import User, Group +from django_countries.fields import CountryField + +from .fields import PGPKeyField +from main.utils import make_choice, set_created_field + + +class UserProfile(models.Model): + notify = models.BooleanField( + "Send notifications", + default=True, + help_text="When enabled, send user 'flag out-of-date' notifications") + time_zone = models.CharField( + max_length=100, + choices=make_choice(pytz.common_timezones), + default="UTC", + help_text="Used for developer clock page") + alias = models.CharField( + max_length=50, + help_text="Required field") + public_email = models.CharField( + max_length=50, + help_text="Required field") + other_contact = models.CharField(max_length=100, null=True, blank=True) + pgp_key = PGPKeyField(max_length=40, null=True, blank=True, + verbose_name="PGP key fingerprint", + help_text="consists of 40 hex digits; use `gpg --fingerprint`") + website = models.CharField(max_length=200, null=True, blank=True) + yob = models.IntegerField("Year of birth", null=True, blank=True) + country = CountryField(blank=True) + location = models.CharField(max_length=50, null=True, blank=True) + languages = models.CharField(max_length=50, null=True, blank=True) + interests = models.CharField(max_length=255, null=True, blank=True) + occupation = models.CharField(max_length=50, null=True, blank=True) + roles = models.CharField(max_length=255, null=True, blank=True) + favorite_distros = models.CharField(max_length=255, null=True, blank=True) + picture = models.FileField(upload_to='devs', default='devs/silhouette.png', + help_text="Ideally 125px by 125px") + user = models.OneToOneField(User, related_name='userprofile') + allowed_repos = models.ManyToManyField('main.Repo', blank=True) + latin_name = models.CharField(max_length=255, null=True, blank=True, + help_text="Latin-form 
name; used only for non-Latin full names")
+    last_modified = models.DateTimeField(editable=False)
+
+    class Meta:
+        db_table = 'user_profiles'
+        get_latest_by = 'last_modified'
+        verbose_name = 'additional profile data'
+        verbose_name_plural = 'additional profile data'
+
+    def get_absolute_url(self):
+        user = self.user
+        # a user may belong to several groups; link to the first staff group
+        group = StaffGroup.objects.filter(group__in=user.groups.all()).first()
+        if group:
+            return '%s#%s' % (group.get_absolute_url(), user.username)
+        return None
+
+
+class StaffGroup(models.Model):
+    name = models.CharField(max_length=100)
+    slug = models.SlugField(max_length=100, unique=True)
+    group = models.OneToOneField(Group)
+    sort_order = models.PositiveIntegerField()
+    member_title = models.CharField(max_length=100)
+    description = models.TextField(blank=True)
+
+    class Meta:
+        ordering = ('sort_order',)
+
+    def __unicode__(self):
+        return self.name
+
+    def get_absolute_url(self):
+        return reverse('people', args=[self.slug])
+
+
+class MasterKey(models.Model):
+    owner = models.ForeignKey(User, related_name='masterkey_owner',
+            help_text="The developer holding this master key")
+    revoker = models.ForeignKey(User, related_name='masterkey_revoker',
+            help_text="The developer holding the revocation certificate")
+    pgp_key = PGPKeyField(max_length=40, verbose_name="PGP key fingerprint",
+            help_text="consists of 40 hex digits; use `gpg --fingerprint`")
+    created = models.DateField()
+    revoked = models.DateField(null=True, blank=True)
+
+    class Meta:
+        ordering = ('created',)
+        get_latest_by = 'created'
+
+    def __unicode__(self):
+        return u'%s, created %s' % (
+                self.owner.get_full_name(), self.created)
+
+
+class DeveloperKey(models.Model):
+    owner = models.ForeignKey(User, related_name='all_keys', null=True,
+            help_text="The developer this key belongs to")
+    key = PGPKeyField(max_length=40, verbose_name="PGP key fingerprint",
+            unique=True)
+    created = models.DateTimeField()
+    expires = models.DateTimeField(null=True, blank=True)
+    revoked = models.DateTimeField(null=True, blank=True)
+    parent = models.ForeignKey('self', null=True, on_delete=models.SET_NULL)
+
+    def __unicode__(self):
+        return self.key
+
+
+class PGPSignature(models.Model):
+    signer = PGPKeyField(max_length=40, verbose_name="Signer key fingerprint",
+            db_index=True)
+    signee = PGPKeyField(max_length=40, verbose_name="Signee key fingerprint",
+            db_index=True)
+    created = models.DateField()
+    expires = models.DateField(null=True, blank=True)
+    revoked = models.DateField(null=True, blank=True)
+
+    class Meta:
+        ordering = ('signer', 'signee')
+        get_latest_by = 'created'
+        verbose_name = 'PGP signature'
+
+    def __unicode__(self):
+        return u'%s → %s' % (self.signer, self.signee)
+
+
+pre_save.connect(set_created_field, sender=UserProfile,
+        dispatch_uid="devel.models")
+
+# vim: set ts=4 sw=4 et:
diff --git a/devel/reports.py b/devel/reports.py
new file mode 100644
index 00000000..66fbd627
--- /dev/null
+++ b/devel/reports.py
@@ -0,0 +1,198 @@
+from datetime import timedelta
+import pytz
+
+from django.db.models import F
+from django.template.defaultfilters import filesizeformat
+from django.utils.timezone import now
+
+from .models import DeveloperKey, UserProfile
+from main.models import PackageFile
+from packages.models import PackageRelation, Depend
+
+
+class DeveloperReport(object):
+    def __init__(self, slug, name, desc, packages_func,
+            names=None, attrs=None, personal=True):
+        self.slug = slug
+        self.name = name
+        self.description = desc
+        self.packages = packages_func
+        self.names = names
+        self.attrs = attrs
+        self.personal = personal
+
+
+def old(packages, username):
+    cutoff = now() - timedelta(days=365 * 2)
+    return packages.filter(
+            build_date__lt=cutoff).order_by('build_date')
+
+
+def outofdate(packages, username):
+    cutoff = now() - timedelta(days=30)
+    return packages.filter(
+            flag_date__lt=cutoff).order_by('flag_date')
+
+
+def big(packages, username):
+    cutoff = 50 * 1024 * 1024
+    packages = packages.filter(
+            compressed_size__gte=cutoff).order_by('-compressed_size')
+    # Format the compressed and installed sizes with MB/GB/etc suffixes
+    for package in packages:
+        package.compressed_size_pretty = filesizeformat(
+                package.compressed_size)
+        package.installed_size_pretty = filesizeformat(
+                package.installed_size)
+    return packages
+
+
+def badcompression(packages, username):
+    cutoff = 0.90 * F('installed_size')
+    packages = packages.filter(compressed_size__gt=25*1024,
+            installed_size__gt=25*1024,
+            compressed_size__gte=cutoff).order_by('-compressed_size')
+
+    # Format the compressed and installed sizes with MB/GB/etc suffixes
+    for package in packages:
+        package.compressed_size_pretty = filesizeformat(
+                package.compressed_size)
+        package.installed_size_pretty = filesizeformat(
+                package.installed_size)
+        ratio = package.compressed_size / float(package.installed_size)
+        package.ratio = '%.3f' % ratio
+        package.compress_type = package.filename.split('.')[-1]
+
+    return packages
+
+
+def uncompressed_man(packages, username):
+    # checking for all '.0'...'.9' + '.n' extensions
+    bad_files = PackageFile.objects.filter(is_directory=False,
+            directory__contains='/man/',
+            filename__regex=r'\.[0-9n]').exclude(
+            filename__endswith='.gz').exclude(
+            filename__endswith='.xz').exclude(
+            filename__endswith='.bz2').exclude(
+            filename__endswith='.html')
+    if username:
+        pkg_ids = set(packages.values_list('id', flat=True))
+        bad_files = bad_files.filter(pkg__in=pkg_ids)
+    bad_files = bad_files.values_list(
+            'pkg_id', flat=True).order_by().distinct()
+    return packages.filter(id__in=set(bad_files))
+
+
+def uncompressed_info(packages, username):
+    # we don't worry about looking for '*.info-1', etc., given that an
+    # uncompressed root page probably exists in the package anyway
+    bad_files = PackageFile.objects.filter(is_directory=False,
+            directory__endswith='/info/', filename__endswith='.info')
+    if username:
+        pkg_ids = set(packages.values_list('id', flat=True))
+        bad_files = bad_files.filter(pkg__in=pkg_ids)
+    bad_files = bad_files.values_list(
+            'pkg_id', flat=True).order_by().distinct()
+    return packages.filter(id__in=set(bad_files))
+
+
+def unneeded_orphans(packages, username):
+    owned = PackageRelation.objects.all().values('pkgbase')
+    required = Depend.objects.all().values('name')
+    # The two separate calls to exclude are required to do the right thing
+    return packages.exclude(pkgbase__in=owned).exclude(
+            pkgname__in=required)
+
+
+def mismatched_signature(packages, username):
+    filtered = []
+    packages = packages.select_related(
+            'arch', 'repo', 'packager').filter(signature_bytes__isnull=False)
+    known_keys = DeveloperKey.objects.select_related(
+            'owner').filter(owner__isnull=False)
+    # signatures carry only the 64-bit key ID, so key the lookup on the
+    # last 16 hex digits of each known fingerprint
+    known_keys = {dk.key[-16:]: dk for dk in known_keys}
+    for package in packages:
+        bad = False
+        sig = package.signature
+        dev_key = known_keys.get(sig.key_id, None)
+        if dev_key:
+            package.sig_by = dev_key.owner
+            if dev_key.owner_id != package.packager_id:
+                bad = True
+        else:
+            package.sig_by = sig.key_id
+            bad = True
+
+        if bad:
+            filtered.append(package)
+    return filtered
+
+
+def signature_time(packages, username):
+    cutoff = timedelta(hours=24)
+    filtered = []
+    packages = packages.select_related(
+            'arch', 'repo', 'packager').filter(signature_bytes__isnull=False)
+    for package in packages:
+        sig = package.signature
+        sig_date = sig.creation_time.replace(tzinfo=pytz.utc)
+        package.sig_date = sig_date.date()
+        if sig_date > package.build_date + cutoff:
+            filtered.append(package)
+
+    return filtered
+
+
+REPORT_OLD = DeveloperReport('old', 'Old',
+        'Packages last built more than two years ago', old)
+
+REPORT_OUTOFDATE = DeveloperReport('long-out-of-date', 'Long Out-of-date',
+        'Packages marked out-of-date more than 30 days ago', outofdate)
+
+REPORT_BIG = DeveloperReport('big', 'Big',
+        'Packages with compressed size > 50 MiB', big,
+        ['Compressed Size', 'Installed Size'],
+        ['compressed_size_pretty', 'installed_size_pretty'])
+
+REPORT_BADCOMPRESS = DeveloperReport('badcompression', 'Bad Compression',
+        'Packages > 25 KiB with a compression ratio < 10%', badcompression,
+        ['Compressed Size', 'Installed Size', 'Ratio', 'Type'],
+        ['compressed_size_pretty', 'installed_size_pretty', 'ratio', 'compress_type'])
+
+REPORT_MAN = DeveloperReport('uncompressed-man', 'Uncompressed Manpages',
+        'Packages with uncompressed manpages', uncompressed_man)
+
+REPORT_INFO = DeveloperReport('uncompressed-info', 'Uncompressed Info Pages',
+        'Packages with uncompressed info pages', uncompressed_info)
+
+REPORT_ORPHANS = DeveloperReport('unneeded-orphans', 'Unneeded Orphans',
+        'Packages that have no maintainer and are not required by any ' +
+        'other package in any repository', unneeded_orphans,
+        personal=False)
+
+REPORT_SIGNATURE = DeveloperReport('mismatched-signature',
+        'Mismatched Signatures',
+        'Packages where the signing key is unknown or signer != packager',
+        mismatched_signature,
+        ['Signed By', 'Packager'],
+        ['sig_by', 'packager'])
+
+REPORT_SIG_TIME = DeveloperReport('signature-time', 'Signature Time',
+        'Packages where the signature timestamp is more than 24 hours ' +
+        'after the build timestamp',
+        signature_time,
+        ['Signature Date', 'Packager'],
+        ['sig_date', 'packager'])
+
+
+def available_reports():
+    return (
+            REPORT_OLD,
+            REPORT_OUTOFDATE,
+            REPORT_BIG,
+            REPORT_BADCOMPRESS,
+            REPORT_MAN,
+            REPORT_INFO,
+            REPORT_ORPHANS,
+            REPORT_SIGNATURE,
+            REPORT_SIG_TIME,
+            )
diff --git a/devel/tests.py b/devel/tests.py
index 682f3d92..5c736a30 100644
--- a/devel/tests.py
+++ b/devel/tests.py
@@ -1,8 +1,10 @@
+from django.contrib.auth.models import User
 from django.test import TestCase
 
+from .utils import UserFinder
+from .models import UserProfile
 
 class DevelTest(TestCase):
-
     def test_index(self):
         response = self.client.get('/devel/')
         self.assertEqual(response.status_code, 302)
@@ -10,13 +12,6 @@ class DevelTest(TestCase):
         self.assertEqual(response['location'],
                 'http://testserver/login/?next=/devel/')
 
-    def test_notify(self):
-        response = self.client.get('/devel/notify/')
-        self.assertEqual(response.status_code, 302)
-        self.assertEqual(response.has_header('Location'), True)
-        self.assertEqual(response['location'],
-                'http://testserver/login/?next=/devel/notify/')
-
     def test_profile(self):
         response = self.client.get('/devel/profile/')
         self.assertEqual(response.status_code, 302)
@@ -33,7 +28,88 @@ class DevelTest(TestCase):
 
     def test_mirrors(self):
         response = self.client.get('/mirrors/')
-        self.assertEqual(response.status_code, 302)
-        self.assertEqual(response.has_header('Location'), True)
-        self.assertEqual(response['location'],
-                'http://testserver/login/?next=/mirrors/')
+ self.assertEqual(response.status_code, 200) + +class FindUserTest(TestCase): + + def setUp(self): + self.finder = UserFinder() + + self.user1 = User.objects.create(username="joeuser", first_name="Joe", + last_name="User", email="user1@example.com") + self.user2 = User.objects.create(username="john", first_name="John", + last_name="", email="user2@example.com") + self.user3 = User.objects.create(username="bjones", first_name="Bob", + last_name="Jones", email="user3@example.com") + + for user in (self.user1, self.user2, self.user3): + email_addr = "%s@awesome.com" % user.username + UserProfile.objects.create(user=user, public_email=email_addr) + + self.user4 = User.objects.create(username="tim1", first_name="Tim", + last_name="One", email="tim@example.com") + self.user5 = User.objects.create(username="tim2", first_name="Tim", + last_name="Two", email="timtwo@example.com") + + def test_not_matching(self): + self.assertIsNone(self.finder.find(None)) + self.assertIsNone(self.finder.find("")) + self.assertIsNone(self.finder.find("Bogus")) + self.assertIsNone(self.finder.find("Bogus <invalid")) + self.assertIsNone(self.finder.find("Bogus User <bogus@example.com>")) + self.assertIsNone(self.finder.find("<bogus@example.com>")) + self.assertIsNone(self.finder.find("bogus@example.com")) + self.assertIsNone(self.finder.find("Unknown Packager")) + + def test_by_email(self): + self.assertEqual(self.user1, + self.finder.find("XXX YYY <user1@example.com>")) + self.assertEqual(self.user2, + self.finder.find("YYY ZZZ <user2@example.com>")) + + def test_by_profile_email(self): + self.assertEqual(self.user1, + self.finder.find("XXX <joeuser@awesome.com>")) + self.assertEqual(self.user2, + self.finder.find("YYY <john@awesome.com>")) + self.assertEqual(self.user3, + self.finder.find("ZZZ <bjones@awesome.com>")) + + def test_by_name(self): + self.assertEqual(self.user1, + self.finder.find("Joe User <joe@differentdomain.com>")) + self.assertEqual(self.user1, + self.finder.find("Joe User")) + self.assertEqual(self.user2, + self.finder.find("John <john@differentdomain.com>")) + self.assertEqual(self.user2, + self.finder.find("John")) + self.assertEqual(self.user3, + self.finder.find("Bob Jones <bjones AT Arch Linux DOT org>")) + + def test_by_invalid(self): + self.assertEqual(self.user1, + self.finder.find("Joe User <user1@example.com")) + self.assertEqual(self.user1, + self.finder.find("Joe 'nickname' User <user1@example.com")) + self.assertEqual(self.user1, + self.finder.find("Joe \"nickname\" User <user1@example.com")) + self.assertEqual(self.user1, + self.finder.find("Joe User <joe@differentdomain.com")) + + def test_cache(self): + # simply look two of them up, but then do it repeatedly + for _ in range(5): + self.assertEqual(self.user1, + self.finder.find("XXX YYY <user1@example.com>")) + self.assertEqual(self.user3, + self.finder.find("Bob Jones <bjones AT Arch Linux DOT org>")) + + def test_ambiguous(self): + self.assertEqual(self.user4, + self.finder.find("Tim One <tim@anotherdomain.com>")) + self.assertEqual(self.user5, + self.finder.find("Tim Two <tim@anotherdomain.com>")) + self.assertIsNone(self.finder.find("Tim <tim@anotherdomain.com>")) + +# vim: set ts=4 sw=4 et: diff --git a/devel/urls.py b/devel/urls.py new file mode 100644 index 00000000..472c6456 --- /dev/null +++ b/devel/urls.py @@ -0,0 +1,15 @@ +from django.conf.urls import patterns + +urlpatterns = patterns('devel.views', + (r'^admin_log/$','admin_log'), + (r'^admin_log/(?P<username>.*)/$','admin_log'), + (r'^clock/$', 'clock', {}, 
'devel-clocks'), + (r'^$', 'index', {}, 'devel-index'), + (r'^stats/$', 'stats', {}, 'devel-stats'), + (r'^newuser/$', 'new_user_form'), + (r'^profile/$', 'change_profile'), + (r'^reports/(?P<report_name>.*)/(?P<username>.*)/$', 'report'), + (r'^reports/(?P<report_name>.*)/$', 'report'), +) + +# vim: set ts=4 sw=4 et: diff --git a/devel/utils.py b/devel/utils.py index abfdabe5..e745c7a9 100644 --- a/devel/utils.py +++ b/devel/utils.py @@ -1,12 +1,21 @@ +import re + +from django.conf import settings from django.contrib.auth.models import User +from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned from django.db import connection +from django.db.models import Count, Q +from devel.models import UserProfile from main.utils import cache_function +from main.models import Package from packages.models import PackageRelation -@cache_function(300) +@cache_function(283) def get_annotated_maintainers(): - maintainers = User.objects.filter(is_active=True).order_by( + profile_ids = UserProfile.allowed_repos.through.objects.values('userprofile_id') + maintainers = User.objects.filter( + is_active=True, userprofile__id__in=profile_ids).order_by( 'first_name', 'last_name') # annotate the maintainers with # of maintained and flagged packages @@ -28,10 +37,159 @@ SELECT pr.user_id, COUNT(*), COUNT(p.flag_date) pkg_count[k] = total flag_count[k] = flagged + update_count = Package.objects.values_list('packager').order_by( + 'packager').annotate(Count('packager')) + update_count = dict(update_count) + for m in maintainers: m.package_count = pkg_count.get(m.id, 0) m.flagged_count = flag_count.get(m.id, 0) + m.updated_count = update_count.get(m.id, 0) + + # force non-QS context, otherwise pickling doesn't work + return list(maintainers) + + +def ignore_does_not_exist(func): + def new_func(*args, **kwargs): + try: + return func(*args, **kwargs) + except (ObjectDoesNotExist, MultipleObjectsReturned): + return None + return new_func + + +class UserFinder(object): + def __init__(self): + self.cache = {} + self.username_cache = {} + self.email_cache = {} + self.pgp_cache = {} + + @staticmethod + @ignore_does_not_exist + def user_email(name, email): + if email: + return User.objects.get(email=email) + return None + + @staticmethod + @ignore_does_not_exist + def username_email(name, email): + if email and '@' in email: + # split email addr at '@' symbol, ensure domain matches + # or is a subdomain of parabola.nu + username, domain = email.split('@', 1) + if re.match(settings.DOMAIN_RE, domain): + return User.objects.get(username=username) + return None + + @staticmethod + @ignore_does_not_exist + def profile_email(name, email): + if email: + return User.objects.get(userprofile__public_email=email) + return None + + @staticmethod + @ignore_does_not_exist + def user_name(name, email): + # yes, a bit odd but this is the easiest way since we can't always be + # sure how to split the name. Ensure every 'token' appears in at least + # one of the two name fields. + if not name: + return None + name_q = Q() + for token in name.split(): + # ignore quoted parts; e.g. nicknames in strings + if re.match(r'^[\'"].*[\'"]$', token): + continue + name_q &= (Q(first_name__icontains=token) | + Q(last_name__icontains=token)) + return User.objects.get(name_q) + + def find(self, userstring): + ''' + Attempt to find the corresponding User object for a standard + packager string, e.g. something like + 'A. U. Thor <author@example.com>'. 
+ We start by searching for a matching email address; we then move onto + matching by first/last name. If we cannot find a user, then return None. + ''' + if not userstring: + return None + if userstring in self.cache: + return self.cache[userstring] + + name = email = None + + matches = re.match(r'^([^<]+)? ?<([^>]*)>?', userstring) + if not matches: + name = userstring.strip() + else: + name = matches.group(1) + email = matches.group(2) + + user = None + find_methods = (self.user_email, self.profile_email, + self.username_email, self.user_name) + for matcher in find_methods: + user = matcher(name, email) + if user is not None: + break + + self.cache[userstring] = user + self.email_cache[email] = user + return user + + def find_by_username(self, username): + if not username: + return None + if username in self.username_cache: + return self.username_cache[username] + + try: + user = User.objects.get(username=username) + except User.DoesNotExist: + user = None + + self.username_cache[username] = user + return user + + def find_by_email(self, email): + if not email: + return None + if email in self.email_cache: + return self.email_cache[email] + + user = self.user_email(None, email) + if user is None: + user = self.profile_email(None, email) + if user is None: + user = self.username_email(None, email) + + self.email_cache[email] = user + return user + + def find_by_pgp_key(self, pgp_key): + if not pgp_key: + return None + if pgp_key in self.pgp_cache: + return self.pgp_cache[pgp_key] + + try: + user = User.objects.get( + userprofile__pgp_key__endswith=pgp_key) + except User.DoesNotExist: + user = None + + self.pgp_cache[pgp_key] = user + return user - return maintainers + def clear_cache(self): + self.cache = {} + self.username_cache = {} + self.email_cache = {} + self.pgp_cache = {} # vim: set ts=4 sw=4 et: diff --git a/devel/views.py b/devel/views.py index 577a00c4..66f6a965 100644 --- a/devel/views.py +++ b/devel/views.py @@ -1,114 +1,230 @@ -from django import forms +from datetime import timedelta +import operator +import time + from django.http import HttpResponseRedirect -from django.contrib.auth.decorators import login_required, permission_required +from django.contrib.auth.decorators import \ + login_required, permission_required, user_passes_test +from django.contrib import admin +from django.contrib.admin.models import LogEntry, ADDITION from django.contrib.auth.models import User -from django.contrib.sites.models import Site -from django.core.mail import send_mail +from django.contrib.contenttypes.models import ContentType +from django.db import transaction +from django.db.models import Count, Max +from django.http import Http404 +from django.shortcuts import get_object_or_404, render from django.views.decorators.cache import never_cache -from django.views.generic.simple import direct_to_template +from django.utils.encoding import force_unicode +from django.utils.http import http_date +from django.utils.timezone import now -from main.models import Package, Todolist, TodolistPkg +from .forms import ProfileForm, UserProfileForm, NewUserForm +from .models import UserProfile +from .reports import available_reports +from main.models import Package from main.models import Arch, Repo -from main.models import UserProfile -from packages.models import PackageRelation +from news.models import News +from packages.models import PackageRelation, Signoff, FlagRequest +from packages.utils import get_signoff_groups +from todolists.models import TodolistPackage +from todolists.utils import 
get_annotated_todolists from .utils import get_annotated_maintainers -import random -from string import ascii_letters, digits @login_required -@never_cache def index(request): - '''the Developer dashboard''' - inner_q = PackageRelation.objects.filter(user=request.user).values('pkgbase') - flagged = Package.objects.select_related('arch', 'repo').filter(flag_date__isnull=False) - flagged = flagged.filter(pkgbase__in=inner_q).order_by('pkgname') + """The developer dashboard.""" + if request.user.is_authenticated(): + inner_q = PackageRelation.objects.filter(user=request.user) + else: + inner_q = PackageRelation.objects.none() + inner_q = inner_q.values('pkgbase') + + flagged = Package.objects.normal().filter( + flag_date__isnull=False, pkgbase__in=inner_q).order_by('pkgname') + + todopkgs = TodolistPackage.objects.select_related( + 'todolist', 'pkg', 'arch', 'repo').exclude( + status=TodolistPackage.COMPLETE).filter(removed__isnull=True) + todopkgs = todopkgs.filter(pkgbase__in=inner_q).order_by( + 'todolist__name', 'pkgname') + + todolists = get_annotated_todolists(incomplete_only=True) + + signoffs = sorted(get_signoff_groups(user=request.user), + key=operator.attrgetter('pkgbase')) + + page_dict = { + 'todos': todolists, + 'flagged': flagged, + 'todopkgs': todopkgs, + 'signoffs': signoffs, + 'reports': available_reports(), + } - todopkgs = TodolistPkg.objects.select_related( - 'pkg', 'pkg__arch', 'pkg__repo').filter(complete=False) - todopkgs = todopkgs.filter(pkg__pkgbase__in=inner_q).order_by( - 'list__name', 'pkg__pkgname') + return render(request, 'devel/index.html', page_dict) + + +@login_required +def stats(request): + """The second half of the dev dashboard.""" + arches = Arch.objects.all().annotate( + total_ct=Count('packages'), flagged_ct=Count('packages__flag_date')) + repos = Repo.objects.all().annotate( + total_ct=Count('packages'), flagged_ct=Count('packages__flag_date')) + # the join is huge unless we do this separately, so merge the result here + repo_maintainers = dict(Repo.objects.order_by().filter( + userprofile__user__is_active=True).values_list('id').annotate( + Count('userprofile'))) + for repo in repos: + repo.maintainer_ct = repo_maintainers.get(repo.id, 0) maintainers = get_annotated_maintainers() + maintained = PackageRelation.objects.filter( + type=PackageRelation.MAINTAINER).values('pkgbase') + total_orphans = Package.objects.exclude(pkgbase__in=maintained).count() + total_flagged_orphans = Package.objects.filter( + flag_date__isnull=False).exclude(pkgbase__in=maintained).count() + total_updated = Package.objects.filter(packager__isnull=True).count() + orphan = { + 'package_count': total_orphans, + 'flagged_count': total_flagged_orphans, + 'updated_count': total_updated, + } + page_dict = { - 'todos': Todolist.objects.incomplete().order_by('-date_added'), - 'repos': Repo.objects.all(), - 'arches': Arch.objects.all(), + 'arches': arches, + 'repos': repos, 'maintainers': maintainers, - 'flagged' : flagged, - 'todopkgs' : todopkgs, - } + 'orphan': orphan, + } + + return render(request, 'devel/stats.html', page_dict) - return direct_to_template(request, 'devel/index.html', page_dict) @login_required -def change_notify(request): - maint = User.objects.get(username=request.user.username) - notify = request.POST.get('notify', 'no') - prof = maint.get_profile() - prof.notify = (notify == 'yes') - prof.save() - return HttpResponseRedirect('/devel/') - -class ProfileForm(forms.Form): - email = forms.EmailField(label='E-mail Address') - passwd1 = forms.CharField(label='New 
Password', required=False, - widget=forms.PasswordInput) - passwd2 = forms.CharField(label='Confirm Password', required=False, - widget=forms.PasswordInput) - - def clean(self): - if self.cleaned_data['passwd1'] != self.cleaned_data['passwd2']: - raise forms.ValidationError('Passwords do not match.') - return self.cleaned_data +def clock(request): + devs = User.objects.filter(is_active=True).order_by( + 'first_name', 'last_name').select_related('userprofile') + + latest_news = dict(News.objects.filter( + author__is_active=True).values_list('author').order_by( + ).annotate(last_post=Max('postdate'))) + latest_package = dict(Package.objects.filter( + packager__is_active=True).values_list('packager').order_by( + ).annotate(last_build=Max('build_date'))) + latest_signoff = dict(Signoff.objects.filter( + user__is_active=True).values_list('user').order_by( + ).annotate(last_signoff=Max('created'))) + # The extra() bit ensures we can use our 'user_id IS NOT NULL' index + latest_flagreq = dict(FlagRequest.objects.filter( + user__is_active=True).extra( + where=['user_id IS NOT NULL']).values_list('user_id').order_by( + ).annotate(last_flagrequest=Max('created'))) + latest_log = dict(LogEntry.objects.filter( + user__is_active=True).values_list('user').order_by( + ).annotate(last_log=Max('action_time'))) + + for dev in devs: + dates = [ + latest_news.get(dev.id, None), + latest_package.get(dev.id, None), + latest_signoff.get(dev.id, None), + latest_flagreq.get(dev.id, None), + latest_log.get(dev.id, None), + dev.last_login, + ] + dates = [d for d in dates if d is not None] + if dates: + dev.last_action = max(dates) + else: + dev.last_action = None + + current_time = now() + page_dict = { + 'developers': devs, + 'utc_now': current_time, + } + + response = render(request, 'devel/clock.html', page_dict) + if not response.has_header('Expires'): + expire_time = current_time.replace(second=0, microsecond=0) + expire_time += timedelta(minutes=1) + expire_time = time.mktime(expire_time.timetuple()) + response['Expires'] = http_date(expire_time) + return response + @login_required @never_cache def change_profile(request): + profile, _ = UserProfile.objects.get_or_create(user=request.user) if request.POST: form = ProfileForm(request.POST) - if form.is_valid(): + profile_form = UserProfileForm(request.POST, request.FILES, + instance=profile) + if form.is_valid() and profile_form.is_valid(): request.user.email = form.cleaned_data['email'] if form.cleaned_data['passwd1']: request.user.set_password(form.cleaned_data['passwd1']) - request.user.save() + with transaction.atomic(): + request.user.save() + profile_form.save() return HttpResponseRedirect('/devel/') else: form = ProfileForm(initial={'email': request.user.email}) - return direct_to_template(request, 'devel/profile.html', {'form': form}) - -class NewUserForm(forms.ModelForm): - class Meta: - model = UserProfile - exclude = ('picture', 'user') - username = forms.CharField(max_length=30) - email = forms.EmailField() - first_name = forms.CharField(required=False) - last_name = forms.CharField(required=False) - - def save(self): - profile = forms.ModelForm.save(self, False) - pwletters = ascii_letters + digits - pw = ''.join([random.choice(pwletters) for i in xrange(8)]) - user = User.objects.create_user(username=self.cleaned_data['username'], - email=self.cleaned_data['email'], password=pw) - user.first_name = self.cleaned_data['first_name'] - user.last_name = self.cleaned_data['last_name'] - user.save() - profile.user = user - profile.save() - domain = 
Site.objects.get_current().domain
-
-        send_mail("Your new archweb account",
-                """You can now log into:
-https://%s/login/
-with these login details:
-Username: %s
-Password: %s""" % (domain, user.username, pw),
-                'Arch Website Notification <nobody@archlinux.org>',
-                [user.email],
-                fail_silently=False)
+    profile_form = UserProfileForm(instance=profile)
+    return render(request, 'devel/profile.html',
+            {'form': form, 'profile_form': profile_form})
+
+
+@login_required
+def report(request, report_name, username=None):
+    available = {report.slug: report for report in available_reports()}
+    report = available.get(report_name, None)
+    if report is None:
+        raise Http404
+
+    packages = Package.objects.normal()
+    user = None
+    if username:
+        user = get_object_or_404(User, username=username, is_active=True)
+        maintained = PackageRelation.objects.filter(user=user,
+                type=PackageRelation.MAINTAINER).values('pkgbase')
+        packages = packages.filter(pkgbase__in=maintained)
+
+    maints = User.objects.filter(id__in=PackageRelation.objects.filter(
+        type=PackageRelation.MAINTAINER).values('user'))
+
+    packages = report.packages(packages, username)
+    arches = {pkg.arch for pkg in packages}
+    repos = {pkg.repo for pkg in packages}
+    context = {
+        'all_maintainers': maints,
+        'title': report.description,
+        'report': report,
+        'maintainer': user,
+        'packages': packages,
+        'arches': sorted(arches),
+        'repos': sorted(repos),
+        'column_names': report.names,
+        'column_attrs': report.attrs,
+    }
+    return render(request, 'devel/packages.html', context)
+
+
+def log_addition(request, obj):
+    """Cribbed from ModelAdmin.log_addition."""
+    LogEntry.objects.log_action(
+        user_id = request.user.pk,
+        content_type_id = ContentType.objects.get_for_model(obj).pk,
+        object_id = obj.pk,
+        object_repr = force_unicode(obj),
+        action_flag = ADDITION,
+        change_message = "Added via Create New User form."
+    )
+
 
 @permission_required('auth.add_user')
 @never_cache
@@ -116,7 +232,9 @@ def new_user_form(request):
     if request.POST:
         form = NewUserForm(request.POST)
         if form.is_valid():
-            form.save()
+            with transaction.atomic():
+                form.save()
+                log_addition(request, form.instance.user)
             return HttpResponseRedirect('/admin/auth/user/%d/' % \
                 form.instance.user.id)
     else:
@@ -131,6 +249,19 @@ def new_user_form(request):
         'title': 'Create User',
         'submit_text': 'Create User'
     }
-    return direct_to_template(request, 'general_form.html', context)
+    return render(request, 'general_form.html', context)
+
+
+@user_passes_test(lambda u: u.is_superuser)
+def admin_log(request, username=None):
+    user = None
+    if username:
+        user = get_object_or_404(User, username=username)
+    context = {
+        'title': "Admin Action Log",
+        'log_user': user,
+    }
+    context.update(admin.site.each_context())
+    return render(request, 'devel/admin_log.html', context)
 
 # vim: set ts=4 sw=4 et:
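
The Database class in reporead_inotify.py above is a debounce: every queue_for_update() call restarts a delay-long countdown, and an event arriving while an import is already running only sets run_again, so however many events pile up, at most one further import follows. The same pattern, reduced to a self-contained sketch; DebouncedTask and poke are illustrative names, not part of the diff:

import threading


class DebouncedTask(object):
    '''Run func once, delay seconds after the most recent poke(); pokes
    arriving mid-run schedule exactly one follow-up run.'''

    def __init__(self, func, delay=60.0):
        self.func = func
        self.delay = delay
        self.timer = None
        self.running = False
        self.run_again = False
        self.lock = threading.Lock()

    def poke(self):
        with self.lock:
            if self.running:
                # remember to run once more after the current run finishes
                self.run_again = True
                return
            if self.timer is not None:
                # restart the countdown from scratch
                self.timer.cancel()
            self.timer = threading.Timer(self.delay, self._run)
            self.timer.start()

    def _run(self):
        with self.lock:
            self.running = True
        try:
            self.func()
        finally:
            with self.lock:
                self.timer = None
                self.running = False
                if self.run_again:
                    self.run_again = False
                    self.timer = threading.Timer(self.delay, self._run)
                    self.timer.start()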
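
The update_types_permissions command accepts app labels as positional arguments and processes every installed app when given none. Assuming a configured Django project with the command on the path, it can also be driven from code through Django's standard call_command interface; the app labels below are examples only:

from django.core.management import call_command

# Rebuild content types and permissions for two named apps; with no
# labels the command walks every installed app instead.
call_command('update_types_permissions', 'devel', 'packages')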
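
Each entry in reports.py pairs a DeveloperReport instance with a filter function taking (packages, username), so extending the dashboard is mostly a matter of writing one more function. A hypothetical sketch under that assumption; the recent function, the 'recent' slug, and the seven-day cutoff are invented for illustration:

from datetime import timedelta

from django.utils.timezone import now

from devel.reports import DeveloperReport


def recent(packages, username):
    # same contract as the report functions above: filter and order the
    # incoming package queryset
    cutoff = now() - timedelta(days=7)
    return packages.filter(build_date__gte=cutoff).order_by('-build_date')


REPORT_RECENT = DeveloperReport('recent', 'Recently Built',
        'Packages built within the last week', recent)

To show up under /devel/reports/, such a report would also have to be added to the tuple returned by available_reports().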