Diffstat (limited to 'devel/management')
-rw-r--r--  devel/management/commands/generate_keyring.py          89
-rw-r--r--  devel/management/commands/pgp_import.py                252
-rw-r--r--  devel/management/commands/rematch_developers.py         98
-rw-r--r--  devel/management/commands/reporead.py                  647
-rw-r--r--  devel/management/commands/reporead_inotify.py          215
-rw-r--r--  devel/management/commands/update_types_permissions.py   23
6 files changed, 1094 insertions(+), 230 deletions(-)
diff --git a/devel/management/commands/generate_keyring.py b/devel/management/commands/generate_keyring.py
new file mode 100644
index 00000000..9c52dadc
--- /dev/null
+++ b/devel/management/commands/generate_keyring.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+"""
+generate_keyring command
+
+Assemble a GPG keyring with all known developer keys.
+
+Usage: ./manage.py generate_keyring <keyserver> <keyring_path> [ownertrust_path]
+"""
+
+from django.core.management.base import BaseCommand, CommandError
+
+import logging
+import subprocess
+import sys
+
+from devel.models import MasterKey, UserProfile
+
+logging.basicConfig(
+ level=logging.INFO,
+ format='%(asctime)s -> %(levelname)s: %(message)s',
+ datefmt='%Y-%m-%d %H:%M:%S',
+ stream=sys.stderr)
+logger = logging.getLogger()
+
+class Command(BaseCommand):
+ args = "<keyserver> <keyring_path> [ownertrust_path]"
+ help = "Assemble a GPG keyring with all known developer keys."
+
+ def handle(self, *args, **options):
+ v = int(options.get('verbosity', None))
+ if v == 0:
+ logger.level = logging.ERROR
+ elif v == 1:
+ logger.level = logging.INFO
+ elif v >= 2:
+ logger.level = logging.DEBUG
+
+ if len(args) < 2:
+ raise CommandError("keyserver and keyring_path must be provided")
+
+ generate_keyring(args[0], args[1])
+
+ if len(args) > 2:
+ generate_ownertrust(args[2])
+
+
+def generate_keyring(keyserver, keyring):
+ logger.info("getting all known key IDs")
+
+ # Screw you Django, for not letting one natively do value != <empty string>
+ key_ids = UserProfile.objects.filter(
+ pgp_key__isnull=False).extra(where=["pgp_key != ''"]).values_list(
+ "pgp_key", flat=True)
+ logger.info("%d keys fetched from user profiles", len(key_ids))
+ master_key_ids = MasterKey.objects.values_list("pgp_key", flat=True)
+ logger.info("%d keys fetched from master keys", len(master_key_ids))
+
+ # GPG is stupid and interprets any filename without path portion as being
+ # in ~/.gnupg/. Fake it out if we just get a bare filename.
+ if '/' not in keyring:
+ keyring = './%s' % keyring
+ gpg_cmd = ["gpg", "--no-default-keyring", "--keyring", keyring,
+ "--keyserver", keyserver, "--recv-keys"]
+ logger.info("running command: %r", gpg_cmd)
+ gpg_cmd.extend(key_ids)
+ gpg_cmd.extend(master_key_ids)
+ subprocess.check_call(gpg_cmd)
+ logger.info("keyring at %s successfully updated", keyring)
+
+
+TRUST_LEVELS = {
+ 'unknown': 0,
+ 'expired': 1,
+ 'undefined': 2,
+ 'never': 3,
+ 'marginal': 4,
+ 'fully': 5,
+ 'ultimate': 6,
+}
+
+
+def generate_ownertrust(trust_path):
+ master_key_ids = MasterKey.objects.values_list("pgp_key", flat=True)
+ with open(trust_path, "w") as trustfile:
+ for key_id in master_key_ids:
+ trustfile.write("%s:%d:\n" % (key_id, TRUST_LEVELS['marginal']))
+ logger.info("trust file at %s created or overwritten", trust_path)
+
+# vim: set ts=4 sw=4 et:
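The ownertrust file written by generate_ownertrust() above uses the same colon-delimited format that gpg --export-ownertrust emits: one "<key id>:<trust level>:" line per key, with every master key pinned at the 'marginal' level (4). A minimal sketch of reading such a file back into Python, assuming a hypothetical path, for illustration only:

TRUST_NAMES = {0: 'unknown', 1: 'expired', 2: 'undefined',
               3: 'never', 4: 'marginal', 5: 'fully', 6: 'ultimate'}

def read_ownertrust(path):
    '''Parse an ownertrust file back into a {key_id: trust name} dict.'''
    trust = {}
    with open(path) as trustfile:
        for line in trustfile:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            key_id, level = line.split(':')[:2]
            trust[key_id] = TRUST_NAMES.get(int(level), 'unknown')
    return trust

# read_ownertrust('./ownertrust.txt') -> {'AAAABBBBCCCCDDDD': 'marginal', ...}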
diff --git a/devel/management/commands/pgp_import.py b/devel/management/commands/pgp_import.py
new file mode 100644
index 00000000..7a124f77
--- /dev/null
+++ b/devel/management/commands/pgp_import.py
@@ -0,0 +1,252 @@
+# -*- coding: utf-8 -*-
+"""
+pgp_import command
+
+Import keys and signatures from a given GPG keyring.
+
+Usage: ./manage.py pgp_import <keyring_path>
+"""
+
+from collections import namedtuple, OrderedDict
+from datetime import datetime
+import logging
+from pytz import utc
+import subprocess
+import sys
+
+from django.core.management.base import BaseCommand, CommandError
+from django.db import transaction
+
+from devel.models import DeveloperKey, PGPSignature
+from devel.utils import UserFinder
+
+
+logging.basicConfig(
+ level=logging.INFO,
+ format='%(asctime)s -> %(levelname)s: %(message)s',
+ datefmt='%Y-%m-%d %H:%M:%S',
+ stream=sys.stderr)
+logger = logging.getLogger()
+
+class Command(BaseCommand):
+ args = "<keyring_path>"
+ help = "Import keys and signatures from a given GPG keyring."
+
+ def handle(self, *args, **options):
+ v = int(options.get('verbosity', None))
+ if v == 0:
+ logger.level = logging.ERROR
+ elif v == 1:
+ logger.level = logging.INFO
+ elif v >= 2:
+ logger.level = logging.DEBUG
+
+ if len(args) < 1:
+ raise CommandError("keyring_path must be provided")
+
+ import_keys(args[0])
+ import_signatures(args[0])
+
+
+def get_date(epoch_string):
+    '''Convert an epoch string into a Python 'date' object (not datetime).'''
+ if not epoch_string:
+ return None
+ return datetime.utcfromtimestamp(int(epoch_string)).date()
+
+
+def get_datetime(epoch_string):
+    '''Convert an epoch string into a Python 'datetime' object.'''
+ if not epoch_string:
+ return None
+ return datetime.utcfromtimestamp(int(epoch_string)).replace(tzinfo=utc)
+
+
+def call_gpg(keyring, *args):
+ # GPG is stupid and interprets any filename without path portion as being
+ # in ~/.gnupg/. Fake it out if we just get a bare filename.
+ if '/' not in keyring:
+ keyring = './%s' % keyring
+ gpg_cmd = ["gpg2", "--no-default-keyring", "--keyring", keyring,
+ "--with-colons", "--fixed-list-mode"]
+ gpg_cmd.extend(args)
+ logger.info("running command: %s", ' '.join(gpg_cmd))
+    proc = subprocess.Popen(gpg_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    outdata, errdata = proc.communicate()
+ if proc.returncode != 0:
+ logger.error(errdata)
+ raise subprocess.CalledProcessError(proc.returncode, gpg_cmd)
+ return outdata
+
+
+class KeyData(object):
+ def __init__(self, key, created, expires):
+ self.key = key
+ self.created = get_datetime(created)
+ self.expires = get_datetime(expires)
+ self.parent = None
+ self.revoked = None
+ self.db_id = None
+
+
+def parse_keydata(data):
+ keys = OrderedDict()
+ current_pubkey = None
+
+ # parse all of the output from our successful GPG command
+ logger.info("parsing command output")
+ node = None
+ for line in data.split('\n'):
+ parts = line.split(':')
+ if parts[0] == 'pub':
+ key = parts[4]
+ current_pubkey = key
+ keys[key] = KeyData(key, parts[5], parts[6])
+ node = parts[0]
+ elif parts[0] == 'sub':
+ key = parts[4]
+ keys[key] = KeyData(key, parts[5], parts[6])
+ keys[key].parent = current_pubkey
+ node = parts[0]
+ elif parts[0] == 'uid':
+ node = parts[0]
+ elif parts[0] == 'rev' and node in ('pub', 'sub'):
+ keys[current_pubkey].revoked = get_datetime(parts[5])
+
+ return keys
+
+
+def find_key_owner(key, keys, finder):
+ '''Recurse up the chain, looking for an owner.'''
+ if key is None:
+ return None
+ owner = finder.find_by_pgp_key(key.key)
+ if owner:
+ return owner
+ if key.parent:
+ return find_key_owner(keys[key.parent], keys, finder)
+ return None
+
+
+def import_keys(keyring):
+ outdata = call_gpg(keyring, "--list-sigs")
+ keydata = parse_keydata(outdata)
+
+ logger.info("creating or finding %d keys", len(keydata))
+ created_ct = updated_ct = 0
+ with transaction.atomic():
+ finder = UserFinder()
+ # we are dependent on parents coming before children; parse_keydata
+ # uses an OrderedDict to ensure this is the case.
+ for data in keydata.values():
+ parent_id = None
+ if data.parent:
+ parent_data = keydata.get(data.parent, None)
+ if parent_data:
+ parent_id = parent_data.db_id
+ other = {
+ 'expires': data.expires,
+ 'revoked': data.revoked,
+ 'parent_id': parent_id,
+ }
+ dkey, created = DeveloperKey.objects.get_or_create(
+ key=data.key, created=data.created, defaults=other)
+ data.db_id = dkey.id
+
+ # set or update any additional data we might need to
+ needs_save = False
+ if created:
+ created_ct += 1
+ else:
+ for k, v in other.items():
+ if getattr(dkey, k) != v:
+ setattr(dkey, k, v)
+ needs_save = True
+ if dkey.owner_id is None:
+ owner = find_key_owner(data, keydata, finder)
+ if owner is not None:
+ dkey.owner = owner
+ needs_save = True
+ if needs_save:
+ dkey.save()
+ updated_ct += 1
+
+ key_ct = DeveloperKey.objects.all().count()
+ logger.info("%d total keys in database", key_ct)
+ logger.info("created %d, updated %d keys", created_ct, updated_ct)
+
+
+class SignatureData(object):
+ def __init__(self, signer, signee, created):
+ self.signer = signer
+ self.signee = signee
+ self.created = created
+ self.expires = None
+ self.revoked = None
+
+
+def parse_sigdata(data):
+ nodes = {}
+ edges = []
+ current_pubkey = None
+
+ # parse all of the output from our successful GPG command
+ logger.info("parsing command output")
+ for line in data.split('\n'):
+ parts = line.split(':')
+ if parts[0] == 'pub':
+ current_pubkey = parts[4]
+ nodes[current_pubkey] = None
+ elif parts[0] == 'uid':
+ uid = parts[9]
+ # only set uid if this is the first one encountered
+ if nodes[current_pubkey] is None:
+ nodes[current_pubkey] = uid
+ elif parts[0] == 'sig':
+ signer = parts[4]
+ created = get_date(parts[5])
+ edge = SignatureData(signer, current_pubkey, created)
+ if parts[6]:
+ edge.expires = get_date(parts[6])
+ edges.append(edge)
+ elif parts[0] == 'rev':
+ signer = parts[4]
+ revoked = get_date(parts[5])
+ # revoke any prior edges that match
+ matches = [e for e in edges if e.signer == signer
+ and e.signee == current_pubkey]
+ for edge in matches:
+ edge.revoked = revoked
+
+ return nodes, edges
+
+
+def import_signatures(keyring):
+ outdata = call_gpg(keyring, "--list-sigs")
+ nodes, edges = parse_sigdata(outdata)
+
+ # now prune the data down to what we actually want.
+ # prune edges not in nodes, remove duplicates, and self-sigs
+ pruned_edges = {edge for edge in edges
+ if edge.signer in nodes and edge.signer != edge.signee}
+
+ logger.info("creating or finding up to %d signatures", len(pruned_edges))
+ created_ct = updated_ct = 0
+ with transaction.atomic():
+ for edge in pruned_edges:
+ sig, created = PGPSignature.objects.get_or_create(
+ signer=edge.signer, signee=edge.signee,
+ created=edge.created, expires=edge.expires,
+ defaults={ 'revoked': edge.revoked })
+ if sig.revoked != edge.revoked:
+ sig.revoked = edge.revoked
+ sig.save()
+ updated_ct += 1
+ if created:
+ created_ct += 1
+
+ sig_ct = PGPSignature.objects.all().count()
+ logger.info("%d total signatures in database", sig_ct)
+ logger.info("created %d, updated %d signatures", created_ct, updated_ct)
+
+# vim: set ts=4 sw=4 et:
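Both parsers above consume gpg's machine-readable --with-colons output, in which each record is a type field followed by colon-separated values: field 5 is the key ID, field 6 the creation timestamp, field 7 the expiry (empty if none), and uid records carry the user ID string in field 10. A toy illustration of the fields parse_keydata() and parse_sigdata() rely on, using made-up key IDs and timestamps:

SAMPLE = '\n'.join([
    # type:validity:length:algo:keyid:created:expires:...
    'pub:u:4096:1:AAAABBBBCCCCDDDD:1292351388::::::scESC:',
    'uid:u::::1292351388::0123456789ABCDEF::A. U. Thor <author@example.com>:',
    'sub:u:4096:1:1111222233334444:1292351388:1450117788:::::e:',
])

keys = {}
for line in SAMPLE.split('\n'):
    parts = line.split(':')
    if parts[0] in ('pub', 'sub'):
        # parts[4] = key ID, parts[5] = created, parts[6] = expires
        keys[parts[4]] = (parts[5], parts[6] or None)

# keys == {'AAAABBBBCCCCDDDD': ('1292351388', None),
#          '1111222233334444': ('1292351388', '1450117788')}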
diff --git a/devel/management/commands/rematch_developers.py b/devel/management/commands/rematch_developers.py
new file mode 100644
index 00000000..bbb43df0
--- /dev/null
+++ b/devel/management/commands/rematch_developers.py
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+"""
+rematch_developers command
+
+Match all packages with a packager_str but NULL packager_id to a packager if we
+can find one.
+
+Also, match all flag requests with a NULL user_id that have a user_email
+matching up to a developer if we can find one.
+
+Usage: ./manage.py rematch_developers
+"""
+
+from django.core.management.base import NoArgsCommand
+from django.db import transaction
+
+import sys
+import logging
+
+from devel.utils import UserFinder
+from main.models import Package
+from packages.models import FlagRequest
+
+logging.basicConfig(
+ level=logging.INFO,
+ format='%(asctime)s -> %(levelname)s: %(message)s',
+ datefmt='%Y-%m-%d %H:%M:%S',
+ stream=sys.stderr)
+logger = logging.getLogger()
+
+class Command(NoArgsCommand):
+ help = "Match and map objects in database to developer emails"
+
+ def handle_noargs(self, **options):
+ v = int(options.get('verbosity', None))
+ if v == 0:
+ logger.level = logging.ERROR
+ elif v == 1:
+ logger.level = logging.INFO
+ elif v >= 2:
+ logger.level = logging.DEBUG
+
+ finder = UserFinder()
+ match_packager(finder)
+ match_flagrequest(finder)
+
+@transaction.atomic
+def match_packager(finder):
+ logger.info("getting all unmatched packager strings")
+ package_count = matched_count = 0
+ mapping = {}
+
+ unmatched = Package.objects.filter(packager__isnull=True).values_list(
+ 'packager_str', flat=True).order_by().distinct()
+
+ logger.info("%d packager strings retrieved", len(unmatched))
+ for packager in unmatched:
+ logger.debug("packager string %s", packager)
+ user = finder.find(packager)
+ if user:
+ mapping[packager] = user
+ logger.debug(" found user %s" % user.username)
+ matched_count += 1
+
+ for packager_str, user in mapping.items():
+ package_count += Package.objects.filter(packager__isnull=True,
+ packager_str=packager_str).update(packager=user)
+
+ logger.info("%d packages updated, %d packager strings matched",
+ package_count, matched_count)
+
+
+@transaction.atomic
+def match_flagrequest(finder):
+ logger.info("getting all flag request email addresses from unknown users")
+ req_count = matched_count = 0
+ mapping = {}
+
+ unmatched = FlagRequest.objects.filter(user__isnull=True).values_list(
+ 'user_email', flat=True).order_by().distinct()
+
+ logger.info("%d email addresses retrieved", len(unmatched))
+ for user_email in unmatched:
+ logger.debug("email %s", user_email)
+ user = finder.find_by_email(user_email)
+ if user:
+ mapping[user_email] = user
+ logger.debug(" found user %s" % user.username)
+ matched_count += 1
+
+ for user_email, user in mapping.items():
+ req_count += FlagRequest.objects.filter(user__isnull=True,
+ user_email=user_email).update(user=user)
+
+ logger.info("%d request emails updated, %d emails matched",
+ req_count, matched_count)
+
+# vim: set ts=4 sw=4 et:
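The actual matching is delegated to UserFinder from devel.utils, which is not part of this diff; it maps a packager string such as 'A. U. Thor <author@example.com>' onto a Django user. The first step of that job, splitting the string into name and email, can be sketched with the same kind of regex the removed find_user() helper in reporead.py used (this is an illustration, not the UserFinder implementation):

import re

PACKAGER_RE = re.compile(r'^([^<]+)? ?<([^>]*)>')

def split_packager(packager_str):
    '''Return a (name, email) tuple, or (None, None) if unparsable.'''
    match = PACKAGER_RE.match(packager_str)
    if not match:
        return None, None
    name = match.group(1).strip() if match.group(1) else None
    return name, match.group(2)

# split_packager('A. U. Thor <author@example.com>')
# -> ('A. U. Thor', 'author@example.com')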
diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py
index 7c468001..c76b5011 100644
--- a/devel/management/commands/reporead.py
+++ b/devel/management/commands/reporead.py
@@ -13,12 +13,10 @@ Example:
./manage.py reporead i686 /tmp/core.db.tar.gz
"""
-from django.core.management.base import BaseCommand, CommandError
-from django.contrib.auth.models import User
-from django.db import transaction
-from django.db.models import Q
-
-import codecs
+from base64 import b64decode
+from collections import defaultdict
+from copy import copy
+import io
import os
import re
import sys
@@ -26,16 +24,26 @@ import tarfile
import logging
from datetime import datetime
from optparse import make_option
+from pytz import utc
-from logging import ERROR, WARNING, INFO, DEBUG
+from django.core.management.base import BaseCommand, CommandError
+from django.db import connections, router, transaction
+from django.db.utils import IntegrityError
+from django.utils.timezone import now
+
+from devel.utils import UserFinder
+from main.models import Arch, Package, PackageFile, Repo
+from packages.models import Depend, Conflict, Provision, Replacement, Update
+from packages.utils import parse_version
-from main.models import Arch, Package, Repo
logging.basicConfig(
- level=WARNING,
+ level=logging.WARNING,
format='%(asctime)s -> %(levelname)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
stream=sys.stderr)
+TRACE = 5
+logging.addLevelName(TRACE, 'TRACE')
logger = logging.getLogger()
class Command(BaseCommand):
@@ -51,8 +59,6 @@ class Command(BaseCommand):
def handle(self, arch=None, filename=None, **options):
if not arch:
raise CommandError('Architecture is required.')
- if not validate_arch(arch):
- raise CommandError('Specified architecture %s is not currently known.' % arch)
if not filename:
raise CommandError('Package database file is required.')
filename = os.path.normpath(filename)
@@ -61,257 +67,414 @@ class Command(BaseCommand):
v = int(options.get('verbosity', 0))
if v == 0:
- logger.level = ERROR
+ logger.level = logging.ERROR
elif v == 1:
- logger.level = INFO
- elif v == 2:
- logger.level = DEBUG
-
- import signal, traceback
- handler = lambda sig, stack: traceback.print_stack(stack)
- signal.signal(signal.SIGQUIT, handler)
- signal.signal(signal.SIGUSR1, handler)
+ logger.level = logging.INFO
+ elif v >= 2:
+ logger.level = logging.DEBUG
return read_repo(arch, filename, options)
-class Pkg(object):
+class RepoPackage(object):
"""An interim 'container' object for holding Arch package data."""
- bare = ( 'name', 'base', 'arch', 'desc', 'filename',
- 'md5sum', 'url', 'builddate', 'packager' )
- squash = ( 'license', )
+ bare = ( 'name', 'base', 'arch', 'filename',
+ 'md5sum', 'sha256sum', 'url', 'packager' )
number = ( 'csize', 'isize' )
+ collections = ( 'depends', 'optdepends', 'makedepends', 'checkdepends',
+ 'conflicts', 'provides', 'replaces', 'groups', 'license')
def __init__(self, repo):
self.repo = repo
self.ver = None
self.rel = None
- for k in self.bare + self.squash + self.number:
+ self.epoch = 0
+ self.desc = None
+ self.pgpsig = None
+ for k in self.bare + self.number:
setattr(self, k, None)
+ for k in self.collections:
+ setattr(self, k, ())
+ self.builddate = None
+ self.files = None
def populate(self, values):
for k, v in values.iteritems():
# ensure we stay under our DB character limit
if k in self.bare:
setattr(self, k, v[0][:254])
- elif k in self.squash:
- setattr(self, k, u', '.join(v)[:254])
elif k in self.number:
setattr(self, k, long(v[0]))
- elif k == 'force':
- setattr(self, k, True)
+ elif k in ('desc', 'pgpsig'):
+ # do NOT prune these values at all
+                if v[0] is None:
+ v[0] = 'missing'
+ setattr(self, k, v[0])
elif k == 'version':
- ver, rel = v[0].rsplit('-')
- setattr(self, 'ver', ver)
- setattr(self, 'rel', rel)
+ self.ver, self.rel, self.epoch = parse_version(v[0])
+ elif k == 'builddate':
+ try:
+ builddate = datetime.utcfromtimestamp(int(v[0]))
+ self.builddate = builddate.replace(tzinfo=utc)
+ except ValueError:
+ logger.warning(
+ 'Package %s had unparsable build date %s',
+ self.name, v[0])
else:
- # files, depends, etc.
- setattr(self, k, v)
-
-
-def find_user(userstring):
- '''
- Attempt to find the corresponding User object for a standard
- packager string, e.g. something like
- 'A. U. Thor <author@example.com>'.
- We start by searching for a matching email address; we then move onto
- matching by first/last name. If we cannot find a user, then return None.
- '''
- if userstring in find_user.cache:
- return find_user.cache[userstring]
- matches = re.match(r'^([^<]+)? ?<([^>]*)>', userstring)
- if not matches:
- return None
+ # anything left in collections
+ setattr(self, k, tuple(v))
- user = None
- name = matches.group(1)
- email = matches.group(2)
-
- def user_email():
- return User.objects.get(email=email)
- def profile_email():
- return User.objects.get(userprofile_user__public_email=email)
- def user_name():
- # yes, a bit odd but this is the easiest way since we can't always be
- # sure how to split the name. Ensure every 'token' appears in at least
- # one of the two name fields.
- name_q = Q()
- for token in name.split():
- name_q &= (Q(first_name__icontains=token) |
- Q(last_name__icontains=token))
- return User.objects.get(name_q)
-
- for matcher in (user_email, profile_email, user_name):
+ @property
+ def files_list(self):
+ data_file = io.TextIOWrapper(io.BytesIO(self.files), encoding='UTF-8')
try:
- user = matcher()
- break
- except (User.DoesNotExist, User.MultipleObjectsReturned):
- pass
+ info = parse_info(data_file)
+ except UnicodeDecodeError:
+ logger.warn("Could not correctly decode files list for %s",
+ self.name)
+ return None
+ return info['files']
+
+ @property
+ def full_version(self):
+ '''Very similar to the main.models.Package method.'''
+ if self.epoch > 0:
+ return u'%d:%s-%s' % (self.epoch, self.ver, self.rel)
+ return u'%s-%s' % (self.ver, self.rel)
+
+
+DEPEND_RE = re.compile(r"^(.+?)((>=|<=|=|>|<)(.+))?$")
+
+def create_depend(package, dep_str, deptype='D'):
+ depend = Depend(pkg=package, deptype=deptype)
+ # lop off any description first, don't get confused by epoch
+ parts = dep_str.split(': ', 1)
+ if len(parts) > 1:
+ depend.description = parts[1].strip()
+ match = DEPEND_RE.match(parts[0].strip())
+ if match:
+ depend.name = match.group(1)
+ if match.group(3):
+ depend.comparison = match.group(3)
+ if match.group(4):
+ depend.version = match.group(4)
+ else:
+ logger.warning('Package %s had unparsable depend string %s',
+ package.pkgname, dep_str)
+ return None
+ return depend
+
+def create_related(model, package, rel_str, equals_only=False):
+ related = model(pkg=package)
+ match = DEPEND_RE.match(rel_str)
+ if match:
+ related.name = match.group(1)
+ if match.group(3):
+ comp = match.group(3)
+ if not equals_only:
+ related.comparison = comp
+ elif comp != '=':
+ logger.warning(
+ 'Package %s had unexpected comparison operator %s for %s in %s',
+ package.pkgname, comp, model.__name__, rel_str)
+ if match.group(4):
+ related.version = match.group(4)
+ else:
+ logger.warning('Package %s had unparsable %s string %s',
+                package.pkgname, model.__name__, rel_str)
+ return None
+ return related
+
- find_user.cache[userstring] = user
- return user
+def create_multivalued(dbpkg, repopkg, db_attr, repo_attr):
+ '''Populate the simplest of multivalued attributes. These are those that
+ only deal with a 'name' attribute, such as licenses, groups, etc. The input
+ and output objects and attribute names are specified, and everything is
+ done via getattr().'''
+ collection = getattr(dbpkg, db_attr)
+ collection.all().delete()
+ model = collection.model
+ new_items = []
+ for name in getattr(repopkg, repo_attr):
+ new_items.append(model(pkg=dbpkg, name=name))
+ if new_items:
+ model.objects.bulk_create(new_items)
-# cached mappings of user strings -> User objects so we don't have to do the
-# lookup more than strictly necessary.
-find_user.cache = {}
+finder = UserFinder()
def populate_pkg(dbpkg, repopkg, force=False, timestamp=None):
+ # we reset the flag date only if the upstream version components change;
+ # e.g. epoch or pkgver, but not pkgrel
+ if dbpkg.epoch is None or dbpkg.epoch != repopkg.epoch:
+ dbpkg.flag_date = None
+ elif dbpkg.pkgver is None or dbpkg.pkgver != repopkg.ver:
+ dbpkg.flag_date = None
+
if repopkg.base:
dbpkg.pkgbase = repopkg.base
else:
dbpkg.pkgbase = repopkg.name
dbpkg.pkgver = repopkg.ver
dbpkg.pkgrel = repopkg.rel
- dbpkg.pkgdesc = repopkg.desc
- dbpkg.license = repopkg.license
+ dbpkg.epoch = repopkg.epoch
+ try:
+ dbpkg.pkgdesc = repopkg.desc
+ except AttributeError:
+ dbpkg.pkgdesc = "missing"
dbpkg.url = repopkg.url
dbpkg.filename = repopkg.filename
dbpkg.compressed_size = repopkg.csize
dbpkg.installed_size = repopkg.isize
- try:
- dbpkg.build_date = datetime.utcfromtimestamp(int(repopkg.builddate))
- except ValueError:
- try:
- dbpkg.build_date = datetime.strptime(repopkg.builddate,
- '%a %b %d %H:%M:%S %Y')
- except ValueError:
- logger.warning('Package %s had unparsable build date %s' % \
- (repopkg.name, repopkg.builddate))
+ dbpkg.build_date = repopkg.builddate
dbpkg.packager_str = repopkg.packager
# attempt to find the corresponding django user for this string
- dbpkg.packager = find_user(repopkg.packager)
+ dbpkg.packager = finder.find(repopkg.packager)
+ dbpkg.signature_bytes = b64decode(repopkg.pgpsig.encode('utf-8'))
if timestamp:
- dbpkg.flag_date = None
dbpkg.last_update = timestamp
dbpkg.save()
populate_files(dbpkg, repopkg, force=force)
- dbpkg.packagedepend_set.all().delete()
- if 'depends' in repopkg.__dict__:
- for y in repopkg.depends:
- # make sure we aren't adding self depends..
- # yes *sigh* i have seen them in pkgbuilds
- dpname, dpvcmp = re.match(r"([a-z0-9._+-]+)(.*)", y).groups()
- if dpname == repopkg.name:
- logger.warning('Package %s has a depend on itself' % repopkg.name)
- continue
- dbpkg.packagedepend_set.create(depname=dpname, depvcmp=dpvcmp)
- logger.debug('Added %s as dep for pkg %s' % (dpname, repopkg.name))
+ dbpkg.depends.all().delete()
+ deps = [create_depend(dbpkg, y) for y in repopkg.depends]
+ deps += [create_depend(dbpkg, y, 'O') for y in repopkg.optdepends]
+ deps += [create_depend(dbpkg, y, 'M') for y in repopkg.makedepends]
+ deps += [create_depend(dbpkg, y, 'C') for y in repopkg.checkdepends]
+ Depend.objects.bulk_create(deps)
- dbpkg.packagegroup_set.all().delete()
- if 'groups' in repopkg.__dict__:
- for y in repopkg.groups:
- dbpkg.packagegroup_set.create(name=y)
+ dbpkg.conflicts.all().delete()
+ conflicts = [create_related(Conflict, dbpkg, y) for y in repopkg.conflicts]
+ Conflict.objects.bulk_create(conflicts)
+
+ dbpkg.provides.all().delete()
+ provides = [create_related(Provision, dbpkg, y, equals_only=True)
+ for y in repopkg.provides]
+ Provision.objects.bulk_create(provides)
+
+ dbpkg.replaces.all().delete()
+ replaces = [create_related(Replacement, dbpkg, y) for y in repopkg.replaces]
+ Replacement.objects.bulk_create(replaces)
+
+ create_multivalued(dbpkg, repopkg, 'groups', 'groups')
+ create_multivalued(dbpkg, repopkg, 'licenses', 'license')
+
+
+pkg_same_version = lambda pkg, dbpkg: pkg.ver == dbpkg.pkgver \
+ and pkg.rel == dbpkg.pkgrel and pkg.epoch == dbpkg.epoch
+
+
+def delete_pkg_files(dbpkg):
+ database = router.db_for_write(Package, instance=dbpkg)
+ cursor = connections[database].cursor()
+ cursor.execute('DELETE FROM package_files WHERE pkg_id = %s', [dbpkg.id])
+
+
+def batched_bulk_create(model, all_objects):
+ cutoff = 10000
+ length = len(all_objects)
+ if length < cutoff:
+ return model.objects.bulk_create(all_objects)
+
+ def chunks():
+ offset = 0
+ while offset < length:
+ yield all_objects[offset:offset + cutoff]
+ offset += cutoff
+
+ for items in chunks():
+ ret = model.objects.bulk_create(items)
+
+ return ret
def populate_files(dbpkg, repopkg, force=False):
if not force:
+ if not pkg_same_version(repopkg, dbpkg):
+ logger.info("DB version (%s) didn't match repo version "
+ "(%s) for package %s, skipping file list addition",
+ dbpkg.full_version, repopkg.full_version, dbpkg.pkgname)
+ return
if not dbpkg.files_last_update or not dbpkg.last_update:
pass
- elif dbpkg.files_last_update > dbpkg.last_update:
+ elif dbpkg.files_last_update >= dbpkg.last_update:
return
+
# only delete files if we are reading a DB that contains them
- if 'files' in repopkg.__dict__:
- dbpkg.packagefile_set.all().delete()
- logger.info("adding %d files for package %s" % (len(repopkg.files), dbpkg.pkgname))
- for x in repopkg.files:
- dbpkg.packagefile_set.create(path=x)
- dbpkg.files_last_update = datetime.now()
+ if repopkg.files:
+ files = repopkg.files_list
+ # we had files data, but it couldn't be parsed, so skip
+ if not files:
+ return
+ delete_pkg_files(dbpkg)
+ logger.info("adding %d files for package %s",
+ len(files), dbpkg.pkgname)
+ pkg_files = []
+ # sort in normal alpha-order that pacman uses, rather than makepkg's
+ # default breadth-first, directory-first ordering
+ for f in sorted(files):
+ if '/' in f:
+ dirname, filename = f.rsplit('/', 1)
+ dirname += '/'
+ else:
+ dirname, filename = '', f
+ if filename == '':
+ filename = None
+ pkgfile = PackageFile(pkg=dbpkg,
+ is_directory=(filename is None),
+ directory=dirname,
+ filename=filename)
+ pkg_files.append(pkgfile)
+ batched_bulk_create(PackageFile, pkg_files)
+ dbpkg.files_last_update = now()
dbpkg.save()
-def db_update(archname, reponame, pkgs, options):
- """
- Parses a list and updates the Arch dev database accordingly.
- Arguments:
- pkgs -- A list of Pkg objects.
+def update_common(archname, reponame, pkgs, sanity_check=True):
+ # If isolation level is repeatable-read, we need to ensure each package
+ # update starts a new transaction and re-queries the database as
+ # necessary to guard against simultaneous updates.
+ with transaction.atomic():
+
+ repository = Repo.objects.get(name__iexact=reponame)
+ architecture = Arch.objects.get(name=archname)
+ # no-arg order_by() removes even the default ordering; we don't need it
+ dbpkgs = Package.objects.filter(
+ arch=architecture, repo=repository).order_by()
+
+ logger.info("%d packages in current web DB", len(dbpkgs))
+ logger.info("%d packages in new updating DB", len(pkgs))
+ if len(dbpkgs):
+ dbpercent = 100.0 * len(pkgs) / len(dbpkgs)
+ else:
+ dbpercent = 0.0
+ logger.info("DB package ratio: %.1f%%", dbpercent)
+
+ # Fewer than 20 packages makes the percentage check unreliable, but it
+ # also means we expect the repo to fluctuate a lot.
+ msg = "Package database %s (%s) has %.1f%% the number of packages " \
+ "the web database"
+ if not sanity_check:
+ pass
+ elif repository.testing or repository.staging:
+ pass
+ elif len(dbpkgs) == 0 and len(pkgs) == 0:
+ pass
+ elif len(dbpkgs) > 20 and dbpercent < 50.0:
+ logger.error(msg, reponame, archname, dbpercent)
+ raise Exception(msg % (reponame, archname, dbpercent))
+ elif dbpercent < 75.0:
+ logger.warning(msg, reponame, archname, dbpercent)
+
+ return dbpkgs
+
+def db_update(archname, reponame, pkgs, force=False):
"""
- logger.info('Updating Arch: %s' % archname)
- force = options.get('force', False)
- filesonly = options.get('filesonly', False)
+ Parses a list of packages and updates the packages database accordingly.
+ """
+ logger.info('Updating %s (%s)', reponame, archname)
+ dbpkgs = update_common(archname, reponame, pkgs, True)
repository = Repo.objects.get(name__iexact=reponame)
- architecture = Arch.objects.get(name__iexact=archname)
- dbpkgs = Package.objects.filter(arch=architecture, repo=repository)
- # It makes sense to fully evaluate our DB query now because we will
- # be using 99% of the objects in our "in both sets" loop. Force eval
- # by calling list() on the QuerySet.
- list(dbpkgs)
+ architecture = Arch.objects.get(name=archname)
+
# This makes our inner loop where we find packages by name *way* more
# efficient by not having to go to the database for each package to
# SELECT them by name.
- dbdict = dict([(pkg.pkgname, pkg) for pkg in dbpkgs])
-
- # go go set theory!
- # thank you python for having a set class <3
- logger.debug("Creating sets")
- dbset = set([pkg.pkgname for pkg in dbpkgs])
- syncset = set([pkg.name for pkg in pkgs])
- logger.info("%d packages in current web DB" % len(dbset))
- logger.info("%d packages in new updating db" % len(syncset))
- # packages in syncdb and not in database (add to database)
- logger.debug("Set theory: Packages in syncdb not in database")
- in_sync_not_db = syncset - dbset
- logger.info("%d packages in sync not db" % len(in_sync_not_db))
+ dbdict = {dbpkg.pkgname: dbpkg for dbpkg in dbpkgs}
- # Try to catch those random orphaning issues that make Eric so unhappy.
- if len(dbset) > 20:
- dbpercent = 100.0 * len(syncset) / len(dbset)
- else:
- # we don't have 20 packages in this repo/arch, so this check could
- # produce a lot of false positives (or a div by zero). fake it
- dbpercent = 100.0
- logger.info("DB package ratio: %.1f%%" % dbpercent)
- if dbpercent < 50.0 and not repository.testing:
- logger.error(".db.tar.gz has %.1f%% the number of packages in the web database" % dbpercent)
- raise Exception(
- 'It looks like the syncdb is less than half the size of the web db. WTF?')
-
- if dbpercent < 75.0:
- logger.warning(".db.tar.gz has %.1f%% the number of packages in the web database." % dbpercent)
-
- if not filesonly:
- # packages in syncdb and not in database (add to database)
- logger.debug("Set theory: Packages in syncdb not in database")
- for p in [x for x in pkgs if x.name in in_sync_not_db]:
- logger.info("Adding package %s", p.name)
- pkg = Package(pkgname = p.name, arch = architecture, repo = repository)
- populate_pkg(pkg, p, timestamp=datetime.now())
-
- # packages in database and not in syncdb (remove from database)
- logger.debug("Set theory: Packages in database not in syncdb")
- in_db_not_sync = dbset - syncset
- for p in in_db_not_sync:
- logger.info("Removing package %s from database", p)
- Package.objects.get(
- pkgname=p, arch=architecture, repo=repository).delete()
+ dbset = set(dbdict.keys())
+ syncset = {pkg.name for pkg in pkgs}
+
+ in_sync_not_db = syncset - dbset
+ logger.info("%d packages in sync not db", len(in_sync_not_db))
+ # packages in syncdb and not in database (add to database)
+ for pkg in (pkg for pkg in pkgs if pkg.name in in_sync_not_db):
+ logger.info("Adding package %s", pkg.name)
+ timestamp = now()
+ dbpkg = Package(pkgname=pkg.name, arch=architecture, repo=repository,
+ created=timestamp)
+ try:
+ with transaction.atomic():
+ populate_pkg(dbpkg, pkg, timestamp=timestamp)
+ Update.objects.log_update(None, dbpkg)
+ except IntegrityError:
+ if architecture.agnostic:
+ logger.warning("Could not add package %s; "
+ "not fatal if another thread beat us to it.",
+ pkg.name)
+ else:
+ logger.exception("Could not add package %s", pkg.name)
+
+ # packages in database and not in syncdb (remove from database)
+ for pkgname in (dbset - syncset):
+ logger.info("Removing package %s", pkgname)
+ dbpkg = dbdict[pkgname]
+ with transaction.atomic():
+ Update.objects.log_update(dbpkg, None)
+ # no race condition here as long as simultaneous threads both
+ # issue deletes; second delete will be a no-op
+ delete_pkg_files(dbpkg)
+ dbpkg.delete()
# packages in both database and in syncdb (update in database)
- logger.debug("Set theory: Packages in database and syncdb")
pkg_in_both = syncset & dbset
- for p in [x for x in pkgs if x.name in pkg_in_both]:
- logger.debug("Looking for package updates")
- dbp = dbdict[p.name]
+ for pkg in (x for x in pkgs if x.name in pkg_in_both):
+ logger.debug("Checking package %s", pkg.name)
+ dbpkg = dbdict[pkg.name]
timestamp = None
# for a force, we don't want to update the timestamp.
# for a non-force, we don't want to do anything at all.
- if filesonly:
- pass
- elif '-'.join((p.ver, p.rel)) == '-'.join((dbp.pkgver, dbp.pkgrel)):
- if not force:
+ if not force and pkg_same_version(pkg, dbpkg):
+ continue
+ elif not force:
+ timestamp = now()
+
+        # The odd select_for_update song and dance here is to ensure
+ # simultaneous updates don't happen on a package, causing
+ # files/depends/all related items to be double-imported.
+ with transaction.atomic():
+ dbpkg = Package.objects.select_for_update().get(id=dbpkg.id)
+ if not force and pkg_same_version(pkg, dbpkg):
+ logger.debug("Package %s was already updated", pkg.name)
continue
- else:
- timestamp = datetime.now()
- if filesonly:
- logger.debug("Checking files for package %s in database", p.name)
- populate_files(dbp, p)
- else:
- logger.info("Updating package %s in database", p.name)
- populate_pkg(dbp, p, force=force, timestamp=timestamp)
+ logger.info("Updating package %s", pkg.name)
+ prevpkg = copy(dbpkg)
+ populate_pkg(dbpkg, pkg, force=force, timestamp=timestamp)
+ Update.objects.log_update(prevpkg, dbpkg)
+
+ logger.info('Finished updating arch: %s', archname)
+
+
+def filesonly_update(archname, reponame, pkgs, force=False):
+ """
+    Parses a list of packages and updates only their file lists in the database.
+ """
+ logger.info('Updating files for %s (%s)', reponame, archname)
+ dbpkgs = update_common(archname, reponame, pkgs, False)
+ dbdict = {dbpkg.pkgname: dbpkg for dbpkg in dbpkgs}
+ dbset = set(dbdict.keys())
+
+ for pkg in (pkg for pkg in pkgs if pkg.name in dbset):
+ dbpkg = dbdict[pkg.name]
+
+        # The odd select_for_update song and dance here is to ensure
+ # simultaneous updates don't happen on a package, causing
+ # files to be double-imported.
+ with transaction.atomic():
+ if not dbpkg.files_last_update or not dbpkg.last_update:
+ pass
+ elif not force and dbpkg.files_last_update >= dbpkg.last_update:
+ logger.debug("Files for %s are up to date", pkg.name)
+ continue
+ dbpkg = Package.objects.select_for_update().get(id=dbpkg.id)
+ logger.debug("Checking files for package %s", pkg.name)
+ populate_files(dbpkg, pkg, force=force)
- logger.info('Finished updating Arch: %s' % archname)
+ logger.info('Finished updating arch: %s', archname)
def parse_info(iofile):
@@ -326,7 +489,7 @@ def parse_info(iofile):
continue
elif line.startswith('%') and line.endswith('%'):
blockname = line[1:-1].lower()
- logger.debug("Parsing package block %s", blockname)
+ logger.log(TRACE, "Parsing package block %s", blockname)
store[blockname] = []
elif blockname:
store[blockname].append(line)
@@ -337,7 +500,7 @@ def parse_info(iofile):
def parse_repo(repopath):
"""
- Parses an Arch repo db file, and returns a list of Pkg objects.
+ Parses an Arch repo db file, and returns a list of RepoPackage objects.
Arguments:
repopath -- The path of a repository db file.
@@ -349,71 +512,95 @@ def parse_repo(repopath):
logger.info("Reading repo tarfile %s", repopath)
filename = os.path.split(repopath)[1]
- m = re.match(r"^(.*)\.(db|files)\.tar\.(.*)$", filename)
+ m = re.match(r"^(.*)\.(db|files)\.tar(\..*)?$", filename)
if m:
reponame = m.group(1)
else:
logger.error("File does not have the proper extension")
raise Exception("File does not have the proper extension")
- repodb = tarfile.open(repopath,"r:gz")
- ## assuming well formed tar, with dir first then files after
- ## repo-add enforces this
+ repodb = tarfile.open(repopath, "r")
logger.debug("Starting package parsing")
- dbfiles = ('desc', 'depends', 'files')
- pkgs = {}
+ newpkg = lambda: RepoPackage(reponame)
+ pkgs = defaultdict(newpkg)
for tarinfo in repodb.getmembers():
- if tarinfo.isdir():
- continue
- elif tarinfo.isreg():
+ if tarinfo.isreg():
pkgid, fname = os.path.split(tarinfo.name)
- if fname not in dbfiles:
- continue
- data_file = repodb.extractfile(tarinfo)
- data_file = codecs.EncodedFile(data_file, 'utf-8')
- try:
- data = parse_info(data_file)
- p = pkgs.setdefault(pkgid, Pkg(reponame))
- p.populate(data)
- except UnicodeDecodeError, e:
- logger.warn("Could not correctly decode %s, skipping file" % \
- tarinfo.name)
- data_file.close()
-
- logger.debug("Done parsing file %s", fname)
+ if fname == 'files':
+ # don't parse yet for speed and memory consumption reasons
+ files_data = repodb.extractfile(tarinfo)
+ pkgs[pkgid].files = files_data.read()
+ del files_data
+ elif fname in ('desc', 'depends'):
+ data_file = repodb.extractfile(tarinfo)
+ data_file = io.TextIOWrapper(io.BytesIO(data_file.read()),
+ encoding='UTF-8')
+ try:
+ pkgs[pkgid].populate(parse_info(data_file))
+ except UnicodeDecodeError:
+ logger.warn("Could not correctly decode %s, skipping file",
+ tarinfo.name)
+ data_file.close()
+ del data_file
+
+ logger.debug("Done parsing file %s/%s", pkgid, fname)
repodb.close()
- logger.info("Finished repo parsing, %d total packages" % len(pkgs))
+ logger.info("Finished repo parsing, %d total packages", len(pkgs))
return (reponame, pkgs.values())
-def validate_arch(arch):
+def locate_arch(arch):
"Check if arch is valid."
- available_arches = [x.name for x in Arch.objects.all()]
- return arch in available_arches
+ if isinstance(arch, Arch):
+ return arch
+ try:
+ return Arch.objects.get(name=arch)
+ except Arch.DoesNotExist:
+ raise CommandError(
+ 'Specified architecture %s is not currently known.' % arch)
-@transaction.commit_on_success
-def read_repo(primary_arch, file, options):
+
+def read_repo(primary_arch, repo_file, options):
"""
Parses repo.db.tar.gz file and returns exit status.
"""
- repo, packages = parse_repo(file)
+ # always returns an Arch object, regardless of what is passed in
+ primary_arch = locate_arch(primary_arch)
+ force = options.get('force', False)
+ filesonly = options.get('filesonly', False)
+
+ repo, packages = parse_repo(repo_file)
- # sort packages by arch -- to handle noarch stuff
+ # group packages by arch -- to handle noarch stuff
packages_arches = {}
- packages_arches['any'] = []
- packages_arches[primary_arch] = []
+ for arch in Arch.objects.filter(agnostic=True):
+ packages_arches[arch.name] = []
+ packages_arches[primary_arch.name] = []
for package in packages:
- if package.arch in ('any', primary_arch):
+ if package.arch in packages_arches:
packages_arches[package.arch].append(package)
else:
- # we don't include mis-arched packages
- logger.warning("Package %s arch = %s" % (
- package.name,package.arch))
- logger.info('Starting database updates.')
- for (arch, pkgs) in packages_arches.items():
- db_update(arch, repo, pkgs, options)
- logger.info('Finished database updates.')
+ raise Exception(
+ "Package %s in database %s had wrong architecture %s" % (
+ package.name, repo_file, package.arch))
+ del packages
+
+ database = router.db_for_write(Package)
+ connection = connections[database]
+ if connection.vendor == 'sqlite':
+ cursor = connection.cursor()
+ cursor.execute('PRAGMA synchronous = NORMAL')
+
+ logger.info('Starting database updates for %s.', repo_file)
+ for arch in sorted(packages_arches.keys()):
+ if filesonly:
+ filesonly_update(arch, repo, packages_arches[arch], force)
+ else:
+ db_update(arch, repo, packages_arches[arch], force)
+ logger.info('Finished database updates for %s.', repo_file)
+ connection.commit()
+ connection.close()
return 0
# vim: set ts=4 sw=4 et:
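The desc and depends entries inside a pacman sync database are plain text made of %BLOCK% headers followed by one value per line, which is exactly what parse_info() folds into a {blockname: [values]} dict; version strings follow the [epoch:]pkgver-pkgrel convention handled by parse_version() from packages.utils, and depend strings are split by DEPEND_RE. A small self-contained illustration (the package data below is invented):

import re

SAMPLE_DESC = """\
%NAME%
examplepkg

%VERSION%
1:3.13.6-1

%DEPENDS%
coreutils
kmod>=14
"""

store = {}
blockname = None
for line in SAMPLE_DESC.splitlines():
    line = line.strip()
    if not line:
        continue
    if line.startswith('%') and line.endswith('%'):
        blockname = line[1:-1].lower()
        store[blockname] = []
    elif blockname:
        store[blockname].append(line)

# store == {'name': ['examplepkg'], 'version': ['1:3.13.6-1'],
#           'depends': ['coreutils', 'kmod>=14']}
# '1:3.13.6-1' -> epoch 1, pkgver '3.13.6', pkgrel '1'

# the same DEPEND_RE used by create_depend()/create_related() splits a
# depend string into name, comparison operator and version:
DEPEND_RE = re.compile(r"^(.+?)((>=|<=|=|>|<)(.+))?$")
match = DEPEND_RE.match('kmod>=14')
# match.group(1) == 'kmod', match.group(3) == '>=', match.group(4) == '14'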
diff --git a/devel/management/commands/reporead_inotify.py b/devel/management/commands/reporead_inotify.py
new file mode 100644
index 00000000..1422ae26
--- /dev/null
+++ b/devel/management/commands/reporead_inotify.py
@@ -0,0 +1,215 @@
+# -*- coding: utf-8 -*-
+"""
+reporead_inotify command
+
+Watches repo.files.tar.gz files for updates and parses them after a short delay
+in order to catch all updates in a single bulk update.
+
+Usage: ./manage.py reporead_inotify [path_template]
+
+Where 'path_template' is an optional path_template for finding the
+repo.files.tar.gz files. The form is '/srv/ftp/%(repo)s/os/%(arch)s/', which is
+also the default template if none is specified. 'repo' is optional in the
+path_template, but 'arch' must be present for reporead to function correctly.
+"""
+
+import logging
+import multiprocessing
+import os
+import pyinotify
+import sys
+import threading
+import time
+
+from django.core.management.base import BaseCommand, CommandError
+from django.db import connection, transaction
+
+from main.models import Arch, Repo
+from .reporead import read_repo
+
+logging.basicConfig(
+ level=logging.WARNING,
+ format='%(asctime)s -> %(levelname)s: %(message)s',
+ datefmt='%Y-%m-%d %H:%M:%S',
+ stream=sys.stderr)
+logger = logging.getLogger()
+
+class Command(BaseCommand):
+ help = "Watch database files and run an update when necessary."
+ args = "[path_template]"
+
+ def handle(self, path_template=None, **options):
+ v = int(options.get('verbosity', 0))
+ if v == 0:
+ logger.level = logging.ERROR
+ elif v == 1:
+ logger.level = logging.INFO
+ elif v >= 2:
+ logger.level = logging.DEBUG
+
+ if not path_template:
+ path_template = '/srv/ftp/%(repo)s/os/%(arch)s/'
+ self.path_template = path_template
+
+ notifier = self.setup_notifier()
+ # this thread is done using the database; all future access is done in
+ # the spawned read_repo() processes, so close the otherwise completely
+ # idle connection.
+ connection.close()
+
+ logger.info('Entering notifier loop')
+ notifier.loop()
+
+ logger.info('Cancelling remaining threads...')
+ for thread in threading.enumerate():
+ if hasattr(thread, 'cancel'):
+ thread.cancel()
+
+ @transaction.atomic
+ def setup_notifier(self):
+ '''Set up and configure the inotify machinery and logic.
+ This takes the provided or default path_template and builds a list of
+ directories we need to watch for database updates. It then validates
+ and passes these on to the various pyinotify pieces as necessary and
+ finally builds and returns a notifier object.'''
+        arches = Arch.objects.filter(agnostic=False)
+        repos = Repo.objects.all()
+ arch_path_map = {arch: None for arch in arches}
+ all_paths = set()
+ total_paths = 0
+ for arch in arches:
+ combos = ({ 'repo': repo.name.lower(), 'arch': arch.name }
+ for repo in repos)
+ # take a python format string and generate all unique combinations
+ # of directories from it; using set() ensures we filter it down
+ paths = {self.path_template % values for values in combos}
+ total_paths += len(paths)
+ all_paths |= paths
+ arch_path_map[arch] = paths
+
+ logger.info('Watching %d total paths', total_paths)
+ logger.debug(all_paths)
+
+ # sanity check- basically ensure every path we created from the
+ # template mapped to only one architecture
+ if total_paths != len(all_paths):
+ raise CommandError('path template did not uniquely '
+ 'determine architecture for each file')
+
+ # A proper atomic replacement of the database as done by rsync is type
+        # IN_MOVED_TO. repo-add/remove will finish with an IN_CLOSE_WRITE.
+ mask = pyinotify.IN_CLOSE_WRITE | pyinotify.IN_MOVED_TO
+
+ manager = pyinotify.WatchManager()
+ for name in all_paths:
+ manager.add_watch(name, mask)
+
+ handler = EventHandler(arch_paths=arch_path_map)
+ return pyinotify.Notifier(manager, handler)
+
+
+class Database(object):
+ '''A object representing a pacman database on the filesystem. It stores
+ various bits of metadata and state representing the file path, when we last
+ updated, how long our delay is before performing the update, whether we are
+ updating now, etc.'''
+ def __init__(self, arch, path, delay=60.0, nice=3):
+ self.arch = arch
+ self.path = path
+ self.delay = delay
+ self.nice = nice
+ self.mtime = None
+ self.last_import = None
+ self.update_thread = None
+ self.updating = False
+ self.run_again = False
+ self.lock = threading.Lock()
+
+ def _start_update_countdown(self):
+ self.update_thread = threading.Timer(self.delay, self.update)
+ logger.info('Starting %.1f second countdown to update %s',
+ self.delay, self.path)
+ self.update_thread.start()
+
+ def queue_for_update(self, mtime):
+ logger.debug('Queueing database %s...', self.path)
+ with self.lock:
+ self.mtime = mtime
+ if self.updating:
+ # store the fact that we will need to run it again
+ self.run_again = True
+ return
+ if self.update_thread:
+ self.update_thread.cancel()
+ self.update_thread = None
+ self._start_update_countdown()
+
+ def update(self):
+ logger.debug('Updating database %s...', self.path)
+ with self.lock:
+ self.last_import = time.time()
+ self.updating = True
+
+ try:
+ # invoke reporead's primary method. we do this in a separate
+ # process for memory conservation purposes; these processes grow
+ # rather large so it is best to free up the memory ASAP.
+ def run():
+ if self.nice != 0:
+ os.nice(self.nice)
+ read_repo(self.arch, self.path, {})
+
+ process = multiprocessing.Process(target=run)
+ process.start()
+ process.join()
+ finally:
+ logger.debug('Done updating database %s.', self.path)
+ with self.lock:
+ self.update_thread = None
+ self.updating = False
+ if self.run_again:
+ self.run_again = False
+ self._start_update_countdown()
+
+
+class EventHandler(pyinotify.ProcessEvent):
+ '''Our main event handler which listens for database change events. Because
+ we are watching the whole directory, we filter down and only look at those
+ events dealing with files databases.'''
+
+ def my_init(self, **kwargs):
+ self.databases = {}
+ self.arch_lookup = {}
+
+ # we really want a single path to arch mapping, so massage the data
+ arch_paths = kwargs['arch_paths']
+ for arch, paths in arch_paths.items():
+ self.arch_lookup.update((path.rstrip('/'), arch) for path in paths)
+
+ def process_default(self, event):
+ '''Primary event processing function which kicks off reporead timer
+ threads if a files database was updated.'''
+ name = event.name
+ if not name:
+ return
+ # screen to only the files we care about, skipping temp files
+ if name.endswith('.files.tar.gz') and not name.startswith('.'):
+ path = event.pathname
+ stat = os.stat(path)
+ database = self.databases.get(path, None)
+ if database is None:
+ arch = self.arch_lookup.get(event.path, None)
+ if arch is None:
+ logger.warning(
+ 'Could not determine arch for %s, skipping update',
+ path)
+ return
+ database = Database(arch, path)
+ self.databases[path] = database
+ database.queue_for_update(stat.st_mtime)
+
+
+# vim: set ts=4 sw=4 et:
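The Database class above is essentially a per-file debounce: every inotify event restarts a threading.Timer, so a burst of writes to the same database file results in a single reporead run once the path has been quiet for delay seconds (plus one re-run if an event arrives mid-import). Stripped of the reporead specifics, the core pattern looks roughly like this:

import threading

class Debouncer(object):
    '''Run action once, delay seconds after the most recent poke().'''
    def __init__(self, action, delay=60.0):
        self.action = action
        self.delay = delay
        self.timer = None
        self.lock = threading.Lock()

    def poke(self):
        with self.lock:
            if self.timer is not None:
                self.timer.cancel()
            self.timer = threading.Timer(self.delay, self.action)
            self.timer.start()

# every filesystem event calls poke(); action only fires once the events
# stop arriving for delay seconds.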
diff --git a/devel/management/commands/update_types_permissions.py b/devel/management/commands/update_types_permissions.py
new file mode 100644
index 00000000..ac8fcfa5
--- /dev/null
+++ b/devel/management/commands/update_types_permissions.py
@@ -0,0 +1,23 @@
+from django.core.management.base import BaseCommand
+from django.apps import apps
+from django.contrib.auth.management import create_permissions
+from django.contrib.contenttypes.management import update_contenttypes
+
+
+class Command(BaseCommand):
+ args = '<app app ...>'
+ help = 'reloads permissions for specified apps, or all apps if no args are specified'
+
+ def handle(self, *args, **options):
+ if not args:
+ app_configs = apps.get_app_configs()
+ else:
+ app_configs = []
+ for arg in args:
+                app_configs.append(apps.get_app_config(arg))
+
+ for app_config in app_configs:
+ update_contenttypes(app_config, options.get('verbosity', 2))
+            create_permissions(app_config, options.get('verbosity', 2))
+
+# vim: set ts=4 sw=4 et:
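Like the other commands in this directory, update_types_permissions is meant to be run through manage.py; it can also be driven programmatically via Django's call_command. A short usage sketch (the app labels below are only examples):

from django.core.management import call_command

# refresh content types and permissions for every installed app
call_command('update_types_permissions')

# or restrict the update to specific apps
call_command('update_types_permissions', 'main', 'packages')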