summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorDan McGee <dan@archlinux.org>2010-02-10 21:28:49 -0600
committerDan McGee <dan@archlinux.org>2010-02-10 21:30:35 -0600
commitc1697ab694fe549d7b6ff81a00737a2ad63e9461 (patch)
tree476b51edc0a535455d9f5e852ee6879b4b2f1e43 /scripts
parent25ce92969ac9e9c7f02f42470c2d019b630b958a (diff)
reporead: turn into a django-admin command
Rather than struggle with getting the environment set up, let's make this a custom Django admin command and use the flexibility that gives us. This is the initial rough cut of making it happen; further commits should clean up some of the rough edges. Signed-off-by: Dan McGee <dan@archlinux.org>
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/reporead.py369
1 file changed, 0 insertions, 369 deletions
diff --git a/scripts/reporead.py b/scripts/reporead.py
deleted file mode 100755
index 0806eb4e..00000000
--- a/scripts/reporead.py
+++ /dev/null
@@ -1,369 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
reporead.py

Parses a repo.db.tar.gz file and updates the Arch database with the relevant
changes.

Usage: reporead.py ARCH PATH
 ARCH: architecture to update; one of: i686, x86_64
 PATH: full path to the repo.db.tar.gz file.

Example:
 reporead.py i686 /tmp/core.db.tar.gz

"""
-
###
### User Variables
###

# Block headers (%NAME%, %DEPENDS%, ...) recognized inside an Arch repo
# database entry; anything else is ignored by the parser.
REPOVARS = [
    'arch', 'backup', 'base', 'builddate', 'conflicts',
    'csize', 'deltas', 'depends', 'desc', 'filename',
    'files', 'force', 'groups', 'installdate', 'isize',
    'license', 'md5sum', 'name', 'optdepends', 'packager',
    'provides', 'reason', 'replaces', 'size', 'url', 'version',
]
-
###
### Imports
###

import os
import re
import sys
import tarfile
import logging
from datetime import datetime
from django.core.management import setup_environ
# mung the sys path to get to django root dir, no matter
# where we are called from
# TODO this is so fricking ugly
archweb_app_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
os.chdir(archweb_app_path)
sys.path[0] = archweb_app_path
import settings
setup_environ(settings)
# the transaction import must be below where we set up our db stuff...
from django.db import transaction
from cStringIO import StringIO
from logging import WARNING,INFO,DEBUG
from main.models import Arch, Package, Repo
-
class SomethingFishyException(Exception):
    """Signals that applying the sync database would wipe out a
    suspiciously large portion of the packages already recorded in the
    web database, so the update should be aborted."""
    pass
-
###
### Initialization
###

# Configure the root logger: timestamped messages to stderr, WARNING and
# worse by default (the __main__ block below raises the level to INFO
# when run as a script).
logging.basicConfig(
    level=WARNING,
    format='%(asctime)s -> %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    stream=sys.stderr)
logger = logging.getLogger()
-
-
-###
-### function and class definitions
-###
-
class Pkg(object):
    """An interim 'container' object for holding Arch package data.

    Flattens the raw block dictionary produced by parse_inf() (where every
    value is a list of strings) into plain attributes on the instance.
    """

    def __init__(self, val):
        """Build package attributes from a parsed repo-db entry.

        Arguments:
        val -- dict mapping repo-db field names to lists of string values;
               must contain at least a 'name' entry. The dict is mutated
               while being consumed.
        """
        selfdict = {}
        # fields that are single-valued in practice; collapsed from a list
        # into one comma-separated string
        squash = ['arch', 'builddate', 'csize', 'desc', 'filename',
                  'installdate', 'isize', 'license', 'md5sum',
                  'packager', 'size', 'url']

        selfdict['name'] = val['name'][0]
        selfdict['base'] = None
        del val['name']
        if 'desc' not in val:
            logger.warning("Package %s has no description" % selfdict['name'])
            val['desc'] = None
        if 'url' not in val:
            val['url'] = None
        if 'license' not in val:
            val['license'] = []
        for x in val.keys():
            if x in squash:
                if val[x] is None or len(val[x]) == 0:
                    logger.warning("Package %s has no %s" % (selfdict['name'], x))
                    selfdict[x] = None
                else:
                    selfdict[x] = ', '.join(val[x])
                    # make sure we don't have elements larger than the db
                    # char fields (255 chars); keep the full 255, not 254
                    if len(selfdict[x]) > 255:
                        selfdict[x] = selfdict[x][:255]
            elif x == 'base':
                selfdict[x] = val[x][0]
            elif x == 'force':
                # presence of the %FORCE% block is what matters, not a value
                selfdict[x] = True
            elif x == 'version':
                # split only on the *last* hyphen: pkgrel follows the final
                # '-', and an unsplit rsplit would mangle any other hyphens
                version = val[x][0].rsplit('-', 1)
                selfdict['ver'] = version[0]
                selfdict['rel'] = version[1]
            elif x == 'reason':
                selfdict[x] = int(val[x][0])
            else:
                selfdict[x] = val[x]
        self.__dict__ = selfdict

    def __getattr__(self, name):
        # invoked only for attributes absent from __dict__: unknown fields
        # default to None, and 'force' defaults to False
        if name == 'force':
            return False
        else:
            return None
-
-
def usage():
    """Write this script's usage text (the module docstring) to stdout."""
    sys.stdout.write(__doc__.strip() + '\n')
-
-
def populate_pkg(dbpkg, repopkg, timestamp=None):
    """Copy the fields of a parsed Pkg onto a database Package and save it.

    Also rebuilds the package's depend rows from repopkg.depends.

    Arguments:
    dbpkg -- main.models.Package instance to update and save.
    repopkg -- Pkg object parsed from the sync database.
    timestamp -- value stored in last_update; defaults to datetime.now().
    """
    if not timestamp: timestamp = datetime.now()
    dbpkg.pkgbase = repopkg.base
    dbpkg.pkgver = repopkg.ver
    dbpkg.pkgrel = repopkg.rel
    dbpkg.pkgdesc = repopkg.desc
    dbpkg.license = repopkg.license
    dbpkg.url = repopkg.url
    dbpkg.needupdate = False
    dbpkg.last_update = timestamp
    dbpkg.save()
    # files are not in the repo.db.tar.gz
    #for x in repopkg.files:
    #    dbpkg.packagefile_set.create(path=x)
    dbpkg.packagedepend_set.all().delete()
    if 'depends' in repopkg.__dict__:
        for y in repopkg.depends:
            # split each depend string into its name and version comparator
            match = re.match(r"([a-z0-9._+-]+)(.*)", y)
            if match is None:
                # don't let one malformed depend entry abort the whole run
                logger.warning('Package %s has unparsable depend %r' % (
                    repopkg.name, y))
                continue
            dpname, dpvcmp = match.groups()
            # make sure we aren't adding self depends..
            # yes *sigh* i have seen them in pkgbuilds
            if dpname == repopkg.name:
                logger.warning('Package %s has a depend on itself' % repopkg.name)
                continue
            dbpkg.packagedepend_set.create(depname=dpname, depvcmp=dpvcmp)
            logger.debug('Added %s as dep for pkg %s' % (dpname, repopkg.name))
-
-
def db_update(archname, pkgs):
    """
    Parses a list and updates the Arch dev database accordingly.

    Arguments:
    archname -- architecture name (e.g. 'i686') whose packages to update.
    pkgs -- A list of Pkg objects; all entries are assumed to belong to the
            same repository (the repo of the first entry is used for all).

    Raises:
    SomethingFishyException -- if the incoming sync db holds fewer than half
        as many packages as the web db (for non-testing repositories).
    """
    logger.info('Updating Arch: %s' % archname)
    repository = Repo.objects.get(name__iexact=pkgs[0].repo)
    architecture = Arch.objects.get(name__iexact=archname)
    dbpkgs = Package.objects.filter(arch=architecture, repo=repository)
    # It makes sense to fully evaluate our DB query now because we will
    # be using 99% of the objects in our "in both sets" loop. Force eval
    # by calling list() on the QuerySet.
    list(dbpkgs)
    # This makes our inner loop where we find packages by name *way* more
    # efficient by not having to go to the database for each package to
    # SELECT them by name.
    dbdict = dict([(pkg.pkgname, pkg) for pkg in dbpkgs])
    now = datetime.now()

    # go go set theory!
    # thank you python for having a set class <3
    logger.debug("Creating sets")
    dbset = set([pkg.pkgname for pkg in dbpkgs])
    syncset = set([pkg.name for pkg in pkgs])
    logger.info("%d packages in current web DB" % len(dbset))
    logger.info("%d packages in new updating db" % len(syncset))
    # packages in syncdb and not in database (add to database)
    logger.debug("Set theory: Packages in syncdb not in database")
    in_sync_not_db = syncset - dbset
    logger.info("%d packages in sync not db" % len(in_sync_not_db))

    # Try to catch those random orphaning issues that make Eric so unhappy.
    if len(dbset) > 20:
        dbpercent = 100.0 * len(syncset) / len(dbset)
    else:
        # we don't have 20 packages in this repo/arch, so this check could
        # produce a lot of false positives (or a div by zero). fake it
        dbpercent = 100.0
    logger.info("DB package ratio: %.1f%%" % dbpercent)
    # abort entirely if the sync db looks suspiciously small, unless this
    # is a testing repository (those are expected to be much smaller)
    if dbpercent < 50.0 and repository.name.lower().find('testing') == -1:
        logger.error(".db.tar.gz has %.1f%% the number of packages in the web database" % dbpercent)
        raise SomethingFishyException(
            'It looks like the syncdb is less than half the size of the web db. WTF?')

    if dbpercent < 75.0:
        logger.warning(".db.tar.gz has %.1f%% the number of packages in the web database." % dbpercent)

    for p in [x for x in pkgs if x.name in in_sync_not_db]:
        logger.info("Adding package %s", p.name)
        pkg = Package(pkgname = p.name, arch = architecture, repo = repository)
        populate_pkg(pkg, p, timestamp=now)

    # packages in database and not in syncdb (remove from database)
    logger.debug("Set theory: Packages in database not in syncdb")
    in_db_not_sync = dbset - syncset
    for p in in_db_not_sync:
        logger.info("Removing package %s from database", p)
        Package.objects.get(
            pkgname=p, arch=architecture, repo=repository).delete()

    # packages in both database and in syncdb (update in database)
    logger.debug("Set theory: Packages in database and syncdb")
    pkg_in_both = syncset & dbset
    for p in [x for x in pkgs if x.name in pkg_in_both]:
        logger.debug("Looking for package updates")
        dbp = dbdict[p.name]
        # skip packages whose version-release is unchanged
        # NOTE(review): joining ver+rel means e.g. ('1.2','3') compares
        # equal to ('1.23',''); a tuple comparison would be safer — confirm
        # this can't occur in practice
        if ''.join((p.ver,p.rel)) == ''.join((dbp.pkgver,dbp.pkgrel)):
            continue
        logger.info("Updating package %s in database", p.name)
        pkg = Package.objects.get(
            pkgname=p.name,arch=architecture, repo=repository)
        populate_pkg(pkg, p, timestamp=now)

    logger.info('Finished updating Arch: %s' % archname)
-
-
def parse_inf(iofile):
    """
    Parses an Arch repo db information file, and returns a dict of variables.

    The file consists of blocks introduced by a '%NAME%' header line; every
    following non-blank line is one value for that variable, and a blank
    line ends the block. Only headers listed in REPOVARS are recognized.

    Arguments:
    iofile -- A StringIO, FileType, or other object with readlines method.

    Returns:
    dict mapping lower-cased block names to lists of their value lines.
    """
    store = {}
    lines = iofile.readlines()
    blockname = None
    # named 'total' so we don't shadow the builtin max()
    total = len(lines)
    i = 0
    while i < total:
        line = lines[i].strip()
        if len(line) > 0 and line[0] == '%' and line[1:-1].lower() in REPOVARS:
            blockname = line[1:-1].lower()
            logger.debug("Parsing package block %s", blockname)
            store[blockname] = []
            i += 1
            # consume this block's values up to the next blank line
            while i < total and len(lines[i].strip()) > 0:
                store[blockname].append(lines[i].strip())
                i += 1
        # here is where i would convert arrays to strings
        # based on count and type, but i dont think it is needed now
        i += 1

    return store
-
-
def parse_repo(repopath):
    """
    Parses an Arch repo db file, and returns a list of Pkg objects.

    Each Pkg gets a 'repo' attribute derived from the db filename.

    Arguments:
    repopath -- The path of a repository db file.

    """
    logger.info("Starting repo parsing")
    # NOTE(review): a missing file is only logged here, not returned from;
    # tarfile.open below will still raise for a truly absent path — confirm
    # whether an early return was intended
    if not os.path.exists(repopath):
        logger.error("Could not read file %s", repopath)

    logger.info("Reading repo tarfile %s", repopath)
    # the repo name is the filename with the trailing '.db.tar.gz' stripped
    filename = os.path.split(repopath)[1]
    rindex = filename.rindex('.db.tar.gz')
    reponame = filename[:rindex]

    repodb = tarfile.open(repopath,"r:gz")
    ## assuming well formed tar, with dir first then files after
    ## repo-add enforces this
    logger.debug("Starting package parsing")
    pkgs = []
    tpkg = None
    while True:
        tarinfo = repodb.next()
        # a directory entry (or end of archive) marks a package boundary:
        # flush the buffered desc/depends text into a Pkg object
        if tarinfo == None or tarinfo.isdir():
            if tpkg != None:
                tpkg.reset()
                data = parse_inf(tpkg)
                p = Pkg(data)
                p.repo = reponame
                logger.debug("Done parsing package %s", p.name)
                pkgs.append(p)
            if tarinfo == None:
                break
            # set new tpkg
            tpkg = StringIO()
        if tarinfo.isreg():
            # only the 'desc' and 'depends' members carry data we parse
            if os.path.split(tarinfo.name)[1] in ('desc','depends'):
                tpkg.write(repodb.extractfile(tarinfo).read())
                tpkg.write('\n') # just in case
    repodb.close()
    logger.info("Finished repo parsing")
    return pkgs
-
-
@transaction.commit_on_success
def main(argv=None):
    """
    Parses repo.db.tar.gz file and returns exit status.

    Runs inside a single database transaction (commit_on_success), so an
    exception anywhere rolls back all updates.

    Keyword Arguments:
    argv -- A list/array simulating a sys.argv (default None)
            If left empty, sys.argv is used

    """
    if argv == None:
        argv = sys.argv
    # NOTE(review): usage errors return 0; a nonzero exit status would be
    # more conventional for shell callers checking $?
    if len(argv) != 3:
        usage()
        return 0
    # check if arch is valid
    available_arches = [x.name for x in Arch.objects.all()]
    if argv[1] not in available_arches:
        usage()
        return 0
    else:
        primary_arch = argv[1]

    repo_file = os.path.normpath(argv[2])
    packages = parse_repo(repo_file)

    # sort packages by arch -- to handle noarch stuff
    packages_arches = {}
    for arch in available_arches:
        packages_arches[arch] = []

    for package in packages:
        # NOTE(review): this assumes an 'any' Arch row exists in the DB;
        # if not, an 'any' package raises KeyError here — confirm
        if package.arch in ('any', primary_arch):
            packages_arches[package.arch].append(package)
        else:
            # package built for some other architecture: skip it
            logger.warning("Package %s arch = %s" % (
                package.name,package.arch))
            #package.arch = primary_arch


    logger.info('Starting database updates.')
    for (arch, pkgs) in packages_arches.iteritems():
        if len(pkgs) > 0:
            db_update(arch,pkgs)
    logger.info('Finished database updates.')
    return 0
-
-
###
### Main eval
###

if __name__ == '__main__':
    # be chattier than the default WARNING level when run as a script
    logger.level = INFO
    sys.exit(main())
-
-# vim: set ts=4 sw=4 et: