diff options
Diffstat (limited to 'devel/management/commands/reporead.py')
-rw-r--r-- | devel/management/commands/reporead.py | 84 |
1 files changed, 56 insertions, 28 deletions
diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index e31478c5..09e48559 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -27,9 +27,17 @@ import logging from datetime import datetime from optparse import make_option +# New in 2.6, but fast (C implementation) in 2.7. We will use it over codecs if +# available. Eventually remove the codecs import completely. +io = None +try: + import io +except ImportError: + pass + from logging import ERROR, WARNING, INFO, DEBUG -from main.models import Arch, Package, Repo +from main.models import Arch, Package, PackageDepend, PackageFile, Repo logging.basicConfig( level=WARNING, @@ -79,14 +87,13 @@ class Pkg(object): """An interim 'container' object for holding Arch package data.""" bare = ( 'name', 'base', 'arch', 'desc', 'filename', 'md5sum', 'url', 'builddate', 'packager' ) - squash = ( 'license', ) number = ( 'csize', 'isize' ) def __init__(self, repo): self.repo = repo self.ver = None self.rel = None - for k in self.bare + self.squash + self.number: + for k in self.bare + self.number: setattr(self, k, None) def populate(self, values): @@ -94,8 +101,6 @@ class Pkg(object): # ensure we stay under our DB character limit if k in self.bare: setattr(self, k, v[0][:254]) - elif k in self.squash: - setattr(self, k, u', '.join(v)[:254]) elif k in self.number: setattr(self, k, long(v[0])) elif k == 'force': @@ -158,6 +163,20 @@ def find_user(userstring): # lookup more than strictly necessary. find_user.cache = {} +def create_depend(package, dep_str, optional=False): + depend = PackageDepend(pkg=package, optional=optional) + # lop off any description first + parts = dep_str.split(':', 1) + if len(parts) > 1: + depend.description = parts[1].strip() + match = re.match(r"^(.+?)((>=|<=|=|>|<)(.*))?$", parts[0].strip()) + if match: + depend.depname = match.group(1) + if match.group(2): + depend.depvcmp = match.group(2) + depend.save(force_insert=True) + return depend + def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): if repopkg.base: dbpkg.pkgbase = repopkg.base @@ -166,7 +185,6 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.pkgver = repopkg.ver dbpkg.pkgrel = repopkg.rel dbpkg.pkgdesc = repopkg.desc - dbpkg.license = repopkg.license dbpkg.url = repopkg.url dbpkg.filename = repopkg.filename dbpkg.compressed_size = repopkg.csize @@ -192,21 +210,22 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): populate_files(dbpkg, repopkg, force=force) dbpkg.packagedepend_set.all().delete() - if 'depends' in repopkg.__dict__: + if hasattr(repopkg, 'depends'): for y in repopkg.depends: - # make sure we aren't adding self depends.. - # yes *sigh* i have seen them in pkgbuilds - dpname, dpvcmp = re.match(r"([a-z0-9._+-]+)(.*)", y).groups() - if dpname == repopkg.name: - logger.warning('Package %s has a depend on itself', repopkg.name) - continue - dbpkg.packagedepend_set.create(depname=dpname, depvcmp=dpvcmp) - logger.debug('Added %s as dep for pkg %s', dpname, repopkg.name) + dep = create_depend(dbpkg, y) + if hasattr(repopkg, 'optdepends'): + for y in repopkg.optdepends: + dep = create_depend(dbpkg, y, True) - dbpkg.packagegroup_set.all().delete() - if 'groups' in repopkg.__dict__: + dbpkg.groups.all().delete() + if hasattr(repopkg, 'groups'): for y in repopkg.groups: - dbpkg.packagegroup_set.create(name=y) + dbpkg.groups.create(name=y) + + dbpkg.licenses.all().delete() + if hasattr(repopkg, 'license'): + for y in repopkg.license: + dbpkg.licenses.create(name=y) def populate_files(dbpkg, repopkg, force=False): @@ -222,12 +241,21 @@ def populate_files(dbpkg, repopkg, force=False): elif dbpkg.files_last_update > dbpkg.last_update: return # only delete files if we are reading a DB that contains them - if 'files' in repopkg.__dict__: + if hasattr(repopkg, 'files'): dbpkg.packagefile_set.all().delete() logger.info("adding %d files for package %s", len(repopkg.files), dbpkg.pkgname) - for x in repopkg.files: - dbpkg.packagefile_set.create(path=x) + for f in repopkg.files: + dirname, filename = f.rsplit('/', 1) + if filename == '': + filename = None + # this is basically like calling dbpkg.packagefile_set.create(), + # but much faster as we can skip a lot of the repeated code paths + pkgfile = PackageFile(pkg=dbpkg, + is_directory=(filename is None), + directory=dirname + '/', + filename=filename) + pkgfile.save() dbpkg.files_last_update = datetime.now() dbpkg.save() @@ -363,21 +391,21 @@ def parse_repo(repopath): logger.error("File does not have the proper extension") raise Exception("File does not have the proper extension") - repodb = tarfile.open(repopath,"r") - ## assuming well formed tar, with dir first then files after - ## repo-add enforces this + repodb = tarfile.open(repopath, "r") logger.debug("Starting package parsing") dbfiles = ('desc', 'depends', 'files') pkgs = {} for tarinfo in repodb.getmembers(): - if tarinfo.isdir(): - continue - elif tarinfo.isreg(): + if tarinfo.isreg(): pkgid, fname = os.path.split(tarinfo.name) if fname not in dbfiles: continue data_file = repodb.extractfile(tarinfo) - data_file = codecs.EncodedFile(data_file, 'utf-8') + if io is None: + data_file = codecs.EncodedFile(data_file, 'utf-8') + else: + data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), + encoding='utf=8') try: data = parse_info(data_file) p = pkgs.setdefault(pkgid, Pkg(reponame)) |