diff options
author | Luke Shumaker <LukeShu@sbcglobal.net> | 2013-12-22 21:50:58 -0500 |
---|---|---|
committer | Luke Shumaker <LukeShu@sbcglobal.net> | 2013-12-22 21:50:58 -0500 |
commit | 2bfec5b17ca3ee3ea3b347c029b9d8dad3c5b4d0 (patch) | |
tree | 6bda86f6489fbdfdce67d282b20586fc6032735b /devel | |
parent | 3579e3cb34a53abd16da3af78be738c2a4dd0d5c (diff) | |
parent | 36f5e1df9afbeac1d04fee38dde4c28f81144a20 (diff) |
Merge tag 'release_2013-12-19' into archweb-generic
Mirror URL page, other random stuff
Diffstat (limited to 'devel')
-rw-r--r-- | devel/management/commands/reporead.py | 78 |
1 files changed, 54 insertions, 24 deletions
diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 8b591aeb..1945469f 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -82,8 +82,7 @@ class RepoPackage(object): 'md5sum', 'sha256sum', 'url', 'packager' ) number = ( 'csize', 'isize' ) collections = ( 'depends', 'optdepends', 'makedepends', 'checkdepends', - 'conflicts', 'provides', 'replaces', 'groups', 'license', - 'files' ) + 'conflicts', 'provides', 'replaces', 'groups', 'license') def __init__(self, repo): self.repo = repo @@ -98,7 +97,6 @@ class RepoPackage(object): setattr(self, k, ()) self.builddate = None self.files = None - self.has_files = False def populate(self, values): for k, v in values.iteritems(): @@ -122,14 +120,22 @@ class RepoPackage(object): logger.warning( 'Package %s had unparsable build date %s', self.name, v[0]) - elif k == 'files': - self.files = tuple(v) - self.has_files = True else: # anything left in collections setattr(self, k, tuple(v)) @property + def files_list(self): + data_file = io.TextIOWrapper(io.BytesIO(self.files), encoding='UTF-8') + try: + info = parse_info(data_file) + except UnicodeDecodeError: + logger.warn("Could not correctly decode files list for %s", + self.name) + return None + return info['files'] + + @property def full_version(self): '''Very similar to the main.models.Package method.''' if self.epoch > 0: @@ -265,6 +271,24 @@ def delete_pkg_files(dbpkg): cursor.execute('DELETE FROM package_files WHERE pkg_id = %s', [dbpkg.id]) +def batched_bulk_create(model, all_objects): + cutoff = 10000 + length = len(all_objects) + if length < cutoff: + return model.objects.bulk_create(all_objects) + + def chunks(): + offset = 0 + while offset < length: + yield all_objects[offset:offset + cutoff] + offset += cutoff + + for items in chunks(): + ret = model.objects.bulk_create(items) + + return ret + + def populate_files(dbpkg, repopkg, force=False): if not force: if not pkg_same_version(repopkg, dbpkg): @@ -278,15 +302,18 @@ def populate_files(dbpkg, repopkg, force=False): return # only delete files if we are reading a DB that contains them - if repopkg.has_files: + if repopkg.files: + files = repopkg.files_list + # we had files data, but it couldn't be parsed, so skip + if not files: + return delete_pkg_files(dbpkg) logger.info("adding %d files for package %s", - len(repopkg.files), dbpkg.pkgname) + len(files), dbpkg.pkgname) pkg_files = [] # sort in normal alpha-order that pacman uses, rather than makepkg's # default breadth-first, directory-first ordering - files = sorted(repopkg.files) - for f in files: + for f in sorted(files): if '/' in f: dirname, filename = f.rsplit('/', 1) dirname += '/' @@ -299,7 +326,7 @@ def populate_files(dbpkg, repopkg, force=False): directory=dirname, filename=filename) pkg_files.append(pkgfile) - PackageFile.objects.bulk_create(pkg_files) + batched_bulk_create(PackageFile, pkg_files) dbpkg.files_last_update = now() dbpkg.save() @@ -494,24 +521,27 @@ def parse_repo(repopath): repodb = tarfile.open(repopath, "r") logger.debug("Starting package parsing") - dbfiles = ('desc', 'depends', 'files') newpkg = lambda: RepoPackage(reponame) pkgs = defaultdict(newpkg) for tarinfo in repodb.getmembers(): if tarinfo.isreg(): pkgid, fname = os.path.split(tarinfo.name) - if fname not in dbfiles: - continue - data_file = repodb.extractfile(tarinfo) - data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), - encoding='UTF-8') - try: - pkgs[pkgid].populate(parse_info(data_file)) - except UnicodeDecodeError: - logger.warn("Could not correctly decode %s, skipping file", - tarinfo.name) - data_file.close() - del data_file + if fname == 'files': + # don't parse yet for speed and memory consumption reasons + files_data = repodb.extractfile(tarinfo) + pkgs[pkgid].files = files_data.read() + del files_data + elif fname in ('desc', 'depends'): + data_file = repodb.extractfile(tarinfo) + data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), + encoding='UTF-8') + try: + pkgs[pkgid].populate(parse_info(data_file)) + except UnicodeDecodeError: + logger.warn("Could not correctly decode %s, skipping file", + tarinfo.name) + data_file.close() + del data_file logger.debug("Done parsing file %s/%s", pkgid, fname) |