summaryrefslogtreecommitdiff
path: root/devel
diff options
context:
space:
mode:
authorLuke Shumaker <LukeShu@sbcglobal.net>2013-12-22 21:50:58 -0500
committerLuke Shumaker <LukeShu@sbcglobal.net>2013-12-22 21:50:58 -0500
commit2bfec5b17ca3ee3ea3b347c029b9d8dad3c5b4d0 (patch)
tree6bda86f6489fbdfdce67d282b20586fc6032735b /devel
parent3579e3cb34a53abd16da3af78be738c2a4dd0d5c (diff)
parent36f5e1df9afbeac1d04fee38dde4c28f81144a20 (diff)
Merge tag 'release_2013-12-19' into archweb-generic
Mirror URL page, other random stuff
Diffstat (limited to 'devel')
-rw-r--r--devel/management/commands/reporead.py78
1 files changed, 54 insertions, 24 deletions
diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py
index 8b591aeb..1945469f 100644
--- a/devel/management/commands/reporead.py
+++ b/devel/management/commands/reporead.py
@@ -82,8 +82,7 @@ class RepoPackage(object):
'md5sum', 'sha256sum', 'url', 'packager' )
number = ( 'csize', 'isize' )
collections = ( 'depends', 'optdepends', 'makedepends', 'checkdepends',
- 'conflicts', 'provides', 'replaces', 'groups', 'license',
- 'files' )
+ 'conflicts', 'provides', 'replaces', 'groups', 'license')
def __init__(self, repo):
self.repo = repo
@@ -98,7 +97,6 @@ class RepoPackage(object):
setattr(self, k, ())
self.builddate = None
self.files = None
- self.has_files = False
def populate(self, values):
for k, v in values.iteritems():
@@ -122,14 +120,22 @@ class RepoPackage(object):
logger.warning(
'Package %s had unparsable build date %s',
self.name, v[0])
- elif k == 'files':
- self.files = tuple(v)
- self.has_files = True
else:
# anything left in collections
setattr(self, k, tuple(v))
@property
+ def files_list(self):
+ data_file = io.TextIOWrapper(io.BytesIO(self.files), encoding='UTF-8')
+ try:
+ info = parse_info(data_file)
+ except UnicodeDecodeError:
+ logger.warn("Could not correctly decode files list for %s",
+ self.name)
+ return None
+ return info['files']
+
+ @property
def full_version(self):
'''Very similar to the main.models.Package method.'''
if self.epoch > 0:
@@ -265,6 +271,24 @@ def delete_pkg_files(dbpkg):
cursor.execute('DELETE FROM package_files WHERE pkg_id = %s', [dbpkg.id])
+def batched_bulk_create(model, all_objects):
+ cutoff = 10000
+ length = len(all_objects)
+ if length < cutoff:
+ return model.objects.bulk_create(all_objects)
+
+ def chunks():
+ offset = 0
+ while offset < length:
+ yield all_objects[offset:offset + cutoff]
+ offset += cutoff
+
+ for items in chunks():
+ ret = model.objects.bulk_create(items)
+
+ return ret
+
+
def populate_files(dbpkg, repopkg, force=False):
if not force:
if not pkg_same_version(repopkg, dbpkg):
@@ -278,15 +302,18 @@ def populate_files(dbpkg, repopkg, force=False):
return
# only delete files if we are reading a DB that contains them
- if repopkg.has_files:
+ if repopkg.files:
+ files = repopkg.files_list
+ # we had files data, but it couldn't be parsed, so skip
+ if not files:
+ return
delete_pkg_files(dbpkg)
logger.info("adding %d files for package %s",
- len(repopkg.files), dbpkg.pkgname)
+ len(files), dbpkg.pkgname)
pkg_files = []
# sort in normal alpha-order that pacman uses, rather than makepkg's
# default breadth-first, directory-first ordering
- files = sorted(repopkg.files)
- for f in files:
+ for f in sorted(files):
if '/' in f:
dirname, filename = f.rsplit('/', 1)
dirname += '/'
@@ -299,7 +326,7 @@ def populate_files(dbpkg, repopkg, force=False):
directory=dirname,
filename=filename)
pkg_files.append(pkgfile)
- PackageFile.objects.bulk_create(pkg_files)
+ batched_bulk_create(PackageFile, pkg_files)
dbpkg.files_last_update = now()
dbpkg.save()
@@ -494,24 +521,27 @@ def parse_repo(repopath):
repodb = tarfile.open(repopath, "r")
logger.debug("Starting package parsing")
- dbfiles = ('desc', 'depends', 'files')
newpkg = lambda: RepoPackage(reponame)
pkgs = defaultdict(newpkg)
for tarinfo in repodb.getmembers():
if tarinfo.isreg():
pkgid, fname = os.path.split(tarinfo.name)
- if fname not in dbfiles:
- continue
- data_file = repodb.extractfile(tarinfo)
- data_file = io.TextIOWrapper(io.BytesIO(data_file.read()),
- encoding='UTF-8')
- try:
- pkgs[pkgid].populate(parse_info(data_file))
- except UnicodeDecodeError:
- logger.warn("Could not correctly decode %s, skipping file",
- tarinfo.name)
- data_file.close()
- del data_file
+ if fname == 'files':
+ # don't parse yet for speed and memory consumption reasons
+ files_data = repodb.extractfile(tarinfo)
+ pkgs[pkgid].files = files_data.read()
+ del files_data
+ elif fname in ('desc', 'depends'):
+ data_file = repodb.extractfile(tarinfo)
+ data_file = io.TextIOWrapper(io.BytesIO(data_file.read()),
+ encoding='UTF-8')
+ try:
+ pkgs[pkgid].populate(parse_info(data_file))
+ except UnicodeDecodeError:
+ logger.warn("Could not correctly decode %s, skipping file",
+ tarinfo.name)
+ data_file.close()
+ del data_file
logger.debug("Done parsing file %s/%s", pkgid, fname)