From 56af1e4f50587333da4e8c38800be8f720af98b7 Mon Sep 17 00:00:00 2001
From: Dan McGee
Date: Mon, 12 Dec 2011 09:58:53 -0600
Subject: reporead: more efficient deletion of files

Rather than delegating to Django and batch deletion by ID, force issuing
of a single delete query to clear out all existing file objects when
necessary. This should speed up the deletion and update of packages with
a lot of files by a non-trivial amount.

Signed-off-by: Dan McGee
---
 devel/management/commands/reporead.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'devel/management')

diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py
index 7e058afc..aa04a31f 100644
--- a/devel/management/commands/reporead.py
+++ b/devel/management/commands/reporead.py
@@ -239,6 +239,12 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None):
             and pkg.rel == dbpkg.pkgrel
             and pkg.epoch == dbpkg.epoch
 
+def delete_pkg_files(dbpkg):
+    database = router.db_for_write(Package, instance=dbpkg)
+    cursor = connections[database].cursor()
+    cursor.execute('DELETE FROM package_files WHERE pkg_id = %s', [dbpkg.id])
+
+
 def populate_files(dbpkg, repopkg, force=False):
     if not force:
         if not pkg_same_version(repopkg, dbpkg):
@@ -253,7 +259,7 @@ def populate_files(dbpkg, repopkg, force=False):
 
     # only delete files if we are reading a DB that contains them
     if repopkg.has_files:
-        dbpkg.packagefile_set.all().delete()
+        delete_pkg_files(dbpkg)
         logger.info("adding %d files for package %s",
                 len(repopkg.files), dbpkg.pkgname)
         for f in repopkg.files:
@@ -262,6 +268,7 @@ def populate_files(dbpkg, repopkg, force=False):
                 filename = None
             # this is basically like calling dbpkg.packagefile_set.create(),
             # but much faster as we can skip a lot of the repeated code paths
+            # TODO use Django 1.4 bulk_create
             pkgfile = PackageFile(pkg=dbpkg,
                                   is_directory=(filename is None),
                                   directory=dirname + '/',
@@ -361,6 +368,7 @@ def db_update(archname, reponame, pkgs, force=False):
             with transaction.commit_on_success():
                 # no race condition here as long as simultaneous threads both
                 # issue deletes; second delete will be a no-op
+                delete_pkg_files(dbpkg)
                 dbpkg.delete()
 
     # packages in both database and in syncdb (update in database)
-- 
cgit v1.2.3-54-g00ecf