summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dan McGee <dan@archlinux.org> 2009-04-06 00:16:54 -0500
committer: Dan McGee <dan@archlinux.org> 2009-04-06 00:16:54 -0500
commit: 5a551b2351ffa341eca24d0e4ba12876abb77b20 (patch)
tree: cf6bc828dcb2de39c16cc4097e5ee9d3edf72da9
parent: 6d4ecc5023d7d8d1bcd57e217f4f6041a0fc086e (diff)
reporead: dramatically reduce number of queries
Commit 789b5445cf originally set out to fix this problem but was not complete. When doing an update of [extra], the code still did > 2000 database queries because of an ill-placed call to QuerySet.get(). By using a dictionary lookup, we reduce the number of database queries to a measly 4 when updating extra and no new packages are present.

Signed-off-by: Dan McGee <dan@archlinux.org>
-rwxr-xr-x scripts/reporead.py 10
1 files changed, 7 insertions, 3 deletions
diff --git a/scripts/reporead.py b/scripts/reporead.py
index c9d9e9e9..047463bd 100755
--- a/scripts/reporead.py
+++ b/scripts/reporead.py
@@ -165,8 +165,12 @@ def db_update(archname, pkgs):
dbpkgs = Package.objects.filter(arch=architecture, repo=repository)
# It makes sense to fully evaluate our DB query now because we will
# be using 99% of the objects in our "in both sets" loop. Force eval
- # by calling len() on the QuerySet.
- dblist = list(dbpkgs)
+ # by calling list() on the QuerySet.
+ list(dbpkgs)
+ # This makes our inner loop where we find packages by name *way* more
+ # efficient by not having to go to the database for each package to
+ # SELECT them by name.
+ dbdict = dict([(pkg.pkgname, pkg) for pkg in dbpkgs])
now = datetime.now()
# go go set theory!
@@ -229,7 +233,7 @@ def db_update(archname, pkgs):
pkg_in_both = syncset & dbset
for p in [x for x in pkgs if x.name in pkg_in_both]:
logger.debug("Looking for package updates")
- dbp = dbpkgs.get(pkgname=p.name)
+ dbp = dbdict[p.name]
if ''.join((p.ver,p.rel)) == ''.join((dbp.pkgver,dbp.pkgrel)):
continue
logger.info("Updating package %s in database", p.name)