From 5a551b2351ffa341eca24d0e4ba12876abb77b20 Mon Sep 17 00:00:00 2001
From: Dan McGee
Date: Mon, 6 Apr 2009 00:16:54 -0500
Subject: reporead: dramatically reduce number of queries

Commit 789b5445cf originally set out to fix this problem but was not
complete. When doing an update of [extra], the code still did > 2000
database queries because of an ill-placed call to QuerySet.get(). By
using a dictionary lookup, we reduce the number of database queries to a
measly 4 when updating extra and no new packages are present.

Signed-off-by: Dan McGee
---
 scripts/reporead.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/scripts/reporead.py b/scripts/reporead.py
index c9d9e9e9..047463bd 100755
--- a/scripts/reporead.py
+++ b/scripts/reporead.py
@@ -165,8 +165,12 @@ def db_update(archname, pkgs):
     dbpkgs = Package.objects.filter(arch=architecture, repo=repository)
     # It makes sense to fully evaluate our DB query now because we will
     # be using 99% of the objects in our "in both sets" loop. Force eval
-    # by calling len() on the QuerySet.
-    dblist = list(dbpkgs)
+    # by calling list() on the QuerySet.
+    list(dbpkgs)
+    # This makes our inner loop where we find packages by name *way* more
+    # efficient by not having to go to the database for each package to
+    # SELECT them by name.
+    dbdict = dict([(pkg.pkgname, pkg) for pkg in dbpkgs])
     now = datetime.now()
 
     # go go set theory!
@@ -229,7 +233,7 @@ def db_update(archname, pkgs):
     pkg_in_both = syncset & dbset
     for p in [x for x in pkgs if x.name in pkg_in_both]:
         logger.debug("Looking for package updates")
-        dbp = dbpkgs.get(pkgname=p.name)
+        dbp = dbdict[p.name]
         if ''.join((p.ver,p.rel)) == ''.join((dbp.pkgver,dbp.pkgrel)):
             continue
         logger.info("Updating package %s in database", p.name)
-- 
cgit v1.2.3-54-g00ecf