summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Dan McGee <dan@archlinux.org> 2014-10-19 14:19:05 -0500
committer: Dan McGee <dan@archlinux.org> 2014-10-19 14:19:05 -0500
commit: 1ff2e37e049004852681794537417a1947bf6f18 (patch)
tree: 09ab371e9c0d8a4067ac04ea5715ae53ac2ae2b3
parent: 7c26f6b7a4d29faede58d2feb13ef961e4725637 (diff)
Simplify last modified and etags processing for feeds
We had this elaborate system set up with caching and invalidation, which is overkill since we cache the result of the view anyway. Just hit the database when needed to find the last change to the respective model class and be done with it.

Signed-off-by: Dan McGee <dan@archlinux.org>
-rw-r--r-- feeds.py 14
-rw-r--r-- main/models.py 6
-rw-r--r-- main/utils.py 42
-rw-r--r-- news/models.py 7
4 files changed, 12 insertions, 57 deletions
diff --git a/feeds.py b/feeds.py
index feb8a84a..d1836178 100644
--- a/feeds.py
+++ b/feeds.py
@@ -4,11 +4,11 @@ from pytz import utc
from django.contrib.sites.models import Site
from django.contrib.syndication.views import Feed
+from django.db import connection
from django.db.models import Q
from django.utils.feedgenerator import Rss201rev2Feed
from django.views.decorators.http import condition
-from main.utils import retrieve_latest
from main.models import Arch, Repo, Package
from news.models import News
from releng.models import Release
@@ -64,13 +64,15 @@ class GuidNotPermalinkFeed(Rss201rev2Feed):
def package_etag(request, *args, **kwargs):
- latest = retrieve_latest(Package)
+ latest = package_last_modified(request)
if latest:
return hashlib.md5(str(kwargs) + str(latest)).hexdigest()
return None
def package_last_modified(request, *args, **kwargs):
- return retrieve_latest(Package)
+ cursor = connection.cursor()
+ cursor.execute("SELECT MAX(last_update) FROM packages")
+ return cursor.fetchone()[0]
class PackageFeed(Feed):
@@ -148,13 +150,15 @@ class PackageFeed(Feed):
def news_etag(request, *args, **kwargs):
- latest = retrieve_latest(News, 'last_modified')
+ latest = news_last_modified(request)
if latest:
return hashlib.md5(str(latest)).hexdigest()
return None
def news_last_modified(request, *args, **kwargs):
- return retrieve_latest(News, 'last_modified')
+ cursor = connection.cursor()
+ cursor.execute("SELECT MAX(last_modified) FROM news")
+ return cursor.fetchone()[0]
class NewsFeed(Feed):
diff --git a/main/models.py b/main/models.py
index 09b1adc0..1b95f3fa 100644
--- a/main/models.py
+++ b/main/models.py
@@ -443,12 +443,8 @@ class PackageFile(models.Model):
db_table = 'package_files'
-# connect signals needed to keep cache in line with reality
-from main.utils import refresh_latest
-from django.db.models.signals import pre_save, post_save
+from django.db.models.signals import pre_save
-post_save.connect(refresh_latest, sender=Package,
- dispatch_uid="main.models")
# note: reporead sets the 'created' field on Package objects, so no signal
# listener is set up here to do so
pre_save.connect(set_created_field, sender=Donor,
diff --git a/main/utils.py b/main/utils.py
index 97cc540a..cf156566 100644
--- a/main/utils.py
+++ b/main/utils.py
@@ -12,11 +12,6 @@ from django.utils.timezone import now
from django.template.defaultfilters import slugify
-CACHE_TIMEOUT = 1800
-INVALIDATE_TIMEOUT = 10
-CACHE_LATEST_PREFIX = 'cache_latest_'
-
-
def cache_function_key(func, args, kwargs):
raw = [func.__name__, func.__module__, args, kwargs]
pickled = pickle.dumps(raw, protocol=pickle.HIGHEST_PROTOCOL)
@@ -76,43 +71,6 @@ def format_http_headers(request):
make_choice = lambda l: [(str(m), str(m)) for m in l]
-# These are in here because we would be jumping around in some import circles
-# and hoops otherwise. The only thing currently using these keys is the feed
-# caching stuff.
-
-def refresh_latest(**kwargs):
- '''A post_save signal handler to clear out the cached latest value for a
- given model.'''
- cache_key = CACHE_LATEST_PREFIX + kwargs['sender'].__name__
- # We could delete the value, but that could open a race condition
- # where the new data wouldn't have been committed yet by the calling
- # thread. Instead, explicitly set it to None for a short amount of time.
- # Hopefully by the time it expires we will have committed, and the cache
- # will be valid again. See "Scaling Django" by Mike Malone, slide 30.
- cache.set(cache_key, None, INVALIDATE_TIMEOUT)
-
-
-def retrieve_latest(sender, latest_by=None):
- # we could break this down based on the request url, but it would probably
- # cost us more in query time to do so.
- cache_key = CACHE_LATEST_PREFIX + sender.__name__
- latest = cache.get(cache_key)
- if latest:
- return latest
- try:
- if latest_by is None:
- latest_by = sender._meta.get_latest_by
- latest = sender.objects.values(latest_by).latest(latest_by)[latest_by]
- # Using add means "don't overwrite anything in there". What could be in
- # there is an explicit None value that our refresh signal set, which
- # means we want to avoid race condition possibilities for a bit.
- cache.add(cache_key, latest, CACHE_TIMEOUT)
- return latest
- except sender.DoesNotExist:
- pass
- return None
-
-
def set_created_field(sender, **kwargs):
'''This will set the 'created' field on any object to the current UTC time
if it is unset.
diff --git a/news/models.py b/news/models.py
index d51db7c7..985c1088 100644
--- a/news/models.py
+++ b/news/models.py
@@ -45,12 +45,9 @@ def set_news_fields(sender, **kwargs):
news.guid = 'tag:%s,%s:%s' % (Site.objects.get_current(),
current_time.strftime('%Y-%m-%d'), news.get_absolute_url())
-# connect signals needed to keep cache in line with reality
-from main.utils import refresh_latest
-from django.db.models.signals import pre_save, post_save
-post_save.connect(refresh_latest, sender=News,
- dispatch_uid="news.models")
+from django.db.models.signals import pre_save
+
pre_save.connect(set_news_fields, sender=News,
dispatch_uid="news.models")