From ee6bf2782068b917232c71189aea0011b47e876d Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 09:10:28 -0500 Subject: Small performance tweaks to mirror status JSON encoding Do a few things to speed up the encoding of the JSON, including better usage of list comprehensions, less dynamic setattr() usage, and removal of the queryset specialization since we can easily do it outside of the encoder. Signed-off-by: Dan McGee --- mirrors/utils.py | 30 ++++++++++++++---------------- mirrors/views.py | 7 ++----- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/mirrors/utils.py b/mirrors/utils.py index fe18cd6a..4484fa24 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -84,19 +84,16 @@ GROUP BY l.url_id, u.mirror_id def annotate_url(url, url_data): '''Given a MirrorURL object, add a few more attributes to it regarding status, including completion_pct, delay, and score.''' - known_attrs = ( - ('success_count', 0), - ('check_count', 0), - ('completion_pct', None), - ('duration_avg', None), - ('duration_stddev', None), - ('last_check', None), - ('last_sync', None), - ('delay', None), - ('score', None), - ) - for k, v in known_attrs: - setattr(url, k, v) + # set up some sane default values in case we are missing anything + url.success_count = 0 + url.check_count = 0 + url.completion_pct = None + url.duration_avg = None + url.duration_stddev = None + url.last_check = None + url.last_sync = None + url.delay = None + url.score = None for k, v in url_data.items(): if k not in ('url_id', 'mirror_id'): setattr(url, k, v) @@ -107,7 +104,7 @@ def annotate_url(url, url_data): if url.delay is not None: hours = url.delay.days * 24.0 + url.delay.seconds / 3600.0 - if url.completion_pct > 0: + if url.completion_pct > 0.0: divisor = url.completion_pct else: # arbitrary small value @@ -115,6 +112,8 @@ def annotate_url(url, url_data): stddev = url.duration_stddev or 0.0 url.score = (hours + url.duration_avg + stddev) / divisor + return url + def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): cutoff_time = now() - cutoff @@ -133,8 +132,7 @@ def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): id__in=valid_urls).order_by('mirror__id', 'url') if urls: - for url in urls: - annotate_url(url, url_data.get(url.id, {})) + urls = [annotate_url(url, url_data.get(url.id, {})) for url in urls] last_check = max([u.last_check for u in urls if u.last_check]) num_checks = max([u.check_count for u in urls]) check_info = MirrorLog.objects.filter(check_time__gte=cutoff_time) diff --git a/mirrors/views.py b/mirrors/views.py index 55c40c4d..0bf0a267 100644 --- a/mirrors/views.py +++ b/mirrors/views.py @@ -275,9 +275,6 @@ class MirrorStatusJSONEncoder(DjangoJSONEncoder): if isinstance(obj, timedelta): # always returned as integer seconds return obj.days * 24 * 3600 + obj.seconds - if hasattr(obj, '__iter__'): - # mainly for queryset serialization - return list(obj) if isinstance(obj, MirrorUrl): data = {attr: getattr(obj, attr) for attr in self.url_attributes} country = obj.country @@ -298,8 +295,8 @@ class ExtendedMirrorStatusJSONEncoder(MirrorStatusJSONEncoder): if isinstance(obj, MirrorUrl): data = super(ExtendedMirrorStatusJSONEncoder, self).default(obj) cutoff = now() - DEFAULT_CUTOFF - data['logs'] = obj.logs.filter( - check_time__gte=cutoff).order_by('check_time') + data['logs'] = list(obj.logs.filter( + check_time__gte=cutoff).order_by('check_time')) return data if isinstance(obj, MirrorLog): return {attr: getattr(obj, attr) for attr in self.log_attributes} -- cgit v1.2.3 From 087b4b00031fed52eeddf05ae36825cb498680f0 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 09:11:48 -0500 Subject: Remove queryset specialization in JSON encoder Signed-off-by: Dan McGee --- mirrors/views.py | 5 +---- releng/views.py | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/mirrors/views.py b/mirrors/views.py index 0bf0a267..1a9741ed 100644 --- a/mirrors/views.py +++ b/mirrors/views.py @@ -323,9 +323,6 @@ class LocationJSONEncoder(DjangoJSONEncoder): '''Base JSONEncoder extended to handle CheckLocation objects.''' def default(self, obj): - if hasattr(obj, '__iter__'): - # mainly for queryset serialization - return list(obj) if isinstance(obj, CheckLocation): return { 'id': obj.pk, @@ -341,7 +338,7 @@ class LocationJSONEncoder(DjangoJSONEncoder): def locations_json(request): data = {} data['version'] = 1 - data['locations'] = CheckLocation.objects.all().order_by('pk') + data['locations'] = list(CheckLocation.objects.all().order_by('pk')) to_json = json.dumps(data, ensure_ascii=False, cls=LocationJSONEncoder) response = HttpResponse(to_json, content_type='application/json') return response diff --git a/releng/views.py b/releng/views.py index ef81a65c..dbb65c2e 100644 --- a/releng/views.py +++ b/releng/views.py @@ -247,9 +247,6 @@ class ReleaseJSONEncoder(DjangoJSONEncoder): 'created', 'md5_sum', 'sha1_sum') def default(self, obj): - if hasattr(obj, '__iter__'): - # mainly for queryset serialization - return list(obj) if isinstance(obj, Release): data = {attr: getattr(obj, attr) or None for attr in self.release_attributes} @@ -276,7 +273,7 @@ def releases_json(request): data = { 'version': 1, - 'releases': releases, + 'releases': list(releases), 'latest_version': latest_version, } to_json = json.dumps(data, ensure_ascii=False, cls=ReleaseJSONEncoder) -- cgit v1.2.3 From 86fd0b722afb53670ef9a155a3c55d688f275c6d Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 09:39:06 -0500 Subject: Reduce complexity of status data URL query Get rid of all the junk trying to only return URLs that have been checked in the last 24 hours; it just isn't worth it. Instead, do that screening only in the views that need it, namely the HTML status page. Signed-off-by: Dan McGee --- mirrors/utils.py | 14 +++++--------- mirrors/views.py | 5 ++++- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/mirrors/utils.py b/mirrors/utils.py index 4484fa24..8edceb9b 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -118,20 +118,16 @@ def annotate_url(url, url_data): def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): cutoff_time = now() - cutoff - valid_urls = MirrorUrl.objects.filter( - logs__check_time__gte=cutoff_time).distinct() - + urls = MirrorUrl.objects.select_related( + 'mirror', 'protocol').order_by('mirror__id', 'url') if mirror_id: - valid_urls = valid_urls.filter(mirror_id=mirror_id) + urls = urls.filter(mirror_id=mirror_id) if not show_all: - valid_urls = valid_urls.filter(active=True, mirror__active=True, + urls = urls.filter(active=True, mirror__active=True, mirror__public=True) - url_data = status_data(cutoff, mirror_id) - urls = MirrorUrl.objects.select_related('mirror', 'protocol').filter( - id__in=valid_urls).order_by('mirror__id', 'url') - if urls: + url_data = status_data(cutoff, mirror_id) urls = [annotate_url(url, url_data.get(url.id, {})) for url in urls] last_check = max([u.last_check for u in urls if u.last_check]) num_checks = max([u.check_count for u in urls]) diff --git a/mirrors/views.py b/mirrors/views.py index 1a9741ed..90787763 100644 --- a/mirrors/views.py +++ b/mirrors/views.py @@ -245,7 +245,10 @@ def status(request, tier=None): if tier is not None and url.mirror.tier != tier: continue # split them into good and bad lists based on delay - if not url.delay or url.delay > bad_timedelta: + if url.completion_pct is None: + # skip URLs that have never been checked + continue + elif not url.delay or url.delay > bad_timedelta: bad_urls.append(url) else: good_urls.append(url) -- cgit v1.2.3 From 48509bfdbdadb8255f32c56d993f91262516916f Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 10:00:48 -0500 Subject: Simplify/clean-up finding of download mirror Signed-off-by: Dan McGee --- mirrors/utils.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/mirrors/utils.py b/mirrors/utils.py index 8edceb9b..fb867dee 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -189,23 +189,21 @@ def get_mirror_url_for_download(cutoff=DEFAULT_CUTOFF): if status_data['check_time__max'] is not None: min_check_time = status_data['check_time__max'] - timedelta(minutes=5) min_sync_time = status_data['last_sync__max'] - timedelta(minutes=20) - best_logs = MirrorLog.objects.filter(is_success=True, + best_logs = MirrorLog.objects.select_related('url').filter( + is_success=True, check_time__gte=min_check_time, last_sync__gte=min_sync_time, url__active=True, url__mirror__public=True, url__mirror__active=True, url__protocol__default=True).order_by( 'duration')[:1] if best_logs: - return MirrorUrl.objects.get(id=best_logs[0].url_id) + return best_logs[0].url mirror_urls = MirrorUrl.objects.filter(active=True, - mirror__public=True, mirror__active=True, protocol__default=True) - # look first for a country-agnostic URL, then fall back to any HTTP URL - filtered_urls = mirror_urls.filter(country='')[:1] - if not filtered_urls: - filtered_urls = mirror_urls[:1] - if not filtered_urls: + mirror__public=True, mirror__active=True, + protocol__default=True)[:1] + if not mirror_urls: return None - return filtered_urls[0] + return mirror_urls[0] # vim: set ts=4 sw=4 et: -- cgit v1.2.3 From f53ea0b102d0251a98116aad445d55570c71931c Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 10:01:53 -0500 Subject: Move caching of function data back to get_mirror_statuses We've moved this around a few times, including changing the parameters to ensure they are stable (commit bdfa22500f4). However, the bulk of the work takes place in the mashing up of the data, so cache the full result rather than just the result of a single query. Signed-off-by: Dan McGee --- mirrors/utils.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mirrors/utils.py b/mirrors/utils.py index fb867dee..5685c9c8 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -21,9 +21,7 @@ def dictfetchall(cursor): for row in cursor.fetchall() ] -@cache_function(178) -def status_data(cutoff=DEFAULT_CUTOFF, mirror_id=None): - cutoff_time = now() - cutoff +def status_data(cutoff_time, mirror_id=None): if mirror_id is not None: params = [cutoff_time, mirror_id] mirror_where = 'AND u.mirror_id = %s' @@ -115,6 +113,7 @@ def annotate_url(url, url_data): return url +@cache_function(178) def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): cutoff_time = now() - cutoff @@ -127,7 +126,7 @@ def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): mirror__public=True) if urls: - url_data = status_data(cutoff, mirror_id) + url_data = status_data(cutoff_time, mirror_id) urls = [annotate_url(url, url_data.get(url.id, {})) for url in urls] last_check = max([u.last_check for u in urls if u.last_check]) num_checks = max([u.check_count for u in urls]) -- cgit v1.2.3 From 72535f8e93b144528baf98c4998430ba3a030a70 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 17:04:39 -0500 Subject: Update django_countries to latest release Signed-off-by: Dan McGee --- requirements.txt | 2 +- requirements_prod.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index e2b49aba..5d544e3c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,11 @@ -e git+git://github.com/fredj/cssmin.git@master#egg=cssmin --e git+git://github.com/SmileyChris/django-countries.git@a2c924074dbe2f0b9b3059bf70064aeadf5643ed#egg=django-countries Django==1.7 IPy==0.81 Jinja2==2.7.3 Markdown==2.4.1 bencode==1.0 django-jinja==1.0.4 +django_countries==3.0 jsmin==2.0.11 pgpdump==1.5 pytz>=2014.7 diff --git a/requirements_prod.txt b/requirements_prod.txt index 020a5ac4..e3bc0ae2 100644 --- a/requirements_prod.txt +++ b/requirements_prod.txt @@ -1,11 +1,11 @@ -e git+git://github.com/fredj/cssmin.git@master#egg=cssmin --e git+git://github.com/SmileyChris/django-countries.git@a2c924074dbe2f0b9b3059bf70064aeadf5643ed#egg=django-countries Django==1.7 IPy==0.81 Jinja2==2.7.3 Markdown==2.4.1 bencode==1.0 django-jinja==1.0.4 +django_countries==3.0 jsmin==2.0.11 pgpdump==1.5 psycopg2==2.5.4 -- cgit v1.2.3 From c86ef5c326212f09a22f5ae3502a0bc79033a23a Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 17:08:56 -0500 Subject: Use cache_page on mirror status JSON Signed-off-by: Dan McGee --- mirrors/views.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mirrors/views.py b/mirrors/views.py index 90787763..c2736da8 100644 --- a/mirrors/views.py +++ b/mirrors/views.py @@ -11,6 +11,7 @@ from django.db.models import Q from django.http import Http404, HttpResponse from django.shortcuts import get_object_or_404, redirect, render from django.utils.timezone import now +from django.views.decorators.cache import cache_page from django.views.decorators.csrf import csrf_exempt from django.views.decorators.http import condition from django_countries.data import COUNTRIES @@ -306,6 +307,7 @@ class ExtendedMirrorStatusJSONEncoder(MirrorStatusJSONEncoder): return super(ExtendedMirrorStatusJSONEncoder, self).default(obj) +@cache_page(67) @condition(last_modified_func=status_last_modified) def status_json(request, tier=None): if tier is not None: -- cgit v1.2.3 From 9f20cf7c81a38283fa08552f59a149d6abd76516 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 17:32:09 -0500 Subject: Remove old suggested recommended settings Signed-off-by: Dan McGee --- local_settings.py.example | 4 ---- 1 file changed, 4 deletions(-) diff --git a/local_settings.py.example b/local_settings.py.example index df141521..ffd6d8a6 100644 --- a/local_settings.py.example +++ b/local_settings.py.example @@ -1,5 +1,3 @@ -### Django settings for archlinux project. - ## Debug settings DEBUG = False TEMPLATE_DEBUG = False @@ -41,8 +39,6 @@ CACHES = { #'LOCATION': '127.0.0.1:11211', } } -CACHE_MIDDLEWARE_KEY_PREFIX = 'arch' -CACHE_MIDDLEWARE_SECONDS = 300 ## Use secure session cookies? Make this true if you want all ## logged-in actions to take place over HTTPS only. If developing -- cgit v1.2.3 From 69d771978bcf7d70d106c3e704fde203451fd48e Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 17:32:29 -0500 Subject: Fix 500 when no URLs have been checked Signed-off-by: Dan McGee --- mirrors/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mirrors/utils.py b/mirrors/utils.py index 5685c9c8..51daf50e 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -128,8 +128,8 @@ def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): if urls: url_data = status_data(cutoff_time, mirror_id) urls = [annotate_url(url, url_data.get(url.id, {})) for url in urls] - last_check = max([u.last_check for u in urls if u.last_check]) - num_checks = max([u.check_count for u in urls]) + last_check = max([u.last_check for u in urls if u.last_check] or [None]) + num_checks = max(u.check_count for u in urls) check_info = MirrorLog.objects.filter(check_time__gte=cutoff_time) if mirror_id: check_info = check_info.filter(url__mirror_id=mirror_id) -- cgit v1.2.3