From ee6bf2782068b917232c71189aea0011b47e876d Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 09:10:28 -0500 Subject: Small performance tweaks to mirror status JSON encoding Do a few things to speed up the encoding of the JSON, including better usage of list comprehensions, less dynamic setattr() usage, and removal of the queryset specialization since we can easily do it outside of the encoder. Signed-off-by: Dan McGee --- mirrors/utils.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) (limited to 'mirrors/utils.py') diff --git a/mirrors/utils.py b/mirrors/utils.py index fe18cd6a..4484fa24 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -84,19 +84,16 @@ def status_data(cutoff=DEFAULT_CUTOFF, mirror_id=None): def annotate_url(url, url_data): '''Given a MirrorURL object, add a few more attributes to it regarding status, including completion_pct, delay, and score.''' - known_attrs = ( - ('success_count', 0), - ('check_count', 0), - ('completion_pct', None), - ('duration_avg', None), - ('duration_stddev', None), - ('last_check', None), - ('last_sync', None), - ('delay', None), - ('score', None), - ) - for k, v in known_attrs: - setattr(url, k, v) + # set up some sane default values in case we are missing anything + url.success_count = 0 + url.check_count = 0 + url.completion_pct = None + url.duration_avg = None + url.duration_stddev = None + url.last_check = None + url.last_sync = None + url.delay = None + url.score = None for k, v in url_data.items(): if k not in ('url_id', 'mirror_id'): setattr(url, k, v) @@ -107,7 +104,7 @@ def annotate_url(url, url_data): if url.delay is not None: hours = url.delay.days * 24.0 + url.delay.seconds / 3600.0 - if url.completion_pct > 0: + if url.completion_pct > 0.0: divisor = url.completion_pct else: # arbitrary small value @@ -115,6 +112,8 @@ def annotate_url(url, url_data): stddev = url.duration_stddev or 0.0 url.score = (hours + url.duration_avg + stddev) / divisor + return url + def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): cutoff_time = now() - cutoff @@ -133,8 +132,7 @@ def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): id__in=valid_urls).order_by('mirror__id', 'url') if urls: - for url in urls: - annotate_url(url, url_data.get(url.id, {})) + urls = [annotate_url(url, url_data.get(url.id, {})) for url in urls] last_check = max([u.last_check for u in urls if u.last_check]) num_checks = max([u.check_count for u in urls]) check_info = MirrorLog.objects.filter(check_time__gte=cutoff_time) -- cgit v1.2.3-54-g00ecf From 86fd0b722afb53670ef9a155a3c55d688f275c6d Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 09:39:06 -0500 Subject: Reduce complexity of status data URL query Get rid of all the junk trying to only return URLs that have been checked in the last 24 hours; it just isn't worth it. Instead, do that screening only in the views that need it, namely the HTML status page. Signed-off-by: Dan McGee --- mirrors/utils.py | 14 +++++--------- mirrors/views.py | 5 ++++- 2 files changed, 9 insertions(+), 10 deletions(-) (limited to 'mirrors/utils.py') diff --git a/mirrors/utils.py b/mirrors/utils.py index 4484fa24..8edceb9b 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -118,20 +118,16 @@ def annotate_url(url, url_data): def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): cutoff_time = now() - cutoff - valid_urls = MirrorUrl.objects.filter( - logs__check_time__gte=cutoff_time).distinct() - + urls = MirrorUrl.objects.select_related( + 'mirror', 'protocol').order_by('mirror__id', 'url') if mirror_id: - valid_urls = valid_urls.filter(mirror_id=mirror_id) + urls = urls.filter(mirror_id=mirror_id) if not show_all: - valid_urls = valid_urls.filter(active=True, mirror__active=True, + urls = urls.filter(active=True, mirror__active=True, mirror__public=True) - url_data = status_data(cutoff, mirror_id) - urls = MirrorUrl.objects.select_related('mirror', 'protocol').filter( - id__in=valid_urls).order_by('mirror__id', 'url') - if urls: + url_data = status_data(cutoff, mirror_id) urls = [annotate_url(url, url_data.get(url.id, {})) for url in urls] last_check = max([u.last_check for u in urls if u.last_check]) num_checks = max([u.check_count for u in urls]) diff --git a/mirrors/views.py b/mirrors/views.py index 1a9741ed..90787763 100644 --- a/mirrors/views.py +++ b/mirrors/views.py @@ -245,7 +245,10 @@ def status(request, tier=None): if tier is not None and url.mirror.tier != tier: continue # split them into good and bad lists based on delay - if not url.delay or url.delay > bad_timedelta: + if url.completion_pct is None: + # skip URLs that have never been checked + continue + elif not url.delay or url.delay > bad_timedelta: bad_urls.append(url) else: good_urls.append(url) -- cgit v1.2.3-54-g00ecf From 48509bfdbdadb8255f32c56d993f91262516916f Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 10:00:48 -0500 Subject: Simplify/clean-up finding of download mirror Signed-off-by: Dan McGee --- mirrors/utils.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'mirrors/utils.py') diff --git a/mirrors/utils.py b/mirrors/utils.py index 8edceb9b..fb867dee 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -189,23 +189,21 @@ def get_mirror_url_for_download(cutoff=DEFAULT_CUTOFF): if status_data['check_time__max'] is not None: min_check_time = status_data['check_time__max'] - timedelta(minutes=5) min_sync_time = status_data['last_sync__max'] - timedelta(minutes=20) - best_logs = MirrorLog.objects.filter(is_success=True, + best_logs = MirrorLog.objects.select_related('url').filter( + is_success=True, check_time__gte=min_check_time, last_sync__gte=min_sync_time, url__active=True, url__mirror__public=True, url__mirror__active=True, url__protocol__default=True).order_by( 'duration')[:1] if best_logs: - return MirrorUrl.objects.get(id=best_logs[0].url_id) + return best_logs[0].url mirror_urls = MirrorUrl.objects.filter(active=True, - mirror__public=True, mirror__active=True, protocol__default=True) - # look first for a country-agnostic URL, then fall back to any HTTP URL - filtered_urls = mirror_urls.filter(country='')[:1] - if not filtered_urls: - filtered_urls = mirror_urls[:1] - if not filtered_urls: + mirror__public=True, mirror__active=True, + protocol__default=True)[:1] + if not mirror_urls: return None - return filtered_urls[0] + return mirror_urls[0] # vim: set ts=4 sw=4 et: -- cgit v1.2.3-54-g00ecf From f53ea0b102d0251a98116aad445d55570c71931c Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 10:01:53 -0500 Subject: Move caching of function data back to get_mirror_statuses We've moved this around a few times, including changing the parameters to ensure they are stable (commit bdfa22500f4). However, the bulk of the work takes place in the mashing up of the data, so cache the full result rather than just the result of a single query. Signed-off-by: Dan McGee --- mirrors/utils.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'mirrors/utils.py') diff --git a/mirrors/utils.py b/mirrors/utils.py index fb867dee..5685c9c8 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -21,9 +21,7 @@ def dictfetchall(cursor): for row in cursor.fetchall() ] -@cache_function(178) -def status_data(cutoff=DEFAULT_CUTOFF, mirror_id=None): - cutoff_time = now() - cutoff +def status_data(cutoff_time, mirror_id=None): if mirror_id is not None: params = [cutoff_time, mirror_id] mirror_where = 'AND u.mirror_id = %s' @@ -115,6 +113,7 @@ def annotate_url(url, url_data): return url +@cache_function(178) def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): cutoff_time = now() - cutoff @@ -127,7 +126,7 @@ def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): mirror__public=True) if urls: - url_data = status_data(cutoff, mirror_id) + url_data = status_data(cutoff_time, mirror_id) urls = [annotate_url(url, url_data.get(url.id, {})) for url in urls] last_check = max([u.last_check for u in urls if u.last_check]) num_checks = max([u.check_count for u in urls]) -- cgit v1.2.3-54-g00ecf From 69d771978bcf7d70d106c3e704fde203451fd48e Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 17:32:29 -0500 Subject: Fix 500 when no URLs have been checked Signed-off-by: Dan McGee --- mirrors/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mirrors/utils.py') diff --git a/mirrors/utils.py b/mirrors/utils.py index 5685c9c8..51daf50e 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -128,8 +128,8 @@ def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): if urls: url_data = status_data(cutoff_time, mirror_id) urls = [annotate_url(url, url_data.get(url.id, {})) for url in urls] - last_check = max([u.last_check for u in urls if u.last_check]) - num_checks = max([u.check_count for u in urls]) + last_check = max([u.last_check for u in urls if u.last_check] or [None]) + num_checks = max(u.check_count for u in urls) check_info = MirrorLog.objects.filter(check_time__gte=cutoff_time) if mirror_id: check_info = check_info.filter(url__mirror_id=mirror_id) -- cgit v1.2.3-54-g00ecf