summaryrefslogtreecommitdiff
path: root/mirrors/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'mirrors/utils.py')
-rw-r--r--mirrors/utils.py146
1 files changed, 91 insertions, 55 deletions
diff --git a/mirrors/utils.py b/mirrors/utils.py
index d18dc22f..eb1211f1 100644
--- a/mirrors/utils.py
+++ b/mirrors/utils.py
@@ -2,6 +2,7 @@ from datetime import timedelta
from django.db import connection
from django.db.models import Avg, Count, Max, Min, StdDev
+from django.utils.dateparse import parse_datetime
from django.utils.timezone import now
from django_countries.fields import Country
@@ -11,47 +12,103 @@ from .models import MirrorLog, MirrorProtocol, MirrorUrl
DEFAULT_CUTOFF = timedelta(hours=24)
-def annotate_url(url, delay):
- '''Given a MirrorURL object, add a few more attributes to it regarding
- status, including completion_pct, delay, and score.'''
- url.completion_pct = float(url.success_count) / url.check_count
- if delay is not None:
- url.delay = delay
- hours = url.delay.days * 24.0 + url.delay.seconds / 3600.0
- if url.completion_pct > 0:
- divisor = url.completion_pct
- else:
- # arbitrary small value
- divisor = 0.005
- url.score = (hours + url.duration_avg + url.duration_stddev) / divisor
- else:
- url.delay = None
- url.score = None
+def dictfetchall(cursor):
+ "Returns all rows from a cursor as a dict."
+ desc = cursor.description
+ return [
+ dict(zip([col[0] for col in desc], row))
+ for row in cursor.fetchall()
+ ]
-def url_delays(cutoff_time, mirror_id=None):
- cursor = connection.cursor()
- if mirror_id is None:
- sql= """
-SELECT url_id, AVG(check_time - last_sync)
-FROM mirrors_mirrorlog
-WHERE is_success = %s AND check_time >= %s AND last_sync IS NOT NULL
-GROUP BY url_id
+def status_data(cutoff_time, mirror_id=None):
+ if mirror_id is not None:
+ params = [cutoff_time, mirror_id]
+ mirror_where = 'AND u.mirror_id = %s'
+ else:
+ params = [cutoff_time]
+ mirror_where = ''
+
+ vendor = database_vendor(MirrorUrl)
+ if vendor == 'sqlite':
+ sql = """
+SELECT l.url_id, u.mirror_id,
+ COUNT(l.id) AS check_count,
+ COUNT(l.duration) AS success_count,
+ MAX(l.last_sync) AS last_sync,
+ MAX(l.check_time) AS last_check,
+ AVG(l.duration) AS duration_avg,
+ 0.0 AS duration_stddev,
+ AVG(STRFTIME('%%s', check_time) - STRFTIME('%%s', last_sync)) AS delay
+FROM mirrors_mirrorlog l
+JOIN mirrors_mirrorurl u ON u.id = l.url_id
+WHERE l.check_time >= %s
+""" + mirror_where + """
+GROUP BY l.url_id, u.mirror_id
"""
- cursor.execute(sql, [True, cutoff_time])
else:
sql = """
-SELECT l.url_id, avg(check_time - last_sync)
+SELECT l.url_id, u.mirror_id,
+ COUNT(l.id) AS check_count,
+ COUNT(l.duration) AS success_count,
+ MAX(l.last_sync) AS last_sync,
+ MAX(l.check_time) AS last_check,
+ AVG(l.duration) AS duration_avg,
+ STDDEV(l.duration) AS duration_stddev,
+ AVG(check_time - last_sync) AS delay
FROM mirrors_mirrorlog l
JOIN mirrors_mirrorurl u ON u.id = l.url_id
-WHERE is_success = %s AND check_time >= %s AND last_sync IS NOT NULL
-AND mirror_id = %s
-GROUP BY url_id
+WHERE l.check_time >= %s
+""" + mirror_where + """
+GROUP BY l.url_id, u.mirror_id
"""
- cursor.execute(sql, [True, cutoff_time, mirror_id])
- return {url_id: delay for url_id, delay in cursor.fetchall()}
+ cursor = connection.cursor()
+ cursor.execute(sql, params)
+ url_data = dictfetchall(cursor)
+
+ # sqlite loves to return less than ideal types
+ if vendor == 'sqlite':
+ for item in url_data:
+ item['delay'] = timedelta(seconds=item['delay'])
+ item['last_sync'] = parse_datetime(item['last_sync'])
+ item['last_check'] = parse_datetime(item['last_check'])
+
+ return {item['url_id']: item for item in url_data}
+
+
+def annotate_url(url, url_data):
+ '''Given a MirrorURL object, add a few more attributes to it regarding
+ status, including completion_pct, delay, and score.'''
+ known_attrs = (
+ ('success_count', 0),
+ ('check_count', 0),
+ ('completion_pct', None),
+ ('last_check', None),
+ ('last_sync', None),
+ ('delay', None),
+ ('score', None),
+ )
+ for k, v in known_attrs:
+ setattr(url, k, v)
+ for k, v in url_data.items():
+ if k not in ('url_id', 'mirror_id'):
+ setattr(url, k, v)
+
+ if url.check_count > 0:
+ url.completion_pct = float(url.success_count) / url.check_count
+
+ if url.delay is not None:
+ hours = url.delay.days * 24.0 + url.delay.seconds / 3600.0
+
+ if url.completion_pct > 0:
+ divisor = url.completion_pct
+ else:
+ # arbitrary small value
+ divisor = 0.005
+ stddev = url.duration_stddev or 0.0
+ url.score = (hours + url.duration_avg + stddev) / divisor
@cache_function(123)
@@ -65,29 +122,14 @@ def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None):
if mirror_id:
valid_urls = valid_urls.filter(mirror_id=mirror_id)
- url_data = MirrorUrl.objects.values('id', 'mirror_id').filter(
- id__in=valid_urls, logs__check_time__gte=cutoff_time).annotate(
- check_count=Count('logs'),
- success_count=Count('logs__duration'),
- last_sync=Max('logs__last_sync'),
- last_check=Max('logs__check_time'),
- duration_avg=Avg('logs__duration'))
-
- vendor = database_vendor(MirrorUrl)
- if vendor != 'sqlite':
- url_data = url_data.annotate(duration_stddev=StdDev('logs__duration'))
-
+ url_data = status_data(cutoff_time, mirror_id)
urls = MirrorUrl.objects.select_related('mirror', 'protocol').filter(
id__in=valid_urls).order_by('mirror__id', 'url')
- delays = url_delays(cutoff_time, mirror_id)
if urls:
- url_data = dict((item['id'], item) for item in url_data)
for url in urls:
- for k, v in url_data.get(url.id, {}).items():
- if k not in ('id', 'mirror_id'):
- setattr(url, k, v)
- last_check = max([u.last_check for u in urls])
+ annotate_url(url, url_data.get(url.id, {}))
+ last_check = max([u.last_check for u in urls if u.last_check])
num_checks = max([u.check_count for u in urls])
check_info = MirrorLog.objects.filter(check_time__gte=cutoff_time)
if mirror_id:
@@ -104,12 +146,6 @@ def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None):
num_checks = 0
check_frequency = None
- for url in urls:
- # fake the standard deviation for local testing setups
- if vendor == 'sqlite':
- setattr(url, 'duration_stddev', 0.0)
- annotate_url(url, delays.get(url.id, None))
-
return {
'cutoff': cutoff,
'last_check': last_check,