summaryrefslogtreecommitdiff
path: root/mirrors/utils.py
blob: aa1e9f76353978fb9ff891cc66a3575180ec9b65 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from datetime import timedelta

from django.db.models import Avg, Count, Max, Min, StdDev

from main.utils import cache_function, utc_now
from .models import MirrorLog, MirrorProtocol, MirrorUrl


default_cutoff = timedelta(hours=24)

def annotate_url(url, delays):
    '''Given a MirrorURL object, add a few more attributes to it regarding
    status, including completion_pct, delay, and score.'''
    url.completion_pct = float(url.success_count) / url.check_count
    if url.id in delays:
        url_delays = delays[url.id]
        url.delay = sum(url_delays, timedelta()) / len(url_delays)
        hours = url.delay.days * 24.0 + url.delay.seconds / 3600.0

        if url.completion_pct > 0:
            divisor = url.completion_pct
        else:
            # arbitrary small value
            divisor = 0.005
        url.score = (hours + url.duration_avg + url.duration_stddev) / divisor
    else:
        url.delay = None
        url.score = None


@cache_function(123)
def get_mirror_statuses(cutoff=default_cutoff):
    cutoff_time = utc_now() - cutoff
    protocols = list(MirrorProtocol.objects.filter(is_download=True))
    # I swear, this actually has decent performance...
    urls = MirrorUrl.objects.select_related('mirror', 'protocol').filter(
            mirror__active=True, mirror__public=True,
            protocol__in=protocols,
            logs__check_time__gte=cutoff_time).annotate(
            check_count=Count('logs'),
            success_count=Count('logs__duration'),
            last_sync=Max('logs__last_sync'),
            last_check=Max('logs__check_time'),
            duration_avg=Avg('logs__duration'),
            duration_stddev=StdDev('logs__duration')
            ).order_by('-last_sync', '-duration_avg')

    # The Django ORM makes it really hard to get actual average delay in the
    # above query, so run a seperate query for it and we will process the
    # results here.
    times = MirrorLog.objects.filter(is_success=True, last_sync__isnull=False,
            check_time__gte=cutoff_time)
    delays = {}
    for log in times:
        delay = log.check_time - log.last_sync
        delays.setdefault(log.url_id, []).append(delay)

    if urls:
        last_check = max([u.last_check for u in urls])
        num_checks = max([u.check_count for u in urls])
        check_info = MirrorLog.objects.filter(
                check_time__gte=cutoff_time).aggregate(
                mn=Min('check_time'), mx=Max('check_time'))
        if num_checks > 1:
            check_frequency = (check_info['mx'] - check_info['mn']) \
                    / (num_checks - 1)
        else:
            check_frequency = None
    else:
        last_check = None
        num_checks = 0
        check_frequency = None

    for url in urls:
        annotate_url(url, delays)

    return {
        'cutoff': cutoff,
        'last_check': last_check,
        'num_checks': num_checks,
        'check_frequency': check_frequency,
        'urls': urls,
    }


@cache_function(117)
def get_mirror_errors(cutoff=default_cutoff):
    cutoff_time = utc_now() - cutoff
    errors = MirrorLog.objects.filter(
            is_success=False, check_time__gte=cutoff_time,
            url__mirror__active=True, url__mirror__public=True).values(
            'url__url', 'url__country_old', 'url__protocol__protocol',
            'url__mirror__country_old', 'error').annotate(
            error_count=Count('error'), last_occurred=Max('check_time')
            ).order_by('-last_occurred', '-error_count')
    errors = list(errors)
    for err in errors:
        err['country'] = err['url__country_old'] or err['url__mirror__country_old']
    return errors


@cache_function(295)
def get_mirror_url_for_download(cutoff=default_cutoff):
    '''Find a good mirror URL to use for package downloads. If we have mirror
    status data available, it is used to determine a good choice by looking at
    the last batch of status rows.'''
    cutoff_time = utc_now() - cutoff
    status_data = MirrorLog.objects.filter(
            check_time__gte=cutoff_time).aggregate(
            Max('check_time'), Max('last_sync'))
    if status_data['check_time__max'] is not None:
        min_check_time = status_data['check_time__max'] - timedelta(minutes=5)
        min_sync_time = status_data['last_sync__max'] - timedelta(minutes=30)
        best_logs = MirrorLog.objects.filter(is_success=True,
                check_time__gte=min_check_time, last_sync__gte=min_sync_time,
                url__mirror__public=True, url__mirror__active=True,
                url__protocol__protocol__iexact='HTTP').order_by(
                'duration')[:1]
        if best_logs:
            return MirrorUrl.objects.get(id=best_logs[0].url_id)

    mirror_urls = MirrorUrl.objects.filter(
            mirror__public=True, mirror__active=True,
            protocol__protocol__iexact='HTTP')
    # look first for an 'Any' URL, then fall back to any HTTP URL
    filtered_urls = mirror_urls.filter(mirror__country_old='Any')[:1]
    if not filtered_urls:
        filtered_urls = mirror_urls[:1]
    if not filtered_urls:
        return None
    return filtered_urls[0]

# vim: set ts=4 sw=4 et: