diff options
author | Dan McGee <dan@archlinux.org> | 2010-09-21 17:39:46 -0500 |
---|---|---|
committer | Dan McGee <dan@archlinux.org> | 2010-09-21 17:39:46 -0500 |
commit | b3883820a249a0bb6ba6237f815b8cdffd630fcd (patch) | |
tree | 5c17812e1dde0c2885bf3520f435debc3e1122c9 | |
parent | 4a99d313bfb2a226e6777a39a9a8588106f42685 (diff) | |
parent | 8ff8190c5ca29473cbcc398fb12b33b4430cc050 (diff) |
Merge branch 'mirror-check'
-rw-r--r-- | mirrors/management/__init__.py | 0 | ||||
-rw-r--r-- | mirrors/management/commands/__init__.py | 0 | ||||
-rw-r--r-- | mirrors/management/commands/mirrorcheck.py | 153 | ||||
-rw-r--r-- | mirrors/migrations/0003_auto__add_mirrorlog.py | 72 | ||||
-rw-r--r-- | mirrors/models.py | 14 | ||||
-rw-r--r-- | mirrors/templatetags/__init__.py | 0 | ||||
-rw-r--r-- | mirrors/templatetags/mirror_status.py | 15 | ||||
-rw-r--r-- | mirrors/views.py | 57 | ||||
-rw-r--r-- | templates/mirrors/status.html | 101 | ||||
-rw-r--r-- | templates/mirrors/status_table.html | 29 | ||||
-rw-r--r-- | templates/public/index.html | 13 | ||||
-rw-r--r-- | urls.py | 3 |
12 files changed, 452 insertions, 5 deletions
diff --git a/mirrors/management/__init__.py b/mirrors/management/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/mirrors/management/__init__.py diff --git a/mirrors/management/commands/__init__.py b/mirrors/management/commands/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/mirrors/management/commands/__init__.py diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py new file mode 100644 index 00000000..1662b15c --- /dev/null +++ b/mirrors/management/commands/mirrorcheck.py @@ -0,0 +1,153 @@ +# -*- coding: utf-8 -*- +""" +mirrorcheck command + +Poll every active mirror URL we have in the database, grab the 'lastsync' file, +and record details about how long it took and how up to date the mirror is. If +we encounter errors, record those as well. + +Usage: ./manage.py mirrorcheck +""" + +from django.core.management.base import NoArgsCommand +from django.db.models import Q + +from datetime import datetime, timedelta +import logging +import re +import socket +import sys +import time +import thread +from threading import Thread +from Queue import Queue, Empty +import urllib2 + +from logging import ERROR, WARNING, INFO, DEBUG + +from mirrors.models import Mirror, MirrorUrl, MirrorLog + +logging.basicConfig( + level=WARNING, + format='%(asctime)s -> %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + stream=sys.stderr) +logger = logging.getLogger() + +class Command(NoArgsCommand): + help = "Runs a check on all known mirror URLs to determine their up-to-date status." 
+ + def handle_noargs(self, **options): + v = int(options.get('verbosity', 0)) + if v == 0: + logger.level = ERROR + elif v == 1: + logger.level = WARNING + elif v == 2: + logger.level = DEBUG + + import signal, traceback + handler = lambda sig, stack: traceback.print_stack(stack) + signal.signal(signal.SIGQUIT, handler) + signal.signal(signal.SIGUSR1, handler) + + return check_current_mirrors() + +def parse_rfc3339_datetime(time): + # '2010-09-02 11:05:06+02:00' + m = re.match('^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})([-+])(\d{2}):(\d{2})', time) + if m: + vals = m.groups() + parsed = datetime(int(vals[0]), int(vals[1]), int(vals[2]), + int(vals[3]), int(vals[4]), int(vals[5])) + # now account for time zone offset + sign = vals[6] + offset = timedelta(hours=int(sign + vals[7]), + minutes=int(sign + vals[8])) + # subtract the offset, e.g. '-04:00' should be moved up 4 hours + return parsed - offset + return None + +def check_mirror_url(mirror_url): + url = mirror_url.url + 'lastsync' + logger.info("checking URL %s" % url) + log = MirrorLog(url=mirror_url, check_time=datetime.utcnow()) + try: + start = time.time() + result = urllib2.urlopen(url, timeout=10) + data = result.read() + result.close() + end = time.time() + # lastsync should be an epoch value, but some mirrors + # are creating their own in RFC-3339 format: + # '2010-09-02 11:05:06+02:00' + try: + parsed_time = datetime.utcfromtimestamp(int(data)) + except ValueError: + # it is bad news to try logging the lastsync value; + # sometimes we get a crazy-encoded web page. 
+ logger.info("attempting to parse generated lastsync file" + " from mirror %s" % url) + parsed_time = parse_rfc3339_datetime(data) + + log.last_sync = parsed_time + log.duration = end - start + logger.debug("success: %s, %.2f" % (url, log.duration)) + except urllib2.HTTPError, e: + log.is_success = False + log.error =str(e) + logger.debug("failed: %s, %s" % (url, log.error)) + except urllib2.URLError, e: + log.is_success=False + log.error = e.reason + if isinstance(e.reason, socket.timeout): + log.error = "Connection timed out." + elif isinstance(e.reason, socket.error): + log.error = e.reason.args[1] + logger.debug("failed: %s, %s" % (url, log.error)) + + log.save() + return log + +def mirror_url_worker(queue): + while True: + try: + item = queue.get(block=False) + check_mirror_url(item) + queue.task_done() + except Empty: + return 0 + +class MirrorCheckPool(object): + def __init__(self, work, num_threads=10): + self.tasks = Queue() + for i in work: + self.tasks.put(i) + self.threads = [] + for i in range(num_threads): + thread = Thread(target=mirror_url_worker, args=(self.tasks,)) + thread.daemon = True + self.threads.append(thread) + + def run_and_join(self): + logger.debug("starting threads") + for t in self.threads: + t.start() + logger.debug("joining on all threads") + self.tasks.join() + +def check_current_mirrors(): + urls = MirrorUrl.objects.filter( + Q(protocol__protocol__iexact='HTTP') | + Q(protocol__protocol__iexact='FTP'), + mirror__active=True, mirror__public=True) + + pool = MirrorCheckPool(urls) + pool.run_and_join() + return 0 + +# For lack of a better place to put it, here is a query to get latest check +# result joined with mirror details: +# SELECT mu.*, m.*, ml.* FROM mirrors_mirrorurl mu JOIN mirrors_mirror m ON mu.mirror_id = m.id JOIN mirrors_mirrorlog ml ON mu.id = ml.url_id LEFT JOIN mirrors_mirrorlog ml2 ON ml.url_id = ml2.url_id AND ml.id < ml2.id WHERE ml2.id IS NULL AND m.active = 1 AND m.public = 1; + +# vim: set ts=4 sw=4 et: diff 
--git a/mirrors/migrations/0003_auto__add_mirrorlog.py b/mirrors/migrations/0003_auto__add_mirrorlog.py new file mode 100644 index 00000000..5b4c225b --- /dev/null +++ b/mirrors/migrations/0003_auto__add_mirrorlog.py @@ -0,0 +1,72 @@ +# encoding: utf-8 +import datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + +class Migration(SchemaMigration): + + def forwards(self, orm): + # Adding model 'MirrorLog' + db.create_table('mirrors_mirrorlog', ( + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('url', self.gf('django.db.models.fields.related.ForeignKey')(related_name='logs', to=orm['mirrors.MirrorUrl'])), + ('check_time', self.gf('django.db.models.fields.DateTimeField')(db_index=True)), + ('last_sync', self.gf('django.db.models.fields.DateTimeField')(null=True)), + ('duration', self.gf('django.db.models.fields.FloatField')(null=True)), + ('is_success', self.gf('django.db.models.fields.BooleanField')(default=True)), + ('error', self.gf('django.db.models.fields.CharField')(default='', max_length=255, blank=True)), + )) + db.send_create_signal('mirrors', ['MirrorLog']) + + def backwards(self, orm): + # Deleting model 'MirrorLog' + db.delete_table('mirrors_mirrorlog') + + models = { + 'mirrors.mirror': { + 'Meta': {'ordering': "('country', 'name')", 'object_name': 'Mirror'}, + 'active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'admin_email': ('django.db.models.fields.EmailField', [], {'max_length': '255', 'blank': 'True'}), + 'country': ('django.db.models.fields.CharField', [], {'max_length': '255', 'db_index': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'isos': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '255'}), + 'notes': ('django.db.models.fields.TextField', [], {'blank': 'True'}), + 'public': 
('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'rsync_password': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '50', 'blank': 'True'}), + 'rsync_user': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '50', 'blank': 'True'}), + 'tier': ('django.db.models.fields.SmallIntegerField', [], {'default': '2'}), + 'upstream': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['mirrors.Mirror']", 'null': 'True'}) + }, + 'mirrors.mirrorlog': { + 'Meta': {'object_name': 'MirrorLog'}, + 'check_time': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}), + 'duration': ('django.db.models.fields.FloatField', [], {'null': 'True'}), + 'error': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '255', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'is_success': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'last_sync': ('django.db.models.fields.DateTimeField', [], {'null': 'True'}), + 'url': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'logs'", 'to': "orm['mirrors.MirrorUrl']"}) + }, + 'mirrors.mirrorprotocol': { + 'Meta': {'object_name': 'MirrorProtocol'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'protocol': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '10'}) + }, + 'mirrors.mirrorrsync': { + 'Meta': {'object_name': 'MirrorRsync'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'ip': ('django.db.models.fields.CharField', [], {'max_length': '24'}), + 'mirror': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'rsync_ips'", 'to': "orm['mirrors.Mirror']"}) + }, + 'mirrors.mirrorurl': { + 'Meta': {'object_name': 'MirrorUrl'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'mirror': 
('django.db.models.fields.related.ForeignKey', [], {'related_name': "'urls'", 'to': "orm['mirrors.Mirror']"}), + 'protocol': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'urls'", 'to': "orm['mirrors.MirrorProtocol']"}), + 'url': ('django.db.models.fields.CharField', [], {'max_length': '255'}) + } + } + + complete_apps = ['mirrors'] diff --git a/mirrors/models.py b/mirrors/models.py index 94256a9c..5cab9db6 100644 --- a/mirrors/models.py +++ b/mirrors/models.py @@ -54,4 +54,18 @@ class MirrorRsync(models.Model): class Meta: verbose_name = 'Mirror Rsync IP' +class MirrorLog(models.Model): + url = models.ForeignKey(MirrorUrl, related_name="logs") + check_time = models.DateTimeField(db_index=True) + last_sync = models.DateTimeField(null=True) + duration = models.FloatField(null=True) + is_success = models.BooleanField(default=True) + error = models.CharField(max_length=255, blank=True, default='') + + def __unicode__(self): + return "Check of %s at %s" % (url.url, check_time) + + class Meta: + verbose_name = 'Mirror Check Log' + # vim: set ts=4 sw=4 et: diff --git a/mirrors/templatetags/__init__.py b/mirrors/templatetags/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/mirrors/templatetags/__init__.py diff --git a/mirrors/templatetags/mirror_status.py b/mirrors/templatetags/mirror_status.py new file mode 100644 index 00000000..09c5b331 --- /dev/null +++ b/mirrors/templatetags/mirror_status.py @@ -0,0 +1,15 @@ +from django import template + +register = template.Library() + +@register.filter +def duration(value): + if not value: + return u'\u221e' + # does not take microseconds into account + total_secs = value.seconds + value.days * 24 * 3600 + mins, secs = divmod(total_secs, 60) + hrs, mins = divmod(mins, 60) + return '%d:%02d' % (hrs, mins) + +# vim: set ts=4 sw=4 et: diff --git a/mirrors/views.py b/mirrors/views.py index ddc42cbb..59d6337b 100644 --- a/mirrors/views.py +++ b/mirrors/views.py @@ -1,9 +1,14 @@ from 
django import forms +from django.db.models import Avg, Count, Max, Min, StdDev +from django.db.models import Q from django.views.decorators.csrf import csrf_exempt from django.views.generic.simple import direct_to_template from main.utils import make_choice from .models import Mirror, MirrorUrl, MirrorProtocol +from .models import MirrorLog + +import datetime class MirrorlistForm(forms.Form): country = forms.MultipleChoiceField(required=False) @@ -21,7 +26,7 @@ class MirrorlistForm(forms.Form): self.fields['protocol'].initial = [t[0] for t in protos] @csrf_exempt -def generate(request): +def generate_mirrorlist(request): if request.REQUEST.get('country', ''): form = MirrorlistForm(data=request.REQUEST) if form.is_valid(): @@ -49,4 +54,54 @@ def find_mirrors(request, countries=None, protocols=None): }, mimetype='text/plain') +def status(request): + cutoff_time = datetime.datetime.utcnow() - datetime.timedelta(hours=24) + bad_timedelta = datetime.timedelta(days=3) + + protocols = MirrorProtocol.objects.exclude(protocol__iexact='rsync') + # I swear, this actually has decent performance... 
+ urls = MirrorUrl.objects.select_related( + 'mirror', 'protocol').filter( + mirror__active=True, mirror__public=True, + protocol__in=protocols).filter( + logs__check_time__gte=cutoff_time).annotate( + check_count=Count('logs'), last_sync=Max('logs__last_sync'), + last_check=Max('logs__check_time'), + duration_avg=Avg('logs__duration'), duration_min=Min('logs__duration'), + duration_max=Max('logs__duration'), duration_stddev=StdDev('logs__duration') + ).order_by('-last_sync', '-duration_avg') + # errors during check process go in another table + error_logs = MirrorLog.objects.filter( + is_success=False, check_time__gte=cutoff_time).values( + 'url__url', 'url__protocol__protocol', 'url__mirror__country', + 'error').annotate( + error_count=Count('error'), last_occurred=Max('check_time') + ).order_by('-last_occurred', '-error_count') + + last_check = max([u.last_check for u in urls]) + + good_urls = [] + bad_urls = [] + for url in urls: + if url.last_check and url.last_sync: + d = url.last_check - url.last_sync + url.delay = d + url.score = d.days * 24 + d.seconds / 3600 + url.duration_avg + url.duration_stddev + else: + url.delay = None + url.score = None + # split them into good and bad lists based on delay + if not url.delay or url.delay > bad_timedelta: + bad_urls.append(url) + else: + good_urls.append(url) + + context = { + 'last_check': last_check, + 'good_urls': good_urls, + 'bad_urls': bad_urls, + 'error_logs': error_logs, + } + return direct_to_template(request, 'mirrors/status.html', context) + # vim: set ts=4 sw=4 et: diff --git a/templates/mirrors/status.html b/templates/mirrors/status.html new file mode 100644 index 00000000..5743e47b --- /dev/null +++ b/templates/mirrors/status.html @@ -0,0 +1,101 @@ +{% extends "base.html" %} + +{% block title %}Arch Linux - Mirror Status{% endblock %} + +{% block content %} +<div id="mirrorstatus" class="box"> + <h2>Mirror Status</h2> + <p>This page reports the status of all known, public, and active Arch Linux + 
mirrors. All data on this page reflects the status of the mirrors within + the <em>last 24 hours</em>. All listed times are UTC. The check script runs + on a regular basis and polls for the <tt>lastsync</tt> file in the root of + our repository layout. This file is regularly updated on the central + repository, so checking the value within allows one to see if the mirror + has synced recently. This page contains several pieces of information about + each mirror.</p> + <ul> + <li><em>Mirror URL:</em> Mirrors are checked on a per-URL basis. If + both FTP and HTTP access are provided, both will be listed here.</li> + <li><em>Last Sync:</em> The timestamp retrieved from the + <tt>lastsync</tt> file on the mirror. If this file could not be + retrieved or contained data we didn't recognize, this column will show + 'unknown'.</li> + <li><em>Delay:</em> The calculated mirroring delay; e.g. <code>last + check − last sync</code>.</li> + <li><em>μ Duration:</em> The average (mean) time it took to connect and + retrieve the <tt>lastsync</tt> file from the given URL. Note that this + connection time is from the location of the Arch server; your geography + may produce different results.</li> + <li><em>σ Duration:</em> The standard deviation of the connect and + retrieval time. A high standard deviation can indicate an unstable or + overloaded mirror.</li> + <li><em>Mirror Score:</em> A very rough calculation for ranking + mirrors. It is currently calculated as <code>hours delay + average + duration + standard deviation</code>. Lower is better.</li> + </ul> + <p>The final table on this page is an error log, which shows any errors + that occurred while contacting mirrors.
This only shows errors that + occurred within the last 24 hours.</p> + <ul> + <li><a href="#outofsync">Out of Sync Mirrors</a></li> + <li><a href="#successful">Successfully Syncing Mirrors</a></li> + <li><a href="#errorlog">Mirror Syncing Error Log</a></li> + </ul> + + <p>The last mirror check ran at {{ last_check|date:'Y-m-d H:i' }} UTC.</p> + + <a name="outofsync"/> + <h3>Out of Sync Mirrors</h3> + {% with bad_urls as urls %} + {% with 'outofsync_mirrors' as table_id %} + {% include "mirrors/status_table.html" %} + {% endwith %} + {% endwith %} + + <a name="successful"/> + <h3>Successfully Syncing Mirrors</h3> + {% with good_urls as urls %} + {% with 'successful_mirrors' as table_id %} + {% include "mirrors/status_table.html" %} + {% endwith %} + {% endwith %} + + <a name="errorlog"/> + <h3>Mirror Syncing Error Log</h3> + <table id="errorlog_mirrors" class="results"> + <thead> + <tr> + <th>Mirror URL</th> + <th>Protocol</th> + <th>Country</th> + <th>Error Message</th> + <th>Last Occurred</th> + <th>Occurrences (last 24 hours)</th> + </tr> + </thead> + <tbody> + {% for log in error_logs %} + <tr class="{% cycle 'odd' 'even' %}"> + <td>{{ log.url__url }}</td> + <td>{{ log.url__protocol__protocol }}</td> + <td>{{ log.url__mirror__country }}</td> + <td>{{ log.error }}</td> + <td>{{ log.last_occurred|date:'Y-m-d H:i' }}</td> + <td>{{ log.error_count }}</td> + </tr> + {% endfor %} + </tbody> + </table> + +</div> +{% load cdn %}{% jquery %} +<script type="text/javascript" src="/media/jquery.tablesorter.min.js"></script> +<script type="text/javascript"> +$(document).ready(function() { + $("#outofsync_mirrors").add("#successful_mirrors").tablesorter( + {widgets: ['zebra'], sortList: [[3,1], [5,1]]}); + $("#errorlog_mirrors").tablesorter( + {widgets: ['zebra'], sortList: [[4,1], [5,1]]}); +}); +</script> +{% endblock %} diff --git a/templates/mirrors/status_table.html b/templates/mirrors/status_table.html new file mode 100644 index 00000000..90bbf6b6 --- /dev/null +++ 
b/templates/mirrors/status_table.html @@ -0,0 +1,29 @@ +{% load mirror_status %} +<table id="{{ table_id }}" class="results"> + <thead> + <tr> + <th>Mirror URL</th> + <th>Protocol</th> + <th>Country</th> + <th>Last Sync</th> + <th>Delay (hh:mm)</th> + <th>μ Duration (secs)</th> + <th>σ Duration (secs)</th> + <th>Mirror Score</th> + </tr> + </thead> + <tbody> + {% for m_url in urls %} + <tr class="{% cycle 'odd' 'even' %}"> + <td>{{ m_url.url }}</td> + <td>{{ m_url.protocol }}</td> + <td>{{ m_url.mirror.country }}</td> + <td>{{ m_url.last_sync|date:'Y-m-d H:i'|default:'unknown' }}</td> + <td>{{ m_url.delay|duration|default:'unknown' }}</td> + <td>{{ m_url.duration_avg|floatformat:2|default:'unknown' }}</td> + <td>{{ m_url.duration_stddev|floatformat:2|default:'unknown' }}</td> + <td>{{ m_url.score|floatformat:1|default:'unknown' }}</td> + </tr> + {% endfor %} + </tbody> +</table> diff --git a/templates/public/index.html b/templates/public/index.html index 5a87787d..cc7aa21b 100644 --- a/templates/public/index.html +++ b/templates/public/index.html @@ -132,17 +132,24 @@ title="T-shirts">Schwag via Freewear</a></li> </ul> - <h4>Development</h4> + <h4>Tools</h4> <ul> <li><a href="{% url mirrorlist %}" title="Get a custom mirrorlist from our database">Mirrorlist Updater</a></li> + <li><a href="{% url mirror-status %}" + title="Check the status of all known mirrors">Mirror Status</a></li> + <li><a href="/packages/differences/" + title="See differences in packages between available architectures">Differences by Architecture</a></li> + </ul> + + <h4>Development</h4> + + <ul> <li><a href="/packages/" title="View/search the package repository database">Packages</a></li> <li><a href="/groups/" title="View the available package groups">Package Groups</a></li> - <li><a href="/packages/differences/" - title="See differences in packages between available architectures">Differences by Architecture</a></li> <li><a href="http://bugs.archlinux.org/" title="Report/track bugs or make 
feature requests">Bug Tracker</a></li> <li><a href="{% url page-svn %}" @@ -72,8 +72,9 @@ urlpatterns = patterns('', (r'^news/$', 'news.views.news_list', {}, 'news-list'), (r'^mirrors/$', 'devel.views.mirrorlist', {}, 'mirrors-list'), + (r'^mirrors/status/$', 'mirrors.views.status', {}, 'mirror-status'), - (r'^mirrorlist/$', 'mirrors.views.generate', {}, 'mirrorlist'), + (r'^mirrorlist/$', 'mirrors.views.generate_mirrorlist', {}, 'mirrorlist'), (r'^mirrorlist/all/$', 'mirrors.views.find_mirrors', {'countries': ['all']}), (r'^mirrorlist/all/ftp/$', 'mirrors.views.find_mirrors', {'countries': ['all'], 'protocols': ['ftp']}), |