blob: 4cc6908eddef9cec279b11864bb0b23b8307309b (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
import re
import urllib
from HTMLParser import HTMLParser, HTMLParseError
from django.core.management.base import BaseCommand, CommandError
from isotests.models import Iso
from settings import ISOLISTURL
class IsoListParser(HTMLParser):
    """Collect directory-style links from an HTML listing page.

    Every ``<a href="...">`` whose target ends in ``/`` (other than the
    parent-directory link ``../``) is recorded, without its trailing
    slash, in ``self.hyperlinks``.
    """

    def __init__(self):
        """Set up the base parser and an empty list of collected links."""
        HTMLParser.__init__(self)
        self.hyperlinks = []
        # Raw string so the backslash reaches the regex engine untouched.
        # The pattern matches values that end in '/'; the parent link
        # '../' is filtered out explicitly in handle_starttag().
        self.url_re = re.compile(r'(?!\.{2})/$')

    def handle_starttag(self, tag, attrs):
        """Record the href of an anchor tag if it points at a directory.

        tag   -- tag name as reported by HTMLParser
        attrs -- list of (name, value) attribute pairs for the tag
        """
        if tag != 'a':
            return
        for name, value in attrs:
            if name != 'href':
                continue
            # HTMLParser reports a valueless attribute (``<a href>``) with
            # a value of None; skip it instead of handing None to the
            # regex, which would raise TypeError.
            if value is None or value == '../':
                continue
            if self.url_re.search(value) is not None:
                # Drop the trailing character (the directory slash).
                self.hyperlinks.append(value[:-1])

    def parse(self, url):
        """Fetch *url*, parse it and return the list of collected links.

        Raises CommandError if the page cannot be parsed as HTML.  Errors
        raised while opening or reading the URL are not caught here.
        """
        f = urllib.urlopen(url)
        try:
            s = f.read()
        finally:
            # Release the handle even when reading fails.
            f.close()
        try:
            self.feed(s)
            self.close()
        except HTMLParseError:
            raise CommandError('Couldn\'t parse "%s"' % url)
        return self.hyperlinks
class Command(BaseCommand):
    """Management command that stores ISO names found at ISOLISTURL."""

    help = 'Gets new isos from %s' % ISOLISTURL

    def handle(self, *args, **options):
        """Save an Iso for every listed name that is not stored yet.

        The listing at ISOLISTURL is fetched and parsed with
        IsoListParser; each returned name without a matching Iso.name
        is saved as a new Iso.
        """
        parser = IsoListParser()
        # Load the known names once into a set so each membership test is
        # a hash lookup rather than a scan of the whole sequence.
        known = set(Iso.objects.values_list('name', flat=True))
        for iso in parser.parse(ISOLISTURL):
            if iso in known:
                continue
            Iso(name=iso).save()
            # Remember the name so a link that appears more than once in
            # the listing is only stored once.
            known.add(iso)
# vim: set ts=4 sw=4 et:
|