diff options
author | Dan McGee <dan@archlinux.org> | 2014-01-11 13:07:40 -0600 |
---|---|---|
committer | Dan McGee <dan@archlinux.org> | 2014-01-11 13:07:40 -0600 |
commit | 3827215fa3335f8da3c82d4d098eb402b6d29dbc (patch) | |
tree | a057c6f3d5ca70da72b01b6a416a7ab667bc9e56 | |
parent | f4d49590153a5c39d4b60ba0a9c2901c344ff45a (diff) |
Speed up feeds generation by batching writes
The XML generation underlying our package feeds was doing 1600+ calls to
the write() method on the outfile. For some reason, the Python standard
library insists on calling flush() after every write, which really makes
performance take a nosedive. Wrap the write calls and do them in batches
to remove some of the overhead and make feed generation a bit snappier.
Signed-off-by: Dan McGee <dan@archlinux.org>
-rw-r--r-- | feeds.py | 35 |
1 file changed, 32 insertions, 3 deletions
@@ -14,6 +14,22 @@ from news.models import News
 from releng.models import Release
 
 
+class BatchWritesWrapper(object):
+    def __init__(self, outfile, chunks=20):
+        self.outfile = outfile
+        self.chunks = chunks
+        self.buf = []
+    def write(self, s):
+        buf = self.buf
+        buf.append(s)
+        if len(buf) >= self.chunks:
+            self.outfile.write(''.join(buf))
+            self.buf = []
+    def flush(self):
+        self.outfile.write(''.join(self.buf))
+        self.outfile.flush()
+
+
 class GuidNotPermalinkFeed(Rss201rev2Feed):
     @staticmethod
     def check_for_unique_id(f):
@@ -26,13 +42,26 @@ class GuidNotPermalinkFeed(Rss201rev2Feed):
         return wrapper
 
     def write_items(self, handler):
-        # Totally disgusting. Monkey-patch the hander so if it sees a
-        # 'unique-id' field come through, add an isPermalink="false" attribute.
-        # Workaround for http://code.djangoproject.com/ticket/9800
+        '''
+        Totally disgusting. Monkey-patch the handler so if it sees a
+        'unique-id' field come through, add an isPermalink="false" attribute.
+        Workaround for http://code.djangoproject.com/ticket/9800
+        '''
         handler.addQuickElement = self.check_for_unique_id(
                 handler.addQuickElement)
         super(GuidNotPermalinkFeed, self).write_items(handler)
 
+    def write(self, outfile, encoding):
+        '''
+        Batch the underlying 'write' calls on the outfile because Python's
+        default saxutils XmlGenerator is a POS that insists on unbuffered
+        write/flush calls. This sucks when it is making 1-byte calls to write
+        '>' closing tags and over 1600 write calls in our package feed.
+        '''
+        wrapper = BatchWritesWrapper(outfile)
+        super(GuidNotPermalinkFeed, self).write(wrapper, encoding)
+        wrapper.flush()
+
 
 def package_etag(request, *args, **kwargs):
     latest = retrieve_latest(Package)