From 3827215fa3335f8da3c82d4d098eb402b6d29dbc Mon Sep 17 00:00:00 2001
From: Dan McGee
Date: Sat, 11 Jan 2014 13:07:40 -0600
Subject: Speed up feeds generation by batching writes

The XML generation underlying our package feeds was doing 1600+ calls to
the write() method on the outfile. For some reason, the Python standard
library insists on calling flush() after every write, which really makes
performance take a nosedive.

Wrap the write calls and do them in batches to remove some of the overhead
and make feed generation a bit snappier.

Signed-off-by: Dan McGee
---
 feeds.py | 35 ++++++++++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/feeds.py b/feeds.py
index ecdb7d9a..feb8a84a 100644
--- a/feeds.py
+++ b/feeds.py
@@ -14,6 +14,22 @@
 from releng.models import Release
 
 
+class BatchWritesWrapper(object):
+    def __init__(self, outfile, chunks=20):
+        self.outfile = outfile
+        self.chunks = chunks
+        self.buf = []
+    def write(self, s):
+        buf = self.buf
+        buf.append(s)
+        if len(buf) >= self.chunks:
+            self.outfile.write(''.join(buf))
+            self.buf = []
+    def flush(self):
+        self.outfile.write(''.join(self.buf))
+        self.outfile.flush()
+
+
 class GuidNotPermalinkFeed(Rss201rev2Feed):
     @staticmethod
     def check_for_unique_id(f):
@@ -26,13 +42,26 @@ def wrapper(name, contents=None, attrs=None):
         return wrapper
 
     def write_items(self, handler):
-        # Totally disgusting. Monkey-patch the hander so if it sees a
-        # 'unique-id' field come through, add an isPermalink="false" attribute.
-        # Workaround for http://code.djangoproject.com/ticket/9800
+        '''
+        Totally disgusting. Monkey-patch the handler so if it sees a
+        'unique-id' field come through, add an isPermalink="false" attribute.
+        Workaround for http://code.djangoproject.com/ticket/9800
+        '''
         handler.addQuickElement = self.check_for_unique_id(
                 handler.addQuickElement)
         super(GuidNotPermalinkFeed, self).write_items(handler)
 
+    def write(self, outfile, encoding):
+        '''
+        Batch the underlying 'write' calls on the outfile because Python's
+        default saxutils XmlGenerator is a POS that insists on unbuffered
+        write/flush calls. This sucks when it is making 1-byte calls to write
+        '>' closing tags and over 1600 write calls in our package feed.
+        '''
+        wrapper = BatchWritesWrapper(outfile)
+        super(GuidNotPermalinkFeed, self).write(wrapper, encoding)
+        wrapper.flush()
+
 
 def package_etag(request, *args, **kwargs):
     latest = retrieve_latest(Package)
-- 
cgit v1.2.3-54-g00ecf