From cf8ecdf9fce0573ad207d024708f21a5dbbbb120 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 2 May 2012 09:46:52 -0500 Subject: rematch_developers: do mass updates instead of single saves When updating a lot of objects, it makes much more sense to perform targeted update queries rather than one-row-at-a-time saves. Signed-off-by: Dan McGee --- devel/management/commands/rematch_developers.py | 61 ++++++++++++------------- 1 file changed, 30 insertions(+), 31 deletions(-) (limited to 'devel/management/commands/rematch_developers.py') diff --git a/devel/management/commands/rematch_developers.py b/devel/management/commands/rematch_developers.py index 8383cc8d..ab2f0f4b 100644 --- a/devel/management/commands/rematch_developers.py +++ b/devel/management/commands/rematch_developers.py @@ -46,52 +46,51 @@ def handle_noargs(self, **options): @transaction.commit_on_success def match_packager(finder): - logger.info("getting all unmatched packages") + logger.info("getting all unmatched packager strings") package_count = matched_count = 0 - unknown = set() - - for package in Package.objects.filter(packager__isnull=True): - if package.packager_str in unknown: - continue - logger.debug("package %s, packager string %s", - package.pkgname, package.packager_str) - package_count += 1 - user = finder.find(package.packager_str) + mapping = {} + + unmatched = Package.objects.filter(packager__isnull=True).values_list( + 'packager_str', flat=True).order_by().distinct() + + for packager in unmatched: + logger.debug("packager string %s", packager) + user = finder.find(packager) if user: - package.packager = user + mapping[packager] = user logger.debug(" found user %s" % user.username) - package.save() matched_count += 1 - else: - unknown.add(package.packager_str) - logger.info("%d packager strings checked, %d newly matched", + for packager_str, user in mapping.items(): + package_count += Package.objects.filter(packager__isnull=True, + packager_str=packager_str).update(packager=user) + + logger.info("%d packages updated, %d packager strings matched", package_count, matched_count) - logger.debug("unknown packagers:\n%s", - "\n".join(unknown)) @transaction.commit_on_success def match_flagrequest(finder): - logger.info("getting all non-user flag requests") + logger.info("getting all flag requests emails from unknown users") req_count = matched_count = 0 - unknown = set() - - for request in FlagRequest.objects.filter(user__isnull=True): - if request.user_email in unknown: - continue - logger.debug("email %s", request.user_email) - req_count += 1 - user = finder.find_by_email(request.user_email) + mapping = {} + + unmatched = FlagRequest.objects.filter(user__isnull=True).values_list( + 'user_email', flat=True).order_by().distinct() + + for user_email in unmatched: + logger.debug("email %s", user_email) + user = finder.find_by_email(user_email) if user: - request.user = user + mapping[user_email] = user logger.debug(" found user %s" % user.username) - request.save() matched_count += 1 - else: - unknown.add(request.user_email) - logger.info("%d request emails checked, %d newly matched", + for user_email, user in mapping.items(): + req_count += FlagRequest.objects.filter(user__isnull=True, + user_email=user_email).update(user=user) + + logger.info("%d request emails updated, %d emails matched", req_count, matched_count) # vim: set ts=4 sw=4 et: -- cgit v1.2.3-54-g00ecf From ca0011c585ec28f9dde0f400a77fd6f859d520b0 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 15 Aug 2012 08:11:00 -0500 Subject: Add ability to rematch developers on @archlinux.org addresses This makes this matcher catch a bit more with the wide net we were already casting. Signed-off-by: Dan McGee --- devel/management/commands/rematch_developers.py | 4 +- devel/utils.py | 50 +++++++++++++++++-------- 2 files changed, 38 insertions(+), 16 deletions(-) (limited to 'devel/management/commands/rematch_developers.py') diff --git a/devel/management/commands/rematch_developers.py b/devel/management/commands/rematch_developers.py index ab2f0f4b..2b379588 100644 --- a/devel/management/commands/rematch_developers.py +++ b/devel/management/commands/rematch_developers.py @@ -53,6 +53,7 @@ def match_packager(finder): unmatched = Package.objects.filter(packager__isnull=True).values_list( 'packager_str', flat=True).order_by().distinct() + logger.info("%d packager strings retrieved", len(unmatched)) for packager in unmatched: logger.debug("packager string %s", packager) user = finder.find(packager) @@ -71,13 +72,14 @@ def match_packager(finder): @transaction.commit_on_success def match_flagrequest(finder): - logger.info("getting all flag requests emails from unknown users") + logger.info("getting all flag request email addresses from unknown users") req_count = matched_count = 0 mapping = {} unmatched = FlagRequest.objects.filter(user__isnull=True).values_list( 'user_email', flat=True).order_by().distinct() + logger.info("%d email addresses retrieved", len(unmatched)) for user_email in unmatched: logger.debug("email %s", user_email) user = finder.find_by_email(user_email) diff --git a/devel/utils.py b/devel/utils.py index 85b4e42f..e8e3a6c4 100644 --- a/devel/utils.py +++ b/devel/utils.py @@ -1,6 +1,7 @@ import re from django.contrib.auth.models import User +from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned from django.db import connection from django.db.models import Count, Q @@ -44,6 +45,15 @@ def get_annotated_maintainers(): return maintainers +def ignore_does_not_exist(func): + def new_func(*args, **kwargs): + try: + return func(*args, **kwargs) + except (ObjectDoesNotExist, MultipleObjectsReturned): + return None + return new_func + + class UserFinder(object): def __init__(self): self.cache = {} @@ -52,18 +62,33 @@ def __init__(self): self.pgp_cache = {} @staticmethod + @ignore_does_not_exist def user_email(name, email): if email: return User.objects.get(email=email) return None @staticmethod + @ignore_does_not_exist + def username_email(name, email): + if email and '@' in email: + # split email addr at '@' symbol, ensure domain matches + # or is a subdomain of archlinux.org + # TODO: configurable domain/regex somewhere? + username, domain = email.split('@', 1) + if re.match(r'^(.+\.)?archlinux.org$', domain): + return User.objects.get(username=username) + return None + + @staticmethod + @ignore_does_not_exist def profile_email(name, email): if email: return User.objects.get(userprofile__public_email=email) return None @staticmethod + @ignore_does_not_exist def user_name(name, email): # yes, a bit odd but this is the easiest way since we can't always be # sure how to split the name. Ensure every 'token' appears in at least @@ -102,14 +127,12 @@ def find(self, userstring): email = matches.group(2) user = None - find_methods = (self.user_email, self.profile_email, self.user_name) + find_methods = (self.user_email, self.profile_email, + self.username_email, self.user_name) for matcher in find_methods: - try: - user = matcher(name, email) - if user != None: - break - except (User.DoesNotExist, User.MultipleObjectsReturned): - pass + user = matcher(name, email) + if user != None: + break self.cache[userstring] = user self.email_cache[email] = user @@ -135,14 +158,11 @@ def find_by_email(self, email): if email in self.email_cache: return self.email_cache[email] - user = None - try: - user = self.user_email(None, email) - except User.DoesNotExist: - try: - user = self.profile_email(None, email) - except User.DoesNotExist: - pass + user = self.user_email(None, email) + if user is None: + user = self.profile_email(None, email) + if user is None: + user = self.username_email(None, email) self.email_cache[email] = user return user -- cgit v1.2.3-54-g00ecf