From da20949c8cc185e91dbaae1b8369fcffa3447081 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 23 Jun 2011 19:11:12 -0500 Subject: Move find_user method to devel utils This could be handy elsewhere as well, and it is loosely coupled to anything else in reporead. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 51 +---------------------------------- 1 file changed, 1 insertion(+), 50 deletions(-) (limited to 'devel/management/commands') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index baf7fee1..138931ff 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -16,7 +16,6 @@ from django.core.management.base import BaseCommand, CommandError from django.contrib.auth.models import User from django.db import transaction -from django.db.models import Q from collections import defaultdict import io @@ -28,6 +27,7 @@ from datetime import datetime from optparse import make_option +from devel.utils import find_user from main.models import Arch, Package, PackageDepend, PackageFile, Repo from packages.models import Conflict, Provision, Replacement @@ -130,55 +130,6 @@ def full_version(self): return u'%s-%s' % (self.ver, self.rel) -def find_user(userstring): - ''' - Attempt to find the corresponding User object for a standard - packager string, e.g. something like - 'A. U. Thor '. - We start by searching for a matching email address; we then move onto - matching by first/last name. If we cannot find a user, then return None. - ''' - if userstring in find_user.cache: - return find_user.cache[userstring] - matches = re.match(r'^([^<]+)? ?<([^>]*)>', userstring) - if not matches: - return None - - user = None - name = matches.group(1) - email = matches.group(2) - - def user_email(): - return User.objects.get(email=email) - def profile_email(): - return User.objects.get(userprofile__public_email=email) - def user_name(): - # yes, a bit odd but this is the easiest way since we can't always be - # sure how to split the name. Ensure every 'token' appears in at least - # one of the two name fields. - name_q = Q() - for token in name.split(): - # ignore quoted parts; e.g. nicknames in strings - if re.match(r'^[\'"].*[\'"]$', token): - continue - name_q &= (Q(first_name__icontains=token) | - Q(last_name__icontains=token)) - return User.objects.get(name_q) - - for matcher in (user_email, profile_email, user_name): - try: - user = matcher() - break - except (User.DoesNotExist, User.MultipleObjectsReturned): - pass - - find_user.cache[userstring] = user - return user - -# cached mappings of user strings -> User objects so we don't have to do the -# lookup more than strictly necessary. -find_user.cache = {} - DEPEND_RE = re.compile(r"^(.+?)((>=|<=|=|>|<)(.*))?$") def create_depend(package, dep_str, optional=False): -- cgit v1.2.3-54-g00ecf From 9156003d2d93de57c663901c39ac66316a3d969e Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 23 Jun 2011 19:50:46 -0500 Subject: Turn find_user into UserFinder class This moves the cache inside an instance. Also add a few more tests. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 6 ++- devel/tests.py | 69 ++++++++++++++++++++-------- devel/utils.py | 86 +++++++++++++++++++---------------- 3 files changed, 103 insertions(+), 58 deletions(-) (limited to 'devel/management/commands') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 138931ff..470b785d 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -27,7 +27,7 @@ from datetime import datetime from optparse import make_option -from devel.utils import find_user +from devel.utils import UserFinder from main.models import Arch, Package, PackageDepend, PackageFile, Repo from packages.models import Conflict, Provision, Replacement @@ -182,6 +182,8 @@ def create_multivalued(dbpkg, repopkg, db_attr, repo_attr): for name in getattr(repopkg, repo_attr): collection.create(name=name) +finder = UserFinder() + def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): db_score = 1 @@ -200,7 +202,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.build_date = repopkg.builddate dbpkg.packager_str = repopkg.packager # attempt to find the corresponding django user for this string - dbpkg.packager = find_user(repopkg.packager) + dbpkg.packager = finder.find(repopkg.packager) if timestamp: dbpkg.flag_date = None diff --git a/devel/tests.py b/devel/tests.py index 33b02582..c982e502 100644 --- a/devel/tests.py +++ b/devel/tests.py @@ -1,7 +1,7 @@ from django.test import TestCase from django.contrib.auth.models import User -from devel.utils import find_user +from devel.utils import UserFinder from main.models import UserProfile class DevelTest(TestCase): @@ -33,6 +33,8 @@ def test_mirrors(self): class FindUserTest(TestCase): def setUp(self): + self.finder = UserFinder() + self.user1 = User.objects.create(username="joeuser", first_name="Joe", last_name="User", email="user1@example.com") self.user2 = User.objects.create(username="john", first_name="John", @@ -44,29 +46,60 @@ def setUp(self): email_addr = "%s@awesome.com" % user.username UserProfile.objects.create(user=user, public_email=email_addr) + self.user4 = User.objects.create(username="tim1", first_name="Tim", + last_name="One", email="tim@example.com") + self.user5 = User.objects.create(username="tim2", first_name="Tim", + last_name="Two", email="timtwo@example.com") + def test_not_matching(self): - self.assertIsNone(find_user(None)) - self.assertIsNone(find_user("")) - self.assertIsNone(find_user("Bogus")) - self.assertIsNone(find_user("Bogus ")) - self.assertIsNone(find_user("")) - self.assertIsNone(find_user("bogus@example.com")) - self.assertIsNone(find_user("Unknown Packager")) + self.assertIsNone(self.finder.find(None)) + self.assertIsNone(self.finder.find("")) + self.assertIsNone(self.finder.find("Bogus")) + self.assertIsNone(self.finder.find("Bogus ")) + self.assertIsNone(self.finder.find("")) + self.assertIsNone(self.finder.find("bogus@example.com")) + self.assertIsNone(self.finder.find("Unknown Packager")) def test_by_email(self): - self.assertEqual(self.user1, find_user("XXX YYY ")) - self.assertEqual(self.user2, find_user("YYY ZZZ ")) + self.assertEqual(self.user1, + self.finder.find("XXX YYY ")) + self.assertEqual(self.user2, + self.finder.find("YYY ZZZ ")) def test_by_profile_email(self): - self.assertEqual(self.user1, find_user("XXX ")) - self.assertEqual(self.user2, find_user("YYY ")) - self.assertEqual(self.user3, find_user("ZZZ ")) + self.assertEqual(self.user1, + self.finder.find("XXX ")) + self.assertEqual(self.user2, + self.finder.find("YYY ")) + self.assertEqual(self.user3, + self.finder.find("ZZZ ")) def test_by_name(self): - self.assertEqual(self.user1, find_user("Joe User ")) - self.assertEqual(self.user1, find_user("Joe User")) - self.assertEqual(self.user2, find_user("John ")) - self.assertEqual(self.user3, find_user("Bob Jones ")) + self.assertEqual(self.user1, + self.finder.find("Joe User ")) + self.assertEqual(self.user1, + self.finder.find("Joe User")) + self.assertEqual(self.user2, + self.finder.find("John ")) + self.assertEqual(self.user2, + self.finder.find("John")) + self.assertEqual(self.user3, + self.finder.find("Bob Jones ")) + + def test_cache(self): + # simply look two of them up, but then do it repeatedly + for i in range(50): + self.assertEqual(self.user1, + self.finder.find("XXX YYY ")) + self.assertEqual(self.user3, + self.finder.find("Bob Jones ")) + + def test_ambiguous(self): + self.assertEqual(self.user4, + self.finder.find("Tim One ")) + self.assertEqual(self.user5, + self.finder.find("Tim Two ")) + self.assertIsNone(self.finder.find("Tim ")) # vim: set ts=4 sw=4 et: diff --git a/devel/utils.py b/devel/utils.py index acdda959..3a6ad699 100644 --- a/devel/utils.py +++ b/devel/utils.py @@ -43,35 +43,25 @@ def get_annotated_maintainers(): return maintainers -def find_user(userstring): - ''' - Attempt to find the corresponding User object for a standard - packager string, e.g. something like - 'A. U. Thor '. - We start by searching for a matching email address; we then move onto - matching by first/last name. If we cannot find a user, then return None. - ''' - if not userstring: - return None - if userstring in find_user.cache: - return find_user.cache[userstring] - matches = re.match(r'^([^<]+)? ?<([^>]*)>', userstring) - if not matches: - name = userstring - email = None - else: - name = matches.group(1) - email = matches.group(2) - - def user_email(): + +class UserFinder(object): + def __init__(self): + self.cache = {} + + @staticmethod + def user_email(name, email): if email: return User.objects.get(email=email) return None - def profile_email(): + + @staticmethod + def profile_email(name, email): if email: return User.objects.get(userprofile__public_email=email) return None - def user_name(): + + @staticmethod + def user_name(name, email): # yes, a bit odd but this is the easiest way since we can't always be # sure how to split the name. Ensure every 'token' appears in at least # one of the two name fields. @@ -86,20 +76,40 @@ def user_name(): Q(last_name__icontains=token)) return User.objects.get(name_q) - user = None - for matcher in (user_email, profile_email, user_name): - try: - user = matcher() - if user != None: - break - except (User.DoesNotExist, User.MultipleObjectsReturned): - pass - - find_user.cache[userstring] = user - return user - -# cached mappings of user strings -> User objects so we don't have to do the -# lookup more than strictly necessary. -find_user.cache = {} + def find(self, userstring): + ''' + Attempt to find the corresponding User object for a standard + packager string, e.g. something like + 'A. U. Thor '. + We start by searching for a matching email address; we then move onto + matching by first/last name. If we cannot find a user, then return None. + ''' + if not userstring: + return None + if userstring in self.cache: + return self.cache[userstring] + matches = re.match(r'^([^<]+)? ?<([^>]*)>', userstring) + if not matches: + name = userstring + email = None + else: + name = matches.group(1) + email = matches.group(2) + + user = None + find_methods = (self.user_email, self.profile_email, self.user_name) + for matcher in find_methods: + try: + user = matcher(name, email) + if user != None: + break + except (User.DoesNotExist, User.MultipleObjectsReturned): + pass + + self.cache[userstring] = user + return user + + def clear_cache(self): + self.cache = {} # vim: set ts=4 sw=4 et: -- cgit v1.2.3-54-g00ecf From 82289ebb4432b3372b959430581afa0a2158acb9 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 23 Jun 2011 20:11:07 -0500 Subject: Add a rematch_packager management command This allows quick resolution of all unmatched packages, especially after tweaking the way find_user works. Signed-off-by: Dan McGee --- devel/management/commands/rematch_packager.py | 64 +++++++++++++++++++++++++++ devel/tests.py | 12 ++++- devel/utils.py | 8 ++-- 3 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 devel/management/commands/rematch_packager.py (limited to 'devel/management/commands') diff --git a/devel/management/commands/rematch_packager.py b/devel/management/commands/rematch_packager.py new file mode 100644 index 00000000..ba6e6a54 --- /dev/null +++ b/devel/management/commands/rematch_packager.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +""" +rematch_packager command + +Match all packages with a packager_str but NULL packager_id to a packager if we +can find one. + +Usage: ./manage.py rematch_packager +""" + +from django.core.management.base import NoArgsCommand + +import sys +import logging + +from devel.utils import UserFinder +from main.models import Package + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s -> %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + stream=sys.stderr) +logger = logging.getLogger() + +class Command(NoArgsCommand): + help = "Runs a check on all active mirror URLs to determine if they are reachable via IPv4 and/or v6." + + def handle_noargs(self, **options): + v = int(options.get('verbosity', None)) + if v == 0: + logger.level = logging.ERROR + elif v == 1: + logger.level = logging.INFO + elif v == 2: + logger.level = logging.DEBUG + + return match_packager() + +def match_packager(): + finder = UserFinder() + logger.info("getting all unmatched packages") + package_count = matched_count = 0 + unknown = set() + + for package in Package.objects.filter(packager__isnull=True): + logger.debug("package %s, packager string %s", + package.pkgname, package.packager_str) + package_count += 1 + user = finder.find(package.packager_str) + if user: + package.packager = user + logger.debug(" found user %s" % user.username) + package.save() + matched_count += 1 + else: + unknown.add(package.packager_str) + + logger.info("%d packages checked, %d newly matched", + package_count, matched_count) + logger.debug("unknown packagers:\n%s", + "\n".join(unknown)) + +# vim: set ts=4 sw=4 et: diff --git a/devel/tests.py b/devel/tests.py index c982e502..36691179 100644 --- a/devel/tests.py +++ b/devel/tests.py @@ -1,7 +1,7 @@ from django.test import TestCase from django.contrib.auth.models import User -from devel.utils import UserFinder +from devel.utils import UserFinder from main.models import UserProfile class DevelTest(TestCase): @@ -87,6 +87,16 @@ def test_by_name(self): self.assertEqual(self.user3, self.finder.find("Bob Jones ")) + def test_by_invalid(self): + self.assertEqual(self.user1, + self.finder.find("Joe User ]*)>', userstring) + + name = email = None + + matches = re.match(r'^([^<]+)? ?<([^>]*)>?', userstring) if not matches: - name = userstring - email = None + name = userstring.strip() else: name = matches.group(1) email = matches.group(2) -- cgit v1.2.3-54-g00ecf