diff options
Diffstat (limited to 'cron-jobs/check_archlinux/check_packages.py')
-rwxr-xr-x | cron-jobs/check_archlinux/check_packages.py | 494 |
1 files changed, 494 insertions, 0 deletions
diff --git a/cron-jobs/check_archlinux/check_packages.py b/cron-jobs/check_archlinux/check_packages.py new file mode 100755 index 0000000..0a2b10e --- /dev/null +++ b/cron-jobs/check_archlinux/check_packages.py @@ -0,0 +1,494 @@ +#!/usr/bin/python2 +# +# check_archlinux.py +# +# Original script by Scott Horowitz <stonecrest@gmail.com> +# Rewritten by Xavier Chantry <shiningxc@gmail.com> +# +# This script currently checks for a number of issues in your ABS tree: +# 1. Directories with missing PKGBUILDS +# 2. Invalid PKGBUILDs (bash syntax error for instance) +# 3. PKGBUILD names that don't match their directory +# 4. Duplicate PKGBUILDs +# 5. Valid arch's in PKGBUILDS +# 6. Missing (make-)dependencies +# 7. Hierarchy of repos (e.g., that a core package doesn't depend on +# a non-core package) +# 8. Circular dependencies + +import os,re,commands,getopt,sys,tarfile,alpm +import pdb + +DBEXT='.db.tar.gz' + +packages = {} # pkgname : PacmanPackage +repopkgs = {} # pkgname : PacmanPackage +provisions = {} # provision : PacmanPackage +pkgdeps,makepkgdeps = {},{} # PacmanPackage : list of the PacmanPackage dependencies +invalid_pkgbuilds = [] +missing_pkgbuilds = [] +dups = [] + +dbonly = [] +absonly = [] + +mismatches = [] +missing_deps = [] +missing_makedeps = [] +invalid_archs = [] +dep_hierarchy = [] +makedep_hierarchy = [] +circular_deps = [] # pkgname>dep1>dep2>...>pkgname +checked_deps = [] + +class PacmanPackage: + def __init__(self): + self.name,self.version = "","" + self.base = "" + self.path,self.repo = "","" + self.deps,self.makedeps = [],[] + self.provides,self.conflicts = [],[] + self.archs = [] + +class Depend: + def __init__(self,name,version,mod): + self.name = name + self.version = version + self.mod = mod + +def parse_pkgbuilds(repos,arch): + for absroot in absroots: + for repo in repos: + cmd = os.path.dirname(os.path.realpath(sys.argv[0])) + '/parse_pkgbuilds.sh ' + cmd += arch + ' ' + absroot + '/' + repo + (status,output) = commands.getstatusoutput(cmd) + if status != 0: + print "Error : failed to run '%s'" % cmd + sys.exit() + parse_data(repo,output) + +def parse_data(repo,data): + attrname = None + + for line in data.split('\n'): + if line.startswith('%'): + attrname = line.strip('%').lower() + elif line.strip() == '': + attrname = None + elif attrname == "invalid": + if repo in repos: + invalid_pkgbuilds.append(line) + elif attrname == "missing": + if repo in repos: + missing_pkgbuilds.append(line) + elif attrname == "name": + pkg = PacmanPackage() + pkg.name = line + pkg.repo = repo + dup = None + if pkg.name in packages: + dup = packages[pkg.name] + else: + packages[pkg.name] = pkg + elif attrname == "base": + pkg.base = line + elif attrname == "version": + pkg.version = line + elif attrname == "path": + pkg.path = line + if dup != None and (pkg.repo in repos or dup.repo in repos): + dups.append(pkg.path + " vs. " + dup.path) + elif attrname == "arch": + pkg.archs.append(line) + elif attrname == "depends": + pkg.deps.append(line) + elif attrname == "makedepends": + pkg.makedeps.append(line) + elif attrname == "conflicts": + pkg.conflicts.append(line) + elif attrname == "provides": + pkg.provides.append(line) + +def parse_dbs(repos,arch): + dbpkgs = {} + for repo in repos: + pkgs = set([]) + db = tarfile.open(os.path.join(repodir,repo,'os',arch,repo + DBEXT)) + for line in db.getnames(): + if not '/' in line: + pkgs.add(line.rsplit('-',2)[0]) + dbpkgs[repo] = pkgs + return(dbpkgs) + +def splitdep(dep): + name = dep + version = "" + mod = "" + for char in (">=", "<=", "=", ">", "<"): + pos = dep.find(char) + if pos > -1: + name = dep[:pos] + version = dep[pos:].replace(char, "") + mod = char + break + return Depend(name,version,mod) + +def splitprov(prov): + name = prov + version = "" + pos = prov.find("=") + if pos > -1: + name = prov[:pos] + version = prov[pos:].replace("=", "") + return (name,version) + +def vercmp(v1,mod,v2): + res = alpm.vercmp(v1,v2) + if res == 0: + return (mod.find("=") > -1) + elif res < 0: + return (mod.find("<") > -1) + elif res > 0: + return (mod.find(">") > -1) + return False + + +def depcmp(name,version,dep): + if name != dep.name: + return False + if dep.version == "" or dep.mod == "": + return True + if version == "": + return False + return vercmp(version,dep.mod,dep.version) + +def provcmp(pkg,dep): + for prov in pkg.provides: + (provname,provver) = splitprov(prov) + if depcmp(provname,provver,dep): + return True + return False + +def verify_dep(dep): + dep = splitdep(dep) + if dep.name in packages: + pkg = packages[dep.name] + if depcmp(pkg.name,pkg.version,dep): + return [pkg] + if dep.name in provisions: + provlist = provisions[dep.name] + results = [] + for prov in provlist: + if provcmp(prov,dep): + results.append(prov) + return results + return [] + +def verify_deps(name,repo,deps): + pkg_deps = [] + missdeps = [] + hierarchy = [] + for dep in deps: + pkglist = verify_dep(dep) + if pkglist == []: + missdeps.append(repo + "/" + name + " --> '" + dep + "'") + else: + valid_repos = get_repo_hierarchy(repo) + pkgdep = None + for pkg in pkglist: + if pkg.repo in valid_repos: + pkgdep = pkg + break + if not pkgdep: + pkgdep = pkglist[0] + hierarchy.append((repo,name,pkgdep)) + + pkg_deps.append(pkgdep) + + return (pkg_deps,missdeps,hierarchy) + +def compute_deplist(pkg): + list = [] + stack = [pkg] + while stack != []: + dep = stack.pop() + if dep in pkgdeps: + for dep2 in pkgdeps[dep]: + if dep2 not in list: + list.append(dep2) + stack.append(dep2) + if dep in makepkgdeps: + for dep2 in makepkgdeps[dep]: + if dep2 not in list: + list.append(dep2) + stack.append(dep2) + return list + +def check_hierarchy(deph): + hierarchy = [] + for (repo,name,pkgdep) in deph: + deplist = compute_deplist(pkgdep) + valid_repos = get_repo_hierarchy(repo) + extdeps = [] + for dep in deplist: + if dep.repo not in valid_repos: + extdeps.append(dep.name) + string = repo + "/" + name + " depends on " + pkgdep.repo + "/" + pkgdep.name + " (" + string += "%s extra (make)deps to pull" % len(extdeps) + if 0 < len(extdeps) < 10: + string += " : " + ' '.join(extdeps) + string += ")" + hierarchy.append(string) + return hierarchy + +def get_repo_hierarchy(repo): + repo_hierarchy = {'core': ['core'], \ + 'extra': ['core', 'extra'], \ + 'community': ['core', 'extra', 'community'], \ + 'multilib': ['core', 'extra', 'community', 'multilib'] } + if repo in repo_hierarchy: + return repo_hierarchy[repo] + else: + return ['core','extra','community'] + +def verify_archs(name,repo,archs): + valid_archs = ['any', 'i686', 'x86_64'] + invalid_archs = [] + for arch in archs: + if arch not in valid_archs: + invalid_archs.append(repo + "/" + name + " --> " + arch) + return invalid_archs + +def find_scc(packages): + # reset all variables + global index,S,pkgindex,pkglowlink + index = 0 + S = [] + pkgindex = {} + pkglowlink = {} + cycles = [] + for pkg in packages: + tarjan(pkg) + +def tarjan(pkg): + global index,S,pkgindex,pkglowlink,cycles + pkgindex[pkg] = index + pkglowlink[pkg] = index + index += 1 + checked_deps.append(pkg) + S.append(pkg) + deps = [] + if pkg in pkgdeps: + deps = pkgdeps[pkg] + for dep in deps: + if dep not in pkgindex: + tarjan(dep) + pkglowlink[pkg] = min(pkglowlink[pkg],pkglowlink[dep]) + elif dep in S: + pkglowlink[pkg] = min(pkglowlink[pkg],pkgindex[dep]) + if pkglowlink[pkg] == pkgindex[pkg]: + dep = S.pop() + if pkg == dep: + return + path = pkg.name + while pkg != dep: + path = dep.repo + "/" + dep.name + ">" + path + dep = S.pop() + path = dep.name + ">" + path + if pkg.repo in repos: + circular_deps.append(path) + +def print_heading(heading): + print "" + print "=" * (len(heading) + 4) + print "= " + heading + " =" + print "=" * (len(heading) + 4) + +def print_subheading(subheading): + print "" + print subheading + print "-" * (len(subheading) + 2) + +def print_missdeps(pkgname,missdeps) : + for d in missdeps: + print pkgname + " : " + d + +def print_result(list, subheading): + if len(list) > 0: + list.sort() + print_subheading(subheading) + for item in list: + print item + +def print_results(): + print_result(missing_pkgbuilds, "Missing PKGBUILDs") + print_result(invalid_pkgbuilds, "Invalid PKGBUILDs") + print_result(mismatches, "Mismatched Pkgnames") + print_result(dups, "Duplicate PKGBUILDs") + print_result(invalid_archs, "Invalid Archs") + print_result(missing_deps, "Missing Dependencies") + print_result(missing_makedeps, "Missing Makedepends") + print_result(dep_hierarchy, "Repo Hierarchy for Dependencies") + print_result(makedep_hierarchy, "Repo Hierarchy for Makedepends") + print_result(circular_deps, "Circular Dependencies") + print_result(dbonly, "Packages found in db, but not in tree") + print_result(absonly,"Packages found in tree, but not in db") + print_subheading("Summary") + print "Missing PKGBUILDs: ", len(missing_pkgbuilds) + print "Invalid PKGBUILDs: ", len(invalid_pkgbuilds) + print "Mismatching PKGBUILD names: ", len(mismatches) + print "Duplicate PKGBUILDs: ", len(dups) + print "Invalid archs: ", len(invalid_archs) + print "Missing (make)dependencies: ", len(missing_deps)+len(missing_makedeps) + print "Repo hierarchy problems: ", len(dep_hierarchy)+len(makedep_hierarchy) + print "Circular dependencies: ", len(circular_deps) + print "In db, but not in tree: ", len(dbonly) + print "In tree, but not in db: ", len(absonly) + print "" + +def print_usage(): + print "" + print "Usage: ./check_packages.py [OPTION]" + print "" + print "Options:" + print " --abs-tree=<path[,path]> Check the specified tree(s) (default : /var/abs)" + print " --repos=<r1,r2,...> Check the specified repos (default : core,extra)" + print " --arch=<i686|x86_64> Check the specified arch (default : i686)" + print " --repo-dir=<path> Check the dbs at the specified path (default : /srv/ftp)" + print " -h, --help Show this help and exit" + print "" + print "Examples:" + print "\n Check core and extra in existing abs tree:" + print " ./check_packages.py --abs-tree=/var/abs --repos=core,extra --arch=i686" + print "\n Check community:" + print " ./check_packages.py --abs-tree=/var/abs --repos=community --arch=i686" + print "" + +## Default path to the abs root directory +absroots = ["/var/abs"] +## Default list of repos to check +repos = ['core', 'extra'] +## Default arch +arch = "i686" +## Default repodir +repodir = "/srv/ftp" + +try: + opts, args = getopt.getopt(sys.argv[1:], "", ["abs-tree=", "repos=", + "arch=", "repo-dir="]) +except getopt.GetoptError: + print_usage() + sys.exit() +if opts != []: + for o, a in opts: + if o in ("--abs-tree"): + absroots = a.split(',') + elif o in ("--repos"): + repos = a.split(",") + elif o in ("--arch"): + arch = a + elif o in ("--repo-dir"): + repodir = a + else: + print_usage() + sys.exit() + if args != []: + print_usage() + sys.exit() + +for absroot in absroots: + if not os.path.isdir(absroot): + print "Error : the abs tree " + absroot + " does not exist" + sys.exit() + for repo in repos: + repopath = absroot + "/" + repo + if not os.path.isdir(repopath): + print("Warning : the repository " + repo + " does not exist in " + absroot) + +if not os.path.isdir(repodir): + print "Error: the repository directory %s does not exist" % repodir + sys.exit() +for repo in repos: + path = os.path.join(repodir,repo,'os',arch,repo + DBEXT) + if not os.path.isfile(path): + print "Error : repo DB %s : File not found" % path + sys.exit() + if not tarfile.is_tarfile(path): + print "Error : Cant open repo DB %s, not a valid tar file" % path + sys.exit() +# repos which need to be loaded +loadrepos = set([]) +for repo in repos: + loadrepos = loadrepos | set(get_repo_hierarchy(repo)) + +print_heading("Integrity Check " + arch + " of " + ",".join(repos)) +print("\nPerforming integrity checks...") + +print("==> parsing pkgbuilds") +parse_pkgbuilds(loadrepos,arch) + +# fill provisions +for name,pkg in packages.iteritems(): + for prov in pkg.provides: + provname=prov.split("=")[0] + if provname not in provisions: + provisions[provname] = [] + provisions[provname].append(pkg) + +# fill repopkgs +for name,pkg in packages.iteritems(): + if pkg.repo in repos: + repopkgs[name] = pkg + +print("==> parsing db files") +dbpkgs = parse_dbs(repos,arch) + +print("==> checking mismatches") +for name,pkg in repopkgs.iteritems(): + pkgdirname = pkg.path.split("/")[-1] + if name != pkgdirname and pkg.base != pkgdirname: + mismatches.append(name + " vs. " + pkg.path) + +print("==> checking archs") +for name,pkg in repopkgs.iteritems(): + archs = verify_archs(name,pkg.repo,pkg.archs) + invalid_archs.extend(archs) + +deph,makedeph = [],[] + +print("==> checking dependencies") +for name,pkg in repopkgs.iteritems(): + (deps,missdeps,hierarchy) = verify_deps(name,pkg.repo,pkg.deps) + pkgdeps[pkg] = deps + missing_deps.extend(missdeps) + deph.extend(hierarchy) + +print("==> checking makedepends") +for name,pkg in repopkgs.iteritems(): + (makedeps,missdeps,hierarchy) = verify_deps(name,pkg.repo,pkg.makedeps) + makepkgdeps[pkg] = makedeps + missing_makedeps.extend(missdeps) + makedeph.extend(hierarchy) + +print("==> checking hierarchy") +dep_hierarchy = check_hierarchy(deph) +makedep_hierarchy = check_hierarchy(makedeph) + +print("==> checking for circular dependencies") +# make sure pkgdeps is filled for every package +for name,pkg in packages.iteritems(): + if pkg not in pkgdeps: + (deps,missdeps,_) = verify_deps(name,pkg.repo,pkg.deps) + pkgdeps[pkg] = deps +find_scc(repopkgs.values()) + +print("==> checking for differences between db files and pkgbuilds") +for repo in repos: + for pkg in dbpkgs[repo]: + if not (pkg in repopkgs and repopkgs[pkg].repo == repo): + dbonly.append("%s/%s" % (repo,pkg)) +for name,pkg in repopkgs.iteritems(): + if not name in dbpkgs[pkg.repo]: + absonly.append("%s/%s" % (pkg.repo,name)) + +print_results() |