summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPierre Schmitz <pierre@archlinux.de>2010-08-18 16:20:27 +0200
committerPierre Schmitz <pierre@archlinux.de>2010-08-18 16:20:27 +0200
commit683db123710ff9092ae99929fce232620932fdb0 (patch)
treed27df12e90e3d1d1de5b5d52d9a6c83afd8c2fc5
parent8647de4f52e05eadc0a4832813e9a57c20a487dd (diff)
Rewrite ftpdir-cleanup
* runtime reduced to < 1 second * uses file lists and the comm command * searches for missing packages * removes empty legacy directories like extra/os/any
-rwxr-xr-xcron-jobs/ftpdir-cleanup116
-rwxr-xr-xmisc-scripts/ftpdir-cleanup-repo187
2 files changed, 92 insertions, 211 deletions
diff --git a/cron-jobs/ftpdir-cleanup b/cron-jobs/ftpdir-cleanup
index 3456e9c..0a43bd1 100755
--- a/cron-jobs/ftpdir-cleanup
+++ b/cron-jobs/ftpdir-cleanup
@@ -3,37 +3,105 @@
. "$(dirname $0)/../db-functions"
. "$(dirname $0)/../config"
-repos="$(get_repos_for_host)"
+# Dispose of the package files given as arguments.
+# Symlinks are deleted; anything else is moved to $CLEANUP_DESTDIR.
+# Does nothing at all when $CLEANUP_DRYRUN evaluates to true.
+clean_pkg() {
+ local pkg
+
+ if ! ${CLEANUP_DRYRUN}; then
+ for pkg in "$@"; do
+ if [ -h "$pkg" ]; then
+ rm -f "$pkg"
+ else
+ mv -f "$pkg" "$CLEANUP_DESTDIR"
+ fi
+ done
+ fi
+}
+
+repos=($(get_repos_for_host))
script_lock
-#adjust the nice level to run at a lower priority
-/usr/bin/renice +10 -p $$ > /dev/null
+for repo in ${repos[@]}; do
+ for arch in ${ARCHES[@]}; do
+ repo_lock ${repo} ${arch} || exit 1
+ done
+done
+
+${CLEANUP_DRYRUN} && warning 'dry run mode is active'
-repopaths=''
-for repo in $repos; do
- $(dirname $0)/../misc-scripts/ftpdir-cleanup-repo $repo
- repopaths="${repopaths} ${FTP_BASE}/${repo}/os/"
+for repo in ${repos[@]}; do
+ for arch in ${ARCHES[@]}; do
+ if [ ! -f "${FTP_BASE}/${repo}/os/${arch}/${repo}${DBEXT}" ]; then
+ warning "${FTP_BASE}/${repo}/os/${arch}/${repo}${DBEXT} not found, skipping"
+ continue
+ fi
+ # get a list of actual available package files
+ find "${FTP_BASE}/${repo}/os/${arch}" -xtype f -name "*${PKGEXT}" -printf '%f\n' | sort > "${WORKDIR}/repo-${repo}-${arch}"
+ # get a list of package files defined in the repo db
+ bsdtar -xOf "${FTP_BASE}/${repo}/os/${arch}/${repo}${DBEXT}" | awk '/^%FILENAME%/{getline;print}' | sort > "${WORKDIR}/db-${repo}-${arch}"
+
+ missing_pkgs=($(comm -13 "${WORKDIR}/repo-${repo}-${arch}" "${WORKDIR}/db-${repo}-${arch}"))
+ if [ ${#missing_pkgs[@]} -ge 1 ]; then
+ error "Missing packages in [${repo}] (${arch})..."
+ for missing_pkg in ${missing_pkgs[@]}; do
+ msg2 "${missing_pkg}"
+ done
+ fi
+
+ old_pkgs=($(comm -23 "${WORKDIR}/repo-${repo}-${arch}" "${WORKDIR}/db-${repo}-${arch}"))
+ if [ ${#old_pkgs[@]} -ge 1 ]; then
+ msg "Removing old packages from [${repo}] (${arch})..."
+ for old_pkg in ${old_pkgs[@]}; do
+ msg2 "${old_pkg}"
+ clean_pkg "${FTP_BASE}/${repo}/os/${arch}/${old_pkg}"
+ done
+ fi
+ done
done
-to_cleanup=""
-poolpath="$FTP_BASE/$(get_pkgpool_for_host)/"
-pushd $poolpath >/dev/null
-for pkg in *$PKGEXT; do
-[ -f "$pkg" ] || continue # in case we get a file named "*.pkg.tar.gz"
-LINKS="$(/usr/bin/find $repopaths -type l -name "$pkg" 2>/dev/null)"
-if [ -z "$LINKS" ]; then
- to_cleanup="$to_cleanup $poolpath/$pkg"
+# get a list of all available packages in the package pool
+find "$FTP_BASE/$(get_pkgpool_for_host)" -name "*${PKGEXT}" -printf '%f\n' | sort > "${WORKDIR}/pool"
+# create a list of packages in our db
+cat "${WORKDIR}/db-"* | sort -u > "${WORKDIR}/db"
+
+old_pkgs=($(comm -23 "${WORKDIR}/pool" "${WORKDIR}/db"))
+if [ ${#old_pkgs[@]} -ge 1 ]; then
+ msg "Removing old packages from package pool..."
+ for old_pkg in ${old_pkgs[@]}; do
+ msg2 "${old_pkg}"
+ clean_pkg "$FTP_BASE/$(get_pkgpool_for_host)/${old_pkg}"
+ done
fi
+
+# cleanup of legacy $repo/os/any directories:
+# an empty directory is removed; otherwise package files that are not
+# listed in any of this repo's per-arch db file lists are cleaned up
+for repo in ${repos[@]}; do
+ if [ ! -d "${FTP_BASE}/${repo}/os/any" ]; then
+ continue
+ fi
+ if [ -n "$(find "${FTP_BASE}/${repo}/os/any" -type d -empty)" ]; then
+ msg "Removing empty legacy directory ${repo}/os/any"
+ # only delete when dry run mode is off (same convention as clean_pkg)
+ ${CLEANUP_DRYRUN} || rmdir "${FTP_BASE}/${repo}/os/any"
+ continue
+ fi
+ find "${FTP_BASE}/${repo}/os/any" -name "*${PKGEXT}" -printf '%f\n' | sort > "${WORKDIR}/any-${repo}"
+ cat "${WORKDIR}/db-${repo}-"* | sort -u > "${WORKDIR}/all-${repo}"
+
+ old_pkgs=($(comm -23 "${WORKDIR}/any-${repo}" "${WORKDIR}/all-${repo}"))
+ if [ ${#old_pkgs[@]} -ge 1 ]; then
+ msg "Removing old packages from [${repo}] (any)..."
+ for old_pkg in ${old_pkgs[@]}; do
+ msg2 "${old_pkg}"
+ clean_pkg "${FTP_BASE}/${repo}/os/any/${old_pkg}"
+ done
+ fi
+done
+
+
+for repo in ${repos[@]}; do
+ for arch in ${ARCHES[@]}; do
+ repo_unlock ${repo} ${arch}
+ done
done
-popd >/dev/null
-
-if [ -n "$to_cleanup" ]; then
- msg "The following packages are no longer in any repo"
- for f in $to_cleanup; do
- msg2 "$(basename "$f")"
- done
- ${CLEANUP_DRYRUN} || mv $to_cleanup "$CLEANUP_DESTDIR"
-fi
script_unlock
diff --git a/misc-scripts/ftpdir-cleanup-repo b/misc-scripts/ftpdir-cleanup-repo
deleted file mode 100755
index bfc971e..0000000
--- a/misc-scripts/ftpdir-cleanup-repo
+++ /dev/null
@@ -1,187 +0,0 @@
-#!/bin/bash
-
-if [ $# -ne 1 ]; then
- msg "usage: $(basename $0) <reponame>"
- exit 1
-fi
-
-reponame=$1
-
-############################################################
-
-. "$(dirname $0)/../db-functions"
-. "$(dirname $0)/../config"
-
-clean_pkgs () {
- if ! ${CLEANUP_DRYRUN}; then
- for pkg in "$@"; do
- if [ -h "$pkg" ]; then
- rm -f "$pkg"
- else
- mv "$pkg" "$CLEANUP_DESTDIR"
- fi
- done
- fi
-}
-
-${CLEANUP_DRYRUN} && warning 'dry run mode is active'
-
-ftppath_base="$FTP_BASE/$reponame/os"
-
-for arch in ${ARCHES[@]}; do
-
- repo_lock $reponame $arch $LOCK_TIMEOUT || continue
-
- CLEANUP_TMPDIR=$(mktemp -d ${WORKDIR}/cleanup-XXXXXX) || exit 1
- ftppath="$ftppath_base/$arch"
- MISSINGFILES=""
- DELETEFILES=""
- DELETESYMLINKS=""
- EXTRAFILES=""
-
- if [ ! -d "$ftppath" ]; then
- error "FTP path '$ftppath' does not exist"
- exit 1
- fi
-
- if ! cd "${CLEANUP_TMPDIR}" ; then
- error "Failed to cd to ${CLEANUP_TMPDIR}"
- exit 1
- fi
-
- if [ ! -f "$ftppath/$reponame$DBEXT" ]; then
- msg "The file \"$ftppath/$reponame$DBEXT\" could not be found, skipping."
- repo_unlock $reponame $arch
- continue
- fi
-
- if ! bsdtar xf "$ftppath/$reponame$DBEXT"; then
- error "Command failed: bsdtar xf \"$ftppath/$reponame$DBEXT\""
- exit 1
- fi
-
- cd "$ftppath"
-
- for pkg in $CLEANUP_TMPDIR/*; do
- [ ! -d "${pkg}" ] && continue
- filename=$(grep -A1 '^%FILENAME%$' "${pkg}/desc" | tail -n1)
-
- if [ ! -e "${filename}" ]; then
- MISSINGFILES="${MISSINGFILES} ${filename}"
- else
- pkgname="$(getpkgname ${filename})"
- for otherfile in ${pkgname}-*; do
- if [ "${otherfile}" != "${filename}" -a "${pkgname}" = "$(getpkgname ${otherfile})" ]; then
- if [ -h "${otherfile}" ]; then
- DELETESYMLINKS="${DELETESYMLINKS} ${otherfile}"
- else
- DELETEFILES="${DELETEFILES} ${otherfile}"
- fi
- fi
- done
- fi
- done
-
- for pkg in *$PKGEXT; do
- if [ ! -e "$pkg" ]; then
- continue
- fi
- pkgname="$(getpkgname $pkg)"
- for p in ${CLEANUP_TMPDIR}/${pkgname}-*; do
- [ ! -d "${p}" ] || continue 2
- dbpkgname=$(grep -A1 '^%FILENAME%$' "${p}/desc" 2>/dev/null| tail -n1)
- if [ "${dbpkgname}" = "${pkgname}" ]; then
- continue 2
- fi
- done
- EXTRAFILES="$EXTRAFILES $pkg"
- done
-
- rm -rf ${CLEANUP_TMPDIR}
-
- # Do a quick check to see if a missing ARCHINDEPFILE is in the any dir
- # If it is, and the file is MISSING, restore it
- missfiles="$MISSINGFILES"
- MISSINGFILES=""
- for mf in $missfiles; do
- if [ -e "${ftppath_base}/any/${mf}" ]; then
- msg "Restoring missing 'any' symlink: ${mf}"
- ${CLEANUP_DRYRUN} || ln -s "../any/${mf}" "${ftppath}"
- else
- MISSINGFILES="${MISSINGFILES} ${mf}"
- fi
- done
-
- repo_unlock $reponame $arch
-
- #Make sure we've done *something* before outputting anything
- if [ -z "$DELETEFILES$DELETESYMLINKS$MISSINGFILES$EXTRAFILES" ]; then
- continue
- fi
-
- msg "Scan complete for $reponame ($arch) at ${ftppath}"
-
- if [ -n "$MISSINGFILES" ]; then
- for f in $MISSINGFILES; do
- error "$f is missing"
- done
- fi
-
- if [ -n "${DELETEFILES}" ]; then
- msg "The following files are out of date"
- for f in $DELETEFILES; do
- msg2 "$f"
- done
- clean_pkgs ${DELETEFILES}
- fi
-
- if [ -n "${DELETESYMLINKS}" ]; then
- msg "The following symlinks are out of date"
- for f in $DELETESYMLINKS; do
- msg2 "$f"
- done
- clean_pkgs ${DELETESYMLINKS}
- fi
-
- if [ -n "${EXTRAFILES}" ]; then
- msg "The following files are in the repo but not the db"
- for f in $EXTRAFILES; do
- msg2 "$f"
- done
- clean_pkgs ${EXTRAFILES}
- fi
-
-done
-
-ARCHINDEPFILES=""
-
-if [ -d "$ftppath_base/any" ]; then
- cd "$ftppath_base/any"
- for pkg in *$PKGEXT; do
- [ -f "$pkg" ] || continue # in case we get a file named "*.pkg.tar.gz"
- found=0
- #check for any existing symlinks
- for arch in ${ARCHES[@]}; do
- if [ -h "$ftppath_base/$arch/$pkg" ]; then
- found=1
- break
- fi
- done
- if [ $found -eq 0 ]; then
- # We found no symlinks to this, delete it
- ARCHINDEPFILES="$ARCHINDEPFILES $pkg"
- fi
- done
-fi
-
-if [ -n "$ARCHINDEPFILES" ]; then
- msg "The following architecture independent packages are not symlinked in the architecture repositories."
- for f in $ARCHINDEPFILES; do
- msg2 "$f"
- done
-fi
-
-if [ -d "$ftppath_base/any" -a -n "${ARCHINDEPFILES}" ]; then
- cd "$ftppath_base/any"
- clean_pkgs ${ARCHINDEPFILES}
-fi