diff options
Diffstat (limited to 'extra/coadde/db-sync')
-rwxr-xr-x | extra/coadde/db-sync | 208 |
1 files changed, 208 insertions, 0 deletions
diff --git a/extra/coadde/db-sync b/extra/coadde/db-sync new file mode 100755 index 0000000..2194fe6 --- /dev/null +++ b/extra/coadde/db-sync @@ -0,0 +1,208 @@ +#!/bin/bash +# Syncs Arch repos based on info contained in repo.db files +# License: GPLv3 + +# Principles +# * Get repo.db from an Arch-like repo +# * Generate a list of available packages +# * Create sync whitelist (based on package blacklist) +# * Get packages +# * Check package signatures +# * Check database signatures +# * Sync repo => repo + +# TODO +# * make a tarball of files used for forensics + +# Run as `V=true db-sync` to get verbose output +VERBOSE=${V} +extra=() +${VERBOSE} && extra+=(-v) + +WORKDIR=$(mktemp -dt "${0##*/}.XXXXXXXXXX") +trap "rm -rf -- $(printf '%q' "${WORKDIR}")" EXIT + +# Returns contents of a repo +get_repos() { + # Exclude everything but db files + rsync "${extra[@]}" --no-motd -mrtlH --no-p --include="*/" \ + --include="*.db" \ + --include="*${DBEXT}" \ + --include="*.files" \ + --include="*${FILESEXT}" \ + --exclude="*" \ + --delete-after \ + "rsync://${mirror}/${mirrorpath}/" "$WORKDIR" +} + +get_repo_content() { + # Return all contents + bsdtar tf "${1}" | \ + cut -d "/" -f 1 | \ + sort -u +} + +# Prints blacklisted packages +get_blacklist() { + cut -d ':' -f 1 "${BLACKLIST_FILE}" +} + +# repo +# arch +get_repo_file() { + echo "${WORKDIR}/${1}/os/${2}/${1}" +} + +# Process the databases and get the libre packages +init() { + + # Get the blacklisted packages + blacklist=($(get_blacklist)) + # Store all the whitelist files + whitelists=() + + msg "%d packages in blacklist" ${#blacklist[@]} + + test ${#blacklist[@]} -eq 0 && fatal_error "Empty blacklist" + + # Sync the repos databases + get_repos + + # Traverse all repo-arch pairs + for _repo in "${ARCHREPOS[@]}"; do + for _arch in "${ARCHARCHES[@]}"; do + msg "Processing %s-%s" "${_repo}-${_arch}" + + db_file=$(get_repo_file "${_repo}" "${_arch}")${DBEXT} + files_file=$(get_repo_file "${_repo}" "${_arch}")${FILESEXT} + + if [ ! -f "${db_file}" ]; then + warning "%s doesn't exist, skipping this repo-arch" "${db_file}" + continue + fi + if [ ! -f "${files_file}" ]; then + warning "%s doesn't exist, skipping this repo-arch" "${files_file}" + continue + fi + + # Remove blacklisted packages and count them + # TODO capture all removed packages for printing on debug mode + msg2 "Removing blacklisted packages from %s database..." .db + LC_ALL=C repo-remove "${db_file}" "${blacklist[@]}" \ + |& sed -n 's/-> Removing/ &/p' + msg2 "Removing blacklisted packages from %s database..." .files + LC_ALL=C repo-remove "${files_file}" "${blacklist[@]}" \ + |& sed -n 's/-> Removing/ &/p' + # Get db contents + db=($(get_repo_content "${db_file}")) + + msg2 "Process clean db for syncing..." + + # Create a whitelist, add * wildcard to end + # TODO due to lack of -arch suffix, the pool sync retrieves every arch even if + # we aren't syncing them + # IMPORTANT: the . in the sed command is needed because an empty + # whitelist would consist of a single * allowing any package to + # pass through + printf '%s\n' "${db[@]}" | sed "s|.$|&*|g" > "/tmp/${_repo}-${_arch}.whitelist" + + msg2 "%d packages in whitelist" "$(wc -l /tmp/${_repo}-${_arch}.whitelist | cut -d' ' -f1)" + + # Sync excluding everything but whitelist + # We delete here for cleanup + rsync "${extra[@]}" --no-motd -rtlH \ + --delete-after \ + --delete-excluded \ + --delay-updates \ + --include-from="/tmp/${_repo}-${_arch}.whitelist" \ + --exclude="*" \ + "rsync://${mirror}/${mirrorpath}/${_repo}/os/${_arch}/" \ + "${FTP_BASE}/${_repo}/os/${_arch}/" + + # Add a new whitelist + whitelists+=(/tmp/${_repo}-${_arch}.whitelist) + + msg "Putting databases back in place" + rsync "${extra[@]}" --no-motd -rtlH \ + --delay-updates \ + --safe-links \ + "${WORKDIR}/${_repo}/os/${_arch}/" \ + "${FTP_BASE}/${_repo}/os/${_arch}/" + + # Cleanup + unset db + done + done + + + msg "Syncing package pool" + # Concatenate all whitelists, check for single *s just in case + cat "${whitelists[@]}" | grep -v "^\*$" | sort -u > /tmp/any.whitelist + + msg2 "Retrieving %d packages from pool" "$(wc -l /tmp/any.whitelist | cut -d' ' -f1)" + + # Sync + # *Don't delete-after*, this is the job of cleanup scripts. It will remove our + # packages too + local pkgpool + for pkgpool in "${ARCHPKGPOOLS[@]}"; do + rsync "${extra[@]}" --no-motd -rtlH \ + --delay-updates \ + --safe-links \ + --include-from=/tmp/any.whitelist \ + --exclude="*" \ + "rsync://${mirror}/${mirrorpath}/${pkgpool}/" \ + "${FTP_BASE}/${pkgpool}/" + done + + # Sync sources + msg "Syncing source pool" + #sed "s|\.pkg\.tar\.|.src.tar.|" /tmp/any.whitelist > /tmp/any-src.whitelist + #msg2 "Retrieving %d sources from pool" $(wc -l < /tmp/any-src.whitelist) + + # Sync + # *Don't delete-after*, this is the job of cleanup scripts. It will remove our + # packages too + local srcpool + for srcpool in "${ARCHSRCPOOLS[@]}"; do + rsync "${extra[@]}" --no-motd -rtlH \ + --delay-updates \ + --safe-links \ + --include-from=/tmp/any.whitelist \ + --exclude="*" \ + "rsync://${mirror}/${mirrorpath}/${srcpool}/" \ + "${FTP_BASE}/${srcpool}/" + done + + date -u +%s > "${FTP_BASE}/lastsync" + + # Cleanup + unset blacklist whitelists _arch _repo repo_file +} + +trap_exit() { + local signal=$1; shift + echo + error "$@" + trap -- "$signal" + kill "-$signal" "$$" +} + +source "$(dirname "$(readlink -e "$0")")/config" +source "$(dirname "$(readlink -e "$0")")/db-sync.conf" +source "$(dirname "$(readlink -e "$0")")/db-libremessages" + +# Check variables presence +for var in DBEXT FILESEXT mirror mirrorpath WORKDIR BLACKLIST_FILE FTP_BASE ARCHSRCPOOLS ARCHPKGPOOLS; do + test -z "${!var}" && fatal_error "Empty %s" "${var}" +done + +# From makepkg +set -E +for signal in TERM HUP QUIT; do + trap "trap_exit $signal '%s signal caught. Exiting...' $signal" "$signal" +done +trap 'trap_exit INT "Aborted by user! Exiting..."' INT +trap 'trap_exit USR1 "An unknown error has occurred. Exiting..."' ERR + +init |