#!/bin/bash
# Syncs Arch repos based on info contained in repo.db files
# License: GPLv3

# Principles
# * Get repo.db from an Arch-like repo
# * Generate a list of available packages
# * Create sync whitelist (based on package blacklist)
# * Get packages
# * Check package signatures
# * Check database signatures
# * Sync repo => repo

# TODO
# * make a tarball of files used for forensics

# NOTE: `pipefail` is intentionally not enabled; the `repo-remove ... |& sed`
# pipelines below only report the status of `sed`.
set -e

# Run as `V=true db-import-pkg-archlinux` to get verbose output.
# Default to "false": an unset V used to expand to an empty command, which
# succeeds, so `-v` was passed to rsync on every run.
VERBOSE=${V:-false}
extra=()
if [[ ${VERBOSE} == true ]]; then
  extra+=(-v)
fi

WORKDIR=$(mktemp -dt "${0##*/}.XXXXXXXXXX")
trap "rm -rf -- $(printf '%q' "${WORKDIR}")" EXIT

# Downloads the database files of the mirror into ${WORKDIR}.
# Globals: extra, DBEXT, FILESEXT, mirror, mirrorpath, WORKDIR (all read)
get_repos() {
  # Exclude everything but db files
  rsync "${extra[@]}" --no-motd -mrtlH --no-p --include="*/" \
    --include="*.db" \
    --include="*${DBEXT}" \
    --include="*.files" \
    --include="*${FILESEXT}" \
    --exclude="*" \
    --delete-after \
    "rsync://${mirror}/${mirrorpath}/" "$WORKDIR"
}

# Prints the unique first path components of the archive members.
# Arguments: $1 - archive readable by bsdtar (a repo database)
get_repo_content() {
  # Return all contents
  bsdtar tf "${1}" |
    cut -d "/" -f 1 |
    sort -u
}

# Prints blacklisted packages (first ':'-separated field of each line of
# ${BLACKLIST_FILE}).
get_blacklist() {
  cut -d ':' -f 1 "${BLACKLIST_FILE}"
}

# Prints the extension-less path of a database inside ${WORKDIR}.
# Arguments: $1 - repo
#            $2 - arch
get_repo_file() {
  echo "${WORKDIR}/${1}/os/${2}/${1}"
}

# Process the databases and get the libre packages.
#
# For every repo-arch pair: strips the blacklisted packages from the downloaded
# .db/.files databases, builds an rsync whitelist from what is left, syncs the
# repo directory with it and copies the cleaned databases to ${FTP_BASE}.
# Afterwards the package and source pools are synced with the merged whitelist
# and the sync time is written to ${FTP_BASE}/lastsync.
init() {
  local blacklist whitelists db db_file files_file _repo _arch
  local whitelist whitelist_dir any_whitelist

  # Get the blacklisted packages.
  # Word splitting is intended: it drops blank lines from the blacklist.
  blacklist=($(get_blacklist))
  # Store all the whitelist files
  whitelists=()

  msg "%d packages in blacklist" "${#blacklist[@]}"

  if [[ ${#blacklist[@]} -eq 0 ]]; then
    fatal_error "Empty blacklist"
  fi

  # Sync the repos databases
  get_repos

  # Keep the whitelists in the private work directory rather than under
  # predictable names in /tmp; they are removed with ${WORKDIR} on exit.
  # Created after get_repos so its --delete-after cannot touch it.
  whitelist_dir="${WORKDIR}/whitelists"
  mkdir -p -- "${whitelist_dir}"
  any_whitelist="${whitelist_dir}/any.whitelist"

  # Traverse all repo-arch pairs
  for _repo in "${ARCHREPOS[@]}"; do
    for _arch in "${ARCHARCHES[@]}"; do
      msg "Processing %s-%s" "${_repo}" "${_arch}"

      db_file=$(get_repo_file "${_repo}" "${_arch}")${DBEXT}
      files_file=$(get_repo_file "${_repo}" "${_arch}")${FILESEXT}

      if [[ ! -f "${db_file}" ]]; then
        warning "%s doesn't exist, skipping this repo-arch" "${db_file}"
        continue
      fi
      if [[ ! -f "${files_file}" ]]; then
        warning "%s doesn't exist, skipping this repo-arch" "${files_file}"
        continue
      fi

      # Remove blacklisted packages and count them
      # TODO capture all removed packages for printing on debug mode
      msg2 "Removing blacklisted packages from %s database..." .db
      LC_ALL=C repo-remove "${db_file}" "${blacklist[@]}" \
        |& sed -n 's/-> Removing/ &/p'
      msg2 "Removing blacklisted packages from %s database..." .files
      LC_ALL=C repo-remove "${files_file}" "${blacklist[@]}" \
        |& sed -n 's/-> Removing/ &/p'

      # Get db contents
      db=($(get_repo_content "${db_file}"))

      msg2 "Process clean db for syncing..."

      # Create a whitelist, add * wildcard to end
      # TODO due to lack of -arch suffix, the pool sync retrieves every arch even if
      # we aren't syncing them
      # IMPORTANT: the . in the sed command is needed because an empty
      # whitelist would consist of a single * allowing any package to
      # pass through
      whitelist="${whitelist_dir}/${_repo}-${_arch}.whitelist"
      printf '%s\n' "${db[@]}" | sed "s|.$|&*|g" > "${whitelist}"

      msg2 "%d packages in whitelist" "$(wc -l < "${whitelist}")"

      # Sync excluding everything but whitelist
      # We delete here for cleanup
      rsync "${extra[@]}" --no-motd -rtlH \
        --delete-after \
        --delete-excluded \
        --delay-updates \
        --include-from="${whitelist}" \
        --exclude="*" \
        "rsync://${mirror}/${mirrorpath}/${_repo}/os/${_arch}/" \
        "${FTP_BASE}/${_repo}/os/${_arch}/"

      # Add a new whitelist
      whitelists+=("${whitelist}")

      msg "Putting databases back in place"
      rsync "${extra[@]}" --no-motd -rtlH \
        --delay-updates \
        --safe-links \
        "${WORKDIR}/${_repo}/os/${_arch}/" \
        "${FTP_BASE}/${_repo}/os/${_arch}/"

      # Cleanup
      db=()
    done
  done

  msg "Syncing package pool"
  if [[ ${#whitelists[@]} -eq 0 ]]; then
    warning "No repo-arch pair was processed, the pool whitelist is empty"
  fi
  # Concatenate all whitelists, check for single *s just in case.
  # stdin comes from /dev/null so that `cat` cannot block on the terminal when
  # every repo-arch pair was skipped and there is no whitelist to read.
  cat -- "${whitelists[@]}" < /dev/null | grep -v "^\*$" | sort -u > "${any_whitelist}"

  msg2 "Retrieving %d packages from pool" "$(wc -l < "${any_whitelist}")"

  # Sync
  # *Don't delete-after*, this is the job of cleanup scripts. It will remove our
  # packages too
  local pkgpool
  for pkgpool in "${ARCHPKGPOOLS[@]}"; do
    rsync "${extra[@]}" --no-motd -rtlH \
      --delay-updates \
      --safe-links \
      --include-from="${any_whitelist}" \
      --exclude="*" \
      "rsync://${mirror}/${mirrorpath}/${pkgpool}/" \
      "${FTP_BASE}/${pkgpool}/"
  done

  # Sync sources
  msg "Syncing source pool"
  #sed "s|\.pkg\.tar\.|.src.tar.|" "${any_whitelist}" > "${whitelist_dir}/any-src.whitelist"
  #msg2 "Retrieving %d sources from pool" $(wc -l < "${whitelist_dir}/any-src.whitelist")

  # Sync
  # *Don't delete-after*, this is the job of cleanup scripts. It will remove our
  # packages too
  local srcpool
  for srcpool in "${ARCHSRCPOOLS[@]}"; do
    rsync "${extra[@]}" --no-motd -rtlH \
      --delay-updates \
      --safe-links \
      --include-from="${any_whitelist}" \
      --exclude="*" \
      "rsync://${mirror}/${mirrorpath}/${srcpool}/" \
      "${FTP_BASE}/${srcpool}/"
  done

  date -u +%s > "${FTP_BASE}/lastsync"
}

# Signal/ERR handler: prints an error, restores the default disposition of the
# given signal and re-sends it to this shell.
# Arguments: $1 - signal name
#            $@ - arguments handed to `error` (format string and values)
trap_exit() {
  local signal=$1; shift
  echo
  error "$@"
  trap -- "$signal"
  kill "-$signal" "$$"
}

# Prints an error through `error` and exits with status 1.
# Arguments: $@ - arguments handed to `error`
fatal_error() {
  error "$@"
  exit 1
}

source "$(dirname "$(readlink -e "$0")")/config"
source "$(dirname "$(readlink -e "$0")")/db-import-archlinux.conf"
source "$(librelib messages)"

# Check variables presence
for var in DBEXT FILESEXT mirror mirrorpath WORKDIR BLACKLIST_FILE FTP_BASE ARCHSRCPOOLS ARCHPKGPOOLS; do
  if [[ -z "${!var}" ]]; then
    fatal_error "Empty %s" "${var}"
  fi
done

# From makepkg
set -E
for signal in TERM HUP QUIT; do
  trap "trap_exit $signal '%s signal caught. Exiting...' $signal" "$signal"
done
trap 'trap_exit INT "Aborted by user! Exiting..."' INT
trap 'trap_exit USR1 "An unknown error has occurred. Exiting..."' ERR

init