#!/bin/bash
# Syncs Arch repos based on info contained in repo.db files
# License: GPLv3

# Principles
# * Get repo.db from an Arch-like repo
# * Generate a list of available packages
# * Create sync whitelist (based on package blacklist)
# * Get packages
# * Check package signatures
# * Check database signatures
# * Sync repo => repo

# TODO
# * make a tarball of files used for forensics

# Run as `V=true db-sync` to get verbose output.
# VERBOSE defaults to "false" when V is unset or empty. Expanding an empty
# value as a command (as in `${VERBOSE} && ...`) succeeds with status 0,
# which would turn verbose mode on whenever V was not given.
VERBOSE=${V:-false}
if [[ "${VERBOSE}" == true ]]; then
	# Extra flag handed to every rsync invocation
	extra="-v"
fi

# Scratch directory for the downloaded databases; removed when the script exits.
# The path is shell-quoted with %q because the trap string is re-parsed on exit.
WORKDIR=$(mktemp -dt "${0##*/}.XXXXXXXXXX")
trap "rm -rf -- $(printf '%q' "${WORKDIR}")" EXIT

# Downloads the repo database files from the mirror into WORKDIR.
#
# Globals read: extra, DBEXT, FILESEXT, mirror, mirrorpath, WORKDIR
# Only directories and files matching *.db, *${DBEXT}, *.files and
# *${FILESEXT} are transferred; everything else is excluded.
get_repos() {
	# Exclude everything but db files.
	# ${extra} is deliberately left unquoted so that it expands to nothing
	# (instead of an empty argument) when verbose mode is off.
	rsync ${extra} --no-motd -mrtlH --no-p --include="*/" \
		--include="*.db" \
		--include="*${DBEXT}" \
		--include="*.files" \
		--include="*${FILESEXT}" \
		--exclude="*" \
		--delete-after \
		"rsync://${mirror}/${mirrorpath}/" "$WORKDIR"
}

# Lists the top-level entries of a repo database archive.
#
# Arguments: $1 - path to the database archive
# Outputs:   the first path component of every archive member, sorted and
#            de-duplicated, one per line
get_repo_content() {
	# Quote the path so it survives whitespace and glob characters
	bsdtar tf "${1}" | \
		cut -d "/" -f 1 | \
		sort -u
}

# Prints the first ':'-separated field of every line in BLACKLIST_FILE,
# i.e. the blacklisted package names, one per line.
get_blacklist() {
	local -r list_file="${BLACKLIST_FILE}"
	cut -f 1 -d ':' "${list_file}"
}

# Prints the extension-less path of a repo database inside WORKDIR.
# $1 - repo
# $2 - arch
get_repo_file() {
	local repo=$1 arch=$2
	printf '%s\n' "${WORKDIR}/${repo}/os/${arch}/${repo}"
}

# Process the databases and get the libre packages.
#
# Steps:
#   1. Read the package blacklist; abort through fatal_error if it is empty.
#   2. Fetch the repo databases into WORKDIR (get_repos).
#   3. For every repo/arch pair: remove the blacklisted packages from the .db
#      and .files databases, write a whitelist from the remaining entries,
#      sync the repo directory from the mirror using that whitelist, then
#      copy the cleaned databases into FTP_BASE.
#   4. Merge all whitelists and use the result to sync the package pools and
#      the source pools.
#
# Globals read: ARCHREPOS, ARCHARCHES, ARCHPKGPOOLS, ARCHSRCPOOLS, DBEXT,
#               FILESEXT, FTP_BASE, WORKDIR, mirror, mirrorpath, extra
# Files written: /tmp/<repo>-<arch>.whitelist and /tmp/any.whitelist
#
# NOTE(review): the whitelist files use predictable names in /tmp. They are
# kept there because other tooling may rely on those paths (not visible from
# this file); consider moving them under a mktemp directory.
init() {
	local _repo _arch db_file files_file whitelist pkgpool srcpool
	local -a blacklist=() whitelists=() db=()

	# Get the blacklisted packages. The unquoted substitution is intentional:
	# word splitting turns the helper's output into one element per name.
	blacklist=($(get_blacklist))

	msg "%d packages in blacklist" "${#blacklist[@]}"

	if (( ${#blacklist[@]} == 0 )); then
		fatal_error "Empty blacklist"
	fi

	# Sync the repos databases
	get_repos

	# Traverse all repo-arch pairs
	for _repo in "${ARCHREPOS[@]}"; do
		for _arch in "${ARCHARCHES[@]}"; do
			msg "Processing ${_repo}-${_arch}"

			db_file=$(get_repo_file "${_repo}" "${_arch}")${DBEXT}
			files_file=$(get_repo_file "${_repo}" "${_arch}")${FILESEXT}
			whitelist=/tmp/${_repo}-${_arch}.whitelist

			if [ ! -f "${db_file}" ]; then
				warning "%s doesn't exist, skipping this repo-arch" "${db_file}"
				continue
			fi
			if [ ! -f "${files_file}" ]; then
				warning "%s doesn't exist, skipping this repo-arch" "${files_file}"
				continue
			fi

			# Remove blacklisted packages and count them
			# TODO capture all removed packages for printing on debug mode
			msg2 "Removing blacklisted packages from %s database..." .db
			LC_ALL=C repo-remove "${db_file}" "${blacklist[@]}" \
			|& sed -n 's/-> Removing/	&/p'
			msg2 "Removing blacklisted packages from %s database..." .files
			LC_ALL=C repo-remove "${files_file}" "${blacklist[@]}" \
			|& sed -n 's/-> Removing/	&/p'

			# Get db contents (one array element per database entry)
			db=($(get_repo_content "${db_file}"))

			msg2 "Process clean db for syncing..."

			# Create a whitelist, add * wildcard to end
			# TODO due to lack of -arch suffix, the pool sync retrieves every arch even if
			# we aren't syncing them
			# IMPORTANT: the . in the sed command is needed because an empty
			# whitelist would consist of a single * allowing any package to
			# pass through
			printf '%s\n' "${db[@]}" | sed "s|.$|&*|g" > "${whitelist}"

			msg2 "%d packages in whitelist" "$(wc -l < "${whitelist}")"

			# Sync excluding everything but whitelist
			# We delete here for cleanup
			# ${extra} is deliberately unquoted so that it expands to nothing
			# when verbose mode is off.
			rsync ${extra} --no-motd -rtlH \
				--delete-after \
				--delete-excluded \
				--delay-updates \
				--include-from="${whitelist}" \
				--exclude="*" \
				"rsync://${mirror}/${mirrorpath}/${_repo}/os/${_arch}/" \
				"${FTP_BASE}/${_repo}/os/${_arch}/"

			# Add a new whitelist
			whitelists+=("${whitelist}")

			msg "Putting databases back in place"
			rsync ${extra} --no-motd -rtlH \
				--delay-updates \
				--safe-links \
				"${WORKDIR}/${_repo}/os/${_arch}/" \
				"${FTP_BASE}/${_repo}/os/${_arch}/"
		done
	done

	msg "Syncing package pool"
	# Concatenate all whitelists, check for single *s just in case.
	# With no whitelist at all, `cat` would be called without operands and
	# read standard input instead, so write an empty pool whitelist directly.
	if (( ${#whitelists[@]} > 0 )); then
		cat "${whitelists[@]}" | grep -v "^\*$" | sort -u > /tmp/any.whitelist
	else
		warning "No repo-arch pair was processed, the pool whitelist is empty"
		: > /tmp/any.whitelist
	fi

	msg2 "Retrieving %d packages from pool" "$(wc -l < /tmp/any.whitelist)"

	# Sync
	# *Don't delete-after*, this is the job of cleanup scripts. It will remove our
	# packages too
	for pkgpool in "${ARCHPKGPOOLS[@]}"; do
		rsync ${extra} --no-motd -rtlH \
			--delay-updates \
			--safe-links \
			--include-from=/tmp/any.whitelist \
			--exclude="*" \
			"rsync://${mirror}/${mirrorpath}/${pkgpool}/" \
			"${FTP_BASE}/${pkgpool}/"
	done

	# Sync sources
	msg "Syncing source pool"
	#sed "s|\.pkg\.tar\.|.src.tar.|" /tmp/any.whitelist > /tmp/any-src.whitelist
	#msg2 "Retrieving %d sources from pool" $(wc -l < /tmp/any-src.whitelist)

	# Sync
	# *Don't delete-after*, this is the job of cleanup scripts. It will remove our
	# packages too
	for srcpool in "${ARCHSRCPOOLS[@]}"; do
		rsync ${extra} --no-motd -rtlH \
			--delay-updates \
			--safe-links \
			--include-from=/tmp/any.whitelist \
			--exclude="*" \
			"rsync://${mirror}/${mirrorpath}/${srcpool}/" \
			"${FTP_BASE}/${srcpool}/"
	done

	# No explicit cleanup needed: every working variable above is local
}

# Signal/ERR handler: reports a message, then re-raises the signal.
# $1   - name of the signal to re-raise
# $2.. - arguments passed through to error()
trap_exit() {
	local sig="$1"
	shift
	printf '\n'
	error "$@"
	# Restore the default disposition so the kill below is not caught again
	trap -- "${sig}"
	kill "-${sig}" "$$"
}

# Load configuration and the message helpers from the directory holding this
# script (symlinks to the script are resolved first).
source "$(dirname "$(readlink -e "$0")")/config"
source "$(dirname "$(readlink -e "$0")")/db-sync.conf"
source "$(dirname "$(readlink -e "$0")")/libremessages"

# Check variables presence.
# For array variables the indirect expansion yields the first element, so an
# empty array is reported as empty too. ARCHREPOS and ARCHARCHES are included
# because init iterates over them; without them nothing would be synced.
for var in DBEXT FILESEXT mirror mirrorpath WORKDIR BLACKLIST_FILE FTP_BASE \
	ARCHREPOS ARCHARCHES ARCHSRCPOOLS ARCHPKGPOOLS; do

	test -z "${!var}" && fatal_error "Empty ${var}"
done

# From makepkg
# set -E makes shell functions inherit the ERR trap installed below
set -E
for signal in TERM HUP QUIT; do
	trap "trap_exit $signal '%s signal caught. Exiting...' $signal" $signal
done
trap 'trap_exit INT "Aborted by user! Exiting..."' INT
trap 'trap_exit USR1 "An unknown error has occurred. Exiting..."' ERR

init