summaryrefslogtreecommitdiff
path: root/db-import-pkg-archlinux
diff options
context:
space:
mode:
Diffstat (limited to 'db-import-pkg-archlinux')
-rwxr-xr-xdb-import-pkg-archlinux210
1 files changed, 210 insertions, 0 deletions
diff --git a/db-import-pkg-archlinux b/db-import-pkg-archlinux
new file mode 100755
index 0000000..81221fd
--- /dev/null
+++ b/db-import-pkg-archlinux
@@ -0,0 +1,210 @@
+#!/bin/bash
+# Syncs Arch repos based on info contained in repo.db files
+# License: GPLv3
+
+# Principles
+# * Get repo.db from an Arch-like repo
+# * Generate a list of available packages
+# * Create sync whitelist (based on package blacklist)
+# * Get packages
+# * Check package signatures
+# * Check database signatures
+# * Sync repo => repo
+
+# TODO
+# * make a tarball of files used for forensics
+
+set -e
+
+# Run as `V=true db-import-pkg-archlinux` to get verbose output
+VERBOSE=${V}
+extra=()
+${VERBOSE} && extra+=(-v)
+
+WORKDIR=$(mktemp -dt "${0##*/}.XXXXXXXXXX")
+trap "rm -rf -- $(printf '%q' "${WORKDIR}")" EXIT
+
+# Returns contents of a repo
+get_repos() {
+ # Exclude everything but db files
+ rsync "${extra[@]}" --no-motd -mrtlH --no-p --include="*/" \
+ --include="*.db" \
+ --include="*${DBEXT}" \
+ --include="*.files" \
+ --include="*${FILESEXT}" \
+ --exclude="*" \
+ --delete-after \
+ "rsync://${mirror}/${mirrorpath}/" "$WORKDIR"
+}
+
+get_repo_content() {
+ # Return all contents
+ bsdtar tf "${1}" | \
+ cut -d "/" -f 1 | \
+ sort -u
+}
+
+# Prints blacklisted packages
+get_blacklist() {
+ cut -d ':' -f 1 "${BLACKLIST_FILE}"
+}
+
+# repo
+# arch
+get_repo_file() {
+ echo "${WORKDIR}/${1}/os/${2}/${1}"
+}
+
+# Process the databases and get the libre packages
+init() {
+
+ # Get the blacklisted packages
+ blacklist=($(get_blacklist))
+ # Store all the whitelist files
+ whitelists=()
+
+ msg "%d packages in blacklist" ${#blacklist[@]}
+
+ test ${#blacklist[@]} -eq 0 && fatal_error "Empty blacklist"
+
+ # Sync the repos databases
+ get_repos
+
+ # Traverse all repo-arch pairs
+ for _repo in "${ARCHREPOS[@]}"; do
+ for _arch in "${ARCHARCHES[@]}"; do
+ msg "Processing %s-%s" "${_repo}" "${_arch}"
+
+ db_file=$(get_repo_file "${_repo}" "${_arch}")${DBEXT}
+ files_file=$(get_repo_file "${_repo}" "${_arch}")${FILESEXT}
+
+ if [ ! -f "${db_file}" ]; then
+ warning "%s doesn't exist, skipping this repo-arch" "${db_file}"
+ continue
+ fi
+ if [ ! -f "${files_file}" ]; then
+ warning "%s doesn't exist, skipping this repo-arch" "${files_file}"
+ continue
+ fi
+
+ # Remove blacklisted packages and count them
+ # TODO capture all removed packages for printing on debug mode
+ msg2 "Removing blacklisted packages from %s database..." .db
+ LC_ALL=C repo-remove "${db_file}" "${blacklist[@]}" \
+ |& sed -n 's/-> Removing/ &/p'
+ msg2 "Removing blacklisted packages from %s database..." .files
+ LC_ALL=C repo-remove "${files_file}" "${blacklist[@]}" \
+ |& sed -n 's/-> Removing/ &/p'
+ # Get db contents
+ db=($(get_repo_content "${db_file}"))
+
+ msg2 "Process clean db for syncing..."
+
+ # Create a whitelist, add * wildcard to end
+ # TODO due to lack of -arch suffix, the pool sync retrieves every arch even if
+ # we aren't syncing them
+ # IMPORTANT: the . in the sed command is needed because an empty
+ # whitelist would consist of a single * allowing any package to
+ # pass through
+ printf '%s\n' "${db[@]}" | sed "s|.$|&*|g" > "/tmp/${_repo}-${_arch}.whitelist"
+
+ msg2 "%d packages in whitelist" "$(wc -l /tmp/${_repo}-${_arch}.whitelist | cut -d' ' -f1)"
+
+ # Sync excluding everything but whitelist
+ # We delete here for cleanup
+ rsync "${extra[@]}" --no-motd -rtlH \
+ --delete-after \
+ --delete-excluded \
+ --delay-updates \
+ --include-from="/tmp/${_repo}-${_arch}.whitelist" \
+ --exclude="*" \
+ "rsync://${mirror}/${mirrorpath}/${_repo}/os/${_arch}/" \
+ "${FTP_BASE}/${_repo}/os/${_arch}/"
+
+ # Add a new whitelist
+ whitelists+=(/tmp/${_repo}-${_arch}.whitelist)
+
+ msg "Putting databases back in place"
+ rsync "${extra[@]}" --no-motd -rtlH \
+ --delay-updates \
+ --safe-links \
+ "${WORKDIR}/${_repo}/os/${_arch}/" \
+ "${FTP_BASE}/${_repo}/os/${_arch}/"
+
+ # Cleanup
+ unset db
+ done
+ done
+
+
+ msg "Syncing package pool"
+ # Concatenate all whitelists, check for single *s just in case
+ cat "${whitelists[@]}" | grep -v "^\*$" | sort -u > /tmp/any.whitelist
+
+ msg2 "Retrieving %d packages from pool" "$(wc -l /tmp/any.whitelist | cut -d' ' -f1)"
+
+ # Sync
+ # *Don't delete-after*, this is the job of cleanup scripts. It will remove our
+ # packages too
+ local pkgpool
+ for pkgpool in "${ARCHPKGPOOLS[@]}"; do
+ rsync "${extra[@]}" --no-motd -rtlH \
+ --delay-updates \
+ --safe-links \
+ --include-from=/tmp/any.whitelist \
+ --exclude="*" \
+ "rsync://${mirror}/${mirrorpath}/${pkgpool}/" \
+ "${FTP_BASE}/${pkgpool}/"
+ done
+
+ # Sync sources
+ msg "Syncing source pool"
+ #sed "s|\.pkg\.tar\.|.src.tar.|" /tmp/any.whitelist > /tmp/any-src.whitelist
+ #msg2 "Retrieving %d sources from pool" $(wc -l < /tmp/any-src.whitelist)
+
+ # Sync
+ # *Don't delete-after*, this is the job of cleanup scripts. It will remove our
+ # packages too
+ local srcpool
+ for srcpool in "${ARCHSRCPOOLS[@]}"; do
+ rsync "${extra[@]}" --no-motd -rtlH \
+ --delay-updates \
+ --safe-links \
+ --include-from=/tmp/any.whitelist \
+ --exclude="*" \
+ "rsync://${mirror}/${mirrorpath}/${srcpool}/" \
+ "${FTP_BASE}/${srcpool}/"
+ done
+
+ date -u +%s > "${FTP_BASE}/lastsync"
+
+ # Cleanup
+ unset blacklist whitelists _arch _repo repo_file
+}
+
+trap_exit() {
+ local signal=$1; shift
+ echo
+ error "$@"
+ trap -- "$signal"
+ kill "-$signal" "$$"
+}
+
+source "$(dirname "$(readlink -e "$0")")/config"
+source "$(dirname "$(readlink -e "$0")")/db-import-pkg-archlinux.conf"
+source "$(dirname "$(readlink -e "$0")")/db-libremessages"
+
+# Check variables presence
+for var in DBEXT FILESEXT mirror mirrorpath WORKDIR BLACKLIST_FILE FTP_BASE ARCHSRCPOOLS ARCHPKGPOOLS; do
+ test -z "${!var}" && fatal_error "Empty %s" "${var}"
+done
+
+# From makepkg
+set -E
+for signal in TERM HUP QUIT; do
+ trap "trap_exit $signal '%s signal caught. Exiting...' $signal" "$signal"
+done
+trap 'trap_exit INT "Aborted by user! Exiting..."' INT
+trap 'trap_exit USR1 "An unknown error has occurred. Exiting..."' ERR
+
+init