#!/bin/bash
# alfplayer
# 2014-06-12
#
# Bash script to mirror only some repositories of Parabola.
#
# Synchronizing first from Arch (maybe using db-sync and any-to-ours of
# dbscripts) can be recommended, so that packages are afterwards pulled
# (hard linked) from the local Arch mirror by this script.
#
# Usage: parabola-mirror-repos [-t|--terminal] [-h]
#
# Outline of one run:
#   1. Take an exclusive flock on ${LOCKFILE}; an EXIT trap appends the
#      temporary log to ${log_file}, mails it and releases the lock.
#   2. Prepare ${local_dir}.tmp (hard-link copy of the last snapshot, or of
#      today's snapshot if it already exists).
#   3. Build the list of pool files referenced by the remote repositories and
#      optionally delete local pool files that are no longer referenced.
#   4. rsync the repository directories, the package pools and the extra
#      paths into ${local_dir}.tmp.
#   5. Rename ${local_dir}.tmp to a dated snapshot directory and repoint the
#      ${local_dir} symlink to it.

script_filename="$(basename "$0")"

# Create temporary log file
log_tmp="$(mktemp -p /var/tmp)" || {
  echo "=> ERROR: Could not create temporary log file in /var/tmp" >&2
  exit 1
}

# Default configuration values.
# They can be overridden by setting the variables in the calling environment.
# 1 to set, 0 to unset
: "${project:=parabola}"
#: ${SERVER:=rsync://repo.parabola.nu:875/repos}
: "${SERVER:=rsync://alfplayer.com/repos/parabola}" # remote rsync directory
#: ${SERVER:=rsync://parabolagnulinux.mirrors.linux.ro/parabolagnulinux}
: "${base_dir:=/srv/http}"
: "${local_dir:=${base_dir}/${project}}" # symlink to the last snapshot
: "${repos:=core extra community multilib libre kernels libre-multilib libre-multilib-testing libre-testing nonprism nonprism-testing java cross pcr}"
# mips64el is also excluded in some rsync invocations in this script
: "${max_delete:=10000}" # maximum amount of files to delete in the local directory
: "${BW_LIMIT:=1300}" # KB/s
: "${log_file:=${script_filename}.log}"
# --bwlimit is not part of the default because it is appended to OPTIONS
# further below (also when OPTIONS comes from the environment).
: "${OPTIONS:=-rltvH --no-p --no-g --max-delete=$max_delete --exclude=.* --no-motd --chmod=Dug=srwx,Fug=rw --log-file=${log_tmp}}"
: "${oldest_version:=100}" # delete versions older than this version
: "${link_dest[0]:=${base_dir}/archlinux}"
#: ${link_dest[1]:=${base_dir}/parabola.secondary_mirror.tmp}
# link-dest does not support any network URI like rsync://...
: "${mail_to:=your@mail.com}" # set to enable mailing log file, or comment out to disable it
: "${date_exclude:=2014.06.19}" # disable running this script this date (see next line)
: "${forcerun:=0}" # set to 1 to force running on excluded date
: "${terminal:=1}" # outputs to stdout using rsync --progress (and logs to a file)
: "${alternative_mirror:=0}" # use alternative mirror; forces no_snapshot_delete and disables snapshot_symlink_update; synchronized files end up in ${local_dir}.tmp
: "${no_snapshot_delete:=0}" # disable deletion of oldest snapshots; alternative_mirror enables it forcefully
: "${leave_tmp:=0}" # leave updated tree in {project}.tmp instead of creating a dated snapshot directory
: "${snapshot_symlink_update:=1}" # update symlink to last snapshot
# does nothing if leave_tmp is enabled
: "${TZ:=UTC}" # set timezone to UTC (affects rsync log output)
: "${db_and_symlinks_update:=1}" # update DB files and package symlinks
# disabling this uses existing file /tmp/parabola-mirror which can be left over the last invocation of parabola-mirror-repos with this option set
: "${pools_update:=1}" # update files in package pools
: "${no_file_delete:=1}" # do not delete files
: "${link_dest_snapshot_count=3}" # number of snapshots that are passed as --link-dest to rsync
# The original guard was "[[ ! {path_list} ]]" (missing "$"), which is never
# true, so this default was never applied.
: "${path_list:=docs sources mirrorlist.txt lastupdate}" # list of extra paths to synchronize

export TZ

if [[ ${alternative_mirror} == 1 ]]; then
  no_snapshot_delete=1
  leave_tmp=1
  #SERVER="rsync://repo.parabola.nu:875/repos"
  SERVER="rsync://parabolagnulinux.mirrors.linux.ro/parabolagnulinux"
  local_dir="${base_dir}/${project}.secondary_mirror"
  #link_dest=("${base_dir}/archlinux" "${base_dir}/parabola.tmp")
  link_dest+=("${base_dir}/${project}.tmp")
  for dir in "${local_dir}" "${local_dir}.tmp" "${local_dir}.tmp/pool"; do
    if [[ ! -d "${dir}" ]]; then
      mkdir -pv "${dir}"
      chmod -v 2770 "${dir}"
    fi
  done
else
  #link_dest+=("${base_dir}/${project}.tmp")
  link_dest+=("${base_dir}/${project}.secondary_mirror")
fi

# Work files holding the remote pool list, the local pool list and their
# difference (local files that are candidates for deletion).
remote_pool_files="/tmp/${project}-remote-files"
local_pool_files="/tmp/${project}-local-files"
POOL_FILES_TO_DELETE="/tmp/${project}-to-delete-files"

# Lock with flock (provided by util-linux), save to log file and send email on exit
LOCKFILE="/var/lock/${script_filename}"
LOCKFD=99

# Run flock on the lock descriptor. $1 is the flock option letters without
# the leading dash, e.g. "xn" (exclusive, non-blocking) or "u" (unlock).
_lock() {
  flock "-$1" "${LOCKFD}"
}

# Wait until there is no process writing to ${log_tmp}
# Waiting can be necessary if command groups are not executed until the end
_wait_log_tmp() {
  if [[ -e "${log_tmp}" ]]; then
    while fuser -s "${log_tmp}"; do
      sleep 0.2
    done
  fi
}

# EXIT trap: persist and mail the log, report failures, release the lock.
_no_more_locking() {
  # Save the exit status FIRST: any command run before this (including
  # "set +e") would overwrite $? with its own status.
  es=$?
  set +e
  _wait_log_tmp
  if cat "${log_tmp}" >> "${log_file}"; then
    log_written=1
  else
    echo "=> ERROR: Failed to write to log file: ${log_file}" >&2
  fi
  if [[ -e ${local_dir}.tmp ]]; then
    echo "=> WARNING: Temporary directory ${local_dir}.tmp remains in file system"
  fi
  if [[ ${mail_to} ]]; then
    echo "=> Sending output to ${mail_to}"
    mail -s "[$(hostname)] ${script_filename}" "${mail_to}" < "${log_tmp}"
  fi
  # Keep the temporary log when it could not be appended to ${log_file}.
  if [[ ${log_written} == 1 ]]; then
    rm -f "${log_tmp}"
  fi
  if [[ $es != 0 ]]; then
    echo "=> WARNING: Unsuccessful script termination. Exit status: $es"
    if [[ ${mail_to} ]]; then
      echo "=> Sending error notification to ${mail_to}"
      mail -s "[$(hostname)] ${script_filename} failed. See logged output." "${mail_to}" < /dev/null
    fi
  fi
  _lock u
  # Only remove the lock file if nobody else grabbed the lock meanwhile.
  _lock xn && \
    rm -f "${LOCKFILE}"
}

# Open ${LOCKFILE} on descriptor ${LOCKFD} and install the EXIT trap.
# eval is needed because the descriptor number comes from a variable.
_prepare_locking() {
  eval "exec $LOCKFD>\"$LOCKFILE\""
  trap _no_more_locking EXIT
}

_prepare_locking

# Lock now. The lock is disabled automatically when the script exits (with any error code).
if ! _lock xn; then
  echo "=> ERROR: Could not obtain lock. Exiting." >&2
  exit 1
fi

DATE="$(date +%Y.%m.%d)"
current="${local_dir}-${DATE}"
current_component="${current##*/}"

# Add the snapshots of the previous ${link_dest_snapshot_count} days as
# hard-link sources.
for (( link_dest_count=1 ; link_dest_count <= ${link_dest_snapshot_count} ; link_dest_count++ )); do
  date_count="$(date -d @$(( $(date +"%s") - link_dest_count * 86400 )) +"%Y.%m.%d")"
  link_dest+=("${base_dir}/${project}-${date_count}")
done

current_exists=0
tmp_exists=0
local_useful=0
date_exact=""
first_run=0

if [[ ${BW_LIMIT} ]]; then
  OPTIONS+=" --bwlimit=${BW_LIMIT}"
fi

# Intentional word splitting: both settings are whitespace-separated lists.
# shellcheck disable=SC2206
path_list_array=(${path_list})
# shellcheck disable=SC2206
repos_array=(${repos})

# Print the arguments to stderr and terminate the script with status 1.
error() {
  echo "$@" >&2
  exit 1
}

{
  if [[ ${date_exclude} && ${forcerun} != 1 ]]; then
    if [[ $DATE == "${date_exclude}" ]]; then
      echo "Manually disabled: ${DATE}. Exiting."
      exit 0
    fi
  fi

  # Parse options.
  # -t or --terminal enables rsync option --progress
  if [[ $# == 1 ]]; then
    if [[ $1 == -t || $1 == --terminal ]]; then
      terminal=1
    elif [[ $1 == -h ]]; then
      echo "Available options: -h, -t (rsync --progress)"
      # Asking for help must not start a synchronization.
      exit 0
    else
      echo "=> Wrong argument: $1"
    fi
  elif [[ $# -gt 1 ]]; then
    echo "=> ${script_filename} has a wrong number of arguments"
  fi

  if [[ $terminal == 1 ]]; then
    OPTIONS+=" --progress"
  fi

  # One --link-dest option per existing directory. (The original stored the
  # whole joined list in every element, yielding a single bogus argument.)
  link_dest_option=()
  for dest in "${link_dest[@]}"; do
    if [[ -d ${dest} ]]; then
      link_dest_option+=("--link-dest=${dest}")
    else
      echo "=> WARNING: Argument to rsync option --link-dest is not an existing directory: ${dest}"
    fi
  done

  # Test if ${current} exists
  if [[ -e ${current} ]]; then
    current_exists=1
  fi

  # Test if ${local_dir}.tmp exists
  if [[ -e ${local_dir}.tmp ]]; then
    tmp_exists=1
  fi

  # Test if ${local_dir} is an existing symlink pointing to an existing directory
  if [[ -h ${local_dir} ]]; then
    last_path="$(readlink -f "${local_dir}")"
    if [[ -d ${last_path} ]]; then
      last="${last_path##*/}"
      local_useful=1
    else
      error "=> ERROR: ${local_dir} is a symlink which does not point to an existing directory."
    fi
  else
    # The glob is left unquoted on purpose: stat succeeds if any snapshot-like
    # path exists.
    # shellcheck disable=SC2086
    if [[ -e ${local_dir} ]] || stat -t ${local_dir}-* >/dev/null 2>&1; then
      error "=> ERROR: ${local_dir} exists but is not a symlink, or a file (or directory) ${local_dir}-* was found. Fix this before running ${script_filename} again."
    else
      echo "=> WARNING: ${local_dir} does not exist or is not a symlink, and no snapshot directories were found, so it is assumed this is the first time ${script_filename} is run using \"${base_dir}\" as the base directory."
      first_run=1
    fi
  fi

  # Check the current tree and issue warnings and errors based on the current tree state
  # Also, it sets up the temporary directory
  if [[ ${current_exists} == 1 ]]; then
    echo "=> WARNING: ${current} already exists. It will be preserved."
    no_snapshot_delete=1
    if [[ ${local_useful} == 1 ]]; then
      if [[ ${tmp_exists} == 0 ]]; then
        echo "=> WARNING: ${local_dir}.tmp does not exist."
        cp -al "${current}" "${local_dir}.tmp"
      fi
      date_exact="$(date +%Y.%m.%d-%T)"
      echo "=> WARNING: Snapshot ${local_dir}-${date_exact} will be created because ${current} already exists"
    else
      echo "=> WARNING: ${local_dir} does not exist. It will be created."
      if [[ ${tmp_exists} == 0 ]]; then
        echo "=> WARNING: As ${local_dir}.tmp does not exist, data transfer will start from ${current}"
        cp -al "${current}" "${local_dir}.tmp"
      else
        echo "=> WARNING: Data transfer will start from ${local_dir}.tmp"
      fi
    fi
  elif [[ ${local_useful} == 0 ]]; then
    if [[ ${tmp_exists} == 1 ]]; then
      echo "=> ${local_dir} is not useful but ${local_dir}.tmp exists. Resuming from ${local_dir}.tmp"
      echo "=> WARNING: Symlink ${local_dir} does not exist. It will be created."
    else
      if [[ ${first_run} == 0 ]]; then
        error "=> ERROR: ${local_dir}.tmp does not exist and ${local_dir} is not useful. Exiting."
      else
        mkdir "${local_dir}.tmp"
      fi
    fi
  else
    if [[ ${tmp_exists} == 1 ]]; then
      echo "=> ${local_dir}.tmp already exists. Symlink \"${project}\" currently points to ${last_path}."
    else
      cp -al "${last_path}" "${local_dir}.tmp"
    fi
  fi

  echo
  echo "=> Creating snapshot for date ${DATE}"

  # Change to the temporary directory. Everything below uses paths relative
  # to it (including file deletion), so a failed cd must be fatal.
  cd "${local_dir}.tmp" || error "=> ERROR: Could not change directory to ${local_dir}.tmp"

  if [[ ${db_and_symlinks_update} == 1 ]]; then
    # Delete temporary files that may be left over by a previous invocation of parabola-mirror-repos
    rm -f "$remote_pool_files" "$local_pool_files" "$POOL_FILES_TO_DELETE" || true

    # <SERVER>/<repo>/os for every repository. (The original appended "/os"
    # only to the last repository because it worked on a joined string.)
    remote_repo_dirs=()
    for repo in "${repos_array[@]}"; do
      remote_repo_dirs+=("${SERVER}/${repo}/os")
    done

    echo "=> Getting pool path list"
    # Dry run against a non-existent destination: rsync only lists what it
    # would transfer. Symlink lines ("name -> target") pointing into a pool
    # are kept and reduced to the "pool/..." part of the target.
    # NOTE(review): without pipefail only the status of sed is checked here.
    rsync -lrv --out-format="%L" --dry-run --exclude 'mips64el' --exclude '*mips64el.pkg.tar.*' "${remote_repo_dirs[@]}" "/tmp/${script_filename}.unexistent_filename" \
      | grep -- "->" \
      | grep "pool.*[^/]$" \
      | sed -e "s#.*\(pool/.*/.*\)#\1#" \
      >> "$remote_pool_files" || error " => ERROR: Failed with error code: $?"

    if [[ ${first_run} == 0 ]]; then
      echo "=> Getting local pool package file list"
      # Build a list of local packages
      find pool -mindepth 2 >> "$local_pool_files"

      echo "=> Building list of local pool package files to delete"
      # Avoid duplicates (comes from -any packages present in both i686/ and x86_64/)
      sort -u -o "$remote_pool_files" "$remote_pool_files"
      sort -o "$local_pool_files" "$local_pool_files"
      # Keep lines that only appears in local_pool_files
      comm -13 "$remote_pool_files" "$local_pool_files" > "$POOL_FILES_TO_DELETE"

      NUMBER_TO_DELETE="$(wc -l < "$POOL_FILES_TO_DELETE")"
      if [[ "$NUMBER_TO_DELETE" -gt "$max_delete" ]]; then
        error " => ERROR: The number of pool package files to be deleted is ${NUMBER_TO_DELETE}, greater than the specified maximum which is ${max_delete}"
      fi
      if [[ $NUMBER_TO_DELETE -gt 0 ]]; then
        # no_file_delete=1 means "do not delete files". The original test
        # was inverted and deleted exactly when deletion was disabled.
        if [[ ${no_file_delete} != 1 ]]; then
          echo "=> Deleting ${NUMBER_TO_DELETE} old pool package files"
          # One path per line; read them without word splitting.
          while IFS= read -r stale_path; do
            find "${stale_path}" -print -exec rm -f -- {} \;
          done < "$POOL_FILES_TO_DELETE"
        else
          echo "=> Deletion of pool package files (${NUMBER_TO_DELETE}) is disabled. List of files:"
          cat "$POOL_FILES_TO_DELETE"
        fi
      fi
    fi
  fi

  echo "=> List of local repositories. Existing files will be hard linked from these instead of being fetched from the remote server."
  printf -- '%s\n' "${link_dest[@]}"
} &> >(tee -a "${log_tmp}")
_wait_log_tmp

# The rsync invocations run outside the tee groups: rsync appends to
# ${log_tmp} by itself through --log-file (part of the default OPTIONS).
# OPTIONS is a whitespace-separated option string and is split on purpose.
if [[ ${db_and_symlinks_update} == 1 ]]; then
  echo "=> Starting to synchronize repository directories (symlinks and db.* files)"
  # shellcheck disable=SC2086
  rsync $OPTIONS --stats --exclude 'mips64el' --delete-after --safe-links "${link_dest_option[@]}" --link-dest="${local_dir}" "${repos_array[@]/#/${SERVER}/}" "${local_dir}.tmp" || error " => ERROR: rsync terminated with an error code: $?"
fi

{
  if [[ ${pools_update} == 1 ]]; then
    echo "=> Starting to synchronize package pools from remote server $SERVER"
  fi # end "if [[ ${pools_update} == 1 ]]"
} &> >(tee -a "${log_tmp}")
_wait_log_tmp

if [[ ${pools_update} == 1 ]]; then
  # NOTE(review): unlike the other rsync calls, a failure here is not fatal.
  # This is kept as found; confirm whether it is deliberate.
  # shellcheck disable=SC2086
  rsync $OPTIONS --stats --exclude '*-mips64el.pkg.tar.*' --safe-links --files-from="$remote_pool_files" "${link_dest_option[@]}" --link-dest="${local_dir}" "${SERVER}" "${local_dir}.tmp"
fi # end "if [[ ${pools_update} == 1 ]]"

if [[ ${path_list} ]]; then
  echo "=> Synchronizing extra paths"
  # The extra paths land directly below ${local_dir}.tmp/, so the link-dest
  # directories are used as they are.
  # shellcheck disable=SC2086
  rsync $OPTIONS --stats --safe-links "${link_dest_option[@]}" --link-dest="${local_dir}" "${path_list_array[@]/#/${SERVER}/}" "${local_dir}.tmp/" || error " => ERROR: rsync terminated with an error code: $?"
fi

{
  if [[ ${no_snapshot_delete} != 1 ]]; then
    echo "=> DRY-RUN: Delete versions older than the version number: ${oldest_version}."
    # Snapshot names end in YYYY.MM.DD, so a plain sort is chronological;
    # head then drops the newest ${oldest_version} entries from the list.
    delete_list=$(find "${base_dir}" -maxdepth 1 -regextype sed -regex "${local_dir}-[0-9]\{4\}\.[0-9]\{2\}\.[0-9]\{2\}" | sort | head -n -"${oldest_version}") && \
      if [[ ${delete_list} ]]; then
        echo "DRY_RUN: rm -rf ${delete_list}"
      else
        echo "=> Nothing to delete"
      fi
  fi # closes [[ ${no_snapshot_delete} == 0 ]]

  if [[ ${leave_tmp} == 0 ]]; then
    echo "=> Starting to serve the new repository version"
    # Do not touch the symlink if the snapshot could not be put in place.
    if [[ ${date_exact} ]]; then
      echo " => Renaming ${local_dir}.tmp to ${local_dir}"-"${date_exact}"
      mv "${local_dir}.tmp" "${local_dir}-${date_exact}" || error " => ERROR: Could not rename ${local_dir}.tmp"
    else
      echo " => Renaming ${local_dir}.tmp to ${current}"
      mv "${local_dir}.tmp" "${current}" || error " => ERROR: Could not rename ${local_dir}.tmp"
    fi

    if [[ ${snapshot_symlink_update} == 1 ]]; then
      # The symlink targets below are relative to ${base_dir}.
      cd "${base_dir}" || error " => ERROR: Could not change directory to ${base_dir}"
      if [[ ${local_useful} == 1 ]]; then
        echo " => Deleting symlink $local_dir"
        rm -rf "${local_dir}"
      fi

      # Create symlink
      if [[ ${date_exact} ]]; then
        #echo " => Creating symlink ${local_dir}-${date_exact} to ${current_component}"
        #ln -s ${current_component}-"${date_exact}" ${local_dir}-"${date_exact}"
        echo " => Creating symlink ${local_dir} to ${project}-${date_exact}"
        ln -s "${project}-${date_exact}" "${local_dir}"
      else
        echo " => Creating symlink \"${project}\" to ${current_component}"
        ln -s "${current_component}" "${project}"
      fi
    fi
  fi

  rm -f "$remote_pool_files" "$local_pool_files" "$POOL_FILES_TO_DELETE" "${local_dir}.old" || true

  echo "=> Disk space report"
  df -h "${base_dir}"

  echo "=> ${script_filename} finished successfully. Finish time: $(date --rfc-3339=seconds)"
} &> >(tee -a "${log_tmp}")
_wait_log_tmp