diff options
Diffstat (limited to 'parabola-mirror-repos')
-rwxr-xr-x | parabola-mirror-repos | 421 |
1 files changed, 421 insertions, 0 deletions
#!/bin/bash

# alfplayer
# 2014-06-12

# Bash script to mirror only some repositories of Parabola.
#
# Synchronizing first from Arch (maybe using db-sync and any-to-ours of
# dbscripts) can be recommended, so that this script can afterwards pull
# packages from the local Arch mirror (via rsync --link-dest).
#
# Outline of one run:
#   1. Take an exclusive flock so that only one instance runs at a time.
#   2. Prepare ${LOCAL}.tmp (hard-link copy of the last snapshot, or a
#      left-over tree from an interrupted run).
#   3. Ask the remote for the package symlinks of every repository and derive
#      the list of pool files they point to.
#   4. rsync the repository directories, the pool files and the extra paths
#      into ${LOCAL}.tmp, hard-linking from local trees where possible.
#   5. Rename ${LOCAL}.tmp to a dated snapshot and repoint the ${LOCAL} symlink.
#   6. On exit (any status), append the run log to ${log_file} and mail it.
#
# Usage: parabola-mirror-repos [-t|--terminal] [-h]
# Every configuration variable below can be overridden from the environment.

script_filename="$(basename "$0")"

# Create temporary log file
log_tmp="$(mktemp -p /var/tmp)" || {
  echo "=> ERROR: Could not create temporary log file in /var/tmp" >&2
  exit 1
}

# Default configuration values.
# They can be overridden by setting the variables in the calling environment.
# 1 to set, 0 to unset
: "${project:=parabola}"
#: "${SERVER:=rsync://repo.parabola.nu:875/repos}"
: "${SERVER:=rsync://alfplayer.com/repos/parabola}" # remote rsync directory
#: "${SERVER:=rsync://parabolagnulinux.mirrors.linux.ro/parabolagnulinux}"
: "${base_dir:=/srv/http}"
: "${LOCAL:=${base_dir}/${project}}" # symlink to the last snapshot
: "${REPOS:=core extra community multilib libre kernels libre-multilib libre-multilib-testing libre-testing nonprism nonprism-testing java cross pcr}"
  # mips64el is also excluded in some rsync invocations in this script
: "${MAXDELETE:=10000}" # maximum amount of files to delete in the local directory
: "${BW_LIMIT:=1300}" # KB/s
# NOTE(review): the default log file name is relative, so it is resolved
# against whatever the working directory is when the EXIT trap runs.
: "${log_file:=${script_filename}.log}"
# --bwlimit is not part of the default: it is appended further below so that
# it is passed exactly once, also when OPTIONS comes from the environment.
: "${OPTIONS:=-rltvH --no-p --no-g --max-delete=${MAXDELETE} --exclude=.* --no-motd --chmod=Dug=srwx,Fug=rw --log-file=${log_tmp}}"
: "${oldest_version:=100}" # delete versions older than this version
: "${link_dest[0]:=${base_dir}/archlinux}"
#: "${link_dest[1]:=${base_dir}/parabola.secondary_mirror.tmp}"
  # link-dest does not support any network URI like rsync://...
: "${mail_to:=your@mail.com}" # address the log file is mailed to
: "${date_exclude:=2014.06.19}" # disable running this script this date (see next line)
: "${forcerun:=0}" # set to 1 to force running on excluded date
: "${terminal:=1}" # outputs to stdout using rsync --progress (and logs to a file)
: "${alternative_mirror:=0}" # use alternative mirror; forces no_snapshot_delete and disables snapshot_symlink_update; synchronized files end up in ${LOCAL}.tmp
: "${no_snapshot_delete:=0}" # disable deletion of oldest snapshots; alternative_mirror enables it forcefully
: "${leave_tmp:=0}" # leave updated tree in {project}.tmp instead of creating a dated snapshot directory
: "${snapshot_symlink_update:=1}" # update symlink to last snapshot
  # does nothing if leave_tmp is enabled
: "${TZ:=UTC}" # set timezone to UTC (affects rsync log output)
: "${db_and_symlinks_update:=1}" # update DB files and package symlinks
  # disabling this reuses the pool file list in /tmp left over by a previous
  # invocation of parabola-mirror-repos that had this option set
: "${pools_update:=1}" # update files in package pools
: "${no_file_delete:=1}" # 1: only list stale pool files; any other value: delete them
: "${link_dest_snapshot_count:=3}" # number of snapshots that are passed as --link-dest to rsync
: "${path_list:=docs sources mirrorlist.txt lastupdate}" # list of extra paths to synchronize

export TZ

if [[ ${alternative_mirror} == 1 ]]; then
  no_snapshot_delete=1
  leave_tmp=1
  #SERVER="rsync://repo.parabolagnulinux.org:875/repos"
  SERVER="rsync://mirror.yandex.ru/mirrors/parabola"
  LOCAL="${base_dir}/${project}.secondary_mirror"
  #link_dest=("${base_dir}/archlinux" "${base_dir}/parabola.tmp")
  link_dest+=("${base_dir}/${project}.tmp")
  # NOTE(review): this creates ${LOCAL} as a directory, while the tree checks
  # further below abort when ${LOCAL} exists and is not a symlink -- confirm
  # how this mode is meant to be bootstrapped.
  for dir in "${LOCAL}" "${LOCAL}.tmp" "${LOCAL}.tmp/pool"; do
    if [[ ! -d "${dir}" ]]; then
      mkdir -pv "${dir}"
      chmod -v 2770 "${dir}"
    fi
  done
else
  #link_dest+=("${base_dir}/${project}.tmp")
  link_dest+=("${base_dir}/${project}.secondary_mirror")
fi

# These names are fixed on purpose: with db_and_symlinks_update=0 the list
# written by a previous run is reused.
REMOTE_POOL_FILES="/tmp/${project}-remote-files"
LOCAL_POOL_FILES="/tmp/${project}-local-files"
POOL_FILES_TO_DELETE="/tmp/${project}-to-delete-files"

# Lock with flock (provided by util-linux), save to log file and send email on exit
LOCKFILE="/var/lock/${script_filename}"
LOCKFD=99

# Run flock on the lock descriptor.
# $1 - flock flags without the leading dash (e.g. "xn", "u")
_lock() { flock "-$1" "${LOCKFD}"; }

# Wait until there is no process writing to ${log_tmp}
# Waiting can be necessary if command groups are not executed until the end
_wait_log_tmp() {
  if [[ -e "${log_tmp}" ]]; then
    while fuser -s "${log_tmp}"; do
      sleep 0.2
    done
  fi
}

# EXIT trap: persist and mail the log, report failures, release the lock.
_no_more_locking() {
  # Save the exit status first: any command run before this line (including
  # "set +e") would overwrite $? and hide a failed run.
  es=$?

  set +e

  _wait_log_tmp

  if cat "${log_tmp}" >> "${log_file}"; then
    log_written=1
  else
    echo "=> ERROR: Failed to write to log file: ${log_file}" >&2
  fi

  if [[ -e ${LOCAL}.tmp ]]; then
    echo "=> WARNING: Temporary directory ${LOCAL}.tmp remains in file system"
  fi

  if [[ ${mail_to} ]]; then
    echo "=> Sending output to ${mail_to}"
    mail -s "[$(hostname)] ${script_filename}" "${mail_to}" < "${log_tmp}"
  fi

  # Keep the temporary log when it could not be appended to ${log_file}
  if [[ ${log_written} == 1 ]]; then
    rm -f "${log_tmp}"
  fi

  if [[ ${es} != 0 ]]; then
    echo "=> WARNING: Unsuccessful script termination. Exit status: $es"
    if [[ ${mail_to} ]]; then
      echo "=> Sending error notification to ${mail_to}"
      mail -s "[$(hostname)] ${script_filename} failed. See logged output." "${mail_to}" < /dev/null
    fi
  fi

  # Remove the lock file only if nobody else holds the lock
  _lock u
  _lock xn && \
    rm -f "${LOCKFILE}"
}

# Open the lock file on ${LOCKFD} and install the EXIT trap.
# eval is needed because the descriptor number comes from a variable.
_prepare_locking() { eval "exec $LOCKFD>\"$LOCKFILE\""; trap _no_more_locking EXIT; }

_prepare_locking

# Lock now. The lock is disabled automatically when the script exits (with any error code).
if ! _lock xn; then
  echo "=> ERROR: Could not obtain lock. Exiting." >&2
  exit 1
fi

DATE="$(date +%Y.%m.%d)"
current="${LOCAL}-${DATE}"
current_component="${current##*/}"

# Offer the most recent dated snapshots as additional hard-link sources
now_epoch="$(date +%s)"
for (( link_dest_count = 1; link_dest_count <= link_dest_snapshot_count; link_dest_count++ )); do
  date_count="$(date -d "@$(( now_epoch - link_dest_count * 86400 ))" +%Y.%m.%d)"
  link_dest+=("${base_dir}/${project}-${date_count}")
done

current_exists=0
tmp_exists=0
local_useful=0
date_exact=""
first_run=0
if [[ ${BW_LIMIT} ]]; then
  OPTIONS+=" --bwlimit=${BW_LIMIT}"
fi
read -r -a path_list_array <<< "${path_list}"
read -r -a repos_array <<< "${REPOS}"

# Print a message to stderr and terminate the script with status 1.
error() {
  echo "$@" >&2
  exit 1
}

{

if [[ ${date_exclude} && ${forcerun} != 1 ]]; then
  # Right-hand side left unquoted: date_exclude is matched as a glob pattern
  if [[ ${DATE} == ${date_exclude} ]]; then
    echo "Manually disabled: ${DATE}. Exiting."
    exit 0
  fi
fi

# Parse options.
# -t or --terminal enables rsync option --progress
if [[ $# == 1 ]]; then
  if [[ $1 == -t || $1 == --terminal ]]; then
    terminal=1
  elif [[ $1 == -h ]]; then
    echo "Available options: -h, -t (rsync --progress)"
    # Asking for help must not start a mirror run
    exit 0
  else
    echo "=> Wrong argument: $1"
  fi
elif [[ $# -gt 1 ]]; then
  echo "=> ${script_filename} has a wrong number of arguments"
fi

if [[ ${terminal} == 1 ]]; then
  OPTIONS+=" --progress"
fi

# OPTIONS is a whitespace-separated string (it may come from the environment);
# split it once into an array so that it is never subject to globbing.
read -r -a rsync_options <<< "${OPTIONS}"

# One --link-dest argument per existing directory
link_dest_option=()
for dir in "${link_dest[@]}"; do
  if [[ -d ${dir} ]]; then
    link_dest_option+=("--link-dest=${dir}")
  else
    echo "=> WARNING: Argument to rsync option --link-dest is not an existing directory: ${dir}"
  fi
done

# Test if ${current} exists
if [[ -e ${current} ]]; then
  current_exists=1
fi

# Test if ${LOCAL}.tmp exists
if [[ -e ${LOCAL}.tmp ]]; then
  tmp_exists=1
fi

# Test if ${LOCAL} is an existing symlink pointing to an existing directory
if [[ -h ${LOCAL} ]]; then
  last_path="$(readlink -f "${LOCAL}")"
  if [[ -d ${last_path} ]]; then
    last="${last_path##*/}"
    local_useful=1
  else
    error "=> ERROR: ${LOCAL} is a symlink which does not point to an existing directory."
  fi
else
  # The glob is intentionally unquoted: stat succeeds if any ${LOCAL}-* exists
  if [[ -e ${LOCAL} ]] || stat -t "${LOCAL}"-* >/dev/null 2>&1; then
    error "=> ERROR: ${LOCAL} exists but is not a symlink, or a file (or directory) ${LOCAL}-* was found. Fix this before running ${script_filename} again."
  else
    echo "=> WARNING: ${LOCAL} does not exist or is not a symlink, and no snapshot directories were found, so it is assumed this is the first time ${script_filename} is run using \"${base_dir}\" as the base directory."
    first_run=1
  fi
fi

# Check the current tree and issue warnings and errors based on the current tree state
# Also, it sets up the temporary directory
if [[ ${current_exists} == 1 ]]; then
  echo "=> WARNING: ${current} already exists. It will be preserved."
  no_snapshot_delete=1
  if [[ ${local_useful} == 1 ]]; then
    if [[ ${tmp_exists} == 0 ]]; then
      echo "=> WARNING: ${LOCAL}.tmp does not exist."
      cp -al "${current}" "${LOCAL}".tmp
    fi
    date_exact="$(date +%Y.%m.%d-%T)"
    echo "=> WARNING: Snapshot ${LOCAL}-${date_exact} will be created because ${current} already exists"
  else
    echo "=> WARNING: ${LOCAL} does not exist. It will be created."
    if [[ ${tmp_exists} == 0 ]]; then
      echo "=> WARNING: As ${LOCAL}.tmp does not exist, data transfer will start from ${current}"
      cp -al "${current}" "${LOCAL}".tmp
    else
      echo "=> WARNING: Data transfer will start from ${LOCAL}.tmp"
    fi
  fi
elif [[ ${local_useful} == 0 ]]; then
  if [[ ${tmp_exists} == 1 ]]; then
    echo "=> ${LOCAL} is not useful but ${LOCAL}.tmp exists. Resuming from ${LOCAL}.tmp"
    echo "=> WARNING: Symlink ${LOCAL} does not exist. It will be created."
  else
    if [[ ${first_run} == 0 ]]; then
      error "=> ERROR: ${LOCAL}.tmp does not exist and ${LOCAL} is not useful. Exiting."
    else
      mkdir "${LOCAL}".tmp
    fi
  fi
else
  if [[ ${tmp_exists} == 1 ]]; then
    echo "=> ${LOCAL}.tmp already exists. Symlink \"${project}\" currently points to ${last_path}."
  else
    cp -al "${last_path}" "${LOCAL}".tmp
  fi
fi

echo
echo "=> Creating snapshot for date ${DATE}"

# Change to the temporary directory.
# This must not fail silently: the pool listing and the deletions below use
# paths relative to it.
cd "${LOCAL}".tmp || error "=> ERROR: Could not change to directory ${LOCAL}.tmp"

if [[ ${db_and_symlinks_update} == 1 ]]; then

  # Delete temporary files that may be left over by a previous invocation of parabola-mirror-repos
  rm -f "$REMOTE_POOL_FILES" "$LOCAL_POOL_FILES" "$POOL_FILES_TO_DELETE" || true

  # List whole repository directories: these are the same trees the
  # synchronization step below transfers, so every package symlink in them
  # needs its pool target.
  remote_repo_dirs=("${repos_array[@]/#/${SERVER}/}")

  echo "=> Getting pool path list"
  # A dry run against a destination that does not exist makes rsync list every
  # remote symlink as "name -> target" (%L); the pool-relative part of each
  # target is kept.
  rsync -lrv --out-format="%L" --dry-run --exclude 'mips64el' --exclude '*mips64el.pkg.tar.*' "${remote_repo_dirs[@]}" "/tmp/${script_filename}.unexistent_filename" \
    | grep -- "->" \
    | grep "pool.*[^/]$" \
    | sed -e "s#.*\(pool/.*/.*\)#\1#" \
    >> "$REMOTE_POOL_FILES" || error " => ERROR: Failed with error code: $?"

  if [[ ${first_run} == 0 ]]; then
    echo "=> Getting local pool package file list"
    # Build a list of local packages
    find pool -mindepth 2 >> "$LOCAL_POOL_FILES"

    echo "=> Building list of local pool package files to delete"

    # Avoid duplicates (comes from -any packages present in both i686/ and x86_64/)
    sort -u -o "$REMOTE_POOL_FILES" "$REMOTE_POOL_FILES"

    sort -o "$LOCAL_POOL_FILES" "$LOCAL_POOL_FILES"

    # Keep lines that only appear in LOCAL_POOL_FILES
    comm -13 "$REMOTE_POOL_FILES" "$LOCAL_POOL_FILES" > "$POOL_FILES_TO_DELETE"

    NUMBER_TO_DELETE="$(wc -l < "$POOL_FILES_TO_DELETE")"

    if [[ "$NUMBER_TO_DELETE" -gt "$MAXDELETE" ]]; then
      error " => ERROR: The number of pool package files to be deleted is ${NUMBER_TO_DELETE}, greater than the specified maximum which is ${MAXDELETE}"
    fi

    if [[ $NUMBER_TO_DELETE -gt 0 ]]; then
      # no_file_delete=1 means "do not delete"
      if [[ ${no_file_delete} != 1 ]]; then
        echo "=> Deleting ${NUMBER_TO_DELETE} old pool package files"
        # One path per line; read them without word splitting or globbing
        while IFS= read -r pool_file; do
          [[ -n ${pool_file} ]] || continue
          printf '%s\n' "${pool_file}"
          rm -f -- "${pool_file}"
        done < "$POOL_FILES_TO_DELETE"
      else
        echo "=> Deletion of pool package files (${NUMBER_TO_DELETE}) is disabled. List of files:"
        cat "$POOL_FILES_TO_DELETE"
      fi
    fi
  fi
fi

echo "=> List of local repositories. Existing files will be hard linked from these instead of being fetched from the remote server."
printf -- '%s\n' "${link_dest[@]}"

} &> >(tee -a "${log_tmp}")

_wait_log_tmp

# The rsync invocations run outside the logged command groups: rsync writes
# to ${log_tmp} by itself through the --log-file option in OPTIONS.
if [[ ${db_and_symlinks_update} == 1 ]]; then

  echo "=> Starting to synchronize repository directories (symlinks and db.* files)"

  rsync "${rsync_options[@]}" --stats --exclude 'mips64el' --delete-after --safe-links "${link_dest_option[@]}" --link-dest="${LOCAL}" "${repos_array[@]/#/${SERVER}/}" "${LOCAL}".tmp || error " => ERROR: rsync terminated with an error code: $?"

fi

{

if [[ ${pools_update} == 1 ]]; then

  echo "=> Starting to synchronize package pools from remote server $SERVER"

fi # end "if [[ ${pools_update} == 1 ]]"

} &> >(tee -a "${log_tmp}")

_wait_log_tmp

if [[ ${pools_update} == 1 ]]; then

  # A failure here is reported but does not abort the run: a missing entry of
  # the --files-from list is enough to make rsync return a non-zero status.
  rsync "${rsync_options[@]}" --stats --exclude '*-mips64el.pkg.tar.*' --safe-links --files-from="$REMOTE_POOL_FILES" "${link_dest_option[@]}" --link-dest="${LOCAL}" "${SERVER}" "${LOCAL}".tmp \
    || echo " => WARNING: rsync (package pools) terminated with an error code: $?" >&2

fi # end "if [[ ${pools_update} == 1 ]]"

if [[ ${path_list} ]]; then
  echo "=> Synchronizing extra paths"
  # Extra paths land directly below ${LOCAL}.tmp/, so the link-dest
  # directories are the tree roots themselves.
  rsync "${rsync_options[@]}" --stats --safe-links "${link_dest_option[@]}" --link-dest="${LOCAL}" "${path_list_array[@]/#/${SERVER}/}" "${LOCAL}".tmp/ || error " => ERROR: rsync terminated with an error code: $?"
fi

{

if [[ ${no_snapshot_delete} != 1 ]]; then

  # Snapshot pruning is only reported, nothing is removed here.
  echo "=> DRY-RUN: Delete versions older than the version number: ${oldest_version}."
  # Sorted by name (= by date), everything except the newest ${oldest_version}
  # snapshots is a deletion candidate.
  if delete_list=$(find "${base_dir}" -maxdepth 1 -regextype sed -regex "${LOCAL}-[0-9]\{4\}\.[0-9]\{2\}\.[0-9]\{2\}" | sort | head -n -"${oldest_version}"); then
    if [[ ${delete_list} ]]; then
      echo "DRY_RUN: rm -rf ${delete_list}"
    else
      echo "=> Nothing to delete"
    fi
  fi

fi # closes [[ ${no_snapshot_delete} != 1 ]]

if [[ ${leave_tmp} == 0 ]]; then
  echo "=> Starting to serve the new repository version"

  if [[ ${date_exact} ]]; then
    echo " => Renaming ${LOCAL}.tmp to ${LOCAL}"-"${date_exact}"
    mv "${LOCAL}".tmp "${LOCAL}"-"${date_exact}"
  else
    echo " => Renaming ${LOCAL}.tmp to ${current}"
    mv "${LOCAL}".tmp "${current}"
  fi

  if [[ ${snapshot_symlink_update} == 1 ]]; then
    cd "${base_dir}" || error "=> ERROR: Could not change to directory ${base_dir}"

    if [[ ${local_useful} == 1 ]]; then
      echo " => Deleting symlink $LOCAL"
      rm -rf "${LOCAL}"
    fi

    # Create symlink.
    # The link is always ${LOCAL} (the path every check in this script looks
    # at) and its target is the snapshot's name relative to the link.
    if [[ ${date_exact} ]]; then
      echo " => Creating symlink ${LOCAL} to ${project}-${date_exact}"
      ln -s "${LOCAL##*/}-${date_exact}" "${LOCAL}"
    else
      echo " => Creating symlink \"${project}\" to ${current_component}"
      ln -s "${current_component}" "${LOCAL}"
    fi
  fi
fi

rm -f "$REMOTE_POOL_FILES" "$LOCAL_POOL_FILES" "$POOL_FILES_TO_DELETE" "$LOCAL".old || true

echo "=> Disk space report"
df -h "${base_dir}"

echo "=> ${script_filename} finished successfully. Finish time: $(date --rfc-3339=seconds)"

} &> >(tee -a "${log_tmp}")

_wait_log_tmp