From 71e3ca00e35e26fc7738ed955bb7178fac4c919d Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sun, 2 Jul 2017 18:36:07 -0400 Subject: Use pools in git --- Makefile | 7 +++--- bin/gitify | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ bin/gitthing | 71 ----------------------------------------------------- bin/poolify | 2 +- 4 files changed, 84 insertions(+), 76 deletions(-) create mode 100755 bin/gitify delete mode 100755 bin/gitthing diff --git a/Makefile b/Makefile index 0272edb..2ee9a42 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ dirfail = ( r=$$?; mv -- '$@'{,.bak}; exit $$r; ) all: $(MAKE) dat/urlkeys.mk $(MAKE) dat/index.mk - $(MAKE) dat/pools dat/git + $(MAKE) dat/git fix: grep -rl '

503' dat | xargs rm -fv -- @@ -69,10 +69,9 @@ dat/pools.txt: $(addsuffix metadata.txt,$(content-dir)) dat/index.txt dat/pools: $(download) dat/pools.txt dat/index.txt rm -rf -- $@ $@.bak poolify dat/pools.txt dat/index.txt || $(dirfail) - -dat/git: $(download) dat/index.txt +dat/git: dat/pools $(addsuffix readme.txt,$(content-dir)) $(addsuffix metadata.txt,$(content-dir)) rm -rf -- $@ $@.bak - gitthing dat/git < dat/index.txt || $(dirfail) + gitify $@ || $(dirfail) ################################################################################ endif diff --git a/bin/gitify b/bin/gitify new file mode 100755 index 0000000..55ddff8 --- /dev/null +++ b/bin/gitify @@ -0,0 +1,80 @@ +#!/usr/bin/env bash + +empty() { + [[ $(stat -c %s "$1") -eq 0 ]] +} + +url2murl() { + local x + x=$1 + x=${x//'^'/'^5E'} + x=${x//':'/'^3A'} + x=${x//'%'/'^25'} + printf '%s' "$x" +} + +murl2url() { + local x + x=$1 + x=${x//'^25'/'%'} + x=${x//'^3A'/':'} + x=${x//'^5E'/'^'} + printf '%s' "$x" +} + +main() { + set -euE -o pipefail + shopt -s nullglob + shopt -s globstar + + top=$PWD + mkdir -p "$1" + cd "$1" + git init + echo 'ref: refs/heads/PROGRAMS/CVTUTF' > .git/HEAD + git commit --allow-empty -m 'initial commit' + + for snapshot in "$top"/dat/pools/snaps/*; do + snapname="${snapshot##*/}" + time="${snapname%%-*}" + dirpart="${snapname#*-}" + dirpart="${dirpart//_/\/}" + + branch=$dirpart + git checkout PROGRAMS/CVTUTF + git checkout -b "$branch" || true + git checkout "$branch" + + rm -f -- * .metadata.txt + cp -- "$snapshot"/* . + + if [[ "$time" != *99 ]]; then + listingdir=("$top/dat/content-dir/$time/"**"/Public/$dirpart") + if [[ ${#listingdir[@]} != 1 ]]; then + >&2 printf 'Cannot find wayback listing for %s' "$snapname" + false + fi + if ! [[ -f readme.txt ]] && ! empty "$listingdir/readme.txt"; then + cp "$listingdir/readme.txt" . + fi + cp "$listingdir/metadata.txt" .metadata.txt + fi + + if [[ -n "$(git status -s .)" ]]; then + if [[ "$time" = *99 ]]; then + msg="Synthesized listing: ${time} ${dirpart}" + time="${time%99}00" + else + waurl="http://web.archive.org/web/$(murl2url "${listingdir##*/content-dir/}/")" + msg="$waurl" + fi + gitdate="$(sed -r 's/(....)(..)(..)(..)(..)(..)/\1-\2-\3T\4:\5:\6/' <<<"$time")" + git add . + export GIT_AUTHOR_DATE=$gitdate + export GIT_COMMITTER_DATE=$gitdate + git commit -m "$msg" + fi + done +} + +main "$@" diff --git a/bin/gitthing b/bin/gitthing deleted file mode 100755 index ff7b5ac..0000000 --- a/bin/gitthing +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env bash - -empty() { - [[ $(stat -c %s "$1") -eq 0 ]] -} - -url2murl() { - local x - x=$1 - x=${x//'^'/'^5E'} - x=${x//':'/'^3A'} - x=${x//'%'/'^25'} - printf '%s' "$x" -} - -murl2url() { - local x - x=$1 - x=${x//'^25'/'%'} - x=${x//'^3A'/':'} - x=${x//'^5E'/'^'} - printf '%s' "$x" -} - -main() { - set -euE -o pipefail - top=$PWD - - mkdir -p "$1" - cd "$1" - git init - echo 'ref: refs/heads/PROGRAMS/CVTUTF' > .git/HEAD - git commit --allow-empty -m 'initial commit' - - while read -r time url; do - suffix="${url##*/Public/}" - dirpart="${suffix%/*}" - filepart="${suffix##*/}" - - branch=$dirpart - - git checkout PROGRAMS/CVTUTF - git checkout -b "$branch" || true - git checkout "$branch" - - if [[ -n "$filepart" ]]; then - file="$top/dat/content-file/$time/$(url2murl "$url")" - cp "$file" . - else - dir="$top/dat/content-dir/$time/$(url2murl "$url")" - comm -23 \ - <(git ls-files) \ - <(< "$dir/metadata.txt" awk '{print $1}') \ - | xargs -r0 rm -f -- - if ! empty "$dir/readme.txt"; then - cp "$dir/readme.txt" . - fi - cp "$dir/metadata.txt" .metadata.txt - fi - - if [[ -n "$(git status -s .)" ]]; then - gitdate="$(sed -r 's/(....)(..)(..)(..)(..)(..)/\1-\2-\3T\4:\5:\6/' <<<"$time")" - git add . - export GIT_AUTHOR_DATE=$gitdate - export GIT_COMMITTER_DATE=$gitdate - git commit -m "$time $url" - fi - done -} - -main "$@" diff --git a/bin/poolify b/bin/poolify index 48dc828..f47f2f3 100755 --- a/bin/poolify +++ b/bin/poolify @@ -48,7 +48,7 @@ main() { false fi # Ok, now copy that snapshot - snapdir=dat/pools/snaps/"${datetime}00-${dirpart//\//_}" + snapdir=dat/pools/snaps/"${datetime}99-${dirpart//\//_}" cp -aT dat/pools/snaps/"$prevsnap" "$snapdir" # And touch file files we need to change for filepart in $newfiles; do -- cgit v1.2.3