From 55f9bd8d5226eb49f5899c5ddca89c655ef96e9b Mon Sep 17 00:00:00 2001
From: Luke Shumaker
Date: Sat, 1 Jul 2017 18:17:38 -0400
Subject: stuff

---
Notes (not part of the commit message):

bin/dateify is a sed script that rewrites the first DD-Mon-YYYY date on each
line (optionally followed by "T" or "_") as YYYY-MM-DD.  The first rule
reorders the fields to YYYY-Mon-DD; the month rules then replace the month
abbreviation.  The month rules are anchored to that reordered date so that a
month abbreviation occurring earlier on the line (for example inside a file
name) is left untouched.

The blob ids on the "index" lines below are those of the original commit.

 bin/dateify      | 14 ++++++++++++
 bin/fmt-metadata | 25 ++++++++++++++++++++
 bin/gitthing     |  4 +++-
 bin/poolify      | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 111 insertions(+), 1 deletion(-)
 create mode 100755 bin/dateify
 create mode 100755 bin/fmt-metadata
 create mode 100755 bin/poolify

(limited to 'bin')

diff --git a/bin/dateify b/bin/dateify
new file mode 100755
index 0000000..7aefdd9
--- /dev/null
+++ b/bin/dateify
@@ -0,0 +1,14 @@
+#!/usr/bin/sed -rf
+s/\b([0-9]{2})-([A-Z][a-z][a-z])-([0-9]{4})(\b|T|_)/\3-\2-\1\4/
+s/\b([0-9]{4})-Jan-([0-9]{2})/\1-01-\2/
+s/\b([0-9]{4})-Feb-([0-9]{2})/\1-02-\2/
+s/\b([0-9]{4})-Mar-([0-9]{2})/\1-03-\2/
+s/\b([0-9]{4})-Apr-([0-9]{2})/\1-04-\2/
+s/\b([0-9]{4})-May-([0-9]{2})/\1-05-\2/
+s/\b([0-9]{4})-Jun-([0-9]{2})/\1-06-\2/
+s/\b([0-9]{4})-Jul-([0-9]{2})/\1-07-\2/
+s/\b([0-9]{4})-Aug-([0-9]{2})/\1-08-\2/
+s/\b([0-9]{4})-Sep-([0-9]{2})/\1-09-\2/
+s/\b([0-9]{4})-Oct-([0-9]{2})/\1-10-\2/
+s/\b([0-9]{4})-Nov-([0-9]{2})/\1-11-\2/
+s/\b([0-9]{4})-Dec-([0-9]{2})/\1-12-\2/
diff --git a/bin/fmt-metadata b/bin/fmt-metadata
new file mode 100755
index 0000000..0682414
--- /dev/null
+++ b/bin/fmt-metadata
@@ -0,0 +1,25 @@
+#!/usr/bin/env ruby
+# Usage: fmt-metadata SNAPSHOT < LISTING
+# Prints each listing entry as "NAME YYYY-MM-DD HH:MM SIZE", with the time converted to UTC.
+require 'time'
+
+snapshot = ARGV.first.to_i
+
+$stdin.each_line do |line|
+  m = /^ (\S+) +(..-\S+-.... ..:..) +([0-9.]+)(\S+) *$/.match(line)
+  raise "Malformed line: #{line}" unless m
+  name = m[1]
+  datetime = m[2]
+  size_numb = m[3]
+  size_unit = m[4]
+
+  next if name.downcase == "parent directory"
+
+  # The Unicode.org web server switched the timezone of timestamps
+  # in May 2004
+  # (presumably snapshot is a YYYYmmddHHMMSS number; the literal below is 2004-05)
+  offset = snapshot < 20040500000000 ? '+01:00' : '+00:00'
+  datetime = Time.parse("#{datetime} #{offset}").utc.strftime('%Y-%m-%d %H:%M')
+
+  puts("%-22s %s %3s%s" % [name, datetime, size_numb, size_unit])
+end
diff --git a/bin/gitthing b/bin/gitthing
index a54c017..7bac2e2 100755
--- a/bin/gitthing
+++ b/bin/gitthing
@@ -62,7 +62,9 @@ main() {
     if [[ -n "$(git status -s .)" ]]; then
       gitdate="$(sed -r 's/(....)(..)(..)(..)(..)(..)/\1-\2-\3T\4:\5:\6/' <<<"$time")"
       git add .
-      GIT_AUTHOR_DATE=$gitdate GIT_AUTHOR_DATE=$gitdate git commit -m "$time $url"
+      export GIT_AUTHOR_DATE=$gitdate
+      export GIT_COMMITTER_DATE=$gitdate
+      git commit -m "$time $url"
     fi
   done
 }
diff --git a/bin/poolify b/bin/poolify
new file mode 100755
index 0000000..34e0b42
--- /dev/null
+++ b/bin/poolify
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+# Usage: poolify LISTING URLS   ($1: "snap name date time size" lines; $2: "time url" lines)
+
+# Print $1 with "^", ":" and "%" replaced by "^5E", "^3A" and "^25".
+url2murl() {
+  local x
+  x=$1
+  x=${x//'^'/'^5E'}
+  x=${x//':'/'^3A'}
+  x=${x//'%'/'^25'}
+  printf '%s' "$x"
+}
+
+main() {
+  set -euE -o pipefail
+  shopt -s nullglob
+
+  # Pass 1 ($1): create a pool dir per (date, time, name) and a snapshot dir per
+  # (snap, directory), and symlink the pool's file into the snapshot dir.
+  while read -r snap name date time size; do
+    dirpart="${name%/*}"
+    filepart="${name##*/}"
+    filedir=dat/pools/files/"${date//-/}${time//:/}-${name//\//_}"
+    snapdir=dat/pools/snaps/"${snap}-${dirpart//\//_}"
+    mkdir -p -- "$filedir" "$snapdir"
+    ln -sr "$filedir/$filepart" "$snapdir"
+  done < "$1"
+
+  # Pass 2 ($2): link each URL's content file into the last matching pool whose time is <= $time.
+  while read -r time url; do
+    name="${url##*/Public/}"
+    dirpart="${name%/*}"
+    filepart="${name##*/}"
+
+    [[ -n "$filepart" ]] || continue  # URL ends in "/": nothing to link
+
+    pools=(dat/pools/files/*-"${name//\//_}")
+    pools=("${pools[@]##*/}")
+
+    mypool=''
+    for pool in "${pools[@]}"; do
+      pooltime="${pool%%-*}"
+      if [[ "${pooltime}00" -le "$time" ]]; then  # "00" pads the pool time (presumably seconds)
+        mypool=$pool
+      fi
+    done
+    if [[ -z "$mypool" ]]; then
+      >&2 printf 'Could not find pool for %s %s\n' "$time" "$url"
+      false  # non-zero status aborts the script under "set -e"
+    fi
+
+    waurl="http://web.archive.org/web/$time/$url"
+    file="dat/content-file/$(url2murl "${waurl#http://}")"
+
+    declare -i i=0
+    while true; do
+      link="dat/pools/files/$mypool/$filepart.$i"
+      link="${link%.0}"  # the first candidate carries no numeric suffix
+      a="$(readlink -f "$link")" || true
+      b="$(readlink -f "$file")"
+      if cmp -s -- "$a" "$b" || ln -sr "$b" "$link"; then
+        break  # identical content already linked, or new link created
+      fi
+      i+=1
+    done
+  done < "$2"
+}
+
+main "$@"
-- 
cgit v1.2.3