summaryrefslogtreecommitdiff
path: root/bin
diff options
context:
space:
mode:
authorLuke Shumaker <lukeshu@lukeshu.com>2017-07-01 18:17:38 -0400
committerLuke Shumaker <lukeshu@lukeshu.com>2017-07-01 18:17:38 -0400
commit55f9bd8d5226eb49f5899c5ddca89c655ef96e9b (patch)
tree53096770e331f2d571fc9e49af5000cfed1fa0d8 /bin
parente4109fab0bece003dc53c78c7cc8608b68328312 (diff)
stuff
Diffstat (limited to 'bin')
-rwxr-xr-xbin/dateify14
-rwxr-xr-xbin/fmt-metadata25
-rwxr-xr-xbin/gitthing4
-rwxr-xr-xbin/poolify69
4 files changed, 111 insertions, 1 deletions
diff --git a/bin/dateify b/bin/dateify
new file mode 100755
index 0000000..7aefdd9
--- /dev/null
+++ b/bin/dateify
@@ -0,0 +1,14 @@
+#!/usr/bin/sed -rf
+# Rewrite the first DD-Mon-YYYY date on each input line as YYYY-MM-DD.
+#
+# Step 1: reorder the fields to YYYY-Mon-DD, preserving whatever followed
+# the year (a word boundary, "T", or "_").
+s/\b([0-9]{2})-([A-Z][a-z][a-z])-([0-9]{4})(\b|T|_)/\3-\2-\1\4/
+# Step 2: replace the month abbreviation with its two-digit number.  Each
+# pattern is anchored to the YYYY-Mon-DD shape produced by step 1, so the
+# same three letters occurring elsewhere on the line (for example in a file
+# name such as "Markup.txt") are left untouched.
+s/\b([0-9]{4})-Jan-([0-9]{2})/\1-01-\2/
+s/\b([0-9]{4})-Feb-([0-9]{2})/\1-02-\2/
+s/\b([0-9]{4})-Mar-([0-9]{2})/\1-03-\2/
+s/\b([0-9]{4})-Apr-([0-9]{2})/\1-04-\2/
+s/\b([0-9]{4})-May-([0-9]{2})/\1-05-\2/
+s/\b([0-9]{4})-Jun-([0-9]{2})/\1-06-\2/
+s/\b([0-9]{4})-Jul-([0-9]{2})/\1-07-\2/
+s/\b([0-9]{4})-Aug-([0-9]{2})/\1-08-\2/
+s/\b([0-9]{4})-Sep-([0-9]{2})/\1-09-\2/
+s/\b([0-9]{4})-Oct-([0-9]{2})/\1-10-\2/
+s/\b([0-9]{4})-Nov-([0-9]{2})/\1-11-\2/
+s/\b([0-9]{4})-Dec-([0-9]{2})/\1-12-\2/
diff --git a/bin/fmt-metadata b/bin/fmt-metadata
new file mode 100755
index 0000000..0682414
--- /dev/null
+++ b/bin/fmt-metadata
@@ -0,0 +1,25 @@
+#!/usr/bin/env ruby
+require 'time'
+
+snapshot = ARGV.first.to_i
+
+$stdin.each_line do |line|
+ m = /^ (\S+) +(..-\S+-.... ..:..) +([0-9.]+)(\S+) *$/.match(line)
+ raise "Malformed line: #{line}" unless m
+ name = m[1]
+ datetime = m[2]
+ size_numb = m[3]
+ size_unit = m[4]
+
+ next if name.downcase == "parent directory"
+
+ # The Unicode.org web server switched the timezone of timestamps
+ # in May 2004
+ if snapshot < 20040500000000
+ datetime = Time.parse("#{datetime} +01:00").utc.strftime('%Y-%m-%d %H:%M')
+ else
+ datetime = Time.parse("#{datetime} +00:00").utc.strftime('%Y-%m-%d %H:%M')
+ end
+
+ puts ("%-22s %s %3s%s" % [ name, datetime, size_numb, size_unit ])
+end
diff --git a/bin/gitthing b/bin/gitthing
index a54c017..7bac2e2 100755
--- a/bin/gitthing
+++ b/bin/gitthing
@@ -62,7 +62,9 @@ main() {
if [[ -n "$(git status -s .)" ]]; then
gitdate="$(sed -r 's/(....)(..)(..)(..)(..)(..)/\1-\2-\3T\4:\5:\6/' <<<"$time")"
git add .
- GIT_AUTHOR_DATE=$gitdate GIT_AUTHOR_DATE=$gitdate git commit -m "$time $url"
+ export GIT_AUTHOR_DATE=$gitdate
+ export GIT_COMMITTER_DATE=$gitdate
+ git commit -m "$time $url"
fi
done
}
diff --git a/bin/poolify b/bin/poolify
new file mode 100755
index 0000000..34e0b42
--- /dev/null
+++ b/bin/poolify
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+
+url2murl() {
+ local x
+ x=$1
+ x=${x//'^'/'^5E'}
+ x=${x//':'/'^3A'}
+ x=${x//'%'/'^25'}
+ printf '%s' "$x"
+}
+
+main() {
+ set -euE -o pipefail
+ shopt -s nullglob
+
+ while read -r snap name date time size; do
+ dirpart="${name%/*}"
+ filepart="${name##*/}"
+ filedir=dat/pools/files/"${date//-/}${time//:/}-${name//\//_}"
+ snapdir=dat/pools/snaps/"${snap}-${dirpart//\//_}"
+ mkdir -p -- "$filedir" "$snapdir"
+ ln -sr "$filedir/$filepart" "$snapdir"
+ done < "$1"
+
+ while read -r time url; do
+ name="${url##*/Public/}"
+ dirpart="${name%/*}"
+ filepart="${name##*/}"
+
+ if [[ -z "$filepart" ]]; then
+ continue
+ fi
+
+ pools=(dat/pools/files/*-"${name//\//_}")
+ pools=("${pools[@]##*/}")
+
+ mypool=''
+ for pool in "${pools[@]}"; do
+ pooltime="${pool%%-*}"
+ if [[ "${pooltime}00" -le "$time" ]]; then
+ mypool=$pool
+ fi
+ done
+ if [[ -z "$mypool" ]]; then
+ >&2 printf 'Could not find pool for %s %s' "$time" "$url"
+ false
+ fi
+
+ waurl="http://web.archive.org/web/$time/$url"
+ file="dat/content-file/$(url2murl "${waurl#http://}")"
+
+ declare -i i=0
+ while true; do
+ link="dat/pools/files/$mypool/$filepart.$i"
+ link="${link%.0}"
+ a="$(readlink -f "$link")" || true
+ b="$(readlink -f "$file")"
+ if cmp -s -- "$a" "$b"; then
+ break
+ fi
+ if ln -sr "$b" "$link"; then
+ break
+ fi
+ i+=1
+ done
+ done < "$2"
+}
+
+main "$@"