summaryrefslogtreecommitdiff
path: root/bin/poolify
blob: 34e0b423c78962309ef04da8bb13c9f99f64d458 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env bash

# url2murl URL
#
# Write URL to stdout (without a trailing newline) with three characters
# replaced by caret escapes:
#   '^' -> '^5E',  ':' -> '^3A',  '%' -> '^25'
url2murl() {
	local murl=$1
	local rule
	# Each rule is "<character> <hex code>". The caret rule has to run first:
	# it is the escape introducer, so escaping it later would mangle the
	# carets produced by the other two rules.
	for rule in '^ 5E' ': 3A' '% 25'; do
		# Both operands are quoted so that they are taken literally (an
		# unquoted leading '%' could be read as an anchor in a pattern).
		murl=${murl//"${rule%% *}"/"^${rule##* }"}
	done
	printf '%s' "$murl"
}

# main FILE_LISTING URL_LISTING
#
# Build the symlink "pools" under dat/pools/ (paths are relative to the
# current directory).
#
# Arguments:
#   $1 - text file; each line is split into "snap name date time size".
#        For every line a directory
#          dat/pools/files/<date without '-'><time without ':'>-<name with '/' as '_'>
#        is created, and a relative symlink to "<that dir>/<basename of name>"
#        is placed in dat/pools/snaps/<snap>-<dirname of name with '/' as '_'>.
#   $2 - text file; each line is split into "time url". The time is used both
#        as the /web/<time>/ component of a web.archive.org URL and as a
#        number compared against pool timestamps (presumably a 14-digit
#        YYYYMMDDhhmmss Wayback timestamp - confirm against the producer).
#        The part of url after the last "/Public/" is the file name; URLs
#        whose name ends in '/' are skipped.
# Outputs:
#   Diagnostics on stderr.
# Returns:
#   0 on success, 2 on missing arguments, non-zero on any other failure
#   (including a URL for which no pool with an old-enough timestamp exists).
main() {
	set -euE -o pipefail
	shopt -s nullglob

	if (( $# < 2 )); then
		printf 'Usage: %s FILE_LISTING URL_LISTING\n' "${0##*/}" >&2
		return 2
	fi

	# "size" only soaks up the rest of each line so it does not end up in
	# the previous field; its value is never used.
	local snap name date clock size stamp url
	local dirpart filepart flatname filedir snapdir
	local pooldir pool mypool waurl file target link existing
	local -i i

	# Pass 1: create one pool directory per listed file version and link it
	# into its snapshot directory. "|| [[ -n ... ]]" keeps a final line that
	# has no trailing newline.
	while read -r snap name date clock size || [[ -n "$snap" ]]; do
		dirpart=${name%/*}
		filepart=${name##*/}
		filedir=dat/pools/files/${date//-/}${clock//:/}-${name//\//_}
		snapdir=dat/pools/snaps/${snap}-${dirpart//\//_}
		mkdir -p -- "$filedir" "$snapdir" || return
		ln -sr -- "$filedir/$filepart" "$snapdir" || return
	done < "$1"

	# Pass 2: attach each captured URL to the newest pool that is not newer
	# than the capture.
	while read -r stamp url || [[ -n "$stamp" ]]; do
		name=${url##*/Public/}
		filepart=${name##*/}
		if [[ -z "$filepart" ]]; then
			continue
		fi
		flatname=${name//\//_}

		# Globs expand in sorted order, so the last pool that passes the
		# comparison is the newest acceptable one. Iterating over the glob
		# directly also avoids expanding an empty array under "set -u".
		mypool=''
		for pooldir in dat/pools/files/*-"$flatname"; do
			pool=${pooldir##*/}
			# The glob also matches pools of other files whose flattened
			# name merely ends in "-$flatname"; require an exact match of
			# everything after the timestamp.
			[[ "${pool#*-}" == "$flatname" ]] || continue
			# Pool timestamps get "00" appended before the comparison
			# (presumably padding minutes to seconds - confirm).
			if [[ "${pool%%-*}00" -le "$stamp" ]]; then
				mypool=$pool
			fi
		done
		if [[ -z "$mypool" ]]; then
			printf 'Could not find pool for %s %s\n' "$stamp" "$url" >&2
			return 1
		fi

		waurl=http://web.archive.org/web/$stamp/$url
		file=dat/content-file/$(url2murl "${waurl#http://}")
		target=$(readlink -f -- "$file") || return

		# Find a slot for this content inside the pool: NAME, NAME.1,
		# NAME.2, ... An occupied slot is reused when it already resolves to
		# byte-identical content; otherwise the next suffix is tried.
		i=0
		while true; do
			link=dat/pools/files/$mypool/$filepart
			if (( i > 0 )); then
				link+=.$i
			fi
			if [[ -e "$link" || -L "$link" ]]; then
				existing=$(readlink -f -- "$link") || existing=''
				if cmp -s -- "$existing" "$target"; then
					break
				fi
				i+=1
				continue
			fi
			# The slot is free, so a failure here is a real error; abort
			# instead of retrying with ever-growing suffixes.
			ln -sr -- "$target" "$link" || return
			break
		done
	done < "$2"
}

# Script entry point: forward the command-line arguments unchanged to main,
# which reads its two input files from "$1" and "$2".
main "$@"