summaryrefslogtreecommitdiff
path: root/bin/poolify
blob: b49cbd9e9359996c155fc74b732ec2def80761ae (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env bash
# Copyright (c) 2017, 2023  Luke Shumaker <lukeshu@lukeshu.com>
#
# This work is free.  You can redistribute it and/or modify it under
# the terms of the Do What The Fuck You Want To Public License,
# Version 2, as published by Sam Hocevar.  See the COPYING file for
# more details.

# url2murl URL
#
# Print URL (without a trailing newline) with the characters '^', ':'
# and '%' replaced by '^5E', '^3A' and '^25' respectively.
url2murl() {
	local out=$1 rule from to
	# Order matters: '^' is the escape character itself, so it has to
	# be rewritten before the other rules introduce new '^'s.
	for rule in '^=^5E' ':=^3A' '%=^25'; do
		from=${rule%%=*}
		to=${rule#*=}
		# Quoting "$from" makes the pattern match literally.
		out=${out//"$from"/$to}
	done
	printf '%s' "$out"
}

# main METADATA_TXT INDEX_TXT
#
# Populate the symlink pools under dat/pools/ (relative to the current
# directory):
#
#   dat/pools/files/<YYYYMMDDHHMM>-<name, '/' as '_'>/   versions of one file
#   dat/pools/snaps/<snap>-<dir, '/' as '_'>/            one directory listing
#
# Arguments:
#   $1 - file read by pass 1; each line is "snap name date time"
#   $2 - file read by pass 2; each line is "time url"
# Outputs:
#   progress markers on stdout, diagnostics on stderr
# Side effects:
#   enables `set -euE -o pipefail` and `shopt -s nullglob` in the
#   calling shell; any failing command therefore aborts the shell.
main() {
	set -euE -o pipefail
	# nullglob: a glob that matches nothing expands to zero words.  The
	# pool lookups below depend on that, but it also means that every
	# expansion that is NOT meant to glob must be quoted.
	shopt -s nullglob

	local arg_metadata_txt="$1"
	local arg_index_txt="$2"

	local snap name date time url line missing
	local dirpart filepart datetime filedir snapdir
	local newfiles prevsnap isnap
	local pname mypool pool pooltime
	local file link existing target
	local -a pools=()
	local -a old_pools=()

	# Overrides ############################################################

	# Datetime as found in the metadata => datetime to use instead when
	# naming the file pool.
	declare -A override_datetime
	override_datetime[200109261739]=200303310700

	local -a override_synthetic_listings=(
		#YYYYMMDDHHMM branch_name         newfiles
		'200307291500 ALPHA/CVTUTF-1-1    ExpectedOutput.txt readme.txt'
	)

	# Main #################################################################

	# (Single quotes: the variable name is printed literally.)
	echo '# Pass 1 (initialize snapshots from $arg_metadata_txt)'
	while read -r snap name date time; do
		dirpart="${name%/*}"
		filepart="${name##*/}"
		datetime="${date//-/}${time//:/}"
		datetime="${override_datetime[$datetime]:-$datetime}"
		filedir=dat/pools/files/"${datetime}-${name//\//_}"
		snapdir=dat/pools/snaps/"${snap}-${dirpart//\//_}"
		# If a pool already exists under the same name minus ".OLD",
		# share that pool instead of creating a second one.
		if [[ -d "${filedir/.OLD/}" ]]; then
			filedir="${filedir/.OLD/}"
		fi
		mkdir -p -- "$filedir" "$snapdir"
		ln -sr "$filedir/$filepart" "$snapdir"
	done < "$arg_metadata_txt"

	echo '# Pass 1.5 (initialize synthetic snapshots)'
	# Looking at the data, there are 3 revisions that we DON'T
	# have directory listings for.  So we need to synthesize
	# those.
	#
	# I created the list of listings to synthesize by not
	# synthesizing anything, then looking for files ending in
	# ".1".  They are created during pass 2 if we have a file with
	# no matching listing.
	for line in "${override_synthetic_listings[@]}"; do
		read -r datetime dirpart newfiles <<<"$line"
		# We need to figure out which files to put in the
		# directory listing.  We're going to do that by
		# mimicking the previous listing with that dirpart.
		prevsnap=''
		for isnap in dat/pools/snaps/*-"${dirpart//\//_}"; do
			isnap=${isnap##*/}
			# The "00" suffix pads the 12-digit datetime so it is
			# compared against the longer snapshot ID.
			if [[ "${isnap%%-*}" -lt "${datetime}00" ]]; then
				prevsnap=$isnap
			fi
		done
		if [[ -z "$prevsnap" ]]; then
			# ">&2", not ">&": a bare ">& word" would redirect
			# into a file called "printf" and then try to execute
			# the format string as a command.
			>&2 printf 'Could not find listing of %s before %s\n' "$dirpart" "$datetime"
			false # trips `set -e`
		fi
		# Ok, now copy that snapshot
		snapdir=dat/pools/snaps/"${datetime}99-${dirpart//\//_}"
		cp -aT dat/pools/snaps/"$prevsnap" "$snapdir"
		# And touch the files we need to change
		# ($newfiles is deliberately unquoted: it is a
		# whitespace-separated list of file names.)
		for filepart in $newfiles; do
			name="$dirpart/$filepart"
			filedir=dat/pools/files/"${datetime}-${name//\//_}"
			mkdir -p -- "$filedir"
			rm -- "$snapdir/$filepart"
			ln -sr "$filedir/$filepart" "$snapdir"
		done
	done

	echo '# Pass 2 (resolve files)'
	while read -r time url; do
		if [[ "$url" == */ ]]; then
			# Skip directories
			continue
		fi
		name="${url##*/Public/}"
		dirpart="${name%/*}"
		filepart="${name##*/}"

		# Candidate pools for this file, in glob (sorted) order.
		pools=(dat/pools/files/*-"${name//\//_}")
		if [[ "$name" = *.OLD* ]]; then
			# Also consider the pools of the non-".OLD" name,
			# and re-sort the combined list.
			pname="${name//\//_}"
			pools+=(dat/pools/files/*-"${pname/.OLD/}")
			readarray -t pools < <(printf '%s\n' "${pools[@]}" | sort)
		fi
		pools=("${pools[@]##*/}")

		# Pick the last pool that is not newer than this capture.
		mypool=''
		for pool in "${pools[@]}"; do
			pooltime="${pool%%-*}"
			if [[ "${pooltime}00" -le "$time" ]]; then
				mypool=$pool
			fi
		done
		if [[ -z "$mypool" ]]; then
			>&2 printf 'Could not find pool for %s %s\n' "$time" "$url"
			false # trips `set -e`
		fi

		# "$url" must be quoted: with nullglob on, an unquoted URL
		# containing '?', '*' or '[' that matches no file would
		# expand to nothing (and whitespace would split it).
		file="dat/content-file/$time/$(url2murl "$url")"
		# The target does not depend on the candidate link name, so
		# resolve it once, outside of the loop.
		target="$(readlink -f "$file")"

		# Try "$filepart", "$filepart.1", "$filepart.2", ... until we
		# either find a link with identical content or manage to
		# create a new one.
		declare -i i=0
		while true; do
			link="dat/pools/files/$mypool/$filepart.$i"
			link="${link%.0}"
			existing="$(readlink -f "$link")" || true
			if cmp -s -- "$existing" "$target"; then
				break
			fi
			if ln -sr "$target" "$link"; then
				break
			fi
			i+=1
		done
	done < "$arg_index_txt"

	echo '# Pass 3 (resolve missing files)'
	# Expand the glob ourselves: if nothing matches, nullglob would
	# leave `find` without a start path and it would scan "." instead.
	old_pools=(dat/pools/files/*-PROGRAMS_CVTUTF.OLD_*)
	if (( ${#old_pools[@]} > 0 )); then
		while read -r missing; do
			# An empty ".OLD" pool is filled with a link to the
			# same file in the corresponding non-".OLD" pool, if
			# that one has it.
			if [[ -f "${missing/.OLD}/${missing##*_}" ]]; then
				ln -sr "${missing/.OLD}/${missing##*_}" "$missing"
			fi
		done < <(find "${old_pools[@]}" -type d -empty)
	fi
}

# Script entry point: hand all command-line arguments to main.
main "$@"