blob: 92e3ccedf69fa96f714787983aa2d9af23aa0ec7 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
#!/usr/bin/env bash
# url2murl URL
#
# Print URL (without a trailing newline) with the characters '^', ':'
# and '%' replaced by '^5E', '^3A' and '^25' respectively.
url2murl() {
  local murl=$1
  local rule
  # Each rule is "<char>=<replacement>".  '^' must be handled first:
  # it is the escape character itself, so escaping it later would
  # mangle the '^' introduced by the other two replacements.
  for rule in '^=^5E' ':=^3A' '%=^25'; do
    murl=${murl//"${rule%%=*}"/"${rule#*=}"}
  done
  printf '%s' "$murl"
}
# main METADATA_TXT INDEX_TXT
#
# Build the dat/pools/ tree of symlinks (relative to the current
# directory) in several passes:
#
#   Pass 1   - for every "snap name date time" line of METADATA_TXT,
#              create a per-file pool directory under dat/pools/files/
#              and a per-listing directory under dat/pools/snaps/, and
#              link the listing entry to the pool.
#   Pass 1.5 - synthesize the listings named in
#              override_synthetic_listings by copying the closest
#              earlier listing of the same directory.
#   Pass 2   - for every "time url" line of INDEX_TXT, link the fetched
#              content under dat/content-file/ into the newest pool
#              that is not newer than the fetch.
#   Pass 3   - point still-empty CVTUTF.OLD pools at the file in the
#              matching non-.OLD pool, when that file exists.
#
# Progress banners go to stdout, diagnostics to stderr.  Exits with a
# non-zero status on any failure (strict mode is enabled here).
main() {
  set -euE -o pipefail
  # Unmatched globs must expand to nothing; the pool lookups below
  # rely on that to detect "no candidates".
  shopt -s nullglob

  if (( $# < 2 )); then
    >&2 printf 'Usage: %s METADATA_TXT INDEX_TXT\n' "${0##*/}"
    exit 2
  fi

  local arg_metadata_txt=$1
  local arg_index_txt=$2

  local snap name date time datetime dirpart filepart filedir snapdir
  local line newfiles prevsnap isnap
  local url pname pool pooltime mypool file link a b missing
  local -a pools=()
  local -a old_pools=()
  local -i i

  # Overrides ############################################################

  # Maps a datetime computed from the metadata to the datetime that
  # should be used instead.
  local -A override_datetime=()
  override_datetime[200109261739]=200303310700

  local -a override_synthetic_listings=(
    #YYYYMMDDHHMM branch_name newfiles
    '200307291500 ALPHA/CVTUTF-1-1 ExpectedOutput.txt readme.txt'
  )

  # Main #################################################################

  # NOTE(review): the single quotes make this print the literal text
  # "$arg_metadata_txt" rather than the file name; kept as-is so the
  # script's output does not change.
  echo '# Pass 1 (initialize snapshots from $arg_metadata_txt)'
  while read -r snap name date time; do
    dirpart="${name%/*}"
    filepart="${name##*/}"
    # "YYYY-MM-DD" + "HH:MM" -> "YYYYMMDDHHMM"
    datetime="${date//-/}${time//:/}"
    datetime="${override_datetime[$datetime]:-$datetime}"
    filedir=dat/pools/files/"${datetime}-${name//\//_}"
    snapdir=dat/pools/snaps/"${snap}-${dirpart//\//_}"
    # If a pool already exists under the same name without ".OLD",
    # share that pool instead of creating a separate one.
    if [[ -d "${filedir/.OLD/}" ]]; then
      filedir="${filedir/.OLD/}"
    fi
    mkdir -p -- "$filedir" "$snapdir"
    ln -sr "$filedir/$filepart" "$snapdir"
  done < "$arg_metadata_txt"

  echo '# Pass 1.5 (initialize synthetic snapshots)'
  # Looking at the data, there are 3 revisions that we DON'T
  # have directory listings for.  So we need to synthesize
  # those.
  #
  # I created the list of listings to synthesize by not
  # synthesizing anything, then looking for files ending in
  # ".1".  They are created during pass 2 if we have a file with
  # no matching listing.
  for line in "${override_synthetic_listings[@]}"; do
    read -r datetime dirpart newfiles <<<"$line"
    # We need to figure out which files to put in the
    # directory listing.  We're going to do that by
    # mimicking the previous listing with that dirpart.
    prevsnap=''
    for isnap in dat/pools/snaps/*-"${dirpart//\//_}"; do
      isnap=${isnap##*/}
      # The glob expands in sorted order, so the last listing that
      # passes this test is the closest one before $datetime.
      if [[ "${isnap%%-*}" -lt "${datetime}00" ]]; then
        prevsnap=$isnap
      fi
    done
    if [[ -z "$prevsnap" ]]; then
      >&2 printf 'Could not find listing of %s before %s\n' "$dirpart" "$datetime"
      exit 1
    fi
    # Ok, now copy that snapshot
    snapdir=dat/pools/snaps/"${datetime}99-${dirpart//\//_}"
    cp -aT dat/pools/snaps/"$prevsnap" "$snapdir"
    # And touch the files we need to change.  $newfiles is split on
    # whitespace on purpose: it is a space-separated list.
    for filepart in $newfiles; do
      name="$dirpart/$filepart"
      filedir=dat/pools/files/"${datetime}-${name//\//_}"
      mkdir -p -- "$filedir"
      rm -- "$snapdir/$filepart"
      ln -sr "$filedir/$filepart" "$snapdir"
    done
  done

  echo '# Pass 2 (resolve files)'
  while read -r time url; do
    if [[ "$url" == */ ]]; then
      # Skip directories
      continue
    fi
    name="${url##*/Public/}"
    dirpart="${name%/*}"
    filepart="${name##*/}"
    pools=(dat/pools/files/*-"${name//\//_}")
    if [[ "$name" = *.OLD* ]]; then
      # Pass 1 may have filed this under the non-.OLD pool, so
      # consider those pools too, merged into one sorted list.
      pname="${name//\//_}"
      pools+=(dat/pools/files/*-"${pname/.OLD/}")
      readarray -t pools < <(printf '%s\n' "${pools[@]}" | sort)
    fi
    pools=("${pools[@]##*/}")
    # Pick the last pool whose datetime is not after the fetch time.
    mypool=''
    for pool in "${pools[@]}"; do
      pooltime="${pool%%-*}"
      if [[ "${pooltime}00" -le "$time" ]]; then
        mypool=$pool
      fi
    done
    if [[ -z "$mypool" ]]; then
      >&2 printf 'Could not find pool for %s %s\n' "$time" "$url"
      exit 1
    fi
    # Quote "$url": with nullglob on, an unquoted URL containing
    # '?', '*' or '[' would be glob-expanded (possibly to nothing).
    file="dat/content-file/$time/$(url2murl "$url")"
    b="$(readlink -f "$file")"
    # Try "$filepart", then "$filepart.1", "$filepart.2", ... until we
    # find a link that already has identical content or a free name
    # that we can create.
    i=0
    while true; do
      link="dat/pools/files/$mypool/$filepart.$i"
      link="${link%.0}"
      # A link that does not exist yet is expected here.
      a="$(readlink -f "$link")" || true
      if cmp -s -- "$a" "$b"; then
        break
      fi
      if ln -sr "$b" "$link"; then
        break
      fi
      i+=1
    done
  done < "$arg_index_txt"

  echo '# Pass 3 (resolve missing files)'
  # Expand the glob ourselves: with nullglob an unmatched pattern
  # would leave find without a starting point, and GNU find would
  # then search the current directory instead.
  old_pools=(dat/pools/files/*-PROGRAMS_CVTUTF.OLD_*)
  if (( ${#old_pools[@]} > 0 )); then
    while read -r missing; do
      if [[ -f "${missing/.OLD}/${missing##*_}" ]]; then
        ln -sr "${missing/.OLD}/${missing##*_}" "$missing"
      fi
    done < <(find "${old_pools[@]}" -type d -empty)
  fi
}
# Entry point: run main with the script's command-line arguments.
main "$@"
|