blob: a54c0171a6cf0177c7f06c60bd5f45148b122914 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
|
#!/usr/bin/env bash
# Succeed (status 0) when the file named by $1 holds no data.
#
# Arguments: $1 - path to inspect
# Returns:   0 if the path is missing or has size zero, 1 otherwise
#
# Uses the shell's own -s test rather than `stat -c %s`, which is a GNU-only
# option and prints an error for a missing path. A missing path still counts
# as empty, as it did before (the failed substitution evaluated as 0).
empty() {
  [[ ! -s "$1" ]]
}
# Encode a URL into its "murl" form by escaping three characters with a
# caret followed by two hex digits:  ^ -> ^5E,  : -> ^3A,  % -> ^25.
#
# Arguments: $1 - URL text to encode
# Outputs:   the encoded text on stdout, without a trailing newline
url2murl() {
  local text=$1
  # Pairs of (literal character, escape). The caret pair must come first so
  # that carets introduced by the later escapes are not escaped again.
  local -a table=('^' '^5E' ':' '^3A' '%' '^25')
  local i
  for ((i = 0; i < ${#table[@]}; i += 2)); do
    # Quoting the pattern makes it match literally rather than as a glob.
    text=${text//"${table[i]}"/${table[i+1]}}
  done
  printf '%s' "$text"
}
# Decode a "murl" back into a URL by reversing the caret escapes:
# ^25 -> %,  ^3A -> :,  ^5E -> ^.
#
# Arguments: $1 - encoded text
# Outputs:   the decoded text on stdout, without a trailing newline
murl2url() {
  local text=$1
  # Pairs of (escape, literal character). The caret escape is undone last so
  # that a decoded caret cannot combine with following digits into another
  # escape sequence.
  local -a table=('^25' '%' '^3A' ':' '^5E' '^')
  local i
  for ((i = 0; i < ${#table[@]}; i += 2)); do
    # Quoting the pattern makes it match literally rather than as a glob.
    text=${text//"${table[i]}"/${table[i+1]}}
  done
  printf '%s' "$text"
}
# Build a git repository out of previously downloaded web.archive.org
# snapshots, one commit per snapshot that changes the working tree.
#
# Arguments:
#   $1 - directory in which to create the repository (created if missing)
# Inputs:
#   stdin - one record per line: "<timestamp> <url>". The timestamp is
#           reformatted as 4-2-2-2-2-2 characters, so it is presumably
#           YYYYMMDDhhmmss; confirm against the producer of the list.
#   $PWD/dat/content-file/<murl> - saved content for URLs that name a file
#   $PWD/dat/content-dir/<murl>/ - for URLs ending in "/": metadata.txt,
#           whose first column lists the names present in that snapshot,
#           and readme.txt
# Behavior:
#   The part of each URL after ".../Public/" is split into a directory part,
#   used as the branch name, and a file part. New branches start from
#   PROGRAMS/CVTUTF.
# Returns:
#   Non-zero as soon as any step fails (errexit, nounset and pipefail are
#   switched on here and stay on for the rest of the shell).
main() {
  set -euE -o pipefail
  local repo=${1:?target repository directory required}
  # Remember where the data directory lives before changing directory.
  local top=$PWD
  local timestamp url suffix dirpart filepart branch waurl file dir gitdate

  mkdir -p "$repo"
  cd "$repo"
  git init
  # Make the initial (unborn) branch PROGRAMS/CVTUTF via git itself instead
  # of writing .git/HEAD by hand.
  git symbolic-ref HEAD refs/heads/PROGRAMS/CVTUTF
  git commit --allow-empty -m 'initial commit'

  while read -r timestamp url; do
    suffix="${url##*/Public/}"
    dirpart="${suffix%/*}"
    filepart="${suffix##*/}"
    branch=$dirpart

    # Switch to the branch, creating it from PROGRAMS/CVTUTF on first use.
    # `checkout -b` fails when the branch already exists; that failure is
    # deliberately ignored because the plain checkout that follows handles
    # that case and still aborts the script on any real error.
    git checkout PROGRAMS/CVTUTF
    git checkout -b "$branch" || true
    git checkout "$branch"

    waurl="http://web.archive.org/web/$timestamp/$url"
    if [[ -n "$filepart" ]]; then
      # The URL names a file: copy its saved content into the work tree.
      file="$top/dat/content-file/$(url2murl "${waurl#http://}")"
      cp "$file" .
    else
      # The URL names a directory listing.
      dir="$top/dat/content-dir/$(url2murl "${waurl#http://}")"
      # Without the listing every tracked file would look stale and be
      # removed below, so stop before touching the work tree.
      if [[ ! -f "$dir/metadata.txt" ]]; then
        printf 'missing directory listing: %s\n' "$dir/metadata.txt" >&2
        return 1
      fi
      # Remove tracked files that the listing no longer mentions. comm needs
      # both inputs sorted in the same collation, and its output is
      # newline-delimited, so xargs must split on newlines (not NULs).
      LC_ALL=C comm -23 \
        <(git ls-files | LC_ALL=C sort) \
        <(awk '{print $1}' "$dir/metadata.txt" | LC_ALL=C sort) \
        | xargs -r -d '\n' rm -f --
      if ! empty "$dir/readme.txt"; then
        cp "$dir/readme.txt" .
      fi
      cp "$dir/metadata.txt" .metadata.txt
    fi

    # Commit only when the snapshot actually changed something.
    if [[ -n "$(git status -s .)" ]]; then
      # NOTE(review): the date carries no timezone suffix, so git presumably
      # reads it as local time; confirm whether the timestamps are UTC.
      gitdate="$(sed -r 's/(....)(..)(..)(..)(..)(..)/\1-\2-\3T\4:\5:\6/' <<<"$timestamp")"
      git add .
      # Stamp both author and committer dates with the snapshot time.
      GIT_AUTHOR_DATE=$gitdate GIT_COMMITTER_DATE=$gitdate git commit -m "$timestamp $url"
    fi
  done
}
# Script entry point: pass every command-line argument to main unchanged.
main "$@"
|