summary | refs | log | tree | commit | diff
path: root/Makefile
diff options
context:
space:
mode:
author: Luke Shumaker <lukeshu@lukeshu.com> 2017-07-01 18:17:38 -0400
committer: Luke Shumaker <lukeshu@lukeshu.com> 2017-07-01 18:17:38 -0400
commit: 55f9bd8d5226eb49f5899c5ddca89c655ef96e9b (patch)
tree: 53096770e331f2d571fc9e49af5000cfed1fa0d8 /Makefile
parent: e4109fab0bece003dc53c78c7cc8608b68328312 (diff)
stuff
Diffstat (limited to 'Makefile')
-rw-r--r-- Makefile 34
1 files changed, 22 insertions, 12 deletions
diff --git a/Makefile b/Makefile
index 12c5bb6..2b96cd7 100644
--- a/Makefile
+++ b/Makefile
@@ -4,8 +4,9 @@ export PATH
url2murl = $(subst %,^25,$(subst :,^3A,$(subst ^,^5E,$1)))
murl2url = $(subst ^5E,^,$(subst ^3A,:,$(subst ^25,%,$1)))
+dirfail = ( r=$$?; mv -- '$@' '$@.bak'; exit $$r; )
-all: download
+all: dat/git dat/pools
fix:
grep -rl '<html><body><h1>503' dat | xargs rm -fv --
@@ -24,34 +25,43 @@ dat/urlkeys.mk: dat/urlkeys.txt
dat/each-cdx/%.txt:
@mkdir -p '$(@D)'
cdxget "url=$$(urlkey2url '$*')" 'filter=statuscode:200' 'collapse=digest' 'fl=timestamp,original' > '$@'
-dat/index.txt: $(addprefix dat/each-cdx/,$(addsuffix .txt,$(urlkeys)))
- cat -- $(foreach c,$^,'$c') | sort > $@
+dat/index.txt: $(addprefix dat/each-cdx/,$(addsuffix .txt,$(urlkeys))) dat/urlkeys.txt
+ cat -- $(foreach c,$(filter dat/each-cdx/%,$^),'$c') | sort > $@
dat/index.mk: dat/index.txt
- < $< sed 's,^,index+=web.archive.org/web/,;s, ,/,' > $@
+ < $< sed -e 's,^,index+=,' -e 's, ,/,' > $@
-include dat/index.mk
dat/content-dir/%/index.wahtml:
@mkdir -p '$(@D)'
- curl -s 'http://$(call murl2url,$*)' > $@
+ curl -s 'http://web.archive.org/web/$(call murl2url,$*)' > $@
dat/content-dir/%/index.html: dat/content-dir/%/index.wahtml
wayfore < $< > $@
dat/content-dir/%/readme.txt: dat/content-dir/%/index.html
< $< sed -n '/^<pre>$$/,/<\/pre>/p' | sed -e 1d -e 's,</pre>.*,,' > $@
dat/content-dir/%/metadata.txt: dat/content-dir/%/index.html
- < $< grep '^<img' | sed 's/<[^>]*>//g' | grep -vi 'parent directory' > $@
+ < $< grep '^<img' | sed 's/<[^>]*>//g' | fmt-metadata $(firstword $(subst /, ,$*)) > $@
content-dir = $(foreach u,$(filter %/,$(index)),dat/content-dir/$(call url2murl,$(u)))
-download: $(addsuffix readme.txt,$(content-dir)) $(addsuffix metadata.txt,$(content-dir))
+download += $(addsuffix readme.txt,$(content-dir)) $(addsuffix metadata.txt,$(content-dir))
dat/content-file/%:
@mkdir -p '$(@D)'
- curl -s 'http://$(call murl2url,$*)' > $@
+ curl -s 'http://web.archive.org/web/$(call murl2url,$*)' > $@
content-file = $(foreach u,$(filter-out %/,$(index)),dat/content-file/$(call url2murl,$(u)))
-download: $(content-file)
+download += $(content-file)
-git: download
- gitthing dat/git < dat/index.txt
+download: $(download)
-.PHONY: all fix download git
+dat/pools.txt: $(addsuffix metadata.txt,$(content-dir)) dat/index.txt
+ grep ^ $(foreach c,$(filter %/metadata.txt,$^),'$c') | bin/dateify | sed -r -e 's,.*web\.archive\.org/web/,,' -e 's,/.*/Public/, ,' -e 's,/metadata\.txt: ,/,' -e 's,\s+, ,g' | sort -u > $@
+dat/pools: $(download) dat/pools.txt dat/index.txt
+ rm -rf -- $@ $@.bak
+ poolify dat/pools.txt dat/index.txt || $(dirfail)
+
+dat/git: $(download) dat/index.txt
+ rm -rf -- $@ $@.bak
+ gitthing dat/git < dat/index.txt || $(dirfail)
+
+.PHONY: all fix download
.DELETE_ON_ERROR:
.SECONDARY: