diff options
Diffstat (limited to 'Makefile')
-rw-r--r-- | Makefile | 34 |
1 files changed, 22 insertions, 12 deletions
@@ -4,8 +4,9 @@ export PATH
 
 url2murl = $(subst %,^25,$(subst :,^3A,$(subst ^,^5E,$1)))
 murl2url = $(subst ^5E,^,$(subst ^3A,:,$(subst ^25,%,$1)))
+dirfail = ( r=$$?; mv -- '$@'{,.bak}; exit $$r; )
 
-all: download
+all: dat/git dat/pools
 
 fix:
 	grep -rl '<html><body><h1>503' dat | xargs rm -fv --
@@ -24,34 +25,43 @@ dat/urlkeys.mk: dat/urlkeys.txt
 dat/each-cdx/%.txt:
 	@mkdir -p '$(@D)'
 	cdxget "url=$$(urlkey2url '$*')" 'filter=statuscode:200' 'collapse=digest' 'fl=timestamp,original' > '$@'
-dat/index.txt: $(addprefix dat/each-cdx/,$(addsuffix .txt,$(urlkeys)))
-	cat -- $(foreach c,$^,'$c') | sort > $@
+dat/index.txt: $(addprefix dat/each-cdx/,$(addsuffix .txt,$(urlkeys))) urlkeys.txt
+	cat -- $(foreach c,$(filter dat/each-cdx/%,$^),'$c') | sort > $@
 dat/index.mk: dat/index.txt
-	< $< sed 's,^,index+=web.archive.org/web/,;s, ,/,' > $@
+	< $< sed -e 's,^,index+=,' -e 's, ,/,' > $@
 -include dat/index.mk
 
 dat/content-dir/%/index.wahtml:
 	@mkdir -p '$(@D)'
-	curl -s 'http://$(call murl2url,$*)' > $@
+	curl -s 'http://web.archive.org/web/$(call murl2url,$*)' > $@
 dat/content-dir/%/index.html: dat/content-dir/%/index.wahtml
 	wayfore < $< > $@
 dat/content-dir/%/readme.txt: dat/content-dir/%/index.html
 	< $< sed -n '/^<pre>$$/,/<\/pre>/p' | sed -e 1d -e 's,</pre>.*,,' > $@
 dat/content-dir/%/metadata.txt: dat/content-dir/%/index.html
-	< $< grep '^<img' | sed 's/<[^>]*>//g' | grep -vi 'parent directory' > $@
+	< $< grep '^<img' | sed 's/<[^>]*>//g' | fmt-metadata $(firstword $(subst /, ,$*)) > $@
 content-dir = $(foreach u,$(filter %/,$(index)),dat/content-dir/$(call url2murl,$(u)))
-download: $(addsuffix readme.txt,$(content-dir)) $(addsuffix metadata.txt,$(content-dir))
+download += $(addsuffix readme.txt,$(content-dir)) $(addsuffix metadata.txt,$(content-dir))
 
 dat/content-file/%:
 	@mkdir -p '$(@D)'
-	curl -s 'http://$(call murl2url,$*)' > $@
+	curl -s 'http://web.archive.org/web/$(call murl2url,$*)' > $@
 content-file = $(foreach u,$(filter-out %/,$(index)),dat/content-file/$(call url2murl,$(u)))
-download: $(content-file)
+download += $(content-file)
 
-git: download
-	gitthing dat/git < dat/index.txt
+download: $(download)
 
-.PHONY: all fix download git
+dat/pools.txt: $(addsuffix metadata.txt,$(content-dir)) dat/index.txt
+	grep ^ $(foreach c,$(filter %/metadata.txt,$^),'$c') | bin/dateify | sed -r -e 's,.*web\.archive\.org/web/,,' -e 's,/.*/Public/, ,' -e 's,/metadata\.txt: ,/,' -e 's,\s+, ,g' | sort -u > $@
+dat/pools: $(download) dat/pools.txt dat/index.txt
+	rm -rf -- $@ $@.bak
+	poolify dat/pools.txt dat/index.txt || $(dirfail)
+
+dat/git: $(download) dat/index.txt
+	rm -rf -- $@ $@.bak
+	gitthing dat/git < dat/index.txt || $(dirfail)
+
+.PHONY: all fix download
 .DELETE_ON_ERROR:
 .SECONDARY:
 