# Run every recipe through bash with `pipefail` enabled, so a pipeline fails
# when any of its stages fails, not only the last one.
SHELL := bash -o pipefail

# Search this directory's bin/ before the inherited PATH, and export the
# result to recipes.  (Presumably that is where the `cdxget` and `urlkey2url`
# helpers used below live -- confirm.)
export PATH := $(CURDIR)/bin:$(PATH)
# Default goal: build the combined index.
# Declared phony because `all` names a command, not a file; without this a
# stray file called `all` would make the goal look up to date.
.PHONY: all
all: dat/index.txt
# Maintenance target: delete every file under dat/ that contains what is
# presumably a saved HTTP 503 error page rather than real data.
#
# NOTE(review): the search pattern in the previous revision was broken across
# two lines (a lone quote, then `503'`), which cannot work because each recipe
# line runs in its own shell.  It looks like an HTML tag in front of "503" was
# lost; the pattern below matches any HTML tag immediately followed by "503".
# Confirm against a real error page and tighten it if needed.
#
# File names are passed NUL-delimited so unusual characters in them cannot
# confuse xargs.  grep exits non-zero when nothing matches, so with `pipefail`
# in effect this target reports an error when there was nothing to delete.
.PHONY: fix
fix:
	grep -rl --null '<[^>]*>503' dat | xargs -0 rm -fv --
# Create the top-level output directory.  dat/cdxindex.txt lists it as an
# order-only prerequisite, so it must exist but its timestamp is ignored.
dat:
	mkdir -p '$@'
# Save the output of one `cdxget` query: the urlkey field of every capture
# under www.unicode.org/Public/ with status 200 whose urlkey matches
# (cvt|convert)utf.  `dat` is order-only: it has to exist, nothing more.
# (`cdxget` is an external helper; presumably it queries a web-archive CDX
# index -- confirm.)
dat/cdxindex.txt: | dat
	cdxget \
	  'url=www.unicode.org/Public/*' \
	  'fl=urlkey' \
	  'filter=statuscode:200' \
	  'filter=urlkey:.*(cvt|convert)utf.*' \
	  > $@
# Reduce the raw index to a sorted, de-duplicated list of urlkeys, keeping
# only the part of each line before the first `?`.
dat/urlkeys.txt: dat/cdxindex.txt
	cut -d '?' -f1 $^ | sort -u > $@
# Turn the urlkey list into a makefile fragment with one `urlkeys+=<key>` line
# per key.
#
# sed reads the list straight from the prerequisite.  The earlier form also
# piped `cat` into sed while redirecting sed's stdin from the same file; the
# redirect won, so cat was left writing into a pipe with no reader and could
# be killed by SIGPIPE, which fails the recipe under `pipefail`.
dat/urlkeys.mk: dat/urlkeys.txt
	sed 's/^/urlkeys+=/' < $< > $@

# Pull in the generated `urlkeys` list.  The leading `-` keeps make quiet when
# the fragment does not exist yet; because a rule for it exists, make builds
# it and then re-reads the makefiles.
-include dat/urlkeys.mk
# Fetch the capture list for a single urlkey (the pattern stem `$*`).
# The stem is converted to a URL by the `urlkey2url` helper inside a shell
# command substitution (`$$(...)` is make's escape for the shell's `$(...)`),
# and the timestamp/original fields of the status-200 captures, collapsed on
# digest, are written to the target.  The target's directory is created first
# because it may not exist yet.
dat/each-cdx/%.txt:
	@mkdir -p '$(@D)'
	cdxget \
	  "url=$$(urlkey2url '$*')" \
	  'filter=statuscode:200' \
	  'collapse=digest' \
	  'fl=timestamp,original' \
	  > '$@'
# Concatenate every per-urlkey capture list and sort the result into one
# index.  The prerequisites are derived from `urlkeys`, one
# dat/each-cdx/<urlkey>.txt per key.
#
# Each file name is single-quoted for the shell.  stdin is redirected from
# /dev/null so that, when `urlkeys` is empty and cat gets no file operands, it
# produces an empty index instead of blocking on the terminal.
dat/index.txt: $(addprefix dat/each-cdx/,$(addsuffix .txt,$(urlkeys)))
	cat -- $(foreach c,$^,'$(c)') < /dev/null | sort > $@
# Delete a target whose recipe exited with an error, so a partially written
# file is never mistaken for an up-to-date one.
.DELETE_ON_ERROR:
# With no prerequisites this marks every target as secondary: make never
# auto-deletes them as intermediate files.
.SECONDARY: