diff options
author | Luke Shumaker <lukeshu@lukeshu.com> | 2017-06-30 20:52:56 -0400 |
---|---|---|
committer | Luke Shumaker <lukeshu@lukeshu.com> | 2017-06-30 20:52:56 -0400 |
commit | 99011e7fcebeccc26a3da591e3445a93ffadad3c (patch) | |
tree | 563f52145da316f27d243317ad727b376127aff3 /Makefile | |
parent | e46a74fe8a143936eee2b9be1fd6b5f963357d9d (diff) |
more
Diffstat (limited to 'Makefile')
-rw-r--r-- | Makefile | 24 |
1 files changed, 11 insertions, 13 deletions
@@ -1,30 +1,28 @@
+SHELL=bash -o pipefail
 PATH:=$(CURDIR)/bin:$(PATH)
 export PATH
 
-all: each-cdx
+all: dat/index.txt
+
+fix:
+	grep -rl '<html><body><h1>503' dat | xargs rm -fv --
 
 dat:
 	mkdir -p $@
 
-dat/ftp.txt: | dat
-	cdxget 'url=ftp.unicode.org/Public/*' 'fl=urlkey' 'filter=statuscode:200' 'filter=urlkey:.*(cvt|convert)utf.*' > $@
-dat/www.txt: | dat
+dat/cdxindex.txt: | dat
 	cdxget 'url=www.unicode.org/Public/*' 'fl=urlkey' 'filter=statuscode:200' 'filter=urlkey:.*(cvt|convert)utf.*' > $@
-dat/urlkeys.txt: dat/ftp.txt dat/www.txt
-	cat $^ | cut -d '?' -f1 | sed 's/,ftp)/)/' | sort -u > $@
+dat/urlkeys.txt: dat/cdxindex.txt
+	cat $^ | cut -d '?' -f1 | sort -u > $@
 dat/urlkeys.mk: dat/urlkeys.txt
 	cat $^ | sed 's/^/urlkeys+=/' < $< > $@
 -include dat/urlkeys.mk
-rp = )
-c = ,
-all_urlkeys = $(urlkeys) $(subst $(rp),$(c)ftp$(rp),$(urlkeys))
 
 dat/each-cdx/%.txt:
-	mkdir -p '$(@D)'
+	@mkdir -p '$(@D)'
 	cdxget "url=$$(urlkey2url '$*')" 'filter=statuscode:200' 'collapse=digest' 'fl=timestamp,original' > '$@'
-
-each-cdx: $(addprefix dat/each-cdx/,$(addsuffix .txt,$(all_urlkeys)))
-.PHONY: each-cdx
+dat/index.txt: $(addprefix dat/each-cdx/,$(addsuffix .txt,$(urlkeys)))
+	cat -- $(foreach c,$^,'$c') | sort > $@
 
 .DELETE_ON_ERROR:
 .SECONDARY: