diff options
author | Luke Shumaker <LukeShu@sbcglobal.net> | 2013-04-25 21:06:10 -0400 |
---|---|---|
committer | Luke Shumaker <LukeShu@sbcglobal.net> | 2013-04-25 21:06:10 -0400 |
commit | 952f2444763af9100339876b1bfc88c169346967 (patch) | |
tree | d368fb61ac0a56ace28de419382e234351ce0eb4 | |
parent | 8d81be0dcd7ab9d14a88f0e981046f50e67526fe (diff) |
improve jh-checksource
Code-wise:
* add `-m` flag for machine-readable output
* use MIME types instead of human-readable strings
* ignore files less than 3 bytes
Settings-wise:
* ignore _all_ `inode/*`, `text/*`, `image/*` and `video/*` types
* ignore PDFs and PostScript
* ignore .hg files
-rw-r--r-- | jh-checksource.sh | 43 |
1 file changed, 31 insertions, 12 deletions
diff --git a/jh-checksource.sh b/jh-checksource.sh index 1fecfa5..9e3a9d6 100644 --- a/jh-checksource.sh +++ b/jh-checksource.sh @@ -1,13 +1,15 @@ #!/bin/bash -sep='<nofileevercontainsthis>' -resep='@' +sep='<no-filename-ever-contains-this>' -safe_types_regexp=('text' '(GIF|JPEG|PNG) image data' 'MS Windows icon') -safe_types_string=('empty') -safe_files_regexp=('/\.(git|svn)/') +safe_types_regexp=('^(inode|text|image|video)/') +safe_types_string=('application/pdf' 'application/postscript') +safe_files_regexp=('/\.(git|hg|svn)/') safe_files_string=() +# don't care about files less than 3 bytes. +min_size=3 + normalize_filename() { local cwd="`pwd`" readlink -m -- "$1"|sed "s|^$cwd/|./|" @@ -35,18 +37,36 @@ matches_regexp() { return 1 } +print-human() { + libremessages warning "The source directory `pwd` contains binary files:" + sed 's/./ -> &/' +} + +print-machine() { + cat +} + main() { + format=human # Parse arguments - for file in "$@"; do safe_files_string+=("$(normalize_filename "$file")"); done + for arg in "$@"; do + case "$arg" in + -m) format=machine;; + *) safe_files_string+=("$(normalize_filename "$arg")");; + esac + done # Init unsafe_files="$(mktemp)" # Heavy lifting - find . -type f -exec file -F"$sep" {} + | while read line; do - file="$(echo "$line"|sed "s${resep}${sep}.*${resep}${resep}")" - type="$(echo "$line"|sed "s${resep}.*${sep}\s*${resep}${resep}")" - + find . 
-type f -printf '%s %h/%f\n' | # find all files + while read -r size file; do # filter out files smaller than $min_size + [[ $size < $min_size ]] || printf '%s\n' "$file" + done | + xargs -d'\n' file --mime-type -r -F "$sep" | # identify the filetypes + sed -r "s@(.*)${sep}\s*(.*)@\2:\1@" | # reformat the output to be easier to parse + while IFS=: read -r type file; do file="$(normalize_filename "$file")" if \ @@ -61,8 +81,7 @@ main() { done > "$unsafe_files" if [[ -n "$(cat "$unsafe_files")" ]]; then - echo "==> WARNING: The source directory `pwd` contains binary files:" - <"$unsafe_files" sort | sed 's/./ -> &/' + <"$unsafe_files" sort | print-$format rm -f "$unsafe_files" exit 1 else |