From 952f2444763af9100339876b1bfc88c169346967 Mon Sep 17 00:00:00 2001
From: Luke Shumaker
Date: Thu, 25 Apr 2013 21:06:10 -0400
Subject: improve jh-checksource

Code-wise:
 * add `-m` flag for machine readable
 * use mime-types instead of user-readable strings
 * ignore files less than 3 bytes

Settings-wise:
 * ignore _all_ `inode/*`, `text/*`, `image/*` and `video/*` types
 * ignore PDFs and PostScript
 * ignore .hg files
---
Reviewer notes (this section is ignored when the patch is applied):
 * The size filter uses an arithmetic test, (( size < min_size )).
   Inside [[ ]] the `<` operator compares strings, so a 10-byte file
   sorted before "3" and was wrongly excluded from the scan.
 * NOTE(review): the removed and added `sep=` lines look identical here;
   presumably a non-printing separator character was lost in transit.
   Confirm against the repository before applying.
 * NOTE(review): indentation of the script lines was reconstructed as
   tabs; apply with whitespace tolerance if the target file differs.

 jh-checksource.sh | 43 +++++++++++++++++++++++++++++++------------
 1 file changed, 31 insertions(+), 12 deletions(-)

diff --git a/jh-checksource.sh b/jh-checksource.sh
index 1fecfa5..9e3a9d6 100644
--- a/jh-checksource.sh
+++ b/jh-checksource.sh
@@ -1,13 +1,15 @@
 #!/bin/bash
 
-sep=''
-resep='@'
+sep=''
 
-safe_types_regexp=('text' '(GIF|JPEG|PNG) image data' 'MS Windows icon')
-safe_types_string=('empty')
-safe_files_regexp=('/\.(git|svn)/')
+safe_types_regexp=('^(inode|text|image|video)/')
+safe_types_string=('application/pdf' 'application/postscript')
+safe_files_regexp=('/\.(git|hg|svn)/')
 safe_files_string=()
 
+# don't care about files less than 3 bytes.
+min_size=3
+
 normalize_filename() {
 	local cwd="`pwd`"
 	readlink -m -- "$1"|sed "s|^$cwd/|./|"
@@ -35,18 +37,36 @@ matches_regexp() {
 	return 1
 }
 
+print-human() {
+	libremessages warning "The source directory `pwd` contains binary files:"
+	sed 's/./ -> &/'
+}
+
+print-machine() {
+	cat
+}
+
 main() {
+	format=human
 	# Parse arguments
-	for file in "$@"; do safe_files_string+=("$(normalize_filename "$file")"); done
+	for arg in "$@"; do
+		case "$arg" in
+			-m) format=machine;;
+			*) safe_files_string+=("$(normalize_filename "$arg")");;
+		esac
+	done
 
 	# Init
 	unsafe_files="$(mktemp)"
 
 	# Heavy lifting
-	find . -type f -exec file -F"$sep" {} + | while read line; do
-		file="$(echo "$line"|sed "s${resep}${sep}.*${resep}${resep}")"
-		type="$(echo "$line"|sed "s${resep}.*${sep}\s*${resep}${resep}")"
-
+	find . -type f -printf '%s %h/%f\n' | # find all files
+	while read -r size file; do # filter out files smaller than $min_size
+		(( size < min_size )) || printf '%s\n' "$file"
+	done |
+	xargs -d'\n' file --mime-type -r -F "$sep" | # identify the filetypes
+	sed -r "s@(.*)${sep}\s*(.*)@\2:\1@" | # reformat the output to be easier to parse
+	while IFS=: read -r type file; do
 		file="$(normalize_filename "$file")"
 
 		if \
@@ -61,8 +81,7 @@ main() {
 	done > "$unsafe_files"
 
 	if [[ -n "$(cat "$unsafe_files")" ]]; then
-		echo "==> WARNING: The source directory `pwd` contains binary files:"
-		<"$unsafe_files" sort | sed 's/./ -> &/'
+		<"$unsafe_files" sort | print-$format
 		rm -f "$unsafe_files"
 		exit 1
 	else
-- 
cgit v1.2.3-54-g00ecf