#!/bin/bash
# Report files under the current directory whose MIME type (as identified by
# `file --mime-type`) is not on the safe list below, i.e. probable binaries.
#
# Usage: $0 [-m] [SAFE_FILE...]
#   -m         print the bare, sorted list of offending files (machine format)
#              instead of the human-readable warning
#   SAFE_FILE  path to treat as safe regardless of its detected type
#
# Exit status: 1 if at least one offending file was found (or the temporary
# file could not be created), 0 otherwise.

# Separator `file` prints between a filename and its type (-F). It must be
# non-empty, otherwise the sed step in main() cannot tell where the filename
# ends; the ASCII unit separator is used because it is very unlikely to occur
# in a real filename.
sep=$'\x1f'

# A file is considered safe when its MIME type or its normalized path matches
# any entry of the corresponding list.
safe_types_regexp=('^(inode|text|image|video)/')
safe_types_string=('application/pdf' 'application/postscript')
safe_files_regexp=('/\.(git|hg|svn)/')
safe_files_string=()

# Files smaller than this many bytes are not inspected at all.
min_size=3

# Print the canonical form of path $1 (readlink -m); paths located below the
# current directory are printed relative to it with a leading "./".
# The prefix is stripped with a literal match, so a working directory whose
# name contains regex metacharacters or '|' is handled correctly.
normalize_filename() {
  local cwd path
  cwd=$(pwd)
  path=$(readlink -m -- "$1") || return
  if [[ "$path" == "$cwd"/* ]]; then
    path="./${path#"$cwd"/}"
  fi
  printf '%s\n' "$path"
}

# Usage: matches_string NEEDLE [STRING...]
# Return 0 if NEEDLE is exactly equal to one of the STRINGs, 1 otherwise.
matches_string() {
  local needle=$1 straw
  shift
  for straw in "$@"; do
    if [[ "$needle" = "$straw" ]]; then
      return 0
    fi
  done
  return 1
}

# Usage: matches_regexp NEEDLE [REGEXP...]
# Return 0 if NEEDLE matches one of the extended REGEXPs, 1 otherwise.
matches_regexp() {
  local needle=$1 straw
  shift
  for straw in "$@"; do
    # $straw is deliberately unquoted so it is treated as a regex.
    if [[ "$needle" =~ $straw ]]; then
      return 0
    fi
  done
  return 1
}

# Read "SIZE PATH" lines on stdin and print the PATH of every entry whose SIZE
# is at least $min_size bytes.
# The comparison is numeric (-lt); a string comparison would order "10" before
# "3" and wrongly drop such files. The line is split on the first space only,
# so leading/trailing whitespace in PATH is preserved.
filter_min_size() {
  local line size
  while IFS= read -r line; do
    size=${line%% *}
    [[ "$size" -lt "$min_size" ]] || printf '%s\n' "${line#* }"
  done
}

# Human-readable report: a warning header followed by the file list read from
# stdin, each non-empty line prefixed with an arrow.
print-human() {
  libremessages warning "The source directory $(pwd) contains binary files:"
  sed 's/./ -> &/'
}

# Machine-readable report: the file list from stdin, unchanged.
print-machine() {
  cat
}

# Remove the temporary result file; installed as the EXIT trap by main().
cleanup() {
  rm -f -- "$unsafe_files"
}

# Entry point: parse arguments, scan the current directory and report.
main() {
  local format=human arg type file

  # Parse arguments
  for arg in "$@"; do
    case "$arg" in
      -m) format=machine ;;
      *) safe_files_string+=("$(normalize_filename "$arg")") ;;
    esac
  done

  # Init. unsafe_files stays global because the EXIT trap needs it after
  # main() has returned. Fail closed if the temp file cannot be created;
  # otherwise the scan result would be lost and the script would exit 0.
  if ! unsafe_files="$(mktemp)"; then
    printf '%s\n' "cannot create temporary file" >&2
    exit 1
  fi
  trap cleanup EXIT

  # Heavy lifting
  find . -type f -printf '%s %h/%f\n' |           # find all files
    filter_min_size |                              # drop tiny files
    # identify the filetypes; -r: do not run `file` when nothing is left
    xargs -r -d '\n' file --mime-type -r -F "$sep" |
    # reformat "NAME<sep> TYPE" into "TYPE:NAME" to be easier to parse
    sed -r "s@(.*)${sep}\s*(.*)@\2:\1@" |
    while IFS=: read -r type file; do
      file="$(normalize_filename "$file")"
      if matches_string "$file" "${safe_files_string[@]}" ||
        matches_string "$type" "${safe_types_string[@]}" ||
        matches_regexp "$file" "${safe_files_regexp[@]}" ||
        matches_regexp "$type" "${safe_types_regexp[@]}"; then
        continue # known-safe, nothing to report
      fi
      printf '%s\n' "$file"
    done > "$unsafe_files"

  if [[ -n "$(<"$unsafe_files")" ]]; then
    sort < "$unsafe_files" | "print-$format"
    exit 1
  fi
}

main "$@"