#!/bin/bash
# Copyright © 2013-2014 Luke Shumaker
# This work is free. You can redistribute it and/or modify it under the
# terms of the Do What The Fuck You Want To Public License, Version 2,
# as published by Sam Hocevar. See the COPYING file for more details.
#
# Scan the current directory for files whose MIME type (as reported by
# file(1)) is not on the safe lists below, and report them.
#
# Usage: <this-script> [-m] [SAFE_FILE...]
#   -m         print the bare list of filenames ("machine" format) instead
#              of the default "human" format
#   SAFE_FILE  a file that must not be reported even if its type is unsafe
#
# Exit status: 1 if any file was reported, 2 if the temporary file could not
# be created, 0 otherwise.
#
# Limitation: the pipeline is line-oriented, so filenames that contain
# newlines are not handled correctly.

# Separator handed to `file -F` and matched again by sed below.  It must be
# non-empty (an empty separator makes the sed expression unable to tell the
# filename from the type) and must not occur in a MIME type; the ASCII "unit
# separator" control character satisfies both.
sep=$'\x1f'

# MIME types that are never reported (regexps / exact strings).
safe_types_regexp=('^(inode|text|image|video|audio)/')
safe_types_string=('application/pdf' 'application/postscript' 'application/xml' 'application/ogg' 'message/rfc822')
# Directory name globs that are pruned from the scan entirely.
safe_dirs_glob=(.{git,hg,svn} '*.git')
# Normalized filenames that are never reported (regexps / exact strings).
safe_files_regexp=('/po/[^/]*.gmo$' '\.flw$' '\.odg$' '\.ppt$')
safe_files_string=()

# don't care about files less than 3 bytes.
min_size=3

cwd="$(readlink -m -- "$PWD")"

# Read filenames from stdin (one per line), canonicalize each one with
# `readlink -m`, and print it with a leading "$cwd/" replaced by "./".
# Globals: cwd (read)
normalize_filenames() {
	local filename path
	while IFS='' read -r filename; do
		# Like the plain `readlink` call this replaces, print nothing for
		# an operand that readlink rejects.
		path=$(readlink -m -- "$filename") || continue
		# Strip the prefix as a literal string; splicing $cwd into a sed
		# expression breaks when it contains '|' or regex metacharacters.
		if [[ $path == "$cwd"/* ]]; then
			path=./${path#"$cwd"/}
		fi
		printf '%s\n' "$path"
	done
}

# matches_string NEEDLE [STRING...]
# Return 0 if NEEDLE is exactly equal to one of the STRINGs, 1 otherwise.
matches_string() {
	local needle=$1
	shift
	local straw
	for straw in "$@"; do
		if [[ "$needle" = "$straw" ]]; then
			return 0
		fi
	done
	return 1
}

# matches_regexp NEEDLE [REGEXP...]
# Return 0 if NEEDLE matches one of the extended REGEXPs, 1 otherwise.
matches_regexp() {
	local needle=$1
	shift
	local straw
	for straw in "$@"; do
		# $straw is deliberately unquoted so it is used as a regexp.
		if [[ "$needle" =~ $straw ]]; then
			return 0
		fi
	done
	return 1
}

# Print the list on stdin in "human" format: a warning emitted through
# libremessages (an external helper not defined in this file), followed by
# the filenames, each with an arrow prefix.
print-human() {
	libremessages warning "The source directory %s contains binary files:" "$PWD"
	sed 's/^/ -> /'
}

# Print the list on stdin in "machine" format: unchanged.
print-machine() {
	cat
}

# Entry point; see the usage notes at the top of the file.
# Globals: safe_* lists, min_size, sep (read); safe_files_string (appended)
main() {
	local format=human

	# Parse arguments
	local arg
	for arg in "$@"; do
		case "$arg" in
			-m) format=machine ;;
			*) safe_files_string+=("$(normalize_filenames <<<"$arg")") ;;
		esac
	done

	# Init
	# Declaration and assignment are separate so that a mktemp failure is
	# not masked by the exit status of `local`.
	local unsafe_files
	if ! unsafe_files=$(mktemp --tmpdir "${0##*/}.XXXXXXXXXX"); then
		printf '%s: could not create a temporary file\n' "${0##*/}" >&2
		exit 2
	fi
	# The filename is expanded now (and shell-quoted with %q) on purpose.
	trap "$(printf 'rm -f -- %q' "$unsafe_files")" EXIT

	# Heavy lifting
	local filter_dirs=()
	local glob
	for glob in "${safe_dirs_glob[@]}"; do
		filter_dirs+=(-type d -name "$glob" -prune -o)
	done

	# Verdict per MIME type ("true" = unsafe), so that each type is only
	# checked against the safe lists once.  It is filled in by the last
	# pipeline stage, which runs in a subshell.
	local -A cached_types=()
	local line size file type

	find . "${filter_dirs[@]}" -type f -printf '%s %p\n' | # find all files
		while IFS='' read -r line; do # filter out files smaller than $min_size
			# Split by hand: `read size file` would strip trailing
			# whitespace from the filename.
			size=${line%% *}
			file=${line#* }
			# Lines that do not start with a number (the tail of a filename
			# containing a newline) are dropped; letting them reach the
			# arithmetic comparison would evaluate them as an expression.
			[[ $size =~ ^[0-9]+$ ]] || continue
			[[ $size -lt $min_size ]] || printf '%s\n' "$file"
		done |
		normalize_filenames |
		xargs -r -d'\n' file --mime-type -r -F "$sep" | # identify the filetypes
		sed -r "s@(.*)${sep}\s*(.*)@\2:\1@" | # reformat to "type:file"
		while IFS='' read -r line; do
			[[ $line == *:* ]] || continue
			# Split at the first ':' by hand: `IFS=: read type file` would
			# drop a trailing ':' from the filename.
			type=${line%%:*}
			file=${line#*:}

			if matches_string "$file" "${safe_files_string[@]}" ||
				matches_regexp "$file" "${safe_files_regexp[@]}"; then
				continue
			fi

			# An empty type cannot be used as an array subscript; report
			# the file rather than silently passing it.
			if [[ -z $type ]]; then
				printf '%s\n' "$file"
				continue
			fi

			if [[ -z ${cached_types[$type]} ]]; then
				if matches_string "$type" "${safe_types_string[@]}" ||
					matches_regexp "$type" "${safe_types_regexp[@]}"; then
					cached_types[$type]=false
				else
					cached_types[$type]=true
				fi
			fi
			if [[ ${cached_types[$type]} == true ]]; then
				printf '%s\n' "$file"
			fi
		done >"$unsafe_files"

	if [[ -s "$unsafe_files" ]]; then
		sort <"$unsafe_files" | "print-$format"
		exit 1
	fi
}

main "$@"