blob: 4aa932a3efe00c91d67cd0404726c7e26795ae24 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
|
#!/bin/bash
# Copyright © 2013-2014 Luke Shumaker <lukeshu@sbcglobal.net>
# This work is free. You can redistribute it and/or modify it under the
# terms of the Do What The Fuck You Want To Public License, Version 2,
# as published by Sam Hocevar. See the COPYING file for more details.
# Configuration.
# Regular expressions are POSIX EREs, and must match the entirety of the string

# Directory names (globs, as matched by `find -name`) that are pruned from
# the search and therefore never inspected.
safe_dirs_glob=(.{git,hg,svn} '*.git')
# MIME types (as printed by `file --mime-type`) that are considered safe.
safe_types_regexp=(
	'(inode|text|image|video|audio)/.*'
	'application/(pdf|postscript|xml|ogg|x-java-keystore)'
	'application/vnd\.ms-(office|powerpoint)'
)
# File names that are considered safe regardless of their MIME type.
# The dot before each extension is escaped so that only a literal '.' matches.
safe_files_regexp=('.*/po/[^/]*\.gmo' '.*\.(flw|odg)')
# Exact file names that are considered safe; main() appends the file names
# given on the command line to this list.
safe_files_string=()
# Ignore files of 3 bytes or fewer: the value is passed to `find -size +N`,
# which matches sizes strictly greater than N ('c' is for characters)
min_size=3c
# Separator handed to `file -F`; it is printed between each file name and
# its MIME type, so it must never occur inside a file name.
sep='<no-filename-ever-contains-this>'
################################################################################
# Canonical absolute path of the directory the script was started in.
cwd="$(readlink -m -- "$PWD")"

# Usage: normalize_filenames < list-of-filenames
#
# Reads one file name per line from stdin and prints its canonical form
# (`readlink -m`: absolute, symlinks and `..` resolved, missing components
# allowed).  Paths located below $cwd are printed relative to it with a
# leading `./`; all other paths are printed as absolute paths.
#
# Lines that readlink cannot canonicalize (e.g. an empty line) produce no
# output.
normalize_filenames() {
	local filename abs prefix
	# `${cwd%/}/` is "/" when $cwd is the root directory and "$cwd/" otherwise.
	prefix="${cwd%/}/"
	while IFS='' read -r filename; do
		abs="$(readlink -m -- "$filename")" || continue
		# Compare and strip the prefix as a literal string; the directory
		# name may contain characters that are special in regular
		# expressions, so it must not be used as a pattern.
		if [[ "$abs" == "$prefix"* ]]; then
			printf './%s\n' "${abs#"$prefix"}"
		else
			printf '%s\n' "$abs"
		fi
	done
}
# Usage: print-human < list-of-filenames
#
# Human-readable report: emits a warning naming the current directory via
# `libremessages warning`, then copies stdin to stdout with every line
# prefixed by an arrow.
print-human() {
	local heading='The source directory %s contains binary files:'
	libremessages warning "$heading" "$PWD"
	# Prepend the arrow marker to each listed file name.
	sed -e 's|^| -> |'
}
# Usage: print-machine < list-of-filenames
#
# Machine-readable report: passes the list on stdin through to stdout
# unchanged, one file name per line, with no heading or decoration.
print-machine() {
	cat -
}
# Usage: main [-m] [SAFE_FILE...]
#
# Searches the current directory for files that are neither whitelisted nor
# of a whitelisted MIME type, and reports them.
#
# Arguments:
#   -m         print the plain list (print-machine) instead of the
#              human-readable report (print-human)
#   SAFE_FILE  any other argument is a file name to treat as safe; it is
#              normalized and appended to safe_files_string
#
# Exit status:
#   returns 0 if no unsafe file was found
#   exits 1   if unsafe files were found (after printing the sorted list)
#   exits 2   if the temporary file could not be created
main() {
	local format=human

	# Parse arguments
	local arg
	for arg in "$@"; do
		case "$arg" in
			-m) format=machine;;
			*) safe_files_string+=("$(normalize_filenames <<<"$arg")");;
		esac
	done

	# Init
	# Declaration and assignment are separate so that a mktemp failure is
	# not masked by the exit status of `local`; without a temporary file the
	# check cannot run, and silently reporting "no binary files" would be
	# wrong.
	local unsafe_files
	if ! unsafe_files="$(mktemp --tmpdir "${0##*/}.XXXXXXXXXX")"; then
		printf '%s: could not create a temporary file\n' "${0##*/}" >&2
		exit 2
	fi
	# %q quotes the path now, so the trap stays correct whatever the
	# temporary file is called.
	trap "$(printf 'rm -f -- %q' "$unsafe_files")" EXIT

	# Turn the variables up top into a bunch of `find(1)` filters
	local filters=()
	local glob
	for glob in "${safe_dirs_glob[@]}"; do
		filters+=(-type d -name "$glob" -prune -o)
	done
	filters+=(-type f -size +"${min_size}")

	# Heavy lifting
	find . "${filters[@]}" -print | # find all files
	normalize_filenames |
	grep -Fxvf <(printf '%s\n' "${safe_files_string[@]}") |
	grep -Exvf <(printf '%s\n' "${safe_files_regexp[@]}") |
	xargs -r -d'\n' file --mime-type -r -F "$sep" | # identify the filetypes
	sed -r "s@(.*)${sep}\s*(.*)@\2:\1@" | # reformat the output to be easier to parse
	grep -Exvf <(printf '%s:.*\n' "${safe_types_regexp[@]}") |
	cut -d: -f2- > "$unsafe_files"

	# A non-empty result file means at least one unsafe file was found.
	if [[ -s "$unsafe_files" ]]; then
		<"$unsafe_files" sort | print-$format
		exit 1
	fi
}
# Entry point: run main with the script's command-line arguments.
main "$@"
|