summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Luke Shumaker <LukeShu@sbcglobal.net> 2014-06-20 16:07:31 -0400
committer: Luke Shumaker <LukeShu@sbcglobal.net> 2014-06-20 16:07:31 -0400
commit: 3bc47deab0ec8288c5bf3f01a6f881c1e17bda70 (patch)
tree: 8047004c0b69a302a1a52e55ab86abb41b23b140
parent: f1559ccab1f9de1f03659a9de6572a593abbbda5 (diff)
jh-checksource: optimize further
-rw-r--r-- jh-checksource.sh 72
1 file changed, 20 insertions, 52 deletions
diff --git a/jh-checksource.sh b/jh-checksource.sh
index 9de91c0..5296226 100644
--- a/jh-checksource.sh
+++ b/jh-checksource.sh
@@ -4,16 +4,19 @@
# terms of the Do What The Fuck You Want To Public License, Version 2,
# as published by Sam Hocevar. See the COPYING file for more details.
-sep='<no-filename-ever-contains-this>'
+# Regular expressions are POSIX EREs, and must match the entirety of the string
-safe_types_regexp=('^(inode|text|image|video|audio)/')
-safe_types_string=('application/pdf' 'application/postscript' 'application/xml' 'application/ogg' 'message/rfc822')
safe_dirs_glob=(.{git,hg,svn} '*.git')
-safe_files_regexp=('/po/[^/]*.gmo$' '\.flw$' '\.odg$' '\.ppt$')
+safe_types_regexp=('(inode|text|image|video|audio)/.*' 'application/(pdf|postscript|xml|ogg)' message/rfc822)
+safe_files_regexp=('.*/po/[^/]*.gmo' '.*\.(flw|odg|ppt)')
safe_files_string=()
-# don't care about files less than 3 bytes.
-min_size=3
+# don't care about files of 3 bytes or fewer (find's 'c' size suffix means bytes)
+min_size=3c
+
+sep='<no-filename-ever-contains-this>'
+
+################################################################################
cwd="$(readlink -m -- "$PWD")"
@@ -23,28 +26,6 @@ normalize_filenames() {
done | sed "s|^$cwd/|./|"
}
-matches_string() {
- local needle=$1
- shift
- for straw in "$@"; do
- if [[ "$needle" = "$straw" ]]; then
- return 0
- fi
- done
- return 1
-}
-
-matches_regexp() {
- local needle=$1
- shift
- for straw in "$@"; do
- if [[ "$needle" =~ $straw ]]; then
- return 0
- fi
- done
- return 1
-}
-
print-human() {
libremessages warning "The source directory %s contains binary files:" "$PWD"
sed 's/^/ -> /'
@@ -69,36 +50,23 @@ main() {
local unsafe_files="$(mktemp --tmpdir "${0##*/}.XXXXXXXXXX")"
trap "$(printf 'rm -f -- %q' "$unsafe_files")" EXIT
- # Heavy lifting
- local filter_dirs=()
+ # Turn the variables up top into a bunch of `find(1)` filters
+ local filters=()
local glob
for glob in "${safe_dirs_glob[@]}"; do
- filter_dirs+=(-type d -name "$glob" -prune -o)
+ filters+=(-type d -name "$glob" -prune -o)
done
- find . "${filter_dirs[@]}" -type f -printf '%s %p\n' | # find all files
- while read -r size file; do # filter out files smaller than $min_size
- [[ $size -lt $min_size ]] || printf '%s\n' "$file"
- done |
+ filters+=(-type f -size +"${min_size}")
+
+ # Heavy lifting
+ find . "${filters[@]}" -print | # find all files
normalize_filenames |
+ grep -Fxvf <(printf '%s\n' "${safe_files_string[@]}") |
+ grep -Exvf <(printf '%s\n' "${safe_files_regexp[@]}") |
xargs -r -d'\n' file --mime-type -r -F "$sep" | # identify the filetypes
sed -r "s@(.*)${sep}\s*(.*)@\2:\1@" | # reformat the output to be easier to parse
- while IFS=: read -r type file; do
- declare -A cached_types
- if ! { matches_string "$file" "${safe_files_string[@]}" || \
- matches_regexp "$file" "${safe_files_regexp[@]}" ;}; then
- if [[ -z ${cached_types[$type]} ]]; then
- if matches_string "$type" "${safe_types_string[@]}" || \
- matches_regexp "$type" "${safe_types_regexp[@]}" ; then
- cached_types[$type]=false
- else
- cached_types[$type]=true
- fi
- fi
- if "${cached_types[$type]}"; then
- printf "%s\n" "$file"
- fi
- fi
- done > "$unsafe_files"
+ grep -Exvf <(printf '%s:.*\n' "${safe_types_regexp[@]}") |
+ cut -d: -f2- > "$unsafe_files"
if [[ "$(stat -c '%s' -- "$unsafe_files")" -gt 0 ]]; then
<"$unsafe_files" sort | print-$format