From 67cd422d20ffbd8be04e2a95803ef073b63237d0 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Fri, 21 Oct 2016 19:04:18 -0400 Subject: pull tools/notsd-* changes from notsystemd/master --- tools/notsd-fixup | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'tools/notsd-fixup') diff --git a/tools/notsd-fixup b/tools/notsd-fixup index 77df56fff4..99ae0981a3 100755 --- a/tools/notsd-fixup +++ b/tools/notsd-fixup @@ -22,12 +22,17 @@ fixup_makefiles() ( ) fixup_includes() ( - find $(find . -type d -name include) -type d | while read -r dir; do - lib="${dir##*/}" - pushd "$dir" >/dev/null - find . -type f -exec sed -ri -e "s|$lib/||" -- {} + - popd >/dev/null - done + dirs=($(find "$@" -type d -name include)) + if [[ ${#dirs[@]} -gt 0 ]]; then + find "${dirs[@]}" -type d | while read -r dir; do + printf '=> libdir %q\n' "$dir" + lib="${dir##*/}" + find "$dir" -type f | while read -r filename; do + printf ' => sed -ir %q %q\n' "s|$lib/||" "$filename" + sed -r "s|$lib/||" < "$filename" | build-aux/write-ifchanged "$filename" + done + done + fi find "$@" \( -name '*.h' -o -name '*.c' -o -name '*.gperf' \) -type f | while read -r filename; do "$0"--includes "$filename" -- cgit v1.2.3-54-g00ecf From 434b9800e88bcc9e451b4c4b709e61f5a6992f49 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sun, 23 Oct 2016 12:10:04 -0400 Subject: tools/notsd-fixup--includes: clean up, add caching So now it should cache calls to `cpp` or filesystem checks, which are comparatively expensive. 
--- tools/.gitignore | 1 + tools/notsd-fixup | 6 +- tools/notsd-fixup--includes | 295 ++++++++++++++++++++++++++++++-------------- 3 files changed, 203 insertions(+), 99 deletions(-) create mode 100644 tools/.gitignore (limited to 'tools/notsd-fixup') diff --git a/tools/.gitignore b/tools/.gitignore new file mode 100644 index 0000000000..4bba404d19 --- /dev/null +++ b/tools/.gitignore @@ -0,0 +1 @@ +/notsd-fixup--includes.cache diff --git a/tools/notsd-fixup b/tools/notsd-fixup index 99ae0981a3..aa4bf4a10d 100755 --- a/tools/notsd-fixup +++ b/tools/notsd-fixup @@ -25,18 +25,18 @@ fixup_includes() ( dirs=($(find "$@" -type d -name include)) if [[ ${#dirs[@]} -gt 0 ]]; then find "${dirs[@]}" -type d | while read -r dir; do - printf '=> libdir %q\n' "$dir" lib="${dir##*/}" find "$dir" -type f | while read -r filename; do - printf ' => sed -ir %q %q\n' "s|$lib/||" "$filename" + >&2 printf ' => sed -r %q < %q\n' "s|$lib/||" "$filename" sed -r "s|$lib/||" < "$filename" | build-aux/write-ifchanged "$filename" done done fi find "$@" \( -name '*.h' -o -name '*.c' -o -name '*.gperf' \) -type f | while read -r filename; do - "$0"--includes "$filename" + "$0"--includes "$filename" | build-aux/write-ifchanged "$filename" done + rm -rf -- "$0"--includes.cache ) main() { diff --git a/tools/notsd-fixup--includes b/tools/notsd-fixup--includes index 33218233d5..9dfa7d0604 100755 --- a/tools/notsd-fixup--includes +++ b/tools/notsd-fixup--includes @@ -1,28 +1,48 @@ #!/usr/bin/env bash -panic() { - >&2 echo panic - exit 2 -} +# Requires Bash 4.2 or higher (for `test -v`). + +# If you are thinking "this file looks gross!", it is. It +# started out as a set of Bash one-liners. Which got turned +# into a script. Which grew somewhat organically. Not huge, +# but given that it started as some one liners, that's not a +# very pretty several hundred lines. So yes, it is gross. 
+# Rewrites welcome; just don't introduce any behavioral changes +# (easy since `tools/notsd-move` runs it on the entire repo and +# puts the results in git history). + +################################################################ +# Everything else in this program is just fluff and bookkeeping +# around around calling classify(). out() { - printf '%q ' "$@" + _ret_class=$1 + _ret_path=$2 } -# system -# linux -# public -# protected -# private +# Return a tuple of (class/group, path); which is a class that +# the header path belongs to, and a normalized path for it. +# +# There are a fixed number of classes that it may put a header +# in; in order of most-public to most-private: +# +# system +# linux +# public +# protected +# private +# +# This uses the global variable `expensive`. classify() { - local path=$1 + local current_file=$1 + local path=$2 if [[ "$path" = linux/* ]]; then out linux "$path" - elif [[ -f "${current_file%/*}/${path}" ]]; then + elif expensive.exists "${current_file%/*}/${path}"; then out private "$path" elif [[ "$path" != systemd/* ]] && [[ "$path" != libudev.h ]] && - cpp -include "$path" <<<'' &>/dev/null; then + expensive.cpp "$path"; then out system "$path" else case "$path" in @@ -38,7 +58,7 @@ classify() { *) >&2 printf 'Unknown gperf base: %q\n' "$base" >&2 printf 'Cannot figure out: %q\n' "$path" - exit 2 + return 2 ;; esac file="$d/${path##*/}" @@ -61,7 +81,7 @@ classify() { fi ;; *) - file=$(find src -type f -name "${path##*/}") + file=$(expensive.find "${path##*/}") if [[ -f "$file" ]]; then case "$file" in */src/*) @@ -90,146 +110,229 @@ classify() { esac else >&2 printf 'Cannot figure out: %q\n' "$path" - exit 2 + return 2 fi ;; esac fi } -phase=phase0 -hook=: +################################################################ +# Cache expensive things -phase0() { - phase=phase0 - hook=: - local line="$1" - case "$line" in - '#include'*|'typedef '*';') - phase1 "$line" - ;; - *) - printf '%s\n' "$line" - ;; - esac 
+cache.init_cpp() { + if ! [[ -v _cache_cpp[@] ]]; then + if [[ -f "$0.cache/cpp" ]]; then + . "$0.cache/cpp" + else + declare -gA _cache_cpp=() + fi + fi +} + +cache.save_cpp() { + cache.init_cpp + mkdir -p "$0.cache" + declare -p _cache_cpp | sed 's/-/-g/' > "$0.cache/cpp" +} + + +cache.init_fs() { + if ! [[ -v _cache_fs ]]; then + if ! [[ -f "$0.cache/fs" ]]; then + >&2 echo expensive fs.find + mkdir -p "$0.cache" + find src -name '*.h' \( -type l -printf 'l %p\n' -o -type f -printf 'f %p\n' \) > "$0.cache/fs" + fi + declare -g _cache_fs=true + fi +} + +expensive.cpp() { + local path=$1 + cache.init_cpp + if [[ -z "${_cache_cpp[$path]}" ]]; then + >&2 echo expensive cpp "$path" + local r + r=0; cpp -include "$path" <<<'' &>/dev/null || r=$? + _cache_cpp[$path]=$r + fi + return ${_cache_cpp[$path]} +} + +expensive.exists() { + local path=$1 + cache.init_fs + grep -qFx \ + -e "l $path" \ + -e "f $path" \ + < "$0.cache/fs" } -phase1_init() { - phase1_tail= - system=() - linux=() - public=() - protected=() - typedef=(); typedef_last=true - private=() +expensive.find() { + local name=$1 + cache.init_fs + sed -n "/^f .*\/${name//./\\.}\$/s/^f //p" < "$0.cache/fs" } -phase1_init -phase1_flush() { + +################################################################ +# Data structure for storing a chunk of `#include` lines. 
+ +includes.init() { + _includes_trailing_nl= + _includes_system=() + _includes_linux=() + _includes_public=() + _includes_protected=() + _includes_typedef=() + _includes_typedef_last=true + _includes_private=() +} +includes.print() { local b=: - if [[ ${#system[@]} -gt 0 ]]; then - printf '%s\n' "${system[@]}" | sort -u + if [[ ${#_includes_system[@]} -gt 0 ]]; then + printf '%s\n' "${_includes_system[@]}" | sort -u b=echo fi - if [[ ${#linux[@]} -gt 0 ]]; then + if [[ ${#_includes_linux[@]} -gt 0 ]]; then $b - printf '%s\n' "${linux[@]}" + printf '%s\n' "${_includes_linux[@]}" b=echo fi - if [[ ${#public[@]} -gt 0 ]]; then + if [[ ${#_includes_public[@]} -gt 0 ]]; then $b - printf '%s\n' "${public[@]}" | sort -u + printf '%s\n' "${_includes_public[@]}" | sort -u b=echo fi - if [[ ${#protected[@]} -gt 0 ]]; then + if [[ ${#_includes_protected[@]} -gt 0 ]]; then $b - printf '%s\n' "${protected[@]}" | sort -u + printf '%s\n' "${_includes_protected[@]}" | sort -u b=echo fi - if [[ ${#typedef[@]} -gt 0 ]] && ! $typedef_last; then + if [[ ${#_includes_typedef[@]} -gt 0 ]] && ! 
$_includes_typedef_last; then $b printf '%s\n' "${typedef[@]}" | sort -u b=echo fi - if [[ ${#private[@]} -gt 0 ]]; then + if [[ ${#_includes_private[@]} -gt 0 ]]; then $b - printf '%s\n' "${private[@]}" | sort -u + printf '%s\n' "${_includes_private[@]}" | sort -u b=echo fi - if [[ ${#typedef[@]} -gt 0 ]] && $typedef_last; then + if [[ ${#_includes_typedef[@]} -gt 0 ]] && $_includes_typedef_last; then $b - printf '%s\n' "${typedef[@]}" + printf '%s\n' "${_includes_typedef[@]}" fi - printf '%s' "$phase1_tail" - phase1_init + printf '%s' "$_includes_trailing_nl" +} +includes.add() { + local class=$1 + local path=$2 + local extra=$3 + local line + case "$class" in + system) + printf -v line '#include <%s>%s' "$path" "$extra" + _includes_system+=("$line") + ;; + linux) + printf -v line '#include <%s>%s' "$path" "$extra" + _includes_linux+=("$line") + ;; + public) + printf -v line '#include <%s>%s' "$path" "$extra" + _includes_public+=("$line") + ;; + protected) + printf -v line '#include "%s"%s' "$path" "$extra" + _includes_protected+=("$line") + ;; + private) + if [[ ${#typedef[@]} -gt 0 ]]; then + _includes_typedef_last=false + fi + printf -v line '#include "%s"%s' "$path" "$extra" + _includes_private+=("$line") + ;; + *) + >&2 printf 'Invalid include class: %q\n' "$class" + return 2 + ;; + esac } + +################################################################ +# The main program loop + +panic() { + >&2 echo panic + exit 2 +} + +phase0() { + phase=phase0 + hook=: + local filename="$1" + local line="$2" + case "$line" in + '#include'*|'typedef '*';') + includes.init + phase1 "$filename" "$line" + ;; + *) + printf '%s\n' "$line" + ;; + esac +} + phase1() { phase=phase1 - hook=phase1_flush - local line="$1" + hook=includes.print + local filename="$1" + local line="$2" case "$line" in '') - phase1_tail+=$'\n' + _includes_trailing_nl+=$'\n' ;; '#include'*) - phase1_tail='' + _includes_trailing_nl='' local re='^#include [<"]([^">]*)[">](.*)' if [[ "$line" =~ $re ]]; 
then - IFS=' ' - local buf - buf="$(classify "${BASH_REMATCH[1]}")" || panic - read -r class path <<<"$buf" - case "$class" in - system) - printf -v line '#include <%s>%s' "$path" "${BASH_REMATCH[2]}" - system+=("$line") - ;; - linux) - printf -v line '#include <%s>%s' "$path" "${BASH_REMATCH[2]}" - linux+=("$line") - ;; - public) - printf -v line '#include <%s>%s' "$path" "${BASH_REMATCH[2]}" - public+=("$line") - ;; - protected) - printf -v line '#include "%s"%s' "$path" "${BASH_REMATCH[2]}" - protected+=("$line") - ;; - private) - if [[ ${#typedef[@]} -gt 0 ]]; then - typedef_last=false - fi - printf -v line '#include "%s"%s' "$path" "${BASH_REMATCH[2]}" - private+=("$line") - ;; - esac + # OK, this is gross, but we want to avoid creating a subshell + local _ret_class _ret_path + classify "$filename" "${BASH_REMATCH[1]}" || panic + includes.add "$_ret_class" "$_ret_path" "${BASH_REMATCH[2]}" || panic else panic fi ;; 'typedef '*';') - phase1_tail='' - typedef+=("$line") + _includes_trailing_nl='' + _includes_typedef+=("$line") ;; *) - phase1_flush - phase0 "$line" + includes.print + phase0 "$filename" "$line" ;; esac } +phase=phase0 +hook=: + main() { - current_file="$1" - printf ' => %q %q\n' "$0" "$current_file" + local filename="$1" + >&2 printf ' => %q %q\n' "$0" "$filename" set -o pipefail { IFS='' while read -r line; do - "$phase" "$line" + "$phase" "$filename" "$line" IFS='' done "$hook" - } < "$current_file" | build-aux/write-ifchanged "$current_file" + } < "$filename" + cache.save_cpp } main "$@" -- cgit v1.2.3-54-g00ecf From 9825b6a50a433f15cc0b5baaea2f63eac47fe54e Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Mon, 24 Oct 2016 00:17:07 -0400 Subject: tools/notsd-fixup--includes: port from bash to python for speed This shaves off a decent chunk of time because Bash read(1) is slow because it can't buffer and has to read(2) one byte at a time. 
--- tools/notsd-fixup | 21 +- tools/notsd-fixup--includes | 523 ++++++++++++++++++++------------------------ 2 files changed, 252 insertions(+), 292 deletions(-) (limited to 'tools/notsd-fixup') diff --git a/tools/notsd-fixup b/tools/notsd-fixup index aa4bf4a10d..1d4287b259 100755 --- a/tools/notsd-fixup +++ b/tools/notsd-fixup @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Copyright (C) 2015-2016 Luke Shumaker # The reason we do `find`/`while read`-loops instead of `find -exec` commands # is that we want errors from the inner loop to bubble up. @@ -34,7 +35,25 @@ fixup_includes() ( fi find "$@" \( -name '*.h' -o -name '*.c' -o -name '*.gperf' \) -type f | while read -r filename; do - "$0"--includes "$filename" | build-aux/write-ifchanged "$filename" + false + # We copy the write-ifchanged logic to here, because we have a + # higher-than usual chance of the main command failing. In a + # Makefile we would handle this by setting .DELETE_ON_ERROR:, + # but we can't do that here, so we have to inter-mingle the + # logics. + local outfile="$filename" + tmpfile="$(dirname "$outfile")/.tmp.${outfile##*/}.tmp" + local r=0 + "$0"--includes "$filename" > "$tmpfile" || r=$? + if [[ $r != 0 ]]; then + rm -f "$tmpfile" || : + (exit $r) + fi + if cmp -s "$tmpfile" "$outfile"; then + rm -f "$tmpfile" || : + else + mv -f "$tmpfile" "$outfile" + fi done rm -rf -- "$0"--includes.cache ) diff --git a/tools/notsd-fixup--includes b/tools/notsd-fixup--includes index 9dfa7d0604..196fd488a3 100755 --- a/tools/notsd-fixup--includes +++ b/tools/notsd-fixup--includes @@ -1,25 +1,26 @@ -#!/usr/bin/env bash - -# Requires Bash 4.2 or higher (for `test -v`). +#!/usr/bin/env python3 # If you are thinking "this file looks gross!", it is. It # started out as a set of Bash one-liners. Which got turned # into a script. Which grew somewhat organically. Not huge, # but given that it started as some one liners, that's not a -# very pretty several hundred lines. So yes, it is gross. 
+# very pretty several hundred lines. Then got fairly literally +# translated into this, for speed. So yes, it is gross. # Rewrites welcome; just don't introduce any behavioral changes # (easy since `tools/notsd-move` runs it on the entire repo and # puts the results in git history). +import json +import os +import re +import shlex +import subprocess +import sys + ################################################################ # Everything else in this program is just fluff and bookkeeping # around around calling classify(). -out() { - _ret_class=$1 - _ret_path=$2 -} - # Return a tuple of (class/group, path); which is a class that # the header path belongs to, and a normalized path for it. # @@ -31,308 +32,248 @@ out() { # public # protected # private -# -# This uses the global variable `expensive`. -classify() { - local current_file=$1 - local path=$2 - if [[ "$path" = linux/* ]]; then - out linux "$path" - elif expensive.exists "${current_file%/*}/${path}"; then - out private "$path" - elif [[ "$path" != systemd/* ]] && - [[ "$path" != libudev.h ]] && - expensive.cpp "$path"; then - out system "$path" - else - case "$path" in - *-to-name.h|*-from-name.h) - base="${path##*/}" - base="${base%-to-name.h}" - base="${base%-from-name.h}" - case "$base" in - dns_type) d=src/grp-resolve/systemd-resolved;; - keyboard-keys) d=src/grp-udev/libudev-core;; - af|arphrd|cap|errno) d=src/libsystemd-basic/src;; - audit_type) d=src/libsystemd/src/sd-journal;; - *) - >&2 printf 'Unknown gperf base: %q\n' "$base" - >&2 printf 'Cannot figure out: %q\n' "$path" - return 2 - ;; - esac - file="$d/${path##*/}" - if [[ "$current_file" = "$d"/* ]]; then - out private "${file##*/}" - elif [[ "$file" = */include/* ]]; then - out protected "${file##*/include/}" - else - out protected "${file##*/}" - fi - ;; - asm/sgidefs.h|dbus/dbus.h|efi.h|efilib.h|gio/gio.h|glib.h|libmount.h) - out system "$path" - ;; - util.h|*/util.h) - if [[ "$current_file" = */systemd-boot/* ]]; then - out private 
util.h - else - out protected systemd-basic/util.h - fi - ;; - *) - file=$(expensive.find "${path##*/}") - if [[ -f "$file" ]]; then - case "$file" in - */src/*) - if [[ "${current_file%/*}" = "${file%/*}" ]]; then - out private "${file##*/}" - else - out protected "${file##*/src/}" - fi - ;; - */libsystemd/include/*|*/libudev/include/*) - out public "${file##*/include/}" - ;; - */include/*) - out protected "${file##*/include/}" - ;; - */include-staging/*) - out protected "${file##*/include-staging/}" - ;; - *) - if [[ "${current_file%/*}" = "${file%/*}" ]]; then - out private "${file##*/}" - else - out protected "${file##*/}" - fi - ;; - esac - else - >&2 printf 'Cannot figure out: %q\n' "$path" - return 2 - fi - ;; - esac - fi -} +def classify(expensive, current_file, path): + if path.startswith('linux/'): + return 'linux', path + elif expensive.exists(os.path.join(os.path.dirname(current_file), path)): + return 'private', path + elif not path.startswith('systemd/') and path != 'libudev.h' and expensive.cpp(path): + return 'system', path + else: + if path.endswith('-to-name.h') or path.endswith('-from-name.h'): + base = re.fullmatch('(.*)-(to|from)-name\.h', os.path.basename(path)).group(1) + d={ + 'dns_type' : 'src/grp-resolve/systemd-resolved', + 'keyboard-keys' : 'src/grp-udev/libudev-core', + 'af' : 'src/libsystemd-basic/src', + 'arphrd' : 'src/libsystemd-basic/src', + 'cap' : 'src/libsystemd-basic/src', + 'errno' : 'src/libsystemd-basic/src', + 'audit_type' : 'src/libsystemd/src/sd-journal', + } + file = os.path.join(d[base], os.path.basename(path)) + if current_file.startswith(d[base]): + return 'private', os.path.basename(file) + elif '/include/' in file: + return 'protected', re.sub('.*/include/', '', file) + else: + return 'protected', os.path.basename(file) + elif path in [ 'asm/sgidefs.h', 'dbus/dbus.h', 'efi.h', 'efilib.h', 'gio/gio.h', 'glib.h', 'libmount.h' ]: + return 'system', path + elif os.path.basename(path) == 'util.h': + if '/systemd-boot/' 
in current_file: + return 'private', 'util.h' + else: + return 'protected', 'systemd-basic/util.h' + else: + find = expensive.find(os.path.basename(path)) + if len(find) == 1: + file = find[0] + if '/src/' in file: + if os.path.dirname(current_file) == os.path.dirname(file): + return 'private', os.path.basename(file) + else: + return 'protected', re.sub('.*/src/', '', file) + elif ('/libsystemd/include/' in file) or ('/libudev/include/' in file): + return 'public', re.sub('.*/include/', '', file) + elif '/include/' in file: + return 'protected', re.sub('.*/include/', '', file) + elif '/include-staging/' in file: + return 'protected', re.sub('.*/include-staging/', '', file) + else: + if os.path.dirname(current_file) == os.path.dirname(file): + return 'private', os.path.basename(file) + else: + return 'protected', os.path.basename(file) + else: + sys.exit('Cannot figure out: {0}'.format(path)) ################################################################ # Cache expensive things -cache.init_cpp() { - if ! [[ -v _cache_cpp[@] ]]; then - if [[ -f "$0.cache/cpp" ]]; then - . "$0.cache/cpp" - else - declare -gA _cache_cpp=() - fi - fi -} +class Cache: + def __init__(self, filename): + self.cache = { + 'find': None, + 'cpp': {} + } + self.dirty = True + + if os.path.isfile(filename): + with open(filename) as file: + self.cache = json.load(file) + self.dirty = False -cache.save_cpp() { - cache.init_cpp - mkdir -p "$0.cache" - declare -p _cache_cpp | sed 's/-/-g/' > "$0.cache/cpp" -} + def save(self, filename): + if self.dirty: + with open(filename, 'w') as file: + json.dump(self.cache, file) + def real_cpp(path): + # `cpp -include "$path" <<<'' &>/dev/null` + print(' -> cpp({0})'.format(path), file=sys.stderr) + with subprocess.Popen(['cpp', '-include', path], + stdin=subprocess.PIPE, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) as proc: + proc.stdin.close() + return proc.wait() == 0 -cache.init_fs() { - if ! [[ -v _cache_fs ]]; then - if ! 
[[ -f "$0.cache/fs" ]]; then - >&2 echo expensive fs.find - mkdir -p "$0.cache" - find src -name '*.h' \( -type l -printf 'l %p\n' -o -type f -printf 'f %p\n' \) > "$0.cache/fs" - fi - declare -g _cache_fs=true - fi -} + def real_find(): + # This can probably be done with os.walk or something, + # but since it is only called once, it isn't a good + # place to start optimizing. + # + # `find src -name '*.h' \( -type l -printf 'l %p\n' -o -type f -printf 'f %p\n' \)` + print(' -> find()', file=sys.stderr) + ret = {} + with subprocess.Popen(['find', 'src', '-name', '*.h', '(', '-type', 'l', '-printf', 'l %p\n', '-o', '-type', 'f', '-printf', 'f %p\n', ')'], + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, universal_newlines=True, + stderr=subprocess.DEVNULL) as proc: + for line in proc.stdout: + t, p = line.rstrip('\n').split(' ', 1) + ret[p]=t + return ret -expensive.cpp() { - local path=$1 - cache.init_cpp - if [[ -z "${_cache_cpp[$path]}" ]]; then - >&2 echo expensive cpp "$path" - local r - r=0; cpp -include "$path" <<<'' &>/dev/null || r=$? 
- _cache_cpp[$path]=$r - fi - return ${_cache_cpp[$path]} -} + def cpp(self, path): + # `cpp -include "$path" <<<'' &>/dev/null` + if path not in self.cache['cpp']: + self.cache['cpp'][path] = Cache.real_cpp(path) + self.dirty = True + return self.cache['cpp'][path] -expensive.exists() { - local path=$1 - cache.init_fs - grep -qFx \ - -e "l $path" \ - -e "f $path" \ - < "$0.cache/fs" -} + def exists(self, path): + # `test -f "$path"` + if not self.cache['find']: + self.cache['find'] = Cache.real_find() + self.dirty = True + return path in self.cache['find'] -expensive.find() { - local name=$1 - cache.init_fs - sed -n "/^f .*\/${name//./\\.}\$/s/^f //p" < "$0.cache/fs" -} + def find(self, name): + # `find src -type f -name "$name"` + if not self.cache['find']: + self.cache['find'] = Cache.real_find() + self.dirty = True + return [p for p in self.cache['find'].keys() if self.cache['find'][p]=='f' and os.path.basename(p) == name] ################################################################ # Data structure for storing a chunk of `#include` lines. -includes.init() { - _includes_trailing_nl= - _includes_system=() - _includes_linux=() - _includes_public=() - _includes_protected=() - _includes_typedef=() - _includes_typedef_last=true - _includes_private=() -} -includes.print() { - local b=: - if [[ ${#_includes_system[@]} -gt 0 ]]; then - printf '%s\n' "${_includes_system[@]}" | sort -u - b=echo - fi - if [[ ${#_includes_linux[@]} -gt 0 ]]; then - $b - printf '%s\n' "${_includes_linux[@]}" - b=echo - fi - if [[ ${#_includes_public[@]} -gt 0 ]]; then - $b - printf '%s\n' "${_includes_public[@]}" | sort -u - b=echo - fi - if [[ ${#_includes_protected[@]} -gt 0 ]]; then - $b - printf '%s\n' "${_includes_protected[@]}" | sort -u - b=echo - fi - if [[ ${#_includes_typedef[@]} -gt 0 ]] && ! 
$_includes_typedef_last; then - $b - printf '%s\n' "${typedef[@]}" | sort -u - b=echo - fi - if [[ ${#_includes_private[@]} -gt 0 ]]; then - $b - printf '%s\n' "${_includes_private[@]}" | sort -u - b=echo - fi - if [[ ${#_includes_typedef[@]} -gt 0 ]] && $_includes_typedef_last; then - $b - printf '%s\n' "${_includes_typedef[@]}" - fi - printf '%s' "$_includes_trailing_nl" -} -includes.add() { - local class=$1 - local path=$2 - local extra=$3 - local line - case "$class" in - system) - printf -v line '#include <%s>%s' "$path" "$extra" - _includes_system+=("$line") - ;; - linux) - printf -v line '#include <%s>%s' "$path" "$extra" - _includes_linux+=("$line") - ;; - public) - printf -v line '#include <%s>%s' "$path" "$extra" - _includes_public+=("$line") - ;; - protected) - printf -v line '#include "%s"%s' "$path" "$extra" - _includes_protected+=("$line") - ;; - private) - if [[ ${#typedef[@]} -gt 0 ]]; then - _includes_typedef_last=false - fi - printf -v line '#include "%s"%s' "$path" "$extra" - _includes_private+=("$line") - ;; - *) - >&2 printf 'Invalid include class: %q\n' "$class" - return 2 - ;; - esac -} +class IncludeSection: + def __init__(self): + self.trailing_nl = '' + self.system = [] + self.linux = [] + self.public = [] + self.protected = [] + self.typedef = [] + self.typedef_last = True + self.private = [] + def print(self, file=sys.stdout): + b='' + if len(self.system) > 0: + for line in sorted(set(self.system)): + print(line, file=file) + b='\n' + if len(self.linux) > 0: + print(b, end='', file=file) + for line in self.linux: + print(line, file=file) + b='\n' + if len(self.public) > 0: + print(b, end='', file=file) + for line in sorted(set(self.public)): + print(line, file=file) + b='\n' + if len(self.protected) > 0: + print(b, end='', file=file) + for line in sorted(set(self.protected)): + print(line, file=file) + b='\n' + if len(self.typedef) > 0 and not self.typedef_last: + print(b, end='', file=file) + for line in sorted(set(self.typedef)): + 
print(line, file=file) + b='\n' + if len(self.private) > 0: + print(b, end='', file=file) + for line in sorted(set(self.private)): + print(line, file=file) + b='\n' + if len(self.typedef) > 0 and self.typedef_last: + print(b, end='', file=file) + for line in sorted(set(self.typedef)): + print(line, file=file) + print(self.trailing_nl, end='', file=file) + def add(self, group, path, extra): + if group == 'system': + self.system.append('#include <{0}>{1}'.format(path, extra)) + elif group == 'linux': + self.linux.append('#include <{0}>{1}'.format(path, extra)) + elif group == 'public': + self.public.append('#include <{0}>{1}'.format(path, extra)) + elif group == 'protected': + self.protected.append('#include "{0}"{1}'.format(path, extra)) + elif group == 'private': + if len(self.typedef) > 0: + self.typedef_last = False + self.private.append('#include "{0}"{1}'.format(path, extra)) + else: + sys.exit('panic: unrecognized line class: {0}'.format(group)) ################################################################ # The main program loop -panic() { - >&2 echo panic - exit 2 -} +def phase0(cache, filename, line): + global phase + phase = phase0 + + if re.fullmatch('#include.*|typedef .*;', line): + global includes + includes = IncludeSection() + phase1(cache, filename, line) + else: + print(line) -phase0() { - phase=phase0 - hook=: - local filename="$1" - local line="$2" - case "$line" in - '#include'*|'typedef '*';') - includes.init - phase1 "$filename" "$line" - ;; - *) - printf '%s\n' "$line" - ;; - esac -} +def phase1(cache, filename, line): + global phase, includes + phase = phase1 -phase1() { - phase=phase1 - hook=includes.print - local filename="$1" - local line="$2" - case "$line" in - '') - _includes_trailing_nl+=$'\n' - ;; - '#include'*) - _includes_trailing_nl='' - local re='^#include [<"]([^">]*)[">](.*)' - if [[ "$line" =~ $re ]]; then - # OK, this is gross, but we want to avoid creating a subshell - local _ret_class _ret_path - classify "$filename" 
"${BASH_REMATCH[1]}" || panic - includes.add "$_ret_class" "$_ret_path" "${BASH_REMATCH[2]}" || panic - else - panic - fi - ;; - 'typedef '*';') - _includes_trailing_nl='' - _includes_typedef+=("$line") - ;; - *) - includes.print - phase0 "$filename" "$line" - ;; - esac -} + if line == '': + includes.trailing_nl += '\n' + elif line.startswith('#include'): + includes.trailing_nl = '' + match = re.fullmatch('^#include [<"]([^">]*)[">](.*)', line) + if match: + group, path = classify(cache, filename, match.group(1)) + includes.add(group, path, match.group(2)) + else: + sys.exit('panic: malformed #include line') + elif re.fullmatch('typedef .*;', line): + includes.trailing_nl = '' + includes.typedef.append(line) + else: + includes.print() + includes = None + phase0(cache, filename, line) -phase=phase0 -hook=: +includes = None +phase = phase0 -main() { - local filename="$1" - >&2 printf ' => %q %q\n' "$0" "$filename" - set -o pipefail - { - IFS='' - while read -r line; do - "$phase" "$filename" "$line" - IFS='' - done - "$hook" - } < "$filename" - cache.save_cpp -} +def main(argv): + filename = argv[1] + print(' => {0} {1}'.format( + shlex.quote(__file__), + shlex.quote(filename), + ), file=sys.stderr) + cache = Cache(__file__+'.cache') + with open(filename) as f: + for line in f: + phase(cache, filename, line.rstrip('\n')) + if includes: + includes.print() + cache.save(__file__+'.cache') -main "$@" +if __name__ == '__main__': + main(sys.argv) -- cgit v1.2.3-54-g00ecf From 52443abf0074a859472a373113a183e0ba518f26 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Tue, 25 Oct 2016 01:10:06 -0400 Subject: tools/notsd-fixup--includes: Allow passing multiple arguments. This substantially speeds things up because it doesn't have to set up and tear down the Python runtime for every single C file now. 
--- tools/notsd-fixup | 32 ++++++++----------------------- tools/notsd-fixup--includes | 46 ++++++++++++++++++++++++++++++--------------- 2 files changed, 39 insertions(+), 39 deletions(-) (limited to 'tools/notsd-fixup') diff --git a/tools/notsd-fixup b/tools/notsd-fixup index 1d4287b259..4bd4b3f94c 100755 --- a/tools/notsd-fixup +++ b/tools/notsd-fixup @@ -1,11 +1,9 @@ #!/usr/bin/env bash # Copyright (C) 2015-2016 Luke Shumaker -# The reason we do `find`/`while read`-loops instead of `find -exec` commands -# is that we want errors from the inner loop to bubble up. - fixup_makefiles() ( find "$@" -type f -name Makefile | while read -r filename; do + >&2 printf ' => fixup %q\n' "$filename" { <"$filename" sed -r \ -e "s|(/\.\.)*/config.mk|/$(realpath -ms --relative-to="$(dirname -- "$filename")" config.mk)|" \ @@ -34,27 +32,13 @@ fixup_includes() ( done fi - find "$@" \( -name '*.h' -o -name '*.c' -o -name '*.gperf' \) -type f | while read -r filename; do - false - # We copy the write-ifchanged logic to here, because we have a - # higher-than usual chance of the main command failing. In a - # Makefile we would handle this by setting .DELETE_ON_ERROR:, - # but we can't do that here, so we have to inter-mingle the - # logics. - local outfile="$filename" - tmpfile="$(dirname "$outfile")/.tmp.${outfile##*/}.tmp" - local r=0 - "$0"--includes "$filename" > "$tmpfile" || r=$? - if [[ $r != 0 ]]; then - rm -f "$tmpfile" || : - (exit $r) - fi - if cmp -s "$tmpfile" "$outfile"; then - rm -f "$tmpfile" || : - else - mv -f "$tmpfile" "$outfile" - fi - done + # We wrap the $0--includes program with `sh` because xargs only exits + # early if the status is 255, but we want to exit early for all + # non-zero statuses. We use xargs instead of -exec because -exec won't + # do much of anything useful with the exit status. 
+ rm -rf -- "$0"--includes.cache + find "$@" \( -name '*.h' -o -name '*.c' -o -name '*.gperf' -o -name '*.gperf.m4' \) -type f -print0 | + xargs -r0 sh -c "$0--includes \"\$@\" || exit 255" -- rm -rf -- "$0"--includes.cache ) diff --git a/tools/notsd-fixup--includes b/tools/notsd-fixup--includes index 196fd488a3..d16a3e6500 100755 --- a/tools/notsd-fixup--includes +++ b/tools/notsd-fixup--includes @@ -10,6 +10,8 @@ # (easy since `tools/notsd-move` runs it on the entire repo and # puts the results in git history). +import atexit +import filecmp import json import os import re @@ -225,7 +227,7 @@ class IncludeSection: ################################################################ # The main program loop -def phase0(cache, filename, line): +def phase0(cache, filename, line, file=sys.stdout): global phase phase = phase0 @@ -234,9 +236,9 @@ def phase0(cache, filename, line): includes = IncludeSection() phase1(cache, filename, line) else: - print(line) + print(line, file=file) -def phase1(cache, filename, line): +def phase1(cache, filename, line, file=sys.stdout): global phase, includes phase = phase1 @@ -254,25 +256,39 @@ def phase1(cache, filename, line): includes.trailing_nl = '' includes.typedef.append(line) else: - includes.print() + includes.print(file=file) includes = None - phase0(cache, filename, line) + phase0(cache, filename, line, file=file) includes = None phase = phase0 def main(argv): - filename = argv[1] - print(' => {0} {1}'.format( - shlex.quote(__file__), - shlex.quote(filename), - ), file=sys.stderr) cache = Cache(__file__+'.cache') - with open(filename) as f: - for line in f: - phase(cache, filename, line.rstrip('\n')) - if includes: - includes.print() + tmpfilename = '' + def cleanup(): + if tmpfilename != '': + try: + os.unlink(tmpfilename) + except FileNotFoundError: + pass + atexit.register(cleanup) + for filename in argv[1:]: + tmpfilename = os.path.join(os.path.dirname(filename), '.tmp.'+os.path.basename(filename)+'.tmp') + print(' => {0} 
{1}'.format( + shlex.quote(__file__), + shlex.quote(filename), + ), file=sys.stderr) + with open(tmpfilename, 'w') as tmpfile: + with open(filename) as f: + for line in f: + phase(cache, filename, line.rstrip('\n'), file=tmpfile) + if includes: + includes.print(file=tmpfile) + if not filecmp.cmp(filename, tmpfilename): + os.rename(tmpfilename, filename) + cleanup() + tmpfilename = '' cache.save(__file__+'.cache') if __name__ == '__main__': -- cgit v1.2.3-54-g00ecf From e4f65c8bb8468e4391c0b38b51e86001cd7e0ed8 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Tue, 25 Oct 2016 01:55:19 -0400 Subject: tools/notsd-fixup: Merge the pre-includes pass into the includes pass. This shaves off a decent chunk of time, and simplifies the code. --- tools/notsd-fixup | 20 +++++--------------- tools/notsd-fixup--includes | 4 ++++ 2 files changed, 9 insertions(+), 15 deletions(-) (limited to 'tools/notsd-fixup') diff --git a/tools/notsd-fixup b/tools/notsd-fixup index 4bd4b3f94c..79b30b7c55 100755 --- a/tools/notsd-fixup +++ b/tools/notsd-fixup @@ -1,6 +1,11 @@ #!/usr/bin/env bash # Copyright (C) 2015-2016 Luke Shumaker +# We wrap the programs called by xargs with `sh` because xargs only exits early +# if the status is 255, but we want to exit early for all non-zero statuses. +# We use xargs instead of `find -exec` because `-exec` won't do much of +# anything useful with the exit status. 
+ fixup_makefiles() ( find "$@" -type f -name Makefile | while read -r filename; do >&2 printf ' => fixup %q\n' "$filename" @@ -21,21 +26,6 @@ fixup_makefiles() ( ) fixup_includes() ( - dirs=($(find "$@" -type d -name include)) - if [[ ${#dirs[@]} -gt 0 ]]; then - find "${dirs[@]}" -type d | while read -r dir; do - lib="${dir##*/}" - find "$dir" -type f | while read -r filename; do - >&2 printf ' => sed -r %q < %q\n' "s|$lib/||" "$filename" - sed -r "s|$lib/||" < "$filename" | build-aux/write-ifchanged "$filename" - done - done - fi - - # We wrap the $0--includes program with `sh` because xargs only exits - # early if the status is 255, but we want to exit early for all - # non-zero statuses. We use xargs instead of -exec because -exec won't - # do much of anything useful with the exit status. rm -rf -- "$0"--includes.cache find "$@" \( -name '*.h' -o -name '*.c' -o -name '*.gperf' -o -name '*.gperf.m4' \) -type f -print0 | xargs -r0 sh -c "$0--includes \"\$@\" || exit 255" -- diff --git a/tools/notsd-fixup--includes b/tools/notsd-fixup--includes index d16a3e6500..a82247e175 100755 --- a/tools/notsd-fixup--includes +++ b/tools/notsd-fixup--includes @@ -35,6 +35,10 @@ import sys # protected # private def classify(expensive, current_file, path): + if re.fullmatch('.*/include(-staging)?/.*/.*', current_file): + lib = os.path.basename(os.path.dirname(current_file)) + if path.startswith(lib+'/'): + path = re.sub('^'+lib+'/', path) if path.startswith('linux/'): return 'linux', path elif expensive.exists(os.path.join(os.path.dirname(current_file), path)): -- cgit v1.2.3-54-g00ecf From 96a2b322fe384fa624cadd468e7d44396e49c2af Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Tue, 25 Oct 2016 02:03:25 -0400 Subject: tools/notsd-fixup: Split the makefile pass into a separate executable. This avoids using bash read(1) to loop over the files, as we've already gotten decent speedups from avoiding bash read(1). 
This means we've got at least two more fork/execs because of xargs, but it's probably worth it. That said, this is arguably premature optimization; the time improvement here is probably just random noise. Still, I think this makes the code more maintainable/manageable too, so I'm committing it. --- tools/notsd-fixup | 44 +++++++++++++------------------------------- tools/notsd-fixup--makefiles | 30 ++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 31 deletions(-) create mode 100755 tools/notsd-fixup--makefiles (limited to 'tools/notsd-fixup') diff --git a/tools/notsd-fixup b/tools/notsd-fixup index 79b30b7c55..71b3bf4dfa 100755 --- a/tools/notsd-fixup +++ b/tools/notsd-fixup @@ -1,43 +1,25 @@ #!/usr/bin/env bash # Copyright (C) 2015-2016 Luke Shumaker -# We wrap the programs called by xargs with `sh` because xargs only exits early -# if the status is 255, but we want to exit early for all non-zero statuses. -# We use xargs instead of `find -exec` because `-exec` won't do much of -# anything useful with the exit status. +main() { + set -e + set -o pipefail + export LC_COLLATE=C + + # We wrap the programs called by xargs with `sh` because xargs only exits early + # if the status is 255, but we want to exit early for all non-zero statuses. + # We use xargs instead of `find -exec` because `-exec` won't do much of + # anything useful with the exit status. 
-fixup_makefiles() ( - find "$@" -type f -name Makefile | while read -r filename; do - >&2 printf ' => fixup %q\n' "$filename" - { - <"$filename" sed -r \ - -e "s|(/\.\.)*/config.mk|/$(realpath -ms --relative-to="$(dirname -- "$filename")" config.mk)|" \ - -e '/^nested\.subdirs/d' \ - -e '/^include \$\(topsrcdir\)\/build-aux\/Makefile\.tail\.mk$/d' - echo - find "$(dirname "$filename")" -mindepth 2 -maxdepth 2 -name Makefile -print0 | - xargs -r0 dirname -z -- | - xargs -r0 basename -a -z | - xargs -r0 printf 'nested.subdirs += %s\n' | sort - echo - echo 'include $(topsrcdir)/build-aux/Makefile.tail.mk' - } | cat -s | build-aux/write-ifchanged "$filename" - done -) + # Makefiles + find "$@" -type f -name Makefile -print0 | + xargs -r0 sh -c "$0--makefiles \"\$@\" || exit 255" -- -fixup_includes() ( + # C includes rm -rf -- "$0"--includes.cache find "$@" \( -name '*.h' -o -name '*.c' -o -name '*.gperf' -o -name '*.gperf.m4' \) -type f -print0 | xargs -r0 sh -c "$0--includes \"\$@\" || exit 255" -- rm -rf -- "$0"--includes.cache -) - -main() { - set -e - set -o pipefail - export LC_COLLATE=C - fixup_makefiles "$@" - fixup_includes "$@" } main "$@" diff --git a/tools/notsd-fixup--makefiles b/tools/notsd-fixup--makefiles new file mode 100755 index 0000000000..bb18c3be7b --- /dev/null +++ b/tools/notsd-fixup--makefiles @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +doit() { + local filename=$1 + { + <"$filename" sed -r \ + -e "s|(/\.\.)*/config.mk|/$(realpath -ms --relative-to="$(dirname -- "$filename")" config.mk)|" \ + -e '/^nested\.subdirs/d' \ + -e '/^include \$\(topsrcdir\)\/build-aux\/Makefile\.tail\.mk$/d' + echo + find "$(dirname "$filename")" -mindepth 2 -maxdepth 2 -name Makefile -print0 | + xargs -r0 dirname -z -- | + xargs -r0 basename -a -z | + xargs -r0 printf 'nested.subdirs += %s\n' | sort + echo + echo 'include $(topsrcdir)/build-aux/Makefile.tail.mk' + } | cat -s | build-aux/write-ifchanged "$filename" +} + +main() { + set -e + set -o pipefail + local 
filename + for filename in "$@"; do + >&2 printf ' => fixup %q\n' "$filename" + doit "$filename" + done +} + +main "$@" -- cgit v1.2.3-54-g00ecf