#!/usr/bin/env python3

# If you are thinking "this file looks gross!", it is.  It
# started out as a set of Bash one-liners.  Which got turned
# into a script.  Which grew somewhat organically.  Not huge,
# but given that it started as some one liners, that's not a
# very pretty several hundred lines.  Then got fairly literally
# translated into this, for speed.  So yes, it is gross.
# Rewrites welcome; just don't introduce any behavioral changes
# (easy since `tools/notsd-move` runs it on the entire repo and
# puts the results in git history).

import atexit
import filecmp
import json
import os
import re
import shlex
import subprocess
import sys

################################################################
# Everything else in this program is just fluff and bookkeeping
# around calling classify().

# Return a tuple of (class/group, path); which is a class that
# the header path belongs to, and a normalized path for it.
#
# There are a fixed number of classes that it may put a header
# in; in order of most-public to most-private:
#
#  system
#  linux
#  public
#  protected
#  private
def classify(expensive, current_file, path):
    """Pick the include group for a header and normalize its path.

    Args:
        expensive: Object providing the lookups ``exists(path)``,
            ``cpp(path)`` and ``find(name)``.
        current_file: Path of the file that contains the ``#include``.
        path: Header path exactly as written in the ``#include`` line.

    Returns:
        A ``(group, path)`` tuple.  ``group`` is one of ``'system'``,
        ``'linux'``, ``'public'``, ``'protected'`` or ``'private'``;
        ``path`` is the path to write back into the ``#include``.

    Raises:
        SystemExit: If the header's basename is not found exactly once
            by ``expensive.find()``.
        KeyError: If a ``*-to-name.h`` / ``*-from-name.h`` header has a
            stem that is not in the hard-coded table below.
    """
    # A file under .../include/<lib>/ (or include-staging) may spell a
    # header as "<lib>/foo.h"; drop that leading "<lib>/" before the
    # checks below.  Plain slicing is used (rather than a regex built
    # from `lib`) so that directory names containing regex
    # metacharacters are handled literally.
    if re.fullmatch('.*/include(-staging)?/.*/.*', current_file):
        lib = os.path.basename(os.path.dirname(current_file))
        if path.startswith(lib+'/'):
            path = path[len(lib)+1:]

    if path.startswith('linux/'):
        return 'linux', path

    # A header sitting next to the including file is private.
    if expensive.exists(os.path.join(os.path.dirname(current_file), path)):
        return 'private', path

    # Anything the preprocessor can resolve by itself is a system
    # header, except for systemd/* and libudev.h, which are never
    # handed to cpp.
    if not path.startswith('systemd/') and path != 'libudev.h' and expensive.cpp(path):
        return 'system', path

    # The *-to-name.h / *-from-name.h headers are not looked up with
    # find(); where each one belongs is hard-coded.
    if path.endswith(('-to-name.h', '-from-name.h')):
        base = re.fullmatch(r'(.*)-(to|from)-name\.h', os.path.basename(path)).group(1)
        d = {
            'dns_type'      : 'src/grp-resolve/systemd-resolved',
            'keyboard-keys' : 'src/grp-udev/libudev-core',
            'af'            : 'src/libsystemd-basic/src',
            'arphrd'        : 'src/libsystemd-basic/src',
            'cap'           : 'src/libsystemd-basic/src',
            'errno'         : 'src/libsystemd-basic/src',
            'audit_type'    : 'src/libsystemd/src/sd-journal',
        }
        generated = os.path.join(d[base], os.path.basename(path))
        if current_file.startswith(d[base]):
            return 'private', os.path.basename(generated)
        if '/include/' in generated:
            return 'protected', re.sub('.*/include/', '', generated)
        return 'protected', os.path.basename(generated)

    # Headers treated as system headers even when cpp could not
    # resolve them.
    if path in ['asm/sgidefs.h', 'dbus/dbus.h', 'efi.h', 'efilib.h', 'gio/gio.h', 'glib.h', 'libmount.h']:
        return 'system', path

    # util.h is special-cased instead of being looked up by basename.
    if os.path.basename(path) == 'util.h':
        if '/systemd-boot/' in current_file:
            return 'private', 'util.h'
        return 'protected', 'systemd-basic/util.h'

    # Everything else must resolve to exactly one regular file.
    find = expensive.find(os.path.basename(path))
    if len(find) != 1:
        sys.exit('Cannot figure out: {0}'.format(path))

    found = find[0]
    same_dir = os.path.dirname(current_file) == os.path.dirname(found)
    if '/src/' in found:
        if same_dir:
            return 'private', os.path.basename(found)
        return 'protected', re.sub('.*/src/', '', found)
    if ('/libsystemd/include/' in found) or ('/libudev/include/' in found):
        return 'public', re.sub('.*/include/', '', found)
    if '/include/' in found:
        return 'protected', re.sub('.*/include/', '', found)
    if '/include-staging/' in found:
        return 'protected', re.sub('.*/include-staging/', '', found)
    if same_dir:
        return 'private', os.path.basename(found)
    return 'protected', os.path.basename(found)

################################################################
# Cache expensive things

class Cache:
    """Memo for the slow lookups, persisted as JSON between runs.

    ``self.cache`` holds two entries:

    * ``'find'``: ``None`` until first needed, then a dict mapping every
      ``*.h`` path under ``src`` to ``'f'`` (regular file) or ``'l'``
      (symlink).
    * ``'cpp'``: dict mapping a header path to whether ``cpp -include``
      succeeded on it.

    ``self.dirty`` records whether the in-memory cache differs from what
    was loaded, so that ``save()`` only writes when needed.
    """

    def __init__(self, filename):
        """Load the cache from `filename` if it exists, else start empty."""
        self.cache = {
            'find': None,
            'cpp': {}
        }
        self.dirty = True

        if os.path.isfile(filename):
            with open(filename) as file:
                self.cache = json.load(file)
                self.dirty = False

    def save(self, filename):
        """Write the cache to `filename` as JSON, but only if it changed."""
        if self.dirty:
            with open(filename, 'w') as file:
                json.dump(self.cache, file)

    @staticmethod
    def real_cpp(path):
        """Return True if the C preprocessor can include `path`."""
        # `cpp -include "$path" <<<'' &>/dev/null`
        print('  -> cpp({0})'.format(path), file=sys.stderr)
        with subprocess.Popen(['cpp', '-include', path],
                                stdin=subprocess.PIPE,
                                stdout=subprocess.DEVNULL,
                                stderr=subprocess.DEVNULL) as proc:
            # Closing stdin gives cpp an empty translation unit.
            proc.stdin.close()
            return proc.wait() == 0

    @staticmethod
    def real_find():
        """Return a dict of every ``*.h`` under ``src`` -> ``'f'`` or ``'l'``."""
        # This can probably be done with os.walk or something,
        # but since it is only called once, it isn't a good
        # place to start optimizing.
        #
        # `find src -name '*.h' \( -type l -printf 'l %p\n' -o -type f -printf 'f %p\n' \)`
        print('  -> find()', file=sys.stderr)
        ret = {}
        with subprocess.Popen(['find', 'src', '-name', '*.h', '(', '-type', 'l', '-printf', 'l %p\n', '-o', '-type', 'f', '-printf', 'f %p\n', ')'],
                              stdin=subprocess.DEVNULL,
                              stdout=subprocess.PIPE, universal_newlines=True,
                              stderr=subprocess.DEVNULL) as proc:
            for line in proc.stdout:
                # Each line is "<type> <path>"; split only once so
                # that paths containing spaces stay intact.
                t, p = line.rstrip('\n').split(' ', 1)
                ret[p] = t
        return ret

    def _find_index(self):
        """Return the path -> type dict, running find first if needed."""
        # An empty result is treated the same as "not yet computed".
        if not self.cache['find']:
            self.cache['find'] = Cache.real_find()
            self.dirty = True
        return self.cache['find']

    def cpp(self, path):
        """Memoized `real_cpp(path)`."""
        # `cpp -include "$path" <<<'' &>/dev/null`
        if path not in self.cache['cpp']:
            self.cache['cpp'][path] = Cache.real_cpp(path)
            self.dirty = True
        return self.cache['cpp'][path]

    def exists(self, path):
        """Return True if `path` is one of the headers found under ``src``.

        Both regular files and symlinks count.
        """
        # `test -f "$path"`
        return path in self._find_index()

    def find(self, name):
        """Return the regular-file headers under ``src`` whose basename is `name`."""
        # `find src -type f -name "$name"`
        return [p for p, t in self._find_index().items()
                if t == 'f' and os.path.basename(p) == name]

################################################################
# Data structure for storing a chunk of `#include` lines.

class IncludeSection:
    """One contiguous run of `#include` / `typedef` lines.

    Lines are collected per group with add() (typedefs are appended to
    `self.typedef` directly by the caller) and written back out,
    regrouped, by print().
    """

    def __init__(self):
        # Blank lines seen after the last include/typedef line; they
        # are re-emitted verbatim after the section.
        self.trailing_nl = ''
        self.system = []
        self.linux = []
        self.public = []
        self.protected = []
        self.private = []
        self.typedef = []
        # Stays True until a private include is added after a typedef.
        self.typedef_last = True

    def print(self, file=sys.stdout):
        """Write the section to `file`, groups separated by a blank line."""
        def unique_sorted(lines):
            return sorted(set(lines))

        # The linux group keeps its original order and duplicates;
        # every other include group is de-duplicated and sorted.
        groups = [
            unique_sorted(self.system),
            self.linux,
            unique_sorted(self.public),
            unique_sorted(self.protected),
        ]
        if self.typedef_last:
            # Typedefs trail the includes and are emitted untouched.
            groups.append(unique_sorted(self.private))
            groups.append(self.typedef)
        else:
            # Typedefs go ahead of the private includes and are
            # de-duplicated and sorted like an include group.
            groups.append(unique_sorted(self.typedef))
            groups.append(unique_sorted(self.private))

        separator = ''
        for group in groups:
            if not group:
                continue
            print(separator, end='', file=file)
            for text in group:
                print(text, file=file)
            separator = '\n'
        print(self.trailing_nl, end='', file=file)

    def add(self, group, path, extra):
        """Record an include of `path` in `group`.

        `extra` is whatever followed the closing delimiter on the
        original line; it is appended unchanged.  An unknown `group`
        terminates the program.
        """
        angle = '#include <{0}>{1}'
        quoted = '#include "{0}"{1}'
        buckets = {
            'system': (self.system, angle),
            'linux': (self.linux, angle),
            'public': (self.public, angle),
            'protected': (self.protected, quoted),
            'private': (self.private, quoted),
        }
        if group not in buckets:
            sys.exit('panic: unrecognized line class: {0}'.format(group))
        if group == 'private' and self.typedef:
            self.typedef_last = False
        lines, template = buckets[group]
        lines.append(template.format(path, extra))

################################################################
# The main program loop

class Parser:
    """Two-state line processor that rewrites one file's include sections.

    `self.phase` always points at the handler for the next line:
    phase0 while outside an include section, phase1 while inside one.
    """

    def __init__(self, cache, ifilename, ofilename):
        self.cache = cache
        self.ofilename = ofilename
        self.ifilename = os.path.normpath(ifilename)

        self.phase = self.phase0
        self.includes = None

    def phase0(self, line, ofile):
        """Handle a line outside any include section."""
        self.phase = self.phase0

        if re.fullmatch('#include.*|typedef .*;', line) is None:
            # Ordinary line: pass it through untouched.
            print(line, file=ofile)
            return

        # First line of a new section; let phase1 consume it.
        self.includes = IncludeSection()
        self.phase1(line, ofile)

    def phase1(self, line, ofile):
        """Handle a line while an include section is being collected."""
        self.phase = self.phase1
        section = self.includes

        if line == '':
            # Blank lines are only kept if nothing else follows them
            # within the section.
            section.trailing_nl += '\n'
            return

        if line.startswith('#include'):
            section.trailing_nl = ''
            parsed = re.fullmatch('^#include [<"]([^">]*)[">](.*)', line)
            if not parsed:
                sys.exit('panic: malformed #include line')
            group, path = classify(self.cache, self.ifilename, parsed.group(1))
            section.add(group, path, parsed.group(2))
            return

        if re.fullmatch('typedef .*;', line):
            section.trailing_nl = ''
            section.typedef.append(line)
            return

        # Anything else ends the section: flush it, then treat the
        # line as an ordinary phase0 line.
        section.print(file=ofile)
        self.includes = None
        self.phase0(line, ofile)

    def run(self):
        """Read `self.ifilename` and write the rewritten text to `self.ofilename`."""
        banner = ' => {0} {1}'.format(
            shlex.quote(__file__),
            shlex.quote(self.ifilename),
        )
        print(banner, file=sys.stderr)
        with open(self.ofilename, 'w') as ofile:
            with open(self.ifilename) as ifile:
                for raw in ifile:
                    self.phase(raw.rstrip('\n'), ofile)
            # The input may end while still inside a section.
            if self.includes is not None:
                self.includes.print(file=ofile)

def main(argv):
    """Rewrite the include sections of every file named in `argv[1:]`.

    Each file is processed into a temporary sibling file, which only
    replaces the original when the contents actually differ.  The
    lookup cache is loaded from, and saved back to, `__file__ + '.cache'`.
    """
    cache = Cache(__file__+'.cache')
    tmpfilename = ''
    def cleanup():
        # Remove the current temporary file, if any.  Also registered
        # with atexit so that a sys.exit() part-way through a file
        # does not leave the temporary file behind.
        if tmpfilename != '':
            try:
                os.unlink(tmpfilename)
            except FileNotFoundError:
                pass
    atexit.register(cleanup)
    for filename in argv[1:]:
        tmpfilename = os.path.join(os.path.dirname(filename), '.tmp.'+os.path.basename(filename)+'.tmp')
        Parser(cache, filename, tmpfilename).run()
        # shallow=False forces a byte-for-byte comparison.  The default
        # shallow mode treats files with the same size and mtime as
        # equal without reading them, and merely reordering includes
        # leaves the size unchanged.
        if not filecmp.cmp(filename, tmpfilename, shallow=False):
            os.rename(tmpfilename, filename)
        cleanup()
        tmpfilename = ''
    cache.save(__file__+'.cache')

# Script entry point: only runs when the file is executed directly,
# not when it is imported.  The full argv is passed; main() skips
# argv[0] itself.
if __name__ == '__main__':
    main(sys.argv)