/***
  This file is part of systemd.

  Copyright 2013 Tom Gundersen <teg@jklm.no>

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <netinet/in.h>
#include <stdbool.h>
#include <unistd.h>

#include "sd-netlink.h"

#include "alloc-util.h"
#include "formats-util.h"
#include "missing.h"
#include "netlink-internal.h"
#include "netlink-types.h"
#include "netlink-util.h"
#include "refcnt.h"
#include "socket-util.h"
#include "util.h"

int socket_open(int family) {
        int fd;

        fd = socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, family);
        if (fd < 0)
                return -errno;

        return fd;
}

static int broadcast_groups_get(sd_netlink *nl) {
        _cleanup_free_ uint32_t *groups = NULL;
        socklen_t len = 0, old_len;
        unsigned i, j;
        int r;

        assert(nl);
        assert(nl->fd >= 0);

        r = getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, NULL, &len);
        if (r < 0) {
                if (errno == ENOPROTOOPT) {
                        nl->broadcast_group_dont_leave = true;
                        return 0;
                } else
                        return -errno;
        }

        if (len == 0)
                return 0;

        groups = new0(uint32_t, len);
        if (!groups)
                return -ENOMEM;

        old_len = len;

        r = getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, groups, &len);
        if (r < 0)
                return -errno;

        if (old_len != len)
                return -EIO;

        r = hashmap_ensure_allocated(&nl->broadcast_group_refs, NULL);
        if (r < 0)
                return r;

        for (i = 0; i < len; i++) {
                for (j = 0; j < sizeof(uint32_t) * 8; j++) {
                        uint32_t offset;
                        unsigned group;

                        offset = 1U << j;

                        if (!(groups[i] & offset))
                                continue;

                        group = i * sizeof(uint32_t) * 8 + j + 1;

                        r = hashmap_put(nl->broadcast_group_refs, UINT_TO_PTR(group), UINT_TO_PTR(1));
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}

int socket_bind(sd_netlink *nl) {
        socklen_t addrlen;
        int r, one = 1;

        r = setsockopt(nl->fd, SOL_NETLINK, NETLINK_PKTINFO, &one, sizeof(one));
        if (r < 0)
                return -errno;

        addrlen = sizeof(nl->sockaddr);

        r = bind(nl->fd, &nl->sockaddr.sa, addrlen);
        /* ignore EINVAL to allow opening an already bound socket */
        if (r < 0 && errno != EINVAL)
                return -errno;

        r = getsockname(nl->fd, &nl->sockaddr.sa, &addrlen);
        if (r < 0)
                return -errno;

        r = broadcast_groups_get(nl);
        if (r < 0)
                return r;

        return 0;
}

static unsigned broadcast_group_get_ref(sd_netlink *nl, unsigned group) {
        assert(nl);

        return PTR_TO_UINT(hashmap_get(nl->broadcast_group_refs, UINT_TO_PTR(group)));
}

static int broadcast_group_set_ref(sd_netlink *nl, unsigned group, unsigned n_ref) {
        int r;

        assert(nl);

        r = hashmap_replace(nl->broadcast_group_refs, UINT_TO_PTR(group), UINT_TO_PTR(n_ref));
        if (r < 0)
                return r;

        return 0;
}

static int broadcast_group_join(sd_netlink *nl, unsigned group) {
        int r;

        assert(nl);
        assert(nl->fd >= 0);
        assert(group > 0);

        r = setsockopt(nl->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &group, sizeof(group));
        if (r < 0)
                return -errno;

        return 0;
}

int socket_broadcast_group_ref(sd_netlink *nl, unsigned group) {
        unsigned n_ref;
        int r;

        assert(nl);

        n_ref = broadcast_group_get_ref(nl, group);

        n_ref++;

        r = hashmap_ensure_allocated(&nl->broadcast_group_refs, NULL);
        if (r < 0)
                return r;

        r = broadcast_group_set_ref(nl, group, n_ref);
        if (r < 0)
                return r;

        if (n_ref > 1)
                /* not yet in the group */
                return 0;

        r = broadcast_group_join(nl, group);
        if (r < 0)
                return r;

        return 0;
}

static int broadcast_group_leave(sd_netlink *nl, unsigned group) {
        int r;

        assert(nl);
        assert(nl->fd >= 0);
        assert(group > 0);

        if (nl->broadcast_group_dont_leave)
                return 0;

        r = setsockopt(nl->fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, &group, sizeof(group));
        if (r < 0)
                return -errno;

        return 0;
}

int socket_broadcast_group_unref(sd_netlink *nl, unsigned group) {
        unsigned n_ref;
        int r;

        assert(nl);

        n_ref = broadcast_group_get_ref(nl, group);

        assert(n_ref > 0);

        n_ref--;

        r = broadcast_group_set_ref(nl, group, n_ref);
        if (r < 0)
                return r;

        if (n_ref > 0)
                /* still refs left */
                return 0;

        r = broadcast_group_leave(nl, group);
        if (r < 0)
                return r;

        return 0;
}

/* returns the number of bytes sent, or a negative error code */
int socket_write_message(sd_netlink *nl, sd_netlink_message *m) {
        union {
                struct sockaddr sa;
                struct sockaddr_nl nl;
        } addr = {
                .nl.nl_family = AF_NETLINK,
        };
        ssize_t k;

        assert(nl);
        assert(m);
        assert(m->hdr);

        k = sendto(nl->fd, m->hdr, m->hdr->nlmsg_len,
                        0, &addr.sa, sizeof(addr));
        if (k < 0)
                return -errno;

        return k;
}

static int socket_recv_message(int fd, struct iovec *iov, uint32_t *_group, bool peek) {
        union sockaddr_union sender;
        uint8_t cmsg_buffer[CMSG_SPACE(sizeof(struct nl_pktinfo))];
        struct msghdr msg = {
                .msg_iov = iov,
                .msg_iovlen = 1,
                .msg_name = &sender,
                .msg_namelen = sizeof(sender),
                .msg_control = cmsg_buffer,
                .msg_controllen = sizeof(cmsg_buffer),
        };
        struct cmsghdr *cmsg;
        uint32_t group = 0;
        int r;

        assert(fd >= 0);
        assert(iov);

        r = recvmsg(fd, &msg, MSG_TRUNC | (peek ? MSG_PEEK : 0));
        if (r < 0) {
                /* no data */
                if (errno == ENOBUFS)
                        log_debug("rtnl: kernel receive buffer overrun");
                else if (errno == EAGAIN)
                        log_debug("rtnl: no data in socket");

                return (errno == EAGAIN || errno == EINTR) ? 0 : -errno;
        }

        if (sender.nl.nl_pid != 0) {
                /* not from the kernel, ignore */
                log_debug("rtnl: ignoring message from portid %"PRIu32, sender.nl.nl_pid);

                if (peek) {
                        /* drop the message */
                        r = recvmsg(fd, &msg, 0);
                        if (r < 0)
                                return (errno == EAGAIN || errno == EINTR) ? 0 : -errno;
                }

                return 0;
        }

        CMSG_FOREACH(cmsg, &msg) {
                if (cmsg->cmsg_level == SOL_NETLINK &&
                    cmsg->cmsg_type == NETLINK_PKTINFO &&
                    cmsg->cmsg_len == CMSG_LEN(sizeof(struct nl_pktinfo))) {
                        struct nl_pktinfo *pktinfo = (void *)CMSG_DATA(cmsg);

                        /* multi-cast group */
                        group = pktinfo->group;
                }
        }

        if (_group)
                *_group = group;

        return r;
}

/* On success, the number of bytes received is returned and *ret points to the received message
 * which has a valid header and the correct size.
 * If nothing useful was received 0 is returned.
 * On failure, a negative error code is returned.
 */
int socket_read_message(sd_netlink *rtnl) {
        _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *first = NULL;
        struct iovec iov = {};
        uint32_t group = 0;
        bool multi_part = false, done = false;
        struct nlmsghdr *new_msg;
        size_t len;
        int r;
        unsigned i = 0;

        assert(rtnl);
        assert(rtnl->rbuffer);
        assert(rtnl->rbuffer_allocated >= sizeof(struct nlmsghdr));

        /* read nothing, just get the pending message size */
        r = socket_recv_message(rtnl->fd, &iov, NULL, true);
        if (r <= 0)
                return r;
        else
                len = (size_t)r;

        /* make room for the pending message */
        if (!greedy_realloc((void **)&rtnl->rbuffer,
                            &rtnl->rbuffer_allocated,
                            len, sizeof(uint8_t)))
                return -ENOMEM;

        iov.iov_base = rtnl->rbuffer;
        iov.iov_len = rtnl->rbuffer_allocated;

        /* read the pending message */
        r = socket_recv_message(rtnl->fd, &iov, &group, false);
        if (r <= 0)
                return r;
        else
                len = (size_t)r;

        if (len > rtnl->rbuffer_allocated)
                /* message did not fit in read buffer */
                return -EIO;

        if (NLMSG_OK(rtnl->rbuffer, len) && rtnl->rbuffer->nlmsg_flags & NLM_F_MULTI) {
                multi_part = true;

                for (i = 0; i < rtnl->rqueue_partial_size; i++) {
                        if (rtnl_message_get_serial(rtnl->rqueue_partial[i]) ==
                            rtnl->rbuffer->nlmsg_seq) {
                                first = rtnl->rqueue_partial[i];
                                break;
                        }
                }
        }

        for (new_msg = rtnl->rbuffer; NLMSG_OK(new_msg, len) && !done; new_msg = NLMSG_NEXT(new_msg, len)) {
                _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL;
                const NLType *nl_type;

                if (!group && new_msg->nlmsg_pid != rtnl->sockaddr.nl.nl_pid)
                        /* not broadcast and not for us */
                        continue;

                if (new_msg->nlmsg_type == NLMSG_NOOP)
                        /* silently drop noop messages */
                        continue;

                if (new_msg->nlmsg_type == NLMSG_DONE) {
                        /* finished reading multi-part message */
                        done = true;

                        /* if first is not defined, put NLMSG_DONE into the receive queue. */
                        if (first)
                                continue;
                }

                /* check that we support this message type */
                r = type_system_get_type(&type_system_root, &nl_type, new_msg->nlmsg_type);
                if (r < 0) {
                        if (r == -EOPNOTSUPP)
                                log_debug("sd-netlink: ignored message with unknown type: %i",
                                          new_msg->nlmsg_type);

                        continue;
                }

                /* check that the size matches the message type */
                if (new_msg->nlmsg_len < NLMSG_LENGTH(type_get_size(nl_type))) {
                        log_debug("sd-netlink: message larger than expected, dropping");
                        continue;
                }

                r = message_new_empty(rtnl, &m);
                if (r < 0)
                        return r;

                m->broadcast = !!group;

                m->hdr = memdup(new_msg, new_msg->nlmsg_len);
                if (!m->hdr)
                        return -ENOMEM;

                /* seal and parse the top-level message */
                r = sd_netlink_message_rewind(m);
                if (r < 0)
                        return r;

                /* push the message onto the multi-part message stack */
                if (first)
                        m->next = first;
                first = m;
                m = NULL;
        }

        if (len)
                log_debug("sd-netlink: discarding %zu bytes of incoming message", len);

        if (!first)
                return 0;

        if (!multi_part || done) {
                /* we got a complete message, push it on the read queue */
                r = rtnl_rqueue_make_room(rtnl);
                if (r < 0)
                        return r;

                rtnl->rqueue[rtnl->rqueue_size++] = first;
                first = NULL;

                if (multi_part && (i < rtnl->rqueue_partial_size)) {
                        /* remove the message form the partial read queue */
                        memmove(rtnl->rqueue_partial + i,rtnl->rqueue_partial + i + 1,
                                sizeof(sd_netlink_message*) * (rtnl->rqueue_partial_size - i - 1));
                        rtnl->rqueue_partial_size--;
                }

                return 1;
        } else {
                /* we only got a partial multi-part message, push it on the
                   partial read queue */
                if (i < rtnl->rqueue_partial_size) {
                        rtnl->rqueue_partial[i] = first;
                } else {
                        r = rtnl_rqueue_partial_make_room(rtnl);
                        if (r < 0)
                                return r;

                        rtnl->rqueue_partial[rtnl->rqueue_partial_size++] = first;
                }
                first = NULL;

                return 0;
        }
}