diff options
Diffstat (limited to 'src/libsystemd/src/sd-netlink/netlink-socket.c')
-rw-r--r-- | src/libsystemd/src/sd-netlink/netlink-socket.c | 474 |
1 files changed, 474 insertions, 0 deletions
diff --git a/src/libsystemd/src/sd-netlink/netlink-socket.c b/src/libsystemd/src/sd-netlink/netlink-socket.c new file mode 100644 index 0000000000..f9a0df9f20 --- /dev/null +++ b/src/libsystemd/src/sd-netlink/netlink-socket.c @@ -0,0 +1,474 @@ +/*** + This file is part of systemd. + + Copyright 2013 Tom Gundersen <teg@jklm.no> + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>. +***/ + +#include <netinet/in.h> +#include <stdbool.h> +#include <unistd.h> + +#include "systemd-basic/alloc-util.h" +#include "systemd-basic/formats-util.h" +#include "systemd-basic/missing.h" +#include "systemd-basic/refcnt.h" +#include "systemd-basic/socket-util.h" +#include "systemd-basic/util.h" +#include "systemd-staging/sd-netlink.h" + +#include "netlink-internal.h" +#include "netlink-types.h" +#include "netlink-util.h" + +int socket_open(int family) { + int fd; + + fd = socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC|SOCK_NONBLOCK, family); + if (fd < 0) + return -errno; + + return fd; +} + +static int broadcast_groups_get(sd_netlink *nl) { + _cleanup_free_ uint32_t *groups = NULL; + socklen_t len = 0, old_len; + unsigned i, j; + int r; + + assert(nl); + assert(nl->fd >= 0); + + r = getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, NULL, &len); + if (r < 0) { + if (errno == ENOPROTOOPT) { + nl->broadcast_group_dont_leave = true; + return 0; + } else + return -errno; + } + + if (len == 0) + return 0; + + groups = new0(uint32_t, len); + if (!groups) + return -ENOMEM; + + old_len = len; + + r = getsockopt(nl->fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, groups, &len); + if (r < 0) + return -errno; + + if (old_len != len) + return -EIO; + + r = hashmap_ensure_allocated(&nl->broadcast_group_refs, NULL); + if (r < 0) + return r; + + for (i = 0; i < len; i++) { + for (j = 0; j < sizeof(uint32_t) * 8; j++) { + uint32_t offset; + unsigned group; + + offset = 1U << j; + + if (!(groups[i] & offset)) + continue; + + group = i * sizeof(uint32_t) * 8 + j + 1; + + r = hashmap_put(nl->broadcast_group_refs, UINT_TO_PTR(group), UINT_TO_PTR(1)); + if (r < 0) + return r; + } + } + + return 0; +} + +int socket_bind(sd_netlink *nl) { + socklen_t addrlen; + int r, one = 1; + + r = setsockopt(nl->fd, SOL_NETLINK, NETLINK_PKTINFO, &one, sizeof(one)); + if (r < 0) + return -errno; + + addrlen = sizeof(nl->sockaddr); + + r = bind(nl->fd, &nl->sockaddr.sa, addrlen); + /* ignore EINVAL to allow opening an already bound socket */ + if (r < 0 && errno != EINVAL) + return -errno; + + r = getsockname(nl->fd, &nl->sockaddr.sa, &addrlen); + if (r < 0) + return -errno; + + r = broadcast_groups_get(nl); + if (r < 0) + return r; + + return 0; +} + +static unsigned broadcast_group_get_ref(sd_netlink *nl, unsigned group) { + assert(nl); + + return PTR_TO_UINT(hashmap_get(nl->broadcast_group_refs, UINT_TO_PTR(group))); +} + +static int broadcast_group_set_ref(sd_netlink *nl, unsigned group, unsigned n_ref) { + int r; + + assert(nl); + + r = hashmap_replace(nl->broadcast_group_refs, UINT_TO_PTR(group), UINT_TO_PTR(n_ref)); + if (r < 0) + return r; + + return 0; +} + +static int broadcast_group_join(sd_netlink *nl, unsigned group) { + int r; + + assert(nl); + assert(nl->fd >= 0); + assert(group > 0); + + r = setsockopt(nl->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &group, sizeof(group)); + if (r < 0) + return -errno; + + return 0; +} + +int socket_broadcast_group_ref(sd_netlink *nl, unsigned group) { + unsigned n_ref; + int r; + + assert(nl); + + n_ref = broadcast_group_get_ref(nl, group); + + n_ref++; + + r = hashmap_ensure_allocated(&nl->broadcast_group_refs, NULL); + if (r < 0) + return r; + + r = broadcast_group_set_ref(nl, group, n_ref); + if (r < 0) + return r; + + if (n_ref > 1) + /* not yet in the group */ + return 0; + + r = broadcast_group_join(nl, group); + if (r < 0) + return r; + + return 0; +} + +static int broadcast_group_leave(sd_netlink *nl, unsigned group) { + int r; + + assert(nl); + assert(nl->fd >= 0); + assert(group > 0); + + if (nl->broadcast_group_dont_leave) + return 0; + + r = setsockopt(nl->fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, &group, sizeof(group)); + if (r < 0) + return -errno; + + return 0; +} + +int socket_broadcast_group_unref(sd_netlink *nl, unsigned group) { + unsigned n_ref; + int r; + + assert(nl); + + n_ref = broadcast_group_get_ref(nl, group); + + assert(n_ref > 0); + + n_ref--; + + r = broadcast_group_set_ref(nl, group, n_ref); + if (r < 0) + return r; + + if (n_ref > 0) + /* still refs left */ + return 0; + + r = broadcast_group_leave(nl, group); + if (r < 0) + return r; + + return 0; +} + +/* returns the number of bytes sent, or a negative error code */ +int socket_write_message(sd_netlink *nl, sd_netlink_message *m) { + union { + struct sockaddr sa; + struct sockaddr_nl nl; + } addr = { + .nl.nl_family = AF_NETLINK, + }; + ssize_t k; + + assert(nl); + assert(m); + assert(m->hdr); + + k = sendto(nl->fd, m->hdr, m->hdr->nlmsg_len, + 0, &addr.sa, sizeof(addr)); + if (k < 0) + return -errno; + + return k; +} + +static int socket_recv_message(int fd, struct iovec *iov, uint32_t *_group, bool peek) { + union sockaddr_union sender; + uint8_t cmsg_buffer[CMSG_SPACE(sizeof(struct nl_pktinfo))]; + struct msghdr msg = { + .msg_iov = iov, + .msg_iovlen = 1, + .msg_name = &sender, + .msg_namelen = sizeof(sender), + .msg_control = cmsg_buffer, + .msg_controllen = sizeof(cmsg_buffer), + }; + struct cmsghdr *cmsg; + uint32_t group = 0; + int r; + + assert(fd >= 0); + assert(iov); + + r = recvmsg(fd, &msg, MSG_TRUNC | (peek ? MSG_PEEK : 0)); + if (r < 0) { + /* no data */ + if (errno == ENOBUFS) + log_debug("rtnl: kernel receive buffer overrun"); + else if (errno == EAGAIN) + log_debug("rtnl: no data in socket"); + + return (errno == EAGAIN || errno == EINTR) ? 0 : -errno; + } + + if (sender.nl.nl_pid != 0) { + /* not from the kernel, ignore */ + log_debug("rtnl: ignoring message from portid %"PRIu32, sender.nl.nl_pid); + + if (peek) { + /* drop the message */ + r = recvmsg(fd, &msg, 0); + if (r < 0) + return (errno == EAGAIN || errno == EINTR) ? 0 : -errno; + } + + return 0; + } + + CMSG_FOREACH(cmsg, &msg) { + if (cmsg->cmsg_level == SOL_NETLINK && + cmsg->cmsg_type == NETLINK_PKTINFO && + cmsg->cmsg_len == CMSG_LEN(sizeof(struct nl_pktinfo))) { + struct nl_pktinfo *pktinfo = (void *)CMSG_DATA(cmsg); + + /* multi-cast group */ + group = pktinfo->group; + } + } + + if (_group) + *_group = group; + + return r; +} + +/* On success, the number of bytes received is returned and *ret points to the received message + * which has a valid header and the correct size. + * If nothing useful was received 0 is returned. + * On failure, a negative error code is returned. + */ +int socket_read_message(sd_netlink *rtnl) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *first = NULL; + struct iovec iov = {}; + uint32_t group = 0; + bool multi_part = false, done = false; + struct nlmsghdr *new_msg; + size_t len; + int r; + unsigned i = 0; + + assert(rtnl); + assert(rtnl->rbuffer); + assert(rtnl->rbuffer_allocated >= sizeof(struct nlmsghdr)); + + /* read nothing, just get the pending message size */ + r = socket_recv_message(rtnl->fd, &iov, NULL, true); + if (r <= 0) + return r; + else + len = (size_t)r; + + /* make room for the pending message */ + if (!greedy_realloc((void **)&rtnl->rbuffer, + &rtnl->rbuffer_allocated, + len, sizeof(uint8_t))) + return -ENOMEM; + + iov.iov_base = rtnl->rbuffer; + iov.iov_len = rtnl->rbuffer_allocated; + + /* read the pending message */ + r = socket_recv_message(rtnl->fd, &iov, &group, false); + if (r <= 0) + return r; + else + len = (size_t)r; + + if (len > rtnl->rbuffer_allocated) + /* message did not fit in read buffer */ + return -EIO; + + if (NLMSG_OK(rtnl->rbuffer, len) && rtnl->rbuffer->nlmsg_flags & NLM_F_MULTI) { + multi_part = true; + + for (i = 0; i < rtnl->rqueue_partial_size; i++) { + if (rtnl_message_get_serial(rtnl->rqueue_partial[i]) == + rtnl->rbuffer->nlmsg_seq) { + first = rtnl->rqueue_partial[i]; + break; + } + } + } + + for (new_msg = rtnl->rbuffer; NLMSG_OK(new_msg, len) && !done; new_msg = NLMSG_NEXT(new_msg, len)) { + _cleanup_(sd_netlink_message_unrefp) sd_netlink_message *m = NULL; + const NLType *nl_type; + + if (!group && new_msg->nlmsg_pid != rtnl->sockaddr.nl.nl_pid) + /* not broadcast and not for us */ + continue; + + if (new_msg->nlmsg_type == NLMSG_NOOP) + /* silently drop noop messages */ + continue; + + if (new_msg->nlmsg_type == NLMSG_DONE) { + /* finished reading multi-part message */ + done = true; + + /* if first is not defined, put NLMSG_DONE into the receive queue. */ + if (first) + continue; + } + + /* check that we support this message type */ + r = type_system_get_type(&type_system_root, &nl_type, new_msg->nlmsg_type); + if (r < 0) { + if (r == -EOPNOTSUPP) + log_debug("sd-netlink: ignored message with unknown type: %i", + new_msg->nlmsg_type); + + continue; + } + + /* check that the size matches the message type */ + if (new_msg->nlmsg_len < NLMSG_LENGTH(type_get_size(nl_type))) { + log_debug("sd-netlink: message larger than expected, dropping"); + continue; + } + + r = message_new_empty(rtnl, &m); + if (r < 0) + return r; + + m->broadcast = !!group; + + m->hdr = memdup(new_msg, new_msg->nlmsg_len); + if (!m->hdr) + return -ENOMEM; + + /* seal and parse the top-level message */ + r = sd_netlink_message_rewind(m); + if (r < 0) + return r; + + /* push the message onto the multi-part message stack */ + if (first) + m->next = first; + first = m; + m = NULL; + } + + if (len) + log_debug("sd-netlink: discarding %zu bytes of incoming message", len); + + if (!first) + return 0; + + if (!multi_part || done) { + /* we got a complete message, push it on the read queue */ + r = rtnl_rqueue_make_room(rtnl); + if (r < 0) + return r; + + rtnl->rqueue[rtnl->rqueue_size++] = first; + first = NULL; + + if (multi_part && (i < rtnl->rqueue_partial_size)) { + /* remove the message form the partial read queue */ + memmove(rtnl->rqueue_partial + i,rtnl->rqueue_partial + i + 1, + sizeof(sd_netlink_message*) * (rtnl->rqueue_partial_size - i - 1)); + rtnl->rqueue_partial_size--; + } + + return 1; + } else { + /* we only got a partial multi-part message, push it on the + partial read queue */ + if (i < rtnl->rqueue_partial_size) { + rtnl->rqueue_partial[i] = first; + } else { + r = rtnl_rqueue_partial_make_room(rtnl); + if (r < 0) + return r; + + rtnl->rqueue_partial[rtnl->rqueue_partial_size++] = first; + } + first = NULL; + + return 0; + } +} |