/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ /*** This file is part of systemd. Copyright 2014 Lennart Poettering systemd is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. systemd is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with systemd; If not, see <http://www.gnu.org/licenses/>. ***/ #include <netinet/tcp.h> #include "missing.h" #include "resolved-dns-stream.h" #define DNS_STREAM_TIMEOUT_USEC (10 * USEC_PER_SEC) #define DNS_STREAMS_MAX 128 static void dns_stream_stop(DnsStream *s) { assert(s); s->io_event_source = sd_event_source_unref(s->io_event_source); s->timeout_event_source = sd_event_source_unref(s->timeout_event_source); s->fd = safe_close(s->fd); } static int dns_stream_update_io(DnsStream *s) { int f = 0; assert(s); if (s->write_packet && s->n_written < sizeof(s->write_size) + s->write_packet->size) f |= EPOLLOUT; if (!s->read_packet || s->n_read < sizeof(s->read_size) + s->read_packet->size) f |= EPOLLIN; return sd_event_source_set_io_events(s->io_event_source, f); } static int dns_stream_complete(DnsStream *s, int error) { assert(s); dns_stream_stop(s); if (s->complete) s->complete(s, error); else dns_stream_free(s); return 0; } static int dns_stream_identify(DnsStream *s) { union { struct cmsghdr header; /* For alignment */ uint8_t buffer[CMSG_SPACE(MAXSIZE(struct in_pktinfo, struct in6_pktinfo)) + EXTRA_CMSG_SPACE /* kernel appears to require extra space */]; } control; struct msghdr mh = {}; struct cmsghdr *cmsg; socklen_t sl; int r; assert(s); if (s->identified) return 0; /* Query the local side */ s->local_salen = sizeof(s->local); r = getsockname(s->fd, &s->local.sa, &s->local_salen); if (r < 0) return -errno; if (s->local.sa.sa_family == AF_INET6 && s->ifindex <= 0) s->ifindex = s->local.in6.sin6_scope_id; /* Query the remote side */ s->peer_salen = sizeof(s->peer); r = getpeername(s->fd, &s->peer.sa, &s->peer_salen); if (r < 0) return -errno; if (s->peer.sa.sa_family == AF_INET6 && s->ifindex <= 0) s->ifindex = s->peer.in6.sin6_scope_id; /* Check consistency */ assert(s->peer.sa.sa_family == s->local.sa.sa_family); assert(IN_SET(s->peer.sa.sa_family, AF_INET, AF_INET6)); /* Query connection meta information */ sl = sizeof(control); if (s->peer.sa.sa_family == AF_INET) { r = getsockopt(s->fd, IPPROTO_IP, IP_PKTOPTIONS, &control, &sl); if (r < 0) return -errno; } else if (s->peer.sa.sa_family == AF_INET6) { r = getsockopt(s->fd, IPPROTO_IPV6, IPV6_2292PKTOPTIONS, &control, &sl); if (r < 0) return -errno; } else return -EAFNOSUPPORT; mh.msg_control = &control; mh.msg_controllen = sl; CMSG_FOREACH(cmsg, &mh) { if (cmsg->cmsg_level == IPPROTO_IPV6) { assert(s->peer.sa.sa_family == AF_INET6); switch (cmsg->cmsg_type) { case IPV6_PKTINFO: { struct in6_pktinfo *i = (struct in6_pktinfo*) CMSG_DATA(cmsg); if (s->ifindex <= 0) s->ifindex = i->ipi6_ifindex; break; } case IPV6_HOPLIMIT: s->ttl = *(int *) CMSG_DATA(cmsg); break; } } else if (cmsg->cmsg_level == IPPROTO_IP) { assert(s->peer.sa.sa_family == AF_INET); switch (cmsg->cmsg_type) { case IP_PKTINFO: { struct in_pktinfo *i = (struct in_pktinfo*) CMSG_DATA(cmsg); if (s->ifindex <= 0) s->ifindex = i->ipi_ifindex; break; } case IP_TTL: s->ttl = *(int *) CMSG_DATA(cmsg); break; } } } /* The Linux kernel sets the interface index to the loopback * device if the connection came from the local host since it * avoids the routing table in such a case. Let's unset the * interface index in such a case. */ if (s->ifindex == LOOPBACK_IFINDEX) s->ifindex = 0; /* If we don't know the interface index still, we look for the * first local interface with a matching address. Yuck! */ if (s->ifindex <= 0) s->ifindex = manager_find_ifindex(s->manager, s->local.sa.sa_family, s->local.sa.sa_family == AF_INET ? (union in_addr_union*) &s->local.in.sin_addr : (union in_addr_union*) &s->local.in6.sin6_addr); if (s->protocol == DNS_PROTOCOL_LLMNR && s->ifindex > 0) { uint32_t ifindex = htobe32(s->ifindex); /* Make sure all packets for this connection are sent on the same interface */ if (s->local.sa.sa_family == AF_INET) { r = setsockopt(s->fd, IPPROTO_IP, IP_UNICAST_IF, &ifindex, sizeof(ifindex)); if (r < 0) log_debug_errno(errno, "Failed to invoke IP_UNICAST_IF: %m"); } else if (s->local.sa.sa_family == AF_INET6) { r = setsockopt(s->fd, IPPROTO_IPV6, IPV6_UNICAST_IF, &ifindex, sizeof(ifindex)); if (r < 0) log_debug_errno(errno, "Failed to invoke IPV6_UNICAST_IF: %m"); } } s->identified = true; return 0; } static int on_stream_timeout(sd_event_source *es, usec_t usec, void *userdata) { DnsStream *s = userdata; assert(s); return dns_stream_complete(s, ETIMEDOUT); } static int on_stream_io(sd_event_source *es, int fd, uint32_t revents, void *userdata) { DnsStream *s = userdata; int r; assert(s); r = dns_stream_identify(s); if (r < 0) return dns_stream_complete(s, -r); if ((revents & EPOLLOUT) && s->write_packet && s->n_written < sizeof(s->write_size) + s->write_packet->size) { struct iovec iov[2]; ssize_t ss; iov[0].iov_base = &s->write_size; iov[0].iov_len = sizeof(s->write_size); iov[1].iov_base = DNS_PACKET_DATA(s->write_packet); iov[1].iov_len = s->write_packet->size; IOVEC_INCREMENT(iov, 2, s->n_written); ss = writev(fd, iov, 2); if (ss < 0) { if (errno != EINTR && errno != EAGAIN) return dns_stream_complete(s, errno); } else s->n_written += ss; /* Are we done? If so, disable the event source for EPOLLOUT */ if (s->n_written >= sizeof(s->write_size) + s->write_packet->size) { r = dns_stream_update_io(s); if (r < 0) return dns_stream_complete(s, -r); } } if ((revents & (EPOLLIN|EPOLLHUP|EPOLLRDHUP)) && (!s->read_packet || s->n_read < sizeof(s->read_size) + s->read_packet->size)) { if (s->n_read < sizeof(s->read_size)) { ssize_t ss; ss = read(fd, (uint8_t*) &s->read_size + s->n_read, sizeof(s->read_size) - s->n_read); if (ss < 0) { if (errno != EINTR && errno != EAGAIN) return dns_stream_complete(s, errno); } else if (ss == 0) return dns_stream_complete(s, ECONNRESET); else s->n_read += ss; } if (s->n_read >= sizeof(s->read_size)) { if (be16toh(s->read_size) < DNS_PACKET_HEADER_SIZE) return dns_stream_complete(s, EBADMSG); if (s->n_read < sizeof(s->read_size) + be16toh(s->read_size)) { ssize_t ss; if (!s->read_packet) { r = dns_packet_new(&s->read_packet, s->protocol, be16toh(s->read_size)); if (r < 0) return dns_stream_complete(s, -r); s->read_packet->size = be16toh(s->read_size); s->read_packet->ipproto = IPPROTO_TCP; s->read_packet->family = s->peer.sa.sa_family; s->read_packet->ttl = s->ttl; s->read_packet->ifindex = s->ifindex; if (s->read_packet->family == AF_INET) { s->read_packet->sender.in = s->peer.in.sin_addr; s->read_packet->sender_port = be16toh(s->peer.in.sin_port); s->read_packet->destination.in = s->local.in.sin_addr; s->read_packet->destination_port = be16toh(s->local.in.sin_port); } else { assert(s->read_packet->family == AF_INET6); s->read_packet->sender.in6 = s->peer.in6.sin6_addr; s->read_packet->sender_port = be16toh(s->peer.in6.sin6_port); s->read_packet->destination.in6 = s->local.in6.sin6_addr; s->read_packet->destination_port = be16toh(s->local.in6.sin6_port); if (s->read_packet->ifindex == 0) s->read_packet->ifindex = s->peer.in6.sin6_scope_id; if (s->read_packet->ifindex == 0) s->read_packet->ifindex = s->local.in6.sin6_scope_id; } } ss = read(fd, (uint8_t*) DNS_PACKET_DATA(s->read_packet) + s->n_read - sizeof(s->read_size), sizeof(s->read_size) + be16toh(s->read_size) - s->n_read); if (ss < 0) { if (errno != EINTR && errno != EAGAIN) return dns_stream_complete(s, errno); } else if (ss == 0) return dns_stream_complete(s, ECONNRESET); else s->n_read += ss; } /* Are we done? If so, disable the event source for EPOLLIN */ if (s->n_read >= sizeof(s->read_size) + be16toh(s->read_size)) { r = dns_stream_update_io(s); if (r < 0) return dns_stream_complete(s, -r); /* If there's a packet handler * installed, call that. Note that * this is optional... */ if (s->on_packet) return s->on_packet(s); } } } if ((s->write_packet && s->n_written >= sizeof(s->write_size) + s->write_packet->size) && (s->read_packet && s->n_read >= sizeof(s->read_size) + s->read_packet->size)) return dns_stream_complete(s, 0); return 0; } DnsStream *dns_stream_free(DnsStream *s) { if (!s) return NULL; dns_stream_stop(s); if (s->manager) { LIST_REMOVE(streams, s->manager->dns_streams, s); s->manager->n_dns_streams--; } dns_packet_unref(s->write_packet); dns_packet_unref(s->read_packet); free(s); return 0; } DEFINE_TRIVIAL_CLEANUP_FUNC(DnsStream*, dns_stream_free); int dns_stream_new(Manager *m, DnsStream **ret, DnsProtocol protocol, int fd) { static const int one = 1; _cleanup_(dns_stream_freep) DnsStream *s = NULL; int r; assert(m); assert(fd >= 0); if (m->n_dns_streams > DNS_STREAMS_MAX) return -EBUSY; s = new0(DnsStream, 1); if (!s) return -ENOMEM; s->fd = -1; s->protocol = protocol; r = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)); if (r < 0) return -errno; r = sd_event_add_io(m->event, &s->io_event_source, fd, EPOLLIN, on_stream_io, s); if (r < 0) return r; r = sd_event_add_time( m->event, &s->timeout_event_source, clock_boottime_or_monotonic(), now(clock_boottime_or_monotonic()) + DNS_STREAM_TIMEOUT_USEC, 0, on_stream_timeout, s); if (r < 0) return r; LIST_PREPEND(streams, m->dns_streams, s); s->manager = m; s->fd = fd; m->n_dns_streams++; *ret = s; s = NULL; return 0; } int dns_stream_write_packet(DnsStream *s, DnsPacket *p) { assert(s); if (s->write_packet) return -EBUSY; s->write_packet = dns_packet_ref(p); s->write_size = htobe16(p->size); s->n_written = 0; return dns_stream_update_io(s); }