summaryrefslogtreecommitdiff
path: root/net/netlink/af_netlink.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/netlink/af_netlink.c')
-rw-r--r--net/netlink/af_netlink.c258
1 files changed, 172 insertions, 86 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index bf6e76643..a77498548 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -76,17 +76,18 @@ struct listeners {
};
/* state bits */
-#define NETLINK_CONGESTED 0x0
+#define NETLINK_S_CONGESTED 0x0
/* flags */
-#define NETLINK_KERNEL_SOCKET 0x1
-#define NETLINK_RECV_PKTINFO 0x2
-#define NETLINK_BROADCAST_SEND_ERROR 0x4
-#define NETLINK_RECV_NO_ENOBUFS 0x8
+#define NETLINK_F_KERNEL_SOCKET 0x1
+#define NETLINK_F_RECV_PKTINFO 0x2
+#define NETLINK_F_BROADCAST_SEND_ERROR 0x4
+#define NETLINK_F_RECV_NO_ENOBUFS 0x8
+#define NETLINK_F_LISTEN_ALL_NSID 0x10
static inline int netlink_is_kernel(struct sock *sk)
{
- return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
+ return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
}
struct netlink_table *nl_table __read_mostly;
@@ -157,7 +158,7 @@ static int __netlink_remove_tap(struct netlink_tap *nt)
out:
spin_unlock(&netlink_tap_lock);
- if (found && nt->module)
+ if (found)
module_put(nt->module);
return found ? 0 : -ENODEV;
@@ -256,8 +257,9 @@ static void netlink_overrun(struct sock *sk)
{
struct netlink_sock *nlk = nlk_sk(sk);
- if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
- if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) {
+ if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) {
+ if (!test_and_set_bit(NETLINK_S_CONGESTED,
+ &nlk_sk(sk)->state)) {
sk->sk_err = ENOBUFS;
sk->sk_error_report(sk);
}
@@ -270,8 +272,8 @@ static void netlink_rcv_wake(struct sock *sk)
struct netlink_sock *nlk = nlk_sk(sk);
if (skb_queue_empty(&sk->sk_receive_queue))
- clear_bit(NETLINK_CONGESTED, &nlk->state);
- if (!test_bit(NETLINK_CONGESTED, &nlk->state))
+ clear_bit(NETLINK_S_CONGESTED, &nlk->state);
+ if (!test_bit(NETLINK_S_CONGESTED, &nlk->state))
wake_up_interruptible(&nlk->wait);
}
@@ -355,25 +357,52 @@ err1:
return NULL;
}
+
+static void
+__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
+ unsigned int order)
+{
+ struct netlink_sock *nlk = nlk_sk(sk);
+ struct sk_buff_head *queue;
+ struct netlink_ring *ring;
+
+ queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
+ ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+
+ spin_lock_bh(&queue->lock);
+
+ ring->frame_max = req->nm_frame_nr - 1;
+ ring->head = 0;
+ ring->frame_size = req->nm_frame_size;
+ ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
+
+ swap(ring->pg_vec_len, req->nm_block_nr);
+ swap(ring->pg_vec_order, order);
+ swap(ring->pg_vec, pg_vec);
+
+ __skb_queue_purge(queue);
+ spin_unlock_bh(&queue->lock);
+
+ WARN_ON(atomic_read(&nlk->mapped));
+
+ if (pg_vec)
+ free_pg_vec(pg_vec, order, req->nm_block_nr);
+}
+
static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
- bool closing, bool tx_ring)
+ bool tx_ring)
{
struct netlink_sock *nlk = nlk_sk(sk);
struct netlink_ring *ring;
- struct sk_buff_head *queue;
void **pg_vec = NULL;
unsigned int order = 0;
- int err;
ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
- queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
- if (!closing) {
- if (atomic_read(&nlk->mapped))
- return -EBUSY;
- if (atomic_read(&ring->pending))
- return -EBUSY;
- }
+ if (atomic_read(&nlk->mapped))
+ return -EBUSY;
+ if (atomic_read(&ring->pending))
+ return -EBUSY;
if (req->nm_block_nr) {
if (ring->pg_vec != NULL)
@@ -405,31 +434,19 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
return -EINVAL;
}
- err = -EBUSY;
mutex_lock(&nlk->pg_vec_lock);
- if (closing || atomic_read(&nlk->mapped) == 0) {
- err = 0;
- spin_lock_bh(&queue->lock);
-
- ring->frame_max = req->nm_frame_nr - 1;
- ring->head = 0;
- ring->frame_size = req->nm_frame_size;
- ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
-
- swap(ring->pg_vec_len, req->nm_block_nr);
- swap(ring->pg_vec_order, order);
- swap(ring->pg_vec, pg_vec);
-
- __skb_queue_purge(queue);
- spin_unlock_bh(&queue->lock);
-
- WARN_ON(atomic_read(&nlk->mapped));
+ if (atomic_read(&nlk->mapped) == 0) {
+ __netlink_set_ring(sk, req, tx_ring, pg_vec, order);
+ mutex_unlock(&nlk->pg_vec_lock);
+ return 0;
}
+
mutex_unlock(&nlk->pg_vec_lock);
if (pg_vec)
free_pg_vec(pg_vec, order, req->nm_block_nr);
- return err;
+
+ return -EBUSY;
}
static void netlink_mm_open(struct vm_area_struct *vma)
@@ -898,10 +915,10 @@ static void netlink_sock_destruct(struct sock *sk)
memset(&req, 0, sizeof(req));
if (nlk->rx_ring.pg_vec)
- netlink_set_ring(sk, &req, true, false);
+ __netlink_set_ring(sk, &req, false, NULL, 0);
memset(&req, 0, sizeof(req));
if (nlk->tx_ring.pg_vec)
- netlink_set_ring(sk, &req, true, true);
+ __netlink_set_ring(sk, &req, true, NULL, 0);
}
#endif /* CONFIG_NETLINK_MMAP */
@@ -1079,6 +1096,11 @@ static int netlink_insert(struct sock *sk, u32 portid)
err = __netlink_insert(table, sk);
if (err) {
+ /* In case the hashtable backend returns with -EBUSY
+ * from here, it must not escape to the caller.
+ */
+ if (unlikely(err == -EBUSY))
+ err = -EOVERFLOW;
if (err == -EEXIST)
err = -EADDRINUSE;
nlk_sk(sk)->portid = 0;
@@ -1118,14 +1140,15 @@ static struct proto netlink_proto = {
};
static int __netlink_create(struct net *net, struct socket *sock,
- struct mutex *cb_mutex, int protocol)
+ struct mutex *cb_mutex, int protocol,
+ int kern)
{
struct sock *sk;
struct netlink_sock *nlk;
sock->ops = &netlink_ops;
- sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
+ sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern);
if (!sk)
return -ENOMEM;
@@ -1187,7 +1210,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
if (err < 0)
goto out;
- err = __netlink_create(net, sock, cb_mutex, protocol);
+ err = __netlink_create(net, sock, cb_mutex, protocol, kern);
if (err < 0)
goto out_module;
@@ -1297,20 +1320,24 @@ static int netlink_autobind(struct socket *sock)
struct netlink_table *table = &nl_table[sk->sk_protocol];
s32 portid = task_tgid_vnr(current);
int err;
- static s32 rover = -4097;
+ s32 rover = -4096;
+ bool ok;
retry:
cond_resched();
rcu_read_lock();
- if (__netlink_lookup(table, portid, net)) {
+ ok = !__netlink_lookup(table, portid, net);
+ rcu_read_unlock();
+ if (!ok) {
/* Bind collision, search negative portid values. */
- portid = rover--;
- if (rover > -4097)
+ if (rover == -4096)
+ /* rover will be in range [S32_MIN, -4097] */
+ rover = S32_MIN + prandom_u32_max(-4096 - S32_MIN);
+ else if (rover >= -4096)
rover = -4097;
- rcu_read_unlock();
+ portid = rover--;
goto retry;
}
- rcu_read_unlock();
err = netlink_insert(sk, portid);
if (err == -EADDRINUSE)
@@ -1657,7 +1684,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
nlk = nlk_sk(sk);
if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_CONGESTED, &nlk->state)) &&
+ test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
!netlink_skb_is_mmaped(skb)) {
DECLARE_WAITQUEUE(wait, current);
if (!*timeo) {
@@ -1672,7 +1699,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
add_wait_queue(&nlk->wait, &wait);
if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_CONGESTED, &nlk->state)) &&
+ test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
!sock_flag(sk, SOCK_DEAD))
*timeo = schedule_timeout(*timeo);
@@ -1896,7 +1923,7 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
struct netlink_sock *nlk = nlk_sk(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
- !test_bit(NETLINK_CONGESTED, &nlk->state)) {
+ !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
netlink_skb_set_owner_r(skb, sk);
__netlink_sendskb(sk, skb);
return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
@@ -1932,8 +1959,17 @@ static void do_one_broadcast(struct sock *sk,
!test_bit(p->group - 1, nlk->groups))
return;
- if (!net_eq(sock_net(sk), p->net))
- return;
+ if (!net_eq(sock_net(sk), p->net)) {
+ if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID))
+ return;
+
+ if (!peernet_has_id(sock_net(sk), p->net))
+ return;
+
+ if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns,
+ CAP_NET_BROADCAST))
+ return;
+ }
if (p->failure) {
netlink_overrun(sk);
@@ -1957,23 +1993,33 @@ static void do_one_broadcast(struct sock *sk,
netlink_overrun(sk);
/* Clone failed. Notify ALL listeners. */
p->failure = 1;
- if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
+ if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
p->delivery_failure = 1;
- } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
+ goto out;
+ }
+ if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
kfree_skb(p->skb2);
p->skb2 = NULL;
- } else if (sk_filter(sk, p->skb2)) {
+ goto out;
+ }
+ if (sk_filter(sk, p->skb2)) {
kfree_skb(p->skb2);
p->skb2 = NULL;
- } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
+ goto out;
+ }
+ NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net);
+ NETLINK_CB(p->skb2).nsid_is_set = true;
+ val = netlink_broadcast_deliver(sk, p->skb2);
+ if (val < 0) {
netlink_overrun(sk);
- if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
+ if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
p->delivery_failure = 1;
} else {
p->congested |= val;
p->delivered = 1;
p->skb2 = NULL;
}
+out:
sock_put(sk);
}
@@ -2058,7 +2104,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
!test_bit(p->group - 1, nlk->groups))
goto out;
- if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
+ if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) {
ret = 1;
goto out;
}
@@ -2077,7 +2123,7 @@ out:
* @code: error code, must be negative (as usual in kernelspace)
*
* This function returns the number of broadcast listeners that have set the
- * NETLINK_RECV_NO_ENOBUFS socket option.
+ * NETLINK_NO_ENOBUFS socket option.
*/
int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
{
@@ -2137,9 +2183,9 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case NETLINK_PKTINFO:
if (val)
- nlk->flags |= NETLINK_RECV_PKTINFO;
+ nlk->flags |= NETLINK_F_RECV_PKTINFO;
else
- nlk->flags &= ~NETLINK_RECV_PKTINFO;
+ nlk->flags &= ~NETLINK_F_RECV_PKTINFO;
err = 0;
break;
case NETLINK_ADD_MEMBERSHIP:
@@ -2168,18 +2214,18 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
}
case NETLINK_BROADCAST_ERROR:
if (val)
- nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
+ nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR;
else
- nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
+ nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR;
err = 0;
break;
case NETLINK_NO_ENOBUFS:
if (val) {
- nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
- clear_bit(NETLINK_CONGESTED, &nlk->state);
+ nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS;
+ clear_bit(NETLINK_S_CONGESTED, &nlk->state);
wake_up_interruptible(&nlk->wait);
} else {
- nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
+ nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS;
}
err = 0;
break;
@@ -2197,11 +2243,21 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
if (copy_from_user(&req, optval, sizeof(req)))
return -EFAULT;
- err = netlink_set_ring(sk, &req, false,
+ err = netlink_set_ring(sk, &req,
optname == NETLINK_TX_RING);
break;
}
#endif /* CONFIG_NETLINK_MMAP */
+ case NETLINK_LISTEN_ALL_NSID:
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
+ return -EPERM;
+
+ if (val)
+ nlk->flags |= NETLINK_F_LISTEN_ALL_NSID;
+ else
+ nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID;
+ err = 0;
+ break;
default:
err = -ENOPROTOOPT;
}
@@ -2228,7 +2284,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
if (len < sizeof(int))
return -EINVAL;
len = sizeof(int);
- val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
+ val = nlk->flags & NETLINK_F_RECV_PKTINFO ? 1 : 0;
if (put_user(len, optlen) ||
put_user(val, optval))
return -EFAULT;
@@ -2238,7 +2294,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
if (len < sizeof(int))
return -EINVAL;
len = sizeof(int);
- val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
+ val = nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR ? 1 : 0;
if (put_user(len, optlen) ||
put_user(val, optval))
return -EFAULT;
@@ -2248,12 +2304,34 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
if (len < sizeof(int))
return -EINVAL;
len = sizeof(int);
- val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
+ val = nlk->flags & NETLINK_F_RECV_NO_ENOBUFS ? 1 : 0;
if (put_user(len, optlen) ||
put_user(val, optval))
return -EFAULT;
err = 0;
break;
+ case NETLINK_LIST_MEMBERSHIPS: {
+ int pos, idx, shift;
+
+ err = 0;
+ netlink_table_grab();
+ for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) {
+ if (len - pos < sizeof(u32))
+ break;
+
+ idx = pos / sizeof(unsigned long);
+ shift = (pos % sizeof(unsigned long)) * 8;
+ if (put_user((u32)(nlk->groups[idx] >> shift),
+ (u32 __user *)(optval + pos))) {
+ err = -EFAULT;
+ break;
+ }
+ }
+ if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen))
+ err = -EFAULT;
+ netlink_table_ungrab();
+ break;
+ }
default:
err = -ENOPROTOOPT;
}
@@ -2268,6 +2346,16 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}
+static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
+{
+ if (!NETLINK_CB(skb).nsid_is_set)
+ return;
+
+ put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int),
+ &NETLINK_CB(skb).nsid);
+}
+
static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
@@ -2313,7 +2401,7 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
* sendmsg(), but that's what we've got...
*/
if (netlink_tx_is_mmaped(sk) &&
- msg->msg_iter.type == ITER_IOVEC &&
+ iter_is_iovec(&msg->msg_iter) &&
msg->msg_iter.nr_segs == 1 &&
msg->msg_iter.iov->iov_base == NULL) {
err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
@@ -2419,8 +2507,10 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
msg->msg_namelen = sizeof(*addr);
}
- if (nlk->flags & NETLINK_RECV_PKTINFO)
+ if (nlk->flags & NETLINK_F_RECV_PKTINFO)
netlink_cmsg_recv_pktinfo(msg, skb);
+ if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
+ netlink_cmsg_listen_all_nsid(sk, msg, skb);
memset(&scm, 0, sizeof(scm));
scm.creds = *NETLINK_CREDS(skb);
@@ -2474,17 +2564,10 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
return NULL;
- /*
- * We have to just have a reference on the net from sk, but don't
- * get_net it. Besides, we cannot get and then put the net here.
- * So we create one inside init_net and the move it to net.
- */
-
- if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
+ if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0)
goto out_sock_release_nosk;
sk = sock->sk;
- sk_change_net(sk, net);
if (!cfg || cfg->groups < 32)
groups = 32;
@@ -2503,7 +2586,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
goto out_sock_release;
nlk = nlk_sk(sk);
- nlk->flags |= NETLINK_KERNEL_SOCKET;
+ nlk->flags |= NETLINK_F_KERNEL_SOCKET;
netlink_table_grab();
if (!nl_table[unit].registered) {
@@ -2540,7 +2623,10 @@ EXPORT_SYMBOL(__netlink_kernel_create);
void
netlink_kernel_release(struct sock *sk)
{
- sk_release_kernel(sk);
+ if (sk == NULL || sk->sk_socket == NULL)
+ return;
+
+ sock_release(sk->sk_socket);
}
EXPORT_SYMBOL(netlink_kernel_release);