diff options
Diffstat (limited to 'net/netfilter')
25 files changed, 241 insertions, 144 deletions
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index b0bc475f6..2e8e7e5fb 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -95,7 +95,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) if (!nested) goto nla_put_failure; if (mtype_do_head(skb, map) || - nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 29dde2083..9a065f672 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -267,6 +267,8 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[], e.id = ip_to_id(map, ip); if (tb[IPSET_ATTR_ETHER]) { + if (nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN) + return -IPSET_ERR_PROTOCOL; memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); e.add_mac = 1; } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 95db43fc0..a748b0c2c 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -497,6 +497,26 @@ __ip_set_put(struct ip_set *set) write_unlock_bh(&ip_set_ref_lock); } +/* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need + * a separate reference counter + */ +static inline void +__ip_set_get_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + set->ref_netlink++; + write_unlock_bh(&ip_set_ref_lock); +} + +static inline void +__ip_set_put_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + BUG_ON(set->ref_netlink == 0); + set->ref_netlink--; + write_unlock_bh(&ip_set_ref_lock); +} + /* Add, del and test set entries from kernel. * * The set behind the index must exist and must be referenced @@ -985,6 +1005,9 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; + /* Must wait for flush to be really finished in list:set */ + rcu_barrier(); + /* Commands are serialized and references are * protected by the ip_set_ref_lock. * External systems (i.e. xt_set) must call @@ -999,7 +1022,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s && s->ref) { + if (s && (s->ref || s->ref_netlink)) { ret = -IPSET_ERR_BUSY; goto out; } @@ -1021,7 +1044,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, if (!s) { ret = -ENOENT; goto out; - } else if (s->ref) { + } else if (s->ref || s->ref_netlink) { ret = -IPSET_ERR_BUSY; goto out; } @@ -1168,6 +1191,9 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, from->family == to->family)) return -IPSET_ERR_TYPE_MISMATCH; + if (from->ref_netlink || to->ref_netlink) + return -EBUSY; + strncpy(from_name, from->name, IPSET_MAXNAMELEN); strncpy(from->name, to->name, IPSET_MAXNAMELEN); strncpy(to->name, from_name, IPSET_MAXNAMELEN); @@ -1203,7 +1229,7 @@ ip_set_dump_done(struct netlink_callback *cb) if (set->variant->uref) set->variant->uref(set, cb, false); pr_debug("release set %s\n", set->name); - __ip_set_put_byindex(inst, index); + __ip_set_put_netlink(set); } return 0; } @@ -1325,7 +1351,7 @@ dump_last: if (!cb->args[IPSET_CB_ARG0]) { /* Start listing: make sure set won't be destroyed */ pr_debug("reference set\n"); - set->ref++; + set->ref_netlink++; } write_unlock_bh(&ip_set_ref_lock); nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, @@ -1393,7 +1419,7 @@ release_refcount: if (set->variant->uref) set->variant->uref(set, cb, false); pr_debug("release set %s\n", set->name); - __ip_set_put_byindex(inst, index); + __ip_set_put_netlink(set); cb->args[IPSET_CB_ARG0] = 0; } out: diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index e5336ab36..d32fd6b03 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -1082,7 +1082,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask)) goto nla_put_failure; #endif - if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index f1e7d2c0f..8f004edad 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -110,7 +110,8 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); - if (unlikely(!tb[IPSET_ATTR_ETHER])) + if (unlikely(!tb[IPSET_ATTR_ETHER] || + nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN)) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_extensions(set, tb, &ext); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index bbede95c9..a2a89e4e0 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -30,6 +30,7 @@ MODULE_ALIAS("ip_set_list:set"); struct set_elem { struct rcu_head rcu; struct list_head list; + struct ip_set *set; /* Sigh, in order to cleanup reference */ ip_set_id_t id; } __aligned(__alignof__(u64)); @@ -151,30 +152,29 @@ list_set_kadt(struct ip_set *set, const struct sk_buff *skb, /* Userspace interfaces: we are protected by the nfnl mutex */ static void -__list_set_del(struct ip_set *set, struct set_elem *e) +__list_set_del_rcu(struct rcu_head * rcu) { + struct set_elem *e = container_of(rcu, struct set_elem, rcu); + struct ip_set *set = e->set; struct list_set *map = set->data; ip_set_put_byindex(map->net, e->id); - /* We may call it, because we don't have a to be destroyed - * extension which is used by the kernel. - */ ip_set_ext_destroy(set, e); - kfree_rcu(e, rcu); + kfree(e); } static inline void list_set_del(struct ip_set *set, struct set_elem *e) { list_del_rcu(&e->list); - __list_set_del(set, e); + call_rcu(&e->rcu, __list_set_del_rcu); } static inline void -list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old) +list_set_replace(struct set_elem *e, struct set_elem *old) { list_replace_rcu(&old->list, &e->list); - __list_set_del(set, old); + call_rcu(&old->rcu, __list_set_del_rcu); } static void @@ -244,9 +244,6 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct set_elem *e, *n, *prev, *next; bool flag_exist = flags & IPSET_FLAG_EXIST; - if (SET_WITH_TIMEOUT(set)) - set_cleanup_entries(set); - /* Find where to add the new entry */ n = prev = next = NULL; list_for_each_entry(e, &map->members, list) { @@ -301,10 +298,11 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (!e) return -ENOMEM; e->id = d->id; + e->set = set; INIT_LIST_HEAD(&e->list); list_set_init_extensions(set, ext, e); if (n) - list_set_replace(set, e, n); + list_set_replace(e, n); else if (next) list_add_tail_rcu(&e->list, &next->list); else if (prev) @@ -431,6 +429,7 @@ list_set_destroy(struct ip_set *set) if (SET_WITH_TIMEOUT(set)) del_timer_sync(&map->gc); + list_for_each_entry_safe(e, n, &map->members, list) { list_del(&e->list); ip_set_put_byindex(map->net, e->id); @@ -450,14 +449,16 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) struct set_elem *e; u32 n = 0; - list_for_each_entry(e, &map->members, list) + rcu_read_lock(); + list_for_each_entry_rcu(e, &map->members, list) n++; + rcu_read_unlock(); nested = ipset_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || - nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + n * set->dsize))) goto nla_put_failure; @@ -483,33 +484,25 @@ list_set_list(const struct ip_set *set, atd = ipset_nest_start(skb, IPSET_ATTR_ADT); if (!atd) return -EMSGSIZE; - list_for_each_entry(e, &map->members, list) { - if (i == first) - break; - i++; - } rcu_read_lock(); - list_for_each_entry_from(e, &map->members, list) { - i++; - if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(e, set))) + list_for_each_entry_rcu(e, &map->members, list) { + if (i < first || + (SET_WITH_TIMEOUT(set) && + ip_set_timeout_expired(ext_timeout(e, set)))) { + i++; continue; + } nested = ipset_nest_start(skb, IPSET_ATTR_DATA); - if (!nested) { - if (i == first) { - nla_nest_cancel(skb, atd); - ret = -EMSGSIZE; - goto out; - } + if (!nested) goto nla_put_failure; - } if (nla_put_string(skb, IPSET_ATTR_NAME, ip_set_name_byindex(map->net, e->id))) goto nla_put_failure; if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; ipset_nest_end(skb, nested); + i++; } ipset_nest_end(skb, atd); @@ -520,10 +513,12 @@ list_set_list(const struct ip_set *set, nla_put_failure: nla_nest_cancel(skb, nested); if (unlikely(i == first)) { + nla_nest_cancel(skb, atd); cb->args[IPSET_CB_ARG0] = 0; ret = -EMSGSIZE; + } else { + cb->args[IPSET_CB_ARG0] = i; } - cb->args[IPSET_CB_ARG0] = i - 1; ipset_nest_end(skb, atd); out: rcu_read_unlock(); diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 0328f7250..299edc6ad 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -605,17 +605,13 @@ static const struct file_operations ip_vs_app_fops = { int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) { - struct net *net = ipvs->net; - INIT_LIST_HEAD(&ipvs->app_list); - proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops); + proc_create("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_fops); return 0; } void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) { - struct net *net = ipvs->net; - unregister_ip_vs_app(ipvs, NULL /* all */); - remove_proc_entry("ip_vs_app", net->proc_net); + remove_proc_entry("ip_vs_app", ipvs->net->proc_net); } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4da560005..b9a4082af 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1089,6 +1089,7 @@ static inline bool is_new_conn_expected(const struct ip_vs_conn *cp, switch (cp->protocol) { case IPPROTO_TCP: return (cp->state == IP_VS_TCP_S_TIME_WAIT) || + (cp->state == IP_VS_TCP_S_CLOSE) || ((conn_reuse_mode & 2) && (cp->state == IP_VS_TCP_S_FIN_WAIT) && (cp->flags & IP_VS_CONN_F_NOOUTPUT)); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index e7c1b052c..404b2a4f4 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1376,8 +1376,6 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) struct ip_vs_pe *old_pe; struct netns_ipvs *ipvs = svc->ipvs; - pr_info("%s: enter\n", __func__); - /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) ipvs->num_services--; @@ -3947,7 +3945,6 @@ static struct notifier_block ip_vs_dst_notifier = { int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) { - struct net *net = ipvs->net; int i, idx; /* Initialize rs_table */ @@ -3974,9 +3971,9 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) spin_lock_init(&ipvs->tot_stats.lock); - proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops); - proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops); - proc_create("ip_vs_stats_percpu", 0, net->proc_net, + proc_create("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_fops); + proc_create("ip_vs_stats", 0, ipvs->net->proc_net, &ip_vs_stats_fops); + proc_create("ip_vs_stats_percpu", 0, ipvs->net->proc_net, &ip_vs_stats_percpu_fops); if (ip_vs_control_net_init_sysctl(ipvs)) @@ -3991,13 +3988,11 @@ err: void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) { - struct net *net = ipvs->net; - ip_vs_trash_cleanup(ipvs); ip_vs_control_net_cleanup_sysctl(ipvs); - remove_proc_entry("ip_vs_stats_percpu", net->proc_net); - remove_proc_entry("ip_vs_stats", net->proc_net); - remove_proc_entry("ip_vs", net->proc_net); + remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); + remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); + remove_proc_entry("ip_vs", ipvs->net->proc_net); free_percpu(ipvs->tot_stats.cpustats); } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 3264cb49b..dc196a0f5 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -531,8 +531,6 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, if (ret == NF_ACCEPT) { nf_reset(skb); skb_forward_csum(skb); - if (!skb->sk) - skb_sender_cpu_clear(skb); } return ret; } @@ -573,8 +571,6 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, if (!local) { skb_forward_csum(skb); - if (!skb->sk) - skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, NULL, skb_dst(skb)->dev, dst_output); } else @@ -595,8 +591,6 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, if (!local) { ip_vs_drop_early_demux_sk(skb); skb_forward_csum(skb); - if (!skb->sk) - skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, NULL, skb_dst(skb)->dev, dst_output); } else @@ -1019,8 +1013,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (IS_ERR(skb)) goto tx_error; - skb = iptunnel_handle_offloads( - skb, false, __tun_gso_type_mask(AF_INET, cp->af)); + skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af)); if (IS_ERR(skb)) goto tx_error; @@ -1112,8 +1105,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (IS_ERR(skb)) goto tx_error; - skb = iptunnel_handle_offloads( - skb, false, __tun_gso_type_mask(AF_INET6, cp->af)); + skb = iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af)); if (IS_ERR(skb)) goto tx_error; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f60b4fdee..e27fd17c6 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -66,7 +66,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks); __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); -static __read_mostly spinlock_t nf_conntrack_locks_all_lock; +static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) @@ -74,8 +74,7 @@ void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) spin_lock(lock); while (unlikely(nf_conntrack_locks_all)) { spin_unlock(lock); - spin_lock(&nf_conntrack_locks_all_lock); - spin_unlock(&nf_conntrack_locks_all_lock); + spin_unlock_wait(&nf_conntrack_locks_all_lock); spin_lock(lock); } } @@ -121,8 +120,7 @@ static void nf_conntrack_all_lock(void) nf_conntrack_locks_all = true; for (i = 0; i < CONNTRACK_LOCKS; i++) { - spin_lock(&nf_conntrack_locks[i]); - spin_unlock(&nf_conntrack_locks[i]); + spin_unlock_wait(&nf_conntrack_locks[i]); } } @@ -1780,6 +1778,7 @@ void nf_conntrack_init_end(void) int nf_conntrack_init_net(struct net *net) { + static atomic64_t unique_id; int ret = -ENOMEM; int cpu; @@ -1802,7 +1801,8 @@ int nf_conntrack_init_net(struct net *net) if (!net->ct.stat) goto err_pcpu_lists; - net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); + net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%llu", + (u64)atomic64_inc_return(&unique_id)); if (!net->ct.slabname) goto err_slabname; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 278f3b935..7cc1d9c22 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -410,6 +410,8 @@ static void tcp_options(const struct sk_buff *skb, length--; continue; default: + if (length < 2) + return; opsize=*ptr++; if (opsize < 2) /* "silly options" */ return; @@ -470,6 +472,8 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, length--; continue; default: + if (length < 2) + return; opsize = *ptr++; if (opsize < 2) /* "silly options" */ return; diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index 8414ee1a0..7ec697239 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -31,7 +31,6 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif) skb_push(skb, skb->mac_len); skb->dev = dev; - skb_sender_cpu_clear(skb); dev_queue_xmit(skb); } EXPORT_SYMBOL_GPL(nf_dup_netdev_egress); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 857ae8963..2278d9ab7 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -127,13 +127,6 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group) } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size, - u32 dst_portid, gfp_t gfp_mask) -{ - return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask); -} -EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb); - int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags) { diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 5274b04c4..dbd0803b1 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -96,6 +96,8 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl, return -EINVAL; if (flags & NFACCT_F_OVERQUOTA) return -EINVAL; + if ((flags & NFACCT_F_QUOTA) && !tb[NFACCT_QUOTA]) + return -EINVAL; size += sizeof(u64); } @@ -242,6 +244,9 @@ nfacct_filter_alloc(const struct nlattr * const attr) if (err < 0) return ERR_PTR(err); + if (!tb[NFACCT_FILTER_MASK] || !tb[NFACCT_FILTER_VALUE]) + return ERR_PTR(-EINVAL); + filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL); if (!filter) return ERR_PTR(-ENOMEM); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 8ca932057..11f81c838 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -330,14 +330,13 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, * message. WARNING: has to be <= 128k due to slab restrictions */ n = max(inst_size, pkt_size); - skb = nfnetlink_alloc_skb(net, n, peer_portid, GFP_ATOMIC); + skb = alloc_skb(n, GFP_ATOMIC); if (!skb) { if (n > pkt_size) { /* try to allocate only as much as we need for current * packet */ - skb = nfnetlink_alloc_skb(net, pkt_size, - peer_portid, GFP_ATOMIC); + skb = alloc_skb(pkt_size, GFP_ATOMIC); } } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 1d3936587..cb5b630a6 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -301,7 +301,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, __be32 **packet_id_ptr) { size_t size; - size_t data_len = 0, cap_len = 0, rem_len = 0; + size_t data_len = 0, cap_len = 0; unsigned int hlen = 0; struct sk_buff *skb; struct nlattr *nla; @@ -361,7 +361,6 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, hlen = min_t(unsigned int, hlen, data_len); size += sizeof(struct nlattr) + hlen; cap_len = entskb->len; - rem_len = data_len - hlen; break; } @@ -386,8 +385,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, size += nla_total_size(seclen); } - skb = __netlink_alloc_skb(net->nfnl, size, rem_len, queue->peer_portid, - GFP_ATOMIC); + skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { skb_tx_error(entskb); return NULL; @@ -584,7 +582,12 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, /* nfnetlink_unicast will either free the nskb or add it to a socket */ err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT); if (err < 0) { - queue->queue_user_dropped++; + if (queue->flags & NFQA_CFG_F_FAIL_OPEN) { + failopen = 1; + err = 0; + } else { + queue->queue_user_dropped++; + } goto err_out_unlock; } diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 454841baa..6228c422c 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -660,6 +660,9 @@ nft_match_select_ops(const struct nft_ctx *ctx, if (IS_ERR(match)) return ERR_PTR(-ENOENT); + if (match->matchsize > nla_len(tb[NFTA_MATCH_INFO])) + return ERR_PTR(-EINVAL); + /* This is the first time we use this match, allocate operations */ nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); if (nft_match == NULL) @@ -740,6 +743,9 @@ nft_target_select_ops(const struct nft_ctx *ctx, if (IS_ERR(target)) return ERR_PTR(-ENOENT); + if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO])) + return ERR_PTR(-EINVAL); + /* This is the first time we use this target, allocate operations */ nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); if (nft_target == NULL) diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 9aea747b4..81b5ad616 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -17,7 +17,9 @@ #include <net/netfilter/nft_masq.h> const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = { - [NFTA_MASQ_FLAGS] = { .type = NLA_U32 }, + [NFTA_MASQ_FLAGS] = { .type = NLA_U32 }, + [NFTA_MASQ_REG_PROTO_MIN] = { .type = NLA_U32 }, + [NFTA_MASQ_REG_PROTO_MAX] = { .type = NLA_U32 }, }; EXPORT_SYMBOL_GPL(nft_masq_policy); @@ -40,6 +42,7 @@ int nft_masq_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { + u32 plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all); struct nft_masq *priv = nft_expr_priv(expr); int err; @@ -47,12 +50,32 @@ int nft_masq_init(const struct nft_ctx *ctx, if (err) return err; - if (tb[NFTA_MASQ_FLAGS] == NULL) - return 0; - - priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS])); - if (priv->flags & ~NF_NAT_RANGE_MASK) - return -EINVAL; + if (tb[NFTA_MASQ_FLAGS]) { + priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS])); + if (priv->flags & ~NF_NAT_RANGE_MASK) + return -EINVAL; + } + + if (tb[NFTA_MASQ_REG_PROTO_MIN]) { + priv->sreg_proto_min = + nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MIN]); + + err = nft_validate_register_load(priv->sreg_proto_min, plen); + if (err < 0) + return err; + + if (tb[NFTA_MASQ_REG_PROTO_MAX]) { + priv->sreg_proto_max = + nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MAX]); + + err = nft_validate_register_load(priv->sreg_proto_max, + plen); + if (err < 0) + return err; + } else { + priv->sreg_proto_max = priv->sreg_proto_min; + } + } return 0; } @@ -62,12 +85,18 @@ int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_masq *priv = nft_expr_priv(expr); - if (priv->flags == 0) - return 0; - - if (nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags))) + if (priv->flags != 0 && + nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags))) goto nla_put_failure; + if (priv->sreg_proto_min) { + if (nft_dump_register(skb, NFTA_MASQ_REG_PROTO_MIN, + priv->sreg_proto_min) || + nft_dump_register(skb, NFTA_MASQ_REG_PROTO_MAX, + priv->sreg_proto_max)) + goto nla_put_failure; + } + return 0; nla_put_failure: diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index fe885bf27..16c50b0dd 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -28,6 +28,8 @@ #include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */ +static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state); + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -181,6 +183,11 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = sock_cgroup_classid(&sk->sk_cgrp_data); break; #endif + case NFT_META_PRANDOM: { + struct rnd_state *state = this_cpu_ptr(&nft_prandom_state); + *dest = prandom_u32_state(state); + break; + } default: WARN_ON(1); goto err; @@ -277,6 +284,10 @@ int nft_meta_get_init(const struct nft_ctx *ctx, case NFT_META_OIFNAME: len = IFNAMSIZ; break; + case NFT_META_PRANDOM: + prandom_init_once(&nft_prandom_state); + len = sizeof(u32); + break; default: return -EOPNOTSUPP; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index c8a0b7da5..582c9cfd6 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -659,6 +659,9 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) struct xt_table_info *info = NULL; size_t sz = sizeof(*info) + size; + if (sz < sizeof(*info)) + return NULL; + /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) return NULL; @@ -694,12 +697,45 @@ EXPORT_SYMBOL(xt_free_table_info); struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) { - struct xt_table *t; + struct xt_table *t, *found = NULL; mutex_lock(&xt[af].mutex); list_for_each_entry(t, &net->xt.tables[af], list) if (strcmp(t->name, name) == 0 && try_module_get(t->me)) return t; + + if (net == &init_net) + goto out; + + /* Table doesn't exist in this netns, re-try init */ + list_for_each_entry(t, &init_net.xt.tables[af], list) { + if (strcmp(t->name, name)) + continue; + if (!try_module_get(t->me)) + return NULL; + + mutex_unlock(&xt[af].mutex); + if (t->table_init(net) != 0) { + module_put(t->me); + return NULL; + } + + found = t; + + mutex_lock(&xt[af].mutex); + break; + } + + if (!found) + goto out; + + /* and once again: */ + list_for_each_entry(t, &net->xt.tables[af], list) + if (strcmp(t->name, name) == 0) + return t; + + module_put(found->me); + out: mutex_unlock(&xt[af].mutex); return NULL; } @@ -1170,20 +1206,20 @@ static const struct file_operations xt_target_ops = { #endif /* CONFIG_PROC_FS */ /** - * xt_hook_link - set up hooks for a new table + * xt_hook_ops_alloc - set up hooks for a new table * @table: table with metadata needed to set up hooks * @fn: Hook function * - * This function will take care of creating and registering the necessary - * Netfilter hooks for XT tables. + * This function will create the nf_hook_ops that the x_table needs + * to hand to xt_hook_link_net(). */ -struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn) +struct nf_hook_ops * +xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn) { unsigned int hook_mask = table->valid_hooks; uint8_t i, num_hooks = hweight32(hook_mask); uint8_t hooknum; struct nf_hook_ops *ops; - int ret; ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); if (ops == NULL) @@ -1200,27 +1236,9 @@ struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn) ++i; } - ret = nf_register_hooks(ops, num_hooks); - if (ret < 0) { - kfree(ops); - return ERR_PTR(ret); - } - return ops; } -EXPORT_SYMBOL_GPL(xt_hook_link); - -/** - * xt_hook_unlink - remove hooks for a table - * @ops: nf_hook_ops array as returned by nf_hook_link - * @hook_mask: the very same mask that was passed to nf_hook_link - */ -void xt_hook_unlink(const struct xt_table *table, struct nf_hook_ops *ops) -{ - nf_unregister_hooks(ops, hweight32(table->valid_hooks)); - kfree(ops); -} -EXPORT_SYMBOL_GPL(xt_hook_unlink); +EXPORT_SYMBOL_GPL(xt_hook_ops_alloc); int xt_proto_init(struct net *net, u_int8_t af) { diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 29d2c31f4..daf45da44 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -236,6 +236,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) list_del(&info->timer->entry); del_timer_sync(&info->timer->timer); + cancel_work_sync(&info->timer->work); sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); kfree(info->timer->attr.attr.name); kfree(info->timer); diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 3ab591e73..7f4414d26 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -105,19 +105,24 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) * belonging to established connections going through that one. */ static inline struct sock * -nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, +nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, + const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, const struct net_device *in, const enum nf_tproxy_lookup_t lookup_type) { struct sock *sk; + struct tcphdr *tcph; switch (protocol) { case IPPROTO_TCP: switch (lookup_type) { case NFT_LOOKUP_LISTENER: - sk = inet_lookup_listener(net, &tcp_hashinfo, + tcph = hp; + sk = inet_lookup_listener(net, &tcp_hashinfo, skb, + ip_hdrlen(skb) + + __tcp_hdrlen(tcph), saddr, sport, daddr, dport, in->ifindex); @@ -169,19 +174,23 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, #ifdef XT_TPROXY_HAVE_IPV6 static inline struct sock * -nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, +nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, + const u8 protocol, const struct in6_addr *saddr, const struct in6_addr *daddr, const __be16 sport, const __be16 dport, const struct net_device *in, const enum nf_tproxy_lookup_t lookup_type) { struct sock *sk; + struct tcphdr *tcph; switch (protocol) { case IPPROTO_TCP: switch (lookup_type) { case NFT_LOOKUP_LISTENER: - sk = inet6_lookup_listener(net, &tcp_hashinfo, + tcph = hp; + sk = inet6_lookup_listener(net, &tcp_hashinfo, skb, + thoff + __tcp_hdrlen(tcph), saddr, sport, daddr, ntohs(dport), in->ifindex); @@ -267,7 +276,7 @@ tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, * to a listener socket if there's one */ struct sock *sk2; - sk2 = nf_tproxy_get_sock_v4(net, iph->protocol, + sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol, iph->saddr, laddr ? laddr : iph->daddr, hp->source, lport ? lport : hp->dest, skb->dev, NFT_LOOKUP_LISTENER); @@ -305,7 +314,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ - sk = nf_tproxy_get_sock_v4(net, iph->protocol, + sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol, iph->saddr, iph->daddr, hp->source, hp->dest, skb->dev, NFT_LOOKUP_ESTABLISHED); @@ -321,7 +330,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, else if (!sk) /* no, there's no established connection, check if * there's a listener on the redirected addr/port */ - sk = nf_tproxy_get_sock_v4(net, iph->protocol, + sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol, iph->saddr, laddr, hp->source, lport, skb->dev, NFT_LOOKUP_LISTENER); @@ -429,7 +438,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, * to a listener socket if there's one */ struct sock *sk2; - sk2 = nf_tproxy_get_sock_v6(par->net, tproto, + sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto, &iph->saddr, tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr), hp->source, @@ -472,7 +481,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ - sk = nf_tproxy_get_sock_v6(par->net, tproto, + sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto, &iph->saddr, &iph->daddr, hp->source, hp->dest, par->in, NFT_LOOKUP_ESTABLISHED); @@ -487,8 +496,8 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) else if (!sk) /* no there's no established connection, check if * there's a listener on the redirected addr/port */ - sk = nf_tproxy_get_sock_v6(par->net, tproto, - &iph->saddr, laddr, + sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, + tproto, &iph->saddr, laddr, hp->source, lport, par->in, NFT_LOOKUP_LISTENER); diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 4e3c3affd..2455b69b5 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -262,7 +262,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) if (f->opt[optnum].kind == (*optp)) { __u32 len = f->opt[optnum].length; const __u8 *optend = optp + len; - int loop_cont = 0; fmatch = FMATCH_OK; @@ -275,7 +274,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) mss = ntohs((__force __be16)mss); break; case OSFOPT_TS: - loop_cont = 1; break; } diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 2ec08f04b..49d14ecad 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -112,14 +112,15 @@ extract_icmp4_fields(const struct sk_buff *skb, * box. */ static struct sock * -xt_socket_get_sock_v4(struct net *net, const u8 protocol, +xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, + const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, const struct net_device *in) { switch (protocol) { case IPPROTO_TCP: - return __inet_lookup(net, &tcp_hashinfo, + return __inet_lookup(net, &tcp_hashinfo, skb, doff, saddr, sport, daddr, dport, in->ifindex); case IPPROTO_UDP: @@ -148,6 +149,8 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net, const struct net_device *indev) { const struct iphdr *iph = ip_hdr(skb); + struct sk_buff *data_skb = NULL; + int doff = 0; __be32 uninitialized_var(daddr), uninitialized_var(saddr); __be16 uninitialized_var(dport), uninitialized_var(sport); u8 uninitialized_var(protocol); @@ -169,6 +172,10 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net, sport = hp->source; daddr = iph->daddr; dport = hp->dest; + data_skb = (struct sk_buff *)skb; + doff = iph->protocol == IPPROTO_TCP ? + ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) : + ip_hdrlen(skb) + sizeof(*hp); } else if (iph->protocol == IPPROTO_ICMP) { if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, @@ -198,8 +205,8 @@ static struct sock *xt_socket_lookup_slow_v4(struct net *net, } #endif - return xt_socket_get_sock_v4(net, protocol, saddr, daddr, - sport, dport, indev); + return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, + daddr, sport, dport, indev); } static bool @@ -318,14 +325,15 @@ extract_icmp6_fields(const struct sk_buff *skb, } static struct sock * -xt_socket_get_sock_v6(struct net *net, const u8 protocol, +xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff, + const u8 protocol, const struct in6_addr *saddr, const struct in6_addr *daddr, const __be16 sport, const __be16 dport, const struct net_device *in) { switch (protocol) { case IPPROTO_TCP: - return inet6_lookup(net, &tcp_hashinfo, + return inet6_lookup(net, &tcp_hashinfo, skb, doff, saddr, sport, daddr, dport, in->ifindex); case IPPROTO_UDP: @@ -343,6 +351,8 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net, __be16 uninitialized_var(dport), uninitialized_var(sport); const struct in6_addr *daddr = NULL, *saddr = NULL; struct ipv6hdr *iph = ipv6_hdr(skb); + struct sk_buff *data_skb = NULL; + int doff = 0; int thoff = 0, tproto; tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); @@ -362,6 +372,10 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net, sport = hp->source; daddr = &iph->daddr; dport = hp->dest; + data_skb = (struct sk_buff *)skb; + doff = tproto == IPPROTO_TCP ? + thoff + __tcp_hdrlen((struct tcphdr *)hp) : + thoff + sizeof(*hp); } else if (tproto == IPPROTO_ICMPV6) { struct ipv6hdr ipv6_var; @@ -373,7 +387,7 @@ static struct sock *xt_socket_lookup_slow_v6(struct net *net, return NULL; } - return xt_socket_get_sock_v6(net, tproto, saddr, daddr, + return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr, sport, dport, indev); } |