diff options
Diffstat (limited to 'net/netfilter')
43 files changed, 1395 insertions, 679 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 6eae69a69..3e1b4abf1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -867,6 +867,8 @@ config NETFILTER_XT_TARGET_TEE depends on NETFILTER_ADVANCED depends on IPV6 || IPV6=n depends on !NF_CONNTRACK || NF_CONNTRACK + select NF_DUP_IPV4 + select NF_DUP_IPV6 if IP6_NF_IPTABLES ---help--- This option adds a "TEE" target with which a packet can be cloned and this clone be rerouted to another nexthop. diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a0e54974e..21a085686 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -34,6 +34,9 @@ EXPORT_SYMBOL(nf_afinfo); const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly; EXPORT_SYMBOL_GPL(nf_ipv6_ops); +DEFINE_PER_CPU(bool, nf_skb_duplicated); +EXPORT_SYMBOL_GPL(nf_skb_duplicated); + int nf_register_afinfo(const struct nf_afinfo *afinfo) { mutex_lock(&afinfo_mutex); @@ -52,9 +55,6 @@ void nf_unregister_afinfo(const struct nf_afinfo *afinfo) } EXPORT_SYMBOL_GPL(nf_unregister_afinfo); -struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; -EXPORT_SYMBOL(nf_hooks); - #ifdef HAVE_JUMP_LABEL struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; EXPORT_SYMBOL(nf_hooks_needed); @@ -62,63 +62,168 @@ EXPORT_SYMBOL(nf_hooks_needed); static DEFINE_MUTEX(nf_hook_mutex); -int nf_register_hook(struct nf_hook_ops *reg) +static struct list_head *nf_find_hook_list(struct net *net, + const struct nf_hook_ops *reg) { - struct list_head *nf_hook_list; - struct nf_hook_ops *elem; + struct list_head *hook_list = NULL; - mutex_lock(&nf_hook_mutex); - switch (reg->pf) { - case NFPROTO_NETDEV: + if (reg->pf != NFPROTO_NETDEV) + hook_list = &net->nf.hooks[reg->pf][reg->hooknum]; + else if (reg->hooknum == NF_NETDEV_INGRESS) { #ifdef CONFIG_NETFILTER_INGRESS - if (reg->hooknum == NF_NETDEV_INGRESS) { - BUG_ON(reg->dev == NULL); - nf_hook_list = ®->dev->nf_hooks_ingress; - net_inc_ingress_queue(); - break; - } + if (reg->dev && dev_net(reg->dev) == net) + hook_list = ®->dev->nf_hooks_ingress; #endif - /* Fall through. */ - default: - nf_hook_list = &nf_hooks[reg->pf][reg->hooknum]; - break; } + return hook_list; +} + +struct nf_hook_entry { + const struct nf_hook_ops *orig_ops; + struct nf_hook_ops ops; +}; + +int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) +{ + struct list_head *hook_list; + struct nf_hook_entry *entry; + struct nf_hook_ops *elem; - list_for_each_entry(elem, nf_hook_list, list) { + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->orig_ops = reg; + entry->ops = *reg; + + hook_list = nf_find_hook_list(net, reg); + if (!hook_list) { + kfree(entry); + return -ENOENT; + } + + mutex_lock(&nf_hook_mutex); + list_for_each_entry(elem, hook_list, list) { if (reg->priority < elem->priority) break; } - list_add_rcu(®->list, elem->list.prev); + list_add_rcu(&entry->ops.list, elem->list.prev); mutex_unlock(&nf_hook_mutex); +#ifdef CONFIG_NETFILTER_INGRESS + if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) + net_inc_ingress_queue(); +#endif #ifdef HAVE_JUMP_LABEL static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif return 0; } -EXPORT_SYMBOL(nf_register_hook); +EXPORT_SYMBOL(nf_register_net_hook); -void nf_unregister_hook(struct nf_hook_ops *reg) +void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) { + struct list_head *hook_list; + struct nf_hook_entry *entry; + struct nf_hook_ops *elem; + + hook_list = nf_find_hook_list(net, reg); + if (!hook_list) + return; + mutex_lock(&nf_hook_mutex); - list_del_rcu(®->list); - mutex_unlock(&nf_hook_mutex); - switch (reg->pf) { - case NFPROTO_NETDEV: -#ifdef CONFIG_NETFILTER_INGRESS - if (reg->hooknum == NF_NETDEV_INGRESS) { - net_dec_ingress_queue(); + list_for_each_entry(elem, hook_list, list) { + entry = container_of(elem, struct nf_hook_entry, ops); + if (entry->orig_ops == reg) { + list_del_rcu(&entry->ops.list); break; } - break; -#endif - default: - break; } + mutex_unlock(&nf_hook_mutex); + if (&elem->list == hook_list) { + WARN(1, "nf_unregister_net_hook: hook not found!\n"); + return; + } +#ifdef CONFIG_NETFILTER_INGRESS + if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) + net_dec_ingress_queue(); +#endif #ifdef HAVE_JUMP_LABEL static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); - nf_queue_nf_hook_drop(reg); + nf_queue_nf_hook_drop(net, &entry->ops); + /* other cpu might still process nfqueue verdict that used reg */ + synchronize_net(); + kfree(entry); +} +EXPORT_SYMBOL(nf_unregister_net_hook); + +int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg, + unsigned int n) +{ + unsigned int i; + int err = 0; + + for (i = 0; i < n; i++) { + err = nf_register_net_hook(net, ®[i]); + if (err) + goto err; + } + return err; + +err: + if (i > 0) + nf_unregister_net_hooks(net, reg, i); + return err; +} +EXPORT_SYMBOL(nf_register_net_hooks); + +void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, + unsigned int n) +{ + while (n-- > 0) + nf_unregister_net_hook(net, ®[n]); +} +EXPORT_SYMBOL(nf_unregister_net_hooks); + +static LIST_HEAD(nf_hook_list); + +int nf_register_hook(struct nf_hook_ops *reg) +{ + struct net *net, *last; + int ret; + + rtnl_lock(); + for_each_net(net) { + ret = nf_register_net_hook(net, reg); + if (ret && ret != -ENOENT) + goto rollback; + } + list_add_tail(®->list, &nf_hook_list); + rtnl_unlock(); + + return 0; +rollback: + last = net; + for_each_net(net) { + if (net == last) + break; + nf_unregister_net_hook(net, reg); + } + rtnl_unlock(); + return ret; +} +EXPORT_SYMBOL(nf_register_hook); + +void nf_unregister_hook(struct nf_hook_ops *reg) +{ + struct net *net; + + rtnl_lock(); + list_del(®->list); + for_each_net(net) + nf_unregister_net_hook(net, reg); + rtnl_unlock(); } EXPORT_SYMBOL(nf_unregister_hook); @@ -285,9 +390,12 @@ EXPORT_SYMBOL(nf_conntrack_destroy); struct nfq_ct_hook __rcu *nfq_ct_hook __read_mostly; EXPORT_SYMBOL_GPL(nfq_ct_hook); -struct nfq_ct_nat_hook __rcu *nfq_ct_nat_hook __read_mostly; -EXPORT_SYMBOL_GPL(nfq_ct_nat_hook); - +/* Built-in default zone used e.g. by modules. */ +const struct nf_conntrack_zone nf_ct_zone_dflt = { + .id = NF_CT_DEFAULT_ZONE_ID, + .dir = NF_CT_DEFAULT_ZONE_DIR, +}; +EXPORT_SYMBOL_GPL(nf_ct_zone_dflt); #endif /* CONFIG_NF_CONNTRACK */ #ifdef CONFIG_NF_NAT_NEEDED @@ -295,8 +403,46 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(nf_nat_decode_session_hook); #endif +static int nf_register_hook_list(struct net *net) +{ + struct nf_hook_ops *elem; + int ret; + + rtnl_lock(); + list_for_each_entry(elem, &nf_hook_list, list) { + ret = nf_register_net_hook(net, elem); + if (ret && ret != -ENOENT) + goto out_undo; + } + rtnl_unlock(); + return 0; + +out_undo: + list_for_each_entry_continue_reverse(elem, &nf_hook_list, list) + nf_unregister_net_hook(net, elem); + rtnl_unlock(); + return ret; +} + +static void nf_unregister_hook_list(struct net *net) +{ + struct nf_hook_ops *elem; + + rtnl_lock(); + list_for_each_entry(elem, &nf_hook_list, list) + nf_unregister_net_hook(net, elem); + rtnl_unlock(); +} + static int __net_init netfilter_net_init(struct net *net) { + int i, h, ret; + + for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) { + for (h = 0; h < NF_MAX_HOOKS; h++) + INIT_LIST_HEAD(&net->nf.hooks[i][h]); + } + #ifdef CONFIG_PROC_FS net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", net->proc_net); @@ -307,11 +453,16 @@ static int __net_init netfilter_net_init(struct net *net) return -ENOMEM; } #endif - return 0; + ret = nf_register_hook_list(net); + if (ret) + remove_proc_entry("netfilter", net->proc_net); + + return ret; } static void __net_exit netfilter_net_exit(struct net *net) { + nf_unregister_hook_list(net); remove_proc_entry("netfilter", net->proc_net); } @@ -322,12 +473,7 @@ static struct pernet_operations netfilter_net_ops = { int __init netfilter_init(void) { - int i, h, ret; - - for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) { - for (h = 0; h < NF_MAX_HOOKS; h++) - INIT_LIST_HEAD(&nf_hooks[i][h]); - } + int ret; ret = register_pernet_subsys(&netfilter_net_ops); if (ret < 0) diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index a1fe5377a..5a30ce6e8 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -297,7 +297,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, ip_set_timeout_expired(ext_timeout(n, set)))) n = NULL; - e = kzalloc(set->dsize, GFP_KERNEL); + e = kzalloc(set->dsize, GFP_ATOMIC); if (!e) return -ENOMEM; e->id = d->id; diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 3b6929dec..b32fb0dbe 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -162,6 +162,17 @@ config IP_VS_FO If you want to compile it in kernel, say Y. To compile it as a module, choose M here. If unsure, say N. +config IP_VS_OVF + tristate "weighted overflow scheduling" + ---help--- + The weighted overflow scheduling algorithm directs network + connections to the server with the highest weight that is + currently available and overflows to the next when active + connections exceed the node's weight. + + If you want to compile it in kernel, say Y. To compile it as a + module, choose M here. If unsure, say N. + config IP_VS_LBLC tristate "locality-based least-connection scheduling" ---help--- diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile index 38b2723b2..67f3f4389 100644 --- a/net/netfilter/ipvs/Makefile +++ b/net/netfilter/ipvs/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o +obj-$(CONFIG_IP_VS_OVF) += ip_vs_ovf.o obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 24c554201..1a23e91d5 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2335,13 +2335,23 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) cmd == IP_VS_SO_SET_STOPDAEMON) { struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; - mutex_lock(&ipvs->sync_mutex); - if (cmd == IP_VS_SO_SET_STARTDAEMON) - ret = start_sync_thread(net, dm->state, dm->mcast_ifn, - dm->syncid); - else + if (cmd == IP_VS_SO_SET_STARTDAEMON) { + struct ipvs_sync_daemon_cfg cfg; + + memset(&cfg, 0, sizeof(cfg)); + strlcpy(cfg.mcast_ifn, dm->mcast_ifn, + sizeof(cfg.mcast_ifn)); + cfg.syncid = dm->syncid; + rtnl_lock(); + mutex_lock(&ipvs->sync_mutex); + ret = start_sync_thread(net, &cfg, dm->state); + mutex_unlock(&ipvs->sync_mutex); + rtnl_unlock(); + } else { + mutex_lock(&ipvs->sync_mutex); ret = stop_sync_thread(net, dm->state); - mutex_unlock(&ipvs->sync_mutex); + mutex_unlock(&ipvs->sync_mutex); + } goto out_dec; } @@ -2645,15 +2655,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) mutex_lock(&ipvs->sync_mutex); if (ipvs->sync_state & IP_VS_STATE_MASTER) { d[0].state = IP_VS_STATE_MASTER; - strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, + strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn, sizeof(d[0].mcast_ifn)); - d[0].syncid = ipvs->master_syncid; + d[0].syncid = ipvs->mcfg.syncid; } if (ipvs->sync_state & IP_VS_STATE_BACKUP) { d[1].state = IP_VS_STATE_BACKUP; - strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, + strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn, sizeof(d[1].mcast_ifn)); - d[1].syncid = ipvs->backup_syncid; + d[1].syncid = ipvs->bcfg.syncid; } if (copy_to_user(user, &d, sizeof(d)) != 0) ret = -EFAULT; @@ -2808,6 +2818,11 @@ static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, .len = IP_VS_IFNAME_MAXLEN }, [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, + [IPVS_DAEMON_ATTR_SYNC_MAXLEN] = { .type = NLA_U16 }, + [IPVS_DAEMON_ATTR_MCAST_GROUP] = { .type = NLA_U32 }, + [IPVS_DAEMON_ATTR_MCAST_GROUP6] = { .len = sizeof(struct in6_addr) }, + [IPVS_DAEMON_ATTR_MCAST_PORT] = { .type = NLA_U16 }, + [IPVS_DAEMON_ATTR_MCAST_TTL] = { .type = NLA_U8 }, }; /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */ @@ -3266,7 +3281,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, } static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, - const char *mcast_ifn, __u32 syncid) + struct ipvs_sync_daemon_cfg *c) { struct nlattr *nl_daemon; @@ -3275,9 +3290,23 @@ static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, return -EMSGSIZE; if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) || - nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) || - nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid)) + nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) || + nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) || + nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) || + nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) || + nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl)) goto nla_put_failure; +#ifdef CONFIG_IP_VS_IPV6 + if (c->mcast_af == AF_INET6) { + if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6, + &c->mcast_group.in6)) + goto nla_put_failure; + } else +#endif + if (c->mcast_af == AF_INET && + nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP, + c->mcast_group.ip)) + goto nla_put_failure; nla_nest_end(skb, nl_daemon); return 0; @@ -3288,7 +3317,7 @@ nla_put_failure: } static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, - const char *mcast_ifn, __u32 syncid, + struct ipvs_sync_daemon_cfg *c, struct netlink_callback *cb) { void *hdr; @@ -3298,7 +3327,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, if (!hdr) return -EMSGSIZE; - if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) + if (ip_vs_genl_fill_daemon(skb, state, c)) goto nla_put_failure; genlmsg_end(skb, hdr); @@ -3318,8 +3347,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, mutex_lock(&ipvs->sync_mutex); if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, - ipvs->master_mcast_ifn, - ipvs->master_syncid, cb) < 0) + &ipvs->mcfg, cb) < 0) goto nla_put_failure; cb->args[0] = 1; @@ -3327,8 +3355,7 @@ static int ip_vs_genl_dump_daemons(struct sk_buff *skb, if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, - ipvs->backup_mcast_ifn, - ipvs->backup_syncid, cb) < 0) + &ipvs->bcfg, cb) < 0) goto nla_put_failure; cb->args[1] = 1; @@ -3342,30 +3369,83 @@ nla_put_failure: static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) { + struct netns_ipvs *ipvs = net_ipvs(net); + struct ipvs_sync_daemon_cfg c; + struct nlattr *a; + int ret; + + memset(&c, 0, sizeof(c)); if (!(attrs[IPVS_DAEMON_ATTR_STATE] && attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && attrs[IPVS_DAEMON_ATTR_SYNC_ID])) return -EINVAL; + strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), + sizeof(c.mcast_ifn)); + c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]); + + a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN]; + if (a) + c.sync_maxlen = nla_get_u16(a); + + a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP]; + if (a) { + c.mcast_af = AF_INET; + c.mcast_group.ip = nla_get_in_addr(a); + if (!ipv4_is_multicast(c.mcast_group.ip)) + return -EINVAL; + } else { + a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6]; + if (a) { +#ifdef CONFIG_IP_VS_IPV6 + int addr_type; + + c.mcast_af = AF_INET6; + c.mcast_group.in6 = nla_get_in6_addr(a); + addr_type = ipv6_addr_type(&c.mcast_group.in6); + if (!(addr_type & IPV6_ADDR_MULTICAST)) + return -EINVAL; +#else + return -EAFNOSUPPORT; +#endif + } + } + + a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT]; + if (a) + c.mcast_port = nla_get_u16(a); + + a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL]; + if (a) + c.mcast_ttl = nla_get_u8(a); /* The synchronization protocol is incompatible with mixed family * services */ - if (net_ipvs(net)->mixed_address_family_dests > 0) + if (ipvs->mixed_address_family_dests > 0) return -EINVAL; - return start_sync_thread(net, - nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), - nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), - nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); + rtnl_lock(); + mutex_lock(&ipvs->sync_mutex); + ret = start_sync_thread(net, &c, + nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); + mutex_unlock(&ipvs->sync_mutex); + rtnl_unlock(); + return ret; } static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs) { + struct netns_ipvs *ipvs = net_ipvs(net); + int ret; + if (!attrs[IPVS_DAEMON_ATTR_STATE]) return -EINVAL; - return stop_sync_thread(net, - nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); + mutex_lock(&ipvs->sync_mutex); + ret = stop_sync_thread(net, + nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); + mutex_unlock(&ipvs->sync_mutex); + return ret; } static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) @@ -3389,7 +3469,7 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) { - int ret = 0, cmd; + int ret = -EINVAL, cmd; struct net *net; struct netns_ipvs *ipvs; @@ -3400,22 +3480,19 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; - mutex_lock(&ipvs->sync_mutex); if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], - ip_vs_daemon_policy)) { - ret = -EINVAL; + ip_vs_daemon_policy)) goto out; - } if (cmd == IPVS_CMD_NEW_DAEMON) ret = ip_vs_genl_new_daemon(net, daemon_attrs); else ret = ip_vs_genl_del_daemon(net, daemon_attrs); -out: - mutex_unlock(&ipvs->sync_mutex); } + +out: return ret; } diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index 5882bbfd1..136184572 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -274,7 +274,7 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) " for conn " FMT_CONN "\n", __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); - h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE, + h = nf_conntrack_find_get(ip_vs_conn_net(cp), &nf_ct_zone_dflt, &tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/netfilter/ipvs/ip_vs_ovf.c b/net/netfilter/ipvs/ip_vs_ovf.c new file mode 100644 index 000000000..f7d62c3b7 --- /dev/null +++ b/net/netfilter/ipvs/ip_vs_ovf.c @@ -0,0 +1,86 @@ +/* + * IPVS: Overflow-Connection Scheduling module + * + * Authors: Raducu Deaconu <rhadoo_io@yahoo.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Scheduler implements "overflow" loadbalancing according to number of active + * connections , will keep all conections to the node with the highest weight + * and overflow to the next node if the number of connections exceeds the node's + * weight. + * Note that this scheduler might not be suitable for UDP because it only uses + * active connections + * + */ + +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> + +#include <net/ip_vs.h> + +/* OVF Connection scheduling */ +static struct ip_vs_dest * +ip_vs_ovf_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) +{ + struct ip_vs_dest *dest, *h = NULL; + int hw = 0, w; + + IP_VS_DBG(6, "ip_vs_ovf_schedule(): Scheduling...\n"); + /* select the node with highest weight, go to next in line if active + * connections exceed weight + */ + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { + w = atomic_read(&dest->weight); + if ((dest->flags & IP_VS_DEST_F_OVERLOAD) || + atomic_read(&dest->activeconns) > w || + w == 0) + continue; + if (!h || w > hw) { + h = dest; + hw = w; + } + } + + if (h) { + IP_VS_DBG_BUF(6, "OVF: server %s:%u active %d w %d\n", + IP_VS_DBG_ADDR(h->af, &h->addr), + ntohs(h->port), + atomic_read(&h->activeconns), + atomic_read(&h->weight)); + return h; + } + + ip_vs_scheduler_err(svc, "no destination available"); + return NULL; +} + +static struct ip_vs_scheduler ip_vs_ovf_scheduler = { + .name = "ovf", + .refcnt = ATOMIC_INIT(0), + .module = THIS_MODULE, + .n_list = LIST_HEAD_INIT(ip_vs_ovf_scheduler.n_list), + .schedule = ip_vs_ovf_schedule, +}; + +static int __init ip_vs_ovf_init(void) +{ + return register_ip_vs_scheduler(&ip_vs_ovf_scheduler); +} + +static void __exit ip_vs_ovf_cleanup(void) +{ + unregister_ip_vs_scheduler(&ip_vs_ovf_scheduler); + synchronize_rcu(); +} + +module_init(ip_vs_ovf_init); +module_exit(ip_vs_ovf_cleanup); +MODULE_LICENSE("GPL"); diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 7e8141647..a2ff7d746 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -137,7 +137,7 @@ struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name) void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) { - if (scheduler && scheduler->module) + if (scheduler) module_put(scheduler->module); } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index d99ad93eb..43f140950 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -262,6 +262,11 @@ struct ip_vs_sync_mesg { /* ip_vs_sync_conn entries start here */ }; +union ipvs_sockaddr { + struct sockaddr_in in; + struct sockaddr_in6 in6; +}; + struct ip_vs_sync_buff { struct list_head list; unsigned long firstuse; @@ -320,26 +325,28 @@ sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) * Create a new sync buffer for Version 1 proto. */ static inline struct ip_vs_sync_buff * -ip_vs_sync_buff_create(struct netns_ipvs *ipvs) +ip_vs_sync_buff_create(struct netns_ipvs *ipvs, unsigned int len) { struct ip_vs_sync_buff *sb; if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) return NULL; - sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); + len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg), + ipvs->mcfg.sync_maxlen); + sb->mesg = kmalloc(len, GFP_ATOMIC); if (!sb->mesg) { kfree(sb); return NULL; } sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */ sb->mesg->version = SYNC_PROTO_VER; - sb->mesg->syncid = ipvs->master_syncid; + sb->mesg->syncid = ipvs->mcfg.syncid; sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg)); sb->mesg->nr_conns = 0; sb->mesg->spare = 0; sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); - sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen; + sb->end = (unsigned char *)sb->mesg + len; sb->firstuse = jiffies; return sb; @@ -402,7 +409,7 @@ select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp) * Create a new sync buffer for Version 0 proto. */ static inline struct ip_vs_sync_buff * -ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) +ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs, unsigned int len) { struct ip_vs_sync_buff *sb; struct ip_vs_sync_mesg_v0 *mesg; @@ -410,17 +417,19 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) return NULL; - sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); + len = max_t(unsigned int, len + sizeof(struct ip_vs_sync_mesg_v0), + ipvs->mcfg.sync_maxlen); + sb->mesg = kmalloc(len, GFP_ATOMIC); if (!sb->mesg) { kfree(sb); return NULL; } mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; mesg->nr_conns = 0; - mesg->syncid = ipvs->master_syncid; + mesg->syncid = ipvs->mcfg.syncid; mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0)); sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0); - sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen; + sb->end = (unsigned char *)mesg + len; sb->firstuse = jiffies; return sb; } @@ -533,7 +542,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, struct ip_vs_sync_buff *buff; struct ipvs_master_sync_state *ms; int id; - int len; + unsigned int len; if (unlikely(cp->af != AF_INET)) return; @@ -553,17 +562,19 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, id = select_master_thread_id(ipvs, cp); ms = &ipvs->ms[id]; buff = ms->sync_buff; + len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : + SIMPLE_CONN_SIZE; if (buff) { m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; /* Send buffer if it is for v1 */ - if (!m->nr_conns) { + if (buff->head + len > buff->end || !m->nr_conns) { sb_queue_tail(ipvs, ms); ms->sync_buff = NULL; buff = NULL; } } if (!buff) { - buff = ip_vs_sync_buff_create_v0(ipvs); + buff = ip_vs_sync_buff_create_v0(ipvs, len); if (!buff) { spin_unlock_bh(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); @@ -572,8 +583,6 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, ms->sync_buff = buff; } - len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : - SIMPLE_CONN_SIZE; m = (struct ip_vs_sync_mesg_v0 *) buff->mesg; s = (struct ip_vs_sync_conn_v0 *) buff->head; @@ -597,12 +606,6 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, m->nr_conns++; m->size = htons(ntohs(m->size) + len); buff->head += len; - - /* check if there is a space for next one */ - if (buff->head + FULL_CONN_SIZE > buff->end) { - sb_queue_tail(ipvs, ms); - ms->sync_buff = NULL; - } spin_unlock_bh(&ipvs->sync_buff_lock); /* synchronize its controller if it has */ @@ -694,7 +697,7 @@ sloop: } if (!buff) { - buff = ip_vs_sync_buff_create(ipvs); + buff = ip_vs_sync_buff_create(ipvs, len); if (!buff) { spin_unlock_bh(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); @@ -1219,7 +1222,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer, return; } /* SyncID sanity check */ - if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) { + if (ipvs->bcfg.syncid != 0 && m2->syncid != ipvs->bcfg.syncid) { IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid); return; } @@ -1303,6 +1306,14 @@ static void set_mcast_loop(struct sock *sk, u_char loop) /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */ lock_sock(sk); inet->mc_loop = loop ? 1 : 0; +#ifdef CONFIG_IP_VS_IPV6 + if (sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + /* IPV6_MULTICAST_LOOP */ + np->mc_loop = loop ? 1 : 0; + } +#endif release_sock(sk); } @@ -1316,6 +1327,33 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl) /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */ lock_sock(sk); inet->mc_ttl = ttl; +#ifdef CONFIG_IP_VS_IPV6 + if (sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + /* IPV6_MULTICAST_HOPS */ + np->mcast_hops = ttl; + } +#endif + release_sock(sk); +} + +/* Control fragmentation of messages */ +static void set_mcast_pmtudisc(struct sock *sk, int val) +{ + struct inet_sock *inet = inet_sk(sk); + + /* setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &val, sizeof(val)); */ + lock_sock(sk); + inet->pmtudisc = val; +#ifdef CONFIG_IP_VS_IPV6 + if (sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + /* IPV6_MTU_DISCOVER */ + np->pmtudisc = val; + } +#endif release_sock(sk); } @@ -1338,44 +1376,15 @@ static int set_mcast_if(struct sock *sk, char *ifname) lock_sock(sk); inet->mc_index = dev->ifindex; /* inet->mc_addr = 0; */ - release_sock(sk); - - return 0; -} - +#ifdef CONFIG_IP_VS_IPV6 + if (sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); -/* - * Set the maximum length of sync message according to the - * specified interface's MTU. - */ -static int set_sync_mesg_maxlen(struct net *net, int sync_state) -{ - struct netns_ipvs *ipvs = net_ipvs(net); - struct net_device *dev; - int num; - - if (sync_state == IP_VS_STATE_MASTER) { - dev = __dev_get_by_name(net, ipvs->master_mcast_ifn); - if (!dev) - return -ENODEV; - - num = (dev->mtu - sizeof(struct iphdr) - - sizeof(struct udphdr) - - SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; - ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN + - SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); - IP_VS_DBG(7, "setting the maximum length of sync sending " - "message %d.\n", ipvs->send_mesg_maxlen); - } else if (sync_state == IP_VS_STATE_BACKUP) { - dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn); - if (!dev) - return -ENODEV; - - ipvs->recv_mesg_maxlen = dev->mtu - - sizeof(struct iphdr) - sizeof(struct udphdr); - IP_VS_DBG(7, "setting the maximum length of sync receiving " - "message %d.\n", ipvs->recv_mesg_maxlen); + /* IPV6_MULTICAST_IF */ + np->mcast_oif = dev->ifindex; } +#endif + release_sock(sk); return 0; } @@ -1405,15 +1414,34 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) mreq.imr_ifindex = dev->ifindex; - rtnl_lock(); lock_sock(sk); ret = ip_mc_join_group(sk, &mreq); release_sock(sk); - rtnl_unlock(); return ret; } +#ifdef CONFIG_IP_VS_IPV6 +static int join_mcast_group6(struct sock *sk, struct in6_addr *addr, + char *ifname) +{ + struct net *net = sock_net(sk); + struct net_device *dev; + int ret; + + dev = __dev_get_by_name(net, ifname); + if (!dev) + return -ENODEV; + if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) + return -EINVAL; + + lock_sock(sk); + ret = ipv6_sock_mc_join(sk, dev->ifindex, addr); + release_sock(sk); + + return ret; +} +#endif static int bind_mcastif_addr(struct socket *sock, char *ifname) { @@ -1442,6 +1470,26 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin)); } +static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen, + struct ipvs_sync_daemon_cfg *c, int id) +{ + if (AF_INET6 == c->mcast_af) { + sa->in6 = (struct sockaddr_in6) { + .sin6_family = AF_INET6, + .sin6_port = htons(c->mcast_port + id), + }; + sa->in6.sin6_addr = c->mcast_group.in6; + *salen = sizeof(sa->in6); + } else { + sa->in = (struct sockaddr_in) { + .sin_family = AF_INET, + .sin_port = htons(c->mcast_port + id), + }; + sa->in.sin_addr = c->mcast_group.in; + *salen = sizeof(sa->in); + } +} + /* * Set up sending multicast socket over UDP */ @@ -1449,40 +1497,43 @@ static struct socket *make_send_sock(struct net *net, int id) { struct netns_ipvs *ipvs = net_ipvs(net); /* multicast addr */ - struct sockaddr_in mcast_addr = { - .sin_family = AF_INET, - .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id), - .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), - }; + union ipvs_sockaddr mcast_addr; struct socket *sock; - int result; + int result, salen; /* First create a socket */ - result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + result = sock_create_kern(net, ipvs->mcfg.mcast_af, SOCK_DGRAM, + IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } - result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); + result = set_mcast_if(sock->sk, ipvs->mcfg.mcast_ifn); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); goto error; } set_mcast_loop(sock->sk, 0); - set_mcast_ttl(sock->sk, 1); + set_mcast_ttl(sock->sk, ipvs->mcfg.mcast_ttl); + /* Allow fragmentation if MTU changes */ + set_mcast_pmtudisc(sock->sk, IP_PMTUDISC_DONT); result = sysctl_sync_sock_size(ipvs); if (result > 0) set_sock_size(sock->sk, 1, result); - result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn); + if (AF_INET == ipvs->mcfg.mcast_af) + result = bind_mcastif_addr(sock, ipvs->mcfg.mcast_ifn); + else + result = 0; if (result < 0) { pr_err("Error binding address of the mcast interface\n"); goto error; } + get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id); result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr, - sizeof(struct sockaddr), 0); + salen, 0); if (result < 0) { pr_err("Error connecting to the multicast addr\n"); goto error; @@ -1503,16 +1554,13 @@ static struct socket *make_receive_sock(struct net *net, int id) { struct netns_ipvs *ipvs = net_ipvs(net); /* multicast addr */ - struct sockaddr_in mcast_addr = { - .sin_family = AF_INET, - .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id), - .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), - }; + union ipvs_sockaddr mcast_addr; struct socket *sock; - int result; + int result, salen; /* First create a socket */ - result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + result = sock_create_kern(net, ipvs->bcfg.mcast_af, SOCK_DGRAM, + IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); @@ -1523,17 +1571,22 @@ static struct socket *make_receive_sock(struct net *net, int id) if (result > 0) set_sock_size(sock->sk, 0, result); - result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr, - sizeof(struct sockaddr)); + get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id); + result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen); if (result < 0) { pr_err("Error binding to the multicast addr\n"); goto error; } /* join the multicast group */ - result = join_mcast_group(sock->sk, - (struct in_addr *) &mcast_addr.sin_addr, - ipvs->backup_mcast_ifn); +#ifdef CONFIG_IP_VS_IPV6 + if (ipvs->bcfg.mcast_af == AF_INET6) + result = join_mcast_group6(sock->sk, &mcast_addr.in6.sin6_addr, + ipvs->bcfg.mcast_ifn); + else +#endif + result = join_mcast_group(sock->sk, &mcast_addr.in.sin_addr, + ipvs->bcfg.mcast_ifn); if (result < 0) { pr_err("Error joining to the multicast group\n"); goto error; @@ -1641,7 +1694,7 @@ static int sync_thread_master(void *data) pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " "syncid = %d, id = %d\n", - ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id); + ipvs->mcfg.mcast_ifn, ipvs->mcfg.syncid, tinfo->id); for (;;) { sb = next_sync_buff(ipvs, ms); @@ -1695,7 +1748,7 @@ static int sync_thread_backup(void *data) pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " "syncid = %d, id = %d\n", - ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id); + ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id); while (!kthread_should_stop()) { wait_event_interruptible(*sk_sleep(tinfo->sock->sk), @@ -1705,7 +1758,7 @@ static int sync_thread_backup(void *data) /* do we have data now? */ while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { len = ip_vs_receive(tinfo->sock, tinfo->buf, - ipvs->recv_mesg_maxlen); + ipvs->bcfg.sync_maxlen); if (len <= 0) { if (len != -EAGAIN) pr_err("receiving message error\n"); @@ -1725,16 +1778,19 @@ static int sync_thread_backup(void *data) } -int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) +int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c, + int state) { struct ip_vs_sync_thread_data *tinfo; struct task_struct **array = NULL, *task; struct socket *sock; struct netns_ipvs *ipvs = net_ipvs(net); + struct net_device *dev; char *name; int (*threadfn)(void *data); - int id, count; + int id, count, hlen; int result = -ENOMEM; + u16 mtu, min_mtu; IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", @@ -1746,22 +1802,46 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) } else count = ipvs->threads_mask + 1; + if (c->mcast_af == AF_UNSPEC) { + c->mcast_af = AF_INET; + c->mcast_group.ip = cpu_to_be32(IP_VS_SYNC_GROUP); + } + if (!c->mcast_port) + c->mcast_port = IP_VS_SYNC_PORT; + if (!c->mcast_ttl) + c->mcast_ttl = 1; + + dev = __dev_get_by_name(net, c->mcast_ifn); + if (!dev) { + pr_err("Unknown mcast interface: %s\n", c->mcast_ifn); + return -ENODEV; + } + hlen = (AF_INET6 == c->mcast_af) ? + sizeof(struct ipv6hdr) + sizeof(struct udphdr) : + sizeof(struct iphdr) + sizeof(struct udphdr); + mtu = (state == IP_VS_STATE_BACKUP) ? + clamp(dev->mtu, 1500U, 65535U) : 1500U; + min_mtu = (state == IP_VS_STATE_BACKUP) ? 1024 : 1; + + if (c->sync_maxlen) + c->sync_maxlen = clamp_t(unsigned int, + c->sync_maxlen, min_mtu, + 65535 - hlen); + else + c->sync_maxlen = mtu - hlen; + if (state == IP_VS_STATE_MASTER) { if (ipvs->ms) return -EEXIST; - strlcpy(ipvs->master_mcast_ifn, mcast_ifn, - sizeof(ipvs->master_mcast_ifn)); - ipvs->master_syncid = syncid; + ipvs->mcfg = *c; name = "ipvs-m:%d:%d"; threadfn = sync_thread_master; } else if (state == IP_VS_STATE_BACKUP) { if (ipvs->backup_threads) return -EEXIST; - strlcpy(ipvs->backup_mcast_ifn, mcast_ifn, - sizeof(ipvs->backup_mcast_ifn)); - ipvs->backup_syncid = syncid; + ipvs->bcfg = *c; name = "ipvs-b:%d:%d"; threadfn = sync_thread_backup; } else { @@ -1789,7 +1869,6 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) if (!array) goto out; } - set_sync_mesg_maxlen(net, state); tinfo = NULL; for (id = 0; id < count; id++) { @@ -1807,7 +1886,7 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) tinfo->net = net; tinfo->sock = sock; if (state == IP_VS_STATE_BACKUP) { - tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen, + tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen, GFP_KERNEL); if (!tinfo->buf) goto outtinfo; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0625a42df..c09d6c719 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -126,7 +126,7 @@ EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); unsigned int nf_conntrack_hash_rnd __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd); -static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone) +static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple) { unsigned int n; @@ -135,7 +135,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone) * three bytes manually. */ n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); - return jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^ + return jhash2((u32 *)tuple, n, nf_conntrack_hash_rnd ^ (((__force __u16)tuple->dst.u.all << 16) | tuple->dst.protonum)); } @@ -151,15 +151,15 @@ static u32 hash_bucket(u32 hash, const struct net *net) } static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, - u16 zone, unsigned int size) + unsigned int size) { - return __hash_bucket(hash_conntrack_raw(tuple, zone), size); + return __hash_bucket(hash_conntrack_raw(tuple), size); } -static inline u_int32_t hash_conntrack(const struct net *net, u16 zone, +static inline u_int32_t hash_conntrack(const struct net *net, const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, zone, net->ct.htable_size); + return __hash_conntrack(tuple, net->ct.htable_size); } bool @@ -288,7 +288,9 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) } /* Released via destroy_conntrack() */ -struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags) +struct nf_conn *nf_ct_tmpl_alloc(struct net *net, + const struct nf_conntrack_zone *zone, + gfp_t flags) { struct nf_conn *tmpl; @@ -299,24 +301,15 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags) tmpl->status = IPS_TEMPLATE; write_pnet(&tmpl->ct_net, net); -#ifdef CONFIG_NF_CONNTRACK_ZONES - if (zone) { - struct nf_conntrack_zone *nf_ct_zone; + if (nf_ct_zone_add(tmpl, flags, zone) < 0) + goto out_free; - nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, flags); - if (!nf_ct_zone) - goto out_free; - nf_ct_zone->id = zone; - } -#endif atomic_set(&tmpl->ct_general.use, 0); return tmpl; -#ifdef CONFIG_NF_CONNTRACK_ZONES out_free: kfree(tmpl); return NULL; -#endif } EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); @@ -374,7 +367,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); unsigned int hash, reply_hash; - u16 zone = nf_ct_zone(ct); unsigned int sequence; nf_ct_helper_destroy(ct); @@ -382,9 +374,9 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) local_bh_disable(); do { sequence = read_seqcount_begin(&net->ct.generation); - hash = hash_conntrack(net, zone, + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - reply_hash = hash_conntrack(net, zone, + reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); @@ -432,8 +424,8 @@ static void death_by_timeout(unsigned long ul_conntrack) static inline bool nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, - const struct nf_conntrack_tuple *tuple, - u16 zone) + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); @@ -441,8 +433,8 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, * so we need to check that the conntrack is confirmed */ return nf_ct_tuple_equal(tuple, &h->tuple) && - nf_ct_zone(ct) == zone && - nf_ct_is_confirmed(ct); + nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) && + nf_ct_is_confirmed(ct); } /* @@ -451,7 +443,7 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, * and recheck nf_ct_tuple_equal(tuple, &h->tuple) */ static struct nf_conntrack_tuple_hash * -____nf_conntrack_find(struct net *net, u16 zone, +____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple, u32 hash) { struct nf_conntrack_tuple_hash *h; @@ -487,7 +479,7 @@ begin: /* Find a connection corresponding to a tuple. */ static struct nf_conntrack_tuple_hash * -__nf_conntrack_find_get(struct net *net, u16 zone, +__nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple, u32 hash) { struct nf_conntrack_tuple_hash *h; @@ -514,11 +506,11 @@ begin: } struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(struct net *net, u16 zone, +nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { return __nf_conntrack_find_get(net, zone, tuple, - hash_conntrack_raw(tuple, zone)); + hash_conntrack_raw(tuple)); } EXPORT_SYMBOL_GPL(nf_conntrack_find_get); @@ -537,11 +529,11 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, int nf_conntrack_hash_check_insert(struct nf_conn *ct) { + const struct nf_conntrack_zone *zone; struct net *net = nf_ct_net(ct); unsigned int hash, reply_hash; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - u16 zone; unsigned int sequence; zone = nf_ct_zone(ct); @@ -549,9 +541,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) local_bh_disable(); do { sequence = read_seqcount_begin(&net->ct.generation); - hash = hash_conntrack(net, zone, + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - reply_hash = hash_conntrack(net, zone, + reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); @@ -559,12 +551,14 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &h->tuple) && - zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, + NF_CT_DIRECTION(h))) goto out; hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, &h->tuple) && - zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, + NF_CT_DIRECTION(h))) goto out; add_timer(&ct->timeout); @@ -589,6 +583,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); int __nf_conntrack_confirm(struct sk_buff *skb) { + const struct nf_conntrack_zone *zone; unsigned int hash, reply_hash; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; @@ -597,7 +592,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct hlist_nulls_node *n; enum ip_conntrack_info ctinfo; struct net *net; - u16 zone; unsigned int sequence; ct = nf_ct_get(skb, &ctinfo); @@ -618,7 +612,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* reuse the hash saved before */ hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev; hash = hash_bucket(hash, net); - reply_hash = hash_conntrack(net, zone, + reply_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); @@ -650,12 +644,14 @@ __nf_conntrack_confirm(struct sk_buff *skb) hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &h->tuple) && - zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, + NF_CT_DIRECTION(h))) goto out; hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, &h->tuple) && - zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) + nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone, + NF_CT_DIRECTION(h))) goto out; /* Timer relative to confirmation time, not original @@ -708,11 +704,14 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { struct net *net = nf_ct_net(ignored_conntrack); + const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; struct nf_conn *ct; - u16 zone = nf_ct_zone(ignored_conntrack); - unsigned int hash = hash_conntrack(net, zone, tuple); + unsigned int hash; + + zone = nf_ct_zone(ignored_conntrack); + hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we need to disable them at * least once for the stats anyway. @@ -722,7 +721,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, ct = nf_ct_tuplehash_to_ctrack(h); if (ct != ignored_conntrack && nf_ct_tuple_equal(tuple, &h->tuple) && - nf_ct_zone(ct) == zone) { + nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h))) { NF_CT_STAT_INC(net, found); rcu_read_unlock_bh(); return 1; @@ -811,7 +810,8 @@ void init_nf_conntrack_hash_rnd(void) } static struct nf_conn * -__nf_conntrack_alloc(struct net *net, u16 zone, +__nf_conntrack_alloc(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp, u32 hash) @@ -821,7 +821,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone, if (unlikely(!nf_conntrack_hash_rnd)) { init_nf_conntrack_hash_rnd(); /* recompute the hash as nf_conntrack_hash_rnd is initialized */ - hash = hash_conntrack_raw(orig, zone); + hash = hash_conntrack_raw(orig); } /* We don't want any race condition at early drop stage */ @@ -841,10 +841,9 @@ __nf_conntrack_alloc(struct net *net, u16 zone, * SLAB_DESTROY_BY_RCU. */ ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp); - if (ct == NULL) { - atomic_dec(&net->ct.count); - return ERR_PTR(-ENOMEM); - } + if (ct == NULL) + goto out; + spin_lock_init(&ct->lock); ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; @@ -858,31 +857,24 @@ __nf_conntrack_alloc(struct net *net, u16 zone, memset(&ct->__nfct_init_offset[0], 0, offsetof(struct nf_conn, proto) - offsetof(struct nf_conn, __nfct_init_offset[0])); -#ifdef CONFIG_NF_CONNTRACK_ZONES - if (zone) { - struct nf_conntrack_zone *nf_ct_zone; - nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC); - if (!nf_ct_zone) - goto out_free; - nf_ct_zone->id = zone; - } -#endif + if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0) + goto out_free; + /* Because we use RCU lookups, we set ct_general.use to zero before * this is inserted in any list. */ atomic_set(&ct->ct_general.use, 0); return ct; - -#ifdef CONFIG_NF_CONNTRACK_ZONES out_free: - atomic_dec(&net->ct.count); kmem_cache_free(net->ct.nf_conntrack_cachep, ct); +out: + atomic_dec(&net->ct.count); return ERR_PTR(-ENOMEM); -#endif } -struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, +struct nf_conn *nf_conntrack_alloc(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp) @@ -924,8 +916,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_tuple repl_tuple; struct nf_conntrack_ecache *ecache; struct nf_conntrack_expect *exp = NULL; - u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; + const struct nf_conntrack_zone *zone; struct nf_conn_timeout *timeout_ext; + struct nf_conntrack_zone tmp; unsigned int *timeouts; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { @@ -933,6 +926,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, return NULL; } + zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC, hash); if (IS_ERR(ct)) @@ -1027,10 +1021,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, int *set_reply, enum ip_conntrack_info *ctinfo) { + const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_zone tmp; struct nf_conn *ct; - u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; u32 hash; if (!nf_ct_get_tuple(skb, skb_network_offset(skb), @@ -1041,7 +1036,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, } /* look for tuple match */ - hash = hash_conntrack_raw(&tuple, zone); + zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); + hash = hash_conntrack_raw(&tuple); h = __nf_conntrack_find_get(net, zone, &tuple, hash); if (!h) { h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, @@ -1597,8 +1593,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) struct nf_conntrack_tuple_hash, hnnode); ct = nf_ct_tuplehash_to_ctrack(h); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct), - hashsize); + bucket = __hash_conntrack(&h->tuple, hashsize); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index b45a4223c..acf5c7b3f 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -88,7 +88,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple } struct nf_conntrack_expect * -__nf_ct_expect_find(struct net *net, u16 zone, +__nf_ct_expect_find(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; @@ -100,7 +101,7 @@ __nf_ct_expect_find(struct net *net, u16 zone, h = nf_ct_expect_dst_hash(tuple); hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) { if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && - nf_ct_zone(i->master) == zone) + nf_ct_zone_equal_any(i->master, zone)) return i; } return NULL; @@ -109,7 +110,8 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find); /* Just find a expectation corresponding to a tuple. */ struct nf_conntrack_expect * -nf_ct_expect_find_get(struct net *net, u16 zone, +nf_ct_expect_find_get(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; @@ -127,7 +129,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get); /* If an expectation for this connection is found, it gets delete from * global list then returned. */ struct nf_conntrack_expect * -nf_ct_find_expectation(struct net *net, u16 zone, +nf_ct_find_expectation(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i, *exp = NULL; @@ -140,7 +143,7 @@ nf_ct_find_expectation(struct net *net, u16 zone, hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && - nf_ct_zone(i->master) == zone) { + nf_ct_zone_equal_any(i->master, zone)) { exp = i; break; } @@ -220,16 +223,16 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, } return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) && - nf_ct_zone(a->master) == nf_ct_zone(b->master); + nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master)); } static inline int expect_matches(const struct nf_conntrack_expect *a, const struct nf_conntrack_expect *b) { return a->master == b->master && a->class == b->class && - nf_ct_tuple_equal(&a->tuple, &b->tuple) && - nf_ct_tuple_mask_equal(&a->mask, &b->mask) && - nf_ct_zone(a->master) == nf_ct_zone(b->master); + nf_ct_tuple_equal(&a->tuple, &b->tuple) && + nf_ct_tuple_mask_equal(&a->mask, &b->mask) && + nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master)); } /* Generally a bad idea to call this: could have matched already. */ diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index bb53f120e..3ce5c314e 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -14,6 +14,8 @@ #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_labels.h> +static spinlock_t nf_connlabels_lock; + static unsigned int label_bits(const struct nf_conn_labels *l) { unsigned int longs = l->words; @@ -48,7 +50,6 @@ int nf_connlabel_set(struct nf_conn *ct, u16 bit) } EXPORT_SYMBOL_GPL(nf_connlabel_set); -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) static void replace_u32(u32 *address, u32 mask, u32 new) { u32 old, tmp; @@ -89,7 +90,35 @@ int nf_connlabels_replace(struct nf_conn *ct, return 0; } EXPORT_SYMBOL_GPL(nf_connlabels_replace); -#endif + +int nf_connlabels_get(struct net *net, unsigned int n_bits) +{ + size_t words; + + if (n_bits > (NF_CT_LABELS_MAX_SIZE * BITS_PER_BYTE)) + return -ERANGE; + + words = BITS_TO_LONGS(n_bits); + + spin_lock(&nf_connlabels_lock); + net->ct.labels_used++; + if (words > net->ct.label_words) + net->ct.label_words = words; + spin_unlock(&nf_connlabels_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(nf_connlabels_get); + +void nf_connlabels_put(struct net *net) +{ + spin_lock(&nf_connlabels_lock); + net->ct.labels_used--; + if (net->ct.labels_used == 0) + net->ct.label_words = 0; + spin_unlock(&nf_connlabels_lock); +} +EXPORT_SYMBOL_GPL(nf_connlabels_put); static struct nf_ct_ext_type labels_extend __read_mostly = { .len = sizeof(struct nf_conn_labels), @@ -99,6 +128,7 @@ static struct nf_ct_ext_type labels_extend __read_mostly = { int nf_conntrack_labels_init(void) { + spin_lock_init(&nf_connlabels_lock); return nf_ct_extend_register(&labels_extend); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6b8b0abbf..94a66541e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -128,6 +128,20 @@ ctnetlink_dump_tuples(struct sk_buff *skb, } static inline int +ctnetlink_dump_zone_id(struct sk_buff *skb, int attrtype, + const struct nf_conntrack_zone *zone, int dir) +{ + if (zone->id == NF_CT_DEFAULT_ZONE_ID || zone->dir != dir) + return 0; + if (nla_put_be16(skb, attrtype, htons(zone->id))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static inline int ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct) { if (nla_put_be32(skb, CTA_STATUS, htonl(ct->status))) @@ -458,6 +472,7 @@ static int ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nf_conn *ct) { + const struct nf_conntrack_zone *zone; struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; @@ -473,11 +488,16 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; + zone = nf_ct_zone(ct); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_ORIG) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); @@ -485,10 +505,13 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_REPL) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); - if (nf_ct_zone(ct) && - nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct)))) + if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone, + NF_CT_DEFAULT_ZONE_DIR) < 0) goto nla_put_failure; if (ctnetlink_dump_status(skb, ct) < 0 || @@ -598,7 +621,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif #ifdef CONFIG_NF_CONNTRACK_ZONES - + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */ + + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */ #endif + ctnetlink_proto_size(ct) + ctnetlink_label_size(ct) @@ -609,6 +632,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) static int ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) { + const struct nf_conntrack_zone *zone; struct net *net; struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; @@ -655,11 +679,16 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) nfmsg->res_id = 0; rcu_read_lock(); + zone = nf_ct_zone(ct); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_ORIG) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); @@ -667,10 +696,13 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_REPL) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); - if (nf_ct_zone(ct) && - nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct)))) + if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone, + NF_CT_DEFAULT_ZONE_DIR) < 0) goto nla_put_failure; if (ctnetlink_dump_id(skb, ct) < 0) @@ -920,15 +952,54 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr, return ret; } +static int +ctnetlink_parse_zone(const struct nlattr *attr, + struct nf_conntrack_zone *zone) +{ + nf_ct_zone_init(zone, NF_CT_DEFAULT_ZONE_ID, + NF_CT_DEFAULT_ZONE_DIR, 0); +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (attr) + zone->id = ntohs(nla_get_be16(attr)); +#else + if (attr) + return -EOPNOTSUPP; +#endif + return 0; +} + +static int +ctnetlink_parse_tuple_zone(struct nlattr *attr, enum ctattr_type type, + struct nf_conntrack_zone *zone) +{ + int ret; + + if (zone->id != NF_CT_DEFAULT_ZONE_ID) + return -EINVAL; + + ret = ctnetlink_parse_zone(attr, zone); + if (ret < 0) + return ret; + + if (type == CTA_TUPLE_REPLY) + zone->dir = NF_CT_ZONE_DIR_REPL; + else + zone->dir = NF_CT_ZONE_DIR_ORIG; + + return 0; +} + static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = { [CTA_TUPLE_IP] = { .type = NLA_NESTED }, [CTA_TUPLE_PROTO] = { .type = NLA_NESTED }, + [CTA_TUPLE_ZONE] = { .type = NLA_U16 }, }; static int ctnetlink_parse_tuple(const struct nlattr * const cda[], struct nf_conntrack_tuple *tuple, - enum ctattr_type type, u_int8_t l3num) + enum ctattr_type type, u_int8_t l3num, + struct nf_conntrack_zone *zone) { struct nlattr *tb[CTA_TUPLE_MAX+1]; int err; @@ -955,6 +1026,16 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], if (err < 0) return err; + if (tb[CTA_TUPLE_ZONE]) { + if (!zone) + return -EINVAL; + + err = ctnetlink_parse_tuple_zone(tb[CTA_TUPLE_ZONE], + type, zone); + if (err < 0) + return err; + } + /* orig and expect tuples get DIR_ORIGINAL */ if (type == CTA_TUPLE_REPLY) tuple->dst.dir = IP_CT_DIR_REPLY; @@ -964,21 +1045,6 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], return 0; } -static int -ctnetlink_parse_zone(const struct nlattr *attr, u16 *zone) -{ - if (attr) -#ifdef CONFIG_NF_CONNTRACK_ZONES - *zone = ntohs(nla_get_be16(attr)); -#else - return -EOPNOTSUPP; -#endif - else - *zone = 0; - - return 0; -} - static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = { [CTA_HELP_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN - 1 }, @@ -1058,7 +1124,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conn *ct; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - u16 zone; + struct nf_conntrack_zone zone; int err; err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); @@ -1066,9 +1132,11 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; if (cda[CTA_TUPLE_ORIG]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, + u3, &zone); else if (cda[CTA_TUPLE_REPLY]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, + u3, &zone); else { return ctnetlink_flush_conntrack(net, cda, NETLINK_CB(skb).portid, @@ -1078,7 +1146,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(net, zone, &tuple); + h = nf_conntrack_find_get(net, &zone, &tuple); if (!h) return -ENOENT; @@ -1112,7 +1180,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, struct sk_buff *skb2 = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - u16 zone; + struct nf_conntrack_zone zone; int err; if (nlh->nlmsg_flags & NLM_F_DUMP) { @@ -1138,16 +1206,18 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; if (cda[CTA_TUPLE_ORIG]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, + u3, &zone); else if (cda[CTA_TUPLE_REPLY]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, + u3, &zone); else return -EINVAL; if (err < 0) return err; - h = nf_conntrack_find_get(net, zone, &tuple); + h = nf_conntrack_find_get(net, &zone, &tuple); if (!h) return -ENOENT; @@ -1645,7 +1715,8 @@ ctnetlink_change_conntrack(struct nf_conn *ct, } static struct nf_conn * -ctnetlink_create_conntrack(struct net *net, u16 zone, +ctnetlink_create_conntrack(struct net *net, + const struct nf_conntrack_zone *zone, const struct nlattr * const cda[], struct nf_conntrack_tuple *otuple, struct nf_conntrack_tuple *rtuple, @@ -1761,7 +1832,8 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, struct nf_conntrack_tuple_hash *master_h; struct nf_conn *master_ct; - err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER, u3); + err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER, + u3, NULL); if (err < 0) goto err2; @@ -1804,7 +1876,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nf_conn *ct; u_int8_t u3 = nfmsg->nfgen_family; - u16 zone; + struct nf_conntrack_zone zone; int err; err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); @@ -1812,21 +1884,23 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; if (cda[CTA_TUPLE_ORIG]) { - err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3); + err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, + u3, &zone); if (err < 0) return err; } if (cda[CTA_TUPLE_REPLY]) { - err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, u3); + err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, + u3, &zone); if (err < 0) return err; } if (cda[CTA_TUPLE_ORIG]) - h = nf_conntrack_find_get(net, zone, &otuple); + h = nf_conntrack_find_get(net, &zone, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = nf_conntrack_find_get(net, zone, &rtuple); + h = nf_conntrack_find_get(net, &zone, &rtuple); if (h == NULL) { err = -ENOENT; @@ -1836,7 +1910,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY]) return -EINVAL; - ct = ctnetlink_create_conntrack(net, zone, cda, &otuple, + ct = ctnetlink_create_conntrack(net, &zone, cda, &otuple, &rtuple, u3); if (IS_ERR(ct)) return PTR_ERR(ct); @@ -2082,7 +2156,7 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct) + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif #ifdef CONFIG_NF_CONNTRACK_ZONES - + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */ + + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */ #endif + ctnetlink_proto_size(ct) ; @@ -2091,14 +2165,20 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct) static int ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) { + const struct nf_conntrack_zone *zone; struct nlattr *nest_parms; rcu_read_lock(); + zone = nf_ct_zone(ct); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_ORIG) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); @@ -2106,12 +2186,14 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; + if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone, + NF_CT_ZONE_DIR_REPL) < 0) + goto nla_put_failure; nla_nest_end(skb, nest_parms); - if (nf_ct_zone(ct)) { - if (nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct)))) - goto nla_put_failure; - } + if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone, + NF_CT_DEFAULT_ZONE_DIR) < 0) + goto nla_put_failure; if (ctnetlink_dump_id(skb, ct) < 0) goto nla_put_failure; @@ -2218,12 +2300,12 @@ static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda, int err; err = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE, - nf_ct_l3num(ct)); + nf_ct_l3num(ct), NULL); if (err < 0) return err; return ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK, - nf_ct_l3num(ct)); + nf_ct_l3num(ct), NULL); } static int @@ -2612,23 +2694,22 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; - u16 zone = 0; + struct nf_conntrack_zone zone; struct netlink_dump_control c = { .dump = ctnetlink_exp_ct_dump_table, .done = ctnetlink_exp_done, }; - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, + u3, NULL); if (err < 0) return err; - if (cda[CTA_EXPECT_ZONE]) { - err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); - if (err < 0) - return err; - } + err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); + if (err < 0) + return err; - h = nf_conntrack_find_get(net, zone, &tuple); + h = nf_conntrack_find_get(net, &zone, &tuple); if (!h) return -ENOENT; @@ -2652,7 +2733,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, struct sk_buff *skb2; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - u16 zone; + struct nf_conntrack_zone zone; int err; if (nlh->nlmsg_flags & NLM_F_DUMP) { @@ -2672,16 +2753,18 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, return err; if (cda[CTA_EXPECT_TUPLE]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, + u3, NULL); else if (cda[CTA_EXPECT_MASTER]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, + u3, NULL); else return -EINVAL; if (err < 0) return err; - exp = nf_ct_expect_find_get(net, zone, &tuple); + exp = nf_ct_expect_find_get(net, &zone, &tuple); if (!exp) return -ENOENT; @@ -2732,8 +2815,8 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct hlist_node *next; u_int8_t u3 = nfmsg->nfgen_family; + struct nf_conntrack_zone zone; unsigned int i; - u16 zone; int err; if (cda[CTA_EXPECT_TUPLE]) { @@ -2742,12 +2825,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, + u3, NULL); if (err < 0) return err; /* bump usage count to 2 */ - exp = nf_ct_expect_find_get(net, zone, &tuple); + exp = nf_ct_expect_find_get(net, &zone, &tuple); if (!exp) return -ENOENT; @@ -2849,7 +2933,8 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, return -EINVAL; err = ctnetlink_parse_tuple((const struct nlattr * const *)tb, - &nat_tuple, CTA_EXPECT_NAT_TUPLE, u3); + &nat_tuple, CTA_EXPECT_NAT_TUPLE, + u3, NULL); if (err < 0) return err; @@ -2937,7 +3022,8 @@ err_out: } static int -ctnetlink_create_expect(struct net *net, u16 zone, +ctnetlink_create_expect(struct net *net, + const struct nf_conntrack_zone *zone, const struct nlattr * const cda[], u_int8_t u3, u32 portid, int report) { @@ -2949,13 +3035,16 @@ ctnetlink_create_expect(struct net *net, u16 zone, int err; /* caller guarantees that those three CTA_EXPECT_* exist */ - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, + u3, NULL); if (err < 0) return err; - err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3); + err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, + u3, NULL); if (err < 0) return err; - err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3); + err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, + u3, NULL); if (err < 0) return err; @@ -3011,7 +3100,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_expect *exp; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - u16 zone; + struct nf_conntrack_zone zone; int err; if (!cda[CTA_EXPECT_TUPLE] @@ -3023,19 +3112,18 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, + u3, NULL); if (err < 0) return err; spin_lock_bh(&nf_conntrack_expect_lock); - exp = __nf_ct_expect_find(net, zone, &tuple); - + exp = __nf_ct_expect_find(net, &zone, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_expect_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { - err = ctnetlink_create_expect(net, zone, cda, - u3, + err = ctnetlink_create_expect(net, &zone, cda, u3, NETLINK_CB(skb).portid, nlmsg_report(nlh)); } diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 825c3e3f8..5588c7ae1 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -143,13 +143,14 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, const struct nf_conntrack_tuple *t) { const struct nf_conntrack_tuple_hash *h; + const struct nf_conntrack_zone *zone; struct nf_conntrack_expect *exp; struct nf_conn *sibling; - u16 zone = nf_ct_zone(ct); pr_debug("trying to timeout ct or exp for tuple "); nf_ct_dump_tuple(t); + zone = nf_ct_zone(ct); h = nf_conntrack_find_get(net, zone, t); if (h) { sibling = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index b45da90fa..67197731e 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -42,6 +42,8 @@ static const char *const sctp_conntrack_names[] = { "SHUTDOWN_SENT", "SHUTDOWN_RECD", "SHUTDOWN_ACK_SENT", + "HEARTBEAT_SENT", + "HEARTBEAT_ACKED", }; #define SECS * HZ @@ -57,6 +59,8 @@ static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = { [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000, [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000, [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS, + [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS, + [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS, }; #define sNO SCTP_CONNTRACK_NONE @@ -67,6 +71,8 @@ static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = { #define sSS SCTP_CONNTRACK_SHUTDOWN_SENT #define sSR SCTP_CONNTRACK_SHUTDOWN_RECD #define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT +#define sHS SCTP_CONNTRACK_HEARTBEAT_SENT +#define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED #define sIV SCTP_CONNTRACK_MAX /* @@ -88,6 +94,10 @@ SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite to that of the SHUTDOWN chunk. CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of the SHUTDOWN chunk. Connection is closed. +HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow. +HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK in the direction opposite to + that of the HEARTBEAT chunk. Secondary connection is + established. */ /* TODO @@ -97,36 +107,40 @@ CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of - Check the error type in the reply dir before transitioning from cookie echoed to closed. - Sec 5.2.4 of RFC 2960 - - Multi Homing support. + - Full Multi Homing support. */ /* SCTP conntrack state transitions */ -static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = { +static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { { /* ORIGINAL */ -/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ -/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA}, -/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA}, -/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, -/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA}, -/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA}, -/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't have Stale cookie*/ -/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */ -/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in orig dir */ -/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL} +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ +/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA}, +/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA}, +/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, +/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS}, +/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA}, +/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't have Stale cookie*/ +/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* 5.2.4 - Big TODO */ +/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't come in orig dir */ +/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA}, +/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, +/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA} }, { /* REPLY */ -/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ -/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */ -/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA}, -/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, -/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA}, -/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA}, -/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA}, -/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in reply dir */ -/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA}, -/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL} +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ +/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */ +/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA}, +/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL}, +/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR}, +/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA}, +/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA}, +/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* Can't come in reply dir */ +/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA}, +/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA}, +/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, +/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA} } }; @@ -278,9 +292,16 @@ static int sctp_new_state(enum ip_conntrack_dir dir, pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n"); i = 8; break; + case SCTP_CID_HEARTBEAT: + pr_debug("SCTP_CID_HEARTBEAT"); + i = 9; + break; + case SCTP_CID_HEARTBEAT_ACK: + pr_debug("SCTP_CID_HEARTBEAT_ACK"); + i = 10; + break; default: - /* Other chunks like DATA, SACK, HEARTBEAT and - its ACK do not cause a change in state */ + /* Other chunks like DATA or SACK do not change the state */ pr_debug("Unknown chunk type, Will stay in %s\n", sctp_conntrack_names[cur_state]); return cur_state; @@ -329,6 +350,8 @@ static int sctp_packet(struct nf_conn *ct, !test_bit(SCTP_CID_COOKIE_ECHO, map) && !test_bit(SCTP_CID_ABORT, map) && !test_bit(SCTP_CID_SHUTDOWN_ACK, map) && + !test_bit(SCTP_CID_HEARTBEAT, map) && + !test_bit(SCTP_CID_HEARTBEAT_ACK, map) && sh->vtag != ct->proto.sctp.vtag[dir]) { pr_debug("Verification tag check failed\n"); goto out; @@ -357,6 +380,16 @@ static int sctp_packet(struct nf_conn *ct, /* Sec 8.5.1 (D) */ if (sh->vtag != ct->proto.sctp.vtag[dir]) goto out_unlock; + } else if (sch->type == SCTP_CID_HEARTBEAT || + sch->type == SCTP_CID_HEARTBEAT_ACK) { + if (ct->proto.sctp.vtag[dir] == 0) { + pr_debug("Setting vtag %x for dir %d\n", + sh->vtag, dir); + ct->proto.sctp.vtag[dir] = sh->vtag; + } else if (sh->vtag != ct->proto.sctp.vtag[dir]) { + pr_debug("Verification tag check failed\n"); + goto out_unlock; + } } old_state = ct->proto.sctp.state; @@ -466,6 +499,10 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Sec 8.5.1 (A) */ return false; } + } else if (sch->type == SCTP_CID_HEARTBEAT) { + pr_debug("Setting vtag %x for secondary conntrack\n", + sh->vtag); + ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag; } /* If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ @@ -610,6 +647,8 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = { [CTA_TIMEOUT_SCTP_SHUTDOWN_SENT] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_SHUTDOWN_RECD] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .type = NLA_U32 }, + [CTA_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .type = NLA_U32 }, + [CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { .type = NLA_U32 }, }; #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ @@ -658,6 +697,18 @@ static struct ctl_table sctp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "nf_conntrack_sctp_timeout_heartbeat_sent", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "nf_conntrack_sctp_timeout_heartbeat_acked", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { } }; @@ -730,6 +781,8 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, pn->ctl_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT]; pn->ctl_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD]; pn->ctl_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT]; + pn->ctl_table[7].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_SENT]; + pn->ctl_table[8].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_ACKED]; #endif return 0; } diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index ce3e840c8..dff0f0cc5 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -103,9 +103,9 @@ static void nf_ct_sack_block_adjust(struct sk_buff *skb, ntohl(sack->end_seq), ntohl(new_end_seq)); inet_proto_csum_replace4(&tcph->check, skb, - sack->start_seq, new_start_seq, 0); + sack->start_seq, new_start_seq, false); inet_proto_csum_replace4(&tcph->check, skb, - sack->end_seq, new_end_seq, 0); + sack->end_seq, new_end_seq, false); sack->start_seq = new_start_seq; sack->end_seq = new_end_seq; sackoff += sizeof(*sack); @@ -193,8 +193,9 @@ int nf_ct_seq_adjust(struct sk_buff *skb, newseq = htonl(ntohl(tcph->seq) + seqoff); newack = htonl(ntohl(tcph->ack_seq) - ackoff); - inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); - inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); + inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false); + inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, + false); pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index fc823fa5d..1fb3cacc0 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -140,6 +140,35 @@ static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) } #endif +#ifdef CONFIG_NF_CONNTRACK_ZONES +static void ct_show_zone(struct seq_file *s, const struct nf_conn *ct, + int dir) +{ + const struct nf_conntrack_zone *zone = nf_ct_zone(ct); + + if (zone->dir != dir) + return; + switch (zone->dir) { + case NF_CT_DEFAULT_ZONE_DIR: + seq_printf(s, "zone=%u ", zone->id); + break; + case NF_CT_ZONE_DIR_ORIG: + seq_printf(s, "zone-orig=%u ", zone->id); + break; + case NF_CT_ZONE_DIR_REPL: + seq_printf(s, "zone-reply=%u ", zone->id); + break; + default: + break; + } +} +#else +static inline void ct_show_zone(struct seq_file *s, const struct nf_conn *ct, + int dir) +{ +} +#endif + #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP static void ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) { @@ -202,6 +231,8 @@ static int ct_seq_show(struct seq_file *s, void *v) print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, l3proto, l4proto); + ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG); + if (seq_has_overflowed(s)) goto release; @@ -214,6 +245,8 @@ static int ct_seq_show(struct seq_file *s, void *v) print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l3proto, l4proto); + ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL); + if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) goto release; @@ -228,11 +261,7 @@ static int ct_seq_show(struct seq_file *s, void *v) #endif ct_show_secctx(s, ct); - -#ifdef CONFIG_NF_CONNTRACK_ZONES - seq_printf(s, "zone=%u ", nf_ct_zone(ct)); -#endif - + ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR); ct_show_delta_time(s, ct); seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)); diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 399210693..065522564 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -19,7 +19,7 @@ unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, /* nf_queue.c */ int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, struct nf_hook_state *state, unsigned int queuenum); -void nf_queue_nf_hook_drop(struct nf_hook_ops *ops); +void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops); int __init netfilter_queue_init(void); /* nf_log.c */ diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 4e0b47831..5113dfd39 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -118,14 +118,13 @@ EXPORT_SYMBOL(nf_xfrm_me_harder); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int -hash_by_src(const struct net *net, u16 zone, - const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) { unsigned int hash; /* Original src, to ensure we map it consistently if poss. */ hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), - tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd); + tuple->dst.protonum ^ nf_conntrack_hash_rnd); return reciprocal_scale(hash, net->ct.nat_htable_size); } @@ -185,20 +184,22 @@ same_src(const struct nf_conn *ct, /* Only called for SRC manip */ static int -find_appropriate_src(struct net *net, u16 zone, +find_appropriate_src(struct net *net, + const struct nf_conntrack_zone *zone, const struct nf_nat_l3proto *l3proto, const struct nf_nat_l4proto *l4proto, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(net, zone, tuple); + unsigned int h = hash_by_src(net, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) { ct = nat->ct; - if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { + if (same_src(ct, tuple) && + nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { /* Copy source part from reply tuple. */ nf_ct_invert_tuplepr(result, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); @@ -218,7 +219,8 @@ find_appropriate_src(struct net *net, u16 zone, * the ip with the lowest src-ip/dst-ip/proto usage. */ static void -find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, +find_best_ips_proto(const struct nf_conntrack_zone *zone, + struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, const struct nf_conn *ct, enum nf_nat_manip_type maniptype) @@ -258,7 +260,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, */ j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3) / sizeof(u32), range->flags & NF_NAT_RANGE_PERSISTENT ? - 0 : (__force u32)tuple->dst.u3.all[max] ^ zone); + 0 : (__force u32)tuple->dst.u3.all[max] ^ zone->id); full_range = false; for (i = 0; i <= max; i++) { @@ -297,10 +299,12 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { + const struct nf_conntrack_zone *zone; const struct nf_nat_l3proto *l3proto; const struct nf_nat_l4proto *l4proto; struct net *net = nf_ct_net(ct); - u16 zone = nf_ct_zone(ct); + + zone = nf_ct_zone(ct); rcu_read_lock(); l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num); @@ -420,7 +424,7 @@ nf_nat_setup_info(struct nf_conn *ct, if (maniptype == NF_NAT_MANIP_SRC) { unsigned int srchash; - srchash = hash_by_src(net, nf_ct_zone(ct), + srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate extension aera */ diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c index b8067b53f..15c47b246 100644 --- a/net/netfilter/nf_nat_proto_dccp.c +++ b/net/netfilter/nf_nat_proto_dccp.c @@ -69,7 +69,7 @@ dccp_manip_pkt(struct sk_buff *skb, l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype); inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport, - 0); + false); return true; } diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c index 37f5505f4..4f8820fc5 100644 --- a/net/netfilter/nf_nat_proto_tcp.c +++ b/net/netfilter/nf_nat_proto_tcp.c @@ -70,7 +70,7 @@ tcp_manip_pkt(struct sk_buff *skb, return true; l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); - inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); + inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false); return true; } diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c index b0ede2f0d..b1e627227 100644 --- a/net/netfilter/nf_nat_proto_udp.c +++ b/net/netfilter/nf_nat_proto_udp.c @@ -57,7 +57,7 @@ udp_manip_pkt(struct sk_buff *skb, l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, - 0); + false); if (!hdr->check) hdr->check = CSUM_MANGLED_0; } diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c index 368f14e01..58340c97b 100644 --- a/net/netfilter/nf_nat_proto_udplite.c +++ b/net/netfilter/nf_nat_proto_udplite.c @@ -56,7 +56,7 @@ udplite_manip_pkt(struct sk_buff *skb, } l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype); - inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0); + inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, false); if (!hdr->check) hdr->check = CSUM_MANGLED_0; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 8a8b2abc3..96777f9a9 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -105,21 +105,15 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); -void nf_queue_nf_hook_drop(struct nf_hook_ops *ops) +void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops) { const struct nf_queue_handler *qh; - struct net *net; - rtnl_lock(); rcu_read_lock(); qh = rcu_dereference(queue_handler); - if (qh) { - for_each_net(net) { - qh->nf_hook_drop(net, ops); - } - } + if (qh) + qh->nf_hook_drop(net, ops); rcu_read_unlock(); - rtnl_unlock(); } /* diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index d6ee8f8b1..c8a4a48bc 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -17,10 +17,12 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_tcpudp.h> #include <linux/netfilter/xt_SYNPROXY.h> + #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_synproxy.h> +#include <net/netfilter/nf_conntrack_zones.h> int synproxy_net_id; EXPORT_SYMBOL_GPL(synproxy_net_id); @@ -186,7 +188,7 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb, const struct nf_conn_synproxy *synproxy) { unsigned int optoff, optend; - u32 *ptr, old; + __be32 *ptr, old; if (synproxy->tsoff == 0) return 1; @@ -214,18 +216,18 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb, if (op[0] == TCPOPT_TIMESTAMP && op[1] == TCPOLEN_TIMESTAMP) { if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { - ptr = (u32 *)&op[2]; + ptr = (__be32 *)&op[2]; old = *ptr; *ptr = htonl(ntohl(*ptr) - synproxy->tsoff); } else { - ptr = (u32 *)&op[6]; + ptr = (__be32 *)&op[6]; old = *ptr; *ptr = htonl(ntohl(*ptr) + synproxy->tsoff); } inet_proto_csum_replace4(&th->check, skb, - old, *ptr, 0); + old, *ptr, false); return 1; } optoff += op[1]; @@ -352,7 +354,7 @@ static int __net_init synproxy_net_init(struct net *net) struct nf_conn *ct; int err = -ENOMEM; - ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL); + ct = nf_ct_tmpl_alloc(net, &nf_ct_zone_dflt, GFP_KERNEL); if (!ct) goto err1; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index cfe636808..4a41eb92b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -130,20 +130,24 @@ static void nft_trans_destroy(struct nft_trans *trans) int nft_register_basechain(struct nft_base_chain *basechain, unsigned int hook_nops) { + struct net *net = read_pnet(&basechain->pnet); + if (basechain->flags & NFT_BASECHAIN_DISABLED) return 0; - return nf_register_hooks(basechain->ops, hook_nops); + return nf_register_net_hooks(net, basechain->ops, hook_nops); } EXPORT_SYMBOL_GPL(nft_register_basechain); void nft_unregister_basechain(struct nft_base_chain *basechain, unsigned int hook_nops) { + struct net *net = read_pnet(&basechain->pnet); + if (basechain->flags & NFT_BASECHAIN_DISABLED) return; - nf_unregister_hooks(basechain->ops, hook_nops); + nf_unregister_net_hooks(net, basechain->ops, hook_nops); } EXPORT_SYMBOL_GPL(nft_unregister_basechain); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index f77bad46a..05d0b0353 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -114,7 +114,6 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { const struct nft_chain *chain = ops->priv, *basechain = chain; - const struct net *chain_net = read_pnet(&nft_base_chain(basechain)->pnet); const struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); const struct nft_rule *rule; const struct nft_expr *expr, *last; @@ -125,10 +124,6 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) int rulenum; unsigned int gencursor = nft_genmask_cur(net); - /* Ignore chains that are not for the current network namespace */ - if (!net_eq(net, chain_net)) - return NF_ACCEPT; - do_chain: rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index c18af2f63..fefbf5f0b 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -27,8 +27,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure"); -static LIST_HEAD(nfnl_acct_list); - struct nf_acct { atomic64_t pkts; atomic64_t bytes; @@ -53,6 +51,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { struct nf_acct *nfacct, *matching = NULL; + struct net *net = sock_net(nfnl); char *acct_name; unsigned int size = 0; u32 flags = 0; @@ -64,7 +63,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, if (strlen(acct_name) == 0) return -EINVAL; - list_for_each_entry(nfacct, &nfnl_acct_list, head) { + list_for_each_entry(nfacct, &net->nfnl_acct_list, head) { if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0) continue; @@ -124,7 +123,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS]))); } atomic_set(&nfacct->refcnt, 1); - list_add_tail_rcu(&nfacct->head, &nfnl_acct_list); + list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list); return 0; } @@ -185,6 +184,7 @@ nla_put_failure: static int nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct nf_acct *cur, *last; const struct nfacct_filter *filter = cb->data; @@ -196,7 +196,7 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[1] = 0; rcu_read_lock(); - list_for_each_entry_rcu(cur, &nfnl_acct_list, head) { + list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) { if (last) { if (cur != last) continue; @@ -257,6 +257,7 @@ static int nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { + struct net *net = sock_net(nfnl); int ret = -ENOENT; struct nf_acct *cur; char *acct_name; @@ -283,7 +284,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, return -EINVAL; acct_name = nla_data(tb[NFACCT_NAME]); - list_for_each_entry(cur, &nfnl_acct_list, head) { + list_for_each_entry(cur, &net->nfnl_acct_list, head) { struct sk_buff *skb2; if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) @@ -336,19 +337,20 @@ static int nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { + struct net *net = sock_net(nfnl); char *acct_name; struct nf_acct *cur; int ret = -ENOENT; if (!tb[NFACCT_NAME]) { - list_for_each_entry(cur, &nfnl_acct_list, head) + list_for_each_entry(cur, &net->nfnl_acct_list, head) nfnl_acct_try_del(cur); return 0; } acct_name = nla_data(tb[NFACCT_NAME]); - list_for_each_entry(cur, &nfnl_acct_list, head) { + list_for_each_entry(cur, &net->nfnl_acct_list, head) { if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0) continue; @@ -394,12 +396,12 @@ static const struct nfnetlink_subsystem nfnl_acct_subsys = { MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT); -struct nf_acct *nfnl_acct_find_get(const char *acct_name) +struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name) { struct nf_acct *cur, *acct = NULL; rcu_read_lock(); - list_for_each_entry_rcu(cur, &nfnl_acct_list, head) { + list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) { if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) continue; @@ -422,7 +424,9 @@ EXPORT_SYMBOL_GPL(nfnl_acct_find_get); void nfnl_acct_put(struct nf_acct *acct) { - atomic_dec(&acct->refcnt); + if (atomic_dec_and_test(&acct->refcnt)) + kfree_rcu(acct, rcu_head); + module_put(THIS_MODULE); } EXPORT_SYMBOL_GPL(nfnl_acct_put); @@ -478,34 +482,59 @@ int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct) } EXPORT_SYMBOL_GPL(nfnl_acct_overquota); +static int __net_init nfnl_acct_net_init(struct net *net) +{ + INIT_LIST_HEAD(&net->nfnl_acct_list); + + return 0; +} + +static void __net_exit nfnl_acct_net_exit(struct net *net) +{ + struct nf_acct *cur, *tmp; + + list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) { + list_del_rcu(&cur->head); + + if (atomic_dec_and_test(&cur->refcnt)) + kfree_rcu(cur, rcu_head); + } +} + +static struct pernet_operations nfnl_acct_ops = { + .init = nfnl_acct_net_init, + .exit = nfnl_acct_net_exit, +}; + static int __init nfnl_acct_init(void) { int ret; + ret = register_pernet_subsys(&nfnl_acct_ops); + if (ret < 0) { + pr_err("nfnl_acct_init: failed to register pernet ops\n"); + goto err_out; + } + pr_info("nfnl_acct: registering with nfnetlink.\n"); ret = nfnetlink_subsys_register(&nfnl_acct_subsys); if (ret < 0) { pr_err("nfnl_acct_init: cannot register with nfnetlink.\n"); - goto err_out; + goto cleanup_pernet; } return 0; + +cleanup_pernet: + unregister_pernet_subsys(&nfnl_acct_ops); err_out: return ret; } static void __exit nfnl_acct_exit(void) { - struct nf_acct *cur, *tmp; - pr_info("nfnl_acct: unregistering from nfnetlink.\n"); nfnetlink_subsys_unregister(&nfnl_acct_subsys); - - list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) { - list_del_rcu(&cur->head); - /* We are sure that our objects have no clients at this point, - * it's safe to release them all without checking refcnt. */ - kfree_rcu(cur, rcu_head); - } + unregister_pernet_subsys(&nfnl_acct_ops); } module_init(nfnl_acct_init); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 685cc6a17..a5cd6d90b 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -301,7 +301,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, __be32 **packet_id_ptr) { size_t size; - size_t data_len = 0, cap_len = 0; + size_t data_len = 0, cap_len = 0, rem_len = 0; unsigned int hlen = 0; struct sk_buff *skb; struct nlattr *nla; @@ -360,6 +360,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, hlen = min_t(unsigned int, hlen, data_len); size += sizeof(struct nlattr) + hlen; cap_len = entskb->len; + rem_len = data_len - hlen; break; } @@ -377,7 +378,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, size += nla_total_size(seclen); } - skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, + skb = __netlink_alloc_skb(net->nfnl, size, rem_len, queue->peer_portid, GFP_ATOMIC); if (!skb) { skb_tx_error(entskb); diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 175912392..1067fb4c1 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -18,39 +18,59 @@ #include <net/netfilter/nf_tables.h> struct nft_counter { - seqlock_t lock; u64 bytes; u64 packets; }; +struct nft_counter_percpu { + struct nft_counter counter; + struct u64_stats_sync syncp; +}; + +struct nft_counter_percpu_priv { + struct nft_counter_percpu __percpu *counter; +}; + static void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { - struct nft_counter *priv = nft_expr_priv(expr); - - write_seqlock_bh(&priv->lock); - priv->bytes += pkt->skb->len; - priv->packets++; - write_sequnlock_bh(&priv->lock); + struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); + struct nft_counter_percpu *this_cpu; + + local_bh_disable(); + this_cpu = this_cpu_ptr(priv->counter); + u64_stats_update_begin(&this_cpu->syncp); + this_cpu->counter.bytes += pkt->skb->len; + this_cpu->counter.packets++; + u64_stats_update_end(&this_cpu->syncp); + local_bh_enable(); } static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr) { - struct nft_counter *priv = nft_expr_priv(expr); + struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); + struct nft_counter_percpu *cpu_stats; + struct nft_counter total; + u64 bytes, packets; unsigned int seq; - u64 bytes; - u64 packets; - - do { - seq = read_seqbegin(&priv->lock); - bytes = priv->bytes; - packets = priv->packets; - } while (read_seqretry(&priv->lock, seq)); - - if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes))) - goto nla_put_failure; - if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets))) + int cpu; + + memset(&total, 0, sizeof(total)); + for_each_possible_cpu(cpu) { + cpu_stats = per_cpu_ptr(priv->counter, cpu); + do { + seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + bytes = cpu_stats->counter.bytes; + packets = cpu_stats->counter.packets; + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); + + total.packets += packets; + total.bytes += bytes; + } + + if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) || + nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets))) goto nla_put_failure; return 0; @@ -67,23 +87,44 @@ static int nft_counter_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { - struct nft_counter *priv = nft_expr_priv(expr); + struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); + struct nft_counter_percpu __percpu *cpu_stats; + struct nft_counter_percpu *this_cpu; + + cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu); + if (cpu_stats == NULL) + return ENOMEM; + + preempt_disable(); + this_cpu = this_cpu_ptr(cpu_stats); + if (tb[NFTA_COUNTER_PACKETS]) { + this_cpu->counter.packets = + be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); + } + if (tb[NFTA_COUNTER_BYTES]) { + this_cpu->counter.bytes = + be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); + } + preempt_enable(); + priv->counter = cpu_stats; + return 0; +} - if (tb[NFTA_COUNTER_PACKETS]) - priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); - if (tb[NFTA_COUNTER_BYTES]) - priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); +static void nft_counter_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_counter_percpu_priv *priv = nft_expr_priv(expr); - seqlock_init(&priv->lock); - return 0; + free_percpu(priv->counter); } static struct nft_expr_type nft_counter_type; static const struct nft_expr_ops nft_counter_ops = { .type = &nft_counter_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_counter)), + .size = NFT_EXPR_SIZE(sizeof(struct nft_counter_percpu_priv)), .eval = nft_counter_eval, .init = nft_counter_init, + .destroy = nft_counter_destroy, .dump = nft_counter_dump, }; diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 435c1ccd6..5d67938f8 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -20,63 +20,79 @@ static DEFINE_SPINLOCK(limit_lock); struct nft_limit { + u64 last; u64 tokens; + u64 tokens_max; u64 rate; - u64 unit; - unsigned long stamp; + u64 nsecs; + u32 burst; }; -static void nft_limit_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost) { - struct nft_limit *priv = nft_expr_priv(expr); + u64 now, tokens; + s64 delta; spin_lock_bh(&limit_lock); - if (time_after_eq(jiffies, priv->stamp)) { - priv->tokens = priv->rate; - priv->stamp = jiffies + priv->unit * HZ; - } - - if (priv->tokens >= 1) { - priv->tokens--; + now = ktime_get_ns(); + tokens = limit->tokens + now - limit->last; + if (tokens > limit->tokens_max) + tokens = limit->tokens_max; + + limit->last = now; + delta = tokens - cost; + if (delta >= 0) { + limit->tokens = delta; spin_unlock_bh(&limit_lock); - return; + return false; } + limit->tokens = tokens; spin_unlock_bh(&limit_lock); - - regs->verdict.code = NFT_BREAK; + return true; } -static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { - [NFTA_LIMIT_RATE] = { .type = NLA_U64 }, - [NFTA_LIMIT_UNIT] = { .type = NLA_U64 }, -}; - -static int nft_limit_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, +static int nft_limit_init(struct nft_limit *limit, const struct nlattr * const tb[]) { - struct nft_limit *priv = nft_expr_priv(expr); + u64 unit; if (tb[NFTA_LIMIT_RATE] == NULL || tb[NFTA_LIMIT_UNIT] == NULL) return -EINVAL; - priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); - priv->unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); - priv->stamp = jiffies + priv->unit * HZ; - priv->tokens = priv->rate; + limit->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE])); + unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT])); + limit->nsecs = unit * NSEC_PER_SEC; + if (limit->rate == 0 || limit->nsecs < unit) + return -EOVERFLOW; + limit->tokens = limit->tokens_max = limit->nsecs; + + if (tb[NFTA_LIMIT_BURST]) { + u64 rate; + + limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST])); + + rate = limit->rate + limit->burst; + if (rate < limit->rate) + return -EOVERFLOW; + + limit->rate = rate; + } + limit->last = ktime_get_ns(); + return 0; } -static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit, + enum nft_limit_type type) { - const struct nft_limit *priv = nft_expr_priv(expr); + u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC); + u64 rate = limit->rate - limit->burst; - if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate))) - goto nla_put_failure; - if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit))) + if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate)) || + nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs)) || + nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(limit->burst)) || + nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type))) goto nla_put_failure; return 0; @@ -84,18 +100,114 @@ nla_put_failure: return -1; } +struct nft_limit_pkts { + struct nft_limit limit; + u64 cost; +}; + +static void nft_limit_pkts_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_limit_pkts *priv = nft_expr_priv(expr); + + if (nft_limit_eval(&priv->limit, priv->cost)) + regs->verdict.code = NFT_BREAK; +} + +static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { + [NFTA_LIMIT_RATE] = { .type = NLA_U64 }, + [NFTA_LIMIT_UNIT] = { .type = NLA_U64 }, + [NFTA_LIMIT_BURST] = { .type = NLA_U32 }, + [NFTA_LIMIT_TYPE] = { .type = NLA_U32 }, +}; + +static int nft_limit_pkts_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_limit_pkts *priv = nft_expr_priv(expr); + int err; + + err = nft_limit_init(&priv->limit, tb); + if (err < 0) + return err; + + priv->cost = div_u64(priv->limit.nsecs, priv->limit.rate); + return 0; +} + +static int nft_limit_pkts_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_limit_pkts *priv = nft_expr_priv(expr); + + return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS); +} + static struct nft_expr_type nft_limit_type; -static const struct nft_expr_ops nft_limit_ops = { +static const struct nft_expr_ops nft_limit_pkts_ops = { + .type = &nft_limit_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)), + .eval = nft_limit_pkts_eval, + .init = nft_limit_pkts_init, + .dump = nft_limit_pkts_dump, +}; + +static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_limit *priv = nft_expr_priv(expr); + u64 cost = div_u64(priv->nsecs * pkt->skb->len, priv->rate); + + if (nft_limit_eval(priv, cost)) + regs->verdict.code = NFT_BREAK; +} + +static int nft_limit_pkt_bytes_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_limit *priv = nft_expr_priv(expr); + + return nft_limit_init(priv, tb); +} + +static int nft_limit_pkt_bytes_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_limit *priv = nft_expr_priv(expr); + + return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES); +} + +static const struct nft_expr_ops nft_limit_pkt_bytes_ops = { .type = &nft_limit_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_limit)), - .eval = nft_limit_eval, - .init = nft_limit_init, - .dump = nft_limit_dump, + .eval = nft_limit_pkt_bytes_eval, + .init = nft_limit_pkt_bytes_init, + .dump = nft_limit_pkt_bytes_dump, }; +static const struct nft_expr_ops * +nft_limit_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_LIMIT_TYPE] == NULL) + return &nft_limit_pkts_ops; + + switch (ntohl(nla_get_be32(tb[NFTA_LIMIT_TYPE]))) { + case NFT_LIMIT_PKTS: + return &nft_limit_pkts_ops; + case NFT_LIMIT_PKT_BYTES: + return &nft_limit_pkt_bytes_ops; + } + return ERR_PTR(-EOPNOTSUPP); +} + static struct nft_expr_type nft_limit_type __read_mostly = { .name = "limit", - .ops = &nft_limit_ops, + .select_ops = nft_limit_select_ops, .policy = nft_limit_policy, .maxattr = NFTA_LIMIT_MAX, .flags = NFT_EXPR_STATEFUL, diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 52561e1c3..cb2f13ebb 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -166,11 +166,13 @@ void nft_meta_get_eval(const struct nft_expr *expr, goto err; *dest = out->group; break; +#ifdef CONFIG_CGROUP_NET_CLASSID case NFT_META_CGROUP: if (skb->sk == NULL || !sk_fullsock(skb->sk)) goto err; *dest = skb->sk->sk_classid; break; +#endif default: WARN_ON(1); goto err; @@ -246,7 +248,9 @@ int nft_meta_get_init(const struct nft_ctx *ctx, case NFT_META_CPU: case NFT_META_IIFGROUP: case NFT_META_OIFGROUP: +#ifdef CONFIG_CGROUP_NET_CLASSID case NFT_META_CGROUP: +#endif len = sizeof(u32); break; case NFT_META_IIFNAME: diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 94fb3b27a..09b4b07eb 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -9,6 +9,7 @@ */ #include <linux/kernel.h> +#include <linux/if_vlan.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> @@ -17,6 +18,53 @@ #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> +/* add vlan header into the user buffer for if tag was removed by offloads */ +static bool +nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) +{ + int mac_off = skb_mac_header(skb) - skb->data; + u8 vlan_len, *vlanh, *dst_u8 = (u8 *) d; + struct vlan_ethhdr veth; + + vlanh = (u8 *) &veth; + if (offset < ETH_HLEN) { + u8 ethlen = min_t(u8, len, ETH_HLEN - offset); + + if (skb_copy_bits(skb, mac_off, &veth, ETH_HLEN)) + return false; + + veth.h_vlan_proto = skb->vlan_proto; + + memcpy(dst_u8, vlanh + offset, ethlen); + + len -= ethlen; + if (len == 0) + return true; + + dst_u8 += ethlen; + offset = ETH_HLEN; + } else if (offset >= VLAN_ETH_HLEN) { + offset -= VLAN_HLEN; + goto skip; + } + + veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); + veth.h_vlan_encapsulated_proto = skb->protocol; + + vlanh += offset; + + vlan_len = min_t(u8, len, VLAN_ETH_HLEN - offset); + memcpy(dst_u8, vlanh, vlan_len); + + len -= vlan_len; + if (!len) + return true; + + dst_u8 += vlan_len; + skip: + return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0; +} + static void nft_payload_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -26,10 +74,18 @@ static void nft_payload_eval(const struct nft_expr *expr, u32 *dest = ®s->data[priv->dreg]; int offset; + dest[priv->len / NFT_REG32_SIZE] = 0; switch (priv->base) { case NFT_PAYLOAD_LL_HEADER: if (!skb_mac_header_was_set(skb)) goto err; + + if (skb_vlan_tag_present(skb)) { + if (!nft_payload_copy_vlan(dest, skb, + priv->offset, priv->len)) + goto err; + return; + } offset = skb_mac_header(skb) - skb->data; break; case NFT_PAYLOAD_NETWORK_HEADER: @@ -43,7 +99,6 @@ static void nft_payload_eval(const struct nft_expr *expr, } offset += priv->offset; - dest[priv->len / NFT_REG32_SIZE] = 0; if (skb_copy_bits(skb, offset, dest, priv->len) < 0) goto err; return; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index d324fe712..9b42b5ea6 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -67,9 +67,6 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = { [NFPROTO_IPV6] = "ip6", }; -/* Allow this many total (re)entries. */ -static const unsigned int xt_jumpstack_multiplier = 2; - /* Registration hooks for targets. */ int xt_register_target(struct xt_target *target) { @@ -688,8 +685,6 @@ void xt_free_table_info(struct xt_table_info *info) kvfree(info->jumpstack); } - free_percpu(info->stackptr); - kvfree(info); } EXPORT_SYMBOL(xt_free_table_info); @@ -732,15 +727,14 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock); DEFINE_PER_CPU(seqcount_t, xt_recseq); EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq); +struct static_key xt_tee_enabled __read_mostly; +EXPORT_SYMBOL_GPL(xt_tee_enabled); + static int xt_jumpstack_alloc(struct xt_table_info *i) { unsigned int size; int cpu; - i->stackptr = alloc_percpu(unsigned int); - if (i->stackptr == NULL) - return -ENOMEM; - size = sizeof(void **) * nr_cpu_ids; if (size > PAGE_SIZE) i->jumpstack = vzalloc(size); @@ -749,8 +743,21 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) if (i->jumpstack == NULL) return -ENOMEM; - i->stacksize *= xt_jumpstack_multiplier; - size = sizeof(void *) * i->stacksize; + /* ruleset without jumps -- no stack needed */ + if (i->stacksize == 0) + return 0; + + /* Jumpstack needs to be able to record two full callchains, one + * from the first rule set traversal, plus one table reentrancy + * via -j TEE without clobbering the callchain that brought us to + * TEE target. + * + * This is done by allocating two jumpstacks per cpu, on reentry + * the upper half of the stack is used. + * + * see the jumpstack setup in ipt_do_table() for more details. + */ + size = sizeof(void *) * i->stacksize * 2u; for_each_possible_cpu(cpu) { if (size > PAGE_SIZE) i->jumpstack[cpu] = vmalloc_node(size, diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index f3377ce1f..faf32d888 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -181,9 +181,23 @@ out: #endif } +static u16 xt_ct_flags_to_dir(const struct xt_ct_target_info_v1 *info) +{ + switch (info->flags & (XT_CT_ZONE_DIR_ORIG | + XT_CT_ZONE_DIR_REPL)) { + case XT_CT_ZONE_DIR_ORIG: + return NF_CT_ZONE_DIR_ORIG; + case XT_CT_ZONE_DIR_REPL: + return NF_CT_ZONE_DIR_REPL; + default: + return NF_CT_DEFAULT_ZONE_DIR; + } +} + static int xt_ct_tg_check(const struct xt_tgchk_param *par, struct xt_ct_target_info_v1 *info) { + struct nf_conntrack_zone zone; struct nf_conn *ct; int ret = -EOPNOTSUPP; @@ -193,7 +207,9 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, } #ifndef CONFIG_NF_CONNTRACK_ZONES - if (info->zone) + if (info->zone || info->flags & (XT_CT_ZONE_DIR_ORIG | + XT_CT_ZONE_DIR_REPL | + XT_CT_ZONE_MARK)) goto err1; #endif @@ -201,7 +217,13 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, if (ret < 0) goto err1; - ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL); + memset(&zone, 0, sizeof(zone)); + zone.id = info->zone; + zone.dir = xt_ct_flags_to_dir(info); + if (info->flags & XT_CT_ZONE_MARK) + zone.flags |= NF_CT_FLAG_MARK; + + ct = nf_ct_tmpl_alloc(par->net, &zone, GFP_KERNEL); if (!ct) { ret = -ENOMEM; goto err2; diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 8c3190e2f..8c02501a5 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -144,7 +144,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, inet_proto_csum_replace2(&tcph->check, skb, htons(oldmss), htons(newmss), - 0); + false); return 0; } } @@ -185,18 +185,18 @@ tcpmss_mangle_packet(struct sk_buff *skb, memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr)); inet_proto_csum_replace2(&tcph->check, skb, - htons(len), htons(len + TCPOLEN_MSS), 1); + htons(len), htons(len + TCPOLEN_MSS), true); opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; opt[3] = newmss & 0x00ff; - inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0); + inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), false); oldval = ((__be16 *)tcph)[6]; tcph->doff += TCPOLEN_MSS/4; inet_proto_csum_replace2(&tcph->check, skb, - oldval, ((__be16 *)tcph)[6], 0); + oldval, ((__be16 *)tcph)[6], false); return TCPOLEN_MSS; } diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index 625fa1d63..eb92bffff 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -80,7 +80,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb, n <<= 8; } inet_proto_csum_replace2(&tcph->check, skb, htons(o), - htons(n), 0); + htons(n), false); } memset(opt + i, TCPOPT_NOP, optl); } diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index a747eb475..fd980aa77 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -10,26 +10,15 @@ * modify it under the terms of the GNU General Public License * version 2 or later, as published by the Free Software Foundation. */ -#include <linux/ip.h> #include <linux/module.h> -#include <linux/percpu.h> -#include <linux/route.h> #include <linux/skbuff.h> -#include <linux/notifier.h> -#include <net/checksum.h> -#include <net/icmp.h> -#include <net/ip.h> -#include <net/ipv6.h> -#include <net/ip6_route.h> -#include <net/route.h> +#include <linux/route.h> #include <linux/netfilter/x_tables.h> +#include <net/route.h> +#include <net/netfilter/ipv4/nf_dup_ipv4.h> +#include <net/netfilter/ipv6/nf_dup_ipv6.h> #include <linux/netfilter/xt_TEE.h> -#if IS_ENABLED(CONFIG_NF_CONNTRACK) -# define WITH_CONNTRACK 1 -# include <net/netfilter/nf_conntrack.h> -#endif - struct xt_tee_priv { struct notifier_block notifier; struct xt_tee_tginfo *tginfo; @@ -37,163 +26,25 @@ struct xt_tee_priv { }; static const union nf_inet_addr tee_zero_address; -static DEFINE_PER_CPU(bool, tee_active); - -static struct net *pick_net(struct sk_buff *skb) -{ -#ifdef CONFIG_NET_NS - const struct dst_entry *dst; - - if (skb->dev != NULL) - return dev_net(skb->dev); - dst = skb_dst(skb); - if (dst != NULL && dst->dev != NULL) - return dev_net(dst->dev); -#endif - return &init_net; -} - -static bool -tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) -{ - const struct iphdr *iph = ip_hdr(skb); - struct net *net = pick_net(skb); - struct rtable *rt; - struct flowi4 fl4; - - memset(&fl4, 0, sizeof(fl4)); - if (info->priv) { - if (info->priv->oif == -1) - return false; - fl4.flowi4_oif = info->priv->oif; - } - fl4.daddr = info->gw.ip; - fl4.flowi4_tos = RT_TOS(iph->tos); - fl4.flowi4_scope = RT_SCOPE_UNIVERSE; - fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH; - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) - return false; - - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - skb->dev = rt->dst.dev; - skb->protocol = htons(ETH_P_IP); - return true; -} static unsigned int tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; - struct iphdr *iph; - if (__this_cpu_read(tee_active)) - return XT_CONTINUE; - /* - * Copy the skb, and route the copy. Will later return %XT_CONTINUE for - * the original skb, which should continue on its way as if nothing has - * happened. The copy should be independently delivered to the TEE - * --gateway. - */ - skb = pskb_copy(skb, GFP_ATOMIC); - if (skb == NULL) - return XT_CONTINUE; - -#ifdef WITH_CONNTRACK - /* Avoid counting cloned packets towards the original connection. */ - nf_conntrack_put(skb->nfct); - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); -#endif - /* - * If we are in PREROUTING/INPUT, the checksum must be recalculated - * since the length could have changed as a result of defragmentation. - * - * We also decrease the TTL to mitigate potential TEE loops - * between two hosts. - * - * Set %IP_DF so that the original source is notified of a potentially - * decreased MTU on the clone route. IPv6 does this too. - */ - iph = ip_hdr(skb); - iph->frag_off |= htons(IP_DF); - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_IN) - --iph->ttl; - ip_send_check(iph); + nf_dup_ipv4(skb, par->hooknum, &info->gw.in, info->priv->oif); - if (tee_tg_route4(skb, info)) { - __this_cpu_write(tee_active, true); - ip_local_out(skb); - __this_cpu_write(tee_active, false); - } else { - kfree_skb(skb); - } return XT_CONTINUE; } -#if IS_ENABLED(CONFIG_IPV6) -static bool -tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) -{ - const struct ipv6hdr *iph = ipv6_hdr(skb); - struct net *net = pick_net(skb); - struct dst_entry *dst; - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - if (info->priv) { - if (info->priv->oif == -1) - return false; - fl6.flowi6_oif = info->priv->oif; - } - fl6.daddr = info->gw.in6; - fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | - (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; - fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; - dst = ip6_route_output(net, NULL, &fl6); - if (dst->error) { - dst_release(dst); - return false; - } - skb_dst_drop(skb); - skb_dst_set(skb, dst); - skb->dev = dst->dev; - skb->protocol = htons(ETH_P_IPV6); - return true; -} - +#if IS_ENABLED(CONFIG_NF_DUP_IPV6) static unsigned int tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; - if (__this_cpu_read(tee_active)) - return XT_CONTINUE; - skb = pskb_copy(skb, GFP_ATOMIC); - if (skb == NULL) - return XT_CONTINUE; + nf_dup_ipv6(skb, par->hooknum, &info->gw.in6, info->priv->oif); -#ifdef WITH_CONNTRACK - nf_conntrack_put(skb->nfct); - skb->nfct = &nf_ct_untracked_get()->ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); -#endif - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_IN) { - struct ipv6hdr *iph = ipv6_hdr(skb); - --iph->hop_limit; - } - if (tee_tg_route6(skb, info)) { - __this_cpu_write(tee_active, true); - ip6_local_out(skb); - __this_cpu_write(tee_active, false); - } else { - kfree_skb(skb); - } return XT_CONTINUE; } #endif @@ -252,6 +103,7 @@ static int tee_tg_check(const struct xt_tgchk_param *par) } else info->priv = NULL; + static_key_slow_inc(&xt_tee_enabled); return 0; } @@ -263,6 +115,7 @@ static void tee_tg_destroy(const struct xt_tgdtor_param *par) unregister_netdevice_notifier(&info->priv->notifier); kfree(info->priv); } + static_key_slow_dec(&xt_tee_enabled); } static struct xt_target tee_tg_reg[] __read_mostly = { @@ -276,7 +129,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = { .destroy = tee_tg_destroy, .me = THIS_MODULE, }, -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_NF_DUP_IPV6) { .name = "TEE", .revision = 1, diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index cca96cec1..d0c96c5ae 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -272,8 +272,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport, hp->source, lport ? lport : hp->dest, skb->dev, NFT_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule(inet_twsk(sk)); - inet_twsk_put(inet_twsk(sk)); + inet_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; } } @@ -437,8 +436,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, tgi->lport ? tgi->lport : hp->dest, skb->dev, NFT_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule(inet_twsk(sk)); - inet_twsk_put(inet_twsk(sk)); + inet_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; } } diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index 9f8719df2..bb9cbeb18 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -42,10 +42,6 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par) XT_CONNLABEL_OP_SET; struct xt_connlabel_mtinfo *info = par->matchinfo; int ret; - size_t words; - - if (info->bit > XT_CONNLABEL_MAXBIT) - return -ERANGE; if (info->options & ~options) { pr_err("Unknown options in mask %x\n", info->options); @@ -59,19 +55,15 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par) return ret; } - par->net->ct.labels_used++; - words = BITS_TO_LONGS(info->bit+1); - if (words > par->net->ct.label_words) - par->net->ct.label_words = words; - + ret = nf_connlabels_get(par->net, info->bit + 1); + if (ret < 0) + nf_ct_l3proto_module_put(par->family); return ret; } static void connlabel_mt_destroy(const struct xt_mtdtor_param *par) { - par->net->ct.labels_used--; - if (par->net->ct.labels_used == 0) - par->net->ct.label_words = 0; + nf_connlabels_put(par->net); nf_ct_l3proto_module_put(par->family); } diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 29ba6218a..075d89d94 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -134,7 +134,7 @@ static bool add_hlist(struct hlist_head *head, static unsigned int check_hlist(struct net *net, struct hlist_head *head, const struct nf_conntrack_tuple *tuple, - u16 zone, + const struct nf_conntrack_zone *zone, bool *addit) { const struct nf_conntrack_tuple_hash *found; @@ -201,7 +201,7 @@ static unsigned int count_tree(struct net *net, struct rb_root *root, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, - u8 family, u16 zone) + u8 family, const struct nf_conntrack_zone *zone) { struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES]; struct rb_node **rbnode, *parent; @@ -290,7 +290,8 @@ static int count_them(struct net *net, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, - u_int8_t family, u16 zone) + u_int8_t family, + const struct nf_conntrack_zone *zone) { struct rb_root *root; int count; @@ -321,10 +322,10 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) union nf_inet_addr addr; struct nf_conntrack_tuple tuple; const struct nf_conntrack_tuple *tuple_ptr = &tuple; + const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int connections; - u16 zone = NF_CT_DEFAULT_ZONE; ct = nf_ct_get(skb, &ctinfo); if (ct != NULL) { diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index 8c646ed9c..3048a7e3a 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -37,7 +37,7 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par) struct xt_nfacct_match_info *info = par->matchinfo; struct nf_acct *nfacct; - nfacct = nfnl_acct_find_get(info->name); + nfacct = nfnl_acct_find_get(par->net, info->name); if (nfacct == NULL) { pr_info("xt_nfacct: accounting object with name `%s' " "does not exists\n", info->name); |