author     André Fabian Silva Delgado <emulatorman@parabola.nu>  2016-01-20 14:01:31 -0300
committer  André Fabian Silva Delgado <emulatorman@parabola.nu>  2016-01-20 14:01:31 -0300
commit     b4b7ff4b08e691656c9d77c758fc355833128ac0 (patch)
tree       82fcb00e6b918026dc9f2d1f05ed8eee83874cc0 /net/netfilter
parent     35acfa0fc609f2a2cd95cef4a6a9c3a5c38f1778 (diff)
Linux-libre 4.4-gnupck-4.4-gnu
Diffstat (limited to 'net/netfilter')
-rw-r--r--  net/netfilter/Kconfig  21
-rw-r--r--  net/netfilter/Makefile  2
-rw-r--r--  net/netfilter/core.c  13
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_gen.h  17
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_ip.c  14
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_ipmac.c  64
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_port.c  18
-rw-r--r--  net/netfilter/ipset/ip_set_core.c  23
-rw-r--r--  net/netfilter/ipset/ip_set_hash_gen.h  26
-rw-r--r--  net/netfilter/ipset/ip_set_list_set.c  5
-rw-r--r--  net/netfilter/ipvs/ip_vs_app.c  36
-rw-r--r--  net/netfilter/ipvs/ip_vs_conn.c  91
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c  550
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c  291
-rw-r--r--  net/netfilter/ipvs/ip_vs_est.c  20
-rw-r--r--  net/netfilter/ipvs/ip_vs_ftp.c  27
-rw-r--r--  net/netfilter/ipvs/ip_vs_lblc.c  3
-rw-r--r--  net/netfilter/ipvs/ip_vs_lblcr.c  3
-rw-r--r--  net/netfilter/ipvs/ip_vs_nfct.c  5
-rw-r--r--  net/netfilter/ipvs/ip_vs_pe_sip.c  2
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto.c  33
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_ah_esp.c  32
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_sctp.c  58
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_tcp.c  61
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_udp.c  49
-rw-r--r--  net/netfilter/ipvs/ip_vs_sh.c  45
-rw-r--r--  net/netfilter/ipvs/ip_vs_sync.c  87
-rw-r--r--  net/netfilter/ipvs/ip_vs_xmit.c  85
-rw-r--r--  net/netfilter/nf_conntrack_core.c  22
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c  98
-rw-r--r--  net/netfilter/nf_conntrack_proto_dccp.c  2
-rw-r--r--  net/netfilter/nf_conntrack_proto_generic.c  2
-rw-r--r--  net/netfilter/nf_conntrack_proto_gre.c  3
-rw-r--r--  net/netfilter/nf_conntrack_proto_sctp.c  2
-rw-r--r--  net/netfilter/nf_conntrack_proto_tcp.c  2
-rw-r--r--  net/netfilter/nf_conntrack_proto_udp.c  1
-rw-r--r--  net/netfilter/nf_conntrack_proto_udplite.c  1
-rw-r--r--  net/netfilter/nf_nat_core.c  4
-rw-r--r--  net/netfilter/nf_nat_redirect.c  2
-rw-r--r--  net/netfilter/nf_queue.c  42
-rw-r--r--  net/netfilter/nf_tables_api.c  100
-rw-r--r--  net/netfilter/nf_tables_core.c  10
-rw-r--r--  net/netfilter/nf_tables_netdev.c  22
-rw-r--r--  net/netfilter/nfnetlink.c  8
-rw-r--r--  net/netfilter/nfnetlink_cttimeout.c  34
-rw-r--r--  net/netfilter/nfnetlink_log.c  89
-rw-r--r--  net/netfilter/nfnetlink_queue.c (renamed from net/netfilter/nfnetlink_queue_core.c)  76
-rw-r--r--  net/netfilter/nfnetlink_queue_ct.c  113
-rw-r--r--  net/netfilter/nft_counter.c  49
-rw-r--r--  net/netfilter/nft_ct.c  1
-rw-r--r--  net/netfilter/nft_dynset.c  5
-rw-r--r--  net/netfilter/nft_log.c  3
-rw-r--r--  net/netfilter/nft_meta.c  40
-rw-r--r--  net/netfilter/nft_queue.c  2
-rw-r--r--  net/netfilter/nft_reject_inet.c  19
-rw-r--r--  net/netfilter/x_tables.c  1
-rw-r--r--  net/netfilter/xt_CT.c  7
-rw-r--r--  net/netfilter/xt_LOG.c  2
-rw-r--r--  net/netfilter/xt_NFLOG.c  2
-rw-r--r--  net/netfilter/xt_TCPMSS.c  2
-rw-r--r--  net/netfilter/xt_TEE.c  6
-rw-r--r--  net/netfilter/xt_TPROXY.c  24
-rw-r--r--  net/netfilter/xt_addrtype.c  4
-rw-r--r--  net/netfilter/xt_connlimit.c  4
-rw-r--r--  net/netfilter/xt_ipvs.c  5
-rw-r--r--  net/netfilter/xt_osf.c  2
-rw-r--r--  net/netfilter/xt_owner.c  6
-rw-r--r--  net/netfilter/xt_recent.c  2
-rw-r--r--  net/netfilter/xt_socket.c  14
69 files changed, 1293 insertions, 1221 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 3e1b4abf1..4692782b5 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -354,7 +354,7 @@ config NF_CT_NETLINK_HELPER
select NETFILTER_NETLINK
depends on NF_CT_NETLINK
depends on NETFILTER_NETLINK_QUEUE
- depends on NETFILTER_NETLINK_QUEUE_CT
+ depends on NETFILTER_NETLINK_GLUE_CT
depends on NETFILTER_ADVANCED
help
This option enables the user-space connection tracking helpers
@@ -362,13 +362,14 @@ config NF_CT_NETLINK_HELPER
If unsure, say `N'.
-config NETFILTER_NETLINK_QUEUE_CT
- bool "NFQUEUE integration with Connection Tracking"
- default n
- depends on NETFILTER_NETLINK_QUEUE
+config NETFILTER_NETLINK_GLUE_CT
+ bool "NFQUEUE and NFLOG integration with Connection Tracking"
+ default n
+ depends on (NETFILTER_NETLINK_QUEUE || NETFILTER_NETLINK_LOG) && NF_CT_NETLINK
help
- If this option is enabled, NFQUEUE can include Connection Tracking
- information together with the packet is the enqueued via NFNETLINK.
+ If this option is enabled, NFQUEUE and NFLOG can include
+	  Connection Tracking information together with the packet if it
+	  is enqueued via NFNETLINK.
config NF_NAT
tristate
@@ -868,7 +869,7 @@ config NETFILTER_XT_TARGET_TEE
depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DUP_IPV4
- select NF_DUP_IPV6 if IP6_NF_IPTABLES
+ select NF_DUP_IPV6 if IP6_NF_IPTABLES != n
---help---
This option adds a "TEE" target with which a packet can be cloned and
this clone be rerouted to another nexthop.
@@ -881,7 +882,7 @@ config NETFILTER_XT_TARGET_TPROXY
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
- select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
+ select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
help
This option adds a `TPROXY' target, which is somewhat similar to
REDIRECT. It can only be used in the mangle table and is useful
@@ -1374,7 +1375,7 @@ config NETFILTER_XT_MATCH_SOCKET
depends on IPV6 || IPV6=n
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
select NF_DEFRAG_IPV4
- select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
+ select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
help
This option adds a `socket' match, which can be used to match
packets for which a TCP or UDP socket lookup finds a valid socket.
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 70d026d46..7638c36b4 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -10,8 +10,6 @@ obj-$(CONFIG_NETFILTER) = netfilter.o
obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
-nfnetlink_queue-y := nfnetlink_queue_core.o
-nfnetlink_queue-$(CONFIG_NETFILTER_NETLINK_QUEUE_CT) += nfnetlink_queue_ct.o
obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 21a085686..f39276d1c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -271,7 +271,7 @@ unsigned int nf_iterate(struct list_head *head,
/* Optimization: we don't need to hold module
reference here, since function can't sleep. --RR */
repeat:
- verdict = (*elemp)->hook(*elemp, skb, state);
+ verdict = (*elemp)->hook((*elemp)->priv, skb, state);
if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
if (unlikely((verdict & NF_VERDICT_MASK)
@@ -315,8 +315,6 @@ next_hook:
int err = nf_queue(skb, elem, state,
verdict >> NF_VERDICT_QBITS);
if (err < 0) {
- if (err == -ECANCELED)
- goto next_hook;
if (err == -ESRCH &&
(verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
goto next_hook;
@@ -350,6 +348,12 @@ int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
}
EXPORT_SYMBOL(skb_make_writable);
+/* This needs to be compiled in any case to avoid dependencies between the
+ * nfnetlink_queue code and nf_conntrack.
+ */
+struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nfnl_ct_hook);
+
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence
@@ -387,9 +391,6 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct)
}
EXPORT_SYMBOL(nf_conntrack_destroy);
-struct nfq_ct_hook __rcu *nfq_ct_hook __read_mostly;
-EXPORT_SYMBOL_GPL(nfq_ct_hook);
-
/* Built-in default zone used e.g. by modules. */
const struct nf_conntrack_zone nf_ct_zone_dflt = {
.id = NF_CT_DEFAULT_ZONE_ID,
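
The core.c hunk above moves the conntrack glue pointer into the netfilter core as nfnl_ct_hook, so both nfnetlink_queue and nfnetlink_log can attach conntrack data while nf_conntrack remains optional. Below is a minimal, self-contained sketch of the publish/consume pattern such a hook pointer follows; it uses C11 atomics in place of the kernel's rcu_assign_pointer()/rcu_dereference(), and the struct members are hypothetical, not the real fields of struct nfnl_ct_hook.

#include <stdatomic.h>
#include <stdio.h>

/* Hypothetical glue-hook operations; the real struct nfnl_ct_hook is
 * defined by the kernel and has different members. */
struct ct_glue_hook {
	void (*attach)(const char *pkt);
};

/* One globally published hook pointer, NULL while the glue is absent. */
static _Atomic(struct ct_glue_hook *) glue_hook;

static void demo_attach(const char *pkt)
{
	printf("attaching conntrack info to %s\n", pkt);
}

static struct ct_glue_hook demo_hook = { .attach = demo_attach };

/* Module load: publish the hook (kernel: rcu_assign_pointer()). */
static void register_glue(void)
{
	atomic_store_explicit(&glue_hook, &demo_hook, memory_order_release);
}

/* Packet path: consume the hook if present (kernel: rcu_dereference()). */
static void deliver_packet(const char *pkt)
{
	struct ct_glue_hook *h =
		atomic_load_explicit(&glue_hook, memory_order_acquire);

	if (h)
		h->attach(pkt);		/* glue loaded, add CT info */
	else
		printf("no conntrack glue, delivering %s as-is\n", pkt);
}

int main(void)
{
	deliver_packet("skb#1");	/* before the glue registers */
	register_glue();
	deliver_packet("skb#2");	/* after it registers */
	return 0;
}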
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index d05e759ed..b0bc475f6 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -33,7 +33,7 @@
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype MTYPE
-#define get_ext(set, map, id) ((map)->extensions + (set)->dsize * (id))
+#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
static void
mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
@@ -67,12 +67,9 @@ mtype_destroy(struct ip_set *set)
del_timer_sync(&map->gc);
ip_set_free(map->members);
- if (set->dsize) {
- if (set->extensions & IPSET_EXT_DESTROY)
- mtype_ext_cleanup(set);
- ip_set_free(map->extensions);
- }
- kfree(map);
+ if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
+ mtype_ext_cleanup(set);
+ ip_set_free(map);
set->data = NULL;
}
@@ -92,16 +89,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
{
const struct mtype *map = set->data;
struct nlattr *nested;
+ size_t memsize = sizeof(*map) + map->memsize;
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested)
goto nla_put_failure;
if (mtype_do_head(skb, map) ||
nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
- nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
- htonl(sizeof(*map) +
- map->memsize +
- set->dsize * map->elements)))
+ nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
goto nla_put_failure;
if (unlikely(ip_set_put_flags(skb, set)))
goto nla_put_failure;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 64a564334..4783efff0 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -41,7 +41,6 @@ MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
struct bitmap_ip {
void *members; /* the set members */
- void *extensions; /* data extensions */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
@@ -49,6 +48,8 @@ struct bitmap_ip {
size_t memsize; /* members size */
u8 netmask; /* subnet netmask */
struct timer_list gc; /* garbage collection */
+ unsigned char extensions[0] /* data extensions */
+ __aligned(__alignof__(u64));
};
/* ADT structure for generic function args */
@@ -224,13 +225,6 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
- if (set->dsize) {
- map->extensions = ip_set_alloc(set->dsize * elements);
- if (!map->extensions) {
- kfree(map->members);
- return false;
- }
- }
map->first_ip = first_ip;
map->last_ip = last_ip;
map->elements = elements;
@@ -316,13 +310,13 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
pr_debug("hosts %u, elements %llu\n",
hosts, (unsigned long long)elements);
- map = kzalloc(sizeof(*map), GFP_KERNEL);
+ set->dsize = ip_set_elem_len(set, tb, 0, 0);
+ map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
if (!map)
return -ENOMEM;
map->memsize = bitmap_bytes(0, elements - 1);
set->variant = &bitmap_ip;
- set->dsize = ip_set_elem_len(set, tb, 0);
if (!init_map_ip(set, map, first_ip, last_ip,
elements, hosts, netmask)) {
kfree(map);
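
The bitmap_ip change above (and the matching bitmap_ipmac and bitmap_port hunks below) drops the separately allocated extensions buffer in favour of a flexible array member at the end of the map, sized in one allocation as sizeof(*map) + elements * set->dsize and indexed by id * dsize. A minimal, self-contained sketch of that allocation pattern, using hypothetical names rather than ipset's real ones:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative stand-in for the bitmap types: per-element extension data
 * lives in a flexible array member at the end of the map, so one
 * allocation (and one free) covers the map and all extension slots. */
struct demo_map {
	size_t elements;	/* number of slots */
	size_t dsize;		/* bytes of extension data per slot */
	unsigned char extensions[] __attribute__((aligned(8)));
};

#define get_ext(map, id) ((map)->extensions + (map)->dsize * (id))

static struct demo_map *demo_map_create(size_t elements, size_t dsize)
{
	struct demo_map *map;

	map = calloc(1, sizeof(*map) + elements * dsize);
	if (!map)
		return NULL;
	map->elements = elements;
	map->dsize = dsize;
	return map;
}

int main(void)
{
	struct demo_map *map = demo_map_create(4, 16);

	if (!map)
		return 1;
	/* Write into slot 2's extension area and read it back. */
	strcpy((char *)get_ext(map, 2), "slot two");
	printf("%s\n", (char *)get_ext(map, 2));
	free(map);	/* single free releases map and extensions together */
	return 0;
}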
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 143053511..29dde2083 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -47,24 +47,26 @@ enum {
/* Type structure */
struct bitmap_ipmac {
void *members; /* the set members */
- void *extensions; /* MAC + data extensions */
u32 first_ip; /* host byte order, included in range */
u32 last_ip; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
size_t memsize; /* members size */
struct timer_list gc; /* garbage collector */
+ unsigned char extensions[0] /* MAC + data extensions */
+ __aligned(__alignof__(u64));
};
/* ADT structure for generic function args */
struct bitmap_ipmac_adt_elem {
+ unsigned char ether[ETH_ALEN] __aligned(2);
u16 id;
- unsigned char *ether;
+ u16 add_mac;
};
struct bitmap_ipmac_elem {
unsigned char ether[ETH_ALEN];
unsigned char filled;
-} __attribute__ ((aligned));
+} __aligned(__alignof__(u64));
static inline u32
ip_to_id(const struct bitmap_ipmac *m, u32 ip)
@@ -72,11 +74,11 @@ ip_to_id(const struct bitmap_ipmac *m, u32 ip)
return ip - m->first_ip;
}
-static inline struct bitmap_ipmac_elem *
-get_elem(void *extensions, u16 id, size_t dsize)
-{
- return (struct bitmap_ipmac_elem *)(extensions + id * dsize);
-}
+#define get_elem(extensions, id, dsize) \
+ (struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
+
+#define get_const_elem(extensions, id, dsize) \
+ (const struct bitmap_ipmac_elem *)(extensions + (id) * (dsize))
/* Common functions */
@@ -88,10 +90,9 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
if (!test_bit(e->id, map->members))
return 0;
- elem = get_elem(map->extensions, e->id, dsize);
- if (elem->filled == MAC_FILLED)
- return !e->ether ||
- ether_addr_equal(e->ether, elem->ether);
+ elem = get_const_elem(map->extensions, e->id, dsize);
+ if (e->add_mac && elem->filled == MAC_FILLED)
+ return ether_addr_equal(e->ether, elem->ether);
/* Trigger kernel to fill out the ethernet address */
return -EAGAIN;
}
@@ -103,7 +104,7 @@ bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
if (!test_bit(id, map->members))
return 0;
- elem = get_elem(map->extensions, id, dsize);
+ elem = get_const_elem(map->extensions, id, dsize);
/* Timer not started for the incomplete elements */
return elem->filled == MAC_FILLED;
}
@@ -133,7 +134,7 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
* and we can reuse it later when MAC is filled out,
* possibly by the kernel
*/
- if (e->ether)
+ if (e->add_mac)
ip_set_timeout_set(timeout, t);
else
*timeout = t;
@@ -150,7 +151,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
elem = get_elem(map->extensions, e->id, dsize);
if (test_bit(e->id, map->members)) {
if (elem->filled == MAC_FILLED) {
- if (e->ether &&
+ if (e->add_mac &&
(flags & IPSET_FLAG_EXIST) &&
!ether_addr_equal(e->ether, elem->ether)) {
/* memcpy isn't atomic */
@@ -159,7 +160,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
ether_addr_copy(elem->ether, e->ether);
}
return IPSET_ADD_FAILED;
- } else if (!e->ether)
+ } else if (!e->add_mac)
/* Already added without ethernet address */
return IPSET_ADD_FAILED;
/* Fill the MAC address and trigger the timer activation */
@@ -168,7 +169,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
ether_addr_copy(elem->ether, e->ether);
elem->filled = MAC_FILLED;
return IPSET_ADD_START_STORED_TIMEOUT;
- } else if (e->ether) {
+ } else if (e->add_mac) {
/* We can store MAC too */
ether_addr_copy(elem->ether, e->ether);
elem->filled = MAC_FILLED;
@@ -191,7 +192,7 @@ bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
u32 id, size_t dsize)
{
const struct bitmap_ipmac_elem *elem =
- get_elem(map->extensions, id, dsize);
+ get_const_elem(map->extensions, id, dsize);
return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
htonl(map->first_ip + id)) ||
@@ -213,7 +214,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
{
struct bitmap_ipmac *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct bitmap_ipmac_adt_elem e = { .id = 0 };
+ struct bitmap_ipmac_adt_elem e = { .id = 0, .add_mac = 1 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
u32 ip;
@@ -231,7 +232,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
return -EINVAL;
e.id = ip_to_id(map, ip);
- e.ether = eth_hdr(skb)->h_source;
+ memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
@@ -265,11 +266,10 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
return -IPSET_ERR_BITMAP_RANGE;
e.id = ip_to_id(map, ip);
- if (tb[IPSET_ATTR_ETHER])
- e.ether = nla_data(tb[IPSET_ATTR_ETHER]);
- else
- e.ether = NULL;
-
+ if (tb[IPSET_ATTR_ETHER]) {
+ memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+ e.add_mac = 1;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
return ip_set_eexist(ret, flags) ? 0 : ret;
@@ -300,13 +300,6 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
- if (set->dsize) {
- map->extensions = ip_set_alloc(set->dsize * elements);
- if (!map->extensions) {
- kfree(map->members);
- return false;
- }
- }
map->first_ip = first_ip;
map->last_ip = last_ip;
map->elements = elements;
@@ -361,14 +354,15 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (elements > IPSET_BITMAP_MAX_RANGE + 1)
return -IPSET_ERR_BITMAP_RANGE_SIZE;
- map = kzalloc(sizeof(*map), GFP_KERNEL);
+ set->dsize = ip_set_elem_len(set, tb,
+ sizeof(struct bitmap_ipmac_elem),
+ __alignof__(struct bitmap_ipmac_elem));
+ map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
if (!map)
return -ENOMEM;
map->memsize = bitmap_bytes(0, elements - 1);
set->variant = &bitmap_ipmac;
- set->dsize = ip_set_elem_len(set, tb,
- sizeof(struct bitmap_ipmac_elem));
if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
kfree(map);
return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 5338ccd5d..7f0c73335 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -35,12 +35,13 @@ MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
struct bitmap_port {
void *members; /* the set members */
- void *extensions; /* data extensions */
u16 first_port; /* host byte order, included in range */
u16 last_port; /* host byte order, included in range */
u32 elements; /* number of max elements in the set */
size_t memsize; /* members size */
struct timer_list gc; /* garbage collection */
+ unsigned char extensions[0] /* data extensions */
+ __aligned(__alignof__(u64));
};
/* ADT structure for generic function args */
@@ -209,13 +210,6 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
map->members = ip_set_alloc(map->memsize);
if (!map->members)
return false;
- if (set->dsize) {
- map->extensions = ip_set_alloc(set->dsize * map->elements);
- if (!map->extensions) {
- kfree(map->members);
- return false;
- }
- }
map->first_port = first_port;
map->last_port = last_port;
set->timeout = IPSET_NO_TIMEOUT;
@@ -232,6 +226,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
{
struct bitmap_port *map;
u16 first_port, last_port;
+ u32 elements;
if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
!ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
@@ -248,14 +243,15 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
last_port = tmp;
}
- map = kzalloc(sizeof(*map), GFP_KERNEL);
+ elements = last_port - first_port + 1;
+ set->dsize = ip_set_elem_len(set, tb, 0, 0);
+ map = ip_set_alloc(sizeof(*map) + elements * set->dsize);
if (!map)
return -ENOMEM;
- map->elements = last_port - first_port + 1;
+ map->elements = elements;
map->memsize = bitmap_bytes(0, map->elements);
set->variant = &bitmap_port;
- set->dsize = ip_set_elem_len(set, tb, 0);
if (!init_map_port(set, map, first_port, last_port)) {
kfree(map);
return -ENOMEM;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 338b40477..54f3d7cb2 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -364,25 +364,27 @@ add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
}
size_t
-ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
+ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len,
+ size_t align)
{
enum ip_set_ext_id id;
- size_t offset = len;
u32 cadt_flags = 0;
if (tb[IPSET_ATTR_CADT_FLAGS])
cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
if (cadt_flags & IPSET_FLAG_WITH_FORCEADD)
set->flags |= IPSET_CREATE_FLAG_FORCEADD;
+ if (!align)
+ align = 1;
for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
if (!add_extension(id, cadt_flags, tb))
continue;
- offset = ALIGN(offset, ip_set_extensions[id].align);
- set->offset[id] = offset;
+ len = ALIGN(len, ip_set_extensions[id].align);
+ set->offset[id] = len;
set->extensions |= ip_set_extensions[id].type;
- offset += ip_set_extensions[id].len;
+ len += ip_set_extensions[id].len;
}
- return offset;
+ return ALIGN(len, align);
}
EXPORT_SYMBOL_GPL(ip_set_elem_len);
@@ -519,8 +521,7 @@ int
ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
const struct xt_action_param *par, struct ip_set_adt_opt *opt)
{
- struct ip_set *set = ip_set_rcu_get(
- dev_net(par->in ? par->in : par->out), index);
+ struct ip_set *set = ip_set_rcu_get(par->net, index);
int ret = 0;
BUG_ON(!set);
@@ -558,8 +559,7 @@ int
ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
const struct xt_action_param *par, struct ip_set_adt_opt *opt)
{
- struct ip_set *set = ip_set_rcu_get(
- dev_net(par->in ? par->in : par->out), index);
+ struct ip_set *set = ip_set_rcu_get(par->net, index);
int ret;
BUG_ON(!set);
@@ -581,8 +581,7 @@ int
ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
const struct xt_action_param *par, struct ip_set_adt_opt *opt)
{
- struct ip_set *set = ip_set_rcu_get(
- dev_net(par->in ? par->in : par->out), index);
+ struct ip_set *set = ip_set_rcu_get(par->net, index);
int ret = 0;
BUG_ON(!set);
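
The ip_set_elem_len() hunk above adds an align argument: each enabled extension is still placed at an ALIGN()ed offset after the fixed element, but the total is now rounded up to the element's own alignment so dsize-sized slots can be stacked back to back in the new flexible arrays. A self-contained sketch of that computation, with made-up extension sizes rather than the real ip_set_extensions[] table:

#include <stddef.h>
#include <stdio.h>

/* Kernel-style ALIGN(): round x up to a power-of-two boundary a. */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((size_t)(a) - 1))

/* Toy extension table; sizes and alignments are illustrative only. */
struct ext_desc {
	const char *name;
	size_t len;
	size_t align;
};

static const struct ext_desc exts[] = {
	{ "counter", 16, 8 },
	{ "timeout",  4, 4 },
	{ "comment",  8, 8 },
};

/* Mirrors the shape of ip_set_elem_len(): start after the fixed element,
 * place each extension at an aligned offset, then round the total up to
 * the element alignment. */
static size_t elem_len(size_t fixed_len, size_t elem_align)
{
	size_t len = fixed_len;
	size_t i;

	if (!elem_align)
		elem_align = 1;
	for (i = 0; i < sizeof(exts) / sizeof(exts[0]); i++) {
		len = ALIGN(len, exts[i].align);
		printf("%-8s at offset %zu\n", exts[i].name, len);
		len += exts[i].len;
	}
	return ALIGN(len, elem_align);
}

int main(void)
{
	printf("total element size: %zu\n", elem_len(6, 8));
	return 0;
}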
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 691b54fca..e5336ab36 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -72,8 +72,9 @@ struct hbucket {
DECLARE_BITMAP(used, AHASH_MAX_TUNED);
u8 size; /* size of the array */
u8 pos; /* position of the first free entry */
- unsigned char value[0]; /* the array of the values */
-} __attribute__ ((aligned));
+ unsigned char value[0] /* the array of the values */
+ __aligned(__alignof__(u64));
+};
/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
@@ -475,7 +476,7 @@ static void
mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
{
struct htable *t;
- struct hbucket *n;
+ struct hbucket *n, *tmp;
struct mtype_elem *data;
u32 i, j, d;
#ifdef IP_SET_HASH_WITH_NETS
@@ -510,9 +511,14 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
}
}
if (d >= AHASH_INIT_SIZE) {
- struct hbucket *tmp = kzalloc(sizeof(*tmp) +
- (n->size - AHASH_INIT_SIZE) * dsize,
- GFP_ATOMIC);
+ if (d >= n->size) {
+ rcu_assign_pointer(hbucket(t, i), NULL);
+ kfree_rcu(n, rcu);
+ continue;
+ }
+ tmp = kzalloc(sizeof(*tmp) +
+ (n->size - AHASH_INIT_SIZE) * dsize,
+ GFP_ATOMIC);
if (!tmp)
/* Still try to delete expired elements */
continue;
@@ -522,7 +528,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
continue;
data = ahash_data(n, j, dsize);
memcpy(tmp->value + d * dsize, data, dsize);
- set_bit(j, tmp->used);
+ set_bit(d, tmp->used);
d++;
}
tmp->pos = d;
@@ -1323,12 +1329,14 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
#endif
set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
set->dsize = ip_set_elem_len(set, tb,
- sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
+ sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)),
+ __alignof__(struct IPSET_TOKEN(HTYPE, 4_elem)));
#ifndef IP_SET_PROTO_UNDEF
} else {
set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
set->dsize = ip_set_elem_len(set, tb,
- sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
+ sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)),
+ __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
}
#endif
if (tb[IPSET_ATTR_TIMEOUT]) {
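
The mtype_expire() hunk above fixes the bucket-shrinking path: surviving entries are copied back to back into a new bucket, so the used bit must be set at the destination slot d rather than the old position j, and a bucket left with no live entries is now freed outright. A toy, self-contained compaction sketch of that invariant, independent of the real hbucket layout:

#include <stdio.h>
#include <string.h>

/* Compact the surviving entries of a sparse bucket into a fresh array,
 * as the expire path does after dropping timed-out elements.  The "used"
 * mark must follow the destination index d (the set_bit(d, ...) fix),
 * not the source index j. */
#define OLD_SIZE 8

struct entry {
	int value;
	int used;	/* stands in for the used bitmap */
};

int main(void)
{
	struct entry old[OLD_SIZE] = {
		[1] = { 10, 1 }, [4] = { 40, 1 }, [6] = { 60, 1 },
	};
	struct entry new[OLD_SIZE];
	int j, d = 0;

	memset(new, 0, sizeof(new));
	for (j = 0; j < OLD_SIZE; j++) {
		if (!old[j].used)
			continue;
		new[d].value = old[j].value;
		new[d].used = 1;	/* mark slot d, not slot j */
		d++;
	}
	for (j = 0; j < d; j++)
		printf("slot %d -> %d\n", j, new[j].value);
	return 0;
}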
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 5a30ce6e8..bbede95c9 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -31,7 +31,7 @@ struct set_elem {
struct rcu_head rcu;
struct list_head list;
ip_set_id_t id;
-};
+} __aligned(__alignof__(u64));
struct set_adt_elem {
ip_set_id_t id;
@@ -618,7 +618,8 @@ list_set_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
size = IP_SET_LIST_MIN_SIZE;
set->variant = &set_variant;
- set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem));
+ set->dsize = ip_set_elem_len(set, tb, sizeof(struct set_elem),
+ __alignof__(struct set_elem));
if (!init_list_set(net, set, size))
return -ENOMEM;
if (tb[IPSET_ATTR_TIMEOUT]) {
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index dfd7b65b3..0328f7250 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -75,7 +75,7 @@ static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
* Allocate/initialize app incarnation and register it in proto apps.
*/
static int
-ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
+ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
__u16 port)
{
struct ip_vs_protocol *pp;
@@ -107,7 +107,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
}
}
- ret = pp->register_app(net, inc);
+ ret = pp->register_app(ipvs, inc);
if (ret)
goto out;
@@ -127,7 +127,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
* Release app incarnation
*/
static void
-ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
+ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
struct ip_vs_protocol *pp;
@@ -135,7 +135,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
return;
if (pp->unregister_app)
- pp->unregister_app(net, inc);
+ pp->unregister_app(ipvs, inc);
IP_VS_DBG(9, "%s App %s:%u unregistered\n",
pp->name, inc->name, ntohs(inc->port));
@@ -175,14 +175,14 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc)
* Register an application incarnation in protocol applications
*/
int
-register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
+register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
__u16 port)
{
int result;
mutex_lock(&__ip_vs_app_mutex);
- result = ip_vs_app_inc_new(net, app, proto, port);
+ result = ip_vs_app_inc_new(ipvs, app, proto, port);
mutex_unlock(&__ip_vs_app_mutex);
@@ -191,15 +191,11 @@ register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
/* Register application for netns */
-struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app)
+struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_app *a;
int err = 0;
- if (!ipvs)
- return ERR_PTR(-ENOENT);
-
mutex_lock(&__ip_vs_app_mutex);
list_for_each_entry(a, &ipvs->app_list, a_list) {
@@ -230,21 +226,17 @@ out_unlock:
* We are sure there are no app incarnations attached to services
* Caller should use synchronize_rcu() or rcu_barrier()
*/
-void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
+void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_app *a, *anxt, *inc, *nxt;
- if (!ipvs)
- return;
-
mutex_lock(&__ip_vs_app_mutex);
list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
if (app && strcmp(app->name, a->name))
continue;
list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
- ip_vs_app_inc_release(net, inc);
+ ip_vs_app_inc_release(ipvs, inc);
}
list_del(&a->a_list);
@@ -611,17 +603,19 @@ static const struct file_operations ip_vs_app_fops = {
};
#endif
-int __net_init ip_vs_app_net_init(struct net *net)
+int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct net *net = ipvs->net;
INIT_LIST_HEAD(&ipvs->app_list);
proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops);
return 0;
}
-void __net_exit ip_vs_app_net_cleanup(struct net *net)
+void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
{
- unregister_ip_vs_app(net, NULL /* all */);
+ struct net *net = ipvs->net;
+
+ unregister_ip_vs_app(ipvs, NULL /* all */);
remove_proc_entry("ip_vs_app", net->proc_net);
}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index b0f7b626b..85ca189bd 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -108,7 +108,7 @@ static inline void ct_write_unlock_bh(unsigned int key)
/*
* Returns hash value for IPVS connection entry
*/
-static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned int proto,
+static unsigned int ip_vs_conn_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
const union nf_inet_addr *addr,
__be16 port)
{
@@ -116,11 +116,11 @@ static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned int pro
if (af == AF_INET6)
return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
(__force u32)port, proto, ip_vs_conn_rnd) ^
- ((size_t)net>>8)) & ip_vs_conn_tab_mask;
+ ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask;
#endif
return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
ip_vs_conn_rnd) ^
- ((size_t)net>>8)) & ip_vs_conn_tab_mask;
+ ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask;
}
static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
@@ -141,14 +141,14 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
port = p->vport;
}
- return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);
+ return ip_vs_conn_hashkey(p->ipvs, p->af, p->protocol, addr, port);
}
static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,
+ ip_vs_conn_fill_param(cp->ipvs, cp->af, cp->protocol,
&cp->caddr, cp->cport, NULL, 0, &p);
if (cp->pe) {
@@ -279,7 +279,7 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
p->protocol == cp->protocol &&
- ip_vs_conn_net_eq(cp, p->net)) {
+ cp->ipvs == p->ipvs) {
if (!__ip_vs_conn_get(cp))
continue;
/* HIT */
@@ -314,33 +314,34 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
}
static int
-ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
+ip_vs_conn_fill_param_proto(struct netns_ipvs *ipvs,
+ int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph,
- int inverse, struct ip_vs_conn_param *p)
+ struct ip_vs_conn_param *p)
{
__be16 _ports[2], *pptr;
- struct net *net = skb_net(skb);
pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
if (pptr == NULL)
return 1;
- if (likely(!inverse))
- ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
+ if (likely(!ip_vs_iph_inverse(iph)))
+ ip_vs_conn_fill_param(ipvs, af, iph->protocol, &iph->saddr,
pptr[0], &iph->daddr, pptr[1], p);
else
- ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,
+ ip_vs_conn_fill_param(ipvs, af, iph->protocol, &iph->daddr,
pptr[1], &iph->saddr, pptr[0], p);
return 0;
}
struct ip_vs_conn *
-ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph, int inverse)
+ip_vs_conn_in_get_proto(struct netns_ipvs *ipvs, int af,
+ const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
{
struct ip_vs_conn_param p;
- if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p))
+ if (ip_vs_conn_fill_param_proto(ipvs, af, skb, iph, &p))
return NULL;
return ip_vs_conn_in_get(&p);
@@ -359,7 +360,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
if (unlikely(p->pe_data && p->pe->ct_match)) {
- if (!ip_vs_conn_net_eq(cp, p->net))
+ if (cp->ipvs != p->ipvs)
continue;
if (p->pe == cp->pe && p->pe->ct_match(p, cp)) {
if (__ip_vs_conn_get(cp))
@@ -377,7 +378,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
p->vport == cp->vport && p->cport == cp->cport &&
cp->flags & IP_VS_CONN_F_TEMPLATE &&
p->protocol == cp->protocol &&
- ip_vs_conn_net_eq(cp, p->net)) {
+ cp->ipvs == p->ipvs) {
if (__ip_vs_conn_get(cp))
goto out;
}
@@ -418,7 +419,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
p->protocol == cp->protocol &&
- ip_vs_conn_net_eq(cp, p->net)) {
+ cp->ipvs == p->ipvs) {
if (!__ip_vs_conn_get(cp))
continue;
/* HIT */
@@ -439,12 +440,13 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
}
struct ip_vs_conn *
-ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph, int inverse)
+ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af,
+ const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
{
struct ip_vs_conn_param p;
- if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p))
+ if (ip_vs_conn_fill_param_proto(ipvs, af, skb, iph, &p))
return NULL;
return ip_vs_conn_out_get(&p);
@@ -638,7 +640,7 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
* so we can make the assumption that the svc_af is the same as the
* dest_af
*/
- dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, cp->af, &cp->daddr,
+ dest = ip_vs_find_dest(cp->ipvs, cp->af, cp->af, &cp->daddr,
cp->dport, &cp->vaddr, cp->vport,
cp->protocol, cp->fwmark, cp->flags);
if (dest) {
@@ -668,7 +670,7 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
#endif
ip_vs_bind_xmit(cp);
- pd = ip_vs_proto_data_get(ip_vs_conn_net(cp), cp->protocol);
+ pd = ip_vs_proto_data_get(cp->ipvs, cp->protocol);
if (pd && atomic_read(&pd->appcnt))
ip_vs_bind_app(cp, pd->pp);
}
@@ -746,7 +748,7 @@ static int expire_quiescent_template(struct netns_ipvs *ipvs,
int ip_vs_check_template(struct ip_vs_conn *ct)
{
struct ip_vs_dest *dest = ct->dest;
- struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
+ struct netns_ipvs *ipvs = ct->ipvs;
/*
* Checking the dest server status.
@@ -800,8 +802,7 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head)
static void ip_vs_conn_expire(unsigned long data)
{
struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
- struct net *net = ip_vs_conn_net(cp);
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct netns_ipvs *ipvs = cp->ipvs;
/*
* do I control anybody?
@@ -847,7 +848,7 @@ static void ip_vs_conn_expire(unsigned long data)
cp->timeout = 60*HZ;
if (ipvs->sync_state & IP_VS_STATE_MASTER)
- ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));
+ ip_vs_sync_conn(ipvs, cp, sysctl_sync_threshold(ipvs));
ip_vs_conn_put(cp);
}
@@ -875,8 +876,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
struct ip_vs_dest *dest, __u32 fwmark)
{
struct ip_vs_conn *cp;
- struct netns_ipvs *ipvs = net_ipvs(p->net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
+ struct netns_ipvs *ipvs = p->ipvs;
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->ipvs,
p->protocol);
cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
@@ -887,7 +888,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
INIT_HLIST_NODE(&cp->c_list);
setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
- ip_vs_conn_net_set(cp, p->net);
+ cp->ipvs = ipvs;
cp->af = p->af;
cp->daf = dest_af;
cp->protocol = p->protocol;
@@ -1061,7 +1062,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
size_t len = 0;
char dbuf[IP_VS_ADDRSTRLEN];
- if (!ip_vs_conn_net_eq(cp, net))
+ if (!net_eq(cp->ipvs->net, net))
return 0;
if (cp->pe_data) {
pe_data[0] = ' ';
@@ -1146,7 +1147,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
const struct ip_vs_conn *cp = v;
struct net *net = seq_file_net(seq);
- if (!ip_vs_conn_net_eq(cp, net))
+ if (!net_eq(cp->ipvs->net, net))
return 0;
#ifdef CONFIG_IP_VS_IPV6
@@ -1240,7 +1241,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
}
/* Called from keventd and must protect itself from softirqs */
-void ip_vs_random_dropentry(struct net *net)
+void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
{
int idx;
struct ip_vs_conn *cp, *cp_c;
@@ -1256,7 +1257,7 @@ void ip_vs_random_dropentry(struct net *net)
if (cp->flags & IP_VS_CONN_F_TEMPLATE)
/* connection template */
continue;
- if (!ip_vs_conn_net_eq(cp, net))
+ if (cp->ipvs != ipvs)
continue;
if (cp->protocol == IPPROTO_TCP) {
switch(cp->state) {
@@ -1308,18 +1309,17 @@ void ip_vs_random_dropentry(struct net *net)
/*
* Flush all the connection entries in the ip_vs_conn_tab
*/
-static void ip_vs_conn_flush(struct net *net)
+static void ip_vs_conn_flush(struct netns_ipvs *ipvs)
{
int idx;
struct ip_vs_conn *cp, *cp_c;
- struct netns_ipvs *ipvs = net_ipvs(net);
flush_again:
rcu_read_lock();
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
- if (!ip_vs_conn_net_eq(cp, net))
+ if (cp->ipvs != ipvs)
continue;
IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp);
@@ -1345,23 +1345,22 @@ flush_again:
/*
* per netns init and exit
*/
-int __net_init ip_vs_conn_net_init(struct net *net)
+int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
atomic_set(&ipvs->conn_count, 0);
- proc_create("ip_vs_conn", 0, net->proc_net, &ip_vs_conn_fops);
- proc_create("ip_vs_conn_sync", 0, net->proc_net, &ip_vs_conn_sync_fops);
+ proc_create("ip_vs_conn", 0, ipvs->net->proc_net, &ip_vs_conn_fops);
+ proc_create("ip_vs_conn_sync", 0, ipvs->net->proc_net,
+ &ip_vs_conn_sync_fops);
return 0;
}
-void __net_exit ip_vs_conn_net_cleanup(struct net *net)
+void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs)
{
/* flush all the connection entries first */
- ip_vs_conn_flush(net);
- remove_proc_entry("ip_vs_conn", net->proc_net);
- remove_proc_entry("ip_vs_conn_sync", net->proc_net);
+ ip_vs_conn_flush(ipvs);
+ remove_proc_entry("ip_vs_conn", ipvs->net->proc_net);
+ remove_proc_entry("ip_vs_conn_sync", ipvs->net->proc_net);
}
int __init ip_vs_conn_init(void)
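
The ip_vs_conn.c hunks above key the shared connection table by the per-netns struct netns_ipvs pointer instead of struct net: the pointer is mixed into the bucket hash and compared directly (cp->ipvs == p->ipvs) when matching. A small, self-contained sketch of that idea; the mixer below is purely illustrative and not the jhash_3words() call the kernel uses:

#include <stdint.h>
#include <stdio.h>

/* Illustrative bucket-index computation: hash the connection tuple, then
 * XOR in the per-namespace pointer (shifted to drop allocator alignment
 * bits) so entries from different namespaces spread across one table. */
#define TAB_BITS 4
#define TAB_MASK ((1u << TAB_BITS) - 1)

static uint32_t mix(uint32_t a, uint32_t b, uint32_t c)
{
	uint32_t h = a * 2654435761u;

	h ^= b + 0x9e3779b9u + (h << 6) + (h >> 2);
	h ^= c + 0x9e3779b9u + (h << 6) + (h >> 2);
	return h;
}

struct demo_netns { int dummy; };	/* stands in for struct netns_ipvs */

static unsigned int conn_hashkey(const struct demo_netns *ipvs,
				 uint32_t addr, uint16_t port, uint8_t proto)
{
	return (mix(addr, port, proto) ^ ((uintptr_t)ipvs >> 8)) & TAB_MASK;
}

int main(void)
{
	struct demo_netns ns1, ns2;

	/* Same tuple, different namespaces -> usually different buckets. */
	printf("ns1: %u\n", conn_hashkey(&ns1, 0x0a000001, 80, 6));
	printf("ns2: %u\n", conn_hashkey(&ns2, 0x0a000001, 80, 6));
	return 0;
}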
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 38fbc194b..f57b4dcdb 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -112,7 +112,7 @@ static inline void
ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_dest *dest = cp->dest;
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+ struct netns_ipvs *ipvs = cp->ipvs;
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
struct ip_vs_cpu_stats *s;
@@ -146,7 +146,7 @@ static inline void
ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_dest *dest = cp->dest;
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+ struct netns_ipvs *ipvs = cp->ipvs;
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
struct ip_vs_cpu_stats *s;
@@ -179,7 +179,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
static inline void
ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
{
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct netns_ipvs *ipvs = svc->ipvs;
struct ip_vs_cpu_stats *s;
s = this_cpu_ptr(cp->dest->stats.cpustats);
@@ -215,7 +215,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
const union nf_inet_addr *vaddr, __be16 vport,
struct ip_vs_conn_param *p)
{
- ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, protocol, caddr, cport, vaddr,
vport, p);
p->pe = rcu_dereference(svc->pe);
if (p->pe && p->pe->fill_param)
@@ -245,20 +245,30 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
union nf_inet_addr snet; /* source network of the client,
after masking */
+ const union nf_inet_addr *src_addr, *dst_addr;
+
+ if (likely(!ip_vs_iph_inverse(iph))) {
+ src_addr = &iph->saddr;
+ dst_addr = &iph->daddr;
+ } else {
+ src_addr = &iph->daddr;
+ dst_addr = &iph->saddr;
+ }
+
/* Mask saddr with the netmask to adjust template granularity */
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
- ipv6_addr_prefix(&snet.in6, &iph->saddr.in6,
+ ipv6_addr_prefix(&snet.in6, &src_addr->in6,
(__force __u32) svc->netmask);
else
#endif
- snet.ip = iph->saddr.ip & svc->netmask;
+ snet.ip = src_addr->ip & svc->netmask;
IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
"mnet %s\n",
- IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port),
- IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port),
+ IP_VS_DBG_ADDR(svc->af, src_addr), ntohs(src_port),
+ IP_VS_DBG_ADDR(svc->af, dst_addr), ntohs(dst_port),
IP_VS_DBG_ADDR(svc->af, &snet));
/*
@@ -276,7 +286,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
*/
{
int protocol = iph->protocol;
- const union nf_inet_addr *vaddr = &iph->daddr;
+ const union nf_inet_addr *vaddr = dst_addr;
__be16 vport = 0;
if (dst_port == svc->port) {
@@ -366,8 +376,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/*
* Create a new connection according to the template
*/
- ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
- src_port, &iph->daddr, dst_port, &param);
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, src_addr,
+ src_port, dst_addr, dst_port, &param);
cp = ip_vs_conn_new(&param, dest->af, &dest->addr, dport, flags, dest,
skb->mark);
@@ -418,7 +428,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_conn *cp = NULL;
struct ip_vs_scheduler *sched;
struct ip_vs_dest *dest;
- __be16 _ports[2], *pptr;
+ __be16 _ports[2], *pptr, cport, vport;
+ const void *caddr, *vaddr;
unsigned int flags;
*ignored = 1;
@@ -429,14 +440,26 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
if (pptr == NULL)
return NULL;
+ if (likely(!ip_vs_iph_inverse(iph))) {
+ cport = pptr[0];
+ caddr = &iph->saddr;
+ vport = pptr[1];
+ vaddr = &iph->daddr;
+ } else {
+ cport = pptr[1];
+ caddr = &iph->daddr;
+ vport = pptr[0];
+ vaddr = &iph->saddr;
+ }
+
/*
* FTPDATA needs this check when using local real server.
* Never schedule Active FTPDATA connections from real server.
* For LVS-NAT they must be already created. For other methods
* with persistence the connection is created on SYN+ACK.
*/
- if (pptr[0] == FTPDATA) {
- IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
+ if (cport == FTPDATA) {
+ IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off,
"Not scheduling FTPDATA");
return NULL;
}
@@ -444,19 +467,25 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
/*
* Do not schedule replies from local real server.
*/
- if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
- (cp = pp->conn_in_get(svc->af, skb, iph, 1))) {
- IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
- "Not scheduling reply for existing connection");
- __ip_vs_conn_put(cp);
- return NULL;
+ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) {
+ iph->hdr_flags ^= IP_VS_HDR_INVERSE;
+ cp = pp->conn_in_get(svc->ipvs, svc->af, skb, iph);
+ iph->hdr_flags ^= IP_VS_HDR_INVERSE;
+
+ if (cp) {
+ IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off,
+ "Not scheduling reply for existing"
+ " connection");
+ __ip_vs_conn_put(cp);
+ return NULL;
+ }
}
/*
* Persistent service
*/
if (svc->flags & IP_VS_SVC_F_PERSISTENT)
- return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored,
+ return ip_vs_sched_persist(svc, skb, cport, vport, ignored,
iph);
*ignored = 0;
@@ -464,7 +493,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
/*
* Non-persistent service
*/
- if (!svc->fwmark && pptr[1] != svc->port) {
+ if (!svc->fwmark && vport != svc->port) {
if (!svc->port)
pr_err("Schedule: port zero only supported "
"in persistent services, "
@@ -495,11 +524,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
- &iph->saddr, pptr[0], &iph->daddr,
- pptr[1], &p);
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
+ caddr, cport, vaddr, vport, &p);
cp = ip_vs_conn_new(&p, dest->af, &dest->addr,
- dest->port ? dest->port : pptr[1],
+ dest->port ? dest->port : vport,
flags, dest, skb->mark);
if (!cp) {
*ignored = -1;
@@ -519,6 +547,15 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
return cp;
}
+static inline int ip_vs_addr_is_unicast(struct net *net, int af,
+ union nf_inet_addr *addr)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ return ipv6_addr_type(&addr->in6) & IPV6_ADDR_UNICAST;
+#endif
+ return (inet_addr_type(net, addr->ip) == RTN_UNICAST);
+}
/*
* Pass or drop the packet.
@@ -528,33 +565,21 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph)
{
- __be16 _ports[2], *pptr;
-#ifdef CONFIG_SYSCTL
- struct net *net;
- struct netns_ipvs *ipvs;
- int unicast;
-#endif
+ __be16 _ports[2], *pptr, dport;
+ struct netns_ipvs *ipvs = svc->ipvs;
+ struct net *net = ipvs->net;
pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
- if (pptr == NULL) {
+ if (!pptr)
return NF_DROP;
- }
-
-#ifdef CONFIG_SYSCTL
- net = skb_net(skb);
-
-#ifdef CONFIG_IP_VS_IPV6
- if (svc->af == AF_INET6)
- unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST;
- else
-#endif
- unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST);
+ dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0];
/* if it is fwmark-based service, the cache_bypass sysctl is up
and the destination is a non-local unicast, then create
a cache_bypass connection entry */
- ipvs = net_ipvs(net);
- if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
+ if (sysctl_cache_bypass(ipvs) && svc->fwmark &&
+ !(iph->hdr_flags & (IP_VS_HDR_INVERSE | IP_VS_HDR_ICMP)) &&
+ ip_vs_addr_is_unicast(net, svc->af, &iph->daddr)) {
int ret;
struct ip_vs_conn *cp;
unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -566,7 +591,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
&iph->saddr, pptr[0],
&iph->daddr, pptr[1], &p);
cp = ip_vs_conn_new(&p, svc->af, &daddr, 0,
@@ -590,7 +615,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_conn_put(cp);
return ret;
}
-#endif
/*
* When the virtual ftp service is presented, packets destined
@@ -598,9 +622,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
* listed in the ipvs table), pass the packets, because it is
* not ipvs job to decide to drop the packets.
*/
- if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT))
+ if (svc->port == FTPPORT && dport != FTPPORT)
return NF_ACCEPT;
+ if (unlikely(ip_vs_iph_icmp(iph)))
+ return NF_DROP;
+
/*
* Notify the client that the destination is unreachable, and
* release the socket buffer.
@@ -610,11 +637,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
*/
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6) {
- if (!skb->dev) {
- struct net *net_ = dev_net(skb_dst(skb)->dev);
-
- skb->dev = net_->loopback_dev;
- }
+ if (!skb->dev)
+ skb->dev = net->loopback_dev;
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
} else
#endif
@@ -625,15 +649,13 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
#ifdef CONFIG_SYSCTL
-static int sysctl_snat_reroute(struct sk_buff *skb)
+static int sysctl_snat_reroute(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
return ipvs->sysctl_snat_reroute;
}
-static int sysctl_nat_icmp_send(struct net *net)
+static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
return ipvs->sysctl_nat_icmp_send;
}
@@ -644,8 +666,8 @@ static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
#else
-static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; }
-static int sysctl_nat_icmp_send(struct net *net) { return 0; }
+static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return 0; }
+static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return 0; }
static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; }
#endif
@@ -664,12 +686,13 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
return IP_DEFRAG_VS_OUT;
}
-static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
+static inline int ip_vs_gather_frags(struct netns_ipvs *ipvs,
+ struct sk_buff *skb, u_int32_t user)
{
int err;
local_bh_disable();
- err = ip_defrag(skb, user);
+ err = ip_defrag(ipvs->net, skb, user);
local_bh_enable();
if (!err)
ip_send_check(ip_hdr(skb));
@@ -677,10 +700,10 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
return err;
}
-static int ip_vs_route_me_harder(int af, struct sk_buff *skb,
- unsigned int hooknum)
+static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af,
+ struct sk_buff *skb, unsigned int hooknum)
{
- if (!sysctl_snat_reroute(skb))
+ if (!sysctl_snat_reroute(ipvs))
return 0;
/* Reroute replies only to remote clients (FORWARD and LOCAL_OUT) */
if (NF_INET_LOCAL_IN == hooknum)
@@ -690,12 +713,12 @@ static int ip_vs_route_me_harder(int af, struct sk_buff *skb,
struct dst_entry *dst = skb_dst(skb);
if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) &&
- ip6_route_me_harder(skb) != 0)
+ ip6_route_me_harder(ipvs->net, skb) != 0)
return 1;
} else
#endif
if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
- ip_route_me_harder(skb, RTN_LOCAL) != 0)
+ ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0)
return 1;
return 0;
@@ -848,7 +871,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
#endif
ip_vs_nat_icmp(skb, pp, cp, 1);
- if (ip_vs_route_me_harder(af, skb, hooknum))
+ if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum))
goto out;
/* do the statistics and put it back */
@@ -872,8 +895,8 @@ out:
* Find any that might be relevant, check against existing connections.
* Currently handles error types - unreachable, quench, ttl exceeded.
*/
-static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
- unsigned int hooknum)
+static int ip_vs_out_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ int *related, unsigned int hooknum)
{
struct iphdr *iph;
struct icmphdr _icmph, *ic;
@@ -888,7 +911,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
/* reassemble IP fragments */
if (ip_is_fragment(ip_hdr(skb))) {
- if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
+ if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum)))
return NF_STOLEN;
}
@@ -934,10 +957,10 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
"Checking outgoing ICMP for");
- ip_vs_fill_ip4hdr(cih, &ciph);
- ciph.len += offset;
+ ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph);
+
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(AF_INET, skb, &ciph, 1);
+ cp = pp->conn_out_get(ipvs, AF_INET, skb, &ciph);
if (!cp)
return NF_ACCEPT;
@@ -947,16 +970,16 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
}
#ifdef CONFIG_IP_VS_IPV6
-static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
- unsigned int hooknum, struct ip_vs_iphdr *ipvsh)
+static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ int *related, unsigned int hooknum,
+ struct ip_vs_iphdr *ipvsh)
{
struct icmp6hdr _icmph, *ic;
- struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */
struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
union nf_inet_addr snet;
- unsigned int writable;
+ unsigned int offset;
*related = 1;
ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh);
@@ -984,31 +1007,23 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
ic->icmp6_type, ntohs(icmpv6_id(ic)),
&ipvsh->saddr, &ipvsh->daddr);
- /* Now find the contained IP header */
- ciph.len = ipvsh->len + sizeof(_icmph);
- ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
- if (ip6h == NULL)
+ if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, ipvsh->len + sizeof(_icmph),
+ true, &ciph))
return NF_ACCEPT; /* The packet looks wrong, ignore */
- ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */
- ciph.daddr.in6 = ip6h->daddr;
- /* skip possible IPv6 exthdrs of contained IPv6 packet */
- ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
- if (ciph.protocol < 0)
- return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
pp = ip_vs_proto_get(ciph.protocol);
if (!pp)
return NF_ACCEPT;
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1);
+ cp = pp->conn_out_get(ipvs, AF_INET6, skb, &ciph);
if (!cp)
return NF_ACCEPT;
snet.in6 = ciph.saddr.in6;
- writable = ciph.len;
+ offset = ciph.len;
return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp,
- pp, writable, sizeof(struct ipv6hdr),
+ pp, offset, sizeof(struct ipv6hdr),
hooknum);
}
#endif
@@ -1093,7 +1108,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
{
struct ip_vs_protocol *pp = pd->pp;
- IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
+ IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet");
if (!skb_make_writable(skb, iph->len))
goto drop;
@@ -1127,10 +1142,10 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* if it came from this machine itself. So re-compute
* the routing information.
*/
- if (ip_vs_route_me_harder(af, skb, hooknum))
+ if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum))
goto drop;
- IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
+ IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT");
ip_vs_out_stats(cp, skb);
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
@@ -1155,13 +1170,13 @@ drop:
* Check if outgoing packet belongs to the established ip_vs_conn.
*/
static unsigned int
-ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
+ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
{
- struct net *net = NULL;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
+ struct sock *sk;
EnterFunction(11);
@@ -1169,29 +1184,27 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
if (skb->ipvs_property)
return NF_ACCEPT;
+ sk = skb_to_full_sk(skb);
/* Bad... Do not break raw sockets */
- if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+ if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- struct sock *sk = skb->sk;
- struct inet_sock *inet = inet_sk(skb->sk);
- if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+ if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
return NF_ACCEPT;
}
if (unlikely(!skb_dst(skb)))
return NF_ACCEPT;
- net = skb_net(skb);
- if (!net_ipvs(net)->enable)
+ if (!ipvs->enable)
return NF_ACCEPT;
- ip_vs_fill_iph_skb(af, skb, &iph);
+ ip_vs_fill_iph_skb(af, skb, false, &iph);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related;
- int verdict = ip_vs_out_icmp_v6(skb, &related,
+ int verdict = ip_vs_out_icmp_v6(ipvs, skb, &related,
hooknum, &iph);
if (related)
@@ -1201,13 +1214,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
#endif
if (unlikely(iph.protocol == IPPROTO_ICMP)) {
int related;
- int verdict = ip_vs_out_icmp(skb, &related, hooknum);
+ int verdict = ip_vs_out_icmp(ipvs, skb, &related, hooknum);
if (related)
return verdict;
}
- pd = ip_vs_proto_data_get(net, iph.protocol);
+ pd = ip_vs_proto_data_get(ipvs, iph.protocol);
if (unlikely(!pd))
return NF_ACCEPT;
pp = pd->pp;
@@ -1217,21 +1230,21 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
if (af == AF_INET)
#endif
if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) {
- if (ip_vs_gather_frags(skb,
+ if (ip_vs_gather_frags(ipvs, skb,
ip_vs_defrag_user(hooknum)))
return NF_STOLEN;
- ip_vs_fill_ip4hdr(skb_network_header(skb), &iph);
+ ip_vs_fill_iph_skb(AF_INET, skb, false, &iph);
}
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(af, skb, &iph, 0);
+ cp = pp->conn_out_get(ipvs, af, skb, &iph);
if (likely(cp))
return handle_response(af, skb, pd, cp, &iph, hooknum);
- if (sysctl_nat_icmp_send(net) &&
+ if (sysctl_nat_icmp_send(ipvs) &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
pp->protocol == IPPROTO_SCTP)) {
@@ -1241,7 +1254,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
sizeof(_ports), _ports, &iph);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
- if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr,
+ if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr,
pptr[0])) {
/*
* Notify the real server: there is no
@@ -1258,7 +1271,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (!skb->dev)
- skb->dev = net->loopback_dev;
+ skb->dev = ipvs->net->loopback_dev;
icmpv6_send(skb,
ICMPV6_DEST_UNREACH,
ICMPV6_PORT_UNREACH,
@@ -1272,7 +1285,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
}
}
}
- IP_VS_DBG_PKT(12, af, pp, skb, 0,
+ IP_VS_DBG_PKT(12, af, pp, skb, iph.off,
"ip_vs_out: packet continues traversal as normal");
return NF_ACCEPT;
}
@@ -1283,10 +1296,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_reply4(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_out(ops->hooknum, skb, AF_INET);
+ return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET);
}
/*
@@ -1294,10 +1307,10 @@ ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_local_reply4(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_out(ops->hooknum, skb, AF_INET);
+ return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
@@ -1308,10 +1321,10 @@ ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_reply6(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_out(ops->hooknum, skb, AF_INET6);
+ return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6);
}
/*
@@ -1319,14 +1332,51 @@ ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_local_reply6(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_out(ops->hooknum, skb, AF_INET6);
+ return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6);
}
#endif
+static unsigned int
+ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
+ int *verdict, struct ip_vs_conn **cpp,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_protocol *pp = pd->pp;
+
+ if (!iph->fragoffs) {
+ /* No (second) fragments need to enter here, as nf_defrag_ipv6
+ * replayed fragment zero will already have created the cp
+ */
+
+ /* Schedule and create new connection entry into cpp */
+ if (!pp->conn_schedule(ipvs, af, skb, pd, verdict, cpp, iph))
+ return 0;
+ }
+
+ if (unlikely(!*cpp)) {
+ /* sorry, all this trouble for a no-hit :) */
+ IP_VS_DBG_PKT(12, af, pp, skb, iph->off,
+ "ip_vs_in: packet continues traversal as normal");
+ if (iph->fragoffs) {
+ /* Fragment that couldn't be mapped to a conn entry
+ * is missing module nf_defrag_ipv6
+ */
+ IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
+ IP_VS_DBG_PKT(7, af, pp, skb, iph->off,
+ "unhandled fragment");
+ }
+ *verdict = NF_ACCEPT;
+ return 0;
+ }
+
+ return 1;
+}
+
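The helper added above factors out the scheduling step shared by ip_vs_in() and the two ICMP paths: it returns 0 with *verdict filled in when IPVS should stop processing the packet, and 1 with *cpp pointing at a connection otherwise. A minimal caller sketch of that contract (the function name example_schedule_caller is hypothetical, used only for illustration):

static int example_schedule_caller(struct netns_ipvs *ipvs, int af,
				   struct sk_buff *skb,
				   struct ip_vs_proto_data *pd,
				   struct ip_vs_iphdr *iph)
{
	struct ip_vs_conn *cp = NULL;
	int verdict;

	/* 0 => hand the verdict (usually NF_ACCEPT) back to netfilter */
	if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &verdict, &cp, iph))
		return verdict;

	/* 1 => cp holds a freshly scheduled connection; drop it when done */
	ip_vs_conn_put(cp);
	return NF_ACCEPT;
}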
/*
* Handle ICMP messages in the outside-to-inside direction (incoming).
* Find any that might be relevant, check against existing connections,
@@ -1334,9 +1384,9 @@ ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Currently handles error types - unreachable, quench, ttl exceeded.
*/
static int
-ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
+ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
+ unsigned int hooknum)
{
- struct net *net = NULL;
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
@@ -1345,13 +1395,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
unsigned int offset, offset2, ihl, verdict;
- bool ipip;
+ bool ipip, new_cp = false;
*related = 1;
/* reassemble IP fragments */
if (ip_is_fragment(ip_hdr(skb))) {
- if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
+ if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum)))
return NF_STOLEN;
}
@@ -1385,8 +1435,6 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
- net = skb_net(skb);
-
/* Special case for errors for IPIP packets */
ipip = false;
if (cih->protocol == IPPROTO_IPIP) {
@@ -1402,7 +1450,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
ipip = true;
}
- pd = ip_vs_proto_data_get(net, cih->protocol);
+ pd = ip_vs_proto_data_get(ipvs, cih->protocol);
if (!pd)
return NF_ACCEPT;
pp = pd->pp;
@@ -1416,15 +1464,24 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
"Checking incoming ICMP for");
offset2 = offset;
- ip_vs_fill_ip4hdr(cih, &ciph);
- ciph.len += offset;
+ ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, !ipip, &ciph);
offset = ciph.len;
+
/* The embedded headers contain source and dest in reverse order.
* For IPIP this is error for request, not for reply.
*/
- cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1);
- if (!cp)
- return NF_ACCEPT;
+ cp = pp->conn_in_get(ipvs, AF_INET, skb, &ciph);
+
+ if (!cp) {
+ int v;
+
+ if (!sysctl_schedule_icmp(ipvs))
+ return NF_ACCEPT;
+
+ if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
+ return v;
+ new_cp = true;
+ }
verdict = NF_DROP;
@@ -1455,7 +1512,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
skb_reset_network_header(skb);
IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
&ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu);
- ipv4_update_pmtu(skb, dev_net(skb->dev),
+ ipv4_update_pmtu(skb, ipvs->net,
mtu, 0, 0, 0, 0);
/* Client uses PMTUD? */
if (!(frag_off & htons(IP_DF)))
@@ -1501,23 +1558,26 @@ ignore_ipip:
verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph);
out:
- __ip_vs_conn_put(cp);
+ if (likely(!new_cp))
+ __ip_vs_conn_put(cp);
+ else
+ ip_vs_conn_put(cp);
return verdict;
}
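The new_cp flag distinguishes two reference-drop paths: a connection looked up via conn_in_get() is released with __ip_vs_conn_put(), while one just created by ip_vs_try_to_schedule() is released with ip_vs_conn_put(). A sketch of that convention, drawn from the hunks above (this helper is hypothetical, not part of the patch):

static inline void example_release_cp(struct ip_vs_conn *cp, bool new_cp)
{
	if (likely(!new_cp))
		__ip_vs_conn_put(cp);	/* reference from the lookup */
	else
		ip_vs_conn_put(cp);	/* reference from scheduling */
}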
#ifdef CONFIG_IP_VS_IPV6
-static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
- unsigned int hooknum, struct ip_vs_iphdr *iph)
+static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ int *related, unsigned int hooknum,
+ struct ip_vs_iphdr *iph)
{
- struct net *net = NULL;
- struct ipv6hdr _ip6h, *ip6h;
struct icmp6hdr _icmph, *ic;
struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
- unsigned int offs_ciph, writable, verdict;
+ unsigned int offset, verdict;
+ bool new_cp = false;
*related = 1;
@@ -1546,21 +1606,11 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
ic->icmp6_type, ntohs(icmpv6_id(ic)),
&iph->saddr, &iph->daddr);
- /* Now find the contained IP header */
- ciph.len = iph->len + sizeof(_icmph);
- offs_ciph = ciph.len; /* Save ip header offset */
- ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
- if (ip6h == NULL)
- return NF_ACCEPT; /* The packet looks wrong, ignore */
- ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */
- ciph.daddr.in6 = ip6h->daddr;
- /* skip possible IPv6 exthdrs of contained IPv6 packet */
- ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
- if (ciph.protocol < 0)
- return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
-
- net = skb_net(skb);
- pd = ip_vs_proto_data_get(net, ciph.protocol);
+ offset = iph->len + sizeof(_icmph);
+ if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, offset, true, &ciph))
+ return NF_ACCEPT;
+
+ pd = ip_vs_proto_data_get(ipvs, ciph.protocol);
if (!pd)
return NF_ACCEPT;
pp = pd->pp;
@@ -1569,36 +1619,49 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
if (ciph.fragoffs)
return NF_ACCEPT;
- IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph,
+ IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
"Checking incoming ICMPv6 for");
/* The embedded headers contain source and dest in reverse order
* if not from localhost
*/
- cp = pp->conn_in_get(AF_INET6, skb, &ciph,
- (hooknum == NF_INET_LOCAL_OUT) ? 0 : 1);
+ cp = pp->conn_in_get(ipvs, AF_INET6, skb, &ciph);
+
+ if (!cp) {
+ int v;
+
+ if (!sysctl_schedule_icmp(ipvs))
+ return NF_ACCEPT;
+
+ if (!ip_vs_try_to_schedule(ipvs, AF_INET6, skb, pd, &v, &cp, &ciph))
+ return v;
+
+ new_cp = true;
+ }
- if (!cp)
- return NF_ACCEPT;
/* VS/TUN, VS/DR and LOCALNODE just let it go */
if ((hooknum == NF_INET_LOCAL_OUT) &&
(IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) {
- __ip_vs_conn_put(cp);
- return NF_ACCEPT;
+ verdict = NF_ACCEPT;
+ goto out;
}
/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
/* Need to mangle contained IPv6 header in ICMPv6 packet */
- writable = ciph.len;
+ offset = ciph.len;
if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol ||
IPPROTO_SCTP == ciph.protocol)
- writable += 2 * sizeof(__u16); /* Also mangle ports */
+ offset += 2 * sizeof(__u16); /* Also mangle ports */
- verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph);
+ verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum, &ciph);
- __ip_vs_conn_put(cp);
+out:
+ if (likely(!new_cp))
+ __ip_vs_conn_put(cp);
+ else
+ ip_vs_conn_put(cp);
return verdict;
}
@@ -1610,16 +1673,15 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
* and send it on its way...
*/
static unsigned int
-ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
+ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
{
- struct net *net;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
int ret, pkts;
- struct netns_ipvs *ipvs;
int conn_reuse_mode;
+ struct sock *sk;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
@@ -1633,7 +1695,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (unlikely((skb->pkt_type != PACKET_HOST &&
hooknum != NF_INET_LOCAL_OUT) ||
!skb_dst(skb))) {
- ip_vs_fill_iph_skb(af, skb, &iph);
+ ip_vs_fill_iph_skb(af, skb, false, &iph);
IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
" ignored in hook %u\n",
skb->pkt_type, iph.protocol,
@@ -1641,20 +1703,17 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
return NF_ACCEPT;
}
/* ipvs enabled in this netns ? */
- net = skb_net(skb);
- ipvs = net_ipvs(net);
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
return NF_ACCEPT;
- ip_vs_fill_iph_skb(af, skb, &iph);
+ ip_vs_fill_iph_skb(af, skb, false, &iph);
/* Bad... Do not break raw sockets */
- if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
+ sk = skb_to_full_sk(skb);
+ if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT &&
af == AF_INET)) {
- struct sock *sk = skb->sk;
- struct inet_sock *inet = inet_sk(skb->sk);
- if (inet && sk->sk_family == PF_INET && inet->nodefrag)
+ if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag)
return NF_ACCEPT;
}
@@ -1662,8 +1721,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (af == AF_INET6) {
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related;
- int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
- &iph);
+ int verdict = ip_vs_in_icmp_v6(ipvs, skb, &related,
+ hooknum, &iph);
if (related)
return verdict;
@@ -1672,21 +1731,30 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
#endif
if (unlikely(iph.protocol == IPPROTO_ICMP)) {
int related;
- int verdict = ip_vs_in_icmp(skb, &related, hooknum);
+ int verdict = ip_vs_in_icmp(ipvs, skb, &related,
+ hooknum);
if (related)
return verdict;
}
/* Protocol supported? */
- pd = ip_vs_proto_data_get(net, iph.protocol);
- if (unlikely(!pd))
+ pd = ip_vs_proto_data_get(ipvs, iph.protocol);
+ if (unlikely(!pd)) {
+ /* The only way we'll see this packet again is if it's
+ * encapsulated, so mark it with ipvs_property=1 so we
+ * skip it if we're ignoring tunneled packets
+ */
+ if (sysctl_ignore_tunneled(ipvs))
+ skb->ipvs_property = 1;
+
return NF_ACCEPT;
+ }
pp = pd->pp;
/*
* Check if the packet belongs to an existing connection entry
*/
- cp = pp->conn_in_get(af, skb, &iph, 0);
+ cp = pp->conn_in_get(ipvs, af, skb, &iph);
conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
if (conn_reuse_mode && !iph.fragoffs &&
@@ -1700,32 +1768,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
cp = NULL;
}
- if (unlikely(!cp) && !iph.fragoffs) {
- /* No (second) fragments need to enter here, as nf_defrag_ipv6
- * replayed fragment zero will already have created the cp
- */
+ if (unlikely(!cp)) {
int v;
- /* Schedule and create new connection entry into &cp */
- if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph))
+ if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph))
return v;
}
- if (unlikely(!cp)) {
- /* sorry, all this trouble for a no-hit :) */
- IP_VS_DBG_PKT(12, af, pp, skb, 0,
- "ip_vs_in: packet continues traversal as normal");
- if (iph.fragoffs) {
- /* Fragment that couldn't be mapped to a conn entry
- * is missing module nf_defrag_ipv6
- */
- IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
- IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment");
- }
- return NF_ACCEPT;
- }
+ IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet");
- IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
/* Check the server status */
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
@@ -1765,7 +1816,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
pkts = atomic_add_return(1, &cp->in_pkts);
if (ipvs->sync_state & IP_VS_STATE_MASTER)
- ip_vs_sync_conn(net, cp, pkts);
+ ip_vs_sync_conn(ipvs, cp, pkts);
ip_vs_conn_put(cp);
return ret;
@@ -1776,10 +1827,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
* Schedule and forward packets from remote clients
*/
static unsigned int
-ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_remote_request4(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_in(ops->hooknum, skb, AF_INET);
+ return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET);
}
/*
@@ -1787,10 +1838,10 @@ ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Schedule and forward packets from local clients
*/
static unsigned int
-ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_local_request4(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_in(ops->hooknum, skb, AF_INET);
+ return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
@@ -1800,10 +1851,10 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Schedule and forward packets from remote clients
*/
static unsigned int
-ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_remote_request6(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_in(ops->hooknum, skb, AF_INET6);
+ return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6);
}
/*
@@ -1811,10 +1862,10 @@ ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Schedule and forward packets from local clients
*/
static unsigned int
-ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_local_request6(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_in(ops->hooknum, skb, AF_INET6);
+ return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6);
}
#endif
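All of the wrappers above follow the same conversion to the nf_hookfn signature carrying a struct nf_hook_state: per-netns IPVS state is taken from state->net instead of skb_net(), and the hook number from state->hook instead of ops->hooknum. A condensed sketch of the pattern (example_hook is a hypothetical name):

static unsigned int example_hook(void *priv, struct sk_buff *skb,
				 const struct nf_hook_state *state)
{
	struct netns_ipvs *ipvs = net_ipvs(state->net);

	return ip_vs_in(ipvs, state->hook, skb, AF_INET);
}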
@@ -1830,46 +1881,40 @@ ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
* and send them to ip_vs_in_icmp.
*/
static unsigned int
-ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
int r;
- struct net *net;
- struct netns_ipvs *ipvs;
+ struct netns_ipvs *ipvs = net_ipvs(state->net);
if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
return NF_ACCEPT;
/* ipvs enabled in this netns ? */
- net = skb_net(skb);
- ipvs = net_ipvs(net);
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
return NF_ACCEPT;
- return ip_vs_in_icmp(skb, &r, ops->hooknum);
+ return ip_vs_in_icmp(ipvs, skb, &r, state->hook);
}
#ifdef CONFIG_IP_VS_IPV6
static unsigned int
-ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
int r;
- struct net *net;
- struct netns_ipvs *ipvs;
+ struct netns_ipvs *ipvs = net_ipvs(state->net);
struct ip_vs_iphdr iphdr;
- ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr);
+ ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr);
if (iphdr.protocol != IPPROTO_ICMPV6)
return NF_ACCEPT;
/* ipvs enabled in this netns ? */
- net = skb_net(skb);
- ipvs = net_ipvs(net);
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
return NF_ACCEPT;
- return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr);
+ return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr);
}
#endif
@@ -1878,7 +1923,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply4,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC - 2,
@@ -1888,7 +1932,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
* applied to IPVS. */
{
.hook = ip_vs_remote_request4,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC - 1,
@@ -1896,7 +1939,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* Before ip_vs_in, change source only for VS/NAT */
{
.hook = ip_vs_local_reply4,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST + 1,
@@ -1904,7 +1946,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After mangle, schedule and forward local requests */
{
.hook = ip_vs_local_request4,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST + 2,
@@ -1913,7 +1954,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
* destined for 0.0.0.0/0, which is for incoming IPVS connections */
{
.hook = ip_vs_forward_icmp,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_FORWARD,
.priority = 99,
@@ -1921,7 +1961,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply4,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_FORWARD,
.priority = 100,
@@ -1930,7 +1969,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC - 2,
@@ -1940,7 +1978,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
* applied to IPVS. */
{
.hook = ip_vs_remote_request6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC - 1,
@@ -1948,7 +1985,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* Before ip_vs_in, change source only for VS/NAT */
{
.hook = ip_vs_local_reply6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST + 1,
@@ -1956,7 +1992,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After mangle, schedule and forward local requests */
{
.hook = ip_vs_local_request6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST + 2,
@@ -1965,7 +2000,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
* destined for 0.0.0.0/0, which is for incoming IPVS connections */
{
.hook = ip_vs_forward_icmp_v6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_FORWARD,
.priority = 99,
@@ -1973,7 +2007,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_FORWARD,
.priority = 100,
@@ -1999,22 +2032,22 @@ static int __net_init __ip_vs_init(struct net *net)
atomic_inc(&ipvs_netns_cnt);
net->ipvs = ipvs;
- if (ip_vs_estimator_net_init(net) < 0)
+ if (ip_vs_estimator_net_init(ipvs) < 0)
goto estimator_fail;
- if (ip_vs_control_net_init(net) < 0)
+ if (ip_vs_control_net_init(ipvs) < 0)
goto control_fail;
- if (ip_vs_protocol_net_init(net) < 0)
+ if (ip_vs_protocol_net_init(ipvs) < 0)
goto protocol_fail;
- if (ip_vs_app_net_init(net) < 0)
+ if (ip_vs_app_net_init(ipvs) < 0)
goto app_fail;
- if (ip_vs_conn_net_init(net) < 0)
+ if (ip_vs_conn_net_init(ipvs) < 0)
goto conn_fail;
- if (ip_vs_sync_net_init(net) < 0)
+ if (ip_vs_sync_net_init(ipvs) < 0)
goto sync_fail;
printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
@@ -2025,15 +2058,15 @@ static int __net_init __ip_vs_init(struct net *net)
*/
sync_fail:
- ip_vs_conn_net_cleanup(net);
+ ip_vs_conn_net_cleanup(ipvs);
conn_fail:
- ip_vs_app_net_cleanup(net);
+ ip_vs_app_net_cleanup(ipvs);
app_fail:
- ip_vs_protocol_net_cleanup(net);
+ ip_vs_protocol_net_cleanup(ipvs);
protocol_fail:
- ip_vs_control_net_cleanup(net);
+ ip_vs_control_net_cleanup(ipvs);
control_fail:
- ip_vs_estimator_net_cleanup(net);
+ ip_vs_estimator_net_cleanup(ipvs);
estimator_fail:
net->ipvs = NULL;
return -ENOMEM;
@@ -2041,22 +2074,25 @@ estimator_fail:
static void __net_exit __ip_vs_cleanup(struct net *net)
{
- ip_vs_service_net_cleanup(net); /* ip_vs_flush() with locks */
- ip_vs_conn_net_cleanup(net);
- ip_vs_app_net_cleanup(net);
- ip_vs_protocol_net_cleanup(net);
- ip_vs_control_net_cleanup(net);
- ip_vs_estimator_net_cleanup(net);
- IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */
+ ip_vs_conn_net_cleanup(ipvs);
+ ip_vs_app_net_cleanup(ipvs);
+ ip_vs_protocol_net_cleanup(ipvs);
+ ip_vs_control_net_cleanup(ipvs);
+ ip_vs_estimator_net_cleanup(ipvs);
+ IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
net->ipvs = NULL;
}
static void __net_exit __ip_vs_dev_cleanup(struct net *net)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
EnterFunction(2);
- net_ipvs(net)->enable = 0; /* Disable packet reception */
+ ipvs->enable = 0; /* Disable packet reception */
smp_wmb();
- ip_vs_sync_net_cleanup(net);
+ ip_vs_sync_net_cleanup(ipvs);
LeaveFunction(2);
}
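Throughout ip_vs_core.c the per-netns entry points now take struct netns_ipvs * directly; where a struct net is still needed (loopback_dev, proc, sysctl registration), it is reached through the ipvs->net back-pointer. A hypothetical illustration of the mapping in both directions:

static void example_netns_mapping(struct net *net)
{
	struct netns_ipvs *ipvs = net_ipvs(net);	/* net  -> ipvs */
	struct net *owner = ipvs->net;			/* ipvs -> net  */

	(void)owner;
}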
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 1a23e91d5..e7c1b052c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -228,7 +228,7 @@ static void defense_work_handler(struct work_struct *work)
update_defense_level(ipvs);
if (atomic_read(&ipvs->dropentry))
- ip_vs_random_dropentry(ipvs->net);
+ ip_vs_random_dropentry(ipvs);
schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
}
#endif
@@ -263,7 +263,7 @@ static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
* Returns hash value for virtual service
*/
static inline unsigned int
-ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
+ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
const union nf_inet_addr *addr, __be16 port)
{
register unsigned int porth = ntohs(port);
@@ -276,7 +276,7 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
addr->ip6[2]^addr->ip6[3];
#endif
ahash = ntohl(addr_fold);
- ahash ^= ((size_t) net >> 8);
+ ahash ^= ((size_t) ipvs >> 8);
return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
IP_VS_SVC_TAB_MASK;
@@ -285,9 +285,9 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
/*
* Returns hash value of fwmark for virtual service lookup
*/
-static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
+static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark)
{
- return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
+ return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
}
/*
@@ -309,14 +309,14 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
/*
* Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
*/
- hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
+ hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol,
&svc->addr, svc->port);
hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
} else {
/*
* Hash it by fwmark in svc_fwm_table
*/
- hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
+ hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark);
hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
}
@@ -357,21 +357,21 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
* Get service by {netns, proto,addr,port} in the service table.
*/
static inline struct ip_vs_service *
-__ip_vs_service_find(struct net *net, int af, __u16 protocol,
+__ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport)
{
unsigned int hash;
struct ip_vs_service *svc;
/* Check for "full" addressed entries */
- hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
+ hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport);
hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
if ((svc->af == af)
&& ip_vs_addr_equal(af, &svc->addr, vaddr)
&& (svc->port == vport)
&& (svc->protocol == protocol)
- && net_eq(svc->net, net)) {
+ && (svc->ipvs == ipvs)) {
/* HIT */
return svc;
}
@@ -385,17 +385,17 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol,
* Get service by {fwmark} in the service table.
*/
static inline struct ip_vs_service *
-__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
+__ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark)
{
unsigned int hash;
struct ip_vs_service *svc;
/* Check for fwmark addressed entries */
- hash = ip_vs_svc_fwm_hashkey(net, fwmark);
+ hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark);
hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
if (svc->fwmark == fwmark && svc->af == af
- && net_eq(svc->net, net)) {
+ && (svc->ipvs == ipvs)) {
/* HIT */
return svc;
}
@@ -406,17 +406,16 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
/* Find service, called under RCU lock */
struct ip_vs_service *
-ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport)
{
struct ip_vs_service *svc;
- struct netns_ipvs *ipvs = net_ipvs(net);
/*
* Check the table hashed by fwmark first
*/
if (fwmark) {
- svc = __ip_vs_svc_fwm_find(net, af, fwmark);
+ svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark);
if (svc)
goto out;
}
@@ -425,7 +424,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
* Check the table hashed by <protocol,addr,port>
* for "full" addressed entries
*/
- svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
+ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport);
if (svc == NULL
&& protocol == IPPROTO_TCP
@@ -435,7 +434,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
* Check if ftp service entry exists, the packet
* might belong to FTP data connections.
*/
- svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
+ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT);
}
if (svc == NULL
@@ -443,7 +442,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
/*
* Check if the catch-all port (port zero) exists
*/
- svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
+ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0);
}
out:
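The conversion keeps the existing lookup order intact: the fwmark table first, then the exact <protocol, addr, port> entry, then the FTP control port for TCP data connections, and finally the catch-all port 0. A condensed sketch of that fallback chain (example_lookup is a hypothetical wrapper; the original additionally gates the FTP and catch-all fallbacks on per-netns counters and port checks not visible in these hunks):

static struct ip_vs_service *
example_lookup(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol,
	       const union nf_inet_addr *vaddr, __be16 vport)
{
	struct ip_vs_service *svc = NULL;

	if (fwmark)
		svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark);
	if (!svc)
		svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport);
	if (!svc && protocol == IPPROTO_TCP)
		svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT);
	if (!svc)
		svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0);
	return svc;
}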
@@ -543,10 +542,9 @@ static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
}
/* Check if real service by <proto,addr,port> is present */
-bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
+bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
const union nf_inet_addr *daddr, __be16 dport)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
unsigned int hash;
struct ip_vs_dest *dest;
@@ -601,7 +599,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
* on the backup.
* Called under RCU lock, no refcnt is returned.
*/
-struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
+struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af,
const union nf_inet_addr *daddr,
__be16 dport,
const union nf_inet_addr *vaddr,
@@ -612,7 +610,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
struct ip_vs_service *svc;
__be16 port = dport;
- svc = ip_vs_service_find(net, svc_af, fwmark, protocol, vaddr, vport);
+ svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport);
if (!svc)
return NULL;
if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
@@ -660,7 +658,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
const union nf_inet_addr *daddr, __be16 dport)
{
struct ip_vs_dest *dest;
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct netns_ipvs *ipvs = svc->ipvs;
/*
* Find the destination in trash
@@ -715,10 +713,9 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest)
* are expired, and the refcnt of each destination in the trash must
* be 0, so we simply release them here.
*/
-static void ip_vs_trash_cleanup(struct net *net)
+static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs)
{
struct ip_vs_dest *dest, *nxt;
- struct netns_ipvs *ipvs = net_ipvs(net);
del_timer_sync(&ipvs->dest_trash_timer);
/* No need to use dest_trash_lock */
@@ -788,7 +785,7 @@ static void
__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
struct ip_vs_dest_user_kern *udest, int add)
{
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct netns_ipvs *ipvs = svc->ipvs;
struct ip_vs_service *old_svc;
struct ip_vs_scheduler *sched;
int conn_flags;
@@ -843,7 +840,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
spin_unlock_bh(&dest->dst_lock);
if (add) {
- ip_vs_start_estimator(svc->net, &dest->stats);
+ ip_vs_start_estimator(svc->ipvs, &dest->stats);
list_add_rcu(&dest->n_list, &svc->destinations);
svc->num_dests++;
sched = rcu_dereference_protected(svc->scheduler, 1);
@@ -874,12 +871,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
atype = ipv6_addr_type(&udest->addr.in6);
if ((!(atype & IPV6_ADDR_UNICAST) ||
atype & IPV6_ADDR_LINKLOCAL) &&
- !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
+ !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
return -EINVAL;
} else
#endif
{
- atype = inet_addr_type(svc->net, udest->addr.ip);
+ atype = inet_addr_type(svc->ipvs->net, udest->addr.ip);
if (atype != RTN_LOCAL && atype != RTN_UNICAST)
return -EINVAL;
}
@@ -1036,12 +1033,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
/*
* Delete a destination (must be already unlinked from the service)
*/
-static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
+static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
bool cleanup)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
- ip_vs_stop_estimator(net, &dest->stats);
+ ip_vs_stop_estimator(ipvs, &dest->stats);
/*
* Remove it from the d-linked list with the real services.
@@ -1079,7 +1074,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
svc->num_dests--;
if (dest->af != svc->af)
- net_ipvs(svc->net)->mixed_address_family_dests--;
+ svc->ipvs->mixed_address_family_dests--;
if (svcupd) {
struct ip_vs_scheduler *sched;
@@ -1120,7 +1115,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
/*
* Delete the destination
*/
- __ip_vs_del_dest(svc->net, dest, false);
+ __ip_vs_del_dest(svc->ipvs, dest, false);
LeaveFunction(2);
@@ -1129,8 +1124,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
static void ip_vs_dest_trash_expire(unsigned long data)
{
- struct net *net = (struct net *) data;
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct netns_ipvs *ipvs = (struct netns_ipvs *)data;
struct ip_vs_dest *dest, *next;
unsigned long now = jiffies;
@@ -1163,14 +1157,13 @@ static void ip_vs_dest_trash_expire(unsigned long data)
* Add a service into the service hash table
*/
static int
-ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
+ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
struct ip_vs_service **svc_p)
{
int ret = 0, i;
struct ip_vs_scheduler *sched = NULL;
struct ip_vs_pe *pe = NULL;
struct ip_vs_service *svc = NULL;
- struct netns_ipvs *ipvs = net_ipvs(net);
/* increase the module use count */
ip_vs_use_count_inc();
@@ -1237,7 +1230,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
svc->flags = u->flags;
svc->timeout = u->timeout * HZ;
svc->netmask = u->netmask;
- svc->net = net;
+ svc->ipvs = ipvs;
INIT_LIST_HEAD(&svc->destinations);
spin_lock_init(&svc->sched_lock);
@@ -1261,7 +1254,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
else if (svc->port == 0)
atomic_inc(&ipvs->nullsvc_counter);
- ip_vs_start_estimator(net, &svc->stats);
+ ip_vs_start_estimator(ipvs, &svc->stats);
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
@@ -1381,7 +1374,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
struct ip_vs_dest *dest, *nxt;
struct ip_vs_scheduler *old_sched;
struct ip_vs_pe *old_pe;
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct netns_ipvs *ipvs = svc->ipvs;
pr_info("%s: enter\n", __func__);
@@ -1389,7 +1382,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
if (svc->af == AF_INET)
ipvs->num_services--;
- ip_vs_stop_estimator(svc->net, &svc->stats);
+ ip_vs_stop_estimator(svc->ipvs, &svc->stats);
/* Unbind scheduler */
old_sched = rcu_dereference_protected(svc->scheduler, 1);
@@ -1405,7 +1398,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
*/
list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
__ip_vs_unlink_dest(svc, dest, 0);
- __ip_vs_del_dest(svc->net, dest, cleanup);
+ __ip_vs_del_dest(svc->ipvs, dest, cleanup);
}
/*
@@ -1456,7 +1449,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
/*
* Flush all the virtual services
*/
-static int ip_vs_flush(struct net *net, bool cleanup)
+static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
{
int idx;
struct ip_vs_service *svc;
@@ -1468,7 +1461,7 @@ static int ip_vs_flush(struct net *net, bool cleanup)
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
s_list) {
- if (net_eq(svc->net, net))
+ if (svc->ipvs == ipvs)
ip_vs_unlink_service(svc, cleanup);
}
}
@@ -1479,7 +1472,7 @@ static int ip_vs_flush(struct net *net, bool cleanup)
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
f_list) {
- if (net_eq(svc->net, net))
+ if (svc->ipvs == ipvs)
ip_vs_unlink_service(svc, cleanup);
}
}
@@ -1491,12 +1484,12 @@ static int ip_vs_flush(struct net *net, bool cleanup)
* Delete service by {netns} in the service table.
* Called by __ip_vs_cleanup()
*/
-void ip_vs_service_net_cleanup(struct net *net)
+void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
{
EnterFunction(2);
/* Check for "full" addressed entries */
mutex_lock(&__ip_vs_mutex);
- ip_vs_flush(net, true);
+ ip_vs_flush(ipvs, true);
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
}
@@ -1540,7 +1533,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
mutex_lock(&__ip_vs_mutex);
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
- if (net_eq(svc->net, net)) {
+ if (svc->ipvs == ipvs) {
list_for_each_entry(dest, &svc->destinations,
n_list) {
ip_vs_forget_dev(dest, dev);
@@ -1549,7 +1542,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
}
hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
- if (net_eq(svc->net, net)) {
+ if (svc->ipvs == ipvs) {
list_for_each_entry(dest, &svc->destinations,
n_list) {
ip_vs_forget_dev(dest, dev);
@@ -1583,26 +1576,26 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
return 0;
}
-static int ip_vs_zero_all(struct net *net)
+static int ip_vs_zero_all(struct netns_ipvs *ipvs)
{
int idx;
struct ip_vs_service *svc;
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
- if (net_eq(svc->net, net))
+ if (svc->ipvs == ipvs)
ip_vs_zero_service(svc);
}
}
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
- if (net_eq(svc->net, net))
+ if (svc->ipvs == ipvs)
ip_vs_zero_service(svc);
}
}
- ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
+ ip_vs_zero_stats(&ipvs->tot_stats);
return 0;
}
@@ -1615,7 +1608,7 @@ static int
proc_do_defense_mode(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- struct net *net = current->nsproxy->net_ns;
+ struct netns_ipvs *ipvs = table->extra2;
int *valp = table->data;
int val = *valp;
int rc;
@@ -1626,7 +1619,7 @@ proc_do_defense_mode(struct ctl_table *table, int write,
/* Restore the correct value */
*valp = val;
} else {
- update_defense_level(net_ipvs(net));
+ update_defense_level(ipvs);
}
}
return rc;
@@ -1844,6 +1837,18 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "schedule_icmp",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "ignore_tunneled",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_IP_VS_DEBUG
{
.procname = "debug_level",
@@ -1889,6 +1894,7 @@ static inline const char *ip_vs_fwd_name(unsigned int flags)
static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
{
struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_iter *iter = seq->private;
int idx;
struct ip_vs_service *svc;
@@ -1896,7 +1902,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
/* look in hash by protocol */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
- if (net_eq(svc->net, net) && pos-- == 0) {
+ if ((svc->ipvs == ipvs) && pos-- == 0) {
iter->table = ip_vs_svc_table;
iter->bucket = idx;
return svc;
@@ -1908,7 +1914,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
f_list) {
- if (net_eq(svc->net, net) && pos-- == 0) {
+ if ((svc->ipvs == ipvs) && pos-- == 0) {
iter->table = ip_vs_svc_fwm_table;
iter->bucket = idx;
return svc;
@@ -2196,7 +2202,7 @@ static const struct file_operations ip_vs_stats_percpu_fops = {
/*
* Set timeout values for tcp tcpfin udp in the timeout_table.
*/
-static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
+static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
{
#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
struct ip_vs_proto_data *pd;
@@ -2209,13 +2215,13 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
#ifdef CONFIG_IP_VS_PROTO_TCP
if (u->tcp_timeout) {
- pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
= u->tcp_timeout * HZ;
}
if (u->tcp_fin_timeout) {
- pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
= u->tcp_fin_timeout * HZ;
}
@@ -2223,7 +2229,7 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
#ifdef CONFIG_IP_VS_PROTO_UDP
if (u->udp_timeout) {
- pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
pd->timeout_table[IP_VS_UDP_S_NORMAL]
= u->udp_timeout * HZ;
}
@@ -2344,12 +2350,12 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
cfg.syncid = dm->syncid;
rtnl_lock();
mutex_lock(&ipvs->sync_mutex);
- ret = start_sync_thread(net, &cfg, dm->state);
+ ret = start_sync_thread(ipvs, &cfg, dm->state);
mutex_unlock(&ipvs->sync_mutex);
rtnl_unlock();
} else {
mutex_lock(&ipvs->sync_mutex);
- ret = stop_sync_thread(net, dm->state);
+ ret = stop_sync_thread(ipvs, dm->state);
mutex_unlock(&ipvs->sync_mutex);
}
goto out_dec;
@@ -2358,11 +2364,11 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
mutex_lock(&__ip_vs_mutex);
if (cmd == IP_VS_SO_SET_FLUSH) {
/* Flush the virtual service */
- ret = ip_vs_flush(net, false);
+ ret = ip_vs_flush(ipvs, false);
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
/* Set timeout values for (tcp tcpfin udp) */
- ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
+ ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
goto out_unlock;
}
@@ -2377,7 +2383,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
if (cmd == IP_VS_SO_SET_ZERO) {
/* if no service address is set, zero counters in all */
if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
- ret = ip_vs_zero_all(net);
+ ret = ip_vs_zero_all(ipvs);
goto out_unlock;
}
}
@@ -2395,10 +2401,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
/* Lookup the exact service by <protocol, addr, port> or fwmark */
rcu_read_lock();
if (usvc.fwmark == 0)
- svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
+ svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol,
&usvc.addr, usvc.port);
else
- svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
+ svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark);
rcu_read_unlock();
if (cmd != IP_VS_SO_SET_ADD
@@ -2412,7 +2418,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
if (svc != NULL)
ret = -EEXIST;
else
- ret = ip_vs_add_service(net, &usvc, &svc);
+ ret = ip_vs_add_service(ipvs, &usvc, &svc);
break;
case IP_VS_SO_SET_EDIT:
ret = ip_vs_edit_service(svc, &usvc);
@@ -2471,7 +2477,7 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
}
static inline int
-__ip_vs_get_service_entries(struct net *net,
+__ip_vs_get_service_entries(struct netns_ipvs *ipvs,
const struct ip_vs_get_services *get,
struct ip_vs_get_services __user *uptr)
{
@@ -2483,7 +2489,7 @@ __ip_vs_get_service_entries(struct net *net,
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
/* Only expose IPv4 entries to old interface */
- if (svc->af != AF_INET || !net_eq(svc->net, net))
+ if (svc->af != AF_INET || (svc->ipvs != ipvs))
continue;
if (count >= get->num_services)
@@ -2502,7 +2508,7 @@ __ip_vs_get_service_entries(struct net *net,
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
/* Only expose IPv4 entries to old interface */
- if (svc->af != AF_INET || !net_eq(svc->net, net))
+ if (svc->af != AF_INET || (svc->ipvs != ipvs))
continue;
if (count >= get->num_services)
@@ -2522,7 +2528,7 @@ out:
}
static inline int
-__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
+__ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get,
struct ip_vs_get_dests __user *uptr)
{
struct ip_vs_service *svc;
@@ -2531,9 +2537,9 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
rcu_read_lock();
if (get->fwmark)
- svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
+ svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark);
else
- svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
+ svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr,
get->port);
rcu_read_unlock();
@@ -2578,7 +2584,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
}
static inline void
-__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
+__ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
{
#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
struct ip_vs_proto_data *pd;
@@ -2587,12 +2593,12 @@ __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
memset(u, 0, sizeof (*u));
#ifdef CONFIG_IP_VS_PROTO_TCP
- pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
- pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
u->udp_timeout =
pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
@@ -2711,7 +2717,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
ret = -EINVAL;
goto out;
}
- ret = __ip_vs_get_service_entries(net, get, user);
+ ret = __ip_vs_get_service_entries(ipvs, get, user);
}
break;
@@ -2725,9 +2731,9 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
addr.ip = entry->addr;
rcu_read_lock();
if (entry->fwmark)
- svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
+ svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark);
else
- svc = __ip_vs_service_find(net, AF_INET,
+ svc = __ip_vs_service_find(ipvs, AF_INET,
entry->protocol, &addr,
entry->port);
rcu_read_unlock();
@@ -2753,7 +2759,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
ret = -EINVAL;
goto out;
}
- ret = __ip_vs_get_dest_entries(net, get, user);
+ ret = __ip_vs_get_dest_entries(ipvs, get, user);
}
break;
@@ -2761,7 +2767,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(net, &t);
+ __ip_vs_get_timeouts(ipvs, &t);
if (copy_to_user(user, &t, sizeof(t)) != 0)
ret = -EFAULT;
}
@@ -2996,12 +3002,13 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
int idx = 0, i;
int start = cb->args[0];
struct ip_vs_service *svc;
- struct net *net = skb_sknet(skb);
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
mutex_lock(&__ip_vs_mutex);
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
- if (++idx <= start || !net_eq(svc->net, net))
+ if (++idx <= start || (svc->ipvs != ipvs))
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
idx--;
@@ -3012,7 +3019,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
- if (++idx <= start || !net_eq(svc->net, net))
+ if (++idx <= start || (svc->ipvs != ipvs))
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
idx--;
@@ -3028,7 +3035,7 @@ nla_put_failure:
return skb->len;
}
-static int ip_vs_genl_parse_service(struct net *net,
+static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
struct ip_vs_service_user_kern *usvc,
struct nlattr *nla, int full_entry,
struct ip_vs_service **ret_svc)
@@ -3073,9 +3080,9 @@ static int ip_vs_genl_parse_service(struct net *net,
rcu_read_lock();
if (usvc->fwmark)
- svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
+ svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark);
else
- svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
+ svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol,
&usvc->addr, usvc->port);
rcu_read_unlock();
*ret_svc = svc;
@@ -3113,14 +3120,14 @@ static int ip_vs_genl_parse_service(struct net *net,
return 0;
}
-static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
+static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
struct nlattr *nla)
{
struct ip_vs_service_user_kern usvc;
struct ip_vs_service *svc;
int ret;
- ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
+ ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, 0, &svc);
return ret ? ERR_PTR(ret) : svc;
}
@@ -3195,7 +3202,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
struct ip_vs_service *svc;
struct ip_vs_dest *dest;
struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
- struct net *net = skb_sknet(skb);
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
mutex_lock(&__ip_vs_mutex);
@@ -3205,7 +3213,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
goto out_err;
- svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
+ svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
if (IS_ERR(svc) || svc == NULL)
goto out_err;
@@ -3341,7 +3349,7 @@ nla_put_failure:
static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
struct netlink_callback *cb)
{
- struct net *net = skb_sknet(skb);
+ struct net *net = sock_net(skb->sk);
struct netns_ipvs *ipvs = net_ipvs(net);
mutex_lock(&ipvs->sync_mutex);
@@ -3367,9 +3375,8 @@ nla_put_failure:
return skb->len;
}
-static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
+static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ipvs_sync_daemon_cfg c;
struct nlattr *a;
int ret;
@@ -3426,33 +3433,32 @@ static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
rtnl_lock();
mutex_lock(&ipvs->sync_mutex);
- ret = start_sync_thread(net, &c,
+ ret = start_sync_thread(ipvs, &c,
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
mutex_unlock(&ipvs->sync_mutex);
rtnl_unlock();
return ret;
}
-static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
+static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
int ret;
if (!attrs[IPVS_DAEMON_ATTR_STATE])
return -EINVAL;
mutex_lock(&ipvs->sync_mutex);
- ret = stop_sync_thread(net,
+ ret = stop_sync_thread(ipvs,
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
mutex_unlock(&ipvs->sync_mutex);
return ret;
}
-static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
+static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(net, &t);
+ __ip_vs_get_timeouts(ipvs, &t);
if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3464,17 +3470,15 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
- return ip_vs_set_timeout(net, &t);
+ return ip_vs_set_timeout(ipvs, &t);
}
static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
{
int ret = -EINVAL, cmd;
- struct net *net;
- struct netns_ipvs *ipvs;
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
- net = skb_sknet(skb);
- ipvs = net_ipvs(net);
cmd = info->genlhdr->cmd;
if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
@@ -3487,9 +3491,9 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
goto out;
if (cmd == IPVS_CMD_NEW_DAEMON)
- ret = ip_vs_genl_new_daemon(net, daemon_attrs);
+ ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs);
else
- ret = ip_vs_genl_del_daemon(net, daemon_attrs);
+ ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs);
}
out:
@@ -3503,22 +3507,22 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
struct ip_vs_dest_user_kern udest;
int ret = 0, cmd;
int need_full_svc = 0, need_full_dest = 0;
- struct net *net;
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
- net = skb_sknet(skb);
cmd = info->genlhdr->cmd;
mutex_lock(&__ip_vs_mutex);
if (cmd == IPVS_CMD_FLUSH) {
- ret = ip_vs_flush(net, false);
+ ret = ip_vs_flush(ipvs, false);
goto out;
} else if (cmd == IPVS_CMD_SET_CONFIG) {
- ret = ip_vs_genl_set_config(net, info->attrs);
+ ret = ip_vs_genl_set_config(ipvs, info->attrs);
goto out;
} else if (cmd == IPVS_CMD_ZERO &&
!info->attrs[IPVS_CMD_ATTR_SERVICE]) {
- ret = ip_vs_zero_all(net);
+ ret = ip_vs_zero_all(ipvs);
goto out;
}
@@ -3528,7 +3532,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
need_full_svc = 1;
- ret = ip_vs_genl_parse_service(net, &usvc,
+ ret = ip_vs_genl_parse_service(ipvs, &usvc,
info->attrs[IPVS_CMD_ATTR_SERVICE],
need_full_svc, &svc);
if (ret)
@@ -3567,7 +3571,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
/* The synchronization protocol is incompatible
* with mixed family services
*/
- if (net_ipvs(net)->sync_state) {
+ if (ipvs->sync_state) {
ret = -EINVAL;
goto out;
}
@@ -3587,7 +3591,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
switch (cmd) {
case IPVS_CMD_NEW_SERVICE:
if (svc == NULL)
- ret = ip_vs_add_service(net, &usvc, &svc);
+ ret = ip_vs_add_service(ipvs, &usvc, &svc);
else
ret = -EEXIST;
break;
@@ -3625,9 +3629,9 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *msg;
void *reply;
int ret, cmd, reply_cmd;
- struct net *net;
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
- net = skb_sknet(skb);
cmd = info->genlhdr->cmd;
if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3656,7 +3660,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
struct ip_vs_service *svc;
- svc = ip_vs_genl_find_service(net,
+ svc = ip_vs_genl_find_service(ipvs,
info->attrs[IPVS_CMD_ATTR_SERVICE]);
if (IS_ERR(svc)) {
ret = PTR_ERR(svc);
@@ -3677,7 +3681,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(net, &t);
+ __ip_vs_get_timeouts(ipvs, &t);
#ifdef CONFIG_IP_VS_PROTO_TCP
if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
t.tcp_timeout) ||
@@ -3832,10 +3836,10 @@ static void ip_vs_genl_unregister(void)
 * per netns init/exit func.
*/
#ifdef CONFIG_SYSCTL
-static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
+static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
{
+ struct net *net = ipvs->net;
int idx;
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ctl_table *tbl;
atomic_set(&ipvs->dropentry, 0);
@@ -3854,6 +3858,10 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
} else
tbl = vs_vars;
/* Initialize sysctl defaults */
+ for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) {
+ if (tbl[idx].proc_handler == proc_do_defense_mode)
+ tbl[idx].extra2 = ipvs;
+ }
idx = 0;
ipvs->sysctl_amemthresh = 1024;
tbl[idx++].data = &ipvs->sysctl_amemthresh;
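Because proc_do_defense_mode() no longer derives the netns from current->nsproxy, the per-netns copy of the table stores the owning struct netns_ipvs in .extra2, as done by the loop added above. A minimal sketch of how a handler then recovers it, matching the earlier hunk in this file (example_defense_handler is hypothetical and omits the value-range restore logic of the real handler):

static int example_defense_handler(struct ctl_table *table, int write,
				   void __user *buffer, size_t *lenp,
				   loff_t *ppos)
{
	struct netns_ipvs *ipvs = table->extra2;	/* set at net init */
	int rc = proc_dointvec(table, write, buffer, lenp, ppos);

	if (write && rc == 0)
		update_defense_level(ipvs);
	return rc;
}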
@@ -3895,7 +3903,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
tbl[idx++].data = &ipvs->sysctl_backup_only;
ipvs->sysctl_conn_reuse_mode = 1;
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
-
+ tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
+ tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
if (ipvs->sysctl_hdr == NULL) {
@@ -3903,7 +3912,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
kfree(tbl);
return -ENOMEM;
}
- ip_vs_start_estimator(net, &ipvs->tot_stats);
+ ip_vs_start_estimator(ipvs, &ipvs->tot_stats);
ipvs->sysctl_tbl = tbl;
/* Schedule defense work */
INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
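The two new vs_vars entries (schedule_icmp and ignore_tunneled) leave .data unset in the static table; the per-netns copy gets its .data pointers assigned positionally here, so the assignments must stay in the same order as the table slots. A pairing sketch under that assumption (example_wire_sysctls is a hypothetical name; the fields are the ones referenced in the hunk above):

static void example_wire_sysctls(struct ctl_table *tbl, int idx,
				 struct netns_ipvs *ipvs)
{
	tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
	tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
}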
@@ -3912,14 +3921,14 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
return 0;
}
-static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
+static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct net *net = ipvs->net;
cancel_delayed_work_sync(&ipvs->defense_work);
cancel_work_sync(&ipvs->defense_work.work);
unregister_net_sysctl_table(ipvs->sysctl_hdr);
- ip_vs_stop_estimator(net, &ipvs->tot_stats);
+ ip_vs_stop_estimator(ipvs, &ipvs->tot_stats);
if (!net_eq(net, &init_net))
kfree(ipvs->sysctl_tbl);
@@ -3927,8 +3936,8 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
#else
-static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
-static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
+static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; }
+static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { }
#endif
@@ -3936,10 +3945,10 @@ static struct notifier_block ip_vs_dst_notifier = {
.notifier_call = ip_vs_dst_event,
};
-int __net_init ip_vs_control_net_init(struct net *net)
+int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
{
+ struct net *net = ipvs->net;
int i, idx;
- struct netns_ipvs *ipvs = net_ipvs(net);
/* Initialize rs_table */
for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
@@ -3948,7 +3957,7 @@ int __net_init ip_vs_control_net_init(struct net *net)
INIT_LIST_HEAD(&ipvs->dest_trash);
spin_lock_init(&ipvs->dest_trash_lock);
setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
- (unsigned long) net);
+ (unsigned long) ipvs);
atomic_set(&ipvs->ftpsvc_counter, 0);
atomic_set(&ipvs->nullsvc_counter, 0);
@@ -3970,7 +3979,7 @@ int __net_init ip_vs_control_net_init(struct net *net)
proc_create("ip_vs_stats_percpu", 0, net->proc_net,
&ip_vs_stats_percpu_fops);
- if (ip_vs_control_net_init_sysctl(net))
+ if (ip_vs_control_net_init_sysctl(ipvs))
goto err;
return 0;
@@ -3980,12 +3989,12 @@ err:
return -ENOMEM;
}
-void __net_exit ip_vs_control_net_cleanup(struct net *net)
+void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct net *net = ipvs->net;
- ip_vs_trash_cleanup(net);
- ip_vs_control_net_cleanup_sysctl(net);
+ ip_vs_trash_cleanup(ipvs);
+ ip_vs_control_net_cleanup_sysctl(ipvs);
remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
remove_proc_entry("ip_vs_stats", net->proc_net);
remove_proc_entry("ip_vs", net->proc_net);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index ef0eb0a8d..457c6c193 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -102,10 +102,8 @@ static void estimation_timer(unsigned long arg)
struct ip_vs_estimator *e;
struct ip_vs_stats *s;
u64 rate;
- struct net *net = (struct net *)arg;
- struct netns_ipvs *ipvs;
+ struct netns_ipvs *ipvs = (struct netns_ipvs *)arg;
- ipvs = net_ipvs(net);
spin_lock(&ipvs->est_lock);
list_for_each_entry(e, &ipvs->est_list, list) {
s = container_of(e, struct ip_vs_stats, est);
@@ -140,9 +138,8 @@ static void estimation_timer(unsigned long arg)
mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
}
-void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats)
+void ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_estimator *est = &stats->est;
INIT_LIST_HEAD(&est->list);
@@ -152,9 +149,8 @@ void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats)
spin_unlock_bh(&ipvs->est_lock);
}
-void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
+void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_estimator *est = &stats->est;
spin_lock_bh(&ipvs->est_lock);
@@ -192,18 +188,16 @@ void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats)
dst->outbps = (e->outbps + 0xF) >> 5;
}
-int __net_init ip_vs_estimator_net_init(struct net *net)
+int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
INIT_LIST_HEAD(&ipvs->est_list);
spin_lock_init(&ipvs->est_lock);
- setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
+ setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)ipvs);
mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
return 0;
}
-void __net_exit ip_vs_estimator_net_cleanup(struct net *net)
+void __net_exit ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs)
{
- del_timer_sync(&net_ipvs(net)->est_timer);
+ del_timer_sync(&ipvs->est_timer);
}
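The estimator conversion changes the timer argument from struct net * to struct netns_ipvs *, so estimation_timer() no longer needs a net_ipvs() lookup on every tick. A small userspace sketch of the same idea follows, with a hypothetical tick() callback and est_ctx struct; the (unsigned long) cast only mirrors the setup_timer() convention used in the diff.

#include <stdio.h>

/* Per-namespace estimator context (stand-in for netns_ipvs). */
struct est_ctx {
	long inpkts;
	long rate;
};

/* Timer callback: the context arrives directly as the argument,
 * so no extra lookup step is needed to recover it. */
static void tick(unsigned long arg)
{
	struct est_ctx *ctx = (struct est_ctx *)arg;

	ctx->rate = ctx->inpkts / 2;	/* toy smoothing step */
}

int main(void)
{
	struct est_ctx ctx = { .inpkts = 10, .rate = 0 };

	/* The diff passes (unsigned long)ipvs where it used to pass
	 * (unsigned long)net; the same cast is imitated here. */
	tick((unsigned long)&ctx);
	printf("rate=%ld\n", ctx.rate);
	return 0;
}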
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 5d3daae98..d30c327bb 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -181,7 +181,6 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
int ret = 0;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
- struct net *net;
*diff = 0;
@@ -223,14 +222,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
*/
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+ ip_vs_conn_fill_param(cp->ipvs, AF_INET,
iph->protocol, &from, port,
&cp->caddr, 0, &p);
n_cp = ip_vs_conn_out_get(&p);
}
if (!n_cp) {
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(ip_vs_conn_net(cp),
+ ip_vs_conn_fill_param(cp->ipvs,
AF_INET, IPPROTO_TCP, &cp->caddr,
0, &cp->vaddr, port, &p);
/* As above, this is ipv4 only */
@@ -289,9 +288,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* would be adjusted twice.
*/
- net = skb_net(skb);
cp->app_data = NULL;
- ip_vs_tcp_conn_listen(net, n_cp);
+ ip_vs_tcp_conn_listen(n_cp);
ip_vs_conn_put(n_cp);
return ret;
}
@@ -320,7 +318,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
union nf_inet_addr to;
__be16 port;
struct ip_vs_conn *n_cp;
- struct net *net;
/* no diff required for incoming packets */
*diff = 0;
@@ -392,7 +389,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+ ip_vs_conn_fill_param(cp->ipvs, AF_INET,
iph->protocol, &to, port, &cp->vaddr,
htons(ntohs(cp->vport)-1), &p);
n_cp = ip_vs_conn_in_get(&p);
@@ -413,8 +410,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
/*
* Move tunnel to listen state
*/
- net = skb_net(skb);
- ip_vs_tcp_conn_listen(net, n_cp);
+ ip_vs_tcp_conn_listen(n_cp);
ip_vs_conn_put(n_cp);
return 1;
@@ -447,14 +443,14 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
if (!ipvs)
return -ENOENT;
- app = register_ip_vs_app(net, &ip_vs_ftp);
+ app = register_ip_vs_app(ipvs, &ip_vs_ftp);
if (IS_ERR(app))
return PTR_ERR(app);
for (i = 0; i < ports_count; i++) {
if (!ports[i])
continue;
- ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
+ ret = register_ip_vs_app_inc(ipvs, app, app->protocol, ports[i]);
if (ret)
goto err_unreg;
pr_info("%s: loaded support on port[%d] = %d\n",
@@ -463,7 +459,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
return 0;
err_unreg:
- unregister_ip_vs_app(net, &ip_vs_ftp);
+ unregister_ip_vs_app(ipvs, &ip_vs_ftp);
return ret;
}
/*
@@ -471,7 +467,12 @@ err_unreg:
*/
static void __ip_vs_ftp_exit(struct net *net)
{
- unregister_ip_vs_app(net, &ip_vs_ftp);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (!ipvs)
+ return;
+
+ unregister_ip_vs_app(ipvs, &ip_vs_ftp);
}
static struct pernet_operations ip_vs_ftp_ops = {
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 127f14046..cccf4d637 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -250,8 +250,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
static int sysctl_lblc_expiration(struct ip_vs_service *svc)
{
#ifdef CONFIG_SYSCTL
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
- return ipvs->sysctl_lblc_expiration;
+ return svc->ipvs->sysctl_lblc_expiration;
#else
return DEFAULT_EXPIRATION;
#endif
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 2229d2d8b..796d70e47 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -415,8 +415,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
{
#ifdef CONFIG_SYSCTL
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
- return ipvs->sysctl_lblcr_expiration;
+ return svc->ipvs->sysctl_lblcr_expiration;
#else
return DEFAULT_EXPIRATION;
#endif
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 136184572..30434fb13 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -161,7 +161,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
/* RS->CLIENT */
orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
- ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
+ ip_vs_conn_fill_param(net_ipvs(net), exp->tuple.src.l3num, orig->dst.protonum,
&orig->src.u3, orig->src.u.tcp.port,
&orig->dst.u3, orig->dst.u.tcp.port, &p);
cp = ip_vs_conn_out_get(&p);
@@ -274,8 +274,7 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
" for conn " FMT_CONN "\n",
__func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
- h = nf_conntrack_find_get(ip_vs_conn_net(cp), &nf_ct_zone_dflt,
- &tuple);
+ h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
/* Show what happens instead of calling nf_ct_kill() */
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index bed5f7042..1b8d594e4 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -70,7 +70,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
const char *dptr;
int retc;
- ip_vs_fill_iph_skb(p->af, skb, &iph);
+ ip_vs_fill_iph_skb(p->af, skb, false, &iph);
/* Only useful with UDP */
if (iph.protocol != IPPROTO_UDP)
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 939f7fbe9..8ae480715 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -63,9 +63,8 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
* register an ipvs protocols netns related data
*/
static int
-register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
+register_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_protocol *pp)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
struct ip_vs_proto_data *pd =
kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL);
@@ -79,7 +78,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
atomic_set(&pd->appcnt, 0); /* Init app counter */
if (pp->init_netns != NULL) {
- int ret = pp->init_netns(net, pd);
+ int ret = pp->init_netns(ipvs, pd);
if (ret) {
/* unlink and free proto data */
ipvs->proto_data_table[hash] = pd->next;
@@ -116,9 +115,8 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
* unregister an ipvs protocols netns data
*/
static int
-unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
+unregister_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data **pd_p;
unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol);
@@ -127,7 +125,7 @@ unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
if (*pd_p == pd) {
*pd_p = pd->next;
if (pd->pp->exit_netns != NULL)
- pd->pp->exit_netns(net, pd);
+ pd->pp->exit_netns(ipvs, pd);
kfree(pd);
return 0;
}
@@ -156,8 +154,8 @@ EXPORT_SYMBOL(ip_vs_proto_get);
/*
* get ip_vs_protocol object data by netns and proto
*/
-static struct ip_vs_proto_data *
-__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
+struct ip_vs_proto_data *
+ip_vs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
{
struct ip_vs_proto_data *pd;
unsigned int hash = IP_VS_PROTO_HASH(proto);
@@ -169,14 +167,6 @@ __ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
return NULL;
}
-
-struct ip_vs_proto_data *
-ip_vs_proto_data_get(struct net *net, unsigned short proto)
-{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
- return __ipvs_proto_data_get(ipvs, proto);
-}
EXPORT_SYMBOL(ip_vs_proto_data_get);
/*
@@ -317,7 +307,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
/*
* per network name-space init
*/
-int __net_init ip_vs_protocol_net_init(struct net *net)
+int __net_init ip_vs_protocol_net_init(struct netns_ipvs *ipvs)
{
int i, ret;
static struct ip_vs_protocol *protos[] = {
@@ -339,27 +329,26 @@ int __net_init ip_vs_protocol_net_init(struct net *net)
};
for (i = 0; i < ARRAY_SIZE(protos); i++) {
- ret = register_ip_vs_proto_netns(net, protos[i]);
+ ret = register_ip_vs_proto_netns(ipvs, protos[i]);
if (ret < 0)
goto cleanup;
}
return 0;
cleanup:
- ip_vs_protocol_net_cleanup(net);
+ ip_vs_protocol_net_cleanup(ipvs);
return ret;
}
-void __net_exit ip_vs_protocol_net_cleanup(struct net *net)
+void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data *pd;
int i;
/* unregister all the ipvs proto data for this netns */
for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
while ((pd = ipvs->proto_data_table[i]) != NULL)
- unregister_ip_vs_proto_netns(net, pd);
+ unregister_ip_vs_proto_netns(ipvs, pd);
}
}
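With this hunk ip_vs_proto_data_get() is exported directly with a netns_ipvs argument and the old __ipvs_proto_data_get() wrapper disappears; per-namespace protocol data sits in a small table hashed by protocol number and chained through pd->next. The sketch below reproduces that lookup pattern in plain C using made-up names (proto_data, ns_table); it is illustrative only.

#include <stdio.h>
#include <stdlib.h>

#define PROTO_TAB_SIZE 32
#define PROTO_HASH(p) ((p) & (PROTO_TAB_SIZE - 1))

struct proto_data {
	unsigned short protocol;
	struct proto_data *next;	/* collision chain */
};

/* One table per namespace (stand-in for ipvs->proto_data_table). */
struct ns_table {
	struct proto_data *buckets[PROTO_TAB_SIZE];
};

static int register_proto(struct ns_table *t, unsigned short proto)
{
	unsigned int h = PROTO_HASH(proto);
	struct proto_data *pd = calloc(1, sizeof(*pd));

	if (!pd)
		return -1;
	pd->protocol = proto;
	pd->next = t->buckets[h];	/* push onto the chain head */
	t->buckets[h] = pd;
	return 0;
}

static struct proto_data *proto_data_get(struct ns_table *t,
					 unsigned short proto)
{
	struct proto_data *pd;

	for (pd = t->buckets[PROTO_HASH(proto)]; pd; pd = pd->next)
		if (pd->protocol == proto)
			return pd;
	return NULL;
}

int main(void)
{
	struct ns_table t = { { NULL } };

	register_proto(&t, 6);		/* TCP */
	register_proto(&t, 17);		/* UDP */
	printf("tcp found:  %d\n", proto_data_get(&t, 6) != NULL);
	printf("sctp found: %d\n", proto_data_get(&t, 132) != NULL);
	/* entries are left for the OS to reclaim at exit, for brevity */
	return 0;
}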
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 5de3dd312..5320d3997 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -41,30 +41,28 @@ struct isakmp_hdr {
#define PORT_ISAKMP 500
static void
-ah_esp_conn_fill_param_proto(struct net *net, int af,
- const struct ip_vs_iphdr *iph, int inverse,
+ah_esp_conn_fill_param_proto(struct netns_ipvs *ipvs, int af,
+ const struct ip_vs_iphdr *iph,
struct ip_vs_conn_param *p)
{
- if (likely(!inverse))
- ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
+ if (likely(!ip_vs_iph_inverse(iph)))
+ ip_vs_conn_fill_param(ipvs, af, IPPROTO_UDP,
&iph->saddr, htons(PORT_ISAKMP),
&iph->daddr, htons(PORT_ISAKMP), p);
else
- ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
+ ip_vs_conn_fill_param(ipvs, af, IPPROTO_UDP,
&iph->daddr, htons(PORT_ISAKMP),
&iph->saddr, htons(PORT_ISAKMP), p);
}
static struct ip_vs_conn *
-ah_esp_conn_in_get(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph,
- int inverse)
+ah_esp_conn_in_get(struct netns_ipvs *ipvs, int af, const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
- struct net *net = skb_net(skb);
- ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
+ ah_esp_conn_fill_param_proto(ipvs, af, iph, &p);
cp = ip_vs_conn_in_get(&p);
if (!cp) {
/*
@@ -73,7 +71,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
*/
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
"%s%s %s->%s\n",
- inverse ? "ICMP+" : "",
+ ip_vs_iph_icmp(iph) ? "ICMP+" : "",
ip_vs_proto_get(iph->protocol)->name,
IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr));
@@ -84,19 +82,18 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
static struct ip_vs_conn *
-ah_esp_conn_out_get(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph, int inverse)
+ah_esp_conn_out_get(struct netns_ipvs *ipvs, int af, const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
- struct net *net = skb_net(skb);
- ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
+ ah_esp_conn_fill_param_proto(ipvs, af, iph, &p);
cp = ip_vs_conn_out_get(&p);
if (!cp) {
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
"%s%s %s->%s\n",
- inverse ? "ICMP+" : "",
+ ip_vs_iph_icmp(iph) ? "ICMP+" : "",
ip_vs_proto_get(iph->protocol)->name,
IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr));
@@ -107,7 +104,8 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
static int
-ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+ah_esp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp,
struct ip_vs_iphdr *iph)
{
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 5b84c0b56..010ddeec1 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -9,35 +9,44 @@
#include <net/ip_vs.h>
static int
-sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp,
struct ip_vs_iphdr *iph)
{
- struct net *net;
struct ip_vs_service *svc;
- struct netns_ipvs *ipvs;
sctp_chunkhdr_t _schunkh, *sch;
sctp_sctphdr_t *sh, _sctph;
-
- sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
- if (sh == NULL) {
- *verdict = NF_DROP;
- return 0;
+ __be16 _ports[2], *ports = NULL;
+
+ if (likely(!ip_vs_iph_icmp(iph))) {
+ sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
+ if (sh) {
+ sch = skb_header_pointer(
+ skb, iph->len + sizeof(sctp_sctphdr_t),
+ sizeof(_schunkh), &_schunkh);
+ if (sch && (sch->type == SCTP_CID_INIT ||
+ sysctl_sloppy_sctp(ipvs)))
+ ports = &sh->source;
+ }
+ } else {
+ ports = skb_header_pointer(
+ skb, iph->len, sizeof(_ports), &_ports);
}
- sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
- sizeof(_schunkh), &_schunkh);
- if (sch == NULL) {
+ if (!ports) {
*verdict = NF_DROP;
return 0;
}
- net = skb_net(skb);
- ipvs = net_ipvs(net);
rcu_read_lock();
- if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) &&
- (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
- &iph->daddr, sh->dest))) {
+ if (likely(!ip_vs_iph_inverse(iph)))
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->daddr, ports[1]);
+ else
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->saddr, ports[0]);
+ if (svc) {
int ignored;
if (ip_vs_todrop(ipvs)) {
@@ -474,14 +483,13 @@ static inline __u16 sctp_app_hashkey(__be16 port)
& SCTP_APP_TAB_MASK;
}
-static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
+static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
- struct netns_ipvs *ipvs = net_ipvs(net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
hash = sctp_app_hashkey(port);
@@ -498,9 +506,9 @@ out:
return ret;
}
-static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
+static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
atomic_dec(&pd->appcnt);
list_del_rcu(&inc->p_list);
@@ -508,7 +516,7 @@ static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int sctp_app_conn_bind(struct ip_vs_conn *cp)
{
- struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
+ struct netns_ipvs *ipvs = cp->ipvs;
int hash;
struct ip_vs_app *inc;
int result = 0;
@@ -549,10 +557,8 @@ out:
* timeouts are netns related now.
* ---------------------------------------------
*/
-static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
sizeof(sctp_timeouts));
@@ -561,7 +567,7 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
return 0;
}
-static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
+static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
kfree(pd->timeout_table);
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 8e92beb0c..d7024b2ed 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -32,27 +32,47 @@
#include <net/ip_vs.h>
static int
-tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp,
struct ip_vs_iphdr *iph)
{
- struct net *net;
struct ip_vs_service *svc;
struct tcphdr _tcph, *th;
- struct netns_ipvs *ipvs;
+ __be16 _ports[2], *ports = NULL;
- th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
- if (th == NULL) {
+ /* In the event of icmp, we're only guaranteed to have the first 8
+ * bytes of the transport header, so we only check the rest of the
+ * TCP packet for non-ICMP packets
+ */
+ if (likely(!ip_vs_iph_icmp(iph))) {
+ th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
+ if (th) {
+ if (th->rst || !(sysctl_sloppy_tcp(ipvs) || th->syn))
+ return 1;
+ ports = &th->source;
+ }
+ } else {
+ ports = skb_header_pointer(
+ skb, iph->len, sizeof(_ports), &_ports);
+ }
+
+ if (!ports) {
*verdict = NF_DROP;
return 0;
}
- net = skb_net(skb);
- ipvs = net_ipvs(net);
+
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
rcu_read_lock();
- if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst &&
- (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
- &iph->daddr, th->dest))) {
+
+ if (likely(!ip_vs_iph_inverse(iph)))
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->daddr, ports[1]);
+ else
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->saddr, ports[0]);
+
+ if (svc) {
int ignored;
if (ip_vs_todrop(ipvs)) {
@@ -571,14 +591,13 @@ static inline __u16 tcp_app_hashkey(__be16 port)
}
-static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
+static int tcp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
- struct netns_ipvs *ipvs = net_ipvs(net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
hash = tcp_app_hashkey(port);
@@ -597,9 +616,9 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
static void
-tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
+tcp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
atomic_dec(&pd->appcnt);
list_del_rcu(&inc->p_list);
@@ -609,7 +628,7 @@ tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int
tcp_app_conn_bind(struct ip_vs_conn *cp)
{
- struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
+ struct netns_ipvs *ipvs = cp->ipvs;
int hash;
struct ip_vs_app *inc;
int result = 0;
@@ -653,9 +672,9 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/*
* Set LISTEN timeout. (ip_vs_conn_put will setup timer)
*/
-void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
+void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
{
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(cp->ipvs, IPPROTO_TCP);
spin_lock_bh(&cp->lock);
cp->state = IP_VS_TCP_S_LISTEN;
@@ -668,10 +687,8 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
* timeouts are netns related now.
* ---------------------------------------------
*/
-static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __ip_vs_tcp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
sizeof(tcp_timeouts));
@@ -681,7 +698,7 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
return 0;
}
-static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
+static void __ip_vs_tcp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
kfree(pd->timeout_table);
}
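The tcp_conn_schedule() hunk above, together with the analogous SCTP change earlier and the UDP change below, handles headers quoted inside ICMP errors: only the first 8 bytes of the inner transport header are guaranteed, which is enough for the source/destination port pair, and the service lookup keys on saddr/sport instead of daddr/dport when the header is in inverse direction. Below is a hedged userspace sketch of that port handling; pick_ports() is an invented helper, not a kernel function.

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>	/* ntohs */

/* For TCP, UDP and SCTP the first 4 bytes of the transport header are
 * the source and destination ports, so 8 guaranteed bytes are enough. */
struct port_pair {
	uint16_t sport;		/* network byte order */
	uint16_t dport;
};

/* Read the port pair at 'off' if at least 4 bytes are available.
 * Returns 0 on success, -1 if the packet is too short (maps to drop). */
static int pick_ports(const uint8_t *pkt, size_t len, size_t off,
		      struct port_pair *out)
{
	if (len < off + sizeof(*out))
		return -1;
	memcpy(out, pkt + off, sizeof(*out));
	return 0;
}

int main(void)
{
	/* Fake inner header: sport 12345, dport 80, both big-endian. */
	uint8_t pkt[] = { 0x30, 0x39, 0x00, 0x50, 0x00, 0x00, 0x00, 0x00 };
	struct port_pair pp;
	int inverse = 1;	/* e.g. an ICMP error quoting a reply */

	if (pick_ports(pkt, sizeof(pkt), 0, &pp) < 0)
		return 1;	/* would correspond to NF_DROP */

	/* Normal direction keys the service lookup on the destination
	 * port; inverse direction keys it on the source port instead. */
	printf("lookup port: %u\n",
	       (unsigned)ntohs(inverse ? pp.sport : pp.dport));
	return 0;
}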
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index b62a3c0ff..e494e9a88 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -29,28 +29,42 @@
#include <net/ip6_checksum.h>
static int
-udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp,
struct ip_vs_iphdr *iph)
{
- struct net *net;
struct ip_vs_service *svc;
struct udphdr _udph, *uh;
+ __be16 _ports[2], *ports = NULL;
- /* IPv6 fragments, only first fragment will hit this */
- uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
- if (uh == NULL) {
+ if (likely(!ip_vs_iph_icmp(iph))) {
+ /* IPv6 fragments, only first fragment will hit this */
+ uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
+ if (uh)
+ ports = &uh->source;
+ } else {
+ ports = skb_header_pointer(
+ skb, iph->len, sizeof(_ports), &_ports);
+ }
+
+ if (!ports) {
*verdict = NF_DROP;
return 0;
}
- net = skb_net(skb);
+
rcu_read_lock();
- svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
- &iph->daddr, uh->dest);
+ if (likely(!ip_vs_iph_inverse(iph)))
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->daddr, ports[1]);
+ else
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->saddr, ports[0]);
+
if (svc) {
int ignored;
- if (ip_vs_todrop(net_ipvs(net))) {
+ if (ip_vs_todrop(ipvs)) {
/*
* It seems that we are very loaded.
* We have to drop this packet :(
@@ -348,14 +362,13 @@ static inline __u16 udp_app_hashkey(__be16 port)
}
-static int udp_register_app(struct net *net, struct ip_vs_app *inc)
+static int udp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
- struct netns_ipvs *ipvs = net_ipvs(net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
hash = udp_app_hashkey(port);
@@ -374,9 +387,9 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc)
static void
-udp_unregister_app(struct net *net, struct ip_vs_app *inc)
+udp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
atomic_dec(&pd->appcnt);
list_del_rcu(&inc->p_list);
@@ -385,7 +398,7 @@ udp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int udp_app_conn_bind(struct ip_vs_conn *cp)
{
- struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
+ struct netns_ipvs *ipvs = cp->ipvs;
int hash;
struct ip_vs_app *inc;
int result = 0;
@@ -456,10 +469,8 @@ udp_state_transition(struct ip_vs_conn *cp, int direction,
cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
}
-static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
sizeof(udp_timeouts));
@@ -468,7 +479,7 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
return 0;
}
-static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
+static void __udp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
kfree(pd->timeout_table);
}
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 98a13433b..1e373a5e4 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -280,35 +280,29 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
static inline __be16
ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
{
- __be16 port;
- struct tcphdr _tcph, *th;
- struct udphdr _udph, *uh;
- sctp_sctphdr_t _sctph, *sh;
+ __be16 _ports[2], *ports;
+ /* At this point we know that we have a valid packet of some kind.
+ * Because ICMP packets are only guaranteed to have the first 8
+ * bytes, let's just grab the ports. Fortunately they're in the
+ * same position for all three of the protocols we care about.
+ */
switch (iph->protocol) {
case IPPROTO_TCP:
- th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
- if (unlikely(th == NULL))
- return 0;
- port = th->source;
- break;
case IPPROTO_UDP:
- uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
- if (unlikely(uh == NULL))
- return 0;
- port = uh->source;
- break;
case IPPROTO_SCTP:
- sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
- if (unlikely(sh == NULL))
+ ports = skb_header_pointer(skb, iph->len, sizeof(_ports),
+ &_ports);
+ if (unlikely(!ports))
return 0;
- port = sh->source;
- break;
+
+ if (likely(!ip_vs_iph_inverse(iph)))
+ return ports[0];
+ else
+ return ports[1];
default:
- port = 0;
+ return 0;
}
-
- return port;
}
@@ -322,6 +316,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
struct ip_vs_dest *dest;
struct ip_vs_sh_state *s;
__be16 port = 0;
+ const union nf_inet_addr *hash_addr;
+
+ hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr;
IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
@@ -331,9 +328,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
s = (struct ip_vs_sh_state *) svc->sched_data;
if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK)
- dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port);
+ dest = ip_vs_sh_get_fallback(svc, s, hash_addr, port);
else
- dest = ip_vs_sh_get(svc, s, &iph->saddr, port);
+ dest = ip_vs_sh_get(svc, s, hash_addr, port);
if (!dest) {
ip_vs_scheduler_err(svc, "no destination available");
@@ -341,7 +338,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
}
IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
- IP_VS_DBG_ADDR(svc->af, &iph->saddr),
+ IP_VS_DBG_ADDR(svc->af, hash_addr),
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port));
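ip_vs_sh_get_port() now reads one 4-byte port pair for TCP, UDP and SCTP alike, and the scheduler hashes on daddr/dport rather than saddr/sport when the header is marked inverse. A tiny sketch of that selection with assumed field names; the toy hash only stands in for the real bucket hash.

#include <stdio.h>
#include <stdint.h>

/* Minimal view of the fields the SH scheduler chooses between. */
struct flow {
	uint32_t saddr, daddr;
	uint16_t sport, dport;
	int inverse;		/* header taken from an ICMP error */
};

/* Toy hash; the real scheduler hashes address and port into a
 * fixed-size bucket table. */
static unsigned int sh_hash(uint32_t addr, uint16_t port)
{
	return (addr ^ port) & 0xff;
}

int main(void)
{
	struct flow f = {
		.saddr = 0x0a000001, .daddr = 0x0a000002,
		.sport = 40000, .dport = 80, .inverse = 1,
	};
	/* When the header is inverse, hash on the daddr/dport pair
	 * instead of saddr/sport, matching the hunk above. */
	uint32_t addr = f.inverse ? f.daddr : f.saddr;
	uint16_t port = f.inverse ? f.dport : f.sport;

	printf("bucket=%u\n", sh_hash(addr, port));
	return 0;
}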
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 43f140950..803001a45 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -193,7 +193,7 @@ union ip_vs_sync_conn {
#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
struct ip_vs_sync_thread_data {
- struct net *net;
+ struct netns_ipvs *ipvs;
struct socket *sock;
char *buf;
int id;
@@ -533,10 +533,9 @@ set:
* Version 0, can be switched in by sysctl.
* Add ip_vs_conn information into the current sync_buff.
*/
-static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
+static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp,
int pkts)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg_v0 *m;
struct ip_vs_sync_conn_v0 *s;
struct ip_vs_sync_buff *buff;
@@ -615,7 +614,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
pkts = atomic_add_return(1, &cp->in_pkts);
else
pkts = sysctl_sync_threshold(ipvs);
- ip_vs_sync_conn(net, cp, pkts);
+ ip_vs_sync_conn(ipvs, cp, pkts);
}
}
@@ -624,9 +623,8 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
* Called by ip_vs_in.
* Sending Version 1 messages
*/
-void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
+void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg *m;
union ip_vs_sync_conn *s;
struct ip_vs_sync_buff *buff;
@@ -637,7 +635,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
/* Handle old version of the protocol */
if (sysctl_sync_ver(ipvs) == 0) {
- ip_vs_sync_conn_v0(net, cp, pkts);
+ ip_vs_sync_conn_v0(ipvs, cp, pkts);
return;
}
/* Do not sync ONE PACKET */
@@ -784,21 +782,21 @@ control:
* fill_param used by version 1
*/
static inline int
-ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
+ip_vs_conn_fill_param_sync(struct netns_ipvs *ipvs, int af, union ip_vs_sync_conn *sc,
struct ip_vs_conn_param *p,
__u8 *pe_data, unsigned int pe_data_len,
__u8 *pe_name, unsigned int pe_name_len)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
- ip_vs_conn_fill_param(net, af, sc->v6.protocol,
+ ip_vs_conn_fill_param(ipvs, af, sc->v6.protocol,
(const union nf_inet_addr *)&sc->v6.caddr,
sc->v6.cport,
(const union nf_inet_addr *)&sc->v6.vaddr,
sc->v6.vport, p);
else
#endif
- ip_vs_conn_fill_param(net, af, sc->v4.protocol,
+ ip_vs_conn_fill_param(ipvs, af, sc->v4.protocol,
(const union nf_inet_addr *)&sc->v4.caddr,
sc->v4.cport,
(const union nf_inet_addr *)&sc->v4.vaddr,
@@ -837,7 +835,7 @@ ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
* Param: ...
* timeout is in sec.
*/
-static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
+static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *param,
unsigned int flags, unsigned int state,
unsigned int protocol, unsigned int type,
const union nf_inet_addr *daddr, __be16 dport,
@@ -846,7 +844,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
{
struct ip_vs_dest *dest;
struct ip_vs_conn *cp;
- struct netns_ipvs *ipvs = net_ipvs(net);
if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
cp = ip_vs_conn_in_get(param);
@@ -904,7 +901,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
* with synchronization, so we can make the assumption that
* the svc_af is the same as the dest_af
*/
- dest = ip_vs_find_dest(net, type, type, daddr, dport,
+ dest = ip_vs_find_dest(ipvs, type, type, daddr, dport,
param->vaddr, param->vport, protocol,
fwmark, flags);
@@ -941,7 +938,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
} else {
struct ip_vs_proto_data *pd;
- pd = ip_vs_proto_data_get(net, protocol);
+ pd = ip_vs_proto_data_get(ipvs, protocol);
if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
cp->timeout = pd->timeout_table[state];
else
@@ -953,7 +950,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
/*
* Process received multicast message for Version 0
*/
-static void ip_vs_process_message_v0(struct net *net, const char *buffer,
+static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer,
const size_t buflen)
{
struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
@@ -1009,14 +1006,14 @@ static void ip_vs_process_message_v0(struct net *net, const char *buffer,
}
}
- ip_vs_conn_fill_param(net, AF_INET, s->protocol,
+ ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol,
(const union nf_inet_addr *)&s->caddr,
s->cport,
(const union nf_inet_addr *)&s->vaddr,
s->vport, &param);
/* Send timeout as Zero */
- ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET,
+ ip_vs_proc_conn(ipvs, &param, flags, state, s->protocol, AF_INET,
(union nf_inet_addr *)&s->daddr, s->dport,
0, 0, opt);
}
@@ -1067,7 +1064,7 @@ static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
/*
* Process a Version 1 sync. connection
*/
-static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
+static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *msg_end)
{
struct ip_vs_sync_conn_options opt;
union ip_vs_sync_conn *s;
@@ -1171,21 +1168,21 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
state = 0;
}
}
- if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data,
+ if (ip_vs_conn_fill_param_sync(ipvs, af, s, &param, pe_data,
pe_data_len, pe_name, pe_name_len)) {
retc = 50;
goto out;
}
/* If only IPv4, just silently skip IPv6 */
if (af == AF_INET)
- ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af,
+ ip_vs_proc_conn(ipvs, &param, flags, state, s->v4.protocol, af,
(union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
(opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
);
#ifdef CONFIG_IP_VS_IPV6
else
- ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af,
+ ip_vs_proc_conn(ipvs, &param, flags, state, s->v6.protocol, af,
(union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
(opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
@@ -1204,10 +1201,9 @@ out:
* ip_vs_conn entries.
* Handles Version 0 & 1
*/
-static void ip_vs_process_message(struct net *net, __u8 *buffer,
+static void ip_vs_process_message(struct netns_ipvs *ipvs, __u8 *buffer,
const size_t buflen)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
__u8 *p, *msg_end;
int i, nr_conns;
@@ -1257,7 +1253,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
return;
}
/* Process a single sync_conn */
- retc = ip_vs_proc_sync_conn(net, p, msg_end);
+ retc = ip_vs_proc_sync_conn(ipvs, p, msg_end);
if (retc < 0) {
IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
retc);
@@ -1268,7 +1264,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
}
} else {
/* Old type of message */
- ip_vs_process_message_v0(net, buffer, buflen);
+ ip_vs_process_message_v0(ipvs, buffer, buflen);
return;
}
}
@@ -1493,16 +1489,15 @@ static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
/*
* Set up sending multicast socket over UDP
*/
-static struct socket *make_send_sock(struct net *net, int id)
+static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
/* multicast addr */
union ipvs_sockaddr mcast_addr;
struct socket *sock;
int result, salen;
/* First create a socket */
- result = sock_create_kern(net, ipvs->mcfg.mcast_af, SOCK_DGRAM,
+ result = sock_create_kern(ipvs->net, ipvs->mcfg.mcast_af, SOCK_DGRAM,
IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
@@ -1550,16 +1545,15 @@ error:
/*
* Set up receiving multicast socket over UDP
*/
-static struct socket *make_receive_sock(struct net *net, int id)
+static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
/* multicast addr */
union ipvs_sockaddr mcast_addr;
struct socket *sock;
int result, salen;
/* First create a socket */
- result = sock_create_kern(net, ipvs->bcfg.mcast_af, SOCK_DGRAM,
+ result = sock_create_kern(ipvs->net, ipvs->bcfg.mcast_af, SOCK_DGRAM,
IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
@@ -1687,7 +1681,7 @@ next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
static int sync_thread_master(void *data)
{
struct ip_vs_sync_thread_data *tinfo = data;
- struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
+ struct netns_ipvs *ipvs = tinfo->ipvs;
struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
struct sock *sk = tinfo->sock->sk;
struct ip_vs_sync_buff *sb;
@@ -1743,7 +1737,7 @@ done:
static int sync_thread_backup(void *data)
{
struct ip_vs_sync_thread_data *tinfo = data;
- struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
+ struct netns_ipvs *ipvs = tinfo->ipvs;
int len;
pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
@@ -1765,7 +1759,7 @@ static int sync_thread_backup(void *data)
break;
}
- ip_vs_process_message(tinfo->net, tinfo->buf, len);
+ ip_vs_process_message(ipvs, tinfo->buf, len);
}
}
@@ -1778,13 +1772,12 @@ static int sync_thread_backup(void *data)
}
-int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c,
+int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
int state)
{
struct ip_vs_sync_thread_data *tinfo;
struct task_struct **array = NULL, *task;
struct socket *sock;
- struct netns_ipvs *ipvs = net_ipvs(net);
struct net_device *dev;
char *name;
int (*threadfn)(void *data);
@@ -1811,7 +1804,7 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c,
if (!c->mcast_ttl)
c->mcast_ttl = 1;
- dev = __dev_get_by_name(net, c->mcast_ifn);
+ dev = __dev_get_by_name(ipvs->net, c->mcast_ifn);
if (!dev) {
pr_err("Unknown mcast interface: %s\n", c->mcast_ifn);
return -ENODEV;
@@ -1873,9 +1866,9 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c,
tinfo = NULL;
for (id = 0; id < count; id++) {
if (state == IP_VS_STATE_MASTER)
- sock = make_send_sock(net, id);
+ sock = make_send_sock(ipvs, id);
else
- sock = make_receive_sock(net, id);
+ sock = make_receive_sock(ipvs, id);
if (IS_ERR(sock)) {
result = PTR_ERR(sock);
goto outtinfo;
@@ -1883,7 +1876,7 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c,
tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
if (!tinfo)
goto outsocket;
- tinfo->net = net;
+ tinfo->ipvs = ipvs;
tinfo->sock = sock;
if (state == IP_VS_STATE_BACKUP) {
tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
@@ -1947,9 +1940,8 @@ out:
}
-int stop_sync_thread(struct net *net, int state)
+int stop_sync_thread(struct netns_ipvs *ipvs, int state)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct task_struct **array;
int id;
int retc = -EINVAL;
@@ -2015,27 +2007,24 @@ int stop_sync_thread(struct net *net, int state)
/*
* Initialize data struct for each netns
*/
-int __net_init ip_vs_sync_net_init(struct net *net)
+int __net_init ip_vs_sync_net_init(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
__mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
spin_lock_init(&ipvs->sync_lock);
spin_lock_init(&ipvs->sync_buff_lock);
return 0;
}
-void ip_vs_sync_net_cleanup(struct net *net)
+void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs)
{
int retc;
- struct netns_ipvs *ipvs = net_ipvs(net);
mutex_lock(&ipvs->sync_mutex);
- retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
+ retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER);
if (retc && retc != -ESRCH)
pr_err("Failed to stop Master Daemon\n");
- retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
+ retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP);
if (retc && retc != -ESRCH)
pr_err("Failed to stop Backup Daemon\n");
mutex_unlock(&ipvs->sync_mutex);
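The sync threads now carry struct netns_ipvs * in ip_vs_sync_thread_data, so both the master and backup loops read their namespace from tinfo->ipvs instead of converting a struct net *. The sketch below shows the same thread-argument-carries-context pattern with POSIX threads and a hypothetical sync_tinfo struct; it is only an analogy to the kthread usage in the diff.

#include <pthread.h>
#include <stdio.h>

/* Per-namespace state (stand-in for netns_ipvs). */
struct ns_ctx {
	const char *name;
	int sync_state;
};

/* Per-thread data (stand-in for ip_vs_sync_thread_data). */
struct sync_tinfo {
	struct ns_ctx *ctx;	/* was a struct net * before the change */
	int id;
};

static void *backup_thread(void *data)
{
	struct sync_tinfo *tinfo = data;

	/* The namespace is one dereference away; no global lookup. */
	printf("thread %d syncing namespace %s\n",
	       tinfo->id, tinfo->ctx->name);
	return NULL;
}

int main(void)
{
	struct ns_ctx ctx = { .name = "netns0", .sync_state = 0 };
	struct sync_tinfo tinfo = { .ctx = &ctx, .id = 0 };
	pthread_t t;

	if (pthread_create(&t, NULL, backup_thread, &tinfo))
		return 1;
	pthread_join(t, NULL);
	return 0;
}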
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 258a0b0e8..3264cb49b 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -212,19 +212,20 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
}
-static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
+static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af,
+ int rt_mode,
struct ip_vs_iphdr *ipvsh,
struct sk_buff *skb, int mtu)
{
#ifdef CONFIG_IP_VS_IPV6
if (skb_af == AF_INET6) {
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct net *net = ipvs->net;
if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
if (!skb->dev)
skb->dev = net->loopback_dev;
/* only send ICMP too big on first fragment */
- if (!ipvsh->fragoffs)
+ if (!ipvsh->fragoffs && !ip_vs_iph_icmp(ipvsh))
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
IP_VS_DBG(1, "frag needed for %pI6c\n",
&ipv6_hdr(skb)->saddr);
@@ -233,8 +234,6 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
} else
#endif
{
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
-
/* If we're going to tunnel the packet and pmtu discovery
* is disabled, we'll just fragment it anyway
*/
@@ -242,7 +241,8 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
return true;
if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) &&
- skb->len > mtu && !skb_is_gso(skb))) {
+ skb->len > mtu && !skb_is_gso(skb) &&
+ !ip_vs_iph_icmp(ipvsh))) {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
IP_VS_DBG(1, "frag needed for %pI4\n",
@@ -256,11 +256,12 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
/* Get route to destination or remote server */
static int
-__ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
+__ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
+ struct ip_vs_dest *dest,
__be32 daddr, int rt_mode, __be32 *ret_saddr,
struct ip_vs_iphdr *ipvsh)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct net *net = ipvs->net;
struct ip_vs_dest_dst *dest_dst;
struct rtable *rt; /* Route to the other host */
int mtu;
@@ -336,7 +337,7 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
maybe_update_pmtu(skb_af, skb, mtu);
}
- if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
+ if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu))
goto err_put;
skb_dst_drop(skb);
@@ -402,11 +403,12 @@ out_err:
* Get route to destination or remote server
*/
static int
-__ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
+__ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
+ struct ip_vs_dest *dest,
struct in6_addr *daddr, struct in6_addr *ret_saddr,
struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct net *net = ipvs->net;
struct ip_vs_dest_dst *dest_dst;
struct rt6_info *rt; /* Route to the other host */
struct dst_entry *dst;
@@ -484,7 +486,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
maybe_update_pmtu(skb_af, skb, mtu);
}
- if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
+ if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu))
goto err_put;
skb_dst_drop(skb);
@@ -573,8 +575,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
skb_forward_csum(skb);
if (!skb->sk)
skb_sender_cpu_clear(skb);
- NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
- NULL, skb_dst(skb)->dev, dst_output_sk);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
+ NULL, skb_dst(skb)->dev, dst_output);
} else
ret = NF_ACCEPT;
@@ -595,8 +597,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
skb_forward_csum(skb);
if (!skb->sk)
skb_sender_cpu_clear(skb);
- NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
- NULL, skb_dst(skb)->dev, dst_output_sk);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
+ NULL, skb_dst(skb)->dev, dst_output);
} else
ret = NF_ACCEPT;
return ret;
@@ -629,7 +631,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr,
+ if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr,
IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
goto tx_error;
@@ -656,10 +658,13 @@ int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+
EnterFunction(10);
rcu_read_lock();
- if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &ipvsh->daddr.in6, NULL,
+ if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL,
+ &iph->daddr, NULL,
ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
goto tx_error;
@@ -706,7 +711,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
}
was_input = rt_is_input_route(skb_rtable(skb));
- local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR, NULL, ipvsh);
@@ -723,7 +728,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) {
- IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
+ IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off,
"ip_vs_nat_xmit(): "
"stopping DNAT to local address");
goto tx_error;
@@ -733,8 +738,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* From world but DNAT to loopback address? */
if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
- IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
- "stopping DNAT to loopback address");
+ IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, ipvsh->off,
+ "ip_vs_nat_xmit(): stopping DNAT to loopback "
+ "address");
goto tx_error;
}
@@ -751,7 +757,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_hdr(skb)->daddr = cp->daddr.ip;
ip_send_check(ip_hdr(skb));
- IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
+ IP_VS_DBG_PKT(10, AF_INET, pp, skb, ipvsh->off, "After DNAT");
/* FIXME: when application helper enlarges the packet and the length
is larger than the MTU of outgoing device, there will be still
@@ -794,7 +800,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
}
- local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+ &cp->daddr.in6,
NULL, ipvsh, 0,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@@ -812,7 +819,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) {
- IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
+ IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, ipvsh->off,
"ip_vs_nat_xmit_v6(): "
"stopping DNAT to local address");
goto tx_error;
@@ -823,7 +830,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* From world but DNAT to loopback address? */
if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) {
- IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
+ IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, ipvsh->off,
"ip_vs_nat_xmit_v6(): "
"stopping DNAT to loopback address");
goto tx_error;
@@ -841,7 +848,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error;
ipv6_hdr(skb)->daddr = cp->daddr.in6;
- IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
+ IP_VS_DBG_PKT(10, AF_INET6, pp, skb, ipvsh->off, "After DNAT");
/* FIXME: when application helper enlarges the packet and the length
is larger than the MTU of outgoing device, there will be still
@@ -967,8 +974,8 @@ int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
- struct net *net = skb_net(skb);
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct netns_ipvs *ipvs = cp->ipvs;
+ struct net *net = ipvs->net;
struct rtable *rt; /* Route to the other host */
__be32 saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
@@ -984,7 +991,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_CONNECT |
@@ -1042,7 +1049,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ret = ip_vs_tunnel_xmit_prepare(skb, cp);
if (ret == NF_ACCEPT)
- ip_local_out(skb);
+ ip_local_out(net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
rcu_read_unlock();
@@ -1078,7 +1085,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+ &cp->daddr.in6,
&saddr, ipvsh, 1,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@@ -1133,7 +1141,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
ret = ip_vs_tunnel_xmit_prepare(skb, cp);
if (ret == NF_ACCEPT)
- ip6_local_out(skb);
+ ip6_local_out(cp->ipvs->net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
rcu_read_unlock();
@@ -1165,7 +1173,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
@@ -1204,7 +1212,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+ &cp->daddr.in6,
NULL, ipvsh, 0,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@@ -1273,7 +1282,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
rcu_read_lock();
- local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
+ local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
NULL, iph);
if (local < 0)
goto tx_error;
@@ -1365,8 +1374,8 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
- NULL, ipvsh, 0, rt_mode);
+ local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+ &cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
if (local < 0)
goto tx_error;
rt = (struct rt6_info *) skb_dst(skb);
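Two points stand out in the xmit hunks: the MTU check no longer emits ICMP frag-needed or packet-too-big errors for packets that were themselves extracted from ICMP errors, and the output path passes cp->ipvs->net explicitly to NF_HOOK()/ip_local_out(), matching their updated signatures. The sketch below covers only the first point, as a small userspace predicate with assumed field names.

#include <stdio.h>
#include <stdbool.h>

/* Minimal view of what the IPv4 MTU check looks at. */
struct pkt_view {
	unsigned int len;
	bool df;		/* IP_DF set */
	bool is_gso;
	bool from_icmp;		/* header parsed out of an ICMP error */
};

/* Returns true when the packet may proceed; returns false when an
 * ICMP_FRAG_NEEDED would be sent and the packet dropped -- except
 * that ICMP-embedded packets never trigger that error. */
static bool mtu_is_adequate(const struct pkt_view *p, unsigned int mtu)
{
	if (p->df && p->len > mtu && !p->is_gso && !p->from_icmp) {
		printf("would send ICMP_FRAG_NEEDED (mtu=%u)\n", mtu);
		return false;
	}
	return true;
}

int main(void)
{
	struct pkt_view big = { .len = 2000, .df = true,
				.is_gso = false, .from_icmp = false };
	struct pkt_view quoted = big;

	quoted.from_icmp = true;
	printf("plain packet:   %d\n", mtu_is_adequate(&big, 1500));
	printf("icmp-quoted:    %d\n", mtu_is_adequate(&quoted, 1500));
	return 0;
}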
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c09d6c719..3cb3cb831 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -168,6 +168,7 @@ nf_ct_get_tuple(const struct sk_buff *skb,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
+ struct net *net,
struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
@@ -181,12 +182,13 @@ nf_ct_get_tuple(const struct sk_buff *skb,
tuple->dst.protonum = protonum;
tuple->dst.dir = IP_CT_DIR_ORIGINAL;
- return l4proto->pkt_to_tuple(skb, dataoff, tuple);
+ return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
- u_int16_t l3num, struct nf_conntrack_tuple *tuple)
+ u_int16_t l3num,
+ struct net *net, struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_l3proto *l3proto;
struct nf_conntrack_l4proto *l4proto;
@@ -205,7 +207,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
l4proto = __nf_ct_l4proto_find(l3num, protonum);
- ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple,
+ ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
l3proto, l4proto);
rcu_read_unlock();
@@ -938,10 +940,13 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
}
timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
- if (timeout_ext)
- timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext);
- else
+ if (timeout_ext) {
+ timeouts = nf_ct_timeout_data(timeout_ext);
+ if (unlikely(!timeouts))
+ timeouts = l4proto->get_timeouts(net);
+ } else {
timeouts = l4proto->get_timeouts(net);
+ }
if (!l4proto->new(ct, skb, dataoff, timeouts)) {
nf_conntrack_free(ct);
@@ -950,7 +955,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
}
if (timeout_ext)
- nf_ct_timeout_ext_add(ct, timeout_ext->timeout, GFP_ATOMIC);
+ nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout),
+ GFP_ATOMIC);
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
@@ -1029,7 +1035,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
u32 hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
- dataoff, l3num, protonum, &tuple, l3proto,
+ dataoff, l3num, protonum, net, &tuple, l3proto,
l4proto)) {
pr_debug("resolve_normal_ct: Can't get tuple\n");
return NULL;
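In init_conntrack() the timeout extension's data is now read through nf_ct_timeout_data(), which may return NULL; when it does, the code falls back to the protocol's default timeout table. A minimal sketch of that extension-data-or-default fallback, with invented names:

#include <stdio.h>

/* Default per-state timeouts for some protocol (seconds). */
static unsigned int default_timeouts[] = { 30, 120, 432000 };

/* Stand-in for the timeout extension: data may be absent. */
struct timeout_ext {
	unsigned int *data;	/* NULL if no usable policy is attached */
};

static unsigned int *pick_timeouts(const struct timeout_ext *ext)
{
	if (ext && ext->data)
		return ext->data;	/* template-supplied policy */
	return default_timeouts;	/* protocol defaults */
}

int main(void)
{
	unsigned int custom[] = { 10, 20, 30 };
	struct timeout_ext with = { .data = custom };
	struct timeout_ext stale = { .data = NULL };

	printf("with ext:  %u\n", pick_timeouts(&with)[2]);
	printf("stale ext: %u\n", pick_timeouts(&stale)[2]);
	printf("no ext:    %u\n", pick_timeouts(NULL)[2]);
	return 0;
}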
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 94a66541e..9f5272968 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2133,9 +2133,9 @@ ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct,
struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *mask);
-#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT
+#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
static size_t
-ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
+ctnetlink_glue_build_size(const struct nf_conn *ct)
{
return 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */
+ 3 * nla_total_size(0) /* CTA_TUPLE_IP */
@@ -2162,8 +2162,19 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
;
}
-static int
-ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct)
+static struct nf_conn *ctnetlink_glue_get_ct(const struct sk_buff *skb,
+ enum ip_conntrack_info *ctinfo)
+{
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, ctinfo);
+ if (ct && nf_ct_is_untracked(ct))
+ ct = NULL;
+
+ return ct;
+}
+
+static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
{
const struct nf_conntrack_zone *zone;
struct nlattr *nest_parms;
@@ -2236,7 +2247,32 @@ nla_put_failure:
}
static int
-ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct)
+ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ u_int16_t ct_attr, u_int16_t ct_info_attr)
+{
+ struct nlattr *nest_parms;
+
+ nest_parms = nla_nest_start(skb, ct_attr | NLA_F_NESTED);
+ if (!nest_parms)
+ goto nla_put_failure;
+
+ if (__ctnetlink_glue_build(skb, ct) < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest_parms);
+
+ if (nla_put_be32(skb, ct_info_attr, htonl(ctinfo)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static int
+ctnetlink_glue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct)
{
int err;
@@ -2276,7 +2312,7 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct)
}
static int
-ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
+ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct)
{
struct nlattr *cda[CTA_MAX+1];
int ret;
@@ -2286,16 +2322,16 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
return ret;
spin_lock_bh(&nf_conntrack_expect_lock);
- ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
+ ret = ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
spin_unlock_bh(&nf_conntrack_expect_lock);
return ret;
}
-static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda,
- const struct nf_conn *ct,
- struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_tuple *mask)
+static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda,
+ const struct nf_conn *ct,
+ struct nf_conntrack_tuple *tuple,
+ struct nf_conntrack_tuple *mask)
{
int err;
@@ -2309,8 +2345,8 @@ static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda,
}
static int
-ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
- u32 portid, u32 report)
+ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
+ u32 portid, u32 report)
{
struct nlattr *cda[CTA_EXPECT_MAX+1];
struct nf_conntrack_tuple tuple, mask;
@@ -2322,8 +2358,8 @@ ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
if (err < 0)
return err;
- err = ctnetlink_nfqueue_exp_parse((const struct nlattr * const *)cda,
- ct, &tuple, &mask);
+ err = ctnetlink_glue_exp_parse((const struct nlattr * const *)cda,
+ ct, &tuple, &mask);
if (err < 0)
return err;
@@ -2350,14 +2386,24 @@ ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
return 0;
}
-static struct nfq_ct_hook ctnetlink_nfqueue_hook = {
- .build_size = ctnetlink_nfqueue_build_size,
- .build = ctnetlink_nfqueue_build,
- .parse = ctnetlink_nfqueue_parse,
- .attach_expect = ctnetlink_nfqueue_attach_expect,
- .seq_adjust = nf_ct_tcp_seqadj_set,
+static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo, int diff)
+{
+ if (!(ct->status & IPS_NAT_MASK))
+ return;
+
+ nf_ct_tcp_seqadj_set(skb, ct, ctinfo, diff);
+}
+
+static struct nfnl_ct_hook ctnetlink_glue_hook = {
+ .get_ct = ctnetlink_glue_get_ct,
+ .build_size = ctnetlink_glue_build_size,
+ .build = ctnetlink_glue_build,
+ .parse = ctnetlink_glue_parse,
+ .attach_expect = ctnetlink_glue_attach_expect,
+ .seq_adjust = ctnetlink_glue_seqadj,
};
-#endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */
+#endif /* CONFIG_NETFILTER_NETLINK_GLUE_CT */
/***********************************************************************
* EXPECT
@@ -3341,9 +3387,9 @@ static int __init ctnetlink_init(void)
pr_err("ctnetlink_init: cannot register pernet operations\n");
goto err_unreg_exp_subsys;
}
-#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT
+#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
/* setup interaction between nf_queue and nf_conntrack_netlink. */
- RCU_INIT_POINTER(nfq_ct_hook, &ctnetlink_nfqueue_hook);
+ RCU_INIT_POINTER(nfnl_ct_hook, &ctnetlink_glue_hook);
#endif
return 0;
@@ -3362,8 +3408,8 @@ static void __exit ctnetlink_exit(void)
unregister_pernet_subsys(&ctnetlink_net_ops);
nfnetlink_subsys_unregister(&ctnl_exp_subsys);
nfnetlink_subsys_unregister(&ctnl_subsys);
-#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT
- RCU_INIT_POINTER(nfq_ct_hook, NULL);
+#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
+ RCU_INIT_POINTER(nfnl_ct_hook, NULL);
#endif
}
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 6dd995c7c..fce1b1cca 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -398,7 +398,7 @@ static inline struct dccp_net *dccp_pernet(struct net *net)
}
static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
- struct nf_conntrack_tuple *tuple)
+ struct net *net, struct nf_conntrack_tuple *tuple)
{
struct dccp_hdr _hdr, *dh;
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 2281be419..86dc752e5 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -45,7 +45,7 @@ static inline struct nf_generic_net *generic_pernet(struct net *net)
static bool generic_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
- struct nf_conntrack_tuple *tuple)
+ struct net *net, struct nf_conntrack_tuple *tuple)
{
tuple->src.u.all = 0;
tuple->dst.u.all = 0;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 7648674f2..a96451a7a 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -190,9 +190,8 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple,
/* gre hdr info to tuple */
static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
- struct nf_conntrack_tuple *tuple)
+ struct net *net, struct nf_conntrack_tuple *tuple)
{
- struct net *net = dev_net(skb->dev ? skb->dev : skb_dst(skb)->dev);
const struct gre_hdr_pptp *pgrehdr;
struct gre_hdr_pptp _pgrehdr;
__be16 srckey;
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 67197731e..9578a7c37 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -156,7 +156,7 @@ static inline struct sctp_net *sctp_pernet(struct net *net)
}
static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
- struct nf_conntrack_tuple *tuple)
+ struct net *net, struct nf_conntrack_tuple *tuple)
{
const struct sctphdr *hp;
struct sctphdr _hdr;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 70383de72..278f3b935 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -277,7 +277,7 @@ static inline struct nf_tcp_net *tcp_pernet(struct net *net)
}
static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
- struct nf_conntrack_tuple *tuple)
+ struct net *net, struct nf_conntrack_tuple *tuple)
{
const struct tcphdr *hp;
struct tcphdr _hdr;
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 6957281ff..478f92f83 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -38,6 +38,7 @@ static inline struct nf_udp_net *udp_pernet(struct net *net)
static bool udp_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
+ struct net *net,
struct nf_conntrack_tuple *tuple)
{
const struct udphdr *hp;
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index c5903d164..1ac8ee13a 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -48,6 +48,7 @@ static inline struct udplite_net *udplite_pernet(struct net *net)
static bool udplite_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
+ struct net *net,
struct nf_conntrack_tuple *tuple)
{
const struct udphdr *hp;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 5113dfd39..06a9f4577 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -83,7 +83,7 @@ out:
rcu_read_unlock();
}
-int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
+int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
{
struct flowi fl;
unsigned int hh_len;
@@ -99,7 +99,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
dst = ((struct xfrm_dst *)dst)->route;
dst_hold(dst);
- dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0);
+ dst = xfrm_lookup(net, dst, &fl, skb->sk, 0);
if (IS_ERR(dst))
return PTR_ERR(dst);
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 97b75f9bf..d43869879 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -55,7 +55,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb,
rcu_read_lock();
indev = __in_dev_get_rcu(skb->dev);
- if (indev != NULL) {
+ if (indev && indev->ifa_list) {
ifa = indev->ifa_list;
newdst = ifa->ifa_local;
}
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 96777f9a9..5baa8e24e 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -69,19 +69,14 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
dev_put(physdev);
}
#endif
- /* Drop reference to owner of hook which queued us. */
- module_put(entry->elem->owner);
}
EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
/* Bump dev refs so they don't vanish while packet is out */
-bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
+void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
- if (!try_module_get(entry->elem->owner))
- return false;
-
if (state->in)
dev_hold(state->in);
if (state->out)
@@ -100,8 +95,6 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
dev_hold(physdev);
}
#endif
-
- return true;
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
@@ -131,22 +124,20 @@ int nf_queue(struct sk_buff *skb,
const struct nf_queue_handler *qh;
/* QUEUE == DROP if no one is waiting, to be safe. */
- rcu_read_lock();
-
qh = rcu_dereference(queue_handler);
if (!qh) {
status = -ESRCH;
- goto err_unlock;
+ goto err;
}
afinfo = nf_get_afinfo(state->pf);
if (!afinfo)
- goto err_unlock;
+ goto err;
entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
if (!entry) {
status = -ENOMEM;
- goto err_unlock;
+ goto err;
}
*entry = (struct nf_queue_entry) {
@@ -156,16 +147,11 @@ int nf_queue(struct sk_buff *skb,
.size = sizeof(*entry) + afinfo->route_key_size,
};
- if (!nf_queue_entry_get_refs(entry)) {
- status = -ECANCELED;
- goto err_unlock;
- }
+ nf_queue_entry_get_refs(entry);
skb_dst_force(skb);
afinfo->saveroute(skb, entry);
status = qh->outfn(entry, queuenum);
- rcu_read_unlock();
-
if (status < 0) {
nf_queue_entry_release_refs(entry);
goto err;
@@ -173,8 +159,6 @@ int nf_queue(struct sk_buff *skb,
return 0;
-err_unlock:
- rcu_read_unlock();
err:
kfree(entry);
return status;
@@ -187,19 +171,15 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
const struct nf_afinfo *afinfo;
int err;
- rcu_read_lock();
-
nf_queue_entry_release_refs(entry);
/* Continue traversal iff userspace said ok... */
- if (verdict == NF_REPEAT) {
- elem = list_entry(elem->list.prev, struct nf_hook_ops, list);
- verdict = NF_ACCEPT;
- }
+ if (verdict == NF_REPEAT)
+ verdict = elem->hook(elem->priv, skb, &entry->state);
if (verdict == NF_ACCEPT) {
afinfo = nf_get_afinfo(entry->state.pf);
- if (!afinfo || afinfo->reroute(skb, entry) < 0)
+ if (!afinfo || afinfo->reroute(entry->state.net, skb, entry) < 0)
verdict = NF_DROP;
}
@@ -215,15 +195,13 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
case NF_ACCEPT:
case NF_STOP:
local_bh_disable();
- entry->state.okfn(entry->state.sk, skb);
+ entry->state.okfn(entry->state.net, entry->state.sk, skb);
local_bh_enable();
break;
case NF_QUEUE:
err = nf_queue(skb, elem, &entry->state,
verdict >> NF_VERDICT_QBITS);
if (err < 0) {
- if (err == -ECANCELED)
- goto next_hook;
if (err == -ESRCH &&
(verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
goto next_hook;
@@ -235,7 +213,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
default:
kfree_skb(skb);
}
- rcu_read_unlock();
+
kfree(entry);
}
EXPORT_SYMBOL(nf_reinject);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4a41eb92b..2cb429d34 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -89,6 +89,7 @@ nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
}
static void nft_ctx_init(struct nft_ctx *ctx,
+ struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
struct nft_af_info *afi,
@@ -96,7 +97,7 @@ static void nft_ctx_init(struct nft_ctx *ctx,
struct nft_chain *chain,
const struct nlattr * const *nla)
{
- ctx->net = sock_net(skb->sk);
+ ctx->net = net;
ctx->afi = afi;
ctx->table = table;
ctx->chain = chain;
@@ -672,15 +673,14 @@ err:
return ret;
}
-static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newtable(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nlattr *name;
struct nft_af_info *afi;
struct nft_table *table;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
u32 flags = 0;
struct nft_ctx ctx;
@@ -706,7 +706,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
return nf_tables_updtable(&ctx);
}
@@ -730,7 +730,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
INIT_LIST_HEAD(&table->sets);
table->flags = flags;
- nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
if (err < 0)
goto err3;
@@ -810,18 +810,17 @@ out:
return err;
}
-static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_deltable(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
struct nft_table *table;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
- nft_ctx_init(&ctx, skb, nlh, NULL, NULL, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, NULL, NULL, NULL, nla);
if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL)
return nft_flush(&ctx, family);
@@ -1221,8 +1220,8 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
}
}
-static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newchain(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -1232,7 +1231,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
struct nft_chain *chain;
struct nft_base_chain *basechain = NULL;
struct nlattr *ha[NFTA_HOOK_MAX + 1];
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct net_device *dev = NULL;
u8 policy = NF_ACCEPT;
@@ -1313,7 +1311,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
return PTR_ERR(stats);
}
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
sizeof(struct nft_trans_chain));
if (trans == NULL) {
@@ -1433,7 +1431,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
for (i = 0; i < afi->nops; i++) {
ops = &basechain->ops[i];
ops->pf = family;
- ops->owner = afi->owner;
ops->hooknum = hooknum;
ops->priority = priority;
ops->priv = chain;
@@ -1462,7 +1459,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
if (err < 0)
goto err1;
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
if (err < 0)
goto err2;
@@ -1477,15 +1474,14 @@ err1:
return err;
}
-static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_delchain(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
struct nft_table *table;
struct nft_chain *chain;
- struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
@@ -1507,7 +1503,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
if (chain->use > 0)
return -EBUSY;
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
return nft_delchain(&ctx);
}
@@ -2011,13 +2007,12 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
static struct nft_expr_info *info;
-static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newrule(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
- struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_chain *chain;
struct nft_rule *rule, *old_rule = NULL;
@@ -2076,7 +2071,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
return PTR_ERR(old_rule);
}
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
n = 0;
size = 0;
@@ -2177,13 +2172,12 @@ err1:
return err;
}
-static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_delrule(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
- struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_chain *chain = NULL;
struct nft_rule *rule;
@@ -2206,7 +2200,7 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
return PTR_ERR(chain);
}
- nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla);
if (chain) {
if (nla[NFTA_RULE_HANDLE]) {
@@ -2345,12 +2339,11 @@ static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
[NFTA_SET_DESC_SIZE] = { .type = NLA_U32 },
};
-static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
+static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
- struct net *net = sock_net(skb->sk);
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi = NULL;
struct nft_table *table = NULL;
@@ -2372,7 +2365,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
return -ENOENT;
}
- nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla);
return 0;
}
@@ -2624,6 +2617,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
+ struct net *net = sock_net(skb->sk);
const struct nft_set *set;
struct nft_ctx ctx;
struct sk_buff *skb2;
@@ -2631,7 +2625,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
int err;
/* Verify existence before starting dump */
- err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla);
if (err < 0)
return err;
@@ -2694,14 +2688,13 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
return 0;
}
-static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newset(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
const struct nft_set_ops *ops;
struct nft_af_info *afi;
- struct net *net = sock_net(skb->sk);
struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
@@ -2799,7 +2792,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (IS_ERR(table))
return PTR_ERR(table);
- nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla);
set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]);
if (IS_ERR(set)) {
@@ -2883,8 +2876,8 @@ static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set
nft_set_destroy(set);
}
-static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_delset(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -2897,7 +2890,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_SET_TABLE] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
+ err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla);
if (err < 0)
return err;
@@ -3025,7 +3018,7 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
[NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
};
-static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
+static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
const struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -3034,7 +3027,6 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nft_af_info *afi;
struct nft_table *table;
- struct net *net = sock_net(skb->sk);
afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
if (IS_ERR(afi))
@@ -3046,7 +3038,7 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
if (!trans && (table->flags & NFT_TABLE_INACTIVE))
return -ENOENT;
- nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
+ nft_ctx_init(ctx, net, skb, nlh, afi, table, NULL, nla);
return 0;
}
@@ -3136,6 +3128,7 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = sock_net(skb->sk);
const struct nft_set *set;
struct nft_set_dump_args args;
struct nft_ctx ctx;
@@ -3151,8 +3144,8 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
if (err < 0)
return err;
- err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla,
- false);
+ err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh,
+ (void *)nla, false);
if (err < 0)
return err;
@@ -3213,11 +3206,12 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
+ struct net *net = sock_net(skb->sk);
const struct nft_set *set;
struct nft_ctx ctx;
int err;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, false);
if (err < 0)
return err;
@@ -3529,11 +3523,10 @@ err1:
return err;
}
-static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
- struct net *net = sock_net(skb->sk);
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
@@ -3542,7 +3535,7 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, true);
if (err < 0)
return err;
@@ -3624,8 +3617,8 @@ err1:
return err;
}
-static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
+static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
{
const struct nlattr *attr;
@@ -3636,7 +3629,7 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
+ err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, false);
if (err < 0)
return err;
@@ -4031,7 +4024,8 @@ static int nf_tables_abort(struct sk_buff *skb)
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
- list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list,
+ list) {
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 05d0b0353..f3695a497 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -48,9 +48,7 @@ static void __nft_trace_packet(const struct nft_pktinfo *pkt,
const struct nft_chain *chain,
int rulenum, enum nft_trace type)
{
- struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
-
- nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
+ nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in,
pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
chain->table->name, chain->name, comments[type],
rulenum);
@@ -111,10 +109,10 @@ struct nft_jumpstack {
};
unsigned int
-nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
+nft_do_chain(struct nft_pktinfo *pkt, void *priv)
{
- const struct nft_chain *chain = ops->priv, *basechain = chain;
- const struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
+ const struct nft_chain *chain = priv, *basechain = chain;
+ const struct net *net = pkt->net;
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
struct nft_regs regs;
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 2cae4d4a0..edb3502f2 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -17,13 +17,13 @@
static inline void
nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
- const struct nf_hook_ops *ops, struct sk_buff *skb,
+ struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct iphdr *iph, _iph;
u32 len, thoff;
- nft_set_pktinfo(pkt, ops, skb, state);
+ nft_set_pktinfo(pkt, skb, state);
iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph),
&_iph);
@@ -48,7 +48,6 @@ nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
static inline void
__nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
- const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -82,33 +81,32 @@ __nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
}
static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
- const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- nft_set_pktinfo(pkt, ops, skb, state);
- __nft_netdev_set_pktinfo_ipv6(pkt, ops, skb, state);
+ nft_set_pktinfo(pkt, skb, state);
+ __nft_netdev_set_pktinfo_ipv6(pkt, skb, state);
}
static unsigned int
-nft_do_chain_netdev(const struct nf_hook_ops *ops, struct sk_buff *skb,
+nft_do_chain_netdev(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nft_pktinfo pkt;
- switch (eth_hdr(skb)->h_proto) {
+ switch (skb->protocol) {
case htons(ETH_P_IP):
- nft_netdev_set_pktinfo_ipv4(&pkt, ops, skb, state);
+ nft_netdev_set_pktinfo_ipv4(&pkt, skb, state);
break;
case htons(ETH_P_IPV6):
- nft_netdev_set_pktinfo_ipv6(&pkt, ops, skb, state);
+ nft_netdev_set_pktinfo_ipv6(&pkt, skb, state);
break;
default:
- nft_set_pktinfo(&pkt, ops, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
break;
}
- return nft_do_chain(&pkt, ops);
+ return nft_do_chain(&pkt, priv);
}
static struct nft_af_info nft_af_netdev __read_mostly = {
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 70277b11f..77afe913d 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -64,7 +64,7 @@ void nfnl_unlock(__u8 subsys_id)
EXPORT_SYMBOL_GPL(nfnl_unlock);
#ifdef CONFIG_PROVE_LOCKING
-int lockdep_nfnl_is_held(u8 subsys_id)
+bool lockdep_nfnl_is_held(u8 subsys_id)
{
return lockdep_is_held(&table[subsys_id].mutex);
}
@@ -295,8 +295,6 @@ replay:
if (!skb)
return netlink_ack(oskb, nlh, -ENOMEM);
- skb->sk = oskb->sk;
-
nfnl_lock(subsys_id);
ss = rcu_dereference_protected(table[subsys_id].subsys,
lockdep_is_held(&table[subsys_id].mutex));
@@ -381,7 +379,7 @@ replay:
goto ack;
if (nc->call_batch) {
- err = nc->call_batch(net->nfnl, skb, nlh,
+ err = nc->call_batch(net, net->nfnl, skb, nlh,
(const struct nlattr **)cda);
}
@@ -492,7 +490,7 @@ static int nfnetlink_bind(struct net *net, int group)
type = nfnl_group2type[group];
rcu_read_lock();
- ss = nfnetlink_get_subsys(type);
+ ss = nfnetlink_get_subsys(type << 8);
rcu_read_unlock();
if (!ss)
request_module("nfnetlink-subsys-%d", type);
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 476accd17..c7a2d0e1c 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -291,6 +291,34 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb,
return ret;
}
+static void untimeout(struct nf_conntrack_tuple_hash *i,
+ struct ctnl_timeout *timeout)
+{
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
+ struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct);
+
+ if (timeout_ext && (!timeout || timeout_ext->timeout == timeout))
+ RCU_INIT_POINTER(timeout_ext->timeout, NULL);
+}
+
+static void ctnl_untimeout(struct ctnl_timeout *timeout)
+{
+ struct nf_conntrack_tuple_hash *h;
+ const struct hlist_nulls_node *nn;
+ int i;
+
+ local_bh_disable();
+ for (i = 0; i < init_net.ct.htable_size; i++) {
+ spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+ if (i < init_net.ct.htable_size) {
+ hlist_nulls_for_each_entry(h, nn, &init_net.ct.hash[i], hnnode)
+ untimeout(h, timeout);
+ }
+ spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+ }
+ local_bh_enable();
+}
+
/* try to delete object, fail if it is still in use. */
static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
{
@@ -301,6 +329,7 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
/* We are protected by nfnl mutex. */
list_del_rcu(&timeout->head);
nf_ct_l4proto_put(timeout->l4proto);
+ ctnl_untimeout(timeout);
kfree_rcu(timeout, rcu_head);
} else {
/* still in use, restore reference counter. */
@@ -567,6 +596,10 @@ static void __exit cttimeout_exit(void)
pr_info("cttimeout: unregistering from nfnetlink.\n");
nfnetlink_subsys_unregister(&cttimeout_subsys);
+
+ /* Make sure no conntrack objects refer to custom timeouts anymore. */
+ ctnl_untimeout(NULL);
+
list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) {
list_del_rcu(&cur->head);
/* We are sure that our objects have no clients at this point,
@@ -579,6 +612,7 @@ static void __exit cttimeout_exit(void)
RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+ rcu_barrier();
}
module_init(cttimeout_init);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 4670821b5..740cce468 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -27,6 +27,7 @@
#include <net/netlink.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_log.h>
+#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/spinlock.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
@@ -401,7 +402,9 @@ __build_packet_message(struct nfnl_log_net *log,
unsigned int hooknum,
const struct net_device *indev,
const struct net_device *outdev,
- const char *prefix, unsigned int plen)
+ const char *prefix, unsigned int plen,
+ const struct nfnl_ct_hook *nfnl_ct,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
struct nfulnl_msg_packet_hdr pmsg;
struct nlmsghdr *nlh;
@@ -538,9 +541,9 @@ __build_packet_message(struct nfnl_log_net *log,
if (skb->tstamp.tv64) {
struct nfulnl_msg_packet_timestamp ts;
- struct timeval tv = ktime_to_timeval(skb->tstamp);
- ts.sec = cpu_to_be64(tv.tv_sec);
- ts.usec = cpu_to_be64(tv.tv_usec);
+ struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
+ ts.sec = cpu_to_be64(kts.tv_sec);
+ ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
if (nla_put(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts))
goto nla_put_failure;
@@ -575,6 +578,10 @@ __build_packet_message(struct nfnl_log_net *log,
htonl(atomic_inc_return(&log->global_seq))))
goto nla_put_failure;
+ if (ct && nfnl_ct->build(inst->skb, ct, ctinfo,
+ NFULA_CT, NFULA_CT_INFO) < 0)
+ goto nla_put_failure;
+
if (data_len) {
struct nlattr *nla;
int size = nla_attr_size(data_len);
@@ -620,12 +627,16 @@ nfulnl_log_packet(struct net *net,
const struct nf_loginfo *li_user,
const char *prefix)
{
- unsigned int size, data_len;
+ size_t size;
+ unsigned int data_len;
struct nfulnl_instance *inst;
const struct nf_loginfo *li;
unsigned int qthreshold;
unsigned int plen;
struct nfnl_log_net *log = nfnl_log_pernet(net);
+ const struct nfnl_ct_hook *nfnl_ct = NULL;
+ struct nf_conn *ct = NULL;
+ enum ip_conntrack_info uninitialized_var(ctinfo);
if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
li = li_user;
@@ -671,6 +682,14 @@ nfulnl_log_packet(struct net *net,
size += nla_total_size(sizeof(u_int32_t));
if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL)
size += nla_total_size(sizeof(u_int32_t));
+ if (inst->flags & NFULNL_CFG_F_CONNTRACK) {
+ nfnl_ct = rcu_dereference(nfnl_ct_hook);
+ if (nfnl_ct != NULL) {
+ ct = nfnl_ct->get_ct(skb, &ctinfo);
+ if (ct != NULL)
+ size += nfnl_ct->build_size(ct);
+ }
+ }
qthreshold = inst->qthreshold;
/* per-rule qthreshold overrides per-instance */
@@ -715,7 +734,8 @@ nfulnl_log_packet(struct net *net,
inst->qlen++;
__build_packet_message(log, inst, skb, data_len, pf,
- hooknum, in, out, prefix, plen);
+ hooknum, in, out, prefix, plen,
+ nfnl_ct, ct, ctinfo);
if (inst->qlen >= qthreshold)
__nfulnl_flush(inst);
@@ -805,6 +825,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
struct net *net = sock_net(ctnl);
struct nfnl_log_net *log = nfnl_log_pernet(net);
int ret = 0;
+ u16 flags = 0;
if (nfula[NFULA_CFG_CMD]) {
u_int8_t pf = nfmsg->nfgen_family;
@@ -826,6 +847,28 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
goto out_put;
}
+ /* Check if we support these flags in first place, dependencies should
+ * be there too not to break atomicity.
+ */
+ if (nfula[NFULA_CFG_FLAGS]) {
+ flags = ntohs(nla_get_be16(nfula[NFULA_CFG_FLAGS]));
+
+ if ((flags & NFULNL_CFG_F_CONNTRACK) &&
+ !rcu_access_pointer(nfnl_ct_hook)) {
+#ifdef CONFIG_MODULES
+ nfnl_unlock(NFNL_SUBSYS_ULOG);
+ request_module("ip_conntrack_netlink");
+ nfnl_lock(NFNL_SUBSYS_ULOG);
+ if (rcu_access_pointer(nfnl_ct_hook)) {
+ ret = -EAGAIN;
+ goto out_put;
+ }
+#endif
+ ret = -EOPNOTSUPP;
+ goto out_put;
+ }
+ }
+
if (cmd != NULL) {
switch (cmd->command) {
case NFULNL_CFG_CMD_BIND:
@@ -854,16 +897,15 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
ret = -ENOTSUPP;
break;
}
+ } else if (!inst) {
+ ret = -ENODEV;
+ goto out;
}
if (nfula[NFULA_CFG_MODE]) {
- struct nfulnl_msg_config_mode *params;
- params = nla_data(nfula[NFULA_CFG_MODE]);
+ struct nfulnl_msg_config_mode *params =
+ nla_data(nfula[NFULA_CFG_MODE]);
- if (!inst) {
- ret = -ENODEV;
- goto out;
- }
nfulnl_set_mode(inst, params->copy_mode,
ntohl(params->copy_range));
}
@@ -871,42 +913,23 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
if (nfula[NFULA_CFG_TIMEOUT]) {
__be32 timeout = nla_get_be32(nfula[NFULA_CFG_TIMEOUT]);
- if (!inst) {
- ret = -ENODEV;
- goto out;
- }
nfulnl_set_timeout(inst, ntohl(timeout));
}
if (nfula[NFULA_CFG_NLBUFSIZ]) {
__be32 nlbufsiz = nla_get_be32(nfula[NFULA_CFG_NLBUFSIZ]);
- if (!inst) {
- ret = -ENODEV;
- goto out;
- }
nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz));
}
if (nfula[NFULA_CFG_QTHRESH]) {
__be32 qthresh = nla_get_be32(nfula[NFULA_CFG_QTHRESH]);
- if (!inst) {
- ret = -ENODEV;
- goto out;
- }
nfulnl_set_qthresh(inst, ntohl(qthresh));
}
- if (nfula[NFULA_CFG_FLAGS]) {
- __be16 flags = nla_get_be16(nfula[NFULA_CFG_FLAGS]);
-
- if (!inst) {
- ret = -ENODEV;
- goto out;
- }
- nfulnl_set_flags(inst, ntohs(flags));
- }
+ if (nfula[NFULA_CFG_FLAGS])
+ nfulnl_set_flags(inst, flags);
out_put:
instance_put(inst);
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue.c
index a5cd6d90b..861c66152 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -28,12 +28,12 @@
#include <linux/netfilter_bridge.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_queue.h>
+#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/list.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/netfilter/nf_queue.h>
#include <net/netns/generic.h>
-#include <net/netfilter/nfnetlink_queue.h>
#include <linux/atomic.h>
@@ -313,6 +313,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct net_device *outdev;
struct nf_conn *ct = NULL;
enum ip_conntrack_info uninitialized_var(ctinfo);
+ struct nfnl_ct_hook *nfnl_ct;
bool csum_verify;
char *secdata = NULL;
u32 seclen = 0;
@@ -364,8 +365,15 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
break;
}
- if (queue->flags & NFQA_CFG_F_CONNTRACK)
- ct = nfqnl_ct_get(entskb, &size, &ctinfo);
+ nfnl_ct = rcu_dereference(nfnl_ct_hook);
+
+ if (queue->flags & NFQA_CFG_F_CONNTRACK) {
+ if (nfnl_ct != NULL) {
+ ct = nfnl_ct->get_ct(entskb, &ctinfo);
+ if (ct != NULL)
+ size += nfnl_ct->build_size(ct);
+ }
+ }
if (queue->flags & NFQA_CFG_F_UID_GID) {
size += (nla_total_size(sizeof(u_int32_t)) /* uid */
@@ -493,9 +501,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (entskb->tstamp.tv64) {
struct nfqnl_msg_packet_timestamp ts;
- struct timeval tv = ktime_to_timeval(entskb->tstamp);
- ts.sec = cpu_to_be64(tv.tv_sec);
- ts.usec = cpu_to_be64(tv.tv_usec);
+ struct timespec64 kts = ktime_to_timespec64(entskb->tstamp);
+
+ ts.sec = cpu_to_be64(kts.tv_sec);
+ ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
if (nla_put(skb, NFQA_TIMESTAMP, sizeof(ts), &ts))
goto nla_put_failure;
@@ -508,7 +517,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata))
goto nla_put_failure;
- if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
+ if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0)
goto nla_put_failure;
if (cap_len > data_len &&
@@ -598,12 +607,9 @@ static struct nf_queue_entry *
nf_queue_entry_dup(struct nf_queue_entry *e)
{
struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
- if (entry) {
- if (nf_queue_entry_get_refs(entry))
- return entry;
- kfree(entry);
- }
- return NULL;
+ if (entry)
+ nf_queue_entry_get_refs(entry);
+ return entry;
}
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
@@ -670,8 +676,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
struct nfqnl_instance *queue;
struct sk_buff *skb, *segs;
int err = -ENOBUFS;
- struct net *net = dev_net(entry->state.in ?
- entry->state.in : entry->state.out);
+ struct net *net = entry->state.net;
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
/* rcu_read_lock()ed by nf_hook_slow() */
@@ -699,7 +704,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
nf_bridge_adjust_skb_data(skb);
segs = skb_gso_segment(skb, 0);
/* Does not use PTR_ERR to limit the number of error codes that can be
- * returned by nf_queue. For instance, callers rely on -ECANCELED to
+ * returned by nf_queue. For instance, callers rely on -ESRCH to
* mean 'ignore this hook'.
*/
if (IS_ERR_OR_NULL(segs))
@@ -1002,6 +1007,28 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
return 0;
}
+static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nfqa[],
+ struct nf_queue_entry *entry,
+ enum ip_conntrack_info *ctinfo)
+{
+ struct nf_conn *ct;
+
+ ct = nfnl_ct->get_ct(entry->skb, ctinfo);
+ if (ct == NULL)
+ return NULL;
+
+ if (nfnl_ct->parse(nfqa[NFQA_CT], ct) < 0)
+ return NULL;
+
+ if (nfqa[NFQA_EXP])
+ nfnl_ct->attach_expect(nfqa[NFQA_EXP], ct,
+ NETLINK_CB(entry->skb).portid,
+ nlmsg_report(nlh));
+ return ct;
+}
+
static int
nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
@@ -1015,6 +1042,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
unsigned int verdict;
struct nf_queue_entry *entry;
enum ip_conntrack_info uninitialized_var(ctinfo);
+ struct nfnl_ct_hook *nfnl_ct;
struct nf_conn *ct = NULL;
struct net *net = sock_net(ctnl);
@@ -1037,13 +1065,12 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
if (entry == NULL)
return -ENOENT;
+ /* rcu lock already held from nfnl->call_rcu. */
+ nfnl_ct = rcu_dereference(nfnl_ct_hook);
+
if (nfqa[NFQA_CT]) {
- ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo);
- if (ct && nfqa[NFQA_EXP]) {
- nfqnl_attach_expect(ct, nfqa[NFQA_EXP],
- NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
- }
+ if (nfnl_ct != NULL)
+ ct = nfqnl_ct_parse(nfnl_ct, nlh, nfqa, entry, &ctinfo);
}
if (nfqa[NFQA_PAYLOAD]) {
@@ -1054,8 +1081,8 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
payload_len, entry, diff) < 0)
verdict = NF_DROP;
- if (ct)
- nfqnl_ct_seq_adjust(entry->skb, ct, ctinfo, diff);
+ if (ct && diff)
+ nfnl_ct->seq_adjust(entry->skb, ct, ctinfo, diff);
}
if (nfqa[NFQA_MARK])
@@ -1392,6 +1419,7 @@ static int __init nfnetlink_queue_init(void)
cleanup_netlink_notifier:
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+ unregister_pernet_subsys(&nfnl_queue_net_ops);
out:
return status;
}
diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c
deleted file mode 100644
index 96cac50e0..000000000
--- a/net/netfilter/nfnetlink_queue_ct.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/skbuff.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_queue.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nfnetlink_queue.h>
-
-struct nf_conn *nfqnl_ct_get(struct sk_buff *entskb, size_t *size,
- enum ip_conntrack_info *ctinfo)
-{
- struct nfq_ct_hook *nfq_ct;
- struct nf_conn *ct;
-
- /* rcu_read_lock()ed by __nf_queue already. */
- nfq_ct = rcu_dereference(nfq_ct_hook);
- if (nfq_ct == NULL)
- return NULL;
-
- ct = nf_ct_get(entskb, ctinfo);
- if (ct) {
- if (!nf_ct_is_untracked(ct))
- *size += nfq_ct->build_size(ct);
- else
- ct = NULL;
- }
- return ct;
-}
-
-struct nf_conn *
-nfqnl_ct_parse(const struct sk_buff *skb, const struct nlattr *attr,
- enum ip_conntrack_info *ctinfo)
-{
- struct nfq_ct_hook *nfq_ct;
- struct nf_conn *ct;
-
- /* rcu_read_lock()ed by __nf_queue already. */
- nfq_ct = rcu_dereference(nfq_ct_hook);
- if (nfq_ct == NULL)
- return NULL;
-
- ct = nf_ct_get(skb, ctinfo);
- if (ct && !nf_ct_is_untracked(ct))
- nfq_ct->parse(attr, ct);
-
- return ct;
-}
-
-int nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- struct nfq_ct_hook *nfq_ct;
- struct nlattr *nest_parms;
- u_int32_t tmp;
-
- nfq_ct = rcu_dereference(nfq_ct_hook);
- if (nfq_ct == NULL)
- return 0;
-
- nest_parms = nla_nest_start(skb, NFQA_CT | NLA_F_NESTED);
- if (!nest_parms)
- goto nla_put_failure;
-
- if (nfq_ct->build(skb, ct) < 0)
- goto nla_put_failure;
-
- nla_nest_end(skb, nest_parms);
-
- tmp = ctinfo;
- if (nla_put_be32(skb, NFQA_CT_INFO, htonl(tmp)))
- goto nla_put_failure;
-
- return 0;
-
-nla_put_failure:
- return -1;
-}
-
-void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
- enum ip_conntrack_info ctinfo, int diff)
-{
- struct nfq_ct_hook *nfq_ct;
-
- nfq_ct = rcu_dereference(nfq_ct_hook);
- if (nfq_ct == NULL)
- return;
-
- if ((ct->status & IPS_NAT_MASK) && diff)
- nfq_ct->seq_adjust(skb, ct, ctinfo, diff);
-}
-
-int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
- u32 portid, u32 report)
-{
- struct nfq_ct_hook *nfq_ct;
-
- if (nf_ct_is_untracked(ct))
- return 0;
-
- nfq_ct = rcu_dereference(nfq_ct_hook);
- if (nfq_ct == NULL)
- return -EOPNOTSUPP;
-
- return nfq_ct->attach_expect(attr, ct, portid, report);
-}
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 1067fb4c1..c7808fc19 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -47,27 +47,34 @@ static void nft_counter_eval(const struct nft_expr *expr,
local_bh_enable();
}
-static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static void nft_counter_fetch(const struct nft_counter_percpu __percpu *counter,
+ struct nft_counter *total)
{
- struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
- struct nft_counter_percpu *cpu_stats;
- struct nft_counter total;
+ const struct nft_counter_percpu *cpu_stats;
u64 bytes, packets;
unsigned int seq;
int cpu;
- memset(&total, 0, sizeof(total));
+ memset(total, 0, sizeof(*total));
for_each_possible_cpu(cpu) {
- cpu_stats = per_cpu_ptr(priv->counter, cpu);
+ cpu_stats = per_cpu_ptr(counter, cpu);
do {
seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
bytes = cpu_stats->counter.bytes;
packets = cpu_stats->counter.packets;
} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
- total.packets += packets;
- total.bytes += bytes;
+ total->packets += packets;
+ total->bytes += bytes;
}
+}
+
+static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+ struct nft_counter total;
+
+ nft_counter_fetch(priv->counter, &total);
if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) ||
nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets)))
@@ -118,6 +125,31 @@ static void nft_counter_destroy(const struct nft_ctx *ctx,
free_percpu(priv->counter);
}
+static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(src);
+ struct nft_counter_percpu_priv *priv_clone = nft_expr_priv(dst);
+ struct nft_counter_percpu __percpu *cpu_stats;
+ struct nft_counter_percpu *this_cpu;
+ struct nft_counter total;
+
+ nft_counter_fetch(priv->counter, &total);
+
+ cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
+ GFP_ATOMIC);
+ if (cpu_stats == NULL)
+ return -ENOMEM;
+
+ preempt_disable();
+ this_cpu = this_cpu_ptr(cpu_stats);
+ this_cpu->counter.packets = total.packets;
+ this_cpu->counter.bytes = total.bytes;
+ preempt_enable();
+
+ priv_clone->counter = cpu_stats;
+ return 0;
+}
+
static struct nft_expr_type nft_counter_type;
static const struct nft_expr_ops nft_counter_ops = {
.type = &nft_counter_type,
@@ -126,6 +158,7 @@ static const struct nft_expr_ops nft_counter_ops = {
.init = nft_counter_init,
.destroy = nft_counter_destroy,
.dump = nft_counter_dump,
+ .clone = nft_counter_clone,
};
static struct nft_expr_type nft_counter_type __read_mostly = {
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 8cbca3432..939921532 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -366,6 +366,7 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
goto nla_put_failure;
switch (priv->key) {
+ case NFT_CT_L3PROTOCOL:
case NFT_CT_PROTOCOL:
case NFT_CT_SRC:
case NFT_CT_DST:
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 513a8ef60..9dec3bd1b 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -50,8 +50,9 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
}
ext = nft_set_elem_ext(set, elem);
- if (priv->expr != NULL)
- nft_expr_clone(nft_set_ext_expr(ext), priv->expr);
+ if (priv->expr != NULL &&
+ nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0)
+ return NULL;
return elem;
}
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index a13d6a386..319c22b4b 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -31,9 +31,8 @@ static void nft_log_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
const struct nft_log *priv = nft_expr_priv(expr);
- struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
- nf_log_packet(net, pkt->ops->pf, pkt->ops->hooknum, pkt->skb, pkt->in,
+ nf_log_packet(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in,
pkt->out, &priv->loginfo, "%s", priv->prefix);
}
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index cb2f13ebb..9dfaf4d55 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -31,6 +31,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
const struct nft_meta *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
const struct net_device *in = pkt->in, *out = pkt->out;
+ struct sock *sk;
u32 *dest = &regs->data[priv->dreg];
switch (priv->key) {
@@ -42,7 +43,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
*(__be16 *)dest = skb->protocol;
break;
case NFT_META_NFPROTO:
- *dest = pkt->ops->pf;
+ *dest = pkt->pf;
break;
case NFT_META_L4PROTO:
*dest = pkt->tprot;
@@ -86,33 +87,35 @@ void nft_meta_get_eval(const struct nft_expr *expr,
*(u16 *)dest = out->type;
break;
case NFT_META_SKUID:
- if (skb->sk == NULL || !sk_fullsock(skb->sk))
+ sk = skb_to_full_sk(skb);
+ if (!sk || !sk_fullsock(sk))
goto err;
- read_lock_bh(&skb->sk->sk_callback_lock);
- if (skb->sk->sk_socket == NULL ||
- skb->sk->sk_socket->file == NULL) {
- read_unlock_bh(&skb->sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket == NULL ||
+ sk->sk_socket->file == NULL) {
+ read_unlock_bh(&sk->sk_callback_lock);
goto err;
}
*dest = from_kuid_munged(&init_user_ns,
- skb->sk->sk_socket->file->f_cred->fsuid);
- read_unlock_bh(&skb->sk->sk_callback_lock);
+ sk->sk_socket->file->f_cred->fsuid);
+ read_unlock_bh(&sk->sk_callback_lock);
break;
case NFT_META_SKGID:
- if (skb->sk == NULL || !sk_fullsock(skb->sk))
+ sk = skb_to_full_sk(skb);
+ if (!sk || !sk_fullsock(sk))
goto err;
- read_lock_bh(&skb->sk->sk_callback_lock);
- if (skb->sk->sk_socket == NULL ||
- skb->sk->sk_socket->file == NULL) {
- read_unlock_bh(&skb->sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket == NULL ||
+ sk->sk_socket->file == NULL) {
+ read_unlock_bh(&sk->sk_callback_lock);
goto err;
}
*dest = from_kgid_munged(&init_user_ns,
- skb->sk->sk_socket->file->f_cred->fsgid);
- read_unlock_bh(&skb->sk->sk_callback_lock);
+ sk->sk_socket->file->f_cred->fsgid);
+ read_unlock_bh(&sk->sk_callback_lock);
break;
#ifdef CONFIG_IP_ROUTE_CLASSID
case NFT_META_RTCLASSID: {
@@ -135,7 +138,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
break;
}
- switch (pkt->ops->pf) {
+ switch (pkt->pf) {
case NFPROTO_IPV4:
if (ipv4_is_multicast(ip_hdr(skb)->daddr))
*dest = PACKET_MULTICAST;
@@ -168,9 +171,10 @@ void nft_meta_get_eval(const struct nft_expr *expr,
break;
#ifdef CONFIG_CGROUP_NET_CLASSID
case NFT_META_CGROUP:
- if (skb->sk == NULL || !sk_fullsock(skb->sk))
+ sk = skb_to_full_sk(skb);
+ if (!sk || !sk_fullsock(sk))
goto err;
- *dest = skb->sk->sk_classid;
+ *dest = sk->sk_classid;
break;
#endif
default:
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 96805d21d..61d216eb7 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -42,7 +42,7 @@ static void nft_queue_eval(const struct nft_expr *expr,
queue = priv->queuenum + cpu % priv->queues_total;
} else {
queue = nfqueue_hash(pkt->skb, queue,
- priv->queues_total, pkt->ops->pf,
+ priv->queues_total, pkt->pf,
jhash_initval);
}
}
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 635dbba93..759ca5248 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -22,38 +22,37 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
struct nft_reject *priv = nft_expr_priv(expr);
- struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
- switch (pkt->ops->pf) {
+ switch (pkt->pf) {
case NFPROTO_IPV4:
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
nf_send_unreach(pkt->skb, priv->icmp_code,
- pkt->ops->hooknum);
+ pkt->hook);
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset(pkt->skb, pkt->ops->hooknum);
+ nf_send_reset(pkt->net, pkt->skb, pkt->hook);
break;
case NFT_REJECT_ICMPX_UNREACH:
nf_send_unreach(pkt->skb,
nft_reject_icmp_code(priv->icmp_code),
- pkt->ops->hooknum);
+ pkt->hook);
break;
}
break;
case NFPROTO_IPV6:
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nf_send_unreach6(net, pkt->skb, priv->icmp_code,
- pkt->ops->hooknum);
+ nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code,
+ pkt->hook);
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
+ nf_send_reset6(pkt->net, pkt->skb, pkt->hook);
break;
case NFT_REJECT_ICMPX_UNREACH:
- nf_send_unreach6(net, pkt->skb,
+ nf_send_unreach6(pkt->net, pkt->skb,
nft_reject_icmpv6_code(priv->icmp_code),
- pkt->ops->hooknum);
+ pkt->hook);
break;
}
break;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 9b42b5ea6..d4aaad747 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1193,7 +1193,6 @@ struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
if (!(hook_mask & 1))
continue;
ops[i].hook = fn;
- ops[i].owner = table->me;
ops[i].pf = table->af;
ops[i].hooknum = hooknum;
ops[i].priority = table->priority;
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index faf32d888..e7ac07e53 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -171,6 +171,9 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
if (timeout_ext == NULL)
ret = -ENOMEM;
+ rcu_read_unlock();
+ return ret;
+
err_put_timeout:
__xt_ct_tg_timeout_put(timeout);
out:
@@ -318,8 +321,10 @@ static void xt_ct_destroy_timeout(struct nf_conn *ct)
if (timeout_put) {
timeout_ext = nf_ct_timeout_find(ct);
- if (timeout_ext)
+ if (timeout_ext) {
timeout_put(timeout_ext->timeout);
+ RCU_INIT_POINTER(timeout_ext->timeout, NULL);
+ }
}
rcu_read_unlock();
#endif
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index c13b79440..1763ab82b 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -33,7 +33,7 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_log_info *loginfo = par->targinfo;
struct nf_loginfo li;
- struct net *net = dev_net(par->in ? par->in : par->out);
+ struct net *net = par->net;
li.type = NF_LOG_TYPE_LOG;
li.u.log.level = loginfo->level;
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index fb7497c92..a1fa2c800 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -26,7 +26,7 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_nflog_info *info = par->targinfo;
struct nf_loginfo li;
- struct net *net = dev_net(par->in ? par->in : par->out);
+ struct net *net = par->net;
li.type = NF_LOG_TYPE_ULOG;
li.u.ulog.copy_len = info->len;
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 8c02501a5..b7c43def0 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -108,7 +108,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
return -1;
if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
- struct net *net = dev_net(par->in ? par->in : par->out);
+ struct net *net = par->net;
unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family);
if (dst_mtu(skb_dst(skb)) <= minlen) {
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index fd980aa77..3eff7b67c 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -31,8 +31,9 @@ static unsigned int
tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tee_tginfo *info = par->targinfo;
+ int oif = info->priv ? info->priv->oif : 0;

- nf_dup_ipv4(skb, par->hooknum, &info->gw.in, info->priv->oif);
+ nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, oif);
return XT_CONTINUE;
}
@@ -42,8 +43,9 @@ static unsigned int
tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tee_tginfo *info = par->targinfo;
+ int oif = info->priv ? info->priv->oif : 0;

- nf_dup_ipv6(skb, par->hooknum, &info->gw.in6, info->priv->oif);
+ nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, oif);
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d0c96c5ae..3ab591e73 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -250,8 +250,8 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
* no such listener is found, or NULL if the TCP header is incomplete.
*/
static struct sock *
-tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
- struct sock *sk)
+tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
+ __be32 laddr, __be16 lport, struct sock *sk)
{
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr _hdr, *hp;
@@ -267,7 +267,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
+ sk2 = nf_tproxy_get_sock_v4(net, iph->protocol,
iph->saddr, laddr ? laddr : iph->daddr,
hp->source, lport ? lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
@@ -290,7 +290,7 @@ nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
}
static unsigned int
-tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
+tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
u_int32_t mark_mask, u_int32_t mark_value)
{
const struct iphdr *iph = ip_hdr(skb);
@@ -305,7 +305,7 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, iph->protocol,
iph->saddr, iph->daddr,
hp->source, hp->dest,
skb->dev, NFT_LOOKUP_ESTABLISHED);
@@ -317,11 +317,11 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
/* UDP has no TCP_TIME_WAIT state, so we never enter here */
if (sk && sk->sk_state == TCP_TIME_WAIT)
/* reopening a TIME_WAIT connection needs special handling */
- sk = tproxy_handle_time_wait4(skb, laddr, lport, sk);
+ sk = tproxy_handle_time_wait4(net, skb, laddr, lport, sk);
else if (!sk)
/* no, there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, iph->protocol,
iph->saddr, laddr,
hp->source, lport,
skb->dev, NFT_LOOKUP_LISTENER);
@@ -351,7 +351,7 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tproxy_target_info *tgi = par->targinfo;
- return tproxy_tg4(skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value);
+ return tproxy_tg4(par->net, skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value);
}
static unsigned int
@@ -359,7 +359,7 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
- return tproxy_tg4(skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value);
+ return tproxy_tg4(par->net, skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value);
}
#ifdef XT_TPROXY_HAVE_IPV6
@@ -429,7 +429,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
+ sk2 = nf_tproxy_get_sock_v6(par->net, tproto,
&iph->saddr,
tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
hp->source,
@@ -472,7 +472,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
+ sk = nf_tproxy_get_sock_v6(par->net, tproto,
&iph->saddr, &iph->daddr,
hp->source, hp->dest,
par->in, NFT_LOOKUP_ESTABLISHED);
@@ -487,7 +487,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
else if (!sk)
/* no there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
+ sk = nf_tproxy_get_sock_v6(par->net, tproto,
&iph->saddr, laddr,
hp->source, lport,
par->in, NFT_LOOKUP_LISTENER);
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 5b4743cc0..11d609199 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -125,7 +125,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev,
static bool
addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
{
- struct net *net = dev_net(par->in ? par->in : par->out);
+ struct net *net = par->net;
const struct xt_addrtype_info *info = par->matchinfo;
const struct iphdr *iph = ip_hdr(skb);
bool ret = true;
@@ -143,7 +143,7 @@ addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
static bool
addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
{
- struct net *net = dev_net(par->in ? par->in : par->out);
+ struct net *net = par->net;
const struct xt_addrtype_info_v1 *info = par->matchinfo;
const struct iphdr *iph;
const struct net_device *dev = NULL;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 075d89d94..99bbc8298 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -317,7 +317,7 @@ static int count_them(struct net *net,
static bool
connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
- struct net *net = dev_net(par->in ? par->in : par->out);
+ struct net *net = par->net;
const struct xt_connlimit_info *info = par->matchinfo;
union nf_inet_addr addr;
struct nf_conntrack_tuple tuple;
@@ -332,7 +332,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
zone = nf_ct_zone(ct);
} else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
- par->family, &tuple)) {
+ par->family, net, &tuple)) {
goto hotdrop;
}
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 8d47c3780..71a9d95e0 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -48,6 +48,7 @@ static bool
ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_ipvs_mtinfo *data = par->matchinfo;
+ struct netns_ipvs *ipvs = net_ipvs(par->net);
/* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */
const u_int8_t family = par->family;
struct ip_vs_iphdr iph;
@@ -67,7 +68,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
goto out;
}
- ip_vs_fill_iph_skb(family, skb, &iph);
+ ip_vs_fill_iph_skb(family, skb, true, &iph);
if (data->bitmask & XT_IPVS_PROTO)
if ((iph.protocol == data->l4proto) ^
@@ -85,7 +86,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(family, skb, &iph, 1 /* inverse */);
+ cp = pp->conn_out_get(ipvs, family, skb, &iph);
if (unlikely(cp == NULL)) {
match = false;
goto out;
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 0778855ea..df8801e02 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -200,7 +200,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
unsigned char opts[MAX_IPOPTLEN];
const struct xt_osf_finger *kf;
const struct xt_osf_user_finger *f;
- struct net *net = dev_net(p->in ? p->in : p->out);
+ struct net *net = p->net;
if (!info)
return false;
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index ca2e577ed..1302b475a 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -14,6 +14,7 @@
#include <linux/skbuff.h>
#include <linux/file.h>
#include <net/sock.h>
+#include <net/inet_sock.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_owner.h>
@@ -33,8 +34,9 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_owner_match_info *info = par->matchinfo;
const struct file *filp;
+ struct sock *sk = skb_to_full_sk(skb);

- if (skb->sk == NULL || skb->sk->sk_socket == NULL)
+ if (sk == NULL || sk->sk_socket == NULL)
return (info->match ^ info->invert) == 0;
else if (info->match & info->invert & XT_OWNER_SOCKET)
/*
@@ -43,7 +45,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
*/
return false;
- filp = skb->sk->sk_socket->file;
+ filp = sk->sk_socket->file;
if (filp == NULL)
return ((info->match ^ info->invert) &
(XT_OWNER_UID | XT_OWNER_GID)) == 0;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 45e1b30e4..d725a2774 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -237,7 +237,7 @@ static void recent_table_flush(struct recent_table *t)
static bool
recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
- struct net *net = dev_net(par->in ? par->in : par->out);
+ struct net *net = par->net;
struct recent_net *recent_net = recent_pernet(net);
const struct xt_recent_mtinfo_v1 *info = par->matchinfo;
struct recent_table *t;
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 43e26c881..2ec08f04b 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -143,7 +143,8 @@ static bool xt_socket_sk_is_transparent(struct sock *sk)
}
}
-static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb,
+static struct sock *xt_socket_lookup_slow_v4(struct net *net,
+ const struct sk_buff *skb,
const struct net_device *indev)
{
const struct iphdr *iph = ip_hdr(skb);
@@ -197,7 +198,7 @@ static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb,
}
#endif
- return xt_socket_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr,
+ return xt_socket_get_sock_v4(net, protocol, saddr, daddr,
sport, dport, indev);
}
@@ -209,7 +210,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
struct sock *sk = skb->sk;
if (!sk)
- sk = xt_socket_lookup_slow_v4(skb, par->in);
+ sk = xt_socket_lookup_slow_v4(par->net, skb, par->in);
if (sk) {
bool wildcard;
bool transparent = true;
@@ -335,7 +336,8 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol,
return NULL;
}
-static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb,
+static struct sock *xt_socket_lookup_slow_v6(struct net *net,
+ const struct sk_buff *skb,
const struct net_device *indev)
{
__be16 uninitialized_var(dport), uninitialized_var(sport);
@@ -371,7 +373,7 @@ static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb,
return NULL;
}
- return xt_socket_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr,
+ return xt_socket_get_sock_v6(net, tproto, saddr, daddr,
sport, dport, indev);
}
@@ -383,7 +385,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
struct sock *sk = skb->sk;
if (!sk)
- sk = xt_socket_lookup_slow_v6(skb, par->in);
+ sk = xt_socket_lookup_slow_v6(par->net, skb, par->in);
if (sk) {
bool wildcard;
bool transparent = true;