From b4b7ff4b08e691656c9d77c758fc355833128ac0 Mon Sep 17 00:00:00 2001 From: AndrĂ© Fabian Silva Delgado Date: Wed, 20 Jan 2016 14:01:31 -0300 Subject: Linux-libre 4.4-gnu --- net/netfilter/ipvs/ip_vs_app.c | 36 +-- net/netfilter/ipvs/ip_vs_conn.c | 91 +++--- net/netfilter/ipvs/ip_vs_core.c | 550 +++++++++++++++++--------------- net/netfilter/ipvs/ip_vs_ctl.c | 291 +++++++++-------- net/netfilter/ipvs/ip_vs_est.c | 20 +- net/netfilter/ipvs/ip_vs_ftp.c | 27 +- net/netfilter/ipvs/ip_vs_lblc.c | 3 +- net/netfilter/ipvs/ip_vs_lblcr.c | 3 +- net/netfilter/ipvs/ip_vs_nfct.c | 5 +- net/netfilter/ipvs/ip_vs_pe_sip.c | 2 +- net/netfilter/ipvs/ip_vs_proto.c | 33 +- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 32 +- net/netfilter/ipvs/ip_vs_proto_sctp.c | 58 ++-- net/netfilter/ipvs/ip_vs_proto_tcp.c | 61 ++-- net/netfilter/ipvs/ip_vs_proto_udp.c | 49 +-- net/netfilter/ipvs/ip_vs_sh.c | 45 ++- net/netfilter/ipvs/ip_vs_sync.c | 87 +++-- net/netfilter/ipvs/ip_vs_xmit.c | 85 ++--- 18 files changed, 762 insertions(+), 716 deletions(-) (limited to 'net/netfilter/ipvs') diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index dfd7b65b3..0328f7250 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -75,7 +75,7 @@ static void ip_vs_app_inc_rcu_free(struct rcu_head *head) * Allocate/initialize app incarnation and register it in proto apps. */ static int -ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, +ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto, __u16 port) { struct ip_vs_protocol *pp; @@ -107,7 +107,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, } } - ret = pp->register_app(net, inc); + ret = pp->register_app(ipvs, inc); if (ret) goto out; @@ -127,7 +127,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, * Release app incarnation */ static void -ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) +ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_protocol *pp; @@ -135,7 +135,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) return; if (pp->unregister_app) - pp->unregister_app(net, inc); + pp->unregister_app(ipvs, inc); IP_VS_DBG(9, "%s App %s:%u unregistered\n", pp->name, inc->name, ntohs(inc->port)); @@ -175,14 +175,14 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc) * Register an application incarnation in protocol applications */ int -register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, +register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto, __u16 port) { int result; mutex_lock(&__ip_vs_app_mutex); - result = ip_vs_app_inc_new(net, app, proto, port); + result = ip_vs_app_inc_new(ipvs, app, proto, port); mutex_unlock(&__ip_vs_app_mutex); @@ -191,15 +191,11 @@ register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, /* Register application for netns */ -struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app) +struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_app *a; int err = 0; - if (!ipvs) - return ERR_PTR(-ENOENT); - mutex_lock(&__ip_vs_app_mutex); list_for_each_entry(a, &ipvs->app_list, a_list) { @@ -230,21 +226,17 @@ out_unlock: * We are sure there are no app incarnations attached to services * Caller should use synchronize_rcu() or rcu_barrier() */ -void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) +void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_app *a, *anxt, *inc, *nxt; - if (!ipvs) - return; - mutex_lock(&__ip_vs_app_mutex); list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) { if (app && strcmp(app->name, a->name)) continue; list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) { - ip_vs_app_inc_release(net, inc); + ip_vs_app_inc_release(ipvs, inc); } list_del(&a->a_list); @@ -611,17 +603,19 @@ static const struct file_operations ip_vs_app_fops = { }; #endif -int __net_init ip_vs_app_net_init(struct net *net) +int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); + struct net *net = ipvs->net; INIT_LIST_HEAD(&ipvs->app_list); proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops); return 0; } -void __net_exit ip_vs_app_net_cleanup(struct net *net) +void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) { - unregister_ip_vs_app(net, NULL /* all */); + struct net *net = ipvs->net; + + unregister_ip_vs_app(ipvs, NULL /* all */); remove_proc_entry("ip_vs_app", net->proc_net); } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index b0f7b626b..85ca189bd 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -108,7 +108,7 @@ static inline void ct_write_unlock_bh(unsigned int key) /* * Returns hash value for IPVS connection entry */ -static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned int proto, +static unsigned int ip_vs_conn_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, const union nf_inet_addr *addr, __be16 port) { @@ -116,11 +116,11 @@ static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned int pro if (af == AF_INET6) return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), (__force u32)port, proto, ip_vs_conn_rnd) ^ - ((size_t)net>>8)) & ip_vs_conn_tab_mask; + ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask; #endif return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto, ip_vs_conn_rnd) ^ - ((size_t)net>>8)) & ip_vs_conn_tab_mask; + ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask; } static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, @@ -141,14 +141,14 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, port = p->vport; } - return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port); + return ip_vs_conn_hashkey(p->ipvs, p->af, p->protocol, addr, port); } static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol, + ip_vs_conn_fill_param(cp->ipvs, cp->af, cp->protocol, &cp->caddr, cp->cport, NULL, 0, &p); if (cp->pe) { @@ -279,7 +279,7 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && p->protocol == cp->protocol && - ip_vs_conn_net_eq(cp, p->net)) { + cp->ipvs == p->ipvs) { if (!__ip_vs_conn_get(cp)) continue; /* HIT */ @@ -314,33 +314,34 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p) } static int -ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, +ip_vs_conn_fill_param_proto(struct netns_ipvs *ipvs, + int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, - int inverse, struct ip_vs_conn_param *p) + struct ip_vs_conn_param *p) { __be16 _ports[2], *pptr; - struct net *net = skb_net(skb); pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); if (pptr == NULL) return 1; - if (likely(!inverse)) - ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr, + if (likely(!ip_vs_iph_inverse(iph))) + ip_vs_conn_fill_param(ipvs, af, iph->protocol, &iph->saddr, pptr[0], &iph->daddr, pptr[1], p); else - ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr, + ip_vs_conn_fill_param(ipvs, af, iph->protocol, &iph->daddr, pptr[1], &iph->saddr, pptr[0], p); return 0; } struct ip_vs_conn * -ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, int inverse) +ip_vs_conn_in_get_proto(struct netns_ipvs *ipvs, int af, + const struct sk_buff *skb, + const struct ip_vs_iphdr *iph) { struct ip_vs_conn_param p; - if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p)) + if (ip_vs_conn_fill_param_proto(ipvs, af, skb, iph, &p)) return NULL; return ip_vs_conn_in_get(&p); @@ -359,7 +360,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (unlikely(p->pe_data && p->pe->ct_match)) { - if (!ip_vs_conn_net_eq(cp, p->net)) + if (cp->ipvs != p->ipvs) continue; if (p->pe == cp->pe && p->pe->ct_match(p, cp)) { if (__ip_vs_conn_get(cp)) @@ -377,7 +378,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) p->vport == cp->vport && p->cport == cp->cport && cp->flags & IP_VS_CONN_F_TEMPLATE && p->protocol == cp->protocol && - ip_vs_conn_net_eq(cp, p->net)) { + cp->ipvs == p->ipvs) { if (__ip_vs_conn_get(cp)) goto out; } @@ -418,7 +419,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && p->protocol == cp->protocol && - ip_vs_conn_net_eq(cp, p->net)) { + cp->ipvs == p->ipvs) { if (!__ip_vs_conn_get(cp)) continue; /* HIT */ @@ -439,12 +440,13 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) } struct ip_vs_conn * -ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, int inverse) +ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af, + const struct sk_buff *skb, + const struct ip_vs_iphdr *iph) { struct ip_vs_conn_param p; - if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p)) + if (ip_vs_conn_fill_param_proto(ipvs, af, skb, iph, &p)) return NULL; return ip_vs_conn_out_get(&p); @@ -638,7 +640,7 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp) * so we can make the assumption that the svc_af is the same as the * dest_af */ - dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, cp->af, &cp->daddr, + dest = ip_vs_find_dest(cp->ipvs, cp->af, cp->af, &cp->daddr, cp->dport, &cp->vaddr, cp->vport, cp->protocol, cp->fwmark, cp->flags); if (dest) { @@ -668,7 +670,7 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp) #endif ip_vs_bind_xmit(cp); - pd = ip_vs_proto_data_get(ip_vs_conn_net(cp), cp->protocol); + pd = ip_vs_proto_data_get(cp->ipvs, cp->protocol); if (pd && atomic_read(&pd->appcnt)) ip_vs_bind_app(cp, pd->pp); } @@ -746,7 +748,7 @@ static int expire_quiescent_template(struct netns_ipvs *ipvs, int ip_vs_check_template(struct ip_vs_conn *ct) { struct ip_vs_dest *dest = ct->dest; - struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct)); + struct netns_ipvs *ipvs = ct->ipvs; /* * Checking the dest server status. @@ -800,8 +802,7 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head) static void ip_vs_conn_expire(unsigned long data) { struct ip_vs_conn *cp = (struct ip_vs_conn *)data; - struct net *net = ip_vs_conn_net(cp); - struct netns_ipvs *ipvs = net_ipvs(net); + struct netns_ipvs *ipvs = cp->ipvs; /* * do I control anybody? @@ -847,7 +848,7 @@ static void ip_vs_conn_expire(unsigned long data) cp->timeout = 60*HZ; if (ipvs->sync_state & IP_VS_STATE_MASTER) - ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs)); + ip_vs_sync_conn(ipvs, cp, sysctl_sync_threshold(ipvs)); ip_vs_conn_put(cp); } @@ -875,8 +876,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, struct ip_vs_dest *dest, __u32 fwmark) { struct ip_vs_conn *cp; - struct netns_ipvs *ipvs = net_ipvs(p->net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net, + struct netns_ipvs *ipvs = p->ipvs; + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->ipvs, p->protocol); cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC); @@ -887,7 +888,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, INIT_HLIST_NODE(&cp->c_list); setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); - ip_vs_conn_net_set(cp, p->net); + cp->ipvs = ipvs; cp->af = p->af; cp->daf = dest_af; cp->protocol = p->protocol; @@ -1061,7 +1062,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) size_t len = 0; char dbuf[IP_VS_ADDRSTRLEN]; - if (!ip_vs_conn_net_eq(cp, net)) + if (!net_eq(cp->ipvs->net, net)) return 0; if (cp->pe_data) { pe_data[0] = ' '; @@ -1146,7 +1147,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) const struct ip_vs_conn *cp = v; struct net *net = seq_file_net(seq); - if (!ip_vs_conn_net_eq(cp, net)) + if (!net_eq(cp->ipvs->net, net)) return 0; #ifdef CONFIG_IP_VS_IPV6 @@ -1240,7 +1241,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp) } /* Called from keventd and must protect itself from softirqs */ -void ip_vs_random_dropentry(struct net *net) +void ip_vs_random_dropentry(struct netns_ipvs *ipvs) { int idx; struct ip_vs_conn *cp, *cp_c; @@ -1256,7 +1257,7 @@ void ip_vs_random_dropentry(struct net *net) if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; - if (!ip_vs_conn_net_eq(cp, net)) + if (cp->ipvs != ipvs) continue; if (cp->protocol == IPPROTO_TCP) { switch(cp->state) { @@ -1308,18 +1309,17 @@ void ip_vs_random_dropentry(struct net *net) /* * Flush all the connection entries in the ip_vs_conn_tab */ -static void ip_vs_conn_flush(struct net *net) +static void ip_vs_conn_flush(struct netns_ipvs *ipvs) { int idx; struct ip_vs_conn *cp, *cp_c; - struct netns_ipvs *ipvs = net_ipvs(net); flush_again: rcu_read_lock(); for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { - if (!ip_vs_conn_net_eq(cp, net)) + if (cp->ipvs != ipvs) continue; IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); @@ -1345,23 +1345,22 @@ flush_again: /* * per netns init and exit */ -int __net_init ip_vs_conn_net_init(struct net *net) +int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); - atomic_set(&ipvs->conn_count, 0); - proc_create("ip_vs_conn", 0, net->proc_net, &ip_vs_conn_fops); - proc_create("ip_vs_conn_sync", 0, net->proc_net, &ip_vs_conn_sync_fops); + proc_create("ip_vs_conn", 0, ipvs->net->proc_net, &ip_vs_conn_fops); + proc_create("ip_vs_conn_sync", 0, ipvs->net->proc_net, + &ip_vs_conn_sync_fops); return 0; } -void __net_exit ip_vs_conn_net_cleanup(struct net *net) +void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs) { /* flush all the connection entries first */ - ip_vs_conn_flush(net); - remove_proc_entry("ip_vs_conn", net->proc_net); - remove_proc_entry("ip_vs_conn_sync", net->proc_net); + ip_vs_conn_flush(ipvs); + remove_proc_entry("ip_vs_conn", ipvs->net->proc_net); + remove_proc_entry("ip_vs_conn_sync", ipvs->net->proc_net); } int __init ip_vs_conn_init(void) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 38fbc194b..f57b4dcdb 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -112,7 +112,7 @@ static inline void ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_dest *dest = cp->dest; - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + struct netns_ipvs *ipvs = cp->ipvs; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { struct ip_vs_cpu_stats *s; @@ -146,7 +146,7 @@ static inline void ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_dest *dest = cp->dest; - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + struct netns_ipvs *ipvs = cp->ipvs; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { struct ip_vs_cpu_stats *s; @@ -179,7 +179,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) static inline void ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) { - struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct netns_ipvs *ipvs = svc->ipvs; struct ip_vs_cpu_stats *s; s = this_cpu_ptr(cp->dest->stats.cpustats); @@ -215,7 +215,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, const union nf_inet_addr *vaddr, __be16 vport, struct ip_vs_conn_param *p) { - ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, + ip_vs_conn_fill_param(svc->ipvs, svc->af, protocol, caddr, cport, vaddr, vport, p); p->pe = rcu_dereference(svc->pe); if (p->pe && p->pe->fill_param) @@ -245,20 +245,30 @@ ip_vs_sched_persist(struct ip_vs_service *svc, const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; union nf_inet_addr snet; /* source network of the client, after masking */ + const union nf_inet_addr *src_addr, *dst_addr; + + if (likely(!ip_vs_iph_inverse(iph))) { + src_addr = &iph->saddr; + dst_addr = &iph->daddr; + } else { + src_addr = &iph->daddr; + dst_addr = &iph->saddr; + } + /* Mask saddr with the netmask to adjust template granularity */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, + ipv6_addr_prefix(&snet.in6, &src_addr->in6, (__force __u32) svc->netmask); else #endif - snet.ip = iph->saddr.ip & svc->netmask; + snet.ip = src_addr->ip & svc->netmask; IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " "mnet %s\n", - IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port), - IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port), + IP_VS_DBG_ADDR(svc->af, src_addr), ntohs(src_port), + IP_VS_DBG_ADDR(svc->af, dst_addr), ntohs(dst_port), IP_VS_DBG_ADDR(svc->af, &snet)); /* @@ -276,7 +286,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ { int protocol = iph->protocol; - const union nf_inet_addr *vaddr = &iph->daddr; + const union nf_inet_addr *vaddr = dst_addr; __be16 vport = 0; if (dst_port == svc->port) { @@ -366,8 +376,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* * Create a new connection according to the template */ - ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr, - src_port, &iph->daddr, dst_port, ¶m); + ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, src_addr, + src_port, dst_addr, dst_port, ¶m); cp = ip_vs_conn_new(¶m, dest->af, &dest->addr, dport, flags, dest, skb->mark); @@ -418,7 +428,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_conn *cp = NULL; struct ip_vs_scheduler *sched; struct ip_vs_dest *dest; - __be16 _ports[2], *pptr; + __be16 _ports[2], *pptr, cport, vport; + const void *caddr, *vaddr; unsigned int flags; *ignored = 1; @@ -429,14 +440,26 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, if (pptr == NULL) return NULL; + if (likely(!ip_vs_iph_inverse(iph))) { + cport = pptr[0]; + caddr = &iph->saddr; + vport = pptr[1]; + vaddr = &iph->daddr; + } else { + cport = pptr[1]; + caddr = &iph->daddr; + vport = pptr[0]; + vaddr = &iph->saddr; + } + /* * FTPDATA needs this check when using local real server. * Never schedule Active FTPDATA connections from real server. * For LVS-NAT they must be already created. For other methods * with persistence the connection is created on SYN+ACK. */ - if (pptr[0] == FTPDATA) { - IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, + if (cport == FTPDATA) { + IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off, "Not scheduling FTPDATA"); return NULL; } @@ -444,19 +467,25 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, /* * Do not schedule replies from local real server. */ - if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && - (cp = pp->conn_in_get(svc->af, skb, iph, 1))) { - IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, - "Not scheduling reply for existing connection"); - __ip_vs_conn_put(cp); - return NULL; + if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) { + iph->hdr_flags ^= IP_VS_HDR_INVERSE; + cp = pp->conn_in_get(svc->ipvs, svc->af, skb, iph); + iph->hdr_flags ^= IP_VS_HDR_INVERSE; + + if (cp) { + IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off, + "Not scheduling reply for existing" + " connection"); + __ip_vs_conn_put(cp); + return NULL; + } } /* * Persistent service */ if (svc->flags & IP_VS_SVC_F_PERSISTENT) - return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored, + return ip_vs_sched_persist(svc, skb, cport, vport, ignored, iph); *ignored = 0; @@ -464,7 +493,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, /* * Non-persistent service */ - if (!svc->fwmark && pptr[1] != svc->port) { + if (!svc->fwmark && vport != svc->port) { if (!svc->port) pr_err("Schedule: port zero only supported " "in persistent services, " @@ -495,11 +524,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, - &iph->saddr, pptr[0], &iph->daddr, - pptr[1], &p); + ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, + caddr, cport, vaddr, vport, &p); cp = ip_vs_conn_new(&p, dest->af, &dest->addr, - dest->port ? dest->port : pptr[1], + dest->port ? dest->port : vport, flags, dest, skb->mark); if (!cp) { *ignored = -1; @@ -519,6 +547,15 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, return cp; } +static inline int ip_vs_addr_is_unicast(struct net *net, int af, + union nf_inet_addr *addr) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + return ipv6_addr_type(&addr->in6) & IPV6_ADDR_UNICAST; +#endif + return (inet_addr_type(net, addr->ip) == RTN_UNICAST); +} /* * Pass or drop the packet. @@ -528,33 +565,21 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph) { - __be16 _ports[2], *pptr; -#ifdef CONFIG_SYSCTL - struct net *net; - struct netns_ipvs *ipvs; - int unicast; -#endif + __be16 _ports[2], *pptr, dport; + struct netns_ipvs *ipvs = svc->ipvs; + struct net *net = ipvs->net; pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); - if (pptr == NULL) { + if (!pptr) return NF_DROP; - } - -#ifdef CONFIG_SYSCTL - net = skb_net(skb); - -#ifdef CONFIG_IP_VS_IPV6 - if (svc->af == AF_INET6) - unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST; - else -#endif - unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST); + dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0]; /* if it is fwmark-based service, the cache_bypass sysctl is up and the destination is a non-local unicast, then create a cache_bypass connection entry */ - ipvs = net_ipvs(net); - if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) { + if (sysctl_cache_bypass(ipvs) && svc->fwmark && + !(iph->hdr_flags & (IP_VS_HDR_INVERSE | IP_VS_HDR_ICMP)) && + ip_vs_addr_is_unicast(net, svc->af, &iph->daddr)) { int ret; struct ip_vs_conn *cp; unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && @@ -566,7 +591,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, + ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, &iph->saddr, pptr[0], &iph->daddr, pptr[1], &p); cp = ip_vs_conn_new(&p, svc->af, &daddr, 0, @@ -590,7 +615,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_conn_put(cp); return ret; } -#endif /* * When the virtual ftp service is presented, packets destined @@ -598,9 +622,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * listed in the ipvs table), pass the packets, because it is * not ipvs job to decide to drop the packets. */ - if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) + if (svc->port == FTPPORT && dport != FTPPORT) return NF_ACCEPT; + if (unlikely(ip_vs_iph_icmp(iph))) + return NF_DROP; + /* * Notify the client that the destination is unreachable, and * release the socket buffer. @@ -610,11 +637,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) { - if (!skb->dev) { - struct net *net_ = dev_net(skb_dst(skb)->dev); - - skb->dev = net_->loopback_dev; - } + if (!skb->dev) + skb->dev = net->loopback_dev; icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); } else #endif @@ -625,15 +649,13 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, #ifdef CONFIG_SYSCTL -static int sysctl_snat_reroute(struct sk_buff *skb) +static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); return ipvs->sysctl_snat_reroute; } -static int sysctl_nat_icmp_send(struct net *net) +static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); return ipvs->sysctl_nat_icmp_send; } @@ -644,8 +666,8 @@ static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) #else -static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; } -static int sysctl_nat_icmp_send(struct net *net) { return 0; } +static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return 0; } +static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return 0; } static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; } #endif @@ -664,12 +686,13 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum) return IP_DEFRAG_VS_OUT; } -static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) +static inline int ip_vs_gather_frags(struct netns_ipvs *ipvs, + struct sk_buff *skb, u_int32_t user) { int err; local_bh_disable(); - err = ip_defrag(skb, user); + err = ip_defrag(ipvs->net, skb, user); local_bh_enable(); if (!err) ip_send_check(ip_hdr(skb)); @@ -677,10 +700,10 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) return err; } -static int ip_vs_route_me_harder(int af, struct sk_buff *skb, - unsigned int hooknum) +static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af, + struct sk_buff *skb, unsigned int hooknum) { - if (!sysctl_snat_reroute(skb)) + if (!sysctl_snat_reroute(ipvs)) return 0; /* Reroute replies only to remote clients (FORWARD and LOCAL_OUT) */ if (NF_INET_LOCAL_IN == hooknum) @@ -690,12 +713,12 @@ static int ip_vs_route_me_harder(int af, struct sk_buff *skb, struct dst_entry *dst = skb_dst(skb); if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) && - ip6_route_me_harder(skb) != 0) + ip6_route_me_harder(ipvs->net, skb) != 0) return 1; } else #endif if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) && - ip_route_me_harder(skb, RTN_LOCAL) != 0) + ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0) return 1; return 0; @@ -848,7 +871,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, #endif ip_vs_nat_icmp(skb, pp, cp, 1); - if (ip_vs_route_me_harder(af, skb, hooknum)) + if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum)) goto out; /* do the statistics and put it back */ @@ -872,8 +895,8 @@ out: * Find any that might be relevant, check against existing connections. * Currently handles error types - unreachable, quench, ttl exceeded. */ -static int ip_vs_out_icmp(struct sk_buff *skb, int *related, - unsigned int hooknum) +static int ip_vs_out_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, + int *related, unsigned int hooknum) { struct iphdr *iph; struct icmphdr _icmph, *ic; @@ -888,7 +911,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, /* reassemble IP fragments */ if (ip_is_fragment(ip_hdr(skb))) { - if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum))) + if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; } @@ -934,10 +957,10 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, "Checking outgoing ICMP for"); - ip_vs_fill_ip4hdr(cih, &ciph); - ciph.len += offset; + ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph); + /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET, skb, &ciph, 1); + cp = pp->conn_out_get(ipvs, AF_INET, skb, &ciph); if (!cp) return NF_ACCEPT; @@ -947,16 +970,16 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, } #ifdef CONFIG_IP_VS_IPV6 -static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, - unsigned int hooknum, struct ip_vs_iphdr *ipvsh) +static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, + int *related, unsigned int hooknum, + struct ip_vs_iphdr *ipvsh) { struct icmp6hdr _icmph, *ic; - struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */ struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; union nf_inet_addr snet; - unsigned int writable; + unsigned int offset; *related = 1; ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh); @@ -984,31 +1007,23 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, ic->icmp6_type, ntohs(icmpv6_id(ic)), &ipvsh->saddr, &ipvsh->daddr); - /* Now find the contained IP header */ - ciph.len = ipvsh->len + sizeof(_icmph); - ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h); - if (ip6h == NULL) + if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, ipvsh->len + sizeof(_icmph), + true, &ciph)) return NF_ACCEPT; /* The packet looks wrong, ignore */ - ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */ - ciph.daddr.in6 = ip6h->daddr; - /* skip possible IPv6 exthdrs of contained IPv6 packet */ - ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL); - if (ciph.protocol < 0) - return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */ pp = ip_vs_proto_get(ciph.protocol); if (!pp) return NF_ACCEPT; /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1); + cp = pp->conn_out_get(ipvs, AF_INET6, skb, &ciph); if (!cp) return NF_ACCEPT; snet.in6 = ciph.saddr.in6; - writable = ciph.len; + offset = ciph.len; return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp, - pp, writable, sizeof(struct ipv6hdr), + pp, offset, sizeof(struct ipv6hdr), hooknum); } #endif @@ -1093,7 +1108,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, { struct ip_vs_protocol *pp = pd->pp; - IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); + IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet"); if (!skb_make_writable(skb, iph->len)) goto drop; @@ -1127,10 +1142,10 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * if it came from this machine itself. So re-compute * the routing information. */ - if (ip_vs_route_me_harder(af, skb, hooknum)) + if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum)) goto drop; - IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); + IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT"); ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); @@ -1155,13 +1170,13 @@ drop: * Check if outgoing packet belongs to the established ip_vs_conn. */ static unsigned int -ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) +ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af) { - struct net *net = NULL; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; + struct sock *sk; EnterFunction(11); @@ -1169,29 +1184,27 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (skb->ipvs_property) return NF_ACCEPT; + sk = skb_to_full_sk(skb); /* Bad... Do not break raw sockets */ - if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && + if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT && af == AF_INET)) { - struct sock *sk = skb->sk; - struct inet_sock *inet = inet_sk(skb->sk); - if (inet && sk->sk_family == PF_INET && inet->nodefrag) + if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag) return NF_ACCEPT; } if (unlikely(!skb_dst(skb))) return NF_ACCEPT; - net = skb_net(skb); - if (!net_ipvs(net)->enable) + if (!ipvs->enable) return NF_ACCEPT; - ip_vs_fill_iph_skb(af, skb, &iph); + ip_vs_fill_iph_skb(af, skb, false, &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; - int verdict = ip_vs_out_icmp_v6(skb, &related, + int verdict = ip_vs_out_icmp_v6(ipvs, skb, &related, hooknum, &iph); if (related) @@ -1201,13 +1214,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) #endif if (unlikely(iph.protocol == IPPROTO_ICMP)) { int related; - int verdict = ip_vs_out_icmp(skb, &related, hooknum); + int verdict = ip_vs_out_icmp(ipvs, skb, &related, hooknum); if (related) return verdict; } - pd = ip_vs_proto_data_get(net, iph.protocol); + pd = ip_vs_proto_data_get(ipvs, iph.protocol); if (unlikely(!pd)) return NF_ACCEPT; pp = pd->pp; @@ -1217,21 +1230,21 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (af == AF_INET) #endif if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) { - if (ip_vs_gather_frags(skb, + if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; - ip_vs_fill_ip4hdr(skb_network_header(skb), &iph); + ip_vs_fill_iph_skb(AF_INET, skb, false, &iph); } /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(af, skb, &iph, 0); + cp = pp->conn_out_get(ipvs, af, skb, &iph); if (likely(cp)) return handle_response(af, skb, pd, cp, &iph, hooknum); - if (sysctl_nat_icmp_send(net) && + if (sysctl_nat_icmp_send(ipvs) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || pp->protocol == IPPROTO_SCTP)) { @@ -1241,7 +1254,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) sizeof(_ports), _ports, &iph); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ - if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr, + if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr, pptr[0])) { /* * Notify the real server: there is no @@ -1258,7 +1271,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { if (!skb->dev) - skb->dev = net->loopback_dev; + skb->dev = ipvs->net->loopback_dev; icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, @@ -1272,7 +1285,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) } } } - IP_VS_DBG_PKT(12, af, pp, skb, 0, + IP_VS_DBG_PKT(12, af, pp, skb, iph.off, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; } @@ -1283,10 +1296,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_reply4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(ops->hooknum, skb, AF_INET); + return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET); } /* @@ -1294,10 +1307,10 @@ ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_reply4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(ops->hooknum, skb, AF_INET); + return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1308,10 +1321,10 @@ ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_reply6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(ops->hooknum, skb, AF_INET6); + return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6); } /* @@ -1319,14 +1332,51 @@ ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_reply6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(ops->hooknum, skb, AF_INET6); + return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6); } #endif +static unsigned int +ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, + struct ip_vs_proto_data *pd, + int *verdict, struct ip_vs_conn **cpp, + struct ip_vs_iphdr *iph) +{ + struct ip_vs_protocol *pp = pd->pp; + + if (!iph->fragoffs) { + /* No (second) fragments need to enter here, as nf_defrag_ipv6 + * replayed fragment zero will already have created the cp + */ + + /* Schedule and create new connection entry into cpp */ + if (!pp->conn_schedule(ipvs, af, skb, pd, verdict, cpp, iph)) + return 0; + } + + if (unlikely(!*cpp)) { + /* sorry, all this trouble for a no-hit :) */ + IP_VS_DBG_PKT(12, af, pp, skb, iph->off, + "ip_vs_in: packet continues traversal as normal"); + if (iph->fragoffs) { + /* Fragment that couldn't be mapped to a conn entry + * is missing module nf_defrag_ipv6 + */ + IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); + IP_VS_DBG_PKT(7, af, pp, skb, iph->off, + "unhandled fragment"); + } + *verdict = NF_ACCEPT; + return 0; + } + + return 1; +} + /* * Handle ICMP messages in the outside-to-inside direction (incoming). * Find any that might be relevant, check against existing connections, @@ -1334,9 +1384,9 @@ ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, * Currently handles error types - unreachable, quench, ttl exceeded. */ static int -ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) +ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, + unsigned int hooknum) { - struct net *net = NULL; struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ @@ -1345,13 +1395,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; unsigned int offset, offset2, ihl, verdict; - bool ipip; + bool ipip, new_cp = false; *related = 1; /* reassemble IP fragments */ if (ip_is_fragment(ip_hdr(skb))) { - if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum))) + if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; } @@ -1385,8 +1435,6 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - net = skb_net(skb); - /* Special case for errors for IPIP packets */ ipip = false; if (cih->protocol == IPPROTO_IPIP) { @@ -1402,7 +1450,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ipip = true; } - pd = ip_vs_proto_data_get(net, cih->protocol); + pd = ip_vs_proto_data_get(ipvs, cih->protocol); if (!pd) return NF_ACCEPT; pp = pd->pp; @@ -1416,15 +1464,24 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) "Checking incoming ICMP for"); offset2 = offset; - ip_vs_fill_ip4hdr(cih, &ciph); - ciph.len += offset; + ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, !ipip, &ciph); offset = ciph.len; + /* The embedded headers contain source and dest in reverse order. * For IPIP this is error for request, not for reply. */ - cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1); - if (!cp) - return NF_ACCEPT; + cp = pp->conn_in_get(ipvs, AF_INET, skb, &ciph); + + if (!cp) { + int v; + + if (!sysctl_schedule_icmp(ipvs)) + return NF_ACCEPT; + + if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph)) + return v; + new_cp = true; + } verdict = NF_DROP; @@ -1455,7 +1512,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) skb_reset_network_header(skb); IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu); - ipv4_update_pmtu(skb, dev_net(skb->dev), + ipv4_update_pmtu(skb, ipvs->net, mtu, 0, 0, 0, 0); /* Client uses PMTUD? */ if (!(frag_off & htons(IP_DF))) @@ -1501,23 +1558,26 @@ ignore_ipip: verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph); out: - __ip_vs_conn_put(cp); + if (likely(!new_cp)) + __ip_vs_conn_put(cp); + else + ip_vs_conn_put(cp); return verdict; } #ifdef CONFIG_IP_VS_IPV6 -static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, - unsigned int hooknum, struct ip_vs_iphdr *iph) +static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, + int *related, unsigned int hooknum, + struct ip_vs_iphdr *iph) { - struct net *net = NULL; - struct ipv6hdr _ip6h, *ip6h; struct icmp6hdr _icmph, *ic; struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; - unsigned int offs_ciph, writable, verdict; + unsigned int offset, verdict; + bool new_cp = false; *related = 1; @@ -1546,21 +1606,11 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, ic->icmp6_type, ntohs(icmpv6_id(ic)), &iph->saddr, &iph->daddr); - /* Now find the contained IP header */ - ciph.len = iph->len + sizeof(_icmph); - offs_ciph = ciph.len; /* Save ip header offset */ - ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h); - if (ip6h == NULL) - return NF_ACCEPT; /* The packet looks wrong, ignore */ - ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */ - ciph.daddr.in6 = ip6h->daddr; - /* skip possible IPv6 exthdrs of contained IPv6 packet */ - ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL); - if (ciph.protocol < 0) - return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */ - - net = skb_net(skb); - pd = ip_vs_proto_data_get(net, ciph.protocol); + offset = iph->len + sizeof(_icmph); + if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, offset, true, &ciph)) + return NF_ACCEPT; + + pd = ip_vs_proto_data_get(ipvs, ciph.protocol); if (!pd) return NF_ACCEPT; pp = pd->pp; @@ -1569,36 +1619,49 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, if (ciph.fragoffs) return NF_ACCEPT; - IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph, + IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, "Checking incoming ICMPv6 for"); /* The embedded headers contain source and dest in reverse order * if not from localhost */ - cp = pp->conn_in_get(AF_INET6, skb, &ciph, - (hooknum == NF_INET_LOCAL_OUT) ? 0 : 1); + cp = pp->conn_in_get(ipvs, AF_INET6, skb, &ciph); + + if (!cp) { + int v; + + if (!sysctl_schedule_icmp(ipvs)) + return NF_ACCEPT; + + if (!ip_vs_try_to_schedule(ipvs, AF_INET6, skb, pd, &v, &cp, &ciph)) + return v; + + new_cp = true; + } - if (!cp) - return NF_ACCEPT; /* VS/TUN, VS/DR and LOCALNODE just let it go */ if ((hooknum == NF_INET_LOCAL_OUT) && (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) { - __ip_vs_conn_put(cp); - return NF_ACCEPT; + verdict = NF_ACCEPT; + goto out; } /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); /* Need to mangle contained IPv6 header in ICMPv6 packet */ - writable = ciph.len; + offset = ciph.len; if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol || IPPROTO_SCTP == ciph.protocol) - writable += 2 * sizeof(__u16); /* Also mangle ports */ + offset += 2 * sizeof(__u16); /* Also mangle ports */ - verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph); + verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum, &ciph); - __ip_vs_conn_put(cp); +out: + if (likely(!new_cp)) + __ip_vs_conn_put(cp); + else + ip_vs_conn_put(cp); return verdict; } @@ -1610,16 +1673,15 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, * and send it on its way... */ static unsigned int -ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) +ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af) { - struct net *net; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; int ret, pkts; - struct netns_ipvs *ipvs; int conn_reuse_mode; + struct sock *sk; /* Already marked as IPVS request or reply? */ if (skb->ipvs_property) @@ -1633,7 +1695,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (unlikely((skb->pkt_type != PACKET_HOST && hooknum != NF_INET_LOCAL_OUT) || !skb_dst(skb))) { - ip_vs_fill_iph_skb(af, skb, &iph); + ip_vs_fill_iph_skb(af, skb, false, &iph); IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s" " ignored in hook %u\n", skb->pkt_type, iph.protocol, @@ -1641,20 +1703,17 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) return NF_ACCEPT; } /* ipvs enabled in this netns ? */ - net = skb_net(skb); - ipvs = net_ipvs(net); if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - ip_vs_fill_iph_skb(af, skb, &iph); + ip_vs_fill_iph_skb(af, skb, false, &iph); /* Bad... Do not break raw sockets */ - if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && + sk = skb_to_full_sk(skb); + if (unlikely(sk && hooknum == NF_INET_LOCAL_OUT && af == AF_INET)) { - struct sock *sk = skb->sk; - struct inet_sock *inet = inet_sk(skb->sk); - if (inet && sk->sk_family == PF_INET && inet->nodefrag) + if (sk->sk_family == PF_INET && inet_sk(sk)->nodefrag) return NF_ACCEPT; } @@ -1662,8 +1721,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (af == AF_INET6) { if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; - int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum, - &iph); + int verdict = ip_vs_in_icmp_v6(ipvs, skb, &related, + hooknum, &iph); if (related) return verdict; @@ -1672,21 +1731,30 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) #endif if (unlikely(iph.protocol == IPPROTO_ICMP)) { int related; - int verdict = ip_vs_in_icmp(skb, &related, hooknum); + int verdict = ip_vs_in_icmp(ipvs, skb, &related, + hooknum); if (related) return verdict; } /* Protocol supported? */ - pd = ip_vs_proto_data_get(net, iph.protocol); - if (unlikely(!pd)) + pd = ip_vs_proto_data_get(ipvs, iph.protocol); + if (unlikely(!pd)) { + /* The only way we'll see this packet again is if it's + * encapsulated, so mark it with ipvs_property=1 so we + * skip it if we're ignoring tunneled packets + */ + if (sysctl_ignore_tunneled(ipvs)) + skb->ipvs_property = 1; + return NF_ACCEPT; + } pp = pd->pp; /* * Check if the packet belongs to an existing connection entry */ - cp = pp->conn_in_get(af, skb, &iph, 0); + cp = pp->conn_in_get(ipvs, af, skb, &iph); conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); if (conn_reuse_mode && !iph.fragoffs && @@ -1700,32 +1768,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) cp = NULL; } - if (unlikely(!cp) && !iph.fragoffs) { - /* No (second) fragments need to enter here, as nf_defrag_ipv6 - * replayed fragment zero will already have created the cp - */ + if (unlikely(!cp)) { int v; - /* Schedule and create new connection entry into &cp */ - if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph)) + if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph)) return v; } - if (unlikely(!cp)) { - /* sorry, all this trouble for a no-hit :) */ - IP_VS_DBG_PKT(12, af, pp, skb, 0, - "ip_vs_in: packet continues traversal as normal"); - if (iph.fragoffs) { - /* Fragment that couldn't be mapped to a conn entry - * is missing module nf_defrag_ipv6 - */ - IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); - IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment"); - } - return NF_ACCEPT; - } + IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet"); - IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ @@ -1765,7 +1816,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) pkts = atomic_add_return(1, &cp->in_pkts); if (ipvs->sync_state & IP_VS_STATE_MASTER) - ip_vs_sync_conn(net, cp, pkts); + ip_vs_sync_conn(ipvs, cp, pkts); ip_vs_conn_put(cp); return ret; @@ -1776,10 +1827,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_remote_request4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(ops->hooknum, skb, AF_INET); + return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET); } /* @@ -1787,10 +1838,10 @@ ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_request4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(ops->hooknum, skb, AF_INET); + return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1800,10 +1851,10 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_remote_request6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(ops->hooknum, skb, AF_INET6); + return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6); } /* @@ -1811,10 +1862,10 @@ ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_request6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(ops->hooknum, skb, AF_INET6); + return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6); } #endif @@ -1830,46 +1881,40 @@ ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, * and send them to ip_vs_in_icmp. */ static unsigned int -ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_forward_icmp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { int r; - struct net *net; - struct netns_ipvs *ipvs; + struct netns_ipvs *ipvs = net_ipvs(state->net); if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; /* ipvs enabled in this netns ? */ - net = skb_net(skb); - ipvs = net_ipvs(net); if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp(skb, &r, ops->hooknum); + return ip_vs_in_icmp(ipvs, skb, &r, state->hook); } #ifdef CONFIG_IP_VS_IPV6 static unsigned int -ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { int r; - struct net *net; - struct netns_ipvs *ipvs; + struct netns_ipvs *ipvs = net_ipvs(state->net); struct ip_vs_iphdr iphdr; - ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr); + ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr); if (iphdr.protocol != IPPROTO_ICMPV6) return NF_ACCEPT; /* ipvs enabled in this netns ? */ - net = skb_net(skb); - ipvs = net_ipvs(net); if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr); + return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr); } #endif @@ -1878,7 +1923,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply4, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC - 2, @@ -1888,7 +1932,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { * applied to IPVS. */ { .hook = ip_vs_remote_request4, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC - 1, @@ -1896,7 +1939,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* Before ip_vs_in, change source only for VS/NAT */ { .hook = ip_vs_local_reply4, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST + 1, @@ -1904,7 +1946,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After mangle, schedule and forward local requests */ { .hook = ip_vs_local_request4, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST + 2, @@ -1913,7 +1954,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { * destined for 0.0.0.0/0, which is for incoming IPVS connections */ { .hook = ip_vs_forward_icmp, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = 99, @@ -1921,7 +1961,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply4, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = 100, @@ -1930,7 +1969,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC - 2, @@ -1940,7 +1978,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { * applied to IPVS. */ { .hook = ip_vs_remote_request6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC - 1, @@ -1948,7 +1985,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* Before ip_vs_in, change source only for VS/NAT */ { .hook = ip_vs_local_reply6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 1, @@ -1956,7 +1992,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After mangle, schedule and forward local requests */ { .hook = ip_vs_local_request6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 2, @@ -1965,7 +2000,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { * destined for 0.0.0.0/0, which is for incoming IPVS connections */ { .hook = ip_vs_forward_icmp_v6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = 99, @@ -1973,7 +2007,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = 100, @@ -1999,22 +2032,22 @@ static int __net_init __ip_vs_init(struct net *net) atomic_inc(&ipvs_netns_cnt); net->ipvs = ipvs; - if (ip_vs_estimator_net_init(net) < 0) + if (ip_vs_estimator_net_init(ipvs) < 0) goto estimator_fail; - if (ip_vs_control_net_init(net) < 0) + if (ip_vs_control_net_init(ipvs) < 0) goto control_fail; - if (ip_vs_protocol_net_init(net) < 0) + if (ip_vs_protocol_net_init(ipvs) < 0) goto protocol_fail; - if (ip_vs_app_net_init(net) < 0) + if (ip_vs_app_net_init(ipvs) < 0) goto app_fail; - if (ip_vs_conn_net_init(net) < 0) + if (ip_vs_conn_net_init(ipvs) < 0) goto conn_fail; - if (ip_vs_sync_net_init(net) < 0) + if (ip_vs_sync_net_init(ipvs) < 0) goto sync_fail; printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", @@ -2025,15 +2058,15 @@ static int __net_init __ip_vs_init(struct net *net) */ sync_fail: - ip_vs_conn_net_cleanup(net); + ip_vs_conn_net_cleanup(ipvs); conn_fail: - ip_vs_app_net_cleanup(net); + ip_vs_app_net_cleanup(ipvs); app_fail: - ip_vs_protocol_net_cleanup(net); + ip_vs_protocol_net_cleanup(ipvs); protocol_fail: - ip_vs_control_net_cleanup(net); + ip_vs_control_net_cleanup(ipvs); control_fail: - ip_vs_estimator_net_cleanup(net); + ip_vs_estimator_net_cleanup(ipvs); estimator_fail: net->ipvs = NULL; return -ENOMEM; @@ -2041,22 +2074,25 @@ estimator_fail: static void __net_exit __ip_vs_cleanup(struct net *net) { - ip_vs_service_net_cleanup(net); /* ip_vs_flush() with locks */ - ip_vs_conn_net_cleanup(net); - ip_vs_app_net_cleanup(net); - ip_vs_protocol_net_cleanup(net); - ip_vs_control_net_cleanup(net); - ip_vs_estimator_net_cleanup(net); - IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen); + struct netns_ipvs *ipvs = net_ipvs(net); + + ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */ + ip_vs_conn_net_cleanup(ipvs); + ip_vs_app_net_cleanup(ipvs); + ip_vs_protocol_net_cleanup(ipvs); + ip_vs_control_net_cleanup(ipvs); + ip_vs_estimator_net_cleanup(ipvs); + IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen); net->ipvs = NULL; } static void __net_exit __ip_vs_dev_cleanup(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); EnterFunction(2); - net_ipvs(net)->enable = 0; /* Disable packet reception */ + ipvs->enable = 0; /* Disable packet reception */ smp_wmb(); - ip_vs_sync_net_cleanup(net); + ip_vs_sync_net_cleanup(ipvs); LeaveFunction(2); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 1a23e91d5..e7c1b052c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -228,7 +228,7 @@ static void defense_work_handler(struct work_struct *work) update_defense_level(ipvs); if (atomic_read(&ipvs->dropentry)) - ip_vs_random_dropentry(ipvs->net); + ip_vs_random_dropentry(ipvs); schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); } #endif @@ -263,7 +263,7 @@ static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; * Returns hash value for virtual service */ static inline unsigned int -ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto, +ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, const union nf_inet_addr *addr, __be16 port) { register unsigned int porth = ntohs(port); @@ -276,7 +276,7 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto, addr->ip6[2]^addr->ip6[3]; #endif ahash = ntohl(addr_fold); - ahash ^= ((size_t) net >> 8); + ahash ^= ((size_t) ipvs >> 8); return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & IP_VS_SVC_TAB_MASK; @@ -285,9 +285,9 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto, /* * Returns hash value of fwmark for virtual service lookup */ -static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark) +static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark) { - return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; + return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; } /* @@ -309,14 +309,14 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) /* * Hash it by in ip_vs_svc_table */ - hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, + hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol, &svc->addr, svc->port); hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* * Hash it by fwmark in svc_fwm_table */ - hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); + hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark); hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); } @@ -357,21 +357,21 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) * Get service by {netns, proto,addr,port} in the service table. */ static inline struct ip_vs_service * -__ip_vs_service_find(struct net *net, int af, __u16 protocol, +__ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { unsigned int hash; struct ip_vs_service *svc; /* Check for "full" addressed entries */ - hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); + hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport); hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { if ((svc->af == af) && ip_vs_addr_equal(af, &svc->addr, vaddr) && (svc->port == vport) && (svc->protocol == protocol) - && net_eq(svc->net, net)) { + && (svc->ipvs == ipvs)) { /* HIT */ return svc; } @@ -385,17 +385,17 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol, * Get service by {fwmark} in the service table. */ static inline struct ip_vs_service * -__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) +__ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark) { unsigned int hash; struct ip_vs_service *svc; /* Check for fwmark addressed entries */ - hash = ip_vs_svc_fwm_hashkey(net, fwmark); + hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark); hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { if (svc->fwmark == fwmark && svc->af == af - && net_eq(svc->net, net)) { + && (svc->ipvs == ipvs)) { /* HIT */ return svc; } @@ -406,17 +406,16 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) /* Find service, called under RCU lock */ struct ip_vs_service * -ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, +ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; - struct netns_ipvs *ipvs = net_ipvs(net); /* * Check the table hashed by fwmark first */ if (fwmark) { - svc = __ip_vs_svc_fwm_find(net, af, fwmark); + svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark); if (svc) goto out; } @@ -425,7 +424,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, * Check the table hashed by * for "full" addressed entries */ - svc = __ip_vs_service_find(net, af, protocol, vaddr, vport); + svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport); if (svc == NULL && protocol == IPPROTO_TCP @@ -435,7 +434,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, * Check if ftp service entry exists, the packet * might belong to FTP data connections. */ - svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT); + svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT); } if (svc == NULL @@ -443,7 +442,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, /* * Check if the catch-all port (port zero) exists */ - svc = __ip_vs_service_find(net, af, protocol, vaddr, 0); + svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0); } out: @@ -543,10 +542,9 @@ static void ip_vs_rs_unhash(struct ip_vs_dest *dest) } /* Check if real service by is present */ -bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol, +bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, const union nf_inet_addr *daddr, __be16 dport) { - struct netns_ipvs *ipvs = net_ipvs(net); unsigned int hash; struct ip_vs_dest *dest; @@ -601,7 +599,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af, * on the backup. * Called under RCU lock, no refcnt is returned. */ -struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af, +struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af, const union nf_inet_addr *daddr, __be16 dport, const union nf_inet_addr *vaddr, @@ -612,7 +610,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af, struct ip_vs_service *svc; __be16 port = dport; - svc = ip_vs_service_find(net, svc_af, fwmark, protocol, vaddr, vport); + svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport); if (!svc) return NULL; if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) @@ -660,7 +658,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af, const union nf_inet_addr *daddr, __be16 dport) { struct ip_vs_dest *dest; - struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct netns_ipvs *ipvs = svc->ipvs; /* * Find the destination in trash @@ -715,10 +713,9 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest) * are expired, and the refcnt of each destination in the trash must * be 0, so we simply release them here. */ -static void ip_vs_trash_cleanup(struct net *net) +static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs) { struct ip_vs_dest *dest, *nxt; - struct netns_ipvs *ipvs = net_ipvs(net); del_timer_sync(&ipvs->dest_trash_timer); /* No need to use dest_trash_lock */ @@ -788,7 +785,7 @@ static void __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest, int add) { - struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct netns_ipvs *ipvs = svc->ipvs; struct ip_vs_service *old_svc; struct ip_vs_scheduler *sched; int conn_flags; @@ -843,7 +840,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, spin_unlock_bh(&dest->dst_lock); if (add) { - ip_vs_start_estimator(svc->net, &dest->stats); + ip_vs_start_estimator(svc->ipvs, &dest->stats); list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; sched = rcu_dereference_protected(svc->scheduler, 1); @@ -874,12 +871,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atype = ipv6_addr_type(&udest->addr.in6); if ((!(atype & IPV6_ADDR_UNICAST) || atype & IPV6_ADDR_LINKLOCAL) && - !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6)) + !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6)) return -EINVAL; } else #endif { - atype = inet_addr_type(svc->net, udest->addr.ip); + atype = inet_addr_type(svc->ipvs->net, udest->addr.ip); if (atype != RTN_LOCAL && atype != RTN_UNICAST) return -EINVAL; } @@ -1036,12 +1033,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Delete a destination (must be already unlinked from the service) */ -static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest, +static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, bool cleanup) { - struct netns_ipvs *ipvs = net_ipvs(net); - - ip_vs_stop_estimator(net, &dest->stats); + ip_vs_stop_estimator(ipvs, &dest->stats); /* * Remove it from the d-linked list with the real services. @@ -1079,7 +1074,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, svc->num_dests--; if (dest->af != svc->af) - net_ipvs(svc->net)->mixed_address_family_dests--; + svc->ipvs->mixed_address_family_dests--; if (svcupd) { struct ip_vs_scheduler *sched; @@ -1120,7 +1115,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Delete the destination */ - __ip_vs_del_dest(svc->net, dest, false); + __ip_vs_del_dest(svc->ipvs, dest, false); LeaveFunction(2); @@ -1129,8 +1124,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) static void ip_vs_dest_trash_expire(unsigned long data) { - struct net *net = (struct net *) data; - struct netns_ipvs *ipvs = net_ipvs(net); + struct netns_ipvs *ipvs = (struct netns_ipvs *)data; struct ip_vs_dest *dest, *next; unsigned long now = jiffies; @@ -1163,14 +1157,13 @@ static void ip_vs_dest_trash_expire(unsigned long data) * Add a service into the service hash table */ static int -ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, +ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p) { int ret = 0, i; struct ip_vs_scheduler *sched = NULL; struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; - struct netns_ipvs *ipvs = net_ipvs(net); /* increase the module use count */ ip_vs_use_count_inc(); @@ -1237,7 +1230,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, svc->flags = u->flags; svc->timeout = u->timeout * HZ; svc->netmask = u->netmask; - svc->net = net; + svc->ipvs = ipvs; INIT_LIST_HEAD(&svc->destinations); spin_lock_init(&svc->sched_lock); @@ -1261,7 +1254,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, else if (svc->port == 0) atomic_inc(&ipvs->nullsvc_counter); - ip_vs_start_estimator(net, &svc->stats); + ip_vs_start_estimator(ipvs, &svc->stats); /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) @@ -1381,7 +1374,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; struct ip_vs_pe *old_pe; - struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct netns_ipvs *ipvs = svc->ipvs; pr_info("%s: enter\n", __func__); @@ -1389,7 +1382,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) if (svc->af == AF_INET) ipvs->num_services--; - ip_vs_stop_estimator(svc->net, &svc->stats); + ip_vs_stop_estimator(svc->ipvs, &svc->stats); /* Unbind scheduler */ old_sched = rcu_dereference_protected(svc->scheduler, 1); @@ -1405,7 +1398,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) */ list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { __ip_vs_unlink_dest(svc, dest, 0); - __ip_vs_del_dest(svc->net, dest, cleanup); + __ip_vs_del_dest(svc->ipvs, dest, cleanup); } /* @@ -1456,7 +1449,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc) /* * Flush all the virtual services */ -static int ip_vs_flush(struct net *net, bool cleanup) +static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) { int idx; struct ip_vs_service *svc; @@ -1468,7 +1461,7 @@ static int ip_vs_flush(struct net *net, bool cleanup) for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], s_list) { - if (net_eq(svc->net, net)) + if (svc->ipvs == ipvs) ip_vs_unlink_service(svc, cleanup); } } @@ -1479,7 +1472,7 @@ static int ip_vs_flush(struct net *net, bool cleanup) for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], f_list) { - if (net_eq(svc->net, net)) + if (svc->ipvs == ipvs) ip_vs_unlink_service(svc, cleanup); } } @@ -1491,12 +1484,12 @@ static int ip_vs_flush(struct net *net, bool cleanup) * Delete service by {netns} in the service table. * Called by __ip_vs_cleanup() */ -void ip_vs_service_net_cleanup(struct net *net) +void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs) { EnterFunction(2); /* Check for "full" addressed entries */ mutex_lock(&__ip_vs_mutex); - ip_vs_flush(net, true); + ip_vs_flush(ipvs, true); mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); } @@ -1540,7 +1533,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, mutex_lock(&__ip_vs_mutex); for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - if (net_eq(svc->net, net)) { + if (svc->ipvs == ipvs) { list_for_each_entry(dest, &svc->destinations, n_list) { ip_vs_forget_dev(dest, dev); @@ -1549,7 +1542,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, } hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (net_eq(svc->net, net)) { + if (svc->ipvs == ipvs) { list_for_each_entry(dest, &svc->destinations, n_list) { ip_vs_forget_dev(dest, dev); @@ -1583,26 +1576,26 @@ static int ip_vs_zero_service(struct ip_vs_service *svc) return 0; } -static int ip_vs_zero_all(struct net *net) +static int ip_vs_zero_all(struct netns_ipvs *ipvs) { int idx; struct ip_vs_service *svc; for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - if (net_eq(svc->net, net)) + if (svc->ipvs == ipvs) ip_vs_zero_service(svc); } } for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (net_eq(svc->net, net)) + if (svc->ipvs == ipvs) ip_vs_zero_service(svc); } } - ip_vs_zero_stats(&net_ipvs(net)->tot_stats); + ip_vs_zero_stats(&ipvs->tot_stats); return 0; } @@ -1615,7 +1608,7 @@ static int proc_do_defense_mode(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - struct net *net = current->nsproxy->net_ns; + struct netns_ipvs *ipvs = table->extra2; int *valp = table->data; int val = *valp; int rc; @@ -1626,7 +1619,7 @@ proc_do_defense_mode(struct ctl_table *table, int write, /* Restore the correct value */ *valp = val; } else { - update_defense_level(net_ipvs(net)); + update_defense_level(ipvs); } } return rc; @@ -1844,6 +1837,18 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "schedule_icmp", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "ignore_tunneled", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -1889,6 +1894,7 @@ static inline const char *ip_vs_fwd_name(unsigned int flags) static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) { struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_iter *iter = seq->private; int idx; struct ip_vs_service *svc; @@ -1896,7 +1902,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* look in hash by protocol */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { - if (net_eq(svc->net, net) && pos-- == 0) { + if ((svc->ipvs == ipvs) && pos-- == 0) { iter->table = ip_vs_svc_table; iter->bucket = idx; return svc; @@ -1908,7 +1914,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (net_eq(svc->net, net) && pos-- == 0) { + if ((svc->ipvs == ipvs) && pos-- == 0) { iter->table = ip_vs_svc_fwm_table; iter->bucket = idx; return svc; @@ -2196,7 +2202,7 @@ static const struct file_operations ip_vs_stats_percpu_fops = { /* * Set timeout values for tcp tcpfin udp in the timeout_table. */ -static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) +static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u) { #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) struct ip_vs_proto_data *pd; @@ -2209,13 +2215,13 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) #ifdef CONFIG_IP_VS_PROTO_TCP if (u->tcp_timeout) { - pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] = u->tcp_timeout * HZ; } if (u->tcp_fin_timeout) { - pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] = u->tcp_fin_timeout * HZ; } @@ -2223,7 +2229,7 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) #ifdef CONFIG_IP_VS_PROTO_UDP if (u->udp_timeout) { - pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); pd->timeout_table[IP_VS_UDP_S_NORMAL] = u->udp_timeout * HZ; } @@ -2344,12 +2350,12 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) cfg.syncid = dm->syncid; rtnl_lock(); mutex_lock(&ipvs->sync_mutex); - ret = start_sync_thread(net, &cfg, dm->state); + ret = start_sync_thread(ipvs, &cfg, dm->state); mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); } else { mutex_lock(&ipvs->sync_mutex); - ret = stop_sync_thread(net, dm->state); + ret = stop_sync_thread(ipvs, dm->state); mutex_unlock(&ipvs->sync_mutex); } goto out_dec; @@ -2358,11 +2364,11 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) mutex_lock(&__ip_vs_mutex); if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ - ret = ip_vs_flush(net, false); + ret = ip_vs_flush(ipvs, false); goto out_unlock; } else if (cmd == IP_VS_SO_SET_TIMEOUT) { /* Set timeout values for (tcp tcpfin udp) */ - ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg); + ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg); goto out_unlock; } @@ -2377,7 +2383,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (cmd == IP_VS_SO_SET_ZERO) { /* if no service address is set, zero counters in all */ if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { - ret = ip_vs_zero_all(net); + ret = ip_vs_zero_all(ipvs); goto out_unlock; } } @@ -2395,10 +2401,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) /* Lookup the exact service by or fwmark */ rcu_read_lock(); if (usvc.fwmark == 0) - svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, + svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol, &usvc.addr, usvc.port); else - svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); + svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark); rcu_read_unlock(); if (cmd != IP_VS_SO_SET_ADD @@ -2412,7 +2418,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (svc != NULL) ret = -EEXIST; else - ret = ip_vs_add_service(net, &usvc, &svc); + ret = ip_vs_add_service(ipvs, &usvc, &svc); break; case IP_VS_SO_SET_EDIT: ret = ip_vs_edit_service(svc, &usvc); @@ -2471,7 +2477,7 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) } static inline int -__ip_vs_get_service_entries(struct net *net, +__ip_vs_get_service_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_services *get, struct ip_vs_get_services __user *uptr) { @@ -2483,7 +2489,7 @@ __ip_vs_get_service_entries(struct net *net, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET || !net_eq(svc->net, net)) + if (svc->af != AF_INET || (svc->ipvs != ipvs)) continue; if (count >= get->num_services) @@ -2502,7 +2508,7 @@ __ip_vs_get_service_entries(struct net *net, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET || !net_eq(svc->net, net)) + if (svc->af != AF_INET || (svc->ipvs != ipvs)) continue; if (count >= get->num_services) @@ -2522,7 +2528,7 @@ out: } static inline int -__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, +__ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get, struct ip_vs_get_dests __user *uptr) { struct ip_vs_service *svc; @@ -2531,9 +2537,9 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, rcu_read_lock(); if (get->fwmark) - svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); + svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark); else - svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, + svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr, get->port); rcu_read_unlock(); @@ -2578,7 +2584,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, } static inline void -__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) +__ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u) { #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) struct ip_vs_proto_data *pd; @@ -2587,12 +2593,12 @@ __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) memset(u, 0, sizeof (*u)); #ifdef CONFIG_IP_VS_PROTO_TCP - pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; #endif #ifdef CONFIG_IP_VS_PROTO_UDP - pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); u->udp_timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ; #endif @@ -2711,7 +2717,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EINVAL; goto out; } - ret = __ip_vs_get_service_entries(net, get, user); + ret = __ip_vs_get_service_entries(ipvs, get, user); } break; @@ -2725,9 +2731,9 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) addr.ip = entry->addr; rcu_read_lock(); if (entry->fwmark) - svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); + svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark); else - svc = __ip_vs_service_find(net, AF_INET, + svc = __ip_vs_service_find(ipvs, AF_INET, entry->protocol, &addr, entry->port); rcu_read_unlock(); @@ -2753,7 +2759,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EINVAL; goto out; } - ret = __ip_vs_get_dest_entries(net, get, user); + ret = __ip_vs_get_dest_entries(ipvs, get, user); } break; @@ -2761,7 +2767,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(net, &t); + __ip_vs_get_timeouts(ipvs, &t); if (copy_to_user(user, &t, sizeof(t)) != 0) ret = -EFAULT; } @@ -2996,12 +3002,13 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, int idx = 0, i; int start = cb->args[0]; struct ip_vs_service *svc; - struct net *net = skb_sknet(skb); + struct net *net = sock_net(skb->sk); + struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&__ip_vs_mutex); for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { - if (++idx <= start || !net_eq(svc->net, net)) + if (++idx <= start || (svc->ipvs != ipvs)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -3012,7 +3019,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { - if (++idx <= start || !net_eq(svc->net, net)) + if (++idx <= start || (svc->ipvs != ipvs)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -3028,7 +3035,7 @@ nla_put_failure: return skb->len; } -static int ip_vs_genl_parse_service(struct net *net, +static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *usvc, struct nlattr *nla, int full_entry, struct ip_vs_service **ret_svc) @@ -3073,9 +3080,9 @@ static int ip_vs_genl_parse_service(struct net *net, rcu_read_lock(); if (usvc->fwmark) - svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); + svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark); else - svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, + svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol, &usvc->addr, usvc->port); rcu_read_unlock(); *ret_svc = svc; @@ -3113,14 +3120,14 @@ static int ip_vs_genl_parse_service(struct net *net, return 0; } -static struct ip_vs_service *ip_vs_genl_find_service(struct net *net, +static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs, struct nlattr *nla) { struct ip_vs_service_user_kern usvc; struct ip_vs_service *svc; int ret; - ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc); + ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, 0, &svc); return ret ? ERR_PTR(ret) : svc; } @@ -3195,7 +3202,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, struct ip_vs_service *svc; struct ip_vs_dest *dest; struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; - struct net *net = skb_sknet(skb); + struct net *net = sock_net(skb->sk); + struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&__ip_vs_mutex); @@ -3205,7 +3213,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, goto out_err; - svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]); + svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc) || svc == NULL) goto out_err; @@ -3341,7 +3349,7 @@ nla_put_failure: static int ip_vs_genl_dump_daemons(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb_sknet(skb); + struct net *net = sock_net(skb->sk); struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&ipvs->sync_mutex); @@ -3367,9 +3375,8 @@ nla_put_failure: return skb->len; } -static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) +static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ipvs_sync_daemon_cfg c; struct nlattr *a; int ret; @@ -3426,33 +3433,32 @@ static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) rtnl_lock(); mutex_lock(&ipvs->sync_mutex); - ret = start_sync_thread(net, &c, + ret = start_sync_thread(ipvs, &c, nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); return ret; } -static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs) +static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) { - struct netns_ipvs *ipvs = net_ipvs(net); int ret; if (!attrs[IPVS_DAEMON_ATTR_STATE]) return -EINVAL; mutex_lock(&ipvs->sync_mutex); - ret = stop_sync_thread(net, + ret = stop_sync_thread(ipvs, nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); mutex_unlock(&ipvs->sync_mutex); return ret; } -static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) +static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(net, &t); + __ip_vs_get_timeouts(ipvs, &t); if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); @@ -3464,17 +3470,15 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); - return ip_vs_set_timeout(net, &t); + return ip_vs_set_timeout(ipvs, &t); } static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) { int ret = -EINVAL, cmd; - struct net *net; - struct netns_ipvs *ipvs; + struct net *net = sock_net(skb->sk); + struct netns_ipvs *ipvs = net_ipvs(net); - net = skb_sknet(skb); - ipvs = net_ipvs(net); cmd = info->genlhdr->cmd; if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { @@ -3487,9 +3491,9 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) goto out; if (cmd == IPVS_CMD_NEW_DAEMON) - ret = ip_vs_genl_new_daemon(net, daemon_attrs); + ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs); else - ret = ip_vs_genl_del_daemon(net, daemon_attrs); + ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs); } out: @@ -3503,22 +3507,22 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) struct ip_vs_dest_user_kern udest; int ret = 0, cmd; int need_full_svc = 0, need_full_dest = 0; - struct net *net; + struct net *net = sock_net(skb->sk); + struct netns_ipvs *ipvs = net_ipvs(net); - net = skb_sknet(skb); cmd = info->genlhdr->cmd; mutex_lock(&__ip_vs_mutex); if (cmd == IPVS_CMD_FLUSH) { - ret = ip_vs_flush(net, false); + ret = ip_vs_flush(ipvs, false); goto out; } else if (cmd == IPVS_CMD_SET_CONFIG) { - ret = ip_vs_genl_set_config(net, info->attrs); + ret = ip_vs_genl_set_config(ipvs, info->attrs); goto out; } else if (cmd == IPVS_CMD_ZERO && !info->attrs[IPVS_CMD_ATTR_SERVICE]) { - ret = ip_vs_zero_all(net); + ret = ip_vs_zero_all(ipvs); goto out; } @@ -3528,7 +3532,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) need_full_svc = 1; - ret = ip_vs_genl_parse_service(net, &usvc, + ret = ip_vs_genl_parse_service(ipvs, &usvc, info->attrs[IPVS_CMD_ATTR_SERVICE], need_full_svc, &svc); if (ret) @@ -3567,7 +3571,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) /* The synchronization protocol is incompatible * with mixed family services */ - if (net_ipvs(net)->sync_state) { + if (ipvs->sync_state) { ret = -EINVAL; goto out; } @@ -3587,7 +3591,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) switch (cmd) { case IPVS_CMD_NEW_SERVICE: if (svc == NULL) - ret = ip_vs_add_service(net, &usvc, &svc); + ret = ip_vs_add_service(ipvs, &usvc, &svc); else ret = -EEXIST; break; @@ -3625,9 +3629,9 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; void *reply; int ret, cmd, reply_cmd; - struct net *net; + struct net *net = sock_net(skb->sk); + struct netns_ipvs *ipvs = net_ipvs(net); - net = skb_sknet(skb); cmd = info->genlhdr->cmd; if (cmd == IPVS_CMD_GET_SERVICE) @@ -3656,7 +3660,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) { struct ip_vs_service *svc; - svc = ip_vs_genl_find_service(net, + svc = ip_vs_genl_find_service(ipvs, info->attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc)) { ret = PTR_ERR(svc); @@ -3677,7 +3681,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(net, &t); + __ip_vs_get_timeouts(ipvs, &t); #ifdef CONFIG_IP_VS_PROTO_TCP if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout) || @@ -3832,10 +3836,10 @@ static void ip_vs_genl_unregister(void) * per netns intit/exit func. */ #ifdef CONFIG_SYSCTL -static int __net_init ip_vs_control_net_init_sysctl(struct net *net) +static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { + struct net *net = ipvs->net; int idx; - struct netns_ipvs *ipvs = net_ipvs(net); struct ctl_table *tbl; atomic_set(&ipvs->dropentry, 0); @@ -3854,6 +3858,10 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) } else tbl = vs_vars; /* Initialize sysctl defaults */ + for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) { + if (tbl[idx].proc_handler == proc_do_defense_mode) + tbl[idx].extra2 = ipvs; + } idx = 0; ipvs->sysctl_amemthresh = 1024; tbl[idx++].data = &ipvs->sysctl_amemthresh; @@ -3895,7 +3903,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_backup_only; ipvs->sysctl_conn_reuse_mode = 1; tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; - + tbl[idx++].data = &ipvs->sysctl_schedule_icmp; + tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); if (ipvs->sysctl_hdr == NULL) { @@ -3903,7 +3912,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) kfree(tbl); return -ENOMEM; } - ip_vs_start_estimator(net, &ipvs->tot_stats); + ip_vs_start_estimator(ipvs, &ipvs->tot_stats); ipvs->sysctl_tbl = tbl; /* Schedule defense work */ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); @@ -3912,14 +3921,14 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) return 0; } -static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) +static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); + struct net *net = ipvs->net; cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); unregister_net_sysctl_table(ipvs->sysctl_hdr); - ip_vs_stop_estimator(net, &ipvs->tot_stats); + ip_vs_stop_estimator(ipvs, &ipvs->tot_stats); if (!net_eq(net, &init_net)) kfree(ipvs->sysctl_tbl); @@ -3927,8 +3936,8 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) #else -static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } -static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } +static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; } +static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { } #endif @@ -3936,10 +3945,10 @@ static struct notifier_block ip_vs_dst_notifier = { .notifier_call = ip_vs_dst_event, }; -int __net_init ip_vs_control_net_init(struct net *net) +int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) { + struct net *net = ipvs->net; int i, idx; - struct netns_ipvs *ipvs = net_ipvs(net); /* Initialize rs_table */ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) @@ -3948,7 +3957,7 @@ int __net_init ip_vs_control_net_init(struct net *net) INIT_LIST_HEAD(&ipvs->dest_trash); spin_lock_init(&ipvs->dest_trash_lock); setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, - (unsigned long) net); + (unsigned long) ipvs); atomic_set(&ipvs->ftpsvc_counter, 0); atomic_set(&ipvs->nullsvc_counter, 0); @@ -3970,7 +3979,7 @@ int __net_init ip_vs_control_net_init(struct net *net) proc_create("ip_vs_stats_percpu", 0, net->proc_net, &ip_vs_stats_percpu_fops); - if (ip_vs_control_net_init_sysctl(net)) + if (ip_vs_control_net_init_sysctl(ipvs)) goto err; return 0; @@ -3980,12 +3989,12 @@ err: return -ENOMEM; } -void __net_exit ip_vs_control_net_cleanup(struct net *net) +void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); + struct net *net = ipvs->net; - ip_vs_trash_cleanup(net); - ip_vs_control_net_cleanup_sysctl(net); + ip_vs_trash_cleanup(ipvs); + ip_vs_control_net_cleanup_sysctl(ipvs); remove_proc_entry("ip_vs_stats_percpu", net->proc_net); remove_proc_entry("ip_vs_stats", net->proc_net); remove_proc_entry("ip_vs", net->proc_net); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index ef0eb0a8d..457c6c193 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -102,10 +102,8 @@ static void estimation_timer(unsigned long arg) struct ip_vs_estimator *e; struct ip_vs_stats *s; u64 rate; - struct net *net = (struct net *)arg; - struct netns_ipvs *ipvs; + struct netns_ipvs *ipvs = (struct netns_ipvs *)arg; - ipvs = net_ipvs(net); spin_lock(&ipvs->est_lock); list_for_each_entry(e, &ipvs->est_list, list) { s = container_of(e, struct ip_vs_stats, est); @@ -140,9 +138,8 @@ static void estimation_timer(unsigned long arg) mod_timer(&ipvs->est_timer, jiffies + 2*HZ); } -void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats) +void ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; INIT_LIST_HEAD(&est->list); @@ -152,9 +149,8 @@ void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats) spin_unlock_bh(&ipvs->est_lock); } -void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats) +void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; spin_lock_bh(&ipvs->est_lock); @@ -192,18 +188,16 @@ void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats) dst->outbps = (e->outbps + 0xF) >> 5; } -int __net_init ip_vs_estimator_net_init(struct net *net) +int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); - INIT_LIST_HEAD(&ipvs->est_list); spin_lock_init(&ipvs->est_lock); - setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net); + setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)ipvs); mod_timer(&ipvs->est_timer, jiffies + 2 * HZ); return 0; } -void __net_exit ip_vs_estimator_net_cleanup(struct net *net) +void __net_exit ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs) { - del_timer_sync(&net_ipvs(net)->est_timer); + del_timer_sync(&ipvs->est_timer); } diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 5d3daae98..d30c327bb 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -181,7 +181,6 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, int ret = 0; enum ip_conntrack_info ctinfo; struct nf_conn *ct; - struct net *net; *diff = 0; @@ -223,14 +222,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, */ { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, + ip_vs_conn_fill_param(cp->ipvs, AF_INET, iph->protocol, &from, port, &cp->caddr, 0, &p); n_cp = ip_vs_conn_out_get(&p); } if (!n_cp) { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(ip_vs_conn_net(cp), + ip_vs_conn_fill_param(cp->ipvs, AF_INET, IPPROTO_TCP, &cp->caddr, 0, &cp->vaddr, port, &p); /* As above, this is ipv4 only */ @@ -289,9 +288,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * would be adjusted twice. */ - net = skb_net(skb); cp->app_data = NULL; - ip_vs_tcp_conn_listen(net, n_cp); + ip_vs_tcp_conn_listen(n_cp); ip_vs_conn_put(n_cp); return ret; } @@ -320,7 +318,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; - struct net *net; /* no diff required for incoming packets */ *diff = 0; @@ -392,7 +389,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, + ip_vs_conn_fill_param(cp->ipvs, AF_INET, iph->protocol, &to, port, &cp->vaddr, htons(ntohs(cp->vport)-1), &p); n_cp = ip_vs_conn_in_get(&p); @@ -413,8 +410,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, /* * Move tunnel to listen state */ - net = skb_net(skb); - ip_vs_tcp_conn_listen(net, n_cp); + ip_vs_tcp_conn_listen(n_cp); ip_vs_conn_put(n_cp); return 1; @@ -447,14 +443,14 @@ static int __net_init __ip_vs_ftp_init(struct net *net) if (!ipvs) return -ENOENT; - app = register_ip_vs_app(net, &ip_vs_ftp); + app = register_ip_vs_app(ipvs, &ip_vs_ftp); if (IS_ERR(app)) return PTR_ERR(app); for (i = 0; i < ports_count; i++) { if (!ports[i]) continue; - ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]); + ret = register_ip_vs_app_inc(ipvs, app, app->protocol, ports[i]); if (ret) goto err_unreg; pr_info("%s: loaded support on port[%d] = %d\n", @@ -463,7 +459,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net) return 0; err_unreg: - unregister_ip_vs_app(net, &ip_vs_ftp); + unregister_ip_vs_app(ipvs, &ip_vs_ftp); return ret; } /* @@ -471,7 +467,12 @@ err_unreg: */ static void __ip_vs_ftp_exit(struct net *net) { - unregister_ip_vs_app(net, &ip_vs_ftp); + struct netns_ipvs *ipvs = net_ipvs(net); + + if (!ipvs) + return; + + unregister_ip_vs_app(ipvs, &ip_vs_ftp); } static struct pernet_operations ip_vs_ftp_ops = { diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 127f14046..cccf4d637 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -250,8 +250,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc) static int sysctl_lblc_expiration(struct ip_vs_service *svc) { #ifdef CONFIG_SYSCTL - struct netns_ipvs *ipvs = net_ipvs(svc->net); - return ipvs->sysctl_lblc_expiration; + return svc->ipvs->sysctl_lblc_expiration; #else return DEFAULT_EXPIRATION; #endif diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 2229d2d8b..796d70e47 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -415,8 +415,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc) static int sysctl_lblcr_expiration(struct ip_vs_service *svc) { #ifdef CONFIG_SYSCTL - struct netns_ipvs *ipvs = net_ipvs(svc->net); - return ipvs->sysctl_lblcr_expiration; + return svc->ipvs->sysctl_lblcr_expiration; #else return DEFAULT_EXPIRATION; #endif diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index 136184572..30434fb13 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -161,7 +161,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, /* RS->CLIENT */ orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum, + ip_vs_conn_fill_param(net_ipvs(net), exp->tuple.src.l3num, orig->dst.protonum, &orig->src.u3, orig->src.u.tcp.port, &orig->dst.u3, orig->dst.u.tcp.port, &p); cp = ip_vs_conn_out_get(&p); @@ -274,8 +274,7 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) " for conn " FMT_CONN "\n", __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); - h = nf_conntrack_find_get(ip_vs_conn_net(cp), &nf_ct_zone_dflt, - &tuple); + h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); /* Show what happens instead of calling nf_ct_kill() */ diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index bed5f7042..1b8d594e4 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -70,7 +70,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) const char *dptr; int retc; - ip_vs_fill_iph_skb(p->af, skb, &iph); + ip_vs_fill_iph_skb(p->af, skb, false, &iph); /* Only useful with UDP */ if (iph.protocol != IPPROTO_UDP) diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 939f7fbe9..8ae480715 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -63,9 +63,8 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) * register an ipvs protocols netns related data */ static int -register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) +register_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_protocol *pp) { - struct netns_ipvs *ipvs = net_ipvs(net); unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); struct ip_vs_proto_data *pd = kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL); @@ -79,7 +78,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) atomic_set(&pd->appcnt, 0); /* Init app counter */ if (pp->init_netns != NULL) { - int ret = pp->init_netns(net, pd); + int ret = pp->init_netns(ipvs, pd); if (ret) { /* unlink an free proto data */ ipvs->proto_data_table[hash] = pd->next; @@ -116,9 +115,8 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) * unregister an ipvs protocols netns data */ static int -unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd) +unregister_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data **pd_p; unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol); @@ -127,7 +125,7 @@ unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd) if (*pd_p == pd) { *pd_p = pd->next; if (pd->pp->exit_netns != NULL) - pd->pp->exit_netns(net, pd); + pd->pp->exit_netns(ipvs, pd); kfree(pd); return 0; } @@ -156,8 +154,8 @@ EXPORT_SYMBOL(ip_vs_proto_get); /* * get ip_vs_protocol object data by netns and proto */ -static struct ip_vs_proto_data * -__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) +struct ip_vs_proto_data * +ip_vs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) { struct ip_vs_proto_data *pd; unsigned int hash = IP_VS_PROTO_HASH(proto); @@ -169,14 +167,6 @@ __ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) return NULL; } - -struct ip_vs_proto_data * -ip_vs_proto_data_get(struct net *net, unsigned short proto) -{ - struct netns_ipvs *ipvs = net_ipvs(net); - - return __ipvs_proto_data_get(ipvs, proto); -} EXPORT_SYMBOL(ip_vs_proto_data_get); /* @@ -317,7 +307,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, /* * per network name-space init */ -int __net_init ip_vs_protocol_net_init(struct net *net) +int __net_init ip_vs_protocol_net_init(struct netns_ipvs *ipvs) { int i, ret; static struct ip_vs_protocol *protos[] = { @@ -339,27 +329,26 @@ int __net_init ip_vs_protocol_net_init(struct net *net) }; for (i = 0; i < ARRAY_SIZE(protos); i++) { - ret = register_ip_vs_proto_netns(net, protos[i]); + ret = register_ip_vs_proto_netns(ipvs, protos[i]); if (ret < 0) goto cleanup; } return 0; cleanup: - ip_vs_protocol_net_cleanup(net); + ip_vs_protocol_net_cleanup(ipvs); return ret; } -void __net_exit ip_vs_protocol_net_cleanup(struct net *net) +void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd; int i; /* unregister all the ipvs proto data for this netns */ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { while ((pd = ipvs->proto_data_table[i]) != NULL) - unregister_ip_vs_proto_netns(net, pd); + unregister_ip_vs_proto_netns(ipvs, pd); } } diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 5de3dd312..5320d3997 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -41,30 +41,28 @@ struct isakmp_hdr { #define PORT_ISAKMP 500 static void -ah_esp_conn_fill_param_proto(struct net *net, int af, - const struct ip_vs_iphdr *iph, int inverse, +ah_esp_conn_fill_param_proto(struct netns_ipvs *ipvs, int af, + const struct ip_vs_iphdr *iph, struct ip_vs_conn_param *p) { - if (likely(!inverse)) - ip_vs_conn_fill_param(net, af, IPPROTO_UDP, + if (likely(!ip_vs_iph_inverse(iph))) + ip_vs_conn_fill_param(ipvs, af, IPPROTO_UDP, &iph->saddr, htons(PORT_ISAKMP), &iph->daddr, htons(PORT_ISAKMP), p); else - ip_vs_conn_fill_param(net, af, IPPROTO_UDP, + ip_vs_conn_fill_param(ipvs, af, IPPROTO_UDP, &iph->daddr, htons(PORT_ISAKMP), &iph->saddr, htons(PORT_ISAKMP), p); } static struct ip_vs_conn * -ah_esp_conn_in_get(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, - int inverse) +ah_esp_conn_in_get(struct netns_ipvs *ipvs, int af, const struct sk_buff *skb, + const struct ip_vs_iphdr *iph) { struct ip_vs_conn *cp; struct ip_vs_conn_param p; - struct net *net = skb_net(skb); - ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); + ah_esp_conn_fill_param_proto(ipvs, af, iph, &p); cp = ip_vs_conn_in_get(&p); if (!cp) { /* @@ -73,7 +71,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, */ IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " "%s%s %s->%s\n", - inverse ? "ICMP+" : "", + ip_vs_iph_icmp(iph) ? "ICMP+" : "", ip_vs_proto_get(iph->protocol)->name, IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->daddr)); @@ -84,19 +82,18 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, static struct ip_vs_conn * -ah_esp_conn_out_get(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, int inverse) +ah_esp_conn_out_get(struct netns_ipvs *ipvs, int af, const struct sk_buff *skb, + const struct ip_vs_iphdr *iph) { struct ip_vs_conn *cp; struct ip_vs_conn_param p; - struct net *net = skb_net(skb); - ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); + ah_esp_conn_fill_param_proto(ipvs, af, iph, &p); cp = ip_vs_conn_out_get(&p); if (!cp) { IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " "%s%s %s->%s\n", - inverse ? "ICMP+" : "", + ip_vs_iph_icmp(iph) ? "ICMP+" : "", ip_vs_proto_get(iph->protocol)->name, IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->daddr)); @@ -107,7 +104,8 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, static int -ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, +ah_esp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, + struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph) { diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 5b84c0b56..010ddeec1 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -9,35 +9,44 @@ #include static int -sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, +sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, + struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph) { - struct net *net; struct ip_vs_service *svc; - struct netns_ipvs *ipvs; sctp_chunkhdr_t _schunkh, *sch; sctp_sctphdr_t *sh, _sctph; - - sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); - if (sh == NULL) { - *verdict = NF_DROP; - return 0; + __be16 _ports[2], *ports = NULL; + + if (likely(!ip_vs_iph_icmp(iph))) { + sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); + if (sh) { + sch = skb_header_pointer( + skb, iph->len + sizeof(sctp_sctphdr_t), + sizeof(_schunkh), &_schunkh); + if (sch && (sch->type == SCTP_CID_INIT || + sysctl_sloppy_sctp(ipvs))) + ports = &sh->source; + } + } else { + ports = skb_header_pointer( + skb, iph->len, sizeof(_ports), &_ports); } - sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), - sizeof(_schunkh), &_schunkh); - if (sch == NULL) { + if (!ports) { *verdict = NF_DROP; return 0; } - net = skb_net(skb); - ipvs = net_ipvs(net); rcu_read_lock(); - if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) && - (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, - &iph->daddr, sh->dest))) { + if (likely(!ip_vs_iph_inverse(iph))) + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->daddr, ports[1]); + else + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->saddr, ports[0]); + if (svc) { int ignored; if (ip_vs_todrop(ipvs)) { @@ -474,14 +483,13 @@ static inline __u16 sctp_app_hashkey(__be16 port) & SCTP_APP_TAB_MASK; } -static int sctp_register_app(struct net *net, struct ip_vs_app *inc) +static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP); hash = sctp_app_hashkey(port); @@ -498,9 +506,9 @@ out: return ret; } -static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) +static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP); atomic_dec(&pd->appcnt); list_del_rcu(&inc->p_list); @@ -508,7 +516,7 @@ static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) static int sctp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); + struct netns_ipvs *ipvs = cp->ipvs; int hash; struct ip_vs_app *inc; int result = 0; @@ -549,10 +557,8 @@ out: * timeouts is netns related now. * --------------------------------------------- */ -static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { - struct netns_ipvs *ipvs = net_ipvs(net); - ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, sizeof(sctp_timeouts)); @@ -561,7 +567,7 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) return 0; } -static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd) +static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { kfree(pd->timeout_table); } diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 8e92beb0c..d7024b2ed 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -32,27 +32,47 @@ #include static int -tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, +tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, + struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph) { - struct net *net; struct ip_vs_service *svc; struct tcphdr _tcph, *th; - struct netns_ipvs *ipvs; + __be16 _ports[2], *ports = NULL; - th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); - if (th == NULL) { + /* In the event of icmp, we're only guaranteed to have the first 8 + * bytes of the transport header, so we only check the rest of the + * TCP packet for non-ICMP packets + */ + if (likely(!ip_vs_iph_icmp(iph))) { + th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); + if (th) { + if (th->rst || !(sysctl_sloppy_tcp(ipvs) || th->syn)) + return 1; + ports = &th->source; + } + } else { + ports = skb_header_pointer( + skb, iph->len, sizeof(_ports), &_ports); + } + + if (!ports) { *verdict = NF_DROP; return 0; } - net = skb_net(skb); - ipvs = net_ipvs(net); + /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ rcu_read_lock(); - if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst && - (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, - &iph->daddr, th->dest))) { + + if (likely(!ip_vs_iph_inverse(iph))) + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->daddr, ports[1]); + else + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->saddr, ports[0]); + + if (svc) { int ignored; if (ip_vs_todrop(ipvs)) { @@ -571,14 +591,13 @@ static inline __u16 tcp_app_hashkey(__be16 port) } -static int tcp_register_app(struct net *net, struct ip_vs_app *inc) +static int tcp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); hash = tcp_app_hashkey(port); @@ -597,9 +616,9 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc) static void -tcp_unregister_app(struct net *net, struct ip_vs_app *inc) +tcp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); atomic_dec(&pd->appcnt); list_del_rcu(&inc->p_list); @@ -609,7 +628,7 @@ tcp_unregister_app(struct net *net, struct ip_vs_app *inc) static int tcp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); + struct netns_ipvs *ipvs = cp->ipvs; int hash; struct ip_vs_app *inc; int result = 0; @@ -653,9 +672,9 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* * Set LISTEN timeout. (ip_vs_conn_put will setup timer) */ -void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) +void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) { - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(cp->ipvs, IPPROTO_TCP); spin_lock_bh(&cp->lock); cp->state = IP_VS_TCP_S_LISTEN; @@ -668,10 +687,8 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) * timeouts is netns related now. * --------------------------------------------- */ -static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __ip_vs_tcp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { - struct netns_ipvs *ipvs = net_ipvs(net); - ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, sizeof(tcp_timeouts)); @@ -681,7 +698,7 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) return 0; } -static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) +static void __ip_vs_tcp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { kfree(pd->timeout_table); } diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index b62a3c0ff..e494e9a88 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -29,28 +29,42 @@ #include static int -udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, +udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, + struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph) { - struct net *net; struct ip_vs_service *svc; struct udphdr _udph, *uh; + __be16 _ports[2], *ports = NULL; - /* IPv6 fragments, only first fragment will hit this */ - uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); - if (uh == NULL) { + if (likely(!ip_vs_iph_icmp(iph))) { + /* IPv6 fragments, only first fragment will hit this */ + uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); + if (uh) + ports = &uh->source; + } else { + ports = skb_header_pointer( + skb, iph->len, sizeof(_ports), &_ports); + } + + if (!ports) { *verdict = NF_DROP; return 0; } - net = skb_net(skb); + rcu_read_lock(); - svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, - &iph->daddr, uh->dest); + if (likely(!ip_vs_iph_inverse(iph))) + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->daddr, ports[1]); + else + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->saddr, ports[0]); + if (svc) { int ignored; - if (ip_vs_todrop(net_ipvs(net))) { + if (ip_vs_todrop(ipvs)) { /* * It seems that we are very loaded. * We have to drop this packet :( @@ -348,14 +362,13 @@ static inline __u16 udp_app_hashkey(__be16 port) } -static int udp_register_app(struct net *net, struct ip_vs_app *inc) +static int udp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); hash = udp_app_hashkey(port); @@ -374,9 +387,9 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc) static void -udp_unregister_app(struct net *net, struct ip_vs_app *inc) +udp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); atomic_dec(&pd->appcnt); list_del_rcu(&inc->p_list); @@ -385,7 +398,7 @@ udp_unregister_app(struct net *net, struct ip_vs_app *inc) static int udp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); + struct netns_ipvs *ipvs = cp->ipvs; int hash; struct ip_vs_app *inc; int result = 0; @@ -456,10 +469,8 @@ udp_state_transition(struct ip_vs_conn *cp, int direction, cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; } -static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { - struct netns_ipvs *ipvs = net_ipvs(net); - ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, sizeof(udp_timeouts)); @@ -468,7 +479,7 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) return 0; } -static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd) +static void __udp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { kfree(pd->timeout_table); } diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 98a13433b..1e373a5e4 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -280,35 +280,29 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, static inline __be16 ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph) { - __be16 port; - struct tcphdr _tcph, *th; - struct udphdr _udph, *uh; - sctp_sctphdr_t _sctph, *sh; + __be16 _ports[2], *ports; + /* At this point we know that we have a valid packet of some kind. + * Because ICMP packets are only guaranteed to have the first 8 + * bytes, let's just grab the ports. Fortunately they're in the + * same position for all three of the protocols we care about. + */ switch (iph->protocol) { case IPPROTO_TCP: - th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); - if (unlikely(th == NULL)) - return 0; - port = th->source; - break; case IPPROTO_UDP: - uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); - if (unlikely(uh == NULL)) - return 0; - port = uh->source; - break; case IPPROTO_SCTP: - sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); - if (unlikely(sh == NULL)) + ports = skb_header_pointer(skb, iph->len, sizeof(_ports), + &_ports); + if (unlikely(!ports)) return 0; - port = sh->source; - break; + + if (likely(!ip_vs_iph_inverse(iph))) + return ports[0]; + else + return ports[1]; default: - port = 0; + return 0; } - - return port; } @@ -322,6 +316,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_dest *dest; struct ip_vs_sh_state *s; __be16 port = 0; + const union nf_inet_addr *hash_addr; + + hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr; IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); @@ -331,9 +328,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, s = (struct ip_vs_sh_state *) svc->sched_data; if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK) - dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port); + dest = ip_vs_sh_get_fallback(svc, s, hash_addr, port); else - dest = ip_vs_sh_get(svc, s, &iph->saddr, port); + dest = ip_vs_sh_get(svc, s, hash_addr, port); if (!dest) { ip_vs_scheduler_err(svc, "no destination available"); @@ -341,7 +338,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, } IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph->saddr), + IP_VS_DBG_ADDR(svc->af, hash_addr), IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 43f140950..803001a45 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -193,7 +193,7 @@ union ip_vs_sync_conn { #define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1)) struct ip_vs_sync_thread_data { - struct net *net; + struct netns_ipvs *ipvs; struct socket *sock; char *buf; int id; @@ -533,10 +533,9 @@ set: * Version 0 , could be switched in by sys_ctl. * Add an ip_vs_conn information into the current sync_buff. */ -static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, +static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg_v0 *m; struct ip_vs_sync_conn_v0 *s; struct ip_vs_sync_buff *buff; @@ -615,7 +614,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, pkts = atomic_add_return(1, &cp->in_pkts); else pkts = sysctl_sync_threshold(ipvs); - ip_vs_sync_conn(net, cp, pkts); + ip_vs_sync_conn(ipvs, cp, pkts); } } @@ -624,9 +623,8 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, * Called by ip_vs_in. * Sending Version 1 messages */ -void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts) +void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg *m; union ip_vs_sync_conn *s; struct ip_vs_sync_buff *buff; @@ -637,7 +635,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts) /* Handle old version of the protocol */ if (sysctl_sync_ver(ipvs) == 0) { - ip_vs_sync_conn_v0(net, cp, pkts); + ip_vs_sync_conn_v0(ipvs, cp, pkts); return; } /* Do not sync ONE PACKET */ @@ -784,21 +782,21 @@ control: * fill_param used by version 1 */ static inline int -ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, +ip_vs_conn_fill_param_sync(struct netns_ipvs *ipvs, int af, union ip_vs_sync_conn *sc, struct ip_vs_conn_param *p, __u8 *pe_data, unsigned int pe_data_len, __u8 *pe_name, unsigned int pe_name_len) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) - ip_vs_conn_fill_param(net, af, sc->v6.protocol, + ip_vs_conn_fill_param(ipvs, af, sc->v6.protocol, (const union nf_inet_addr *)&sc->v6.caddr, sc->v6.cport, (const union nf_inet_addr *)&sc->v6.vaddr, sc->v6.vport, p); else #endif - ip_vs_conn_fill_param(net, af, sc->v4.protocol, + ip_vs_conn_fill_param(ipvs, af, sc->v4.protocol, (const union nf_inet_addr *)&sc->v4.caddr, sc->v4.cport, (const union nf_inet_addr *)&sc->v4.vaddr, @@ -837,7 +835,7 @@ ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, * Param: ... * timeout is in sec. */ -static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, +static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *param, unsigned int flags, unsigned int state, unsigned int protocol, unsigned int type, const union nf_inet_addr *daddr, __be16 dport, @@ -846,7 +844,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, { struct ip_vs_dest *dest; struct ip_vs_conn *cp; - struct netns_ipvs *ipvs = net_ipvs(net); if (!(flags & IP_VS_CONN_F_TEMPLATE)) { cp = ip_vs_conn_in_get(param); @@ -904,7 +901,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, * with synchronization, so we can make the assumption that * the svc_af is the same as the dest_af */ - dest = ip_vs_find_dest(net, type, type, daddr, dport, + dest = ip_vs_find_dest(ipvs, type, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark, flags); @@ -941,7 +938,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, } else { struct ip_vs_proto_data *pd; - pd = ip_vs_proto_data_get(net, protocol); + pd = ip_vs_proto_data_get(ipvs, protocol); if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table) cp->timeout = pd->timeout_table[state]; else @@ -953,7 +950,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, /* * Process received multicast message for Version 0 */ -static void ip_vs_process_message_v0(struct net *net, const char *buffer, +static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer, const size_t buflen) { struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer; @@ -1009,14 +1006,14 @@ static void ip_vs_process_message_v0(struct net *net, const char *buffer, } } - ip_vs_conn_fill_param(net, AF_INET, s->protocol, + ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol, (const union nf_inet_addr *)&s->caddr, s->cport, (const union nf_inet_addr *)&s->vaddr, s->vport, ¶m); /* Send timeout as Zero */ - ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET, + ip_vs_proc_conn(ipvs, ¶m, flags, state, s->protocol, AF_INET, (union nf_inet_addr *)&s->daddr, s->dport, 0, 0, opt); } @@ -1067,7 +1064,7 @@ static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, /* * Process a Version 1 sync. connection */ -static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) +static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *msg_end) { struct ip_vs_sync_conn_options opt; union ip_vs_sync_conn *s; @@ -1171,21 +1168,21 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) state = 0; } } - if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data, + if (ip_vs_conn_fill_param_sync(ipvs, af, s, ¶m, pe_data, pe_data_len, pe_name, pe_name_len)) { retc = 50; goto out; } /* If only IPv4, just silent skip IPv6 */ if (af == AF_INET) - ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af, + ip_vs_proc_conn(ipvs, ¶m, flags, state, s->v4.protocol, af, (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, ntohl(s->v4.timeout), ntohl(s->v4.fwmark), (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) ); #ifdef CONFIG_IP_VS_IPV6 else - ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af, + ip_vs_proc_conn(ipvs, ¶m, flags, state, s->v6.protocol, af, (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, ntohl(s->v6.timeout), ntohl(s->v6.fwmark), (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) @@ -1204,10 +1201,9 @@ out: * ip_vs_conn entries. * Handles Version 0 & 1 */ -static void ip_vs_process_message(struct net *net, __u8 *buffer, +static void ip_vs_process_message(struct netns_ipvs *ipvs, __u8 *buffer, const size_t buflen) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; __u8 *p, *msg_end; int i, nr_conns; @@ -1257,7 +1253,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer, return; } /* Process a single sync_conn */ - retc = ip_vs_proc_sync_conn(net, p, msg_end); + retc = ip_vs_proc_sync_conn(ipvs, p, msg_end); if (retc < 0) { IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", retc); @@ -1268,7 +1264,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer, } } else { /* Old type of message */ - ip_vs_process_message_v0(net, buffer, buflen); + ip_vs_process_message_v0(ipvs, buffer, buflen); return; } } @@ -1493,16 +1489,15 @@ static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen, /* * Set up sending multicast socket over UDP */ -static struct socket *make_send_sock(struct net *net, int id) +static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id) { - struct netns_ipvs *ipvs = net_ipvs(net); /* multicast addr */ union ipvs_sockaddr mcast_addr; struct socket *sock; int result, salen; /* First create a socket */ - result = sock_create_kern(net, ipvs->mcfg.mcast_af, SOCK_DGRAM, + result = sock_create_kern(ipvs->net, ipvs->mcfg.mcast_af, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); @@ -1550,16 +1545,15 @@ error: /* * Set up receiving multicast socket over UDP */ -static struct socket *make_receive_sock(struct net *net, int id) +static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id) { - struct netns_ipvs *ipvs = net_ipvs(net); /* multicast addr */ union ipvs_sockaddr mcast_addr; struct socket *sock; int result, salen; /* First create a socket */ - result = sock_create_kern(net, ipvs->bcfg.mcast_af, SOCK_DGRAM, + result = sock_create_kern(ipvs->net, ipvs->bcfg.mcast_af, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); @@ -1687,7 +1681,7 @@ next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) static int sync_thread_master(void *data) { struct ip_vs_sync_thread_data *tinfo = data; - struct netns_ipvs *ipvs = net_ipvs(tinfo->net); + struct netns_ipvs *ipvs = tinfo->ipvs; struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id]; struct sock *sk = tinfo->sock->sk; struct ip_vs_sync_buff *sb; @@ -1743,7 +1737,7 @@ done: static int sync_thread_backup(void *data) { struct ip_vs_sync_thread_data *tinfo = data; - struct netns_ipvs *ipvs = net_ipvs(tinfo->net); + struct netns_ipvs *ipvs = tinfo->ipvs; int len; pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " @@ -1765,7 +1759,7 @@ static int sync_thread_backup(void *data) break; } - ip_vs_process_message(tinfo->net, tinfo->buf, len); + ip_vs_process_message(ipvs, tinfo->buf, len); } } @@ -1778,13 +1772,12 @@ static int sync_thread_backup(void *data) } -int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c, +int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, int state) { struct ip_vs_sync_thread_data *tinfo; struct task_struct **array = NULL, *task; struct socket *sock; - struct netns_ipvs *ipvs = net_ipvs(net); struct net_device *dev; char *name; int (*threadfn)(void *data); @@ -1811,7 +1804,7 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c, if (!c->mcast_ttl) c->mcast_ttl = 1; - dev = __dev_get_by_name(net, c->mcast_ifn); + dev = __dev_get_by_name(ipvs->net, c->mcast_ifn); if (!dev) { pr_err("Unknown mcast interface: %s\n", c->mcast_ifn); return -ENODEV; @@ -1873,9 +1866,9 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c, tinfo = NULL; for (id = 0; id < count; id++) { if (state == IP_VS_STATE_MASTER) - sock = make_send_sock(net, id); + sock = make_send_sock(ipvs, id); else - sock = make_receive_sock(net, id); + sock = make_receive_sock(ipvs, id); if (IS_ERR(sock)) { result = PTR_ERR(sock); goto outtinfo; @@ -1883,7 +1876,7 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c, tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); if (!tinfo) goto outsocket; - tinfo->net = net; + tinfo->ipvs = ipvs; tinfo->sock = sock; if (state == IP_VS_STATE_BACKUP) { tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen, @@ -1947,9 +1940,8 @@ out: } -int stop_sync_thread(struct net *net, int state) +int stop_sync_thread(struct netns_ipvs *ipvs, int state) { - struct netns_ipvs *ipvs = net_ipvs(net); struct task_struct **array; int id; int retc = -EINVAL; @@ -2015,27 +2007,24 @@ int stop_sync_thread(struct net *net, int state) /* * Initialize data struct for each netns */ -int __net_init ip_vs_sync_net_init(struct net *net) +int __net_init ip_vs_sync_net_init(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); - __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key); spin_lock_init(&ipvs->sync_lock); spin_lock_init(&ipvs->sync_buff_lock); return 0; } -void ip_vs_sync_net_cleanup(struct net *net) +void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs) { int retc; - struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&ipvs->sync_mutex); - retc = stop_sync_thread(net, IP_VS_STATE_MASTER); + retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER); if (retc && retc != -ESRCH) pr_err("Failed to stop Master Daemon\n"); - retc = stop_sync_thread(net, IP_VS_STATE_BACKUP); + retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP); if (retc && retc != -ESRCH) pr_err("Failed to stop Backup Daemon\n"); mutex_unlock(&ipvs->sync_mutex); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 258a0b0e8..3264cb49b 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -212,19 +212,20 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu) ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); } -static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, +static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af, + int rt_mode, struct ip_vs_iphdr *ipvsh, struct sk_buff *skb, int mtu) { #ifdef CONFIG_IP_VS_IPV6 if (skb_af == AF_INET6) { - struct net *net = dev_net(skb_dst(skb)->dev); + struct net *net = ipvs->net; if (unlikely(__mtu_check_toobig_v6(skb, mtu))) { if (!skb->dev) skb->dev = net->loopback_dev; /* only send ICMP too big on first fragment */ - if (!ipvsh->fragoffs) + if (!ipvsh->fragoffs && !ip_vs_iph_icmp(ipvsh)) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr); @@ -233,8 +234,6 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, } else #endif { - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); - /* If we're going to tunnel the packet and pmtu discovery * is disabled, we'll just fragment it anyway */ @@ -242,7 +241,8 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, return true; if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) && - skb->len > mtu && !skb_is_gso(skb))) { + skb->len > mtu && !skb_is_gso(skb) && + !ip_vs_iph_icmp(ipvsh))) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG(1, "frag needed for %pI4\n", @@ -256,11 +256,12 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, /* Get route to destination or remote server */ static int -__ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, +__ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, + struct ip_vs_dest *dest, __be32 daddr, int rt_mode, __be32 *ret_saddr, struct ip_vs_iphdr *ipvsh) { - struct net *net = dev_net(skb_dst(skb)->dev); + struct net *net = ipvs->net; struct ip_vs_dest_dst *dest_dst; struct rtable *rt; /* Route to the other host */ int mtu; @@ -336,7 +337,7 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, maybe_update_pmtu(skb_af, skb, mtu); } - if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu)) + if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu)) goto err_put; skb_dst_drop(skb); @@ -402,11 +403,12 @@ out_err: * Get route to destination or remote server */ static int -__ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, +__ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, + struct ip_vs_dest *dest, struct in6_addr *daddr, struct in6_addr *ret_saddr, struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode) { - struct net *net = dev_net(skb_dst(skb)->dev); + struct net *net = ipvs->net; struct ip_vs_dest_dst *dest_dst; struct rt6_info *rt; /* Route to the other host */ struct dst_entry *dst; @@ -484,7 +486,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, maybe_update_pmtu(skb_af, skb, mtu); } - if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu)) + if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu)) goto err_put; skb_dst_drop(skb); @@ -573,8 +575,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, skb_forward_csum(skb); if (!skb->sk) skb_sender_cpu_clear(skb); - NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, - NULL, skb_dst(skb)->dev, dst_output_sk); + NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, + NULL, skb_dst(skb)->dev, dst_output); } else ret = NF_ACCEPT; @@ -595,8 +597,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, skb_forward_csum(skb); if (!skb->sk) skb_sender_cpu_clear(skb); - NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, - NULL, skb_dst(skb)->dev, dst_output_sk); + NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, + NULL, skb_dst(skb)->dev, dst_output); } else ret = NF_ACCEPT; return ret; @@ -629,7 +631,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr, + if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0) goto tx_error; @@ -656,10 +658,13 @@ int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { + struct ipv6hdr *iph = ipv6_hdr(skb); + EnterFunction(10); rcu_read_lock(); - if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &ipvsh->daddr.in6, NULL, + if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL, + &iph->daddr, NULL, ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) goto tx_error; @@ -706,7 +711,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } was_input = rt_is_input_route(skb_rtable(skb)); - local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, + local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR, NULL, ipvsh); @@ -723,7 +728,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { - IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, + IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off, "ip_vs_nat_xmit(): " "stopping DNAT to local address"); goto tx_error; @@ -733,8 +738,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* From world but DNAT to loopback address? */ if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { - IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " - "stopping DNAT to loopback address"); + IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, ipvsh->off, + "ip_vs_nat_xmit(): stopping DNAT to loopback " + "address"); goto tx_error; } @@ -751,7 +757,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_hdr(skb)->daddr = cp->daddr.ip; ip_send_check(ip_hdr(skb)); - IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); + IP_VS_DBG_PKT(10, AF_INET, pp, skb, ipvsh->off, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still @@ -794,7 +800,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } - local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, + local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, + &cp->daddr.in6, NULL, ipvsh, 0, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | @@ -812,7 +819,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { - IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, + IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, ipvsh->off, "ip_vs_nat_xmit_v6(): " "stopping DNAT to local address"); goto tx_error; @@ -823,7 +830,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* From world but DNAT to loopback address? */ if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) { - IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, + IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, ipvsh->off, "ip_vs_nat_xmit_v6(): " "stopping DNAT to loopback address"); goto tx_error; @@ -841,7 +848,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; ipv6_hdr(skb)->daddr = cp->daddr.in6; - IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); + IP_VS_DBG_PKT(10, AF_INET6, pp, skb, ipvsh->off, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still @@ -967,8 +974,8 @@ int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct net *net = skb_net(skb); - struct netns_ipvs *ipvs = net_ipvs(net); + struct netns_ipvs *ipvs = cp->ipvs; + struct net *net = ipvs->net; struct rtable *rt; /* Route to the other host */ __be32 saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ @@ -984,7 +991,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, + local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_CONNECT | @@ -1042,7 +1049,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) - ip_local_out(skb); + ip_local_out(net, skb->sk, skb); else if (ret == NF_DROP) kfree_skb(skb); rcu_read_unlock(); @@ -1078,7 +1085,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, + local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, + &cp->daddr.in6, &saddr, ipvsh, 1, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | @@ -1133,7 +1141,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) - ip6_local_out(skb); + ip6_local_out(cp->ipvs->net, skb->sk, skb); else if (ret == NF_DROP) kfree_skb(skb); rcu_read_unlock(); @@ -1165,7 +1173,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, + local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh); @@ -1204,7 +1212,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, + local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, + &cp->daddr.in6, NULL, ipvsh, 0, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | @@ -1273,7 +1282,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; rcu_read_lock(); - local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode, + local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode, NULL, iph); if (local < 0) goto tx_error; @@ -1365,8 +1374,8 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; rcu_read_lock(); - local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, - NULL, ipvsh, 0, rt_mode); + local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, + &cp->daddr.in6, NULL, ipvsh, 0, rt_mode); if (local < 0) goto tx_error; rt = (struct rt6_info *) skb_dst(skb); -- cgit v1.2.3-54-g00ecf