summaryrefslogtreecommitdiff
path: root/net/ipv4/netfilter
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-08-05 17:04:01 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-08-05 17:04:01 -0300
commit57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch)
tree5e910f0e82173f4ef4f51111366a3f1299037a7b /net/ipv4/netfilter
Initial import
Diffstat (limited to 'net/ipv4/netfilter')
-rw-r--r--net/ipv4/netfilter/Kconfig406
-rw-r--r--net/ipv4/netfilter/Makefile72
-rw-r--r--net/ipv4/netfilter/arp_tables.c1926
-rw-r--r--net/ipv4/netfilter/arpt_mangle.c91
-rw-r--r--net/ipv4/netfilter/arptable_filter.c91
-rw-r--r--net/ipv4/netfilter/ip_tables.c2282
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c794
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c138
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c91
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c113
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c480
-rw-r--r--net/ipv4/netfilter/ipt_ah.c91
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c144
-rw-r--r--net/ipv4/netfilter/iptable_filter.c109
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c147
-rw-r--r--net/ipv4/netfilter/iptable_nat.c154
-rw-r--r--net/ipv4/netfilter/iptable_raw.c89
-rw-r--r--net/ipv4/netfilter/iptable_security.c109
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c542
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c469
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c428
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c129
-rw-r--r--net/ipv4/netfilter/nf_log_arp.c161
-rw-r--r--net/ipv4/netfilter/nf_log_ipv4.c397
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c631
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c481
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c153
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c311
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c149
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c83
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c1313
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c192
-rw-r--r--net/ipv4/netfilter/nf_tables_arp.c102
-rw-r--r--net/ipv4/netfilter/nf_tables_ipv4.c125
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c107
-rw-r--r--net/ipv4/netfilter/nft_chain_route_ipv4.c88
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c76
-rw-r--r--net/ipv4/netfilter/nft_redir_ipv4.c76
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c76
39 files changed, 13416 insertions, 0 deletions
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
new file mode 100644
index 000000000..fb20f3631
--- /dev/null
+++ b/net/ipv4/netfilter/Kconfig
@@ -0,0 +1,406 @@
+#
+# IP netfilter configuration
+#
+
+menu "IP: Netfilter Configuration"
+ depends on INET && NETFILTER
+
+config NF_DEFRAG_IPV4
+ tristate
+ default n
+
+config NF_CONNTRACK_IPV4
+ tristate "IPv4 connection tracking support (required for NAT)"
+ depends on NF_CONNTRACK
+ default m if NETFILTER_ADVANCED=n
+ select NF_DEFRAG_IPV4
+ ---help---
+ Connection tracking keeps a record of what packets have passed
+ through your machine, in order to figure out how they are related
+ into connections.
+
+ This is IPv4 support on Layer 3 independent connection tracking.
+ Layer 3 independent connection tracking is experimental scheme
+ which generalize ip_conntrack to support other layer 3 protocols.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_CONNTRACK_PROC_COMPAT
+ bool "proc/sysctl compatibility with old connection tracking"
+ depends on NF_CONNTRACK_PROCFS && NF_CONNTRACK_IPV4
+ default y
+ help
+ This option enables /proc and sysctl compatibility with the old
+ layer 3 dependent connection tracking. This is needed to keep
+ old programs that have not been adapted to the new names working.
+
+ If unsure, say Y.
+
+if NF_TABLES
+
+config NF_TABLES_IPV4
+ tristate "IPv4 nf_tables support"
+ help
+ This option enables the IPv4 support for nf_tables.
+
+if NF_TABLES_IPV4
+
+config NFT_CHAIN_ROUTE_IPV4
+ tristate "IPv4 nf_tables route chain support"
+ help
+ This option enables the "route" chain for IPv4 in nf_tables. This
+ chain type is used to force packet re-routing after mangling header
+ fields such as the source, destination, type of service and
+ the packet mark.
+
+config NFT_REJECT_IPV4
+ select NF_REJECT_IPV4
+ default NFT_REJECT
+ tristate
+
+endif # NF_TABLES_IPV4
+
+config NF_TABLES_ARP
+ tristate "ARP nf_tables support"
+ help
+ This option enables the ARP support for nf_tables.
+
+endif # NF_TABLES
+
+config NF_LOG_ARP
+ tristate "ARP packet logging"
+ default m if NETFILTER_ADVANCED=n
+ select NF_LOG_COMMON
+
+config NF_LOG_IPV4
+ tristate "IPv4 packet logging"
+ default m if NETFILTER_ADVANCED=n
+ select NF_LOG_COMMON
+
+config NF_REJECT_IPV4
+ tristate "IPv4 packet rejection"
+ default m if NETFILTER_ADVANCED=n
+
+config NF_NAT_IPV4
+ tristate "IPv4 NAT"
+ depends on NF_CONNTRACK_IPV4
+ default m if NETFILTER_ADVANCED=n
+ select NF_NAT
+ help
+ The IPv4 NAT option allows masquerading, port forwarding and other
+ forms of full Network Address Port Translation. This can be
+ controlled by iptables or nft.
+
+if NF_NAT_IPV4
+
+config NFT_CHAIN_NAT_IPV4
+ depends on NF_TABLES_IPV4
+ tristate "IPv4 nf_tables nat chain support"
+ help
+ This option enables the "nat" chain for IPv4 in nf_tables. This
+ chain type is used to perform Network Address Translation (NAT)
+ packet transformations such as the source, destination address and
+ source and destination ports.
+
+config NF_NAT_MASQUERADE_IPV4
+ tristate "IPv4 masquerade support"
+ help
+ This is the kernel functionality to provide NAT in the masquerade
+ flavour (automatic source address selection).
+
+config NFT_MASQ_IPV4
+ tristate "IPv4 masquerading support for nf_tables"
+ depends on NF_TABLES_IPV4
+ depends on NFT_MASQ
+ select NF_NAT_MASQUERADE_IPV4
+ help
+ This is the expression that provides IPv4 masquerading support for
+ nf_tables.
+
+config NFT_REDIR_IPV4
+ tristate "IPv4 redirect support for nf_tables"
+ depends on NF_TABLES_IPV4
+ depends on NFT_REDIR
+ select NF_NAT_REDIRECT
+ help
+ This is the expression that provides IPv4 redirect support for
+ nf_tables.
+
+config NF_NAT_SNMP_BASIC
+ tristate "Basic SNMP-ALG support"
+ depends on NF_CONNTRACK_SNMP
+ depends on NETFILTER_ADVANCED
+ default NF_NAT && NF_CONNTRACK_SNMP
+ ---help---
+
+ This module implements an Application Layer Gateway (ALG) for
+ SNMP payloads. In conjunction with NAT, it allows a network
+ management system to access multiple private networks with
+ conflicting addresses. It works by modifying IP addresses
+ inside SNMP payloads to match IP-layer NAT mapping.
+
+ This is the "basic" form of SNMP-ALG, as described in RFC 2962
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config NF_NAT_PROTO_GRE
+ tristate
+ depends on NF_CT_PROTO_GRE
+
+config NF_NAT_PPTP
+ tristate
+ depends on NF_CONNTRACK
+ default NF_CONNTRACK_PPTP
+ select NF_NAT_PROTO_GRE
+
+config NF_NAT_H323
+ tristate
+ depends on NF_CONNTRACK
+ default NF_CONNTRACK_H323
+
+endif # NF_NAT_IPV4
+
+config IP_NF_IPTABLES
+ tristate "IP tables support (required for filtering/masq/NAT)"
+ default m if NETFILTER_ADVANCED=n
+ select NETFILTER_XTABLES
+ help
+ iptables is a general, extensible packet identification framework.
+ The packet filtering and full NAT (masquerading, port forwarding,
+ etc) subsystems now use this: say `Y' or `M' here if you want to use
+ either of those.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+if IP_NF_IPTABLES
+
+# The matches.
+config IP_NF_MATCH_AH
+ tristate '"ah" match support'
+ depends on NETFILTER_ADVANCED
+ help
+ This match extension allows you to match a range of SPIs
+ inside AH header of IPSec packets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_MATCH_ECN
+ tristate '"ecn" match support'
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_MATCH_ECN
+ ---help---
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_MATCH_ECN.
+
+config IP_NF_MATCH_RPFILTER
+ tristate '"rpfilter" reverse path filter match support'
+ depends on NETFILTER_ADVANCED && (IP_NF_MANGLE || IP_NF_RAW)
+ ---help---
+ This option allows you to match packets whose replies would
+ go out via the interface the packet came in.
+
+ To compile it as a module, choose M here. If unsure, say N.
+ The module will be called ipt_rpfilter.
+
+config IP_NF_MATCH_TTL
+ tristate '"ttl" match support'
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_MATCH_HL
+ ---help---
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_MATCH_HL.
+
+# `filter', generic and specific targets
+config IP_NF_FILTER
+ tristate "Packet filtering"
+ default m if NETFILTER_ADVANCED=n
+ help
+ Packet filtering defines a table `filter', which has a series of
+ rules for simple packet filtering at local input, forwarding and
+ local output. See the man page for iptables(8).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_REJECT
+ tristate "REJECT target support"
+ depends on IP_NF_FILTER
+ select NF_REJECT_IPV4
+ default m if NETFILTER_ADVANCED=n
+ help
+ The REJECT target allows a filtering rule to specify that an ICMP
+ error should be issued in response to an incoming packet, rather
+ than silently being dropped.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_SYNPROXY
+ tristate "SYNPROXY target support"
+ depends on NF_CONNTRACK && NETFILTER_ADVANCED
+ select NETFILTER_SYNPROXY
+ select SYN_COOKIES
+ help
+ The SYNPROXY target allows you to intercept TCP connections and
+ establish them using syncookies before they are passed on to the
+ server. This allows to avoid conntrack and server resource usage
+ during SYN-flood attacks.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+# NAT + specific targets: nf_conntrack
+config IP_NF_NAT
+ tristate "iptables NAT support"
+ depends on NF_CONNTRACK_IPV4
+ default m if NETFILTER_ADVANCED=n
+ select NF_NAT
+ select NF_NAT_IPV4
+ select NETFILTER_XT_NAT
+ help
+ This enables the `nat' table in iptables. This allows masquerading,
+ port forwarding and other forms of full Network Address Port
+ Translation.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+if IP_NF_NAT
+
+config IP_NF_TARGET_MASQUERADE
+ tristate "MASQUERADE target support"
+ select NF_NAT_MASQUERADE_IPV4
+ default m if NETFILTER_ADVANCED=n
+ help
+ Masquerading is a special case of NAT: all outgoing connections are
+ changed to seem to come from a particular interface's address, and
+ if the interface goes down, those connections are lost. This is
+ only useful for dialup accounts with dynamic IP address (ie. your IP
+ address will be different on next dialup).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_NETMAP
+ tristate "NETMAP target support"
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_TARGET_NETMAP
+ ---help---
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_TARGET_NETMAP.
+
+config IP_NF_TARGET_REDIRECT
+ tristate "REDIRECT target support"
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_TARGET_REDIRECT
+ ---help---
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_TARGET_REDIRECT.
+
+endif # IP_NF_NAT
+
+# mangle + specific targets
+config IP_NF_MANGLE
+ tristate "Packet mangling"
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option adds a `mangle' table to iptables: see the man page for
+ iptables(8). This table is used for various packet alterations
+ which can effect how the packet is routed.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_CLUSTERIP
+ tristate "CLUSTERIP target support"
+ depends on IP_NF_MANGLE
+ depends on NF_CONNTRACK_IPV4
+ depends on NETFILTER_ADVANCED
+ select NF_CONNTRACK_MARK
+ help
+ The CLUSTERIP target allows you to build load-balancing clusters of
+ network servers without having a dedicated load-balancing
+ router/server/switch.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_ECN
+ tristate "ECN target support"
+ depends on IP_NF_MANGLE
+ depends on NETFILTER_ADVANCED
+ ---help---
+ This option adds a `ECN' target, which can be used in the iptables mangle
+ table.
+
+ You can use this target to remove the ECN bits from the IPv4 header of
+ an IP packet. This is particularly useful, if you need to work around
+ existing ECN blackholes on the internet, but don't want to disable
+ ECN support in general.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_TARGET_TTL
+ tristate '"TTL" target support'
+ depends on NETFILTER_ADVANCED && IP_NF_MANGLE
+ select NETFILTER_XT_TARGET_HL
+ ---help---
+ This is a backwards-compatible option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_TARGET_HL.
+
+# raw + specific targets
+config IP_NF_RAW
+ tristate 'raw table support (required for NOTRACK/TRACE)'
+ help
+ This option adds a `raw' table to iptables. This table is the very
+ first in the netfilter framework and hooks in at the PREROUTING
+ and OUTPUT chains.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
+# security table for MAC policy
+config IP_NF_SECURITY
+ tristate "Security table"
+ depends on SECURITY
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `security' table to iptables, for use
+ with Mandatory Access Control (MAC) policy.
+
+ If unsure, say N.
+
+endif # IP_NF_IPTABLES
+
+# ARP tables
+config IP_NF_ARPTABLES
+ tristate "ARP tables support"
+ select NETFILTER_XTABLES
+ depends on NETFILTER_ADVANCED
+ help
+ arptables is a general, extensible packet identification framework.
+ The ARP packet filtering and mangling (manipulation)subsystems
+ use this: say Y or M here if you want to use either of those.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+if IP_NF_ARPTABLES
+
+config IP_NF_ARPFILTER
+ tristate "ARP packet filtering"
+ help
+ ARP packet filtering defines a table `filter', which has a series of
+ rules for simple ARP packet filtering at local input and
+ local output. On a bridge, you can also specify filtering rules
+ for forwarded ARP packets. See the man page for arptables(8).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP_NF_ARP_MANGLE
+ tristate "ARP payload mangling"
+ help
+ Allows altering the ARP packet payload: source and destination
+ hardware and network addresses.
+
+endif # IP_NF_ARPTABLES
+
+endmenu
+
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
new file mode 100644
index 000000000..7fe6c7035
--- /dev/null
+++ b/net/ipv4/netfilter/Makefile
@@ -0,0 +1,72 @@
+#
+# Makefile for the netfilter modules on top of IPv4.
+#
+
+# objects for l3 independent conntrack
+nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
+ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y)
+ifeq ($(CONFIG_PROC_FS),y)
+nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o
+endif
+endif
+
+# connection tracking
+obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
+
+nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
+obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
+
+# defrag
+obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
+
+# logging
+obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o
+obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o
+
+# reject
+obj-$(CONFIG_NF_REJECT_IPV4) += nf_reject_ipv4.o
+
+# NAT helpers (nf_conntrack)
+obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o
+obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o
+obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
+obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
+
+# NAT protocols (nf_nat)
+obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
+
+obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
+obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
+obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
+obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
+obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
+obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
+obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
+
+# generic IP tables
+obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
+
+# the three instances of ip_tables
+obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
+obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
+obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
+obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
+obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
+
+# matches
+obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
+obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o
+
+# targets
+obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
+obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
+obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
+obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
+obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o
+
+# generic ARP tables
+obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
+obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
+
+# just filtering instance of ARP tables for now
+obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
new file mode 100644
index 000000000..a61200754
--- /dev/null
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -0,0 +1,1926 @@
+/*
+ * Packet matching code for ARP packets.
+ *
+ * Based heavily, if not almost entirely, upon ip_tables.c framework.
+ *
+ * Some ARP specific bits are:
+ *
+ * Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2006-2009 Patrick McHardy <kaber@trash.net>
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/capability.h>
+#include <linux/if_arp.h>
+#include <linux/kmod.h>
+#include <linux/vmalloc.h>
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/err.h>
+#include <net/compat.h>
+#include <net/sock.h>
+#include <asm/uaccess.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_arp/arp_tables.h>
+#include "../../netfilter/xt_repldata.h"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
+MODULE_DESCRIPTION("arptables core");
+
+/*#define DEBUG_ARP_TABLES*/
+/*#define DEBUG_ARP_TABLES_USER*/
+
+#ifdef DEBUG_ARP_TABLES
+#define dprintf(format, args...) printk(format , ## args)
+#else
+#define dprintf(format, args...)
+#endif
+
+#ifdef DEBUG_ARP_TABLES_USER
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#define ARP_NF_ASSERT(x) WARN_ON(!(x))
+#else
+#define ARP_NF_ASSERT(x)
+#endif
+
+void *arpt_alloc_initial_table(const struct xt_table *info)
+{
+ return xt_alloc_initial_table(arpt, ARPT);
+}
+EXPORT_SYMBOL_GPL(arpt_alloc_initial_table);
+
+static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
+ const char *hdr_addr, int len)
+{
+ int i, ret;
+
+ if (len > ARPT_DEV_ADDR_LEN_MAX)
+ len = ARPT_DEV_ADDR_LEN_MAX;
+
+ ret = 0;
+ for (i = 0; i < len; i++)
+ ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i];
+
+ return ret != 0;
+}
+
+/*
+ * Unfortunately, _b and _mask are not aligned to an int (or long int)
+ * Some arches dont care, unrolling the loop is a win on them.
+ * For other arches, we only have a 16bit alignement.
+ */
+static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask)
+{
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ unsigned long ret = ifname_compare_aligned(_a, _b, _mask);
+#else
+ unsigned long ret = 0;
+ const u16 *a = (const u16 *)_a;
+ const u16 *b = (const u16 *)_b;
+ const u16 *mask = (const u16 *)_mask;
+ int i;
+
+ for (i = 0; i < IFNAMSIZ/sizeof(u16); i++)
+ ret |= (a[i] ^ b[i]) & mask[i];
+#endif
+ return ret;
+}
+
+/* Returns whether packet matches rule or not. */
+static inline int arp_packet_match(const struct arphdr *arphdr,
+ struct net_device *dev,
+ const char *indev,
+ const char *outdev,
+ const struct arpt_arp *arpinfo)
+{
+ const char *arpptr = (char *)(arphdr + 1);
+ const char *src_devaddr, *tgt_devaddr;
+ __be32 src_ipaddr, tgt_ipaddr;
+ long ret;
+
+#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))
+
+ if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
+ ARPT_INV_ARPOP)) {
+ dprintf("ARP operation field mismatch.\n");
+ dprintf("ar_op: %04x info->arpop: %04x info->arpop_mask: %04x\n",
+ arphdr->ar_op, arpinfo->arpop, arpinfo->arpop_mask);
+ return 0;
+ }
+
+ if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd,
+ ARPT_INV_ARPHRD)) {
+ dprintf("ARP hardware address format mismatch.\n");
+ dprintf("ar_hrd: %04x info->arhrd: %04x info->arhrd_mask: %04x\n",
+ arphdr->ar_hrd, arpinfo->arhrd, arpinfo->arhrd_mask);
+ return 0;
+ }
+
+ if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro,
+ ARPT_INV_ARPPRO)) {
+ dprintf("ARP protocol address format mismatch.\n");
+ dprintf("ar_pro: %04x info->arpro: %04x info->arpro_mask: %04x\n",
+ arphdr->ar_pro, arpinfo->arpro, arpinfo->arpro_mask);
+ return 0;
+ }
+
+ if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln,
+ ARPT_INV_ARPHLN)) {
+ dprintf("ARP hardware address length mismatch.\n");
+ dprintf("ar_hln: %02x info->arhln: %02x info->arhln_mask: %02x\n",
+ arphdr->ar_hln, arpinfo->arhln, arpinfo->arhln_mask);
+ return 0;
+ }
+
+ src_devaddr = arpptr;
+ arpptr += dev->addr_len;
+ memcpy(&src_ipaddr, arpptr, sizeof(u32));
+ arpptr += sizeof(u32);
+ tgt_devaddr = arpptr;
+ arpptr += dev->addr_len;
+ memcpy(&tgt_ipaddr, arpptr, sizeof(u32));
+
+ if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len),
+ ARPT_INV_SRCDEVADDR) ||
+ FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len),
+ ARPT_INV_TGTDEVADDR)) {
+ dprintf("Source or target device address mismatch.\n");
+
+ return 0;
+ }
+
+ if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr,
+ ARPT_INV_SRCIP) ||
+ FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr),
+ ARPT_INV_TGTIP)) {
+ dprintf("Source or target IP address mismatch.\n");
+
+ dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
+ &src_ipaddr,
+ &arpinfo->smsk.s_addr,
+ &arpinfo->src.s_addr,
+ arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : "");
+ dprintf("TGT: %pI4 Mask: %pI4 Target: %pI4.%s\n",
+ &tgt_ipaddr,
+ &arpinfo->tmsk.s_addr,
+ &arpinfo->tgt.s_addr,
+ arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : "");
+ return 0;
+ }
+
+ /* Look for ifname matches. */
+ ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask);
+
+ if (FWINV(ret != 0, ARPT_INV_VIA_IN)) {
+ dprintf("VIA in mismatch (%s vs %s).%s\n",
+ indev, arpinfo->iniface,
+ arpinfo->invflags&ARPT_INV_VIA_IN ?" (INV)":"");
+ return 0;
+ }
+
+ ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask);
+
+ if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
+ dprintf("VIA out mismatch (%s vs %s).%s\n",
+ outdev, arpinfo->outiface,
+ arpinfo->invflags&ARPT_INV_VIA_OUT ?" (INV)":"");
+ return 0;
+ }
+
+ return 1;
+#undef FWINV
+}
+
+static inline int arp_checkentry(const struct arpt_arp *arp)
+{
+ if (arp->flags & ~ARPT_F_MASK) {
+ duprintf("Unknown flag bits set: %08X\n",
+ arp->flags & ~ARPT_F_MASK);
+ return 0;
+ }
+ if (arp->invflags & ~ARPT_INV_MASK) {
+ duprintf("Unknown invflag bits set: %08X\n",
+ arp->invflags & ~ARPT_INV_MASK);
+ return 0;
+ }
+
+ return 1;
+}
+
+static unsigned int
+arpt_error(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ net_err_ratelimited("arp_tables: error: '%s'\n",
+ (const char *)par->targinfo);
+
+ return NF_DROP;
+}
+
+static inline const struct xt_entry_target *
+arpt_get_target_c(const struct arpt_entry *e)
+{
+ return arpt_get_target((struct arpt_entry *)e);
+}
+
+static inline struct arpt_entry *
+get_entry(const void *base, unsigned int offset)
+{
+ return (struct arpt_entry *)(base + offset);
+}
+
+static inline __pure
+struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
+{
+ return (void *)entry + entry->next_offset;
+}
+
+unsigned int arpt_do_table(struct sk_buff *skb,
+ unsigned int hook,
+ const struct nf_hook_state *state,
+ struct xt_table *table)
+{
+ static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
+ unsigned int verdict = NF_DROP;
+ const struct arphdr *arp;
+ struct arpt_entry *e, *back;
+ const char *indev, *outdev;
+ void *table_base;
+ const struct xt_table_info *private;
+ struct xt_action_param acpar;
+ unsigned int addend;
+
+ if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
+ return NF_DROP;
+
+ indev = state->in ? state->in->name : nulldevname;
+ outdev = state->out ? state->out->name : nulldevname;
+
+ local_bh_disable();
+ addend = xt_write_recseq_begin();
+ private = table->private;
+ /*
+ * Ensure we load private-> members after we've fetched the base
+ * pointer.
+ */
+ smp_read_barrier_depends();
+ table_base = private->entries[smp_processor_id()];
+
+ e = get_entry(table_base, private->hook_entry[hook]);
+ back = get_entry(table_base, private->underflow[hook]);
+
+ acpar.in = state->in;
+ acpar.out = state->out;
+ acpar.hooknum = hook;
+ acpar.family = NFPROTO_ARP;
+ acpar.hotdrop = false;
+
+ arp = arp_hdr(skb);
+ do {
+ const struct xt_entry_target *t;
+
+ if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
+ e = arpt_next_entry(e);
+ continue;
+ }
+
+ ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1);
+
+ t = arpt_get_target_c(e);
+
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
+
+ v = ((struct xt_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != XT_RETURN) {
+ verdict = (unsigned int)(-v) - 1;
+ break;
+ }
+ e = back;
+ back = get_entry(table_base, back->comefrom);
+ continue;
+ }
+ if (table_base + v
+ != arpt_next_entry(e)) {
+ /* Save old back ptr in next entry */
+ struct arpt_entry *next = arpt_next_entry(e);
+ next->comefrom = (void *)back - table_base;
+
+ /* set back pointer to next entry */
+ back = next;
+ }
+
+ e = get_entry(table_base, v);
+ continue;
+ }
+
+ /* Targets which reenter must return
+ * abs. verdicts
+ */
+ acpar.target = t->u.kernel.target;
+ acpar.targinfo = t->data;
+ verdict = t->u.kernel.target->target(skb, &acpar);
+
+ /* Target might have changed stuff. */
+ arp = arp_hdr(skb);
+
+ if (verdict == XT_CONTINUE)
+ e = arpt_next_entry(e);
+ else
+ /* Verdict */
+ break;
+ } while (!acpar.hotdrop);
+ xt_write_recseq_end(addend);
+ local_bh_enable();
+
+ if (acpar.hotdrop)
+ return NF_DROP;
+ else
+ return verdict;
+}
+
+/* All zeroes == unconditional rule. */
+static inline bool unconditional(const struct arpt_arp *arp)
+{
+ static const struct arpt_arp uncond;
+
+ return memcmp(arp, &uncond, sizeof(uncond)) == 0;
+}
+
+/* Figures out from what hook each rule can be called: returns 0 if
+ * there are loops. Puts hook bitmask in comefrom.
+ */
+static int mark_source_chains(const struct xt_table_info *newinfo,
+ unsigned int valid_hooks, void *entry0)
+{
+ unsigned int hook;
+
+ /* No recursion; use packet counter to save back ptrs (reset
+ * to 0 as we leave), and comefrom to save source hook bitmask.
+ */
+ for (hook = 0; hook < NF_ARP_NUMHOOKS; hook++) {
+ unsigned int pos = newinfo->hook_entry[hook];
+ struct arpt_entry *e
+ = (struct arpt_entry *)(entry0 + pos);
+
+ if (!(valid_hooks & (1 << hook)))
+ continue;
+
+ /* Set initial back pointer. */
+ e->counters.pcnt = pos;
+
+ for (;;) {
+ const struct xt_standard_target *t
+ = (void *)arpt_get_target_c(e);
+ int visited = e->comefrom & (1 << hook);
+
+ if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) {
+ pr_notice("arptables: loop hook %u pos %u %08X.\n",
+ hook, pos, e->comefrom);
+ return 0;
+ }
+ e->comefrom
+ |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));
+
+ /* Unconditional return/END. */
+ if ((e->target_offset == sizeof(struct arpt_entry) &&
+ (strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0) &&
+ t->verdict < 0 && unconditional(&e->arp)) ||
+ visited) {
+ unsigned int oldpos, size;
+
+ if ((strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0) &&
+ t->verdict < -NF_MAX_VERDICT - 1) {
+ duprintf("mark_source_chains: bad "
+ "negative verdict (%i)\n",
+ t->verdict);
+ return 0;
+ }
+
+ /* Return: backtrack through the last
+ * big jump.
+ */
+ do {
+ e->comefrom ^= (1<<NF_ARP_NUMHOOKS);
+ oldpos = pos;
+ pos = e->counters.pcnt;
+ e->counters.pcnt = 0;
+
+ /* We're at the start. */
+ if (pos == oldpos)
+ goto next;
+
+ e = (struct arpt_entry *)
+ (entry0 + pos);
+ } while (oldpos == pos + e->next_offset);
+
+ /* Move along one */
+ size = e->next_offset;
+ e = (struct arpt_entry *)
+ (entry0 + pos + size);
+ e->counters.pcnt = pos;
+ pos += size;
+ } else {
+ int newpos = t->verdict;
+
+ if (strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0 &&
+ newpos >= 0) {
+ if (newpos > newinfo->size -
+ sizeof(struct arpt_entry)) {
+ duprintf("mark_source_chains: "
+ "bad verdict (%i)\n",
+ newpos);
+ return 0;
+ }
+
+ /* This a jump; chase it. */
+ duprintf("Jump rule %u -> %u\n",
+ pos, newpos);
+ } else {
+ /* ... this is a fallthru */
+ newpos = pos + e->next_offset;
+ }
+ e = (struct arpt_entry *)
+ (entry0 + newpos);
+ e->counters.pcnt = pos;
+ pos = newpos;
+ }
+ }
+ next:
+ duprintf("Finished chain %u\n", hook);
+ }
+ return 1;
+}
+
+static inline int check_entry(const struct arpt_entry *e, const char *name)
+{
+ const struct xt_entry_target *t;
+
+ if (!arp_checkentry(&e->arp)) {
+ duprintf("arp_tables: arp check failed %p %s.\n", e, name);
+ return -EINVAL;
+ }
+
+ if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset)
+ return -EINVAL;
+
+ t = arpt_get_target_c(e);
+ if (e->target_offset + t->u.target_size > e->next_offset)
+ return -EINVAL;
+
+ return 0;
+}
+
+static inline int check_target(struct arpt_entry *e, const char *name)
+{
+ struct xt_entry_target *t = arpt_get_target(e);
+ int ret;
+ struct xt_tgchk_param par = {
+ .table = name,
+ .entryinfo = e,
+ .target = t->u.kernel.target,
+ .targinfo = t->data,
+ .hook_mask = e->comefrom,
+ .family = NFPROTO_ARP,
+ };
+
+ ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
+ if (ret < 0) {
+ duprintf("arp_tables: check failed for `%s'.\n",
+ t->u.kernel.target->name);
+ return ret;
+ }
+ return 0;
+}
+
+static inline int
+find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
+{
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ int ret;
+
+ ret = check_entry(e, name);
+ if (ret)
+ return ret;
+
+ t = arpt_get_target(e);
+ target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
+ t->u.user.revision);
+ if (IS_ERR(target)) {
+ duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
+ ret = PTR_ERR(target);
+ goto out;
+ }
+ t->u.kernel.target = target;
+
+ ret = check_target(e, name);
+ if (ret)
+ goto err;
+ return 0;
+err:
+ module_put(t->u.kernel.target->me);
+out:
+ return ret;
+}
+
+static bool check_underflow(const struct arpt_entry *e)
+{
+ const struct xt_entry_target *t;
+ unsigned int verdict;
+
+ if (!unconditional(&e->arp))
+ return false;
+ t = arpt_get_target_c(e);
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
+ return false;
+ verdict = ((struct xt_standard_target *)t)->verdict;
+ verdict = -verdict - 1;
+ return verdict == NF_DROP || verdict == NF_ACCEPT;
+}
+
+static inline int check_entry_size_and_hooks(struct arpt_entry *e,
+ struct xt_table_info *newinfo,
+ const unsigned char *base,
+ const unsigned char *limit,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows,
+ unsigned int valid_hooks)
+{
+ unsigned int h;
+
+ if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 ||
+ (unsigned char *)e + sizeof(struct arpt_entry) >= limit) {
+ duprintf("Bad offset %p\n", e);
+ return -EINVAL;
+ }
+
+ if (e->next_offset
+ < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) {
+ duprintf("checking: element %p size %u\n",
+ e, e->next_offset);
+ return -EINVAL;
+ }
+
+ /* Check hooks & underflows */
+ for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
+ if (!(valid_hooks & (1 << h)))
+ continue;
+ if ((unsigned char *)e - base == hook_entries[h])
+ newinfo->hook_entry[h] = hook_entries[h];
+ if ((unsigned char *)e - base == underflows[h]) {
+ if (!check_underflow(e)) {
+ pr_err("Underflows must be unconditional and "
+ "use the STANDARD target with "
+ "ACCEPT/DROP\n");
+ return -EINVAL;
+ }
+ newinfo->underflow[h] = underflows[h];
+ }
+ }
+
+ /* Clear counters and comefrom */
+ e->counters = ((struct xt_counters) { 0, 0 });
+ e->comefrom = 0;
+ return 0;
+}
+
+static inline void cleanup_entry(struct arpt_entry *e)
+{
+ struct xt_tgdtor_param par;
+ struct xt_entry_target *t;
+
+ t = arpt_get_target(e);
+ par.target = t->u.kernel.target;
+ par.targinfo = t->data;
+ par.family = NFPROTO_ARP;
+ if (par.target->destroy != NULL)
+ par.target->destroy(&par);
+ module_put(par.target->me);
+}
+
+/* Checks and translates the user-supplied table segment (held in
+ * newinfo).
+ */
+static int translate_table(struct xt_table_info *newinfo, void *entry0,
+ const struct arpt_replace *repl)
+{
+ struct arpt_entry *iter;
+ unsigned int i;
+ int ret = 0;
+
+ newinfo->size = repl->size;
+ newinfo->number = repl->num_entries;
+
+ /* Init all hooks to impossible value. */
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ newinfo->hook_entry[i] = 0xFFFFFFFF;
+ newinfo->underflow[i] = 0xFFFFFFFF;
+ }
+
+ duprintf("translate_table: size %u\n", newinfo->size);
+ i = 0;
+
+ /* Walk through entries, checking offsets. */
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ ret = check_entry_size_and_hooks(iter, newinfo, entry0,
+ entry0 + repl->size,
+ repl->hook_entry,
+ repl->underflow,
+ repl->valid_hooks);
+ if (ret != 0)
+ break;
+ ++i;
+ if (strcmp(arpt_get_target(iter)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
+ }
+ duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
+ if (ret != 0)
+ return ret;
+
+ if (i != repl->num_entries) {
+ duprintf("translate_table: %u not %u entries\n",
+ i, repl->num_entries);
+ return -EINVAL;
+ }
+
+ /* Check hooks all assigned */
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ /* Only hooks which are valid */
+ if (!(repl->valid_hooks & (1 << i)))
+ continue;
+ if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
+ duprintf("Invalid hook entry %u %u\n",
+ i, repl->hook_entry[i]);
+ return -EINVAL;
+ }
+ if (newinfo->underflow[i] == 0xFFFFFFFF) {
+ duprintf("Invalid underflow %u %u\n",
+ i, repl->underflow[i]);
+ return -EINVAL;
+ }
+ }
+
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) {
+ duprintf("Looping hook\n");
+ return -ELOOP;
+ }
+
+ /* Finally, each sanity check must pass */
+ i = 0;
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ ret = find_check_entry(iter, repl->name, repl->size);
+ if (ret != 0)
+ break;
+ ++i;
+ }
+
+ if (ret != 0) {
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ if (i-- == 0)
+ break;
+ cleanup_entry(iter);
+ }
+ return ret;
+ }
+
+ /* And one copy for every other CPU */
+ for_each_possible_cpu(i) {
+ if (newinfo->entries[i] && newinfo->entries[i] != entry0)
+ memcpy(newinfo->entries[i], entry0, newinfo->size);
+ }
+
+ return ret;
+}
+
+static void get_counters(const struct xt_table_info *t,
+ struct xt_counters counters[])
+{
+ struct arpt_entry *iter;
+ unsigned int cpu;
+ unsigned int i;
+
+ for_each_possible_cpu(cpu) {
+ seqcount_t *s = &per_cpu(xt_recseq, cpu);
+
+ i = 0;
+ xt_entry_foreach(iter, t->entries[cpu], t->size) {
+ u64 bcnt, pcnt;
+ unsigned int start;
+
+ do {
+ start = read_seqcount_begin(s);
+ bcnt = iter->counters.bcnt;
+ pcnt = iter->counters.pcnt;
+ } while (read_seqcount_retry(s, start));
+
+ ADD_COUNTER(counters[i], bcnt, pcnt);
+ ++i;
+ }
+ }
+}
+
+static struct xt_counters *alloc_counters(const struct xt_table *table)
+{
+ unsigned int countersize;
+ struct xt_counters *counters;
+ const struct xt_table_info *private = table->private;
+
+ /* We need atomic snapshot of counters: rest doesn't change
+ * (other than comefrom, which userspace doesn't care
+ * about).
+ */
+ countersize = sizeof(struct xt_counters) * private->number;
+ counters = vzalloc(countersize);
+
+ if (counters == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ get_counters(private, counters);
+
+ return counters;
+}
+
+static int copy_entries_to_user(unsigned int total_size,
+ const struct xt_table *table,
+ void __user *userptr)
+{
+ unsigned int off, num;
+ const struct arpt_entry *e;
+ struct xt_counters *counters;
+ struct xt_table_info *private = table->private;
+ int ret = 0;
+ void *loc_cpu_entry;
+
+ counters = alloc_counters(table);
+ if (IS_ERR(counters))
+ return PTR_ERR(counters);
+
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ /* ... then copy entire thing ... */
+ if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ /* FIXME: use iterator macros --RR */
+ /* ... then go back and fix counters and names */
+ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
+ const struct xt_entry_target *t;
+
+ e = (struct arpt_entry *)(loc_cpu_entry + off);
+ if (copy_to_user(userptr + off
+ + offsetof(struct arpt_entry, counters),
+ &counters[num],
+ sizeof(counters[num])) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ t = arpt_get_target_c(e);
+ if (copy_to_user(userptr + off + e->target_offset
+ + offsetof(struct xt_entry_target,
+ u.user.name),
+ t->u.kernel.target->name,
+ strlen(t->u.kernel.target->name)+1) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+ }
+
+ free_counters:
+ vfree(counters);
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static void compat_standard_from_user(void *dst, const void *src)
+{
+ int v = *(compat_int_t *)src;
+
+ if (v > 0)
+ v += xt_compat_calc_jump(NFPROTO_ARP, v);
+ memcpy(dst, &v, sizeof(v));
+}
+
+static int compat_standard_to_user(void __user *dst, const void *src)
+{
+ compat_int_t cv = *(int *)src;
+
+ if (cv > 0)
+ cv -= xt_compat_calc_jump(NFPROTO_ARP, cv);
+ return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
+}
+
+static int compat_calc_entry(const struct arpt_entry *e,
+ const struct xt_table_info *info,
+ const void *base, struct xt_table_info *newinfo)
+{
+ const struct xt_entry_target *t;
+ unsigned int entry_offset;
+ int off, i, ret;
+
+ off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
+ entry_offset = (void *)e - base;
+
+ t = arpt_get_target_c(e);
+ off += xt_compat_target_offset(t->u.kernel.target);
+ newinfo->size -= off;
+ ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ if (info->hook_entry[i] &&
+ (e < (struct arpt_entry *)(base + info->hook_entry[i])))
+ newinfo->hook_entry[i] -= off;
+ if (info->underflow[i] &&
+ (e < (struct arpt_entry *)(base + info->underflow[i])))
+ newinfo->underflow[i] -= off;
+ }
+ return 0;
+}
+
+static int compat_table_info(const struct xt_table_info *info,
+ struct xt_table_info *newinfo)
+{
+ struct arpt_entry *iter;
+ void *loc_cpu_entry;
+ int ret;
+
+ if (!newinfo || !info)
+ return -EINVAL;
+
+ /* we dont care about newinfo->entries[] */
+ memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+ newinfo->initial_entries = 0;
+ loc_cpu_entry = info->entries[raw_smp_processor_id()];
+ xt_compat_init_offsets(NFPROTO_ARP, info->number);
+ xt_entry_foreach(iter, loc_cpu_entry, info->size) {
+ ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
+ if (ret != 0)
+ return ret;
+ }
+ return 0;
+}
+#endif
+
+static int get_info(struct net *net, void __user *user,
+ const int *len, int compat)
+{
+ char name[XT_TABLE_MAXNAMELEN];
+ struct xt_table *t;
+ int ret;
+
+ if (*len != sizeof(struct arpt_getinfo)) {
+ duprintf("length %u != %Zu\n", *len,
+ sizeof(struct arpt_getinfo));
+ return -EINVAL;
+ }
+
+ if (copy_from_user(name, user, sizeof(name)) != 0)
+ return -EFAULT;
+
+ name[XT_TABLE_MAXNAMELEN-1] = '\0';
+#ifdef CONFIG_COMPAT
+ if (compat)
+ xt_compat_lock(NFPROTO_ARP);
+#endif
+ t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
+ "arptable_%s", name);
+ if (!IS_ERR_OR_NULL(t)) {
+ struct arpt_getinfo info;
+ const struct xt_table_info *private = t->private;
+#ifdef CONFIG_COMPAT
+ struct xt_table_info tmp;
+
+ if (compat) {
+ ret = compat_table_info(private, &tmp);
+ xt_compat_flush_offsets(NFPROTO_ARP);
+ private = &tmp;
+ }
+#endif
+ memset(&info, 0, sizeof(info));
+ info.valid_hooks = t->valid_hooks;
+ memcpy(info.hook_entry, private->hook_entry,
+ sizeof(info.hook_entry));
+ memcpy(info.underflow, private->underflow,
+ sizeof(info.underflow));
+ info.num_entries = private->number;
+ info.size = private->size;
+ strcpy(info.name, name);
+
+ if (copy_to_user(user, &info, *len) != 0)
+ ret = -EFAULT;
+ else
+ ret = 0;
+ xt_table_unlock(t);
+ module_put(t->me);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
+#ifdef CONFIG_COMPAT
+ if (compat)
+ xt_compat_unlock(NFPROTO_ARP);
+#endif
+ return ret;
+}
+
+static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
+ const int *len)
+{
+ int ret;
+ struct arpt_get_entries get;
+ struct xt_table *t;
+
+ if (*len < sizeof(get)) {
+ duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
+ return -EINVAL;
+ }
+ if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+ return -EFAULT;
+ if (*len != sizeof(struct arpt_get_entries) + get.size) {
+ duprintf("get_entries: %u != %Zu\n", *len,
+ sizeof(struct arpt_get_entries) + get.size);
+ return -EINVAL;
+ }
+
+ t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
+ if (!IS_ERR_OR_NULL(t)) {
+ const struct xt_table_info *private = t->private;
+
+ duprintf("t->private->number = %u\n",
+ private->number);
+ if (get.size == private->size)
+ ret = copy_entries_to_user(private->size,
+ t, uptr->entrytable);
+ else {
+ duprintf("get_entries: I've got %u not %u!\n",
+ private->size, get.size);
+ ret = -EAGAIN;
+ }
+ module_put(t->me);
+ xt_table_unlock(t);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
+
+ return ret;
+}
+
+static int __do_replace(struct net *net, const char *name,
+ unsigned int valid_hooks,
+ struct xt_table_info *newinfo,
+ unsigned int num_counters,
+ void __user *counters_ptr)
+{
+ int ret;
+ struct xt_table *t;
+ struct xt_table_info *oldinfo;
+ struct xt_counters *counters;
+ void *loc_cpu_old_entry;
+ struct arpt_entry *iter;
+
+ ret = 0;
+ counters = vzalloc(num_counters * sizeof(struct xt_counters));
+ if (!counters) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
+ "arptable_%s", name);
+ if (IS_ERR_OR_NULL(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+ goto free_newinfo_counters_untrans;
+ }
+
+ /* You lied! */
+ if (valid_hooks != t->valid_hooks) {
+ duprintf("Valid hook crap: %08X vs %08X\n",
+ valid_hooks, t->valid_hooks);
+ ret = -EINVAL;
+ goto put_module;
+ }
+
+ oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
+ if (!oldinfo)
+ goto put_module;
+
+ /* Update module usage count based on number of rules */
+ duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
+ oldinfo->number, oldinfo->initial_entries, newinfo->number);
+ if ((oldinfo->number > oldinfo->initial_entries) ||
+ (newinfo->number <= oldinfo->initial_entries))
+ module_put(t->me);
+ if ((oldinfo->number > oldinfo->initial_entries) &&
+ (newinfo->number <= oldinfo->initial_entries))
+ module_put(t->me);
+
+ /* Get the old counters, and synchronize with replace */
+ get_counters(oldinfo, counters);
+
+ /* Decrease module usage counts and free resource */
+ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+ xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+ cleanup_entry(iter);
+
+ xt_free_table_info(oldinfo);
+ if (copy_to_user(counters_ptr, counters,
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n");
+ }
+ vfree(counters);
+ xt_table_unlock(t);
+ return ret;
+
+ put_module:
+ module_put(t->me);
+ xt_table_unlock(t);
+ free_newinfo_counters_untrans:
+ vfree(counters);
+ out:
+ return ret;
+}
+
+static int do_replace(struct net *net, const void __user *user,
+ unsigned int len)
+{
+ int ret;
+ struct arpt_replace tmp;
+ struct xt_table_info *newinfo;
+ void *loc_cpu_entry;
+ struct arpt_entry *iter;
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+ /* overflow check */
+ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+ return -ENOMEM;
+ if (tmp.num_counters == 0)
+ return -EINVAL;
+
+ tmp.name[sizeof(tmp.name)-1] = 0;
+
+ newinfo = xt_alloc_table_info(tmp.size);
+ if (!newinfo)
+ return -ENOMEM;
+
+ /* choose the copy that is on our node/cpu */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+ tmp.size) != 0) {
+ ret = -EFAULT;
+ goto free_newinfo;
+ }
+
+ ret = translate_table(newinfo, loc_cpu_entry, &tmp);
+ if (ret != 0)
+ goto free_newinfo;
+
+ duprintf("arp_tables: Translated table\n");
+
+ ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
+ tmp.num_counters, tmp.counters);
+ if (ret)
+ goto free_newinfo_untrans;
+ return 0;
+
+ free_newinfo_untrans:
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter);
+ free_newinfo:
+ xt_free_table_info(newinfo);
+ return ret;
+}
+
+static int do_add_counters(struct net *net, const void __user *user,
+ unsigned int len, int compat)
+{
+ unsigned int i, curcpu;
+ struct xt_counters_info tmp;
+ struct xt_counters *paddc;
+ unsigned int num_counters;
+ const char *name;
+ int size;
+ void *ptmp;
+ struct xt_table *t;
+ const struct xt_table_info *private;
+ int ret = 0;
+ void *loc_cpu_entry;
+ struct arpt_entry *iter;
+ unsigned int addend;
+#ifdef CONFIG_COMPAT
+ struct compat_xt_counters_info compat_tmp;
+
+ if (compat) {
+ ptmp = &compat_tmp;
+ size = sizeof(struct compat_xt_counters_info);
+ } else
+#endif
+ {
+ ptmp = &tmp;
+ size = sizeof(struct xt_counters_info);
+ }
+
+ if (copy_from_user(ptmp, user, size) != 0)
+ return -EFAULT;
+
+#ifdef CONFIG_COMPAT
+ if (compat) {
+ num_counters = compat_tmp.num_counters;
+ name = compat_tmp.name;
+ } else
+#endif
+ {
+ num_counters = tmp.num_counters;
+ name = tmp.name;
+ }
+
+ if (len != size + num_counters * sizeof(struct xt_counters))
+ return -EINVAL;
+
+ paddc = vmalloc(len - size);
+ if (!paddc)
+ return -ENOMEM;
+
+ if (copy_from_user(paddc, user + size, len - size) != 0) {
+ ret = -EFAULT;
+ goto free;
+ }
+
+ t = xt_find_table_lock(net, NFPROTO_ARP, name);
+ if (IS_ERR_OR_NULL(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+ goto free;
+ }
+
+ local_bh_disable();
+ private = t->private;
+ if (private->number != num_counters) {
+ ret = -EINVAL;
+ goto unlock_up_free;
+ }
+
+ i = 0;
+ /* Choose the copy that is on our node */
+ curcpu = smp_processor_id();
+ loc_cpu_entry = private->entries[curcpu];
+ addend = xt_write_recseq_begin();
+ xt_entry_foreach(iter, loc_cpu_entry, private->size) {
+ ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+ ++i;
+ }
+ xt_write_recseq_end(addend);
+ unlock_up_free:
+ local_bh_enable();
+ xt_table_unlock(t);
+ module_put(t->me);
+ free:
+ vfree(paddc);
+
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static inline void compat_release_entry(struct compat_arpt_entry *e)
+{
+ struct xt_entry_target *t;
+
+ t = compat_arpt_get_target(e);
+ module_put(t->u.kernel.target->me);
+}
+
+static inline int
+check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
+ struct xt_table_info *newinfo,
+ unsigned int *size,
+ const unsigned char *base,
+ const unsigned char *limit,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows,
+ const char *name)
+{
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ unsigned int entry_offset;
+ int ret, off, h;
+
+ duprintf("check_compat_entry_size_and_hooks %p\n", e);
+ if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 ||
+ (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) {
+ duprintf("Bad offset %p, limit = %p\n", e, limit);
+ return -EINVAL;
+ }
+
+ if (e->next_offset < sizeof(struct compat_arpt_entry) +
+ sizeof(struct compat_xt_entry_target)) {
+ duprintf("checking: element %p size %u\n",
+ e, e->next_offset);
+ return -EINVAL;
+ }
+
+ /* For purposes of check_entry casting the compat entry is fine */
+ ret = check_entry((struct arpt_entry *)e, name);
+ if (ret)
+ return ret;
+
+ off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
+ entry_offset = (void *)e - (void *)base;
+
+ t = compat_arpt_get_target(e);
+ target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
+ t->u.user.revision);
+ if (IS_ERR(target)) {
+ duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
+ t->u.user.name);
+ ret = PTR_ERR(target);
+ goto out;
+ }
+ t->u.kernel.target = target;
+
+ off += xt_compat_target_offset(target);
+ *size += off;
+ ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off);
+ if (ret)
+ goto release_target;
+
+ /* Check hooks & underflows */
+ for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
+ if ((unsigned char *)e - base == hook_entries[h])
+ newinfo->hook_entry[h] = hook_entries[h];
+ if ((unsigned char *)e - base == underflows[h])
+ newinfo->underflow[h] = underflows[h];
+ }
+
+ /* Clear counters and comefrom */
+ memset(&e->counters, 0, sizeof(e->counters));
+ e->comefrom = 0;
+ return 0;
+
+release_target:
+ module_put(t->u.kernel.target->me);
+out:
+ return ret;
+}
+
+static int
+compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
+ unsigned int *size, const char *name,
+ struct xt_table_info *newinfo, unsigned char *base)
+{
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ struct arpt_entry *de;
+ unsigned int origsize;
+ int ret, h;
+
+ ret = 0;
+ origsize = *size;
+ de = (struct arpt_entry *)*dstptr;
+ memcpy(de, e, sizeof(struct arpt_entry));
+ memcpy(&de->counters, &e->counters, sizeof(e->counters));
+
+ *dstptr += sizeof(struct arpt_entry);
+ *size += sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
+
+ de->target_offset = e->target_offset - (origsize - *size);
+ t = compat_arpt_get_target(e);
+ target = t->u.kernel.target;
+ xt_compat_target_from_user(t, dstptr, size);
+
+ de->next_offset = e->next_offset - (origsize - *size);
+ for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
+ if ((unsigned char *)de - base < newinfo->hook_entry[h])
+ newinfo->hook_entry[h] -= origsize - *size;
+ if ((unsigned char *)de - base < newinfo->underflow[h])
+ newinfo->underflow[h] -= origsize - *size;
+ }
+ return ret;
+}
+
+static int translate_compat_table(const char *name,
+ unsigned int valid_hooks,
+ struct xt_table_info **pinfo,
+ void **pentry0,
+ unsigned int total_size,
+ unsigned int number,
+ unsigned int *hook_entries,
+ unsigned int *underflows)
+{
+ unsigned int i, j;
+ struct xt_table_info *newinfo, *info;
+ void *pos, *entry0, *entry1;
+ struct compat_arpt_entry *iter0;
+ struct arpt_entry *iter1;
+ unsigned int size;
+ int ret = 0;
+
+ info = *pinfo;
+ entry0 = *pentry0;
+ size = total_size;
+ info->number = number;
+
+ /* Init all hooks to impossible value. */
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ info->hook_entry[i] = 0xFFFFFFFF;
+ info->underflow[i] = 0xFFFFFFFF;
+ }
+
+ duprintf("translate_compat_table: size %u\n", info->size);
+ j = 0;
+ xt_compat_lock(NFPROTO_ARP);
+ xt_compat_init_offsets(NFPROTO_ARP, number);
+ /* Walk through entries, checking offsets. */
+ xt_entry_foreach(iter0, entry0, total_size) {
+ ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+ entry0,
+ entry0 + total_size,
+ hook_entries,
+ underflows,
+ name);
+ if (ret != 0)
+ goto out_unlock;
+ ++j;
+ }
+
+ ret = -EINVAL;
+ if (j != number) {
+ duprintf("translate_compat_table: %u not %u entries\n",
+ j, number);
+ goto out_unlock;
+ }
+
+ /* Check hooks all assigned */
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ /* Only hooks which are valid */
+ if (!(valid_hooks & (1 << i)))
+ continue;
+ if (info->hook_entry[i] == 0xFFFFFFFF) {
+ duprintf("Invalid hook entry %u %u\n",
+ i, hook_entries[i]);
+ goto out_unlock;
+ }
+ if (info->underflow[i] == 0xFFFFFFFF) {
+ duprintf("Invalid underflow %u %u\n",
+ i, underflows[i]);
+ goto out_unlock;
+ }
+ }
+
+ ret = -ENOMEM;
+ newinfo = xt_alloc_table_info(size);
+ if (!newinfo)
+ goto out_unlock;
+
+ newinfo->number = number;
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ newinfo->hook_entry[i] = info->hook_entry[i];
+ newinfo->underflow[i] = info->underflow[i];
+ }
+ entry1 = newinfo->entries[raw_smp_processor_id()];
+ pos = entry1;
+ size = total_size;
+ xt_entry_foreach(iter0, entry0, total_size) {
+ ret = compat_copy_entry_from_user(iter0, &pos, &size,
+ name, newinfo, entry1);
+ if (ret != 0)
+ break;
+ }
+ xt_compat_flush_offsets(NFPROTO_ARP);
+ xt_compat_unlock(NFPROTO_ARP);
+ if (ret)
+ goto free_newinfo;
+
+ ret = -ELOOP;
+ if (!mark_source_chains(newinfo, valid_hooks, entry1))
+ goto free_newinfo;
+
+ i = 0;
+ xt_entry_foreach(iter1, entry1, newinfo->size) {
+ ret = check_target(iter1, name);
+ if (ret != 0)
+ break;
+ ++i;
+ if (strcmp(arpt_get_target(iter1)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
+ }
+ if (ret) {
+ /*
+ * The first i matches need cleanup_entry (calls ->destroy)
+ * because they had called ->check already. The other j-i
+ * entries need only release.
+ */
+ int skip = i;
+ j -= i;
+ xt_entry_foreach(iter0, entry0, newinfo->size) {
+ if (skip-- > 0)
+ continue;
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
+ xt_entry_foreach(iter1, entry1, newinfo->size) {
+ if (i-- == 0)
+ break;
+ cleanup_entry(iter1);
+ }
+ xt_free_table_info(newinfo);
+ return ret;
+ }
+
+ /* And one copy for every other CPU */
+ for_each_possible_cpu(i)
+ if (newinfo->entries[i] && newinfo->entries[i] != entry1)
+ memcpy(newinfo->entries[i], entry1, newinfo->size);
+
+ *pinfo = newinfo;
+ *pentry0 = entry1;
+ xt_free_table_info(info);
+ return 0;
+
+free_newinfo:
+ xt_free_table_info(newinfo);
+out:
+ xt_entry_foreach(iter0, entry0, total_size) {
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
+ return ret;
+out_unlock:
+ xt_compat_flush_offsets(NFPROTO_ARP);
+ xt_compat_unlock(NFPROTO_ARP);
+ goto out;
+}
+
+struct compat_arpt_replace {
+ char name[XT_TABLE_MAXNAMELEN];
+ u32 valid_hooks;
+ u32 num_entries;
+ u32 size;
+ u32 hook_entry[NF_ARP_NUMHOOKS];
+ u32 underflow[NF_ARP_NUMHOOKS];
+ u32 num_counters;
+ compat_uptr_t counters;
+ struct compat_arpt_entry entries[0];
+};
+
+static int compat_do_replace(struct net *net, void __user *user,
+ unsigned int len)
+{
+ int ret;
+ struct compat_arpt_replace tmp;
+ struct xt_table_info *newinfo;
+ void *loc_cpu_entry;
+ struct arpt_entry *iter;
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+ /* overflow check */
+ if (tmp.size >= INT_MAX / num_possible_cpus())
+ return -ENOMEM;
+ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+ return -ENOMEM;
+ if (tmp.num_counters == 0)
+ return -EINVAL;
+
+ tmp.name[sizeof(tmp.name)-1] = 0;
+
+ newinfo = xt_alloc_table_info(tmp.size);
+ if (!newinfo)
+ return -ENOMEM;
+
+ /* choose the copy that is on our node/cpu */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) {
+ ret = -EFAULT;
+ goto free_newinfo;
+ }
+
+ ret = translate_compat_table(tmp.name, tmp.valid_hooks,
+ &newinfo, &loc_cpu_entry, tmp.size,
+ tmp.num_entries, tmp.hook_entry,
+ tmp.underflow);
+ if (ret != 0)
+ goto free_newinfo;
+
+ duprintf("compat_do_replace: Translated table\n");
+
+ ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
+ tmp.num_counters, compat_ptr(tmp.counters));
+ if (ret)
+ goto free_newinfo_untrans;
+ return 0;
+
+ free_newinfo_untrans:
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter);
+ free_newinfo:
+ xt_free_table_info(newinfo);
+ return ret;
+}
+
+static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
+ unsigned int len)
+{
+ int ret;
+
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case ARPT_SO_SET_REPLACE:
+ ret = compat_do_replace(sock_net(sk), user, len);
+ break;
+
+ case ARPT_SO_SET_ADD_COUNTERS:
+ ret = do_add_counters(sock_net(sk), user, len, 1);
+ break;
+
+ default:
+ duprintf("do_arpt_set_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
+ compat_uint_t *size,
+ struct xt_counters *counters,
+ unsigned int i)
+{
+ struct xt_entry_target *t;
+ struct compat_arpt_entry __user *ce;
+ u_int16_t target_offset, next_offset;
+ compat_uint_t origsize;
+ int ret;
+
+ origsize = *size;
+ ce = (struct compat_arpt_entry __user *)*dstptr;
+ if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 ||
+ copy_to_user(&ce->counters, &counters[i],
+ sizeof(counters[i])) != 0)
+ return -EFAULT;
+
+ *dstptr += sizeof(struct compat_arpt_entry);
+ *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
+
+ target_offset = e->target_offset - (origsize - *size);
+
+ t = arpt_get_target(e);
+ ret = xt_compat_target_to_user(t, dstptr, size);
+ if (ret)
+ return ret;
+ next_offset = e->next_offset - (origsize - *size);
+ if (put_user(target_offset, &ce->target_offset) != 0 ||
+ put_user(next_offset, &ce->next_offset) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int compat_copy_entries_to_user(unsigned int total_size,
+ struct xt_table *table,
+ void __user *userptr)
+{
+ struct xt_counters *counters;
+ const struct xt_table_info *private = table->private;
+ void __user *pos;
+ unsigned int size;
+ int ret = 0;
+ void *loc_cpu_entry;
+ unsigned int i = 0;
+ struct arpt_entry *iter;
+
+ counters = alloc_counters(table);
+ if (IS_ERR(counters))
+ return PTR_ERR(counters);
+
+ /* choose the copy on our node/cpu */
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ pos = userptr;
+ size = total_size;
+ xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+ ret = compat_copy_entry_to_user(iter, &pos,
+ &size, counters, i++);
+ if (ret != 0)
+ break;
+ }
+ vfree(counters);
+ return ret;
+}
+
+struct compat_arpt_get_entries {
+ char name[XT_TABLE_MAXNAMELEN];
+ compat_uint_t size;
+ struct compat_arpt_entry entrytable[0];
+};
+
+static int compat_get_entries(struct net *net,
+ struct compat_arpt_get_entries __user *uptr,
+ int *len)
+{
+ int ret;
+ struct compat_arpt_get_entries get;
+ struct xt_table *t;
+
+ if (*len < sizeof(get)) {
+ duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+ return -EINVAL;
+ }
+ if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+ return -EFAULT;
+ if (*len != sizeof(struct compat_arpt_get_entries) + get.size) {
+ duprintf("compat_get_entries: %u != %zu\n",
+ *len, sizeof(get) + get.size);
+ return -EINVAL;
+ }
+
+ xt_compat_lock(NFPROTO_ARP);
+ t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
+ if (!IS_ERR_OR_NULL(t)) {
+ const struct xt_table_info *private = t->private;
+ struct xt_table_info info;
+
+ duprintf("t->private->number = %u\n", private->number);
+ ret = compat_table_info(private, &info);
+ if (!ret && get.size == info.size) {
+ ret = compat_copy_entries_to_user(private->size,
+ t, uptr->entrytable);
+ } else if (!ret) {
+ duprintf("compat_get_entries: I've got %u not %u!\n",
+ private->size, get.size);
+ ret = -EAGAIN;
+ }
+ xt_compat_flush_offsets(NFPROTO_ARP);
+ module_put(t->me);
+ xt_table_unlock(t);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
+
+ xt_compat_unlock(NFPROTO_ARP);
+ return ret;
+}
+
+static int do_arpt_get_ctl(struct sock *, int, void __user *, int *);
+
+static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user,
+ int *len)
+{
+ int ret;
+
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case ARPT_SO_GET_INFO:
+ ret = get_info(sock_net(sk), user, len, 1);
+ break;
+ case ARPT_SO_GET_ENTRIES:
+ ret = compat_get_entries(sock_net(sk), user, len);
+ break;
+ default:
+ ret = do_arpt_get_ctl(sk, cmd, user, len);
+ }
+ return ret;
+}
+#endif
+
+static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+{
+ int ret;
+
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case ARPT_SO_SET_REPLACE:
+ ret = do_replace(sock_net(sk), user, len);
+ break;
+
+ case ARPT_SO_SET_ADD_COUNTERS:
+ ret = do_add_counters(sock_net(sk), user, len, 0);
+ break;
+
+ default:
+ duprintf("do_arpt_set_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+ int ret;
+
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case ARPT_SO_GET_INFO:
+ ret = get_info(sock_net(sk), user, len, 0);
+ break;
+
+ case ARPT_SO_GET_ENTRIES:
+ ret = get_entries(sock_net(sk), user, len);
+ break;
+
+ case ARPT_SO_GET_REVISION_TARGET: {
+ struct xt_get_revision rev;
+
+ if (*len != sizeof(rev)) {
+ ret = -EINVAL;
+ break;
+ }
+ if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
+ ret = -EFAULT;
+ break;
+ }
+ rev.name[sizeof(rev.name)-1] = 0;
+
+ try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name,
+ rev.revision, 1, &ret),
+ "arpt_%s", rev.name);
+ break;
+ }
+
+ default:
+ duprintf("do_arpt_get_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+struct xt_table *arpt_register_table(struct net *net,
+ const struct xt_table *table,
+ const struct arpt_replace *repl)
+{
+ int ret;
+ struct xt_table_info *newinfo;
+ struct xt_table_info bootstrap = {0};
+ void *loc_cpu_entry;
+ struct xt_table *new_table;
+
+ newinfo = xt_alloc_table_info(repl->size);
+ if (!newinfo) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* choose the copy on our node/cpu */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ memcpy(loc_cpu_entry, repl->entries, repl->size);
+
+ ret = translate_table(newinfo, loc_cpu_entry, repl);
+ duprintf("arpt_register_table: translate table gives %d\n", ret);
+ if (ret != 0)
+ goto out_free;
+
+ new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ if (IS_ERR(new_table)) {
+ ret = PTR_ERR(new_table);
+ goto out_free;
+ }
+ return new_table;
+
+out_free:
+ xt_free_table_info(newinfo);
+out:
+ return ERR_PTR(ret);
+}
+
+void arpt_unregister_table(struct xt_table *table)
+{
+ struct xt_table_info *private;
+ void *loc_cpu_entry;
+ struct module *table_owner = table->me;
+ struct arpt_entry *iter;
+
+ private = xt_unregister_table(table);
+
+ /* Decrease module usage counts and free resources */
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ xt_entry_foreach(iter, loc_cpu_entry, private->size)
+ cleanup_entry(iter);
+ if (private->number > private->initial_entries)
+ module_put(table_owner);
+ xt_free_table_info(private);
+}
+
+/* The built-in targets: standard (NULL) and error. */
+static struct xt_target arpt_builtin_tg[] __read_mostly = {
+ {
+ .name = XT_STANDARD_TARGET,
+ .targetsize = sizeof(int),
+ .family = NFPROTO_ARP,
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(compat_int_t),
+ .compat_from_user = compat_standard_from_user,
+ .compat_to_user = compat_standard_to_user,
+#endif
+ },
+ {
+ .name = XT_ERROR_TARGET,
+ .target = arpt_error,
+ .targetsize = XT_FUNCTION_MAXNAMELEN,
+ .family = NFPROTO_ARP,
+ },
+};
+
+static struct nf_sockopt_ops arpt_sockopts = {
+ .pf = PF_INET,
+ .set_optmin = ARPT_BASE_CTL,
+ .set_optmax = ARPT_SO_SET_MAX+1,
+ .set = do_arpt_set_ctl,
+#ifdef CONFIG_COMPAT
+ .compat_set = compat_do_arpt_set_ctl,
+#endif
+ .get_optmin = ARPT_BASE_CTL,
+ .get_optmax = ARPT_SO_GET_MAX+1,
+ .get = do_arpt_get_ctl,
+#ifdef CONFIG_COMPAT
+ .compat_get = compat_do_arpt_get_ctl,
+#endif
+ .owner = THIS_MODULE,
+};
+
+static int __net_init arp_tables_net_init(struct net *net)
+{
+ return xt_proto_init(net, NFPROTO_ARP);
+}
+
+static void __net_exit arp_tables_net_exit(struct net *net)
+{
+ xt_proto_fini(net, NFPROTO_ARP);
+}
+
+static struct pernet_operations arp_tables_net_ops = {
+ .init = arp_tables_net_init,
+ .exit = arp_tables_net_exit,
+};
+
+static int __init arp_tables_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&arp_tables_net_ops);
+ if (ret < 0)
+ goto err1;
+
+ /* No one else will be downing sem now, so we won't sleep */
+ ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
+ if (ret < 0)
+ goto err2;
+
+ /* Register setsockopt */
+ ret = nf_register_sockopt(&arpt_sockopts);
+ if (ret < 0)
+ goto err4;
+
+ printk(KERN_INFO "arp_tables: (C) 2002 David S. Miller\n");
+ return 0;
+
+err4:
+ xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
+err2:
+ unregister_pernet_subsys(&arp_tables_net_ops);
+err1:
+ return ret;
+}
+
+static void __exit arp_tables_fini(void)
+{
+ nf_unregister_sockopt(&arpt_sockopts);
+ xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
+ unregister_pernet_subsys(&arp_tables_net_ops);
+}
+
+EXPORT_SYMBOL(arpt_register_table);
+EXPORT_SYMBOL(arpt_unregister_table);
+EXPORT_SYMBOL(arpt_do_table);
+
+module_init(arp_tables_init);
+module_exit(arp_tables_fini);
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
new file mode 100644
index 000000000..a5e52a9f0
--- /dev/null
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -0,0 +1,91 @@
+/* module that allows mangling of the arp payload */
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_arp/arpt_mangle.h>
+#include <net/sock.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
+MODULE_DESCRIPTION("arptables arp payload mangle target");
+
+static unsigned int
+target(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct arpt_mangle *mangle = par->targinfo;
+ const struct arphdr *arp;
+ unsigned char *arpptr;
+ int pln, hln;
+
+ if (!skb_make_writable(skb, skb->len))
+ return NF_DROP;
+
+ arp = arp_hdr(skb);
+ arpptr = skb_network_header(skb) + sizeof(*arp);
+ pln = arp->ar_pln;
+ hln = arp->ar_hln;
+ /* We assume that pln and hln were checked in the match */
+ if (mangle->flags & ARPT_MANGLE_SDEV) {
+ if (ARPT_DEV_ADDR_LEN_MAX < hln ||
+ (arpptr + hln > skb_tail_pointer(skb)))
+ return NF_DROP;
+ memcpy(arpptr, mangle->src_devaddr, hln);
+ }
+ arpptr += hln;
+ if (mangle->flags & ARPT_MANGLE_SIP) {
+ if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
+ (arpptr + pln > skb_tail_pointer(skb)))
+ return NF_DROP;
+ memcpy(arpptr, &mangle->u_s.src_ip, pln);
+ }
+ arpptr += pln;
+ if (mangle->flags & ARPT_MANGLE_TDEV) {
+ if (ARPT_DEV_ADDR_LEN_MAX < hln ||
+ (arpptr + hln > skb_tail_pointer(skb)))
+ return NF_DROP;
+ memcpy(arpptr, mangle->tgt_devaddr, hln);
+ }
+ arpptr += hln;
+ if (mangle->flags & ARPT_MANGLE_TIP) {
+ if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
+ (arpptr + pln > skb_tail_pointer(skb)))
+ return NF_DROP;
+ memcpy(arpptr, &mangle->u_t.tgt_ip, pln);
+ }
+ return mangle->target;
+}
+
+static int checkentry(const struct xt_tgchk_param *par)
+{
+ const struct arpt_mangle *mangle = par->targinfo;
+
+ if (mangle->flags & ~ARPT_MANGLE_MASK ||
+ !(mangle->flags & ARPT_MANGLE_MASK))
+ return -EINVAL;
+
+ if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT &&
+ mangle->target != XT_CONTINUE)
+ return -EINVAL;
+ return 0;
+}
+
+static struct xt_target arpt_mangle_reg __read_mostly = {
+ .name = "mangle",
+ .family = NFPROTO_ARP,
+ .target = target,
+ .targetsize = sizeof(struct arpt_mangle),
+ .checkentry = checkentry,
+ .me = THIS_MODULE,
+};
+
+static int __init arpt_mangle_init(void)
+{
+ return xt_register_target(&arpt_mangle_reg);
+}
+
+static void __exit arpt_mangle_fini(void)
+{
+ xt_unregister_target(&arpt_mangle_reg);
+}
+
+module_init(arpt_mangle_init);
+module_exit(arpt_mangle_fini);
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
new file mode 100644
index 000000000..93876d031
--- /dev/null
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -0,0 +1,91 @@
+/*
+ * Filtering ARP tables module.
+ *
+ * Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_arp/arp_tables.h>
+#include <linux/slab.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
+MODULE_DESCRIPTION("arptables filter table");
+
+#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
+ (1 << NF_ARP_FORWARD))
+
+static const struct xt_table packet_filter = {
+ .name = "filter",
+ .valid_hooks = FILTER_VALID_HOOKS,
+ .me = THIS_MODULE,
+ .af = NFPROTO_ARP,
+ .priority = NF_IP_PRI_FILTER,
+};
+
+/* The work comes in here from netfilter.c */
+static unsigned int
+arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ const struct net *net = dev_net(state->in ? state->in : state->out);
+
+ return arpt_do_table(skb, ops->hooknum, state,
+ net->ipv4.arptable_filter);
+}
+
+static struct nf_hook_ops *arpfilter_ops __read_mostly;
+
+static int __net_init arptable_filter_net_init(struct net *net)
+{
+ struct arpt_replace *repl;
+
+ repl = arpt_alloc_initial_table(&packet_filter);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv4.arptable_filter =
+ arpt_register_table(net, &packet_filter, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.arptable_filter);
+}
+
+static void __net_exit arptable_filter_net_exit(struct net *net)
+{
+ arpt_unregister_table(net->ipv4.arptable_filter);
+}
+
+static struct pernet_operations arptable_filter_net_ops = {
+ .init = arptable_filter_net_init,
+ .exit = arptable_filter_net_exit,
+};
+
+static int __init arptable_filter_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&arptable_filter_net_ops);
+ if (ret < 0)
+ return ret;
+
+ arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook);
+ if (IS_ERR(arpfilter_ops)) {
+ ret = PTR_ERR(arpfilter_ops);
+ goto cleanup_table;
+ }
+ return ret;
+
+cleanup_table:
+ unregister_pernet_subsys(&arptable_filter_net_ops);
+ return ret;
+}
+
+static void __exit arptable_filter_fini(void)
+{
+ xt_hook_unlink(&packet_filter, arpfilter_ops);
+ unregister_pernet_subsys(&arptable_filter_net_ops);
+}
+
+module_init(arptable_filter_init);
+module_exit(arptable_filter_fini);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
new file mode 100644
index 000000000..2d0e265fe
--- /dev/null
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -0,0 +1,2282 @@
+/*
+ * Packet matching code.
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/cache.h>
+#include <linux/capability.h>
+#include <linux/skbuff.h>
+#include <linux/kmod.h>
+#include <linux/vmalloc.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <linux/icmp.h>
+#include <net/ip.h>
+#include <net/compat.h>
+#include <asm/uaccess.h>
+#include <linux/mutex.h>
+#include <linux/proc_fs.h>
+#include <linux/err.h>
+#include <linux/cpumask.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/netfilter/nf_log.h>
+#include "../../netfilter/xt_repldata.h"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("IPv4 packet filter");
+
+/*#define DEBUG_IP_FIREWALL*/
+/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
+/*#define DEBUG_IP_FIREWALL_USER*/
+
+#ifdef DEBUG_IP_FIREWALL
+#define dprintf(format, args...) pr_info(format , ## args)
+#else
+#define dprintf(format, args...)
+#endif
+
+#ifdef DEBUG_IP_FIREWALL_USER
+#define duprintf(format, args...) pr_info(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+#ifdef CONFIG_NETFILTER_DEBUG
+#define IP_NF_ASSERT(x) WARN_ON(!(x))
+#else
+#define IP_NF_ASSERT(x)
+#endif
+
+#if 0
+/* All the better to debug you with... */
+#define static
+#define inline
+#endif
+
+void *ipt_alloc_initial_table(const struct xt_table *info)
+{
+ return xt_alloc_initial_table(ipt, IPT);
+}
+EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
+
+/* Returns whether matches rule or not. */
+/* Performance critical - called for every packet */
+static inline bool
+ip_packet_match(const struct iphdr *ip,
+ const char *indev,
+ const char *outdev,
+ const struct ipt_ip *ipinfo,
+ int isfrag)
+{
+ unsigned long ret;
+
+#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
+
+ if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
+ IPT_INV_SRCIP) ||
+ FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
+ IPT_INV_DSTIP)) {
+ dprintf("Source or dest mismatch.\n");
+
+ dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
+ &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr,
+ ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
+ dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n",
+ &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr,
+ ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
+ return false;
+ }
+
+ ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
+
+ if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
+ dprintf("VIA in mismatch (%s vs %s).%s\n",
+ indev, ipinfo->iniface,
+ ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
+ return false;
+ }
+
+ ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
+
+ if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
+ dprintf("VIA out mismatch (%s vs %s).%s\n",
+ outdev, ipinfo->outiface,
+ ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
+ return false;
+ }
+
+ /* Check specific protocol */
+ if (ipinfo->proto &&
+ FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
+ dprintf("Packet protocol %hi does not match %hi.%s\n",
+ ip->protocol, ipinfo->proto,
+ ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
+ return false;
+ }
+
+ /* If we have a fragment rule but the packet is not a fragment
+ * then we return zero */
+ if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
+ dprintf("Fragment rule but not fragment.%s\n",
+ ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+ip_checkentry(const struct ipt_ip *ip)
+{
+ if (ip->flags & ~IPT_F_MASK) {
+ duprintf("Unknown flag bits set: %08X\n",
+ ip->flags & ~IPT_F_MASK);
+ return false;
+ }
+ if (ip->invflags & ~IPT_INV_MASK) {
+ duprintf("Unknown invflag bits set: %08X\n",
+ ip->invflags & ~IPT_INV_MASK);
+ return false;
+ }
+ return true;
+}
+
+static unsigned int
+ipt_error(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo);
+
+ return NF_DROP;
+}
+
+/* Performance critical */
+static inline struct ipt_entry *
+get_entry(const void *base, unsigned int offset)
+{
+ return (struct ipt_entry *)(base + offset);
+}
+
+/* All zeroes == unconditional rule. */
+/* Mildly perf critical (only if packet tracing is on) */
+static inline bool unconditional(const struct ipt_ip *ip)
+{
+ static const struct ipt_ip uncond;
+
+ return memcmp(ip, &uncond, sizeof(uncond)) == 0;
+#undef FWINV
+}
+
+/* for const-correctness */
+static inline const struct xt_entry_target *
+ipt_get_target_c(const struct ipt_entry *e)
+{
+ return ipt_get_target((struct ipt_entry *)e);
+}
+
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
+static const char *const hooknames[] = {
+ [NF_INET_PRE_ROUTING] = "PREROUTING",
+ [NF_INET_LOCAL_IN] = "INPUT",
+ [NF_INET_FORWARD] = "FORWARD",
+ [NF_INET_LOCAL_OUT] = "OUTPUT",
+ [NF_INET_POST_ROUTING] = "POSTROUTING",
+};
+
+enum nf_ip_trace_comments {
+ NF_IP_TRACE_COMMENT_RULE,
+ NF_IP_TRACE_COMMENT_RETURN,
+ NF_IP_TRACE_COMMENT_POLICY,
+};
+
+static const char *const comments[] = {
+ [NF_IP_TRACE_COMMENT_RULE] = "rule",
+ [NF_IP_TRACE_COMMENT_RETURN] = "return",
+ [NF_IP_TRACE_COMMENT_POLICY] = "policy",
+};
+
+static struct nf_loginfo trace_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = 4,
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+/* Mildly perf critical (only if packet tracing is on) */
+static inline int
+get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
+ const char *hookname, const char **chainname,
+ const char **comment, unsigned int *rulenum)
+{
+ const struct xt_standard_target *t = (void *)ipt_get_target_c(s);
+
+ if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
+ /* Head of user chain: ERROR target with chainname */
+ *chainname = t->target.data;
+ (*rulenum) = 0;
+ } else if (s == e) {
+ (*rulenum)++;
+
+ if (s->target_offset == sizeof(struct ipt_entry) &&
+ strcmp(t->target.u.kernel.target->name,
+ XT_STANDARD_TARGET) == 0 &&
+ t->verdict < 0 &&
+ unconditional(&s->ip)) {
+ /* Tail of chains: STANDARD target (return/policy) */
+ *comment = *chainname == hookname
+ ? comments[NF_IP_TRACE_COMMENT_POLICY]
+ : comments[NF_IP_TRACE_COMMENT_RETURN];
+ }
+ return 1;
+ } else
+ (*rulenum)++;
+
+ return 0;
+}
+
+static void trace_packet(const struct sk_buff *skb,
+ unsigned int hook,
+ const struct net_device *in,
+ const struct net_device *out,
+ const char *tablename,
+ const struct xt_table_info *private,
+ const struct ipt_entry *e)
+{
+ const void *table_base;
+ const struct ipt_entry *root;
+ const char *hookname, *chainname, *comment;
+ const struct ipt_entry *iter;
+ unsigned int rulenum = 0;
+ struct net *net = dev_net(in ? in : out);
+
+ table_base = private->entries[smp_processor_id()];
+ root = get_entry(table_base, private->hook_entry[hook]);
+
+ hookname = chainname = hooknames[hook];
+ comment = comments[NF_IP_TRACE_COMMENT_RULE];
+
+ xt_entry_foreach(iter, root, private->size - private->hook_entry[hook])
+ if (get_chainname_rulenum(iter, e, hookname,
+ &chainname, &comment, &rulenum) != 0)
+ break;
+
+ nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo,
+ "TRACE: %s:%s:%s:%u ",
+ tablename, chainname, comment, rulenum);
+}
+#endif
+
+static inline __pure
+struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
+{
+ return (void *)entry + entry->next_offset;
+}
+
+/* Returns one of the generic firewall policies, like NF_ACCEPT. */
+unsigned int
+ipt_do_table(struct sk_buff *skb,
+ unsigned int hook,
+ const struct nf_hook_state *state,
+ struct xt_table *table)
+{
+ static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
+ const struct iphdr *ip;
+ /* Initializing verdict to NF_DROP keeps gcc happy. */
+ unsigned int verdict = NF_DROP;
+ const char *indev, *outdev;
+ const void *table_base;
+ struct ipt_entry *e, **jumpstack;
+ unsigned int *stackptr, origptr, cpu;
+ const struct xt_table_info *private;
+ struct xt_action_param acpar;
+ unsigned int addend;
+
+ /* Initialization */
+ ip = ip_hdr(skb);
+ indev = state->in ? state->in->name : nulldevname;
+ outdev = state->out ? state->out->name : nulldevname;
+ /* We handle fragments by dealing with the first fragment as
+ * if it was a normal packet. All other fragments are treated
+ * normally, except that they will NEVER match rules that ask
+ * things we don't know, ie. tcp syn flag or ports). If the
+ * rule is also a fragment-specific rule, non-fragments won't
+ * match it. */
+ acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
+ acpar.thoff = ip_hdrlen(skb);
+ acpar.hotdrop = false;
+ acpar.in = state->in;
+ acpar.out = state->out;
+ acpar.family = NFPROTO_IPV4;
+ acpar.hooknum = hook;
+
+ IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+ local_bh_disable();
+ addend = xt_write_recseq_begin();
+ private = table->private;
+ cpu = smp_processor_id();
+ /*
+ * Ensure we load private-> members after we've fetched the base
+ * pointer.
+ */
+ smp_read_barrier_depends();
+ table_base = private->entries[cpu];
+ jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
+ stackptr = per_cpu_ptr(private->stackptr, cpu);
+ origptr = *stackptr;
+
+ e = get_entry(table_base, private->hook_entry[hook]);
+
+ pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n",
+ table->name, hook, origptr,
+ get_entry(table_base, private->underflow[hook]));
+
+ do {
+ const struct xt_entry_target *t;
+ const struct xt_entry_match *ematch;
+
+ IP_NF_ASSERT(e);
+ if (!ip_packet_match(ip, indev, outdev,
+ &e->ip, acpar.fragoff)) {
+ no_match:
+ e = ipt_next_entry(e);
+ continue;
+ }
+
+ xt_ematch_foreach(ematch, e) {
+ acpar.match = ematch->u.kernel.match;
+ acpar.matchinfo = ematch->data;
+ if (!acpar.match->match(skb, &acpar))
+ goto no_match;
+ }
+
+ ADD_COUNTER(e->counters, skb->len, 1);
+
+ t = ipt_get_target(e);
+ IP_NF_ASSERT(t->u.kernel.target);
+
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
+ /* The packet is traced: log it */
+ if (unlikely(skb->nf_trace))
+ trace_packet(skb, hook, state->in, state->out,
+ table->name, private, e);
+#endif
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
+
+ v = ((struct xt_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != XT_RETURN) {
+ verdict = (unsigned int)(-v) - 1;
+ break;
+ }
+ if (*stackptr <= origptr) {
+ e = get_entry(table_base,
+ private->underflow[hook]);
+ pr_debug("Underflow (this is normal) "
+ "to %p\n", e);
+ } else {
+ e = jumpstack[--*stackptr];
+ pr_debug("Pulled %p out from pos %u\n",
+ e, *stackptr);
+ e = ipt_next_entry(e);
+ }
+ continue;
+ }
+ if (table_base + v != ipt_next_entry(e) &&
+ !(e->ip.flags & IPT_F_GOTO)) {
+ if (*stackptr >= private->stacksize) {
+ verdict = NF_DROP;
+ break;
+ }
+ jumpstack[(*stackptr)++] = e;
+ pr_debug("Pushed %p into pos %u\n",
+ e, *stackptr - 1);
+ }
+
+ e = get_entry(table_base, v);
+ continue;
+ }
+
+ acpar.target = t->u.kernel.target;
+ acpar.targinfo = t->data;
+
+ verdict = t->u.kernel.target->target(skb, &acpar);
+ /* Target might have changed stuff. */
+ ip = ip_hdr(skb);
+ if (verdict == XT_CONTINUE)
+ e = ipt_next_entry(e);
+ else
+ /* Verdict */
+ break;
+ } while (!acpar.hotdrop);
+ pr_debug("Exiting %s; resetting sp from %u to %u\n",
+ __func__, *stackptr, origptr);
+ *stackptr = origptr;
+ xt_write_recseq_end(addend);
+ local_bh_enable();
+
+#ifdef DEBUG_ALLOW_ALL
+ return NF_ACCEPT;
+#else
+ if (acpar.hotdrop)
+ return NF_DROP;
+ else return verdict;
+#endif
+}
+
+/* Figures out from what hook each rule can be called: returns 0 if
+ there are loops. Puts hook bitmask in comefrom. */
+static int
+mark_source_chains(const struct xt_table_info *newinfo,
+ unsigned int valid_hooks, void *entry0)
+{
+ unsigned int hook;
+
+ /* No recursion; use packet counter to save back ptrs (reset
+ to 0 as we leave), and comefrom to save source hook bitmask */
+ for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
+ unsigned int pos = newinfo->hook_entry[hook];
+ struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);
+
+ if (!(valid_hooks & (1 << hook)))
+ continue;
+
+ /* Set initial back pointer. */
+ e->counters.pcnt = pos;
+
+ for (;;) {
+ const struct xt_standard_target *t
+ = (void *)ipt_get_target_c(e);
+ int visited = e->comefrom & (1 << hook);
+
+ if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
+ pr_err("iptables: loop hook %u pos %u %08X.\n",
+ hook, pos, e->comefrom);
+ return 0;
+ }
+ e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
+
+ /* Unconditional return/END. */
+ if ((e->target_offset == sizeof(struct ipt_entry) &&
+ (strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0) &&
+ t->verdict < 0 && unconditional(&e->ip)) ||
+ visited) {
+ unsigned int oldpos, size;
+
+ if ((strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0) &&
+ t->verdict < -NF_MAX_VERDICT - 1) {
+ duprintf("mark_source_chains: bad "
+ "negative verdict (%i)\n",
+ t->verdict);
+ return 0;
+ }
+
+ /* Return: backtrack through the last
+ big jump. */
+ do {
+ e->comefrom ^= (1<<NF_INET_NUMHOOKS);
+#ifdef DEBUG_IP_FIREWALL_USER
+ if (e->comefrom
+ & (1 << NF_INET_NUMHOOKS)) {
+ duprintf("Back unset "
+ "on hook %u "
+ "rule %u\n",
+ hook, pos);
+ }
+#endif
+ oldpos = pos;
+ pos = e->counters.pcnt;
+ e->counters.pcnt = 0;
+
+ /* We're at the start. */
+ if (pos == oldpos)
+ goto next;
+
+ e = (struct ipt_entry *)
+ (entry0 + pos);
+ } while (oldpos == pos + e->next_offset);
+
+ /* Move along one */
+ size = e->next_offset;
+ e = (struct ipt_entry *)
+ (entry0 + pos + size);
+ e->counters.pcnt = pos;
+ pos += size;
+ } else {
+ int newpos = t->verdict;
+
+ if (strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0 &&
+ newpos >= 0) {
+ if (newpos > newinfo->size -
+ sizeof(struct ipt_entry)) {
+ duprintf("mark_source_chains: "
+ "bad verdict (%i)\n",
+ newpos);
+ return 0;
+ }
+ /* This a jump; chase it. */
+ duprintf("Jump rule %u -> %u\n",
+ pos, newpos);
+ } else {
+ /* ... this is a fallthru */
+ newpos = pos + e->next_offset;
+ }
+ e = (struct ipt_entry *)
+ (entry0 + newpos);
+ e->counters.pcnt = pos;
+ pos = newpos;
+ }
+ }
+ next:
+ duprintf("Finished chain %u\n", hook);
+ }
+ return 1;
+}
+
+static void cleanup_match(struct xt_entry_match *m, struct net *net)
+{
+ struct xt_mtdtor_param par;
+
+ par.net = net;
+ par.match = m->u.kernel.match;
+ par.matchinfo = m->data;
+ par.family = NFPROTO_IPV4;
+ if (par.match->destroy != NULL)
+ par.match->destroy(&par);
+ module_put(par.match->me);
+}
+
+static int
+check_entry(const struct ipt_entry *e, const char *name)
+{
+ const struct xt_entry_target *t;
+
+ if (!ip_checkentry(&e->ip)) {
+ duprintf("ip check failed %p %s.\n", e, name);
+ return -EINVAL;
+ }
+
+ if (e->target_offset + sizeof(struct xt_entry_target) >
+ e->next_offset)
+ return -EINVAL;
+
+ t = ipt_get_target_c(e);
+ if (e->target_offset + t->u.target_size > e->next_offset)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int
+check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
+{
+ const struct ipt_ip *ip = par->entryinfo;
+ int ret;
+
+ par->match = m->u.kernel.match;
+ par->matchinfo = m->data;
+
+ ret = xt_check_match(par, m->u.match_size - sizeof(*m),
+ ip->proto, ip->invflags & IPT_INV_PROTO);
+ if (ret < 0) {
+ duprintf("check failed for `%s'.\n", par->match->name);
+ return ret;
+ }
+ return 0;
+}
+
+static int
+find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
+{
+ struct xt_match *match;
+ int ret;
+
+ match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
+ m->u.user.revision);
+ if (IS_ERR(match)) {
+ duprintf("find_check_match: `%s' not found\n", m->u.user.name);
+ return PTR_ERR(match);
+ }
+ m->u.kernel.match = match;
+
+ ret = check_match(m, par);
+ if (ret)
+ goto err;
+
+ return 0;
+err:
+ module_put(m->u.kernel.match->me);
+ return ret;
+}
+
+static int check_target(struct ipt_entry *e, struct net *net, const char *name)
+{
+ struct xt_entry_target *t = ipt_get_target(e);
+ struct xt_tgchk_param par = {
+ .net = net,
+ .table = name,
+ .entryinfo = e,
+ .target = t->u.kernel.target,
+ .targinfo = t->data,
+ .hook_mask = e->comefrom,
+ .family = NFPROTO_IPV4,
+ };
+ int ret;
+
+ ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
+ e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
+ if (ret < 0) {
+ duprintf("check failed for `%s'.\n",
+ t->u.kernel.target->name);
+ return ret;
+ }
+ return 0;
+}
+
+static int
+find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
+ unsigned int size)
+{
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ int ret;
+ unsigned int j;
+ struct xt_mtchk_param mtpar;
+ struct xt_entry_match *ematch;
+
+ ret = check_entry(e, name);
+ if (ret)
+ return ret;
+
+ j = 0;
+ mtpar.net = net;
+ mtpar.table = name;
+ mtpar.entryinfo = &e->ip;
+ mtpar.hook_mask = e->comefrom;
+ mtpar.family = NFPROTO_IPV4;
+ xt_ematch_foreach(ematch, e) {
+ ret = find_check_match(ematch, &mtpar);
+ if (ret != 0)
+ goto cleanup_matches;
+ ++j;
+ }
+
+ t = ipt_get_target(e);
+ target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
+ t->u.user.revision);
+ if (IS_ERR(target)) {
+ duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
+ ret = PTR_ERR(target);
+ goto cleanup_matches;
+ }
+ t->u.kernel.target = target;
+
+ ret = check_target(e, net, name);
+ if (ret)
+ goto err;
+ return 0;
+ err:
+ module_put(t->u.kernel.target->me);
+ cleanup_matches:
+ xt_ematch_foreach(ematch, e) {
+ if (j-- == 0)
+ break;
+ cleanup_match(ematch, net);
+ }
+ return ret;
+}
+
+static bool check_underflow(const struct ipt_entry *e)
+{
+ const struct xt_entry_target *t;
+ unsigned int verdict;
+
+ if (!unconditional(&e->ip))
+ return false;
+ t = ipt_get_target_c(e);
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
+ return false;
+ verdict = ((struct xt_standard_target *)t)->verdict;
+ verdict = -verdict - 1;
+ return verdict == NF_DROP || verdict == NF_ACCEPT;
+}
+
+static int
+check_entry_size_and_hooks(struct ipt_entry *e,
+ struct xt_table_info *newinfo,
+ const unsigned char *base,
+ const unsigned char *limit,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows,
+ unsigned int valid_hooks)
+{
+ unsigned int h;
+
+ if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
+ (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
+ duprintf("Bad offset %p\n", e);
+ return -EINVAL;
+ }
+
+ if (e->next_offset
+ < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) {
+ duprintf("checking: element %p size %u\n",
+ e, e->next_offset);
+ return -EINVAL;
+ }
+
+ /* Check hooks & underflows */
+ for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+ if (!(valid_hooks & (1 << h)))
+ continue;
+ if ((unsigned char *)e - base == hook_entries[h])
+ newinfo->hook_entry[h] = hook_entries[h];
+ if ((unsigned char *)e - base == underflows[h]) {
+ if (!check_underflow(e)) {
+ pr_err("Underflows must be unconditional and "
+ "use the STANDARD target with "
+ "ACCEPT/DROP\n");
+ return -EINVAL;
+ }
+ newinfo->underflow[h] = underflows[h];
+ }
+ }
+
+ /* Clear counters and comefrom */
+ e->counters = ((struct xt_counters) { 0, 0 });
+ e->comefrom = 0;
+ return 0;
+}
+
+static void
+cleanup_entry(struct ipt_entry *e, struct net *net)
+{
+ struct xt_tgdtor_param par;
+ struct xt_entry_target *t;
+ struct xt_entry_match *ematch;
+
+ /* Cleanup all matches */
+ xt_ematch_foreach(ematch, e)
+ cleanup_match(ematch, net);
+ t = ipt_get_target(e);
+
+ par.net = net;
+ par.target = t->u.kernel.target;
+ par.targinfo = t->data;
+ par.family = NFPROTO_IPV4;
+ if (par.target->destroy != NULL)
+ par.target->destroy(&par);
+ module_put(par.target->me);
+}
+
+/* Checks and translates the user-supplied table segment (held in
+ newinfo) */
+static int
+translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
+ const struct ipt_replace *repl)
+{
+ struct ipt_entry *iter;
+ unsigned int i;
+ int ret = 0;
+
+ newinfo->size = repl->size;
+ newinfo->number = repl->num_entries;
+
+ /* Init all hooks to impossible value. */
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ newinfo->hook_entry[i] = 0xFFFFFFFF;
+ newinfo->underflow[i] = 0xFFFFFFFF;
+ }
+
+ duprintf("translate_table: size %u\n", newinfo->size);
+ i = 0;
+ /* Walk through entries, checking offsets. */
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ ret = check_entry_size_and_hooks(iter, newinfo, entry0,
+ entry0 + repl->size,
+ repl->hook_entry,
+ repl->underflow,
+ repl->valid_hooks);
+ if (ret != 0)
+ return ret;
+ ++i;
+ if (strcmp(ipt_get_target(iter)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
+ }
+
+ if (i != repl->num_entries) {
+ duprintf("translate_table: %u not %u entries\n",
+ i, repl->num_entries);
+ return -EINVAL;
+ }
+
+ /* Check hooks all assigned */
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ /* Only hooks which are valid */
+ if (!(repl->valid_hooks & (1 << i)))
+ continue;
+ if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
+ duprintf("Invalid hook entry %u %u\n",
+ i, repl->hook_entry[i]);
+ return -EINVAL;
+ }
+ if (newinfo->underflow[i] == 0xFFFFFFFF) {
+ duprintf("Invalid underflow %u %u\n",
+ i, repl->underflow[i]);
+ return -EINVAL;
+ }
+ }
+
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
+ return -ELOOP;
+
+ /* Finally, each sanity check must pass */
+ i = 0;
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ ret = find_check_entry(iter, net, repl->name, repl->size);
+ if (ret != 0)
+ break;
+ ++i;
+ }
+
+ if (ret != 0) {
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ if (i-- == 0)
+ break;
+ cleanup_entry(iter, net);
+ }
+ return ret;
+ }
+
+ /* And one copy for every other CPU */
+ for_each_possible_cpu(i) {
+ if (newinfo->entries[i] && newinfo->entries[i] != entry0)
+ memcpy(newinfo->entries[i], entry0, newinfo->size);
+ }
+
+ return ret;
+}
+
+static void
+get_counters(const struct xt_table_info *t,
+ struct xt_counters counters[])
+{
+ struct ipt_entry *iter;
+ unsigned int cpu;
+ unsigned int i;
+
+ for_each_possible_cpu(cpu) {
+ seqcount_t *s = &per_cpu(xt_recseq, cpu);
+
+ i = 0;
+ xt_entry_foreach(iter, t->entries[cpu], t->size) {
+ u64 bcnt, pcnt;
+ unsigned int start;
+
+ do {
+ start = read_seqcount_begin(s);
+ bcnt = iter->counters.bcnt;
+ pcnt = iter->counters.pcnt;
+ } while (read_seqcount_retry(s, start));
+
+ ADD_COUNTER(counters[i], bcnt, pcnt);
+ ++i; /* macro does multi eval of i */
+ }
+ }
+}
+
+static struct xt_counters *alloc_counters(const struct xt_table *table)
+{
+ unsigned int countersize;
+ struct xt_counters *counters;
+ const struct xt_table_info *private = table->private;
+
+ /* We need atomic snapshot of counters: rest doesn't change
+ (other than comefrom, which userspace doesn't care
+ about). */
+ countersize = sizeof(struct xt_counters) * private->number;
+ counters = vzalloc(countersize);
+
+ if (counters == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ get_counters(private, counters);
+
+ return counters;
+}
+
+static int
+copy_entries_to_user(unsigned int total_size,
+ const struct xt_table *table,
+ void __user *userptr)
+{
+ unsigned int off, num;
+ const struct ipt_entry *e;
+ struct xt_counters *counters;
+ const struct xt_table_info *private = table->private;
+ int ret = 0;
+ const void *loc_cpu_entry;
+
+ counters = alloc_counters(table);
+ if (IS_ERR(counters))
+ return PTR_ERR(counters);
+
+ /* choose the copy that is on our node/cpu, ...
+ * This choice is lazy (because current thread is
+ * allowed to migrate to another cpu)
+ */
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ /* FIXME: use iterator macros --RR */
+ /* ... then go back and fix counters and names */
+ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
+ unsigned int i;
+ const struct xt_entry_match *m;
+ const struct xt_entry_target *t;
+
+ e = (struct ipt_entry *)(loc_cpu_entry + off);
+ if (copy_to_user(userptr + off
+ + offsetof(struct ipt_entry, counters),
+ &counters[num],
+ sizeof(counters[num])) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ for (i = sizeof(struct ipt_entry);
+ i < e->target_offset;
+ i += m->u.match_size) {
+ m = (void *)e + i;
+
+ if (copy_to_user(userptr + off + i
+ + offsetof(struct xt_entry_match,
+ u.user.name),
+ m->u.kernel.match->name,
+ strlen(m->u.kernel.match->name)+1)
+ != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+ }
+
+ t = ipt_get_target_c(e);
+ if (copy_to_user(userptr + off + e->target_offset
+ + offsetof(struct xt_entry_target,
+ u.user.name),
+ t->u.kernel.target->name,
+ strlen(t->u.kernel.target->name)+1) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+ }
+
+ free_counters:
+ vfree(counters);
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static void compat_standard_from_user(void *dst, const void *src)
+{
+ int v = *(compat_int_t *)src;
+
+ if (v > 0)
+ v += xt_compat_calc_jump(AF_INET, v);
+ memcpy(dst, &v, sizeof(v));
+}
+
+static int compat_standard_to_user(void __user *dst, const void *src)
+{
+ compat_int_t cv = *(int *)src;
+
+ if (cv > 0)
+ cv -= xt_compat_calc_jump(AF_INET, cv);
+ return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
+}
+
+static int compat_calc_entry(const struct ipt_entry *e,
+ const struct xt_table_info *info,
+ const void *base, struct xt_table_info *newinfo)
+{
+ const struct xt_entry_match *ematch;
+ const struct xt_entry_target *t;
+ unsigned int entry_offset;
+ int off, i, ret;
+
+ off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
+ entry_offset = (void *)e - base;
+ xt_ematch_foreach(ematch, e)
+ off += xt_compat_match_offset(ematch->u.kernel.match);
+ t = ipt_get_target_c(e);
+ off += xt_compat_target_offset(t->u.kernel.target);
+ newinfo->size -= off;
+ ret = xt_compat_add_offset(AF_INET, entry_offset, off);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ if (info->hook_entry[i] &&
+ (e < (struct ipt_entry *)(base + info->hook_entry[i])))
+ newinfo->hook_entry[i] -= off;
+ if (info->underflow[i] &&
+ (e < (struct ipt_entry *)(base + info->underflow[i])))
+ newinfo->underflow[i] -= off;
+ }
+ return 0;
+}
+
+static int compat_table_info(const struct xt_table_info *info,
+ struct xt_table_info *newinfo)
+{
+ struct ipt_entry *iter;
+ void *loc_cpu_entry;
+ int ret;
+
+ if (!newinfo || !info)
+ return -EINVAL;
+
+ /* we dont care about newinfo->entries[] */
+ memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+ newinfo->initial_entries = 0;
+ loc_cpu_entry = info->entries[raw_smp_processor_id()];
+ xt_compat_init_offsets(AF_INET, info->number);
+ xt_entry_foreach(iter, loc_cpu_entry, info->size) {
+ ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
+ if (ret != 0)
+ return ret;
+ }
+ return 0;
+}
+#endif
+
+static int get_info(struct net *net, void __user *user,
+ const int *len, int compat)
+{
+ char name[XT_TABLE_MAXNAMELEN];
+ struct xt_table *t;
+ int ret;
+
+ if (*len != sizeof(struct ipt_getinfo)) {
+ duprintf("length %u != %zu\n", *len,
+ sizeof(struct ipt_getinfo));
+ return -EINVAL;
+ }
+
+ if (copy_from_user(name, user, sizeof(name)) != 0)
+ return -EFAULT;
+
+ name[XT_TABLE_MAXNAMELEN-1] = '\0';
+#ifdef CONFIG_COMPAT
+ if (compat)
+ xt_compat_lock(AF_INET);
+#endif
+ t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
+ "iptable_%s", name);
+ if (!IS_ERR_OR_NULL(t)) {
+ struct ipt_getinfo info;
+ const struct xt_table_info *private = t->private;
+#ifdef CONFIG_COMPAT
+ struct xt_table_info tmp;
+
+ if (compat) {
+ ret = compat_table_info(private, &tmp);
+ xt_compat_flush_offsets(AF_INET);
+ private = &tmp;
+ }
+#endif
+ memset(&info, 0, sizeof(info));
+ info.valid_hooks = t->valid_hooks;
+ memcpy(info.hook_entry, private->hook_entry,
+ sizeof(info.hook_entry));
+ memcpy(info.underflow, private->underflow,
+ sizeof(info.underflow));
+ info.num_entries = private->number;
+ info.size = private->size;
+ strcpy(info.name, name);
+
+ if (copy_to_user(user, &info, *len) != 0)
+ ret = -EFAULT;
+ else
+ ret = 0;
+
+ xt_table_unlock(t);
+ module_put(t->me);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
+#ifdef CONFIG_COMPAT
+ if (compat)
+ xt_compat_unlock(AF_INET);
+#endif
+ return ret;
+}
+
+static int
+get_entries(struct net *net, struct ipt_get_entries __user *uptr,
+ const int *len)
+{
+ int ret;
+ struct ipt_get_entries get;
+ struct xt_table *t;
+
+ if (*len < sizeof(get)) {
+ duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
+ return -EINVAL;
+ }
+ if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+ return -EFAULT;
+ if (*len != sizeof(struct ipt_get_entries) + get.size) {
+ duprintf("get_entries: %u != %zu\n",
+ *len, sizeof(get) + get.size);
+ return -EINVAL;
+ }
+
+ t = xt_find_table_lock(net, AF_INET, get.name);
+ if (!IS_ERR_OR_NULL(t)) {
+ const struct xt_table_info *private = t->private;
+ duprintf("t->private->number = %u\n", private->number);
+ if (get.size == private->size)
+ ret = copy_entries_to_user(private->size,
+ t, uptr->entrytable);
+ else {
+ duprintf("get_entries: I've got %u not %u!\n",
+ private->size, get.size);
+ ret = -EAGAIN;
+ }
+ module_put(t->me);
+ xt_table_unlock(t);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
+
+ return ret;
+}
+
+static int
+__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
+ struct xt_table_info *newinfo, unsigned int num_counters,
+ void __user *counters_ptr)
+{
+ int ret;
+ struct xt_table *t;
+ struct xt_table_info *oldinfo;
+ struct xt_counters *counters;
+ void *loc_cpu_old_entry;
+ struct ipt_entry *iter;
+
+ ret = 0;
+ counters = vzalloc(num_counters * sizeof(struct xt_counters));
+ if (!counters) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
+ "iptable_%s", name);
+ if (IS_ERR_OR_NULL(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+ goto free_newinfo_counters_untrans;
+ }
+
+ /* You lied! */
+ if (valid_hooks != t->valid_hooks) {
+ duprintf("Valid hook crap: %08X vs %08X\n",
+ valid_hooks, t->valid_hooks);
+ ret = -EINVAL;
+ goto put_module;
+ }
+
+ oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
+ if (!oldinfo)
+ goto put_module;
+
+ /* Update module usage count based on number of rules */
+ duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
+ oldinfo->number, oldinfo->initial_entries, newinfo->number);
+ if ((oldinfo->number > oldinfo->initial_entries) ||
+ (newinfo->number <= oldinfo->initial_entries))
+ module_put(t->me);
+ if ((oldinfo->number > oldinfo->initial_entries) &&
+ (newinfo->number <= oldinfo->initial_entries))
+ module_put(t->me);
+
+ /* Get the old counters, and synchronize with replace */
+ get_counters(oldinfo, counters);
+
+ /* Decrease module usage counts and free resource */
+ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
+ xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+ cleanup_entry(iter, net);
+
+ xt_free_table_info(oldinfo);
+ if (copy_to_user(counters_ptr, counters,
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
+ }
+ vfree(counters);
+ xt_table_unlock(t);
+ return ret;
+
+ put_module:
+ module_put(t->me);
+ xt_table_unlock(t);
+ free_newinfo_counters_untrans:
+ vfree(counters);
+ out:
+ return ret;
+}
+
+static int
+do_replace(struct net *net, const void __user *user, unsigned int len)
+{
+ int ret;
+ struct ipt_replace tmp;
+ struct xt_table_info *newinfo;
+ void *loc_cpu_entry;
+ struct ipt_entry *iter;
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+ /* overflow check */
+ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+ return -ENOMEM;
+ if (tmp.num_counters == 0)
+ return -EINVAL;
+
+ tmp.name[sizeof(tmp.name)-1] = 0;
+
+ newinfo = xt_alloc_table_info(tmp.size);
+ if (!newinfo)
+ return -ENOMEM;
+
+ /* choose the copy that is on our node/cpu */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+ tmp.size) != 0) {
+ ret = -EFAULT;
+ goto free_newinfo;
+ }
+
+ ret = translate_table(net, newinfo, loc_cpu_entry, &tmp);
+ if (ret != 0)
+ goto free_newinfo;
+
+ duprintf("Translated table\n");
+
+ ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
+ tmp.num_counters, tmp.counters);
+ if (ret)
+ goto free_newinfo_untrans;
+ return 0;
+
+ free_newinfo_untrans:
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter, net);
+ free_newinfo:
+ xt_free_table_info(newinfo);
+ return ret;
+}
+
+static int
+do_add_counters(struct net *net, const void __user *user,
+ unsigned int len, int compat)
+{
+ unsigned int i, curcpu;
+ struct xt_counters_info tmp;
+ struct xt_counters *paddc;
+ unsigned int num_counters;
+ const char *name;
+ int size;
+ void *ptmp;
+ struct xt_table *t;
+ const struct xt_table_info *private;
+ int ret = 0;
+ void *loc_cpu_entry;
+ struct ipt_entry *iter;
+ unsigned int addend;
+#ifdef CONFIG_COMPAT
+ struct compat_xt_counters_info compat_tmp;
+
+ if (compat) {
+ ptmp = &compat_tmp;
+ size = sizeof(struct compat_xt_counters_info);
+ } else
+#endif
+ {
+ ptmp = &tmp;
+ size = sizeof(struct xt_counters_info);
+ }
+
+ if (copy_from_user(ptmp, user, size) != 0)
+ return -EFAULT;
+
+#ifdef CONFIG_COMPAT
+ if (compat) {
+ num_counters = compat_tmp.num_counters;
+ name = compat_tmp.name;
+ } else
+#endif
+ {
+ num_counters = tmp.num_counters;
+ name = tmp.name;
+ }
+
+ if (len != size + num_counters * sizeof(struct xt_counters))
+ return -EINVAL;
+
+ paddc = vmalloc(len - size);
+ if (!paddc)
+ return -ENOMEM;
+
+ if (copy_from_user(paddc, user + size, len - size) != 0) {
+ ret = -EFAULT;
+ goto free;
+ }
+
+ t = xt_find_table_lock(net, AF_INET, name);
+ if (IS_ERR_OR_NULL(t)) {
+ ret = t ? PTR_ERR(t) : -ENOENT;
+ goto free;
+ }
+
+ local_bh_disable();
+ private = t->private;
+ if (private->number != num_counters) {
+ ret = -EINVAL;
+ goto unlock_up_free;
+ }
+
+ i = 0;
+ /* Choose the copy that is on our node */
+ curcpu = smp_processor_id();
+ loc_cpu_entry = private->entries[curcpu];
+ addend = xt_write_recseq_begin();
+ xt_entry_foreach(iter, loc_cpu_entry, private->size) {
+ ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+ ++i;
+ }
+ xt_write_recseq_end(addend);
+ unlock_up_free:
+ local_bh_enable();
+ xt_table_unlock(t);
+ module_put(t->me);
+ free:
+ vfree(paddc);
+
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+struct compat_ipt_replace {
+ char name[XT_TABLE_MAXNAMELEN];
+ u32 valid_hooks;
+ u32 num_entries;
+ u32 size;
+ u32 hook_entry[NF_INET_NUMHOOKS];
+ u32 underflow[NF_INET_NUMHOOKS];
+ u32 num_counters;
+ compat_uptr_t counters; /* struct xt_counters * */
+ struct compat_ipt_entry entries[0];
+};
+
+static int
+compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
+ unsigned int *size, struct xt_counters *counters,
+ unsigned int i)
+{
+ struct xt_entry_target *t;
+ struct compat_ipt_entry __user *ce;
+ u_int16_t target_offset, next_offset;
+ compat_uint_t origsize;
+ const struct xt_entry_match *ematch;
+ int ret = 0;
+
+ origsize = *size;
+ ce = (struct compat_ipt_entry __user *)*dstptr;
+ if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 ||
+ copy_to_user(&ce->counters, &counters[i],
+ sizeof(counters[i])) != 0)
+ return -EFAULT;
+
+ *dstptr += sizeof(struct compat_ipt_entry);
+ *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
+
+ xt_ematch_foreach(ematch, e) {
+ ret = xt_compat_match_to_user(ematch, dstptr, size);
+ if (ret != 0)
+ return ret;
+ }
+ target_offset = e->target_offset - (origsize - *size);
+ t = ipt_get_target(e);
+ ret = xt_compat_target_to_user(t, dstptr, size);
+ if (ret)
+ return ret;
+ next_offset = e->next_offset - (origsize - *size);
+ if (put_user(target_offset, &ce->target_offset) != 0 ||
+ put_user(next_offset, &ce->next_offset) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int
+compat_find_calc_match(struct xt_entry_match *m,
+ const char *name,
+ const struct ipt_ip *ip,
+ unsigned int hookmask,
+ int *size)
+{
+ struct xt_match *match;
+
+ match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
+ m->u.user.revision);
+ if (IS_ERR(match)) {
+ duprintf("compat_check_calc_match: `%s' not found\n",
+ m->u.user.name);
+ return PTR_ERR(match);
+ }
+ m->u.kernel.match = match;
+ *size += xt_compat_match_offset(match);
+ return 0;
+}
+
+static void compat_release_entry(struct compat_ipt_entry *e)
+{
+ struct xt_entry_target *t;
+ struct xt_entry_match *ematch;
+
+ /* Cleanup all matches */
+ xt_ematch_foreach(ematch, e)
+ module_put(ematch->u.kernel.match->me);
+ t = compat_ipt_get_target(e);
+ module_put(t->u.kernel.target->me);
+}
+
+static int
+check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
+ struct xt_table_info *newinfo,
+ unsigned int *size,
+ const unsigned char *base,
+ const unsigned char *limit,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows,
+ const char *name)
+{
+ struct xt_entry_match *ematch;
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ unsigned int entry_offset;
+ unsigned int j;
+ int ret, off, h;
+
+ duprintf("check_compat_entry_size_and_hooks %p\n", e);
+ if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
+ (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
+ duprintf("Bad offset %p, limit = %p\n", e, limit);
+ return -EINVAL;
+ }
+
+ if (e->next_offset < sizeof(struct compat_ipt_entry) +
+ sizeof(struct compat_xt_entry_target)) {
+ duprintf("checking: element %p size %u\n",
+ e, e->next_offset);
+ return -EINVAL;
+ }
+
+ /* For purposes of check_entry casting the compat entry is fine */
+ ret = check_entry((struct ipt_entry *)e, name);
+ if (ret)
+ return ret;
+
+ off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
+ entry_offset = (void *)e - (void *)base;
+ j = 0;
+ xt_ematch_foreach(ematch, e) {
+ ret = compat_find_calc_match(ematch, name,
+ &e->ip, e->comefrom, &off);
+ if (ret != 0)
+ goto release_matches;
+ ++j;
+ }
+
+ t = compat_ipt_get_target(e);
+ target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
+ t->u.user.revision);
+ if (IS_ERR(target)) {
+ duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
+ t->u.user.name);
+ ret = PTR_ERR(target);
+ goto release_matches;
+ }
+ t->u.kernel.target = target;
+
+ off += xt_compat_target_offset(target);
+ *size += off;
+ ret = xt_compat_add_offset(AF_INET, entry_offset, off);
+ if (ret)
+ goto out;
+
+ /* Check hooks & underflows */
+ for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+ if ((unsigned char *)e - base == hook_entries[h])
+ newinfo->hook_entry[h] = hook_entries[h];
+ if ((unsigned char *)e - base == underflows[h])
+ newinfo->underflow[h] = underflows[h];
+ }
+
+ /* Clear counters and comefrom */
+ memset(&e->counters, 0, sizeof(e->counters));
+ e->comefrom = 0;
+ return 0;
+
+out:
+ module_put(t->u.kernel.target->me);
+release_matches:
+ xt_ematch_foreach(ematch, e) {
+ if (j-- == 0)
+ break;
+ module_put(ematch->u.kernel.match->me);
+ }
+ return ret;
+}
+
+static int
+compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
+ unsigned int *size, const char *name,
+ struct xt_table_info *newinfo, unsigned char *base)
+{
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ struct ipt_entry *de;
+ unsigned int origsize;
+ int ret, h;
+ struct xt_entry_match *ematch;
+
+ ret = 0;
+ origsize = *size;
+ de = (struct ipt_entry *)*dstptr;
+ memcpy(de, e, sizeof(struct ipt_entry));
+ memcpy(&de->counters, &e->counters, sizeof(e->counters));
+
+ *dstptr += sizeof(struct ipt_entry);
+ *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
+
+ xt_ematch_foreach(ematch, e) {
+ ret = xt_compat_match_from_user(ematch, dstptr, size);
+ if (ret != 0)
+ return ret;
+ }
+ de->target_offset = e->target_offset - (origsize - *size);
+ t = compat_ipt_get_target(e);
+ target = t->u.kernel.target;
+ xt_compat_target_from_user(t, dstptr, size);
+
+ de->next_offset = e->next_offset - (origsize - *size);
+ for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+ if ((unsigned char *)de - base < newinfo->hook_entry[h])
+ newinfo->hook_entry[h] -= origsize - *size;
+ if ((unsigned char *)de - base < newinfo->underflow[h])
+ newinfo->underflow[h] -= origsize - *size;
+ }
+ return ret;
+}
+
+static int
+compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
+{
+ struct xt_entry_match *ematch;
+ struct xt_mtchk_param mtpar;
+ unsigned int j;
+ int ret = 0;
+
+ j = 0;
+ mtpar.net = net;
+ mtpar.table = name;
+ mtpar.entryinfo = &e->ip;
+ mtpar.hook_mask = e->comefrom;
+ mtpar.family = NFPROTO_IPV4;
+ xt_ematch_foreach(ematch, e) {
+ ret = check_match(ematch, &mtpar);
+ if (ret != 0)
+ goto cleanup_matches;
+ ++j;
+ }
+
+ ret = check_target(e, net, name);
+ if (ret)
+ goto cleanup_matches;
+ return 0;
+
+ cleanup_matches:
+ xt_ematch_foreach(ematch, e) {
+ if (j-- == 0)
+ break;
+ cleanup_match(ematch, net);
+ }
+ return ret;
+}
+
+static int
+translate_compat_table(struct net *net,
+ const char *name,
+ unsigned int valid_hooks,
+ struct xt_table_info **pinfo,
+ void **pentry0,
+ unsigned int total_size,
+ unsigned int number,
+ unsigned int *hook_entries,
+ unsigned int *underflows)
+{
+ unsigned int i, j;
+ struct xt_table_info *newinfo, *info;
+ void *pos, *entry0, *entry1;
+ struct compat_ipt_entry *iter0;
+ struct ipt_entry *iter1;
+ unsigned int size;
+ int ret;
+
+ info = *pinfo;
+ entry0 = *pentry0;
+ size = total_size;
+ info->number = number;
+
+ /* Init all hooks to impossible value. */
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ info->hook_entry[i] = 0xFFFFFFFF;
+ info->underflow[i] = 0xFFFFFFFF;
+ }
+
+ duprintf("translate_compat_table: size %u\n", info->size);
+ j = 0;
+ xt_compat_lock(AF_INET);
+ xt_compat_init_offsets(AF_INET, number);
+ /* Walk through entries, checking offsets. */
+ xt_entry_foreach(iter0, entry0, total_size) {
+ ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+ entry0,
+ entry0 + total_size,
+ hook_entries,
+ underflows,
+ name);
+ if (ret != 0)
+ goto out_unlock;
+ ++j;
+ }
+
+ ret = -EINVAL;
+ if (j != number) {
+ duprintf("translate_compat_table: %u not %u entries\n",
+ j, number);
+ goto out_unlock;
+ }
+
+ /* Check hooks all assigned */
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ /* Only hooks which are valid */
+ if (!(valid_hooks & (1 << i)))
+ continue;
+ if (info->hook_entry[i] == 0xFFFFFFFF) {
+ duprintf("Invalid hook entry %u %u\n",
+ i, hook_entries[i]);
+ goto out_unlock;
+ }
+ if (info->underflow[i] == 0xFFFFFFFF) {
+ duprintf("Invalid underflow %u %u\n",
+ i, underflows[i]);
+ goto out_unlock;
+ }
+ }
+
+ ret = -ENOMEM;
+ newinfo = xt_alloc_table_info(size);
+ if (!newinfo)
+ goto out_unlock;
+
+ newinfo->number = number;
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ newinfo->hook_entry[i] = info->hook_entry[i];
+ newinfo->underflow[i] = info->underflow[i];
+ }
+ entry1 = newinfo->entries[raw_smp_processor_id()];
+ pos = entry1;
+ size = total_size;
+ xt_entry_foreach(iter0, entry0, total_size) {
+ ret = compat_copy_entry_from_user(iter0, &pos, &size,
+ name, newinfo, entry1);
+ if (ret != 0)
+ break;
+ }
+ xt_compat_flush_offsets(AF_INET);
+ xt_compat_unlock(AF_INET);
+ if (ret)
+ goto free_newinfo;
+
+ ret = -ELOOP;
+ if (!mark_source_chains(newinfo, valid_hooks, entry1))
+ goto free_newinfo;
+
+ i = 0;
+ xt_entry_foreach(iter1, entry1, newinfo->size) {
+ ret = compat_check_entry(iter1, net, name);
+ if (ret != 0)
+ break;
+ ++i;
+ if (strcmp(ipt_get_target(iter1)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
+ }
+ if (ret) {
+ /*
+ * The first i matches need cleanup_entry (calls ->destroy)
+ * because they had called ->check already. The other j-i
+ * entries need only release.
+ */
+ int skip = i;
+ j -= i;
+ xt_entry_foreach(iter0, entry0, newinfo->size) {
+ if (skip-- > 0)
+ continue;
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
+ xt_entry_foreach(iter1, entry1, newinfo->size) {
+ if (i-- == 0)
+ break;
+ cleanup_entry(iter1, net);
+ }
+ xt_free_table_info(newinfo);
+ return ret;
+ }
+
+ /* And one copy for every other CPU */
+ for_each_possible_cpu(i)
+ if (newinfo->entries[i] && newinfo->entries[i] != entry1)
+ memcpy(newinfo->entries[i], entry1, newinfo->size);
+
+ *pinfo = newinfo;
+ *pentry0 = entry1;
+ xt_free_table_info(info);
+ return 0;
+
+free_newinfo:
+ xt_free_table_info(newinfo);
+out:
+ xt_entry_foreach(iter0, entry0, total_size) {
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
+ return ret;
+out_unlock:
+ xt_compat_flush_offsets(AF_INET);
+ xt_compat_unlock(AF_INET);
+ goto out;
+}
+
+static int
+compat_do_replace(struct net *net, void __user *user, unsigned int len)
+{
+ int ret;
+ struct compat_ipt_replace tmp;
+ struct xt_table_info *newinfo;
+ void *loc_cpu_entry;
+ struct ipt_entry *iter;
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+ /* overflow check */
+ if (tmp.size >= INT_MAX / num_possible_cpus())
+ return -ENOMEM;
+ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+ return -ENOMEM;
+ if (tmp.num_counters == 0)
+ return -EINVAL;
+
+ tmp.name[sizeof(tmp.name)-1] = 0;
+
+ newinfo = xt_alloc_table_info(tmp.size);
+ if (!newinfo)
+ return -ENOMEM;
+
+ /* choose the copy that is on our node/cpu */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+ tmp.size) != 0) {
+ ret = -EFAULT;
+ goto free_newinfo;
+ }
+
+ ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
+ &newinfo, &loc_cpu_entry, tmp.size,
+ tmp.num_entries, tmp.hook_entry,
+ tmp.underflow);
+ if (ret != 0)
+ goto free_newinfo;
+
+ duprintf("compat_do_replace: Translated table\n");
+
+ ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
+ tmp.num_counters, compat_ptr(tmp.counters));
+ if (ret)
+ goto free_newinfo_untrans;
+ return 0;
+
+ free_newinfo_untrans:
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter, net);
+ free_newinfo:
+ xt_free_table_info(newinfo);
+ return ret;
+}
+
+static int
+compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
+ unsigned int len)
+{
+ int ret;
+
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case IPT_SO_SET_REPLACE:
+ ret = compat_do_replace(sock_net(sk), user, len);
+ break;
+
+ case IPT_SO_SET_ADD_COUNTERS:
+ ret = do_add_counters(sock_net(sk), user, len, 1);
+ break;
+
+ default:
+ duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+struct compat_ipt_get_entries {
+ char name[XT_TABLE_MAXNAMELEN];
+ compat_uint_t size;
+ struct compat_ipt_entry entrytable[0];
+};
+
+static int
+compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
+ void __user *userptr)
+{
+ struct xt_counters *counters;
+ const struct xt_table_info *private = table->private;
+ void __user *pos;
+ unsigned int size;
+ int ret = 0;
+ const void *loc_cpu_entry;
+ unsigned int i = 0;
+ struct ipt_entry *iter;
+
+ counters = alloc_counters(table);
+ if (IS_ERR(counters))
+ return PTR_ERR(counters);
+
+ /* choose the copy that is on our node/cpu, ...
+ * This choice is lazy (because current thread is
+ * allowed to migrate to another cpu)
+ */
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ pos = userptr;
+ size = total_size;
+ xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+ ret = compat_copy_entry_to_user(iter, &pos,
+ &size, counters, i++);
+ if (ret != 0)
+ break;
+ }
+
+ vfree(counters);
+ return ret;
+}
+
+static int
+compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
+ int *len)
+{
+ int ret;
+ struct compat_ipt_get_entries get;
+ struct xt_table *t;
+
+ if (*len < sizeof(get)) {
+ duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+ return -EINVAL;
+ }
+
+ if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+ return -EFAULT;
+
+ if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
+ duprintf("compat_get_entries: %u != %zu\n",
+ *len, sizeof(get) + get.size);
+ return -EINVAL;
+ }
+
+ xt_compat_lock(AF_INET);
+ t = xt_find_table_lock(net, AF_INET, get.name);
+ if (!IS_ERR_OR_NULL(t)) {
+ const struct xt_table_info *private = t->private;
+ struct xt_table_info info;
+ duprintf("t->private->number = %u\n", private->number);
+ ret = compat_table_info(private, &info);
+ if (!ret && get.size == info.size) {
+ ret = compat_copy_entries_to_user(private->size,
+ t, uptr->entrytable);
+ } else if (!ret) {
+ duprintf("compat_get_entries: I've got %u not %u!\n",
+ private->size, get.size);
+ ret = -EAGAIN;
+ }
+ xt_compat_flush_offsets(AF_INET);
+ module_put(t->me);
+ xt_table_unlock(t);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
+
+ xt_compat_unlock(AF_INET);
+ return ret;
+}
+
+static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
+
+static int
+compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+ int ret;
+
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case IPT_SO_GET_INFO:
+ ret = get_info(sock_net(sk), user, len, 1);
+ break;
+ case IPT_SO_GET_ENTRIES:
+ ret = compat_get_entries(sock_net(sk), user, len);
+ break;
+ default:
+ ret = do_ipt_get_ctl(sk, cmd, user, len);
+ }
+ return ret;
+}
+#endif
+
+static int
+do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+{
+ int ret;
+
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case IPT_SO_SET_REPLACE:
+ ret = do_replace(sock_net(sk), user, len);
+ break;
+
+ case IPT_SO_SET_ADD_COUNTERS:
+ ret = do_add_counters(sock_net(sk), user, len, 0);
+ break;
+
+ default:
+ duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int
+do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+ int ret;
+
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case IPT_SO_GET_INFO:
+ ret = get_info(sock_net(sk), user, len, 0);
+ break;
+
+ case IPT_SO_GET_ENTRIES:
+ ret = get_entries(sock_net(sk), user, len);
+ break;
+
+ case IPT_SO_GET_REVISION_MATCH:
+ case IPT_SO_GET_REVISION_TARGET: {
+ struct xt_get_revision rev;
+ int target;
+
+ if (*len != sizeof(rev)) {
+ ret = -EINVAL;
+ break;
+ }
+ if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
+ ret = -EFAULT;
+ break;
+ }
+ rev.name[sizeof(rev.name)-1] = 0;
+
+ if (cmd == IPT_SO_GET_REVISION_TARGET)
+ target = 1;
+ else
+ target = 0;
+
+ try_then_request_module(xt_find_revision(AF_INET, rev.name,
+ rev.revision,
+ target, &ret),
+ "ipt_%s", rev.name);
+ break;
+ }
+
+ default:
+ duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+struct xt_table *ipt_register_table(struct net *net,
+ const struct xt_table *table,
+ const struct ipt_replace *repl)
+{
+ int ret;
+ struct xt_table_info *newinfo;
+ struct xt_table_info bootstrap = {0};
+ void *loc_cpu_entry;
+ struct xt_table *new_table;
+
+ newinfo = xt_alloc_table_info(repl->size);
+ if (!newinfo) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* choose the copy on our node/cpu, but dont care about preemption */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ memcpy(loc_cpu_entry, repl->entries, repl->size);
+
+ ret = translate_table(net, newinfo, loc_cpu_entry, repl);
+ if (ret != 0)
+ goto out_free;
+
+ new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ if (IS_ERR(new_table)) {
+ ret = PTR_ERR(new_table);
+ goto out_free;
+ }
+
+ return new_table;
+
+out_free:
+ xt_free_table_info(newinfo);
+out:
+ return ERR_PTR(ret);
+}
+
+void ipt_unregister_table(struct net *net, struct xt_table *table)
+{
+ struct xt_table_info *private;
+ void *loc_cpu_entry;
+ struct module *table_owner = table->me;
+ struct ipt_entry *iter;
+
+ private = xt_unregister_table(table);
+
+ /* Decrease module usage counts and free resources */
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ xt_entry_foreach(iter, loc_cpu_entry, private->size)
+ cleanup_entry(iter, net);
+ if (private->number > private->initial_entries)
+ module_put(table_owner);
+ xt_free_table_info(private);
+}
+
+/* Returns 1 if the type and code is matched by the range, 0 otherwise */
+static inline bool
+icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
+ u_int8_t type, u_int8_t code,
+ bool invert)
+{
+ return ((test_type == 0xFF) ||
+ (type == test_type && code >= min_code && code <= max_code))
+ ^ invert;
+}
+
+static bool
+icmp_match(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct icmphdr *ic;
+ struct icmphdr _icmph;
+ const struct ipt_icmp *icmpinfo = par->matchinfo;
+
+ /* Must not be a fragment. */
+ if (par->fragoff != 0)
+ return false;
+
+ ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
+ if (ic == NULL) {
+ /* We've been asked to examine this packet, and we
+ * can't. Hence, no choice but to drop.
+ */
+ duprintf("Dropping evil ICMP tinygram.\n");
+ par->hotdrop = true;
+ return false;
+ }
+
+ return icmp_type_code_match(icmpinfo->type,
+ icmpinfo->code[0],
+ icmpinfo->code[1],
+ ic->type, ic->code,
+ !!(icmpinfo->invflags&IPT_ICMP_INV));
+}
+
+static int icmp_checkentry(const struct xt_mtchk_param *par)
+{
+ const struct ipt_icmp *icmpinfo = par->matchinfo;
+
+ /* Must specify no unknown invflags */
+ return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0;
+}
+
+static struct xt_target ipt_builtin_tg[] __read_mostly = {
+ {
+ .name = XT_STANDARD_TARGET,
+ .targetsize = sizeof(int),
+ .family = NFPROTO_IPV4,
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(compat_int_t),
+ .compat_from_user = compat_standard_from_user,
+ .compat_to_user = compat_standard_to_user,
+#endif
+ },
+ {
+ .name = XT_ERROR_TARGET,
+ .target = ipt_error,
+ .targetsize = XT_FUNCTION_MAXNAMELEN,
+ .family = NFPROTO_IPV4,
+ },
+};
+
+static struct nf_sockopt_ops ipt_sockopts = {
+ .pf = PF_INET,
+ .set_optmin = IPT_BASE_CTL,
+ .set_optmax = IPT_SO_SET_MAX+1,
+ .set = do_ipt_set_ctl,
+#ifdef CONFIG_COMPAT
+ .compat_set = compat_do_ipt_set_ctl,
+#endif
+ .get_optmin = IPT_BASE_CTL,
+ .get_optmax = IPT_SO_GET_MAX+1,
+ .get = do_ipt_get_ctl,
+#ifdef CONFIG_COMPAT
+ .compat_get = compat_do_ipt_get_ctl,
+#endif
+ .owner = THIS_MODULE,
+};
+
+static struct xt_match ipt_builtin_mt[] __read_mostly = {
+ {
+ .name = "icmp",
+ .match = icmp_match,
+ .matchsize = sizeof(struct ipt_icmp),
+ .checkentry = icmp_checkentry,
+ .proto = IPPROTO_ICMP,
+ .family = NFPROTO_IPV4,
+ },
+};
+
+static int __net_init ip_tables_net_init(struct net *net)
+{
+ return xt_proto_init(net, NFPROTO_IPV4);
+}
+
+static void __net_exit ip_tables_net_exit(struct net *net)
+{
+ xt_proto_fini(net, NFPROTO_IPV4);
+}
+
+static struct pernet_operations ip_tables_net_ops = {
+ .init = ip_tables_net_init,
+ .exit = ip_tables_net_exit,
+};
+
+static int __init ip_tables_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&ip_tables_net_ops);
+ if (ret < 0)
+ goto err1;
+
+ /* No one else will be downing sem now, so we won't sleep */
+ ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
+ if (ret < 0)
+ goto err2;
+ ret = xt_register_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
+ if (ret < 0)
+ goto err4;
+
+ /* Register setsockopt */
+ ret = nf_register_sockopt(&ipt_sockopts);
+ if (ret < 0)
+ goto err5;
+
+ pr_info("(C) 2000-2006 Netfilter Core Team\n");
+ return 0;
+
+err5:
+ xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
+err4:
+ xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
+err2:
+ unregister_pernet_subsys(&ip_tables_net_ops);
+err1:
+ return ret;
+}
+
+static void __exit ip_tables_fini(void)
+{
+ nf_unregister_sockopt(&ipt_sockopts);
+
+ xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
+ xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
+ unregister_pernet_subsys(&ip_tables_net_ops);
+}
+
+EXPORT_SYMBOL(ipt_register_table);
+EXPORT_SYMBOL(ipt_unregister_table);
+EXPORT_SYMBOL(ipt_do_table);
+module_init(ip_tables_init);
+module_exit(ip_tables_fini);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
new file mode 100644
index 000000000..771ab3d01
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -0,0 +1,794 @@
+/* Cluster IP hashmark target
+ * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
+ * based on ideas of Fabio Olive Leite <olive@unixforge.org>
+ *
+ * Development of this code funded by SuSE Linux AG, http://www.suse.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/jhash.h>
+#include <linux/bitops.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/if_arp.h>
+#include <linux/seq_file.h>
+#include <linux/netfilter_arp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/checksum.h>
+#include <net/ip.h>
+
+#define CLUSTERIP_VERSION "0.8"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
+
+struct clusterip_config {
+ struct list_head list; /* list of all configs */
+ atomic_t refcount; /* reference count */
+ atomic_t entries; /* number of entries/rules
+ * referencing us */
+
+ __be32 clusterip; /* the IP address */
+ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */
+ struct net_device *dev; /* device */
+ u_int16_t num_total_nodes; /* total number of nodes */
+ unsigned long local_nodes; /* node number array */
+
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *pde; /* proc dir entry */
+#endif
+ enum clusterip_hashmode hash_mode; /* which hashing mode */
+ u_int32_t hash_initval; /* hash initialization */
+ struct rcu_head rcu;
+};
+
+#ifdef CONFIG_PROC_FS
+static const struct file_operations clusterip_proc_fops;
+#endif
+
+static int clusterip_net_id __read_mostly;
+
+struct clusterip_net {
+ struct list_head configs;
+ /* lock protects the configs list */
+ spinlock_t lock;
+
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *procdir;
+#endif
+};
+
+static inline void
+clusterip_config_get(struct clusterip_config *c)
+{
+ atomic_inc(&c->refcount);
+}
+
+
+static void clusterip_config_rcu_free(struct rcu_head *head)
+{
+ kfree(container_of(head, struct clusterip_config, rcu));
+}
+
+static inline void
+clusterip_config_put(struct clusterip_config *c)
+{
+ if (atomic_dec_and_test(&c->refcount))
+ call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
+}
+
+/* decrease the count of entries using/referencing this config. If last
+ * entry(rule) is removed, remove the config from lists, but don't free it
+ * yet, since proc-files could still be holding references */
+static inline void
+clusterip_config_entry_put(struct clusterip_config *c)
+{
+ struct net *net = dev_net(c->dev);
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+
+ local_bh_disable();
+ if (atomic_dec_and_lock(&c->entries, &cn->lock)) {
+ list_del_rcu(&c->list);
+ spin_unlock(&cn->lock);
+ local_bh_enable();
+
+ dev_mc_del(c->dev, c->clustermac);
+ dev_put(c->dev);
+
+ /* In case anyone still accesses the file, the open/close
+ * functions are also incrementing the refcount on their own,
+ * so it's safe to remove the entry even if it's in use. */
+#ifdef CONFIG_PROC_FS
+ proc_remove(c->pde);
+#endif
+ return;
+ }
+ local_bh_enable();
+}
+
+static struct clusterip_config *
+__clusterip_config_find(struct net *net, __be32 clusterip)
+{
+ struct clusterip_config *c;
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+
+ list_for_each_entry_rcu(c, &cn->configs, list) {
+ if (c->clusterip == clusterip)
+ return c;
+ }
+
+ return NULL;
+}
+
+static inline struct clusterip_config *
+clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
+{
+ struct clusterip_config *c;
+
+ rcu_read_lock_bh();
+ c = __clusterip_config_find(net, clusterip);
+ if (c) {
+ if (unlikely(!atomic_inc_not_zero(&c->refcount)))
+ c = NULL;
+ else if (entry)
+ atomic_inc(&c->entries);
+ }
+ rcu_read_unlock_bh();
+
+ return c;
+}
+
+static void
+clusterip_config_init_nodelist(struct clusterip_config *c,
+ const struct ipt_clusterip_tgt_info *i)
+{
+ int n;
+
+ for (n = 0; n < i->num_local_nodes; n++)
+ set_bit(i->local_nodes[n] - 1, &c->local_nodes);
+}
+
+static struct clusterip_config *
+clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
+ struct net_device *dev)
+{
+ struct clusterip_config *c;
+ struct clusterip_net *cn = net_generic(dev_net(dev), clusterip_net_id);
+
+ c = kzalloc(sizeof(*c), GFP_ATOMIC);
+ if (!c)
+ return NULL;
+
+ c->dev = dev;
+ c->clusterip = ip;
+ memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
+ c->num_total_nodes = i->num_total_nodes;
+ clusterip_config_init_nodelist(c, i);
+ c->hash_mode = i->hash_mode;
+ c->hash_initval = i->hash_initval;
+ atomic_set(&c->refcount, 1);
+ atomic_set(&c->entries, 1);
+
+#ifdef CONFIG_PROC_FS
+ {
+ char buffer[16];
+
+ /* create proc dir entry */
+ sprintf(buffer, "%pI4", &ip);
+ c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR,
+ cn->procdir,
+ &clusterip_proc_fops, c);
+ if (!c->pde) {
+ kfree(c);
+ return NULL;
+ }
+ }
+#endif
+
+ spin_lock_bh(&cn->lock);
+ list_add_rcu(&c->list, &cn->configs);
+ spin_unlock_bh(&cn->lock);
+
+ return c;
+}
+
+#ifdef CONFIG_PROC_FS
+static int
+clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
+{
+
+ if (nodenum == 0 ||
+ nodenum > c->num_total_nodes)
+ return 1;
+
+ /* check if we already have this number in our bitfield */
+ if (test_and_set_bit(nodenum - 1, &c->local_nodes))
+ return 1;
+
+ return 0;
+}
+
+static bool
+clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
+{
+ if (nodenum == 0 ||
+ nodenum > c->num_total_nodes)
+ return true;
+
+ if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
+ return false;
+
+ return true;
+}
+#endif
+
+static inline u_int32_t
+clusterip_hashfn(const struct sk_buff *skb,
+ const struct clusterip_config *config)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ unsigned long hashval;
+ u_int16_t sport = 0, dport = 0;
+ int poff;
+
+ poff = proto_ports_offset(iph->protocol);
+ if (poff >= 0) {
+ const u_int16_t *ports;
+ u16 _ports[2];
+
+ ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports);
+ if (ports) {
+ sport = ports[0];
+ dport = ports[1];
+ }
+ } else {
+ net_info_ratelimited("unknown protocol %u\n", iph->protocol);
+ }
+
+ switch (config->hash_mode) {
+ case CLUSTERIP_HASHMODE_SIP:
+ hashval = jhash_1word(ntohl(iph->saddr),
+ config->hash_initval);
+ break;
+ case CLUSTERIP_HASHMODE_SIP_SPT:
+ hashval = jhash_2words(ntohl(iph->saddr), sport,
+ config->hash_initval);
+ break;
+ case CLUSTERIP_HASHMODE_SIP_SPT_DPT:
+ hashval = jhash_3words(ntohl(iph->saddr), sport, dport,
+ config->hash_initval);
+ break;
+ default:
+ /* to make gcc happy */
+ hashval = 0;
+ /* This cannot happen, unless the check function wasn't called
+ * at rule load time */
+ pr_info("unknown mode %u\n", config->hash_mode);
+ BUG();
+ break;
+ }
+
+ /* node numbers are 1..n, not 0..n */
+ return reciprocal_scale(hashval, config->num_total_nodes) + 1;
+}
+
+static inline int
+clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
+{
+ return test_bit(hash - 1, &config->local_nodes);
+}
+
+/***********************************************************************
+ * IPTABLES TARGET
+ ***********************************************************************/
+
+static unsigned int
+clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ u_int32_t hash;
+
+ /* don't need to clusterip_config_get() here, since refcount
+ * is only decremented by destroy() - and ip_tables guarantees
+ * that the ->target() function isn't called after ->destroy() */
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return NF_DROP;
+
+ /* special case: ICMP error handling. conntrack distinguishes between
+ * error messages (RELATED) and information requests (see below) */
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP &&
+ (ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY))
+ return XT_CONTINUE;
+
+ /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
+ * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
+ * on, which all have an ID field [relevant for hashing]. */
+
+ hash = clusterip_hashfn(skb, cipinfo->config);
+
+ switch (ctinfo) {
+ case IP_CT_NEW:
+ ct->mark = hash;
+ break;
+ case IP_CT_RELATED:
+ case IP_CT_RELATED_REPLY:
+ /* FIXME: we don't handle expectations at the moment.
+ * They can arrive on a different node than
+ * the master connection (e.g. FTP passive mode) */
+ case IP_CT_ESTABLISHED:
+ case IP_CT_ESTABLISHED_REPLY:
+ break;
+ default: /* Prevent gcc warnings */
+ break;
+ }
+
+#ifdef DEBUG
+ nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+#endif
+ pr_debug("hash=%u ct_hash=%u ", hash, ct->mark);
+ if (!clusterip_responsible(cipinfo->config, hash)) {
+ pr_debug("not responsible\n");
+ return NF_DROP;
+ }
+ pr_debug("responsible\n");
+
+ /* despite being received via linklayer multicast, this is
+ * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
+ skb->pkt_type = PACKET_HOST;
+
+ return XT_CONTINUE;
+}
+
+static int clusterip_tg_check(const struct xt_tgchk_param *par)
+{
+ struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
+ const struct ipt_entry *e = par->entryinfo;
+ struct clusterip_config *config;
+ int ret;
+
+ if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
+ cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
+ cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
+ pr_info("unknown mode %u\n", cipinfo->hash_mode);
+ return -EINVAL;
+
+ }
+ if (e->ip.dmsk.s_addr != htonl(0xffffffff) ||
+ e->ip.dst.s_addr == 0) {
+ pr_info("Please specify destination IP\n");
+ return -EINVAL;
+ }
+
+ /* FIXME: further sanity checks */
+
+ config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1);
+ if (!config) {
+ if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
+ pr_info("no config found for %pI4, need 'new'\n",
+ &e->ip.dst.s_addr);
+ return -EINVAL;
+ } else {
+ struct net_device *dev;
+
+ if (e->ip.iniface[0] == '\0') {
+ pr_info("Please specify an interface name\n");
+ return -EINVAL;
+ }
+
+ dev = dev_get_by_name(par->net, e->ip.iniface);
+ if (!dev) {
+ pr_info("no such interface %s\n",
+ e->ip.iniface);
+ return -ENOENT;
+ }
+
+ config = clusterip_config_init(cipinfo,
+ e->ip.dst.s_addr, dev);
+ if (!config) {
+ dev_put(dev);
+ return -ENOMEM;
+ }
+ dev_mc_add(config->dev, config->clustermac);
+ }
+ }
+ cipinfo->config = config;
+
+ ret = nf_ct_l3proto_try_module_get(par->family);
+ if (ret < 0)
+ pr_info("cannot load conntrack support for proto=%u\n",
+ par->family);
+
+ if (!par->net->xt.clusterip_deprecated_warning) {
+ pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
+ "use xt_cluster instead\n");
+ par->net->xt.clusterip_deprecated_warning = true;
+ }
+
+ return ret;
+}
+
+/* drop reference count of cluster config when rule is deleted */
+static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
+
+ /* if no more entries are referencing the config, remove it
+ * from the list and destroy the proc entry */
+ clusterip_config_entry_put(cipinfo->config);
+
+ clusterip_config_put(cipinfo->config);
+
+ nf_ct_l3proto_module_put(par->family);
+}
+
+#ifdef CONFIG_COMPAT
+struct compat_ipt_clusterip_tgt_info
+{
+ u_int32_t flags;
+ u_int8_t clustermac[6];
+ u_int16_t num_total_nodes;
+ u_int16_t num_local_nodes;
+ u_int16_t local_nodes[CLUSTERIP_MAX_NODES];
+ u_int32_t hash_mode;
+ u_int32_t hash_initval;
+ compat_uptr_t config;
+};
+#endif /* CONFIG_COMPAT */
+
+static struct xt_target clusterip_tg_reg __read_mostly = {
+ .name = "CLUSTERIP",
+ .family = NFPROTO_IPV4,
+ .target = clusterip_tg,
+ .checkentry = clusterip_tg_check,
+ .destroy = clusterip_tg_destroy,
+ .targetsize = sizeof(struct ipt_clusterip_tgt_info),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info),
+#endif /* CONFIG_COMPAT */
+ .me = THIS_MODULE
+};
+
+
+/***********************************************************************
+ * ARP MANGLING CODE
+ ***********************************************************************/
+
+/* hardcoded for 48bit ethernet and 32bit ipv4 addresses */
+struct arp_payload {
+ u_int8_t src_hw[ETH_ALEN];
+ __be32 src_ip;
+ u_int8_t dst_hw[ETH_ALEN];
+ __be32 dst_ip;
+} __packed;
+
+#ifdef DEBUG
+static void arp_print(struct arp_payload *payload)
+{
+#define HBUFFERLEN 30
+ char hbuffer[HBUFFERLEN];
+ int j,k;
+
+ for (k=0, j=0; k < HBUFFERLEN-3 && j < ETH_ALEN; j++) {
+ hbuffer[k++] = hex_asc_hi(payload->src_hw[j]);
+ hbuffer[k++] = hex_asc_lo(payload->src_hw[j]);
+ hbuffer[k++]=':';
+ }
+ hbuffer[--k]='\0';
+
+ pr_debug("src %pI4@%s, dst %pI4\n",
+ &payload->src_ip, hbuffer, &payload->dst_ip);
+}
+#endif
+
+static unsigned int
+arp_mangle(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct arphdr *arp = arp_hdr(skb);
+ struct arp_payload *payload;
+ struct clusterip_config *c;
+ struct net *net = dev_net(state->in ? state->in : state->out);
+
+ /* we don't care about non-ethernet and non-ipv4 ARP */
+ if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
+ arp->ar_pro != htons(ETH_P_IP) ||
+ arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN)
+ return NF_ACCEPT;
+
+ /* we only want to mangle arp requests and replies */
+ if (arp->ar_op != htons(ARPOP_REPLY) &&
+ arp->ar_op != htons(ARPOP_REQUEST))
+ return NF_ACCEPT;
+
+ payload = (void *)(arp+1);
+
+ /* if there is no clusterip configuration for the arp reply's
+ * source ip, we don't want to mangle it */
+ c = clusterip_config_find_get(net, payload->src_ip, 0);
+ if (!c)
+ return NF_ACCEPT;
+
+ /* normally the linux kernel always replies to arp queries of
+ * addresses on different interfacs. However, in the CLUSTERIP case
+ * this wouldn't work, since we didn't subscribe the mcast group on
+ * other interfaces */
+ if (c->dev != state->out) {
+ pr_debug("not mangling arp reply on different "
+ "interface: cip'%s'-skb'%s'\n",
+ c->dev->name, state->out->name);
+ clusterip_config_put(c);
+ return NF_ACCEPT;
+ }
+
+ /* mangle reply hardware address */
+ memcpy(payload->src_hw, c->clustermac, arp->ar_hln);
+
+#ifdef DEBUG
+ pr_debug("mangled arp reply: ");
+ arp_print(payload);
+#endif
+
+ clusterip_config_put(c);
+
+ return NF_ACCEPT;
+}
+
+static struct nf_hook_ops cip_arp_ops __read_mostly = {
+ .hook = arp_mangle,
+ .pf = NFPROTO_ARP,
+ .hooknum = NF_ARP_OUT,
+ .priority = -1
+};
+
+/***********************************************************************
+ * PROC DIR HANDLING
+ ***********************************************************************/
+
+#ifdef CONFIG_PROC_FS
+
+struct clusterip_seq_position {
+ unsigned int pos; /* position */
+ unsigned int weight; /* number of bits set == size */
+ unsigned int bit; /* current bit */
+ unsigned long val; /* current value */
+};
+
+static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct clusterip_config *c = s->private;
+ unsigned int weight;
+ u_int32_t local_nodes;
+ struct clusterip_seq_position *idx;
+
+ /* FIXME: possible race */
+ local_nodes = c->local_nodes;
+ weight = hweight32(local_nodes);
+ if (*pos >= weight)
+ return NULL;
+
+ idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
+ if (!idx)
+ return ERR_PTR(-ENOMEM);
+
+ idx->pos = *pos;
+ idx->weight = weight;
+ idx->bit = ffs(local_nodes);
+ idx->val = local_nodes;
+ clear_bit(idx->bit - 1, &idx->val);
+
+ return idx;
+}
+
+static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct clusterip_seq_position *idx = v;
+
+ *pos = ++idx->pos;
+ if (*pos >= idx->weight) {
+ kfree(v);
+ return NULL;
+ }
+ idx->bit = ffs(idx->val);
+ clear_bit(idx->bit - 1, &idx->val);
+ return idx;
+}
+
+static void clusterip_seq_stop(struct seq_file *s, void *v)
+{
+ if (!IS_ERR(v))
+ kfree(v);
+}
+
+static int clusterip_seq_show(struct seq_file *s, void *v)
+{
+ struct clusterip_seq_position *idx = v;
+
+ if (idx->pos != 0)
+ seq_putc(s, ',');
+
+ seq_printf(s, "%u", idx->bit);
+
+ if (idx->pos == idx->weight - 1)
+ seq_putc(s, '\n');
+
+ return 0;
+}
+
+static const struct seq_operations clusterip_seq_ops = {
+ .start = clusterip_seq_start,
+ .next = clusterip_seq_next,
+ .stop = clusterip_seq_stop,
+ .show = clusterip_seq_show,
+};
+
+static int clusterip_proc_open(struct inode *inode, struct file *file)
+{
+ int ret = seq_open(file, &clusterip_seq_ops);
+
+ if (!ret) {
+ struct seq_file *sf = file->private_data;
+ struct clusterip_config *c = PDE_DATA(inode);
+
+ sf->private = c;
+
+ clusterip_config_get(c);
+ }
+
+ return ret;
+}
+
+static int clusterip_proc_release(struct inode *inode, struct file *file)
+{
+ struct clusterip_config *c = PDE_DATA(inode);
+ int ret;
+
+ ret = seq_release(inode, file);
+
+ if (!ret)
+ clusterip_config_put(c);
+
+ return ret;
+}
+
+static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
+ size_t size, loff_t *ofs)
+{
+ struct clusterip_config *c = PDE_DATA(file_inode(file));
+#define PROC_WRITELEN 10
+ char buffer[PROC_WRITELEN+1];
+ unsigned long nodenum;
+ int rc;
+
+ if (size > PROC_WRITELEN)
+ return -EIO;
+ if (copy_from_user(buffer, input, size))
+ return -EFAULT;
+ buffer[size] = 0;
+
+ if (*buffer == '+') {
+ rc = kstrtoul(buffer+1, 10, &nodenum);
+ if (rc)
+ return rc;
+ if (clusterip_add_node(c, nodenum))
+ return -ENOMEM;
+ } else if (*buffer == '-') {
+ rc = kstrtoul(buffer+1, 10, &nodenum);
+ if (rc)
+ return rc;
+ if (clusterip_del_node(c, nodenum))
+ return -ENOENT;
+ } else
+ return -EIO;
+
+ return size;
+}
+
+static const struct file_operations clusterip_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = clusterip_proc_open,
+ .read = seq_read,
+ .write = clusterip_proc_write,
+ .llseek = seq_lseek,
+ .release = clusterip_proc_release,
+};
+
+#endif /* CONFIG_PROC_FS */
+
+static int clusterip_net_init(struct net *net)
+{
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+
+ INIT_LIST_HEAD(&cn->configs);
+
+ spin_lock_init(&cn->lock);
+
+#ifdef CONFIG_PROC_FS
+ cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
+ if (!cn->procdir) {
+ pr_err("Unable to proc dir entry\n");
+ return -ENOMEM;
+ }
+#endif /* CONFIG_PROC_FS */
+
+ return 0;
+}
+
+static void clusterip_net_exit(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ proc_remove(cn->procdir);
+#endif
+}
+
+static struct pernet_operations clusterip_net_ops = {
+ .init = clusterip_net_init,
+ .exit = clusterip_net_exit,
+ .id = &clusterip_net_id,
+ .size = sizeof(struct clusterip_net),
+};
+
+static int __init clusterip_tg_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&clusterip_net_ops);
+ if (ret < 0)
+ return ret;
+
+ ret = xt_register_target(&clusterip_tg_reg);
+ if (ret < 0)
+ goto cleanup_subsys;
+
+ ret = nf_register_hook(&cip_arp_ops);
+ if (ret < 0)
+ goto cleanup_target;
+
+ pr_info("ClusterIP Version %s loaded successfully\n",
+ CLUSTERIP_VERSION);
+
+ return 0;
+
+cleanup_target:
+ xt_unregister_target(&clusterip_tg_reg);
+cleanup_subsys:
+ unregister_pernet_subsys(&clusterip_net_ops);
+ return ret;
+}
+
+static void __exit clusterip_tg_exit(void)
+{
+ pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
+
+ nf_unregister_hook(&cip_arp_ops);
+ xt_unregister_target(&clusterip_tg_reg);
+ unregister_pernet_subsys(&clusterip_net_ops);
+
+ /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
+ rcu_barrier_bh();
+}
+
+module_init(clusterip_tg_init);
+module_exit(clusterip_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
new file mode 100644
index 000000000..4bf3dc49a
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -0,0 +1,138 @@
+/* iptables module for the IPv4 and TCP ECN bits, Version 1.5
+ *
+ * (C) 2002 by Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/in.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <linux/tcp.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_ECN.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag modification");
+
+/* set ECT codepoint from IP header.
+ * return false if there was an error. */
+static inline bool
+set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
+{
+ struct iphdr *iph = ip_hdr(skb);
+
+ if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
+ __u8 oldtos;
+ if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ return false;
+ iph = ip_hdr(skb);
+ oldtos = iph->tos;
+ iph->tos &= ~IPT_ECN_IP_MASK;
+ iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
+ csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
+ }
+ return true;
+}
+
+/* Return false if there was an error. */
+static inline bool
+set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
+{
+ struct tcphdr _tcph, *tcph;
+ __be16 oldval;
+
+ /* Not enough header? */
+ tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
+ if (!tcph)
+ return false;
+
+ if ((!(einfo->operation & IPT_ECN_OP_SET_ECE) ||
+ tcph->ece == einfo->proto.tcp.ece) &&
+ (!(einfo->operation & IPT_ECN_OP_SET_CWR) ||
+ tcph->cwr == einfo->proto.tcp.cwr))
+ return true;
+
+ if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
+ return false;
+ tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb);
+
+ oldval = ((__be16 *)tcph)[6];
+ if (einfo->operation & IPT_ECN_OP_SET_ECE)
+ tcph->ece = einfo->proto.tcp.ece;
+ if (einfo->operation & IPT_ECN_OP_SET_CWR)
+ tcph->cwr = einfo->proto.tcp.cwr;
+
+ inet_proto_csum_replace2(&tcph->check, skb,
+ oldval, ((__be16 *)tcph)[6], 0);
+ return true;
+}
+
+static unsigned int
+ecn_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct ipt_ECN_info *einfo = par->targinfo;
+
+ if (einfo->operation & IPT_ECN_OP_SET_IP)
+ if (!set_ect_ip(skb, einfo))
+ return NF_DROP;
+
+ if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) &&
+ ip_hdr(skb)->protocol == IPPROTO_TCP)
+ if (!set_ect_tcp(skb, einfo))
+ return NF_DROP;
+
+ return XT_CONTINUE;
+}
+
+static int ecn_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct ipt_ECN_info *einfo = par->targinfo;
+ const struct ipt_entry *e = par->entryinfo;
+
+ if (einfo->operation & IPT_ECN_OP_MASK) {
+ pr_info("unsupported ECN operation %x\n", einfo->operation);
+ return -EINVAL;
+ }
+ if (einfo->ip_ect & ~IPT_ECN_IP_MASK) {
+ pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect);
+ return -EINVAL;
+ }
+ if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) &&
+ (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) {
+ pr_info("cannot use TCP operations on a non-tcp rule\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static struct xt_target ecn_tg_reg __read_mostly = {
+ .name = "ECN",
+ .family = NFPROTO_IPV4,
+ .target = ecn_tg,
+ .targetsize = sizeof(struct ipt_ECN_info),
+ .table = "mangle",
+ .checkentry = ecn_tg_check,
+ .me = THIS_MODULE,
+};
+
+static int __init ecn_tg_init(void)
+{
+ return xt_register_target(&ecn_tg_reg);
+}
+
+static void __exit ecn_tg_exit(void)
+{
+ xt_unregister_target(&ecn_tg_reg);
+}
+
+module_init(ecn_tg_init);
+module_exit(ecn_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
new file mode 100644
index 000000000..da7f02a0b
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -0,0 +1,91 @@
+/* Masquerade. Simple mapping which alters range to a local IP address
+ (depending on route). */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/types.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <net/protocol.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <net/route.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/ipv4/nf_nat_masquerade.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
+
+/* FIXME: Multiple targets. --RR */
+static int masquerade_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+
+ if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
+ pr_debug("bad MAP_IPS.\n");
+ return -EINVAL;
+ }
+ if (mr->rangesize != 1) {
+ pr_debug("bad rangesize %u\n", mr->rangesize);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static unsigned int
+masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ struct nf_nat_range range;
+ const struct nf_nat_ipv4_multi_range_compat *mr;
+
+ mr = par->targinfo;
+ range.flags = mr->range[0].flags;
+ range.min_proto = mr->range[0].min;
+ range.max_proto = mr->range[0].max;
+
+ return nf_nat_masquerade_ipv4(skb, par->hooknum, &range, par->out);
+}
+
+static struct xt_target masquerade_tg_reg __read_mostly = {
+ .name = "MASQUERADE",
+ .family = NFPROTO_IPV4,
+ .target = masquerade_tg,
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
+ .table = "nat",
+ .hooks = 1 << NF_INET_POST_ROUTING,
+ .checkentry = masquerade_tg_check,
+ .me = THIS_MODULE,
+};
+
+static int __init masquerade_tg_init(void)
+{
+ int ret;
+
+ ret = xt_register_target(&masquerade_tg_reg);
+
+ if (ret == 0)
+ nf_nat_masquerade_ipv4_register_notifier();
+
+ return ret;
+}
+
+static void __exit masquerade_tg_exit(void)
+{
+ xt_unregister_target(&masquerade_tg_reg);
+ nf_nat_masquerade_ipv4_unregister_notifier();
+}
+
+module_init(masquerade_tg_init);
+module_exit(masquerade_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
new file mode 100644
index 000000000..87907d4bd
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -0,0 +1,113 @@
+/*
+ * This is a module which is used for rejecting packets.
+ */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <net/icmp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_REJECT.h>
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+#include <linux/netfilter_bridge.h>
+#endif
+
+#include <net/netfilter/ipv4/nf_reject.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4");
+
+static unsigned int
+reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct ipt_reject_info *reject = par->targinfo;
+ int hook = par->hooknum;
+
+ switch (reject->with) {
+ case IPT_ICMP_NET_UNREACHABLE:
+ nf_send_unreach(skb, ICMP_NET_UNREACH, hook);
+ break;
+ case IPT_ICMP_HOST_UNREACHABLE:
+ nf_send_unreach(skb, ICMP_HOST_UNREACH, hook);
+ break;
+ case IPT_ICMP_PROT_UNREACHABLE:
+ nf_send_unreach(skb, ICMP_PROT_UNREACH, hook);
+ break;
+ case IPT_ICMP_PORT_UNREACHABLE:
+ nf_send_unreach(skb, ICMP_PORT_UNREACH, hook);
+ break;
+ case IPT_ICMP_NET_PROHIBITED:
+ nf_send_unreach(skb, ICMP_NET_ANO, hook);
+ break;
+ case IPT_ICMP_HOST_PROHIBITED:
+ nf_send_unreach(skb, ICMP_HOST_ANO, hook);
+ break;
+ case IPT_ICMP_ADMIN_PROHIBITED:
+ nf_send_unreach(skb, ICMP_PKT_FILTERED, hook);
+ break;
+ case IPT_TCP_RESET:
+ nf_send_reset(skb, hook);
+ case IPT_ICMP_ECHOREPLY:
+ /* Doesn't happen. */
+ break;
+ }
+
+ return NF_DROP;
+}
+
+static int reject_tg_check(const struct xt_tgchk_param *par)
+{
+ const struct ipt_reject_info *rejinfo = par->targinfo;
+ const struct ipt_entry *e = par->entryinfo;
+
+ if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
+ pr_info("ECHOREPLY no longer supported.\n");
+ return -EINVAL;
+ } else if (rejinfo->with == IPT_TCP_RESET) {
+ /* Must specify that it's a TCP packet */
+ if (e->ip.proto != IPPROTO_TCP ||
+ (e->ip.invflags & XT_INV_PROTO)) {
+ pr_info("TCP_RESET invalid for non-tcp\n");
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static struct xt_target reject_tg_reg __read_mostly = {
+ .name = "REJECT",
+ .family = NFPROTO_IPV4,
+ .target = reject_tg,
+ .targetsize = sizeof(struct ipt_reject_info),
+ .table = "filter",
+ .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT),
+ .checkentry = reject_tg_check,
+ .me = THIS_MODULE,
+};
+
+static int __init reject_tg_init(void)
+{
+ return xt_register_target(&reject_tg_reg);
+}
+
+static void __exit reject_tg_exit(void)
+{
+ xt_unregister_target(&reject_tg_reg);
+}
+
+module_init(reject_tg_init);
+module_exit(reject_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
new file mode 100644
index 000000000..e9e677930
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -0,0 +1,480 @@
+/*
+ * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_SYNPROXY.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+
+static struct iphdr *
+synproxy_build_ip(struct sk_buff *skb, u32 saddr, u32 daddr)
+{
+ struct iphdr *iph;
+
+ skb_reset_network_header(skb);
+ iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
+ iph->version = 4;
+ iph->ihl = sizeof(*iph) / 4;
+ iph->tos = 0;
+ iph->id = 0;
+ iph->frag_off = htons(IP_DF);
+ iph->ttl = sysctl_ip_default_ttl;
+ iph->protocol = IPPROTO_TCP;
+ iph->check = 0;
+ iph->saddr = saddr;
+ iph->daddr = daddr;
+
+ return iph;
+}
+
+static void
+synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
+ struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+ struct iphdr *niph, struct tcphdr *nth,
+ unsigned int tcp_hdr_size)
+{
+ nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+ nskb->csum_start = (unsigned char *)nth - nskb->head;
+ nskb->csum_offset = offsetof(struct tcphdr, check);
+
+ skb_dst_set_noref(nskb, skb_dst(skb));
+ nskb->protocol = htons(ETH_P_IP);
+ if (ip_route_me_harder(nskb, RTN_UNSPEC))
+ goto free_nskb;
+
+ if (nfct) {
+ nskb->nfct = nfct;
+ nskb->nfctinfo = ctinfo;
+ nf_conntrack_get(nfct);
+ }
+
+ ip_local_out(nskb);
+ return;
+
+free_nskb:
+ kfree_skb(nskb);
+}
+
+static void
+synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+ u16 mss = opts->mss;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(__cookie_v4_init_sequence(iph, th, &mss));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = 0;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_syn(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts, u32 recv_seq)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(recv_seq - 1);
+ /* ack_seq is used to relay our ISN to the synproxy hook to initialize
+ * sequence number translation once a connection tracking entry exists.
+ */
+ nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = th->window;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+ niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_ack(const struct synproxy_net *snet,
+ const struct ip_ct_tcp *state,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(ntohl(th->ack_seq));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(ntohl(th->seq) + 1);
+ nth->ack_seq = th->ack_seq;
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = ntohs(htons(th->window) >> opts->wscale);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static bool
+synproxy_recv_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ struct synproxy_options *opts, u32 recv_seq)
+{
+ int mss;
+
+ mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1);
+ if (mss == 0) {
+ this_cpu_inc(snet->stats->cookie_invalid);
+ return false;
+ }
+
+ this_cpu_inc(snet->stats->cookie_valid);
+ opts->mss = mss;
+ opts->options |= XT_SYNPROXY_OPT_MSS;
+
+ if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_check_timestamp_cookie(opts);
+
+ synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
+ return true;
+}
+
+static unsigned int
+synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_synproxy_info *info = par->targinfo;
+ struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
+ struct synproxy_options opts = {};
+ struct tcphdr *th, _th;
+
+ if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+ return NF_DROP;
+
+ th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+ return NF_DROP;
+
+ if (th->syn && !(th->ack || th->fin || th->rst)) {
+ /* Initial SYN from client */
+ this_cpu_inc(snet->stats->syn_received);
+
+ if (th->ece && th->cwr)
+ opts.options |= XT_SYNPROXY_OPT_ECN;
+
+ opts.options &= info->options;
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_init_timestamp_cookie(info, &opts);
+ else
+ opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM |
+ XT_SYNPROXY_OPT_ECN);
+
+ synproxy_send_client_synack(skb, th, &opts);
+ return NF_DROP;
+
+ } else if (th->ack && !(th->fin || th->rst || th->syn)) {
+ /* ACK from client */
+ synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
+ return NF_DROP;
+ }
+
+ return XT_CONTINUE;
+}
+
+static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *nhs)
+{
+ struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out));
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ struct nf_conn_synproxy *synproxy;
+ struct synproxy_options opts = {};
+ const struct ip_ct_tcp *state;
+ struct tcphdr *th, _th;
+ unsigned int thoff;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return NF_ACCEPT;
+
+ synproxy = nfct_synproxy(ct);
+ if (synproxy == NULL)
+ return NF_ACCEPT;
+
+ if (nf_is_loopback_packet(skb))
+ return NF_ACCEPT;
+
+ thoff = ip_hdrlen(skb);
+ th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ state = &ct->proto.tcp;
+ switch (state->state) {
+ case TCP_CONNTRACK_CLOSE:
+ if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+ ntohl(th->seq) + 1);
+ break;
+ }
+
+ if (!th->syn || th->ack ||
+ CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ break;
+
+ /* Reopened connection - reset the sequence number and timestamp
+ * adjustments, they will get initialized once the connection is
+ * reestablished.
+ */
+ nf_ct_seqadj_init(ct, ctinfo, 0);
+ synproxy->tsoff = 0;
+ this_cpu_inc(snet->stats->conn_reopened);
+
+ /* fall through */
+ case TCP_CONNTRACK_SYN_SENT:
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
+ if (!th->syn && th->ack &&
+ CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+ * therefore we need to add 1 to make the SYN sequence
+ * number match the one of first SYN.
+ */
+ if (synproxy_recv_client_ack(snet, skb, th, &opts,
+ ntohl(th->seq) + 1))
+ this_cpu_inc(snet->stats->cookie_retrans);
+
+ return NF_DROP;
+ }
+
+ synproxy->isn = ntohl(th->ack_seq);
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->its = opts.tsecr;
+ break;
+ case TCP_CONNTRACK_SYN_RECV:
+ if (!th->syn || !th->ack)
+ break;
+
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->tsoff = opts.tsval - synproxy->its;
+
+ opts.options &= ~(XT_SYNPROXY_OPT_MSS |
+ XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM);
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_server_ack(snet, state, skb, th, &opts);
+
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_client_ack(snet, skb, th, &opts);
+
+ consume_skb(skb);
+ return NF_STOLEN;
+ default:
+ break;
+ }
+
+ synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+ return NF_ACCEPT;
+}
+
+static int synproxy_tg4_check(const struct xt_tgchk_param *par)
+{
+ const struct ipt_entry *e = par->entryinfo;
+
+ if (e->ip.proto != IPPROTO_TCP ||
+ e->ip.invflags & XT_INV_PROTO)
+ return -EINVAL;
+
+ return nf_ct_l3proto_try_module_get(par->family);
+}
+
+static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_l3proto_module_put(par->family);
+}
+
+static struct xt_target synproxy_tg4_reg __read_mostly = {
+ .name = "SYNPROXY",
+ .family = NFPROTO_IPV4,
+ .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD),
+ .target = synproxy_tg4,
+ .targetsize = sizeof(struct xt_synproxy_info),
+ .checkentry = synproxy_tg4_check,
+ .destroy = synproxy_tg4_destroy,
+ .me = THIS_MODULE,
+};
+
+static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
+ {
+ .hook = ipv4_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+ {
+ .hook = ipv4_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+};
+
+static int __init synproxy_tg4_init(void)
+{
+ int err;
+
+ err = nf_register_hooks(ipv4_synproxy_ops,
+ ARRAY_SIZE(ipv4_synproxy_ops));
+ if (err < 0)
+ goto err1;
+
+ err = xt_register_target(&synproxy_tg4_reg);
+ if (err < 0)
+ goto err2;
+
+ return 0;
+
+err2:
+ nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
+err1:
+ return err;
+}
+
+static void __exit synproxy_tg4_exit(void)
+{
+ xt_unregister_target(&synproxy_tg4_reg);
+ nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
+}
+
+module_init(synproxy_tg4_init);
+module_exit(synproxy_tg4_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
new file mode 100644
index 000000000..14a2aa8b8
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -0,0 +1,91 @@
+/* Kernel module to match AH parameters. */
+/* (C) 1999-2000 Yon Uriarte <yon@astaro.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/in.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+
+#include <linux/netfilter_ipv4/ipt_ah.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
+MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match");
+
+/* Returns 1 if the spi is matched by the range, 0 otherwise */
+static inline bool
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
+{
+ bool r;
+ pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
+ invert ? '!' : ' ', min, spi, max);
+ r=(spi >= min && spi <= max) ^ invert;
+ pr_debug(" result %s\n", r ? "PASS" : "FAILED");
+ return r;
+}
+
+static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ struct ip_auth_hdr _ahdr;
+ const struct ip_auth_hdr *ah;
+ const struct ipt_ah *ahinfo = par->matchinfo;
+
+ /* Must not be a fragment. */
+ if (par->fragoff != 0)
+ return false;
+
+ ah = skb_header_pointer(skb, par->thoff, sizeof(_ahdr), &_ahdr);
+ if (ah == NULL) {
+ /* We've been asked to examine this packet, and we
+ * can't. Hence, no choice but to drop.
+ */
+ pr_debug("Dropping evil AH tinygram.\n");
+ par->hotdrop = true;
+ return 0;
+ }
+
+ return spi_match(ahinfo->spis[0], ahinfo->spis[1],
+ ntohl(ah->spi),
+ !!(ahinfo->invflags & IPT_AH_INV_SPI));
+}
+
+static int ah_mt_check(const struct xt_mtchk_param *par)
+{
+ const struct ipt_ah *ahinfo = par->matchinfo;
+
+ /* Must specify no unknown invflags */
+ if (ahinfo->invflags & ~IPT_AH_INV_MASK) {
+ pr_debug("unknown flags %X\n", ahinfo->invflags);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static struct xt_match ah_mt_reg __read_mostly = {
+ .name = "ah",
+ .family = NFPROTO_IPV4,
+ .match = ah_mt,
+ .matchsize = sizeof(struct ipt_ah),
+ .proto = IPPROTO_AH,
+ .checkentry = ah_mt_check,
+ .me = THIS_MODULE,
+};
+
+static int __init ah_mt_init(void)
+{
+ return xt_register_match(&ah_mt_reg);
+}
+
+static void __exit ah_mt_exit(void)
+{
+ xt_unregister_match(&ah_mt_reg);
+}
+
+module_init(ah_mt_init);
+module_exit(ah_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
new file mode 100644
index 000000000..4bfaedf9b
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2011 Florian Westphal <fw@strlen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * based on fib_frontend.c; Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <net/ip_fib.h>
+#include <net/route.h>
+
+#include <linux/netfilter/xt_rpfilter.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("iptables: ipv4 reverse path filter match");
+
+/* don't try to find route from mcast/bcast/zeronet */
+static __be32 rpfilter_get_saddr(__be32 addr)
+{
+ if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) ||
+ ipv4_is_zeronet(addr))
+ return 0;
+ return addr;
+}
+
+static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
+ const struct net_device *dev, u8 flags)
+{
+ struct fib_result res;
+ bool dev_match;
+ struct net *net = dev_net(dev);
+ int ret __maybe_unused;
+
+ if (fib_lookup(net, fl4, &res))
+ return false;
+
+ if (res.type != RTN_UNICAST) {
+ if (res.type != RTN_LOCAL || !(flags & XT_RPFILTER_ACCEPT_LOCAL))
+ return false;
+ }
+ dev_match = false;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ for (ret = 0; ret < res.fi->fib_nhs; ret++) {
+ struct fib_nh *nh = &res.fi->fib_nh[ret];
+
+ if (nh->nh_dev == dev) {
+ dev_match = true;
+ break;
+ }
+ }
+#else
+ if (FIB_RES_DEV(res) == dev)
+ dev_match = true;
+#endif
+ if (dev_match || flags & XT_RPFILTER_LOOSE)
+ return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;
+ return dev_match;
+}
+
+static bool rpfilter_is_local(const struct sk_buff *skb)
+{
+ const struct rtable *rt = skb_rtable(skb);
+ return rt && (rt->rt_flags & RTCF_LOCAL);
+}
+
+static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_rpfilter_info *info;
+ const struct iphdr *iph;
+ struct flowi4 flow;
+ bool invert;
+
+ info = par->matchinfo;
+ invert = info->flags & XT_RPFILTER_INVERT;
+
+ if (rpfilter_is_local(skb))
+ return true ^ invert;
+
+ iph = ip_hdr(skb);
+ if (ipv4_is_multicast(iph->daddr)) {
+ if (ipv4_is_zeronet(iph->saddr))
+ return ipv4_is_local_multicast(iph->daddr) ^ invert;
+ }
+ flow.flowi4_iif = LOOPBACK_IFINDEX;
+ flow.daddr = iph->saddr;
+ flow.saddr = rpfilter_get_saddr(iph->daddr);
+ flow.flowi4_oif = 0;
+ flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
+ flow.flowi4_tos = RT_TOS(iph->tos);
+ flow.flowi4_scope = RT_SCOPE_UNIVERSE;
+
+ return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert;
+}
+
+static int rpfilter_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_rpfilter_info *info = par->matchinfo;
+ unsigned int options = ~XT_RPFILTER_OPTION_MASK;
+ if (info->flags & options) {
+ pr_info("unknown options encountered");
+ return -EINVAL;
+ }
+
+ if (strcmp(par->table, "mangle") != 0 &&
+ strcmp(par->table, "raw") != 0) {
+ pr_info("match only valid in the \'raw\' "
+ "or \'mangle\' tables, not \'%s\'.\n", par->table);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match rpfilter_mt_reg __read_mostly = {
+ .name = "rpfilter",
+ .family = NFPROTO_IPV4,
+ .checkentry = rpfilter_check,
+ .match = rpfilter_mt,
+ .matchsize = sizeof(struct xt_rpfilter_info),
+ .hooks = (1 << NF_INET_PRE_ROUTING),
+ .me = THIS_MODULE
+};
+
+static int __init rpfilter_mt_init(void)
+{
+ return xt_register_match(&rpfilter_mt_reg);
+}
+
+static void __exit rpfilter_mt_exit(void)
+{
+ xt_unregister_match(&rpfilter_mt_reg);
+}
+
+module_init(rpfilter_mt_init);
+module_exit(rpfilter_mt_exit);
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
new file mode 100644
index 000000000..a0f3beca5
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -0,0 +1,109 @@
+/*
+ * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x.
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/slab.h>
+#include <net/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables filter table");
+
+#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
+ (1 << NF_INET_FORWARD) | \
+ (1 << NF_INET_LOCAL_OUT))
+
+static const struct xt_table packet_filter = {
+ .name = "filter",
+ .valid_hooks = FILTER_VALID_HOOKS,
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV4,
+ .priority = NF_IP_PRI_FILTER,
+};
+
+static unsigned int
+iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ const struct net *net;
+
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
+ (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr)))
+ /* root is playing with raw sockets. */
+ return NF_ACCEPT;
+
+ net = dev_net(state->in ? state->in : state->out);
+ return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_filter);
+}
+
+static struct nf_hook_ops *filter_ops __read_mostly;
+
+/* Default to forward because I got too much mail already. */
+static bool forward = true;
+module_param(forward, bool, 0000);
+
+static int __net_init iptable_filter_net_init(struct net *net)
+{
+ struct ipt_replace *repl;
+
+ repl = ipt_alloc_initial_table(&packet_filter);
+ if (repl == NULL)
+ return -ENOMEM;
+ /* Entry 1 is the FORWARD hook */
+ ((struct ipt_standard *)repl->entries)[1].target.verdict =
+ forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
+
+ net->ipv4.iptable_filter =
+ ipt_register_table(net, &packet_filter, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_filter);
+}
+
+static void __net_exit iptable_filter_net_exit(struct net *net)
+{
+ ipt_unregister_table(net, net->ipv4.iptable_filter);
+}
+
+static struct pernet_operations iptable_filter_net_ops = {
+ .init = iptable_filter_net_init,
+ .exit = iptable_filter_net_exit,
+};
+
+static int __init iptable_filter_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&iptable_filter_net_ops);
+ if (ret < 0)
+ return ret;
+
+ /* Register hooks */
+ filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook);
+ if (IS_ERR(filter_ops)) {
+ ret = PTR_ERR(filter_ops);
+ unregister_pernet_subsys(&iptable_filter_net_ops);
+ }
+
+ return ret;
+}
+
+static void __exit iptable_filter_fini(void)
+{
+ xt_hook_unlink(&packet_filter, filter_ops);
+ unregister_pernet_subsys(&iptable_filter_net_ops);
+}
+
+module_init(iptable_filter_init);
+module_exit(iptable_filter_fini);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
new file mode 100644
index 000000000..62cbb8c5f
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -0,0 +1,147 @@
+/*
+ * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x.
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <net/sock.h>
+#include <net/route.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("iptables mangle table");
+
+#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
+ (1 << NF_INET_LOCAL_IN) | \
+ (1 << NF_INET_FORWARD) | \
+ (1 << NF_INET_LOCAL_OUT) | \
+ (1 << NF_INET_POST_ROUTING))
+
+static const struct xt_table packet_mangler = {
+ .name = "mangle",
+ .valid_hooks = MANGLE_VALID_HOOKS,
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV4,
+ .priority = NF_IP_PRI_MANGLE,
+};
+
+static unsigned int
+ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
+{
+ struct net_device *out = state->out;
+ unsigned int ret;
+ const struct iphdr *iph;
+ u_int8_t tos;
+ __be32 saddr, daddr;
+ u_int32_t mark;
+ int err;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ /* Save things which could affect route */
+ mark = skb->mark;
+ iph = ip_hdr(skb);
+ saddr = iph->saddr;
+ daddr = iph->daddr;
+ tos = iph->tos;
+
+ ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, state,
+ dev_net(out)->ipv4.iptable_mangle);
+ /* Reroute for ANY change. */
+ if (ret != NF_DROP && ret != NF_STOLEN) {
+ iph = ip_hdr(skb);
+
+ if (iph->saddr != saddr ||
+ iph->daddr != daddr ||
+ skb->mark != mark ||
+ iph->tos != tos) {
+ err = ip_route_me_harder(skb, RTN_UNSPEC);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+ }
+
+ return ret;
+}
+
+/* The work comes in here from netfilter.c. */
+static unsigned int
+iptable_mangle_hook(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ if (ops->hooknum == NF_INET_LOCAL_OUT)
+ return ipt_mangle_out(skb, state);
+ if (ops->hooknum == NF_INET_POST_ROUTING)
+ return ipt_do_table(skb, ops->hooknum, state,
+ dev_net(state->out)->ipv4.iptable_mangle);
+ /* PREROUTING/INPUT/FORWARD: */
+ return ipt_do_table(skb, ops->hooknum, state,
+ dev_net(state->in)->ipv4.iptable_mangle);
+}
+
+static struct nf_hook_ops *mangle_ops __read_mostly;
+
+static int __net_init iptable_mangle_net_init(struct net *net)
+{
+ struct ipt_replace *repl;
+
+ repl = ipt_alloc_initial_table(&packet_mangler);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv4.iptable_mangle =
+ ipt_register_table(net, &packet_mangler, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_mangle);
+}
+
+static void __net_exit iptable_mangle_net_exit(struct net *net)
+{
+ ipt_unregister_table(net, net->ipv4.iptable_mangle);
+}
+
+static struct pernet_operations iptable_mangle_net_ops = {
+ .init = iptable_mangle_net_init,
+ .exit = iptable_mangle_net_exit,
+};
+
+static int __init iptable_mangle_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&iptable_mangle_net_ops);
+ if (ret < 0)
+ return ret;
+
+ /* Register hooks */
+ mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook);
+ if (IS_ERR(mangle_ops)) {
+ ret = PTR_ERR(mangle_ops);
+ unregister_pernet_subsys(&iptable_mangle_net_ops);
+ }
+
+ return ret;
+}
+
+static void __exit iptable_mangle_fini(void)
+{
+ xt_hook_unlink(&packet_mangler, mangle_ops);
+ unregister_pernet_subsys(&iptable_mangle_net_ops);
+}
+
+module_init(iptable_mangle_init);
+module_exit(iptable_mangle_fini);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
new file mode 100644
index 000000000..0d4d9cdf9
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -0,0 +1,154 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+
+static const struct xt_table nf_nat_ipv4_table = {
+ .name = "nat",
+ .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV4,
+};
+
+static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ struct nf_conn *ct)
+{
+ struct net *net = nf_ct_net(ct);
+
+ return ipt_do_table(skb, ops->hooknum, state, net->ipv4.nat_table);
+}
+
+static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return nf_nat_ipv4_fn(ops, skb, state, iptable_nat_do_chain);
+}
+
+static unsigned int iptable_nat_ipv4_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return nf_nat_ipv4_in(ops, skb, state, iptable_nat_do_chain);
+}
+
+static unsigned int iptable_nat_ipv4_out(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return nf_nat_ipv4_out(ops, skb, state, iptable_nat_do_chain);
+}
+
+static unsigned int iptable_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return nf_nat_ipv4_local_fn(ops, skb, state, iptable_nat_do_chain);
+}
+
+static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
+ /* Before packet filtering, change destination */
+ {
+ .hook = iptable_nat_ipv4_in,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = iptable_nat_ipv4_out,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_NAT_SRC,
+ },
+ /* Before packet filtering, change destination */
+ {
+ .hook = iptable_nat_ipv4_local_fn,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = iptable_nat_ipv4_fn,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_NAT_SRC,
+ },
+};
+
+static int __net_init iptable_nat_net_init(struct net *net)
+{
+ struct ipt_replace *repl;
+
+ repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.nat_table);
+}
+
+static void __net_exit iptable_nat_net_exit(struct net *net)
+{
+ ipt_unregister_table(net, net->ipv4.nat_table);
+}
+
+static struct pernet_operations iptable_nat_net_ops = {
+ .init = iptable_nat_net_init,
+ .exit = iptable_nat_net_exit,
+};
+
+static int __init iptable_nat_init(void)
+{
+ int err;
+
+ err = register_pernet_subsys(&iptable_nat_net_ops);
+ if (err < 0)
+ goto err1;
+
+ err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+ if (err < 0)
+ goto err2;
+ return 0;
+
+err2:
+ unregister_pernet_subsys(&iptable_nat_net_ops);
+err1:
+ return err;
+}
+
+static void __exit iptable_nat_exit(void)
+{
+ nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+ unregister_pernet_subsys(&iptable_nat_net_ops);
+}
+
+module_init(iptable_nat_init);
+module_exit(iptable_nat_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
new file mode 100644
index 000000000..0356e6da4
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -0,0 +1,89 @@
+/*
+ * 'raw' table, which is the very first hooked in at PRE_ROUTING and LOCAL_OUT .
+ *
+ * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/slab.h>
+#include <net/ip.h>
+
+#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
+
+static const struct xt_table packet_raw = {
+ .name = "raw",
+ .valid_hooks = RAW_VALID_HOOKS,
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV4,
+ .priority = NF_IP_PRI_RAW,
+};
+
+/* The work comes in here from netfilter.c. */
+static unsigned int
+iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ const struct net *net;
+
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
+ (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr)))
+ /* root is playing with raw sockets. */
+ return NF_ACCEPT;
+
+ net = dev_net(state->in ? state->in : state->out);
+ return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_raw);
+}
+
+static struct nf_hook_ops *rawtable_ops __read_mostly;
+
+static int __net_init iptable_raw_net_init(struct net *net)
+{
+ struct ipt_replace *repl;
+
+ repl = ipt_alloc_initial_table(&packet_raw);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv4.iptable_raw =
+ ipt_register_table(net, &packet_raw, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_raw);
+}
+
+static void __net_exit iptable_raw_net_exit(struct net *net)
+{
+ ipt_unregister_table(net, net->ipv4.iptable_raw);
+}
+
+static struct pernet_operations iptable_raw_net_ops = {
+ .init = iptable_raw_net_init,
+ .exit = iptable_raw_net_exit,
+};
+
+static int __init iptable_raw_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&iptable_raw_net_ops);
+ if (ret < 0)
+ return ret;
+
+ /* Register hooks */
+ rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook);
+ if (IS_ERR(rawtable_ops)) {
+ ret = PTR_ERR(rawtable_ops);
+ unregister_pernet_subsys(&iptable_raw_net_ops);
+ }
+
+ return ret;
+}
+
+static void __exit iptable_raw_fini(void)
+{
+ xt_hook_unlink(&packet_raw, rawtable_ops);
+ unregister_pernet_subsys(&iptable_raw_net_ops);
+}
+
+module_init(iptable_raw_init);
+module_exit(iptable_raw_fini);
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
new file mode 100644
index 000000000..4bce3980c
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -0,0 +1,109 @@
+/*
+ * "security" table
+ *
+ * This is for use by Mandatory Access Control (MAC) security models,
+ * which need to be able to manage security policy in separate context
+ * to DAC.
+ *
+ * Based on iptable_mangle.c
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org>
+ * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/slab.h>
+#include <net/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>");
+MODULE_DESCRIPTION("iptables security table, for MAC rules");
+
+#define SECURITY_VALID_HOOKS (1 << NF_INET_LOCAL_IN) | \
+ (1 << NF_INET_FORWARD) | \
+ (1 << NF_INET_LOCAL_OUT)
+
+static const struct xt_table security_table = {
+ .name = "security",
+ .valid_hooks = SECURITY_VALID_HOOKS,
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV4,
+ .priority = NF_IP_PRI_SECURITY,
+};
+
+static unsigned int
+iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ const struct net *net;
+
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
+ (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr)))
+ /* Somebody is playing with raw sockets. */
+ return NF_ACCEPT;
+
+ net = dev_net(state->in ? state->in : state->out);
+ return ipt_do_table(skb, ops->hooknum, state,
+ net->ipv4.iptable_security);
+}
+
+static struct nf_hook_ops *sectbl_ops __read_mostly;
+
+static int __net_init iptable_security_net_init(struct net *net)
+{
+ struct ipt_replace *repl;
+
+ repl = ipt_alloc_initial_table(&security_table);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv4.iptable_security =
+ ipt_register_table(net, &security_table, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_security);
+}
+
+static void __net_exit iptable_security_net_exit(struct net *net)
+{
+ ipt_unregister_table(net, net->ipv4.iptable_security);
+}
+
+static struct pernet_operations iptable_security_net_ops = {
+ .init = iptable_security_net_init,
+ .exit = iptable_security_net_exit,
+};
+
+static int __init iptable_security_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&iptable_security_net_ops);
+ if (ret < 0)
+ return ret;
+
+ sectbl_ops = xt_hook_link(&security_table, iptable_security_hook);
+ if (IS_ERR(sectbl_ops)) {
+ ret = PTR_ERR(sectbl_ops);
+ goto cleanup_table;
+ }
+
+ return ret;
+
+cleanup_table:
+ unregister_pernet_subsys(&iptable_security_net_ops);
+ return ret;
+}
+
+static void __exit iptable_security_fini(void)
+{
+ xt_hook_unlink(&security_table, sectbl_ops);
+ unregister_pernet_subsys(&iptable_security_net_ops);
+}
+
+module_init(iptable_security_init);
+module_exit(iptable_security_fini);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
new file mode 100644
index 000000000..30ad9554b
--- /dev/null
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -0,0 +1,542 @@
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <net/netfilter/nf_log.h>
+
+static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
+ struct nf_conntrack_tuple *tuple)
+{
+ const __be32 *ap;
+ __be32 _addrs[2];
+ ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
+ sizeof(u_int32_t) * 2, _addrs);
+ if (ap == NULL)
+ return false;
+
+ tuple->src.u3.ip = ap[0];
+ tuple->dst.u3.ip = ap[1];
+
+ return true;
+}
+
+static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
+{
+ tuple->src.u3.ip = orig->dst.u3.ip;
+ tuple->dst.u3.ip = orig->src.u3.ip;
+
+ return true;
+}
+
+static void ipv4_print_tuple(struct seq_file *s,
+ const struct nf_conntrack_tuple *tuple)
+{
+ seq_printf(s, "src=%pI4 dst=%pI4 ",
+ &tuple->src.u3.ip, &tuple->dst.u3.ip);
+}
+
+static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+ unsigned int *dataoff, u_int8_t *protonum)
+{
+ const struct iphdr *iph;
+ struct iphdr _iph;
+
+ iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+ if (iph == NULL)
+ return -NF_ACCEPT;
+
+ /* Conntrack defragments packets, we might still see fragments
+ * inside ICMP packets though. */
+ if (iph->frag_off & htons(IP_OFFSET))
+ return -NF_ACCEPT;
+
+ *dataoff = nhoff + (iph->ihl << 2);
+ *protonum = iph->protocol;
+
+ /* Check bogus IP headers */
+ if (*dataoff > skb->len) {
+ pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: "
+ "nhoff %u, ihl %u, skblen %u\n",
+ nhoff, iph->ihl << 2, skb->len);
+ return -NF_ACCEPT;
+ }
+
+ return NF_ACCEPT;
+}
+
+static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn_help *help;
+ const struct nf_conntrack_helper *helper;
+
+ /* This is where we call the helper: as the packet goes out. */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ return NF_ACCEPT;
+
+ help = nfct_help(ct);
+ if (!help)
+ return NF_ACCEPT;
+
+ /* rcu_read_lock()ed by nf_hook_slow */
+ helper = rcu_dereference(help->helper);
+ if (!helper)
+ return NF_ACCEPT;
+
+ return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
+ ct, ctinfo);
+}
+
+static unsigned int ipv4_confirm(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ goto out;
+
+ /* adjust seqs for loopback traffic only in outgoing direction */
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+ !nf_is_loopback_packet(skb)) {
+ if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
+ NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+ return NF_DROP;
+ }
+ }
+out:
+ /* We've seen it coming out the other side: confirm it */
+ return nf_conntrack_confirm(skb);
+}
+
+static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return nf_conntrack_in(dev_net(state->in), PF_INET, ops->hooknum, skb);
+}
+
+static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
+ return NF_ACCEPT;
+ return nf_conntrack_in(dev_net(state->out), PF_INET, ops->hooknum, skb);
+}
+
+/* Connection tracking may drop packets, but never alters them, so
+ make it the first hook. */
+static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
+ {
+ .hook = ipv4_conntrack_in,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK,
+ },
+ {
+ .hook = ipv4_conntrack_local,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_CONNTRACK,
+ },
+ {
+ .hook = ipv4_helper,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_HELPER,
+ },
+ {
+ .hook = ipv4_confirm,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+ },
+ {
+ .hook = ipv4_helper,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_HELPER,
+ },
+ {
+ .hook = ipv4_confirm,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+ },
+};
+
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+static int log_invalid_proto_min = 0;
+static int log_invalid_proto_max = 255;
+
+static struct ctl_table ip_ct_sysctl_table[] = {
+ {
+ .procname = "ip_conntrack_max",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "ip_conntrack_count",
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "ip_conntrack_buckets",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "ip_conntrack_checksum",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "ip_conntrack_log_invalid",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &log_invalid_proto_min,
+ .extra2 = &log_invalid_proto_max,
+ },
+ { }
+};
+#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */
+
+/* Fast function for those who don't want to parse /proc (and I don't
+ blame them). */
+/* Reversing the socket's dst/src point of view gives us the reply
+ mapping. */
+static int
+getorigdst(struct sock *sk, int optval, void __user *user, int *len)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct nf_conntrack_tuple_hash *h;
+ struct nf_conntrack_tuple tuple;
+
+ memset(&tuple, 0, sizeof(tuple));
+ tuple.src.u3.ip = inet->inet_rcv_saddr;
+ tuple.src.u.tcp.port = inet->inet_sport;
+ tuple.dst.u3.ip = inet->inet_daddr;
+ tuple.dst.u.tcp.port = inet->inet_dport;
+ tuple.src.l3num = PF_INET;
+ tuple.dst.protonum = sk->sk_protocol;
+
+ /* We only do TCP and SCTP at the moment: is there a better way? */
+ if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) {
+ pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
+ return -ENOPROTOOPT;
+ }
+
+ if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
+ pr_debug("SO_ORIGINAL_DST: len %d not %Zu\n",
+ *len, sizeof(struct sockaddr_in));
+ return -EINVAL;
+ }
+
+ h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
+ if (h) {
+ struct sockaddr_in sin;
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ sin.sin_family = AF_INET;
+ sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.dst.u.tcp.port;
+ sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple.dst.u3.ip;
+ memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
+
+ pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
+ &sin.sin_addr.s_addr, ntohs(sin.sin_port));
+ nf_ct_put(ct);
+ if (copy_to_user(user, &sin, sizeof(sin)) != 0)
+ return -EFAULT;
+ else
+ return 0;
+ }
+ pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
+ &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
+ &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
+ return -ENOENT;
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *tuple)
+{
+ if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
+ nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = {
+ [CTA_IP_V4_SRC] = { .type = NLA_U32 },
+ [CTA_IP_V4_DST] = { .type = NLA_U32 },
+};
+
+static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
+ struct nf_conntrack_tuple *t)
+{
+ if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
+ return -EINVAL;
+
+ t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
+ t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]);
+
+ return 0;
+}
+
+static int ipv4_nlattr_tuple_size(void)
+{
+ return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1);
+}
+#endif
+
+static struct nf_sockopt_ops so_getorigdst = {
+ .pf = PF_INET,
+ .get_optmin = SO_ORIGINAL_DST,
+ .get_optmax = SO_ORIGINAL_DST+1,
+ .get = getorigdst,
+ .owner = THIS_MODULE,
+};
+
+static int ipv4_init_net(struct net *net)
+{
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ struct nf_ip_net *in = &net->ct.nf_ct_proto;
+ in->ctl_table = kmemdup(ip_ct_sysctl_table,
+ sizeof(ip_ct_sysctl_table),
+ GFP_KERNEL);
+ if (!in->ctl_table)
+ return -ENOMEM;
+
+ in->ctl_table[0].data = &nf_conntrack_max;
+ in->ctl_table[1].data = &net->ct.count;
+ in->ctl_table[2].data = &net->ct.htable_size;
+ in->ctl_table[3].data = &net->ct.sysctl_checksum;
+ in->ctl_table[4].data = &net->ct.sysctl_log_invalid;
+#endif
+ return 0;
+}
+
+struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
+ .l3proto = PF_INET,
+ .name = "ipv4",
+ .pkt_to_tuple = ipv4_pkt_to_tuple,
+ .invert_tuple = ipv4_invert_tuple,
+ .print_tuple = ipv4_print_tuple,
+ .get_l4proto = ipv4_get_l4proto,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .tuple_to_nlattr = ipv4_tuple_to_nlattr,
+ .nlattr_tuple_size = ipv4_nlattr_tuple_size,
+ .nlattr_to_tuple = ipv4_nlattr_to_tuple,
+ .nla_policy = ipv4_nla_policy,
+#endif
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ .ctl_table_path = "net/ipv4/netfilter",
+#endif
+ .init_net = ipv4_init_net,
+ .me = THIS_MODULE,
+};
+
+module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
+ &nf_conntrack_htable_size, 0600);
+
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
+MODULE_ALIAS("ip_conntrack");
+MODULE_LICENSE("GPL");
+
+static int ipv4_net_init(struct net *net)
+{
+ int ret = 0;
+
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4);
+ if (ret < 0) {
+ pr_err("nf_conntrack_tcp4: pernet registration failed\n");
+ goto out_tcp;
+ }
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4);
+ if (ret < 0) {
+ pr_err("nf_conntrack_udp4: pernet registration failed\n");
+ goto out_udp;
+ }
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp);
+ if (ret < 0) {
+ pr_err("nf_conntrack_icmp4: pernet registration failed\n");
+ goto out_icmp;
+ }
+ ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv4: pernet registration failed\n");
+ goto out_ipv4;
+ }
+ return 0;
+out_ipv4:
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
+out_icmp:
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
+out_udp:
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
+out_tcp:
+ return ret;
+}
+
+static void ipv4_net_exit(struct net *net)
+{
+ nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
+}
+
+static struct pernet_operations ipv4_net_ops = {
+ .init = ipv4_net_init,
+ .exit = ipv4_net_exit,
+};
+
+static int __init nf_conntrack_l3proto_ipv4_init(void)
+{
+ int ret = 0;
+
+ need_conntrack();
+ nf_defrag_ipv4_enable();
+
+ ret = nf_register_sockopt(&so_getorigdst);
+ if (ret < 0) {
+ printk(KERN_ERR "Unable to register netfilter socket option\n");
+ return ret;
+ }
+
+ ret = register_pernet_subsys(&ipv4_net_ops);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv4: can't register pernet ops\n");
+ goto cleanup_sockopt;
+ }
+
+ ret = nf_register_hooks(ipv4_conntrack_ops,
+ ARRAY_SIZE(ipv4_conntrack_ops));
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv4: can't register hooks.\n");
+ goto cleanup_pernet;
+ }
+
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n");
+ goto cleanup_hooks;
+ }
+
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n");
+ goto cleanup_tcp4;
+ }
+
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n");
+ goto cleanup_udp4;
+ }
+
+ ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n");
+ goto cleanup_icmpv4;
+ }
+
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ ret = nf_conntrack_ipv4_compat_init();
+ if (ret < 0)
+ goto cleanup_proto;
+#endif
+ return ret;
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ cleanup_proto:
+ nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
+#endif
+ cleanup_icmpv4:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
+ cleanup_udp4:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
+ cleanup_tcp4:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
+ cleanup_hooks:
+ nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
+ cleanup_pernet:
+ unregister_pernet_subsys(&ipv4_net_ops);
+ cleanup_sockopt:
+ nf_unregister_sockopt(&so_getorigdst);
+ return ret;
+}
+
+static void __exit nf_conntrack_l3proto_ipv4_fini(void)
+{
+ synchronize_net();
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ nf_conntrack_ipv4_compat_fini();
+#endif
+ nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
+ nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
+ unregister_pernet_subsys(&ipv4_net_ops);
+ nf_unregister_sockopt(&so_getorigdst);
+}
+
+module_init(nf_conntrack_l3proto_ipv4_init);
+module_exit(nf_conntrack_l3proto_ipv4_fini);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
new file mode 100644
index 000000000..f0dfe92a0
--- /dev/null
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -0,0 +1,469 @@
+/* ip_conntrack proc compat - based on ip_conntrack_standalone.c
+ *
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/percpu.h>
+#include <linux/security.h>
+#include <net/net_namespace.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <linux/rculist_nulls.h>
+#include <linux/export.h>
+
+struct ct_iter_state {
+ struct seq_net_private p;
+ unsigned int bucket;
+};
+
+static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
+{
+ struct net *net = seq_file_net(seq);
+ struct ct_iter_state *st = seq->private;
+ struct hlist_nulls_node *n;
+
+ for (st->bucket = 0;
+ st->bucket < net->ct.htable_size;
+ st->bucket++) {
+ n = rcu_dereference(
+ hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
+ if (!is_a_nulls(n))
+ return n;
+ }
+ return NULL;
+}
+
+static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
+ struct hlist_nulls_node *head)
+{
+ struct net *net = seq_file_net(seq);
+ struct ct_iter_state *st = seq->private;
+
+ head = rcu_dereference(hlist_nulls_next_rcu(head));
+ while (is_a_nulls(head)) {
+ if (likely(get_nulls_value(head) == st->bucket)) {
+ if (++st->bucket >= net->ct.htable_size)
+ return NULL;
+ }
+ head = rcu_dereference(
+ hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
+ }
+ return head;
+}
+
+static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
+{
+ struct hlist_nulls_node *head = ct_get_first(seq);
+
+ if (head)
+ while (pos && (head = ct_get_next(seq, head)))
+ pos--;
+ return pos ? NULL : head;
+}
+
+static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+{
+ rcu_read_lock();
+ return ct_get_idx(seq, *pos);
+}
+
+static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return ct_get_next(s, v);
+}
+
+static void ct_seq_stop(struct seq_file *s, void *v)
+ __releases(RCU)
+{
+ rcu_read_unlock();
+}
+
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
+{
+ int ret;
+ u32 len;
+ char *secctx;
+
+ ret = security_secid_to_secctx(ct->secmark, &secctx, &len);
+ if (ret)
+ return;
+
+ seq_printf(s, "secctx=%s ", secctx);
+
+ security_release_secctx(secctx, len);
+}
+#else
+static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
+{
+}
+#endif
+
+static int ct_seq_show(struct seq_file *s, void *v)
+{
+ struct nf_conntrack_tuple_hash *hash = v;
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
+ int ret = 0;
+
+ NF_CT_ASSERT(ct);
+ if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
+ return 0;
+
+
+ /* we only want to print DIR_ORIGINAL */
+ if (NF_CT_DIRECTION(hash))
+ goto release;
+ if (nf_ct_l3num(ct) != AF_INET)
+ goto release;
+
+ l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
+ NF_CT_ASSERT(l3proto);
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ NF_CT_ASSERT(l4proto);
+
+ ret = -ENOSPC;
+ seq_printf(s, "%-8s %u %ld ",
+ l4proto->name, nf_ct_protonum(ct),
+ timer_pending(&ct->timeout)
+ ? (long)(ct->timeout.expires - jiffies)/HZ : 0);
+
+ if (l4proto->print_conntrack)
+ l4proto->print_conntrack(s, ct);
+
+ if (seq_has_overflowed(s))
+ goto release;
+
+ print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ l3proto, l4proto);
+
+ if (seq_has_overflowed(s))
+ goto release;
+
+ if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
+ goto release;
+
+ if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
+ seq_printf(s, "[UNREPLIED] ");
+
+ print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ l3proto, l4proto);
+
+ if (seq_has_overflowed(s))
+ goto release;
+
+ if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
+ goto release;
+
+ if (test_bit(IPS_ASSURED_BIT, &ct->status))
+ seq_printf(s, "[ASSURED] ");
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ seq_printf(s, "mark=%u ", ct->mark);
+#endif
+
+ ct_show_secctx(s, ct);
+
+ seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
+
+ if (seq_has_overflowed(s))
+ goto release;
+
+ ret = 0;
+release:
+ nf_ct_put(ct);
+ return ret;
+}
+
+static const struct seq_operations ct_seq_ops = {
+ .start = ct_seq_start,
+ .next = ct_seq_next,
+ .stop = ct_seq_stop,
+ .show = ct_seq_show
+};
+
+static int ct_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &ct_seq_ops,
+ sizeof(struct ct_iter_state));
+}
+
+static const struct file_operations ct_file_ops = {
+ .owner = THIS_MODULE,
+ .open = ct_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+/* expects */
+struct ct_expect_iter_state {
+ struct seq_net_private p;
+ unsigned int bucket;
+};
+
+static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
+{
+ struct net *net = seq_file_net(seq);
+ struct ct_expect_iter_state *st = seq->private;
+ struct hlist_node *n;
+
+ for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
+ n = rcu_dereference(
+ hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
+ if (n)
+ return n;
+ }
+ return NULL;
+}
+
+static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
+ struct hlist_node *head)
+{
+ struct net *net = seq_file_net(seq);
+ struct ct_expect_iter_state *st = seq->private;
+
+ head = rcu_dereference(hlist_next_rcu(head));
+ while (head == NULL) {
+ if (++st->bucket >= nf_ct_expect_hsize)
+ return NULL;
+ head = rcu_dereference(
+ hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
+ }
+ return head;
+}
+
+static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
+{
+ struct hlist_node *head = ct_expect_get_first(seq);
+
+ if (head)
+ while (pos && (head = ct_expect_get_next(seq, head)))
+ pos--;
+ return pos ? NULL : head;
+}
+
+static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+{
+ rcu_read_lock();
+ return ct_expect_get_idx(seq, *pos);
+}
+
+static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return ct_expect_get_next(seq, v);
+}
+
+static void exp_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
+{
+ rcu_read_unlock();
+}
+
+static int exp_seq_show(struct seq_file *s, void *v)
+{
+ struct nf_conntrack_expect *exp;
+ const struct hlist_node *n = v;
+
+ exp = hlist_entry(n, struct nf_conntrack_expect, hnode);
+
+ if (exp->tuple.src.l3num != AF_INET)
+ return 0;
+
+ if (exp->timeout.function)
+ seq_printf(s, "%ld ", timer_pending(&exp->timeout)
+ ? (long)(exp->timeout.expires - jiffies)/HZ : 0);
+ else
+ seq_printf(s, "- ");
+
+ seq_printf(s, "proto=%u ", exp->tuple.dst.protonum);
+
+ print_tuple(s, &exp->tuple,
+ __nf_ct_l3proto_find(exp->tuple.src.l3num),
+ __nf_ct_l4proto_find(exp->tuple.src.l3num,
+ exp->tuple.dst.protonum));
+ seq_putc(s, '\n');
+
+ return 0;
+}
+
+static const struct seq_operations exp_seq_ops = {
+ .start = exp_seq_start,
+ .next = exp_seq_next,
+ .stop = exp_seq_stop,
+ .show = exp_seq_show
+};
+
+static int exp_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &exp_seq_ops,
+ sizeof(struct ct_expect_iter_state));
+}
+
+static const struct file_operations ip_exp_file_ops = {
+ .owner = THIS_MODULE,
+ .open = exp_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct net *net = seq_file_net(seq);
+ int cpu;
+
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+
+ for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu+1;
+ return per_cpu_ptr(net->ct.stat, cpu);
+ }
+
+ return NULL;
+}
+
+static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct net *net = seq_file_net(seq);
+ int cpu;
+
+ for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu+1;
+ return per_cpu_ptr(net->ct.stat, cpu);
+ }
+
+ return NULL;
+}
+
+static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int ct_cpu_seq_show(struct seq_file *seq, void *v)
+{
+ struct net *net = seq_file_net(seq);
+ unsigned int nr_conntracks = atomic_read(&net->ct.count);
+ const struct ip_conntrack_stat *st = v;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
+ return 0;
+ }
+
+ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
+ "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ nr_conntracks,
+ st->searched,
+ st->found,
+ st->new,
+ st->invalid,
+ st->ignore,
+ st->delete,
+ st->delete_list,
+ st->insert,
+ st->insert_failed,
+ st->drop,
+ st->early_drop,
+ st->error,
+
+ st->expect_new,
+ st->expect_create,
+ st->expect_delete,
+ st->search_restart
+ );
+ return 0;
+}
+
+static const struct seq_operations ct_cpu_seq_ops = {
+ .start = ct_cpu_seq_start,
+ .next = ct_cpu_seq_next,
+ .stop = ct_cpu_seq_stop,
+ .show = ct_cpu_seq_show,
+};
+
+static int ct_cpu_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &ct_cpu_seq_ops,
+ sizeof(struct seq_net_private));
+}
+
+static const struct file_operations ct_cpu_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = ct_cpu_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+static int __net_init ip_conntrack_net_init(struct net *net)
+{
+ struct proc_dir_entry *proc, *proc_exp, *proc_stat;
+
+ proc = proc_create("ip_conntrack", 0440, net->proc_net, &ct_file_ops);
+ if (!proc)
+ goto err1;
+
+ proc_exp = proc_create("ip_conntrack_expect", 0440, net->proc_net,
+ &ip_exp_file_ops);
+ if (!proc_exp)
+ goto err2;
+
+ proc_stat = proc_create("ip_conntrack", S_IRUGO,
+ net->proc_net_stat, &ct_cpu_seq_fops);
+ if (!proc_stat)
+ goto err3;
+ return 0;
+
+err3:
+ remove_proc_entry("ip_conntrack_expect", net->proc_net);
+err2:
+ remove_proc_entry("ip_conntrack", net->proc_net);
+err1:
+ return -ENOMEM;
+}
+
+static void __net_exit ip_conntrack_net_exit(struct net *net)
+{
+ remove_proc_entry("ip_conntrack", net->proc_net_stat);
+ remove_proc_entry("ip_conntrack_expect", net->proc_net);
+ remove_proc_entry("ip_conntrack", net->proc_net);
+}
+
+static struct pernet_operations ip_conntrack_net_ops = {
+ .init = ip_conntrack_net_init,
+ .exit = ip_conntrack_net_exit,
+};
+
+int __init nf_conntrack_ipv4_compat_init(void)
+{
+ return register_pernet_subsys(&ip_conntrack_net_ops);
+}
+
+void __exit nf_conntrack_ipv4_compat_fini(void)
+{
+ unregister_pernet_subsys(&ip_conntrack_net_ops);
+}
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
new file mode 100644
index 000000000..80d5554b9
--- /dev/null
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -0,0 +1,428 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/in.h>
+#include <linux/icmp.h>
+#include <linux/seq_file.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_log.h>
+
+static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ;
+
+static inline struct nf_icmp_net *icmp_pernet(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmp;
+}
+
+static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+ struct nf_conntrack_tuple *tuple)
+{
+ const struct icmphdr *hp;
+ struct icmphdr _hdr;
+
+ hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+ if (hp == NULL)
+ return false;
+
+ tuple->dst.u.icmp.type = hp->type;
+ tuple->src.u.icmp.id = hp->un.echo.id;
+ tuple->dst.u.icmp.code = hp->code;
+
+ return true;
+}
+
+/* Add 1; spaces filled with 0. */
+static const u_int8_t invmap[] = {
+ [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+ [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+ [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+ [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+ [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+ [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+ [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+ [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
+};
+
+static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
+{
+ if (orig->dst.u.icmp.type >= sizeof(invmap) ||
+ !invmap[orig->dst.u.icmp.type])
+ return false;
+
+ tuple->src.u.icmp.id = orig->src.u.icmp.id;
+ tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
+ tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
+ return true;
+}
+
+/* Print out the per-protocol part of the tuple. */
+static void icmp_print_tuple(struct seq_file *s,
+ const struct nf_conntrack_tuple *tuple)
+{
+ seq_printf(s, "type=%u code=%u id=%u ",
+ tuple->dst.u.icmp.type,
+ tuple->dst.u.icmp.code,
+ ntohs(tuple->src.u.icmp.id));
+}
+
+static unsigned int *icmp_get_timeouts(struct net *net)
+{
+ return &icmp_pernet(net)->timeout;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int icmp_packet(struct nf_conn *ct,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ u_int8_t pf,
+ unsigned int hooknum,
+ unsigned int *timeout)
+{
+ /* Do not immediately delete the connection after the first
+ successful reply to avoid excessive conntrackd traffic
+ and also to handle correctly ICMP echo reply duplicates. */
+ nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+
+ return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
+ unsigned int dataoff, unsigned int *timeouts)
+{
+ static const u_int8_t valid_new[] = {
+ [ICMP_ECHO] = 1,
+ [ICMP_TIMESTAMP] = 1,
+ [ICMP_INFO_REQUEST] = 1,
+ [ICMP_ADDRESS] = 1
+ };
+
+ if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) ||
+ !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
+ /* Can't create a new ICMP `conn' with this. */
+ pr_debug("icmp: can't create new conn with type %u\n",
+ ct->tuplehash[0].tuple.dst.u.icmp.type);
+ nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
+ return false;
+ }
+ return true;
+}
+
+/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
+static int
+icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
+ enum ip_conntrack_info *ctinfo,
+ unsigned int hooknum)
+{
+ struct nf_conntrack_tuple innertuple, origtuple;
+ const struct nf_conntrack_l4proto *innerproto;
+ const struct nf_conntrack_tuple_hash *h;
+ u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+
+ NF_CT_ASSERT(skb->nfct == NULL);
+
+ /* Are they talking about one of our connections? */
+ if (!nf_ct_get_tuplepr(skb,
+ skb_network_offset(skb) + ip_hdrlen(skb)
+ + sizeof(struct icmphdr),
+ PF_INET, &origtuple)) {
+ pr_debug("icmp_error_message: failed to get tuple\n");
+ return -NF_ACCEPT;
+ }
+
+ /* rcu_read_lock()ed by nf_hook_slow */
+ innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
+
+ /* Ordinarily, we'd expect the inverted tupleproto, but it's
+ been preserved inside the ICMP. */
+ if (!nf_ct_invert_tuple(&innertuple, &origtuple,
+ &nf_conntrack_l3proto_ipv4, innerproto)) {
+ pr_debug("icmp_error_message: no match\n");
+ return -NF_ACCEPT;
+ }
+
+ *ctinfo = IP_CT_RELATED;
+
+ h = nf_conntrack_find_get(net, zone, &innertuple);
+ if (!h) {
+ pr_debug("icmp_error_message: no match\n");
+ return -NF_ACCEPT;
+ }
+
+ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+ *ctinfo += IP_CT_IS_REPLY;
+
+ /* Update skb to refer to this connection */
+ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
+ skb->nfctinfo = *ctinfo;
+ return NF_ACCEPT;
+}
+
+/* Small and modified version of icmp_rcv */
+static int
+icmp_error(struct net *net, struct nf_conn *tmpl,
+ struct sk_buff *skb, unsigned int dataoff,
+ enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum)
+{
+ const struct icmphdr *icmph;
+ struct icmphdr _ih;
+
+ /* Not enough header? */
+ icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
+ if (icmph == NULL) {
+ if (LOG_INVALID(net, IPPROTO_ICMP))
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL,
+ NULL, "nf_ct_icmp: short packet ");
+ return -NF_ACCEPT;
+ }
+
+ /* See ip_conntrack_proto_tcp.c */
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ nf_ip_checksum(skb, hooknum, dataoff, 0)) {
+ if (LOG_INVALID(net, IPPROTO_ICMP))
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
+ "nf_ct_icmp: bad HW ICMP checksum ");
+ return -NF_ACCEPT;
+ }
+
+ /*
+ * 18 is the highest 'known' ICMP type. Anything else is a mystery
+ *
+ * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
+ * discarded.
+ */
+ if (icmph->type > NR_ICMP_TYPES) {
+ if (LOG_INVALID(net, IPPROTO_ICMP))
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
+ "nf_ct_icmp: invalid ICMP type ");
+ return -NF_ACCEPT;
+ }
+
+ /* Need to track icmp error message? */
+ if (icmph->type != ICMP_DEST_UNREACH &&
+ icmph->type != ICMP_SOURCE_QUENCH &&
+ icmph->type != ICMP_TIME_EXCEEDED &&
+ icmph->type != ICMP_PARAMETERPROB &&
+ icmph->type != ICMP_REDIRECT)
+ return NF_ACCEPT;
+
+ return icmp_error_message(net, tmpl, skb, ctinfo, hooknum);
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int icmp_tuple_to_nlattr(struct sk_buff *skb,
+ const struct nf_conntrack_tuple *t)
+{
+ if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) ||
+ nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) ||
+ nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = {
+ [CTA_PROTO_ICMP_TYPE] = { .type = NLA_U8 },
+ [CTA_PROTO_ICMP_CODE] = { .type = NLA_U8 },
+ [CTA_PROTO_ICMP_ID] = { .type = NLA_U16 },
+};
+
+static int icmp_nlattr_to_tuple(struct nlattr *tb[],
+ struct nf_conntrack_tuple *tuple)
+{
+ if (!tb[CTA_PROTO_ICMP_TYPE] ||
+ !tb[CTA_PROTO_ICMP_CODE] ||
+ !tb[CTA_PROTO_ICMP_ID])
+ return -EINVAL;
+
+ tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
+ tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
+ tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
+
+ if (tuple->dst.u.icmp.type >= sizeof(invmap) ||
+ !invmap[tuple->dst.u.icmp.type])
+ return -EINVAL;
+
+ return 0;
+}
+
+static int icmp_nlattr_tuple_size(void)
+{
+ return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+}
+#endif
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
+{
+ unsigned int *timeout = data;
+ struct nf_icmp_net *in = icmp_pernet(net);
+
+ if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
+ *timeout =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
+ } else {
+ /* Set default ICMP timeout. */
+ *timeout = in->timeout;
+ }
+ return 0;
+}
+
+static int
+icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+ const unsigned int *timeout = data;
+
+ if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static const struct nla_policy
+icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
+ [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table icmp_sysctl_table[] = {
+ {
+ .procname = "nf_conntrack_icmp_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ { }
+};
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+static struct ctl_table icmp_compat_sysctl_table[] = {
+ {
+ .procname = "ip_conntrack_icmp_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ { }
+};
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
+#endif /* CONFIG_SYSCTL */
+
+static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+ pn->ctl_table = kmemdup(icmp_sysctl_table,
+ sizeof(icmp_sysctl_table),
+ GFP_KERNEL);
+ if (!pn->ctl_table)
+ return -ENOMEM;
+
+ pn->ctl_table[0].data = &in->timeout;
+#endif
+ return 0;
+}
+
+static int icmp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
+ struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+ pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table,
+ sizeof(icmp_compat_sysctl_table),
+ GFP_KERNEL);
+ if (!pn->ctl_compat_table)
+ return -ENOMEM;
+
+ pn->ctl_compat_table[0].data = &in->timeout;
+#endif
+#endif
+ return 0;
+}
+
+static int icmp_init_net(struct net *net, u_int16_t proto)
+{
+ int ret;
+ struct nf_icmp_net *in = icmp_pernet(net);
+ struct nf_proto_net *pn = &in->pn;
+
+ in->timeout = nf_ct_icmp_timeout;
+
+ ret = icmp_kmemdup_compat_sysctl_table(pn, in);
+ if (ret < 0)
+ return ret;
+
+ ret = icmp_kmemdup_sysctl_table(pn, in);
+ if (ret < 0)
+ nf_ct_kfree_compat_sysctl_table(pn);
+
+ return ret;
+}
+
+static struct nf_proto_net *icmp_get_net_proto(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmp.pn;
+}
+
+struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
+{
+ .l3proto = PF_INET,
+ .l4proto = IPPROTO_ICMP,
+ .name = "icmp",
+ .pkt_to_tuple = icmp_pkt_to_tuple,
+ .invert_tuple = icmp_invert_tuple,
+ .print_tuple = icmp_print_tuple,
+ .packet = icmp_packet,
+ .get_timeouts = icmp_get_timeouts,
+ .new = icmp_new,
+ .error = icmp_error,
+ .destroy = NULL,
+ .me = NULL,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .tuple_to_nlattr = icmp_tuple_to_nlattr,
+ .nlattr_tuple_size = icmp_nlattr_tuple_size,
+ .nlattr_to_tuple = icmp_nlattr_to_tuple,
+ .nla_policy = icmp_nla_policy,
+#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+ .ctnl_timeout = {
+ .nlattr_to_obj = icmp_timeout_nlattr_to_obj,
+ .obj_to_nlattr = icmp_timeout_obj_to_nlattr,
+ .nlattr_max = CTA_TIMEOUT_ICMP_MAX,
+ .obj_size = sizeof(unsigned int),
+ .nla_policy = icmp_timeout_nla_policy,
+ },
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+ .init_net = icmp_init_net,
+ .get_net_proto = icmp_get_net_proto,
+};
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
new file mode 100644
index 000000000..c88b7d434
--- /dev/null
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -0,0 +1,129 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+#include <net/netfilter/nf_conntrack_zones.h>
+
+static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
+{
+ int err;
+
+ skb_orphan(skb);
+
+ local_bh_disable();
+ err = ip_defrag(skb, user);
+ local_bh_enable();
+
+ if (!err) {
+ ip_send_check(ip_hdr(skb));
+ skb->ignore_df = 1;
+ }
+
+ return err;
+}
+
+static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
+ struct sk_buff *skb)
+{
+ u16 zone = NF_CT_DEFAULT_ZONE;
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ if (skb->nfct)
+ zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+#endif
+
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ if (skb->nf_bridge &&
+ skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
+ return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
+#endif
+ if (hooknum == NF_INET_PRE_ROUTING)
+ return IP_DEFRAG_CONNTRACK_IN + zone;
+ else
+ return IP_DEFRAG_CONNTRACK_OUT + zone;
+}
+
+static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct sock *sk = skb->sk;
+ struct inet_sock *inet = inet_sk(skb->sk);
+
+ if (sk && (sk->sk_family == PF_INET) &&
+ inet->nodefrag)
+ return NF_ACCEPT;
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#if !IS_ENABLED(CONFIG_NF_NAT)
+ /* Previously seen (loopback)? Ignore. Do this before
+ fragment check. */
+ if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
+ return NF_ACCEPT;
+#endif
+#endif
+ /* Gather fragments. */
+ if (ip_is_fragment(ip_hdr(skb))) {
+ enum ip_defrag_users user =
+ nf_ct_defrag_user(ops->hooknum, skb);
+
+ if (nf_ct_ipv4_gather_frags(skb, user))
+ return NF_STOLEN;
+ }
+ return NF_ACCEPT;
+}
+
+static struct nf_hook_ops ipv4_defrag_ops[] = {
+ {
+ .hook = ipv4_conntrack_defrag,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
+ },
+ {
+ .hook = ipv4_conntrack_defrag,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
+ },
+};
+
+static int __init nf_defrag_init(void)
+{
+ return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops));
+}
+
+static void __exit nf_defrag_fini(void)
+{
+ nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops));
+}
+
+void nf_defrag_ipv4_enable(void)
+{
+}
+EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable);
+
+module_init(nf_defrag_init);
+module_exit(nf_defrag_fini);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
new file mode 100644
index 000000000..e7ad950cf
--- /dev/null
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -0,0 +1,161 @@
+/*
+ * (C) 2014 by Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * Based on code from ebt_log from:
+ *
+ * Bart De Schuymer <bdschuym@pandora.be>
+ * Harald Welte <laforge@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/xt_LOG.h>
+#include <net/netfilter/nf_log.h>
+
+static struct nf_loginfo default_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = LOGLEVEL_NOTICE,
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+struct arppayload {
+ unsigned char mac_src[ETH_ALEN];
+ unsigned char ip_src[4];
+ unsigned char mac_dst[ETH_ALEN];
+ unsigned char ip_dst[4];
+};
+
+static void dump_arp_packet(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb, unsigned int nhoff)
+{
+ const struct arphdr *ah;
+ struct arphdr _arph;
+ const struct arppayload *ap;
+ struct arppayload _arpp;
+
+ ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
+ if (ah == NULL) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+ nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d",
+ ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op));
+
+ /* If it's for Ethernet and the lengths are OK, then log the ARP
+ * payload.
+ */
+ if (ah->ar_hrd != htons(1) ||
+ ah->ar_hln != ETH_ALEN ||
+ ah->ar_pln != sizeof(__be32))
+ return;
+
+ ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp);
+ if (ap == NULL) {
+ nf_log_buf_add(m, " INCOMPLETE [%Zu bytes]",
+ skb->len - sizeof(_arph));
+ return;
+ }
+ nf_log_buf_add(m, " MACSRC=%pM IPSRC=%pI4 MACDST=%pM IPDST=%pI4",
+ ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst);
+}
+
+static void nf_log_arp_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum, const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ struct nf_log_buf *m;
+
+ /* FIXME: Disabled from containers until syslog ns is supported */
+ if (!net_eq(net, &init_net))
+ return;
+
+ m = nf_log_buf_open();
+
+ if (!loginfo)
+ loginfo = &default_loginfo;
+
+ nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
+ prefix);
+ dump_arp_packet(m, loginfo, skb, 0);
+
+ nf_log_buf_close(m);
+}
+
+static struct nf_logger nf_arp_logger __read_mostly = {
+ .name = "nf_log_arp",
+ .type = NF_LOG_TYPE_LOG,
+ .logfn = nf_log_arp_packet,
+ .me = THIS_MODULE,
+};
+
+static int __net_init nf_log_arp_net_init(struct net *net)
+{
+ nf_log_set(net, NFPROTO_ARP, &nf_arp_logger);
+ return 0;
+}
+
+static void __net_exit nf_log_arp_net_exit(struct net *net)
+{
+ nf_log_unset(net, &nf_arp_logger);
+}
+
+static struct pernet_operations nf_log_arp_net_ops = {
+ .init = nf_log_arp_net_init,
+ .exit = nf_log_arp_net_exit,
+};
+
+static int __init nf_log_arp_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&nf_log_arp_net_ops);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_log_register(NFPROTO_ARP, &nf_arp_logger);
+ if (ret < 0) {
+ pr_err("failed to register logger\n");
+ goto err1;
+ }
+
+ return 0;
+
+err1:
+ unregister_pernet_subsys(&nf_log_arp_net_ops);
+ return ret;
+}
+
+static void __exit nf_log_arp_exit(void)
+{
+ unregister_pernet_subsys(&nf_log_arp_net_ops);
+ nf_log_unregister(&nf_arp_logger);
+}
+
+module_init(nf_log_arp_init);
+module_exit(nf_log_arp_exit);
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter ARP packet logging");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NF_LOGGER(3, 0);
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
new file mode 100644
index 000000000..076aadda0
--- /dev/null
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -0,0 +1,397 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/xt_LOG.h>
+#include <net/netfilter/nf_log.h>
+
+static struct nf_loginfo default_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = LOGLEVEL_NOTICE,
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+/* One level of recursion won't kill us */
+static void dump_ipv4_packet(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb, unsigned int iphoff)
+{
+ struct iphdr _iph;
+ const struct iphdr *ih;
+ unsigned int logflags;
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+ else
+ logflags = NF_LOG_MASK;
+
+ ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
+ if (ih == NULL) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+
+ /* Important fields:
+ * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
+ /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
+ nf_log_buf_add(m, "SRC=%pI4 DST=%pI4 ", &ih->saddr, &ih->daddr);
+
+ /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
+ nf_log_buf_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+ ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
+ ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
+
+ /* Max length: 6 "CE DF MF " */
+ if (ntohs(ih->frag_off) & IP_CE)
+ nf_log_buf_add(m, "CE ");
+ if (ntohs(ih->frag_off) & IP_DF)
+ nf_log_buf_add(m, "DF ");
+ if (ntohs(ih->frag_off) & IP_MF)
+ nf_log_buf_add(m, "MF ");
+
+ /* Max length: 11 "FRAG:65535 " */
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
+
+ if ((logflags & XT_LOG_IPOPT) &&
+ ih->ihl * 4 > sizeof(struct iphdr)) {
+ const unsigned char *op;
+ unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
+ unsigned int i, optsize;
+
+ optsize = ih->ihl * 4 - sizeof(struct iphdr);
+ op = skb_header_pointer(skb, iphoff+sizeof(_iph),
+ optsize, _opt);
+ if (op == NULL) {
+ nf_log_buf_add(m, "TRUNCATED");
+ return;
+ }
+
+ /* Max length: 127 "OPT (" 15*4*2chars ") " */
+ nf_log_buf_add(m, "OPT (");
+ for (i = 0; i < optsize; i++)
+ nf_log_buf_add(m, "%02X", op[i]);
+ nf_log_buf_add(m, ") ");
+ }
+
+ switch (ih->protocol) {
+ case IPPROTO_TCP:
+ if (nf_log_dump_tcp_header(m, skb, ih->protocol,
+ ntohs(ih->frag_off) & IP_OFFSET,
+ iphoff+ih->ihl*4, logflags))
+ return;
+ break;
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ if (nf_log_dump_udp_header(m, skb, ih->protocol,
+ ntohs(ih->frag_off) & IP_OFFSET,
+ iphoff+ih->ihl*4))
+ return;
+ break;
+ case IPPROTO_ICMP: {
+ struct icmphdr _icmph;
+ const struct icmphdr *ich;
+ static const size_t required_len[NR_ICMP_TYPES+1]
+ = { [ICMP_ECHOREPLY] = 4,
+ [ICMP_DEST_UNREACH]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_SOURCE_QUENCH]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_REDIRECT]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_ECHO] = 4,
+ [ICMP_TIME_EXCEEDED]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_PARAMETERPROB]
+ = 8 + sizeof(struct iphdr),
+ [ICMP_TIMESTAMP] = 20,
+ [ICMP_TIMESTAMPREPLY] = 20,
+ [ICMP_ADDRESS] = 12,
+ [ICMP_ADDRESSREPLY] = 12 };
+
+ /* Max length: 11 "PROTO=ICMP " */
+ nf_log_buf_add(m, "PROTO=ICMP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
+ sizeof(_icmph), &_icmph);
+ if (ich == NULL) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Max length: 18 "TYPE=255 CODE=255 " */
+ nf_log_buf_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code);
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ if (ich->type <= NR_ICMP_TYPES &&
+ required_len[ich->type] &&
+ skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ switch (ich->type) {
+ case ICMP_ECHOREPLY:
+ case ICMP_ECHO:
+ /* Max length: 19 "ID=65535 SEQ=65535 " */
+ nf_log_buf_add(m, "ID=%u SEQ=%u ",
+ ntohs(ich->un.echo.id),
+ ntohs(ich->un.echo.sequence));
+ break;
+
+ case ICMP_PARAMETERPROB:
+ /* Max length: 14 "PARAMETER=255 " */
+ nf_log_buf_add(m, "PARAMETER=%u ",
+ ntohl(ich->un.gateway) >> 24);
+ break;
+ case ICMP_REDIRECT:
+ /* Max length: 24 "GATEWAY=255.255.255.255 " */
+ nf_log_buf_add(m, "GATEWAY=%pI4 ", &ich->un.gateway);
+ /* Fall through */
+ case ICMP_DEST_UNREACH:
+ case ICMP_SOURCE_QUENCH:
+ case ICMP_TIME_EXCEEDED:
+ /* Max length: 3+maxlen */
+ if (!iphoff) { /* Only recurse once. */
+ nf_log_buf_add(m, "[");
+ dump_ipv4_packet(m, info, skb,
+ iphoff + ih->ihl*4+sizeof(_icmph));
+ nf_log_buf_add(m, "] ");
+ }
+
+ /* Max length: 10 "MTU=65535 " */
+ if (ich->type == ICMP_DEST_UNREACH &&
+ ich->code == ICMP_FRAG_NEEDED) {
+ nf_log_buf_add(m, "MTU=%u ",
+ ntohs(ich->un.frag.mtu));
+ }
+ }
+ break;
+ }
+ /* Max Length */
+ case IPPROTO_AH: {
+ struct ip_auth_hdr _ahdr;
+ const struct ip_auth_hdr *ah;
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 9 "PROTO=AH " */
+ nf_log_buf_add(m, "PROTO=AH ");
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
+ sizeof(_ahdr), &_ahdr);
+ if (ah == NULL) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 " */
+ nf_log_buf_add(m, "SPI=0x%x ", ntohl(ah->spi));
+ break;
+ }
+ case IPPROTO_ESP: {
+ struct ip_esp_hdr _esph;
+ const struct ip_esp_hdr *eh;
+
+ /* Max length: 10 "PROTO=ESP " */
+ nf_log_buf_add(m, "PROTO=ESP ");
+
+ if (ntohs(ih->frag_off) & IP_OFFSET)
+ break;
+
+ /* Max length: 25 "INCOMPLETE [65535 bytes] " */
+ eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
+ sizeof(_esph), &_esph);
+ if (eh == NULL) {
+ nf_log_buf_add(m, "INCOMPLETE [%u bytes] ",
+ skb->len - iphoff - ih->ihl*4);
+ break;
+ }
+
+ /* Length: 15 "SPI=0xF1234567 " */
+ nf_log_buf_add(m, "SPI=0x%x ", ntohl(eh->spi));
+ break;
+ }
+ /* Max length: 10 "PROTO 255 " */
+ default:
+ nf_log_buf_add(m, "PROTO=%u ", ih->protocol);
+ }
+
+ /* Max length: 15 "UID=4294967295 " */
+ if ((logflags & XT_LOG_UID) && !iphoff)
+ nf_log_dump_sk_uid_gid(m, skb->sk);
+
+ /* Max length: 16 "MARK=0xFFFFFFFF " */
+ if (!iphoff && skb->mark)
+ nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
+
+ /* Proto Max log string length */
+ /* IP: 40+46+6+11+127 = 230 */
+ /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
+ /* UDP: 10+max(25,20) = 35 */
+ /* UDPLITE: 14+max(25,20) = 39 */
+ /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
+ /* ESP: 10+max(25)+15 = 50 */
+ /* AH: 9+max(25)+15 = 49 */
+ /* unknown: 10 */
+
+ /* (ICMP allows recursion one level deep) */
+ /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */
+ /* maxlen = 230+ 91 + 230 + 252 = 803 */
+}
+
+static void dump_ipv4_mac_header(struct nf_log_buf *m,
+ const struct nf_loginfo *info,
+ const struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ unsigned int logflags = 0;
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+
+ if (!(logflags & XT_LOG_MACDECODE))
+ goto fallback;
+
+ switch (dev->type) {
+ case ARPHRD_ETHER:
+ nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
+ eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+ ntohs(eth_hdr(skb)->h_proto));
+ return;
+ default:
+ break;
+ }
+
+fallback:
+ nf_log_buf_add(m, "MAC=");
+ if (dev->hard_header_len &&
+ skb->mac_header != skb->network_header) {
+ const unsigned char *p = skb_mac_header(skb);
+ unsigned int i;
+
+ nf_log_buf_add(m, "%02x", *p++);
+ for (i = 1; i < dev->hard_header_len; i++, p++)
+ nf_log_buf_add(m, ":%02x", *p);
+ }
+ nf_log_buf_add(m, " ");
+}
+
+static void nf_log_ip_packet(struct net *net, u_int8_t pf,
+ unsigned int hooknum, const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const struct nf_loginfo *loginfo,
+ const char *prefix)
+{
+ struct nf_log_buf *m;
+
+ /* FIXME: Disabled from containers until syslog ns is supported */
+ if (!net_eq(net, &init_net))
+ return;
+
+ m = nf_log_buf_open();
+
+ if (!loginfo)
+ loginfo = &default_loginfo;
+
+ nf_log_dump_packet_common(m, pf, hooknum, skb, in,
+ out, loginfo, prefix);
+
+ if (in != NULL)
+ dump_ipv4_mac_header(m, loginfo, skb);
+
+ dump_ipv4_packet(m, loginfo, skb, 0);
+
+ nf_log_buf_close(m);
+}
+
+static struct nf_logger nf_ip_logger __read_mostly = {
+ .name = "nf_log_ipv4",
+ .type = NF_LOG_TYPE_LOG,
+ .logfn = nf_log_ip_packet,
+ .me = THIS_MODULE,
+};
+
+static int __net_init nf_log_ipv4_net_init(struct net *net)
+{
+ nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
+ return 0;
+}
+
+static void __net_exit nf_log_ipv4_net_exit(struct net *net)
+{
+ nf_log_unset(net, &nf_ip_logger);
+}
+
+static struct pernet_operations nf_log_ipv4_net_ops = {
+ .init = nf_log_ipv4_net_init,
+ .exit = nf_log_ipv4_net_exit,
+};
+
+static int __init nf_log_ipv4_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&nf_log_ipv4_net_ops);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_log_register(NFPROTO_IPV4, &nf_ip_logger);
+ if (ret < 0) {
+ pr_err("failed to register logger\n");
+ goto err1;
+ }
+
+ return 0;
+
+err1:
+ unregister_pernet_subsys(&nf_log_ipv4_net_ops);
+ return ret;
+}
+
+static void __exit nf_log_ipv4_exit(void)
+{
+ unregister_pernet_subsys(&nf_log_ipv4_net_ops);
+ nf_log_unregister(&nf_ip_logger);
+}
+
+module_init(nf_log_ipv4_init);
+module_exit(nf_log_ipv4_exit);
+
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter IPv4 packet logging");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NF_LOGGER(AF_INET, 0);
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
new file mode 100644
index 000000000..574f7ebba
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -0,0 +1,631 @@
+/*
+ * H.323 extension for NAT alteration.
+ *
+ * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
+ * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
+ * This source code is licensed under General Public License version 2.
+ *
+ * Based on the 'brute force' H.323 NAT module by
+ * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ */
+
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_h323.h>
+
+/****************************************************************************/
+static int set_addr(struct sk_buff *skb, unsigned int protoff,
+ unsigned char **data, int dataoff,
+ unsigned int addroff, __be32 ip, __be16 port)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct {
+ __be32 ip;
+ __be16 port;
+ } __attribute__ ((__packed__)) buf;
+ const struct tcphdr *th;
+ struct tcphdr _tcph;
+
+ buf.ip = ip;
+ buf.port = port;
+ addroff += dataoff;
+
+ if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
+ if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+ protoff, addroff, sizeof(buf),
+ (char *) &buf, sizeof(buf))) {
+ net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_tcp_packet error\n");
+ return -1;
+ }
+
+ /* Relocate data pointer */
+ th = skb_header_pointer(skb, ip_hdrlen(skb),
+ sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return -1;
+ *data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff;
+ } else {
+ if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
+ protoff, addroff, sizeof(buf),
+ (char *) &buf, sizeof(buf))) {
+ net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n");
+ return -1;
+ }
+ /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
+ * or pull everything in a linear buffer, so we can safely
+ * use the skb pointers now */
+ *data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
+ }
+
+ return 0;
+}
+
+/****************************************************************************/
+static int set_h225_addr(struct sk_buff *skb, unsigned int protoff,
+ unsigned char **data, int dataoff,
+ TransportAddress *taddr,
+ union nf_inet_addr *addr, __be16 port)
+{
+ return set_addr(skb, protoff, data, dataoff, taddr->ipAddress.ip,
+ addr->ip, port);
+}
+
+/****************************************************************************/
+static int set_h245_addr(struct sk_buff *skb, unsigned protoff,
+ unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr,
+ union nf_inet_addr *addr, __be16 port)
+{
+ return set_addr(skb, protoff, data, dataoff,
+ taddr->unicastAddress.iPAddress.network,
+ addr->ip, port);
+}
+
+/****************************************************************************/
+static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data,
+ TransportAddress *taddr, int count)
+{
+ const struct nf_ct_h323_master *info = nfct_help_data(ct);
+ int dir = CTINFO2DIR(ctinfo);
+ int i;
+ __be16 port;
+ union nf_inet_addr addr;
+
+ for (i = 0; i < count; i++) {
+ if (get_h225_addr(ct, *data, &taddr[i], &addr, &port)) {
+ if (addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
+ port == info->sig_port[dir]) {
+ /* GW->GK */
+
+ /* Fix for Gnomemeeting */
+ if (i > 0 &&
+ get_h225_addr(ct, *data, &taddr[0],
+ &addr, &port) &&
+ (ntohl(addr.ip) & 0xff000000) == 0x7f000000)
+ i = 0;
+
+ pr_debug("nf_nat_ras: set signal address %pI4:%hu->%pI4:%hu\n",
+ &addr.ip, port,
+ &ct->tuplehash[!dir].tuple.dst.u3.ip,
+ info->sig_port[!dir]);
+ return set_h225_addr(skb, protoff, data, 0,
+ &taddr[i],
+ &ct->tuplehash[!dir].
+ tuple.dst.u3,
+ info->sig_port[!dir]);
+ } else if (addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
+ port == info->sig_port[dir]) {
+ /* GK->GW */
+ pr_debug("nf_nat_ras: set signal address %pI4:%hu->%pI4:%hu\n",
+ &addr.ip, port,
+ &ct->tuplehash[!dir].tuple.src.u3.ip,
+ info->sig_port[!dir]);
+ return set_h225_addr(skb, protoff, data, 0,
+ &taddr[i],
+ &ct->tuplehash[!dir].
+ tuple.src.u3,
+ info->sig_port[!dir]);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/****************************************************************************/
+static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data,
+ TransportAddress *taddr, int count)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ int i;
+ __be16 port;
+ union nf_inet_addr addr;
+
+ for (i = 0; i < count; i++) {
+ if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) &&
+ addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
+ port == ct->tuplehash[dir].tuple.src.u.udp.port) {
+ pr_debug("nf_nat_ras: set rasAddress %pI4:%hu->%pI4:%hu\n",
+ &addr.ip, ntohs(port),
+ &ct->tuplehash[!dir].tuple.dst.u3.ip,
+ ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port));
+ return set_h225_addr(skb, protoff, data, 0, &taddr[i],
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ ct->tuplehash[!dir].tuple.
+ dst.u.udp.port);
+ }
+ }
+
+ return 0;
+}
+
+/****************************************************************************/
+static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr,
+ __be16 port, __be16 rtp_port,
+ struct nf_conntrack_expect *rtp_exp,
+ struct nf_conntrack_expect *rtcp_exp)
+{
+ struct nf_ct_h323_master *info = nfct_help_data(ct);
+ int dir = CTINFO2DIR(ctinfo);
+ int i;
+ u_int16_t nated_port;
+
+ /* Set expectations for NAT */
+ rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
+ rtp_exp->expectfn = nf_nat_follow_master;
+ rtp_exp->dir = !dir;
+ rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
+ rtcp_exp->expectfn = nf_nat_follow_master;
+ rtcp_exp->dir = !dir;
+
+ /* Lookup existing expects */
+ for (i = 0; i < H323_RTP_CHANNEL_MAX; i++) {
+ if (info->rtp_port[i][dir] == rtp_port) {
+ /* Expected */
+
+ /* Use allocated ports first. This will refresh
+ * the expects */
+ rtp_exp->tuple.dst.u.udp.port = info->rtp_port[i][dir];
+ rtcp_exp->tuple.dst.u.udp.port =
+ htons(ntohs(info->rtp_port[i][dir]) + 1);
+ break;
+ } else if (info->rtp_port[i][dir] == 0) {
+ /* Not expected */
+ break;
+ }
+ }
+
+ /* Run out of expectations */
+ if (i >= H323_RTP_CHANNEL_MAX) {
+ net_notice_ratelimited("nf_nat_h323: out of expectations\n");
+ return 0;
+ }
+
+ /* Try to get a pair of ports. */
+ for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
+ nated_port != 0; nated_port += 2) {
+ int ret;
+
+ rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
+ ret = nf_ct_expect_related(rtp_exp);
+ if (ret == 0) {
+ rtcp_exp->tuple.dst.u.udp.port =
+ htons(nated_port + 1);
+ ret = nf_ct_expect_related(rtcp_exp);
+ if (ret == 0)
+ break;
+ else if (ret == -EBUSY) {
+ nf_ct_unexpect_related(rtp_exp);
+ continue;
+ } else if (ret < 0) {
+ nf_ct_unexpect_related(rtp_exp);
+ nated_port = 0;
+ break;
+ }
+ } else if (ret != -EBUSY) {
+ nated_port = 0;
+ break;
+ }
+ }
+
+ if (nated_port == 0) { /* No port available */
+ net_notice_ratelimited("nf_nat_h323: out of RTP ports\n");
+ return 0;
+ }
+
+ /* Modify signal */
+ if (set_h245_addr(skb, protoff, data, dataoff, taddr,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ htons((port & htons(1)) ? nated_port + 1 :
+ nated_port)) == 0) {
+ /* Save ports */
+ info->rtp_port[i][dir] = rtp_port;
+ info->rtp_port[i][!dir] = htons(nated_port);
+ } else {
+ nf_ct_unexpect_related(rtp_exp);
+ nf_ct_unexpect_related(rtcp_exp);
+ return -1;
+ }
+
+ /* Success */
+ pr_debug("nf_nat_h323: expect RTP %pI4:%hu->%pI4:%hu\n",
+ &rtp_exp->tuple.src.u3.ip,
+ ntohs(rtp_exp->tuple.src.u.udp.port),
+ &rtp_exp->tuple.dst.u3.ip,
+ ntohs(rtp_exp->tuple.dst.u.udp.port));
+ pr_debug("nf_nat_h323: expect RTCP %pI4:%hu->%pI4:%hu\n",
+ &rtcp_exp->tuple.src.u3.ip,
+ ntohs(rtcp_exp->tuple.src.u.udp.port),
+ &rtcp_exp->tuple.dst.u3.ip,
+ ntohs(rtcp_exp->tuple.dst.u.udp.port));
+
+ return 0;
+}
+
+/****************************************************************************/
+static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr, __be16 port,
+ struct nf_conntrack_expect *exp)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ u_int16_t nated_port = ntohs(port);
+
+ /* Set expectations for NAT */
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->expectfn = nf_nat_follow_master;
+ exp->dir = !dir;
+
+ /* Try to get same port: if not, try to change it. */
+ for (; nated_port != 0; nated_port++) {
+ int ret;
+
+ exp->tuple.dst.u.tcp.port = htons(nated_port);
+ ret = nf_ct_expect_related(exp);
+ if (ret == 0)
+ break;
+ else if (ret != -EBUSY) {
+ nated_port = 0;
+ break;
+ }
+ }
+
+ if (nated_port == 0) { /* No port available */
+ net_notice_ratelimited("nf_nat_h323: out of TCP ports\n");
+ return 0;
+ }
+
+ /* Modify signal */
+ if (set_h245_addr(skb, protoff, data, dataoff, taddr,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ htons(nated_port)) < 0) {
+ nf_ct_unexpect_related(exp);
+ return -1;
+ }
+
+ pr_debug("nf_nat_h323: expect T.120 %pI4:%hu->%pI4:%hu\n",
+ &exp->tuple.src.u3.ip,
+ ntohs(exp->tuple.src.u.tcp.port),
+ &exp->tuple.dst.u3.ip,
+ ntohs(exp->tuple.dst.u.tcp.port));
+
+ return 0;
+}
+
+/****************************************************************************/
+static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data, int dataoff,
+ TransportAddress *taddr, __be16 port,
+ struct nf_conntrack_expect *exp)
+{
+ struct nf_ct_h323_master *info = nfct_help_data(ct);
+ int dir = CTINFO2DIR(ctinfo);
+ u_int16_t nated_port = ntohs(port);
+
+ /* Set expectations for NAT */
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->expectfn = nf_nat_follow_master;
+ exp->dir = !dir;
+
+ /* Check existing expects */
+ if (info->sig_port[dir] == port)
+ nated_port = ntohs(info->sig_port[!dir]);
+
+ /* Try to get same port: if not, try to change it. */
+ for (; nated_port != 0; nated_port++) {
+ int ret;
+
+ exp->tuple.dst.u.tcp.port = htons(nated_port);
+ ret = nf_ct_expect_related(exp);
+ if (ret == 0)
+ break;
+ else if (ret != -EBUSY) {
+ nated_port = 0;
+ break;
+ }
+ }
+
+ if (nated_port == 0) { /* No port available */
+ net_notice_ratelimited("nf_nat_q931: out of TCP ports\n");
+ return 0;
+ }
+
+ /* Modify signal */
+ if (set_h225_addr(skb, protoff, data, dataoff, taddr,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ htons(nated_port)) == 0) {
+ /* Save ports */
+ info->sig_port[dir] = port;
+ info->sig_port[!dir] = htons(nated_port);
+ } else {
+ nf_ct_unexpect_related(exp);
+ return -1;
+ }
+
+ pr_debug("nf_nat_q931: expect H.245 %pI4:%hu->%pI4:%hu\n",
+ &exp->tuple.src.u3.ip,
+ ntohs(exp->tuple.src.u.tcp.port),
+ &exp->tuple.dst.u3.ip,
+ ntohs(exp->tuple.dst.u.tcp.port));
+
+ return 0;
+}
+
+/****************************************************************************
+ * This conntrack expect function replaces nf_conntrack_q931_expect()
+ * which was set by nf_conntrack_h323.c.
+ ****************************************************************************/
+static void ip_nat_q931_expect(struct nf_conn *new,
+ struct nf_conntrack_expect *this)
+{
+ struct nf_nat_range range;
+
+ if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */
+ nf_nat_follow_master(new, this);
+ return;
+ }
+
+ /* This must be a fresh one. */
+ BUG_ON(new->status & IPS_NAT_DONE_MASK);
+
+ /* Change src to where master sends to */
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = range.max_addr =
+ new->tuplehash[!this->dir].tuple.src.u3;
+ nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
+
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
+ range.min_proto = range.max_proto = this->saved_proto;
+ range.min_addr = range.max_addr =
+ new->master->tuplehash[!this->dir].tuple.src.u3;
+ nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
+}
+
+/****************************************************************************/
+static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data,
+ TransportAddress *taddr, int idx,
+ __be16 port, struct nf_conntrack_expect *exp)
+{
+ struct nf_ct_h323_master *info = nfct_help_data(ct);
+ int dir = CTINFO2DIR(ctinfo);
+ u_int16_t nated_port = ntohs(port);
+ union nf_inet_addr addr;
+
+ /* Set expectations for NAT */
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->expectfn = ip_nat_q931_expect;
+ exp->dir = !dir;
+
+ /* Check existing expects */
+ if (info->sig_port[dir] == port)
+ nated_port = ntohs(info->sig_port[!dir]);
+
+ /* Try to get same port: if not, try to change it. */
+ for (; nated_port != 0; nated_port++) {
+ int ret;
+
+ exp->tuple.dst.u.tcp.port = htons(nated_port);
+ ret = nf_ct_expect_related(exp);
+ if (ret == 0)
+ break;
+ else if (ret != -EBUSY) {
+ nated_port = 0;
+ break;
+ }
+ }
+
+ if (nated_port == 0) { /* No port available */
+ net_notice_ratelimited("nf_nat_ras: out of TCP ports\n");
+ return 0;
+ }
+
+ /* Modify signal */
+ if (set_h225_addr(skb, protoff, data, 0, &taddr[idx],
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ htons(nated_port)) == 0) {
+ /* Save ports */
+ info->sig_port[dir] = port;
+ info->sig_port[!dir] = htons(nated_port);
+
+ /* Fix for Gnomemeeting */
+ if (idx > 0 &&
+ get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
+ (ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
+ set_h225_addr(skb, protoff, data, 0, &taddr[0],
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ info->sig_port[!dir]);
+ }
+ } else {
+ nf_ct_unexpect_related(exp);
+ return -1;
+ }
+
+ /* Success */
+ pr_debug("nf_nat_ras: expect Q.931 %pI4:%hu->%pI4:%hu\n",
+ &exp->tuple.src.u3.ip,
+ ntohs(exp->tuple.src.u.tcp.port),
+ &exp->tuple.dst.u3.ip,
+ ntohs(exp->tuple.dst.u.tcp.port));
+
+ return 0;
+}
+
+/****************************************************************************/
+static void ip_nat_callforwarding_expect(struct nf_conn *new,
+ struct nf_conntrack_expect *this)
+{
+ struct nf_nat_range range;
+
+ /* This must be a fresh one. */
+ BUG_ON(new->status & IPS_NAT_DONE_MASK);
+
+ /* Change src to where master sends to */
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = range.max_addr =
+ new->tuplehash[!this->dir].tuple.src.u3;
+ nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
+
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
+ range.min_proto = range.max_proto = this->saved_proto;
+ range.min_addr = range.max_addr = this->saved_addr;
+ nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
+}
+
+/****************************************************************************/
+static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, int dataoff,
+ TransportAddress *taddr, __be16 port,
+ struct nf_conntrack_expect *exp)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ u_int16_t nated_port;
+
+ /* Set expectations for NAT */
+ exp->saved_addr = exp->tuple.dst.u3;
+ exp->tuple.dst.u3.ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->expectfn = ip_nat_callforwarding_expect;
+ exp->dir = !dir;
+
+ /* Try to get same port: if not, try to change it. */
+ for (nated_port = ntohs(port); nated_port != 0; nated_port++) {
+ int ret;
+
+ exp->tuple.dst.u.tcp.port = htons(nated_port);
+ ret = nf_ct_expect_related(exp);
+ if (ret == 0)
+ break;
+ else if (ret != -EBUSY) {
+ nated_port = 0;
+ break;
+ }
+ }
+
+ if (nated_port == 0) { /* No port available */
+ net_notice_ratelimited("nf_nat_q931: out of TCP ports\n");
+ return 0;
+ }
+
+ /* Modify signal */
+ if (!set_h225_addr(skb, protoff, data, dataoff, taddr,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ htons(nated_port)) == 0) {
+ nf_ct_unexpect_related(exp);
+ return -1;
+ }
+
+ /* Success */
+ pr_debug("nf_nat_q931: expect Call Forwarding %pI4:%hu->%pI4:%hu\n",
+ &exp->tuple.src.u3.ip,
+ ntohs(exp->tuple.src.u.tcp.port),
+ &exp->tuple.dst.u3.ip,
+ ntohs(exp->tuple.dst.u.tcp.port));
+
+ return 0;
+}
+
+static struct nf_ct_helper_expectfn q931_nat = {
+ .name = "Q.931",
+ .expectfn = ip_nat_q931_expect,
+};
+
+static struct nf_ct_helper_expectfn callforwarding_nat = {
+ .name = "callforwarding",
+ .expectfn = ip_nat_callforwarding_expect,
+};
+
+/****************************************************************************/
+static int __init init(void)
+{
+ BUG_ON(set_h245_addr_hook != NULL);
+ BUG_ON(set_h225_addr_hook != NULL);
+ BUG_ON(set_sig_addr_hook != NULL);
+ BUG_ON(set_ras_addr_hook != NULL);
+ BUG_ON(nat_rtp_rtcp_hook != NULL);
+ BUG_ON(nat_t120_hook != NULL);
+ BUG_ON(nat_h245_hook != NULL);
+ BUG_ON(nat_callforwarding_hook != NULL);
+ BUG_ON(nat_q931_hook != NULL);
+
+ RCU_INIT_POINTER(set_h245_addr_hook, set_h245_addr);
+ RCU_INIT_POINTER(set_h225_addr_hook, set_h225_addr);
+ RCU_INIT_POINTER(set_sig_addr_hook, set_sig_addr);
+ RCU_INIT_POINTER(set_ras_addr_hook, set_ras_addr);
+ RCU_INIT_POINTER(nat_rtp_rtcp_hook, nat_rtp_rtcp);
+ RCU_INIT_POINTER(nat_t120_hook, nat_t120);
+ RCU_INIT_POINTER(nat_h245_hook, nat_h245);
+ RCU_INIT_POINTER(nat_callforwarding_hook, nat_callforwarding);
+ RCU_INIT_POINTER(nat_q931_hook, nat_q931);
+ nf_ct_helper_expectfn_register(&q931_nat);
+ nf_ct_helper_expectfn_register(&callforwarding_nat);
+ return 0;
+}
+
+/****************************************************************************/
+static void __exit fini(void)
+{
+ RCU_INIT_POINTER(set_h245_addr_hook, NULL);
+ RCU_INIT_POINTER(set_h225_addr_hook, NULL);
+ RCU_INIT_POINTER(set_sig_addr_hook, NULL);
+ RCU_INIT_POINTER(set_ras_addr_hook, NULL);
+ RCU_INIT_POINTER(nat_rtp_rtcp_hook, NULL);
+ RCU_INIT_POINTER(nat_t120_hook, NULL);
+ RCU_INIT_POINTER(nat_h245_hook, NULL);
+ RCU_INIT_POINTER(nat_callforwarding_hook, NULL);
+ RCU_INIT_POINTER(nat_q931_hook, NULL);
+ nf_ct_helper_expectfn_unregister(&q931_nat);
+ nf_ct_helper_expectfn_unregister(&callforwarding_nat);
+ synchronize_rcu();
+}
+
+/****************************************************************************/
+module_init(init);
+module_exit(fini);
+
+MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
+MODULE_DESCRIPTION("H.323 NAT helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_nat_h323");
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
new file mode 100644
index 000000000..e59cc05c0
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -0,0 +1,481 @@
+/*
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/secure_seq.h>
+#include <net/checksum.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv4;
+
+#ifdef CONFIG_XFRM
+static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ unsigned long statusbit,
+ struct flowi *fl)
+{
+ const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+ struct flowi4 *fl4 = &fl->u.ip4;
+
+ if (ct->status & statusbit) {
+ fl4->daddr = t->dst.u3.ip;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl4->fl4_dport = t->dst.u.all;
+ }
+
+ statusbit ^= IPS_NAT_MASK;
+
+ if (ct->status & statusbit) {
+ fl4->saddr = t->src.u3.ip;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl4->fl4_sport = t->src.u.all;
+ }
+}
+#endif /* CONFIG_XFRM */
+
+static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t,
+ const struct nf_nat_range *range)
+{
+ return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
+ ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
+}
+
+static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t,
+ __be16 dport)
+{
+ return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport);
+}
+
+static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff,
+ const struct nf_nat_l4proto *l4proto,
+ const struct nf_conntrack_tuple *target,
+ enum nf_nat_manip_type maniptype)
+{
+ struct iphdr *iph;
+ unsigned int hdroff;
+
+ if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
+ return false;
+
+ iph = (void *)skb->data + iphdroff;
+ hdroff = iphdroff + iph->ihl * 4;
+
+ if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff,
+ target, maniptype))
+ return false;
+ iph = (void *)skb->data + iphdroff;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
+ iph->saddr = target->src.u3.ip;
+ } else {
+ csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
+ iph->daddr = target->dst.u3.ip;
+ }
+ return true;
+}
+
+static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
+ unsigned int iphdroff, __sum16 *check,
+ const struct nf_conntrack_tuple *t,
+ enum nf_nat_manip_type maniptype)
+{
+ struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+ __be32 oldip, newip;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ oldip = iph->saddr;
+ newip = t->src.u3.ip;
+ } else {
+ oldip = iph->daddr;
+ newip = t->dst.u3.ip;
+ }
+ inet_proto_csum_replace4(check, skb, oldip, newip, 1);
+}
+
+static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
+ u8 proto, void *data, __sum16 *check,
+ int datalen, int oldlen)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct rtable *rt = skb_rtable(skb);
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ if (!(rt->rt_flags & RTCF_LOCAL) &&
+ (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_headroom(skb) +
+ skb_network_offset(skb) +
+ ip_hdrlen(skb);
+ skb->csum_offset = (void *)check - data;
+ *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ datalen, proto, 0);
+ } else {
+ *check = 0;
+ *check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+ datalen, proto,
+ csum_partial(data, datalen,
+ 0));
+ if (proto == IPPROTO_UDP && !*check)
+ *check = CSUM_MANGLED_0;
+ }
+ } else
+ inet_proto_csum_replace2(check, skb,
+ htons(oldlen), htons(datalen), 1);
+}
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
+ struct nf_nat_range *range)
+{
+ if (tb[CTA_NAT_V4_MINIP]) {
+ range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
+ range->flags |= NF_NAT_RANGE_MAP_IPS;
+ }
+
+ if (tb[CTA_NAT_V4_MAXIP])
+ range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
+ else
+ range->max_addr.ip = range->min_addr.ip;
+
+ return 0;
+}
+#endif
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
+ .l3proto = NFPROTO_IPV4,
+ .in_range = nf_nat_ipv4_in_range,
+ .secure_port = nf_nat_ipv4_secure_port,
+ .manip_pkt = nf_nat_ipv4_manip_pkt,
+ .csum_update = nf_nat_ipv4_csum_update,
+ .csum_recalc = nf_nat_ipv4_csum_recalc,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .nlattr_to_range = nf_nat_ipv4_nlattr_to_range,
+#endif
+#ifdef CONFIG_XFRM
+ .decode_session = nf_nat_ipv4_decode_session,
+#endif
+};
+
+int nf_nat_icmp_reply_translation(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum)
+{
+ struct {
+ struct icmphdr icmp;
+ struct iphdr ip;
+ } *inside;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+ unsigned int hdrlen = ip_hdrlen(skb);
+ const struct nf_nat_l4proto *l4proto;
+ struct nf_conntrack_tuple target;
+ unsigned long statusbit;
+
+ NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+
+ if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+ return 0;
+ if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
+ return 0;
+
+ inside = (void *)skb->data + hdrlen;
+ if (inside->icmp.type == ICMP_REDIRECT) {
+ if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+ return 0;
+ if (ct->status & IPS_NAT_MASK)
+ return 0;
+ }
+
+ if (manip == NF_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
+
+ /* Invert if this is reply direction */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+
+ if (!(ct->status & statusbit))
+ return 1;
+
+ l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol);
+ if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
+ l4proto, &ct->tuplehash[!dir].tuple, !manip))
+ return 0;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ /* Reloading "inside" here since manip_pkt may reallocate */
+ inside = (void *)skb->data + hdrlen;
+ inside->icmp.checksum = 0;
+ inside->icmp.checksum =
+ csum_fold(skb_checksum(skb, hdrlen,
+ skb->len - hdrlen, 0));
+ }
+
+ /* Change outer to look like the reply to an incoming packet */
+ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+ l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0);
+ if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
+
+unsigned int
+nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ struct nf_conn *ct))
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_nat *nat;
+ /* maniptype == SRC for postrouting. */
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+
+ /* We never see fragments: conntrack defrags on pre-routing
+ * and local-out, and nf_nat_out protects post-routing.
+ */
+ NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
+
+ ct = nf_ct_get(skb, &ctinfo);
+ /* Can't track? It's not due to stress, or conntrack would
+ * have dropped it. Hence it's the user's responsibilty to
+ * packet filter it out, or implement conntrack/NAT for that
+ * protocol. 8) --RR
+ */
+ if (!ct)
+ return NF_ACCEPT;
+
+ /* Don't try to NAT if this packet is not conntracked */
+ if (nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED_REPLY:
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+ if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+ ops->hooknum))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ case IP_CT_NEW:
+ /* Seen it before? This can happen for loopback, retrans,
+ * or local packets.
+ */
+ if (!nf_nat_initialized(ct, maniptype)) {
+ unsigned int ret;
+
+ ret = do_chain(ops, skb, state, ct);
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum)))
+ break;
+
+ ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+ if (ret != NF_ACCEPT)
+ return ret;
+ } else {
+ pr_debug("Already setup manip %s for ct %p\n",
+ maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
+ ct);
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat,
+ state->out))
+ goto oif_changed;
+ }
+ break;
+
+ default:
+ /* ESTABLISHED */
+ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
+ ctinfo == IP_CT_ESTABLISHED_REPLY);
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out))
+ goto oif_changed;
+ }
+
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+
+oif_changed:
+ nf_ct_kill_acct(ct, ctinfo, skb);
+ return NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn);
+
+unsigned int
+nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ struct nf_conn *ct))
+{
+ unsigned int ret;
+ __be32 daddr = ip_hdr(skb)->daddr;
+
+ ret = nf_nat_ipv4_fn(ops, skb, state, do_chain);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ daddr != ip_hdr(skb)->daddr)
+ skb_dst_drop(skb);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv4_in);
+
+unsigned int
+nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ struct nf_conn *ct))
+{
+#ifdef CONFIG_XFRM
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ int err;
+#endif
+ unsigned int ret;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv4_fn(ops, skb, state, do_chain);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if ((ct->tuplehash[dir].tuple.src.u3.ip !=
+ ct->tuplehash[!dir].tuple.dst.u3.ip) ||
+ (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+ ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all)) {
+ err = nf_xfrm_me_harder(skb, AF_INET);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+ }
+#endif
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv4_out);
+
+unsigned int
+nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ struct nf_conn *ct))
+{
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+ int err;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv4_fn(ops, skb, state, do_chain);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+ ct->tuplehash[!dir].tuple.src.u3.ip) {
+ err = ip_route_me_harder(skb, RTN_UNSPEC);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#ifdef CONFIG_XFRM
+ else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all) {
+ err = nf_xfrm_me_harder(skb, AF_INET);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#endif
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv4_local_fn);
+
+static int __init nf_nat_l3proto_ipv4_init(void)
+{
+ int err;
+
+ err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
+ if (err < 0)
+ goto err1;
+ err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
+ if (err < 0)
+ goto err2;
+ return err;
+
+err2:
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
+err1:
+ return err;
+}
+
+static void __exit nf_nat_l3proto_ipv4_exit(void)
+{
+ nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf-nat-" __stringify(AF_INET));
+
+module_init(nf_nat_l3proto_ipv4_init);
+module_exit(nf_nat_l3proto_ipv4_exit);
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
new file mode 100644
index 000000000..c6eb42100
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -0,0 +1,153 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <net/protocol.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <net/route.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/ipv4/nf_nat_masquerade.h>
+
+unsigned int
+nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
+ const struct nf_nat_range *range,
+ const struct net_device *out)
+{
+ struct nf_conn *ct;
+ struct nf_conn_nat *nat;
+ enum ip_conntrack_info ctinfo;
+ struct nf_nat_range newrange;
+ const struct rtable *rt;
+ __be32 newsrc, nh;
+
+ NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
+
+ ct = nf_ct_get(skb, &ctinfo);
+ nat = nfct_nat(ct);
+
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY));
+
+ /* Source address is 0.0.0.0 - locally generated packet that is
+ * probably not supposed to be masqueraded.
+ */
+ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
+ return NF_ACCEPT;
+
+ rt = skb_rtable(skb);
+ nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
+ newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
+ if (!newsrc) {
+ pr_info("%s ate my IP address\n", out->name);
+ return NF_DROP;
+ }
+
+ nat->masq_index = out->ifindex;
+
+ /* Transfer from original range. */
+ memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
+ memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
+ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.ip = newsrc;
+ newrange.max_addr.ip = newsrc;
+ newrange.min_proto = range->min_proto;
+ newrange.max_proto = range->max_proto;
+
+ /* Hand modified range to generic setup. */
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
+
+static int device_cmp(struct nf_conn *i, void *ifindex)
+{
+ const struct nf_conn_nat *nat = nfct_nat(i);
+
+ if (!nat)
+ return 0;
+ if (nf_ct_l3num(i) != NFPROTO_IPV4)
+ return 0;
+ return nat->masq_index == (int)(long)ifindex;
+}
+
+static int masq_device_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+
+ if (event == NETDEV_DOWN) {
+ /* Device was downed. Search entire table for
+ * conntracks which were associated with that device,
+ * and forget them.
+ */
+ NF_CT_ASSERT(dev->ifindex != 0);
+
+ nf_ct_iterate_cleanup(net, device_cmp,
+ (void *)(long)dev->ifindex, 0, 0);
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int masq_inet_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
+ struct netdev_notifier_info info;
+
+ netdev_notifier_info_init(&info, dev);
+ return masq_device_event(this, event, &info);
+}
+
+static struct notifier_block masq_dev_notifier = {
+ .notifier_call = masq_device_event,
+};
+
+static struct notifier_block masq_inet_notifier = {
+ .notifier_call = masq_inet_event,
+};
+
+static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+
+void nf_nat_masquerade_ipv4_register_notifier(void)
+{
+ /* check if the notifier was already set */
+ if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
+ return;
+
+ /* Register for device down reports */
+ register_netdevice_notifier(&masq_dev_notifier);
+ /* Register IP address change reports */
+ register_inetaddr_notifier(&masq_inet_notifier);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
+
+void nf_nat_masquerade_ipv4_unregister_notifier(void)
+{
+ /* check if the notifier still has clients */
+ if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
+ return;
+
+ unregister_netdevice_notifier(&masq_dev_notifier);
+ unregister_inetaddr_notifier(&masq_inet_notifier);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
new file mode 100644
index 000000000..657d2307f
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -0,0 +1,311 @@
+/*
+ * nf_nat_pptp.c
+ *
+ * NAT support for PPTP (Point to Point Tunneling Protocol).
+ * PPTP is a a protocol for creating virtual private networks.
+ * It is a specification defined by Microsoft and some vendors
+ * working with Microsoft. PPTP is built on top of a modified
+ * version of the Internet Generic Routing Encapsulation Protocol.
+ * GRE is defined in RFC 1701 and RFC 1702. Documentation of
+ * PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
+ * TODO: - NAT to a unique tuple, not to TCP source port
+ * (needs netfilter tuple reservation)
+ */
+
+#include <linux/module.h>
+#include <linux/tcp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <linux/netfilter/nf_conntrack_proto_gre.h>
+#include <linux/netfilter/nf_conntrack_pptp.h>
+
+#define NF_NAT_PPTP_VERSION "3.0"
+
+#define REQ_CID(req, off) (*(__be16 *)((char *)(req) + (off)))
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
+MODULE_ALIAS("ip_nat_pptp");
+
+static void pptp_nat_expected(struct nf_conn *ct,
+ struct nf_conntrack_expect *exp)
+{
+ struct net *net = nf_ct_net(ct);
+ const struct nf_conn *master = ct->master;
+ struct nf_conntrack_expect *other_exp;
+ struct nf_conntrack_tuple t;
+ const struct nf_ct_pptp_master *ct_pptp_info;
+ const struct nf_nat_pptp *nat_pptp_info;
+ struct nf_nat_range range;
+
+ ct_pptp_info = nfct_help_data(master);
+ nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
+
+ /* And here goes the grand finale of corrosion... */
+ if (exp->dir == IP_CT_DIR_ORIGINAL) {
+ pr_debug("we are PNS->PAC\n");
+ /* therefore, build tuple for PAC->PNS */
+ t.src.l3num = AF_INET;
+ t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip;
+ t.src.u.gre.key = ct_pptp_info->pac_call_id;
+ t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+ t.dst.u.gre.key = ct_pptp_info->pns_call_id;
+ t.dst.protonum = IPPROTO_GRE;
+ } else {
+ pr_debug("we are PAC->PNS\n");
+ /* build tuple for PNS->PAC */
+ t.src.l3num = AF_INET;
+ t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip;
+ t.src.u.gre.key = nat_pptp_info->pns_call_id;
+ t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+ t.dst.u.gre.key = nat_pptp_info->pac_call_id;
+ t.dst.protonum = IPPROTO_GRE;
+ }
+
+ pr_debug("trying to unexpect other dir: ");
+ nf_ct_dump_tuple_ip(&t);
+ other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t);
+ if (other_exp) {
+ nf_ct_unexpect_related(other_exp);
+ nf_ct_expect_put(other_exp);
+ pr_debug("success\n");
+ } else {
+ pr_debug("not found!\n");
+ }
+
+ /* This must be a fresh one. */
+ BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+ /* Change src to where master sends to */
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = range.max_addr
+ = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+ if (exp->dir == IP_CT_DIR_ORIGINAL) {
+ range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ range.min_proto = range.max_proto = exp->saved_proto;
+ }
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = range.max_addr
+ = ct->master->tuplehash[!exp->dir].tuple.src.u3;
+ if (exp->dir == IP_CT_DIR_REPLY) {
+ range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ range.min_proto = range.max_proto = exp->saved_proto;
+ }
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
+}
+
+/* outbound packets == from PNS to PAC */
+static int
+pptp_outbound_pkt(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq)
+
+{
+ struct nf_ct_pptp_master *ct_pptp_info;
+ struct nf_nat_pptp *nat_pptp_info;
+ u_int16_t msg;
+ __be16 new_callid;
+ unsigned int cid_off;
+
+ ct_pptp_info = nfct_help_data(ct);
+ nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
+
+ new_callid = ct_pptp_info->pns_call_id;
+
+ switch (msg = ntohs(ctlh->messageType)) {
+ case PPTP_OUT_CALL_REQUEST:
+ cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
+ /* FIXME: ideally we would want to reserve a call ID
+ * here. current netfilter NAT core is not able to do
+ * this :( For now we use TCP source port. This breaks
+ * multiple calls within one control session */
+
+ /* save original call ID in nat_info */
+ nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
+
+ /* don't use tcph->source since we are at a DSTmanip
+ * hook (e.g. PREROUTING) and pkt is not mangled yet */
+ new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+
+ /* save new call ID in ct info */
+ ct_pptp_info->pns_call_id = new_callid;
+ break;
+ case PPTP_IN_CALL_REPLY:
+ cid_off = offsetof(union pptp_ctrl_union, icack.callID);
+ break;
+ case PPTP_CALL_CLEAR_REQUEST:
+ cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
+ break;
+ default:
+ pr_debug("unknown outbound packet 0x%04x:%s\n", msg,
+ msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] :
+ pptp_msg_name[0]);
+ /* fall through */
+ case PPTP_SET_LINK_INFO:
+ /* only need to NAT in case PAC is behind NAT box */
+ case PPTP_START_SESSION_REQUEST:
+ case PPTP_START_SESSION_REPLY:
+ case PPTP_STOP_SESSION_REQUEST:
+ case PPTP_STOP_SESSION_REPLY:
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* no need to alter packet */
+ return NF_ACCEPT;
+ }
+
+ /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
+ * down to here */
+ pr_debug("altering call id from 0x%04x to 0x%04x\n",
+ ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
+
+ /* mangle packet */
+ if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff,
+ cid_off + sizeof(struct pptp_pkt_hdr) +
+ sizeof(struct PptpControlHeader),
+ sizeof(new_callid), (char *)&new_callid,
+ sizeof(new_callid)) == 0)
+ return NF_DROP;
+ return NF_ACCEPT;
+}
+
+static void
+pptp_exp_gre(struct nf_conntrack_expect *expect_orig,
+ struct nf_conntrack_expect *expect_reply)
+{
+ const struct nf_conn *ct = expect_orig->master;
+ struct nf_ct_pptp_master *ct_pptp_info;
+ struct nf_nat_pptp *nat_pptp_info;
+
+ ct_pptp_info = nfct_help_data(ct);
+ nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
+
+ /* save original PAC call ID in nat_info */
+ nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
+
+ /* alter expectation for PNS->PAC direction */
+ expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id;
+ expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id;
+ expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id;
+ expect_orig->dir = IP_CT_DIR_ORIGINAL;
+
+ /* alter expectation for PAC->PNS direction */
+ expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id;
+ expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id;
+ expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id;
+ expect_reply->dir = IP_CT_DIR_REPLY;
+}
+
+/* inbound packets == from PAC to PNS */
+static int
+pptp_inbound_pkt(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq)
+{
+ const struct nf_nat_pptp *nat_pptp_info;
+ u_int16_t msg;
+ __be16 new_pcid;
+ unsigned int pcid_off;
+
+ nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
+ new_pcid = nat_pptp_info->pns_call_id;
+
+ switch (msg = ntohs(ctlh->messageType)) {
+ case PPTP_OUT_CALL_REPLY:
+ pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
+ break;
+ case PPTP_IN_CALL_CONNECT:
+ pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
+ break;
+ case PPTP_IN_CALL_REQUEST:
+ /* only need to nat in case PAC is behind NAT box */
+ return NF_ACCEPT;
+ case PPTP_WAN_ERROR_NOTIFY:
+ pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID);
+ break;
+ case PPTP_CALL_DISCONNECT_NOTIFY:
+ pcid_off = offsetof(union pptp_ctrl_union, disc.callID);
+ break;
+ case PPTP_SET_LINK_INFO:
+ pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
+ break;
+ default:
+ pr_debug("unknown inbound packet %s\n",
+ msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] :
+ pptp_msg_name[0]);
+ /* fall through */
+ case PPTP_START_SESSION_REQUEST:
+ case PPTP_START_SESSION_REPLY:
+ case PPTP_STOP_SESSION_REQUEST:
+ case PPTP_STOP_SESSION_REPLY:
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* no need to alter packet */
+ return NF_ACCEPT;
+ }
+
+ /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST,
+ * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
+
+ /* mangle packet */
+ pr_debug("altering peer call id from 0x%04x to 0x%04x\n",
+ ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
+
+ if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff,
+ pcid_off + sizeof(struct pptp_pkt_hdr) +
+ sizeof(struct PptpControlHeader),
+ sizeof(new_pcid), (char *)&new_pcid,
+ sizeof(new_pcid)) == 0)
+ return NF_DROP;
+ return NF_ACCEPT;
+}
+
+static int __init nf_nat_helper_pptp_init(void)
+{
+ nf_nat_need_gre();
+
+ BUG_ON(nf_nat_pptp_hook_outbound != NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
+
+ BUG_ON(nf_nat_pptp_hook_inbound != NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, pptp_inbound_pkt);
+
+ BUG_ON(nf_nat_pptp_hook_exp_gre != NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, pptp_exp_gre);
+
+ BUG_ON(nf_nat_pptp_hook_expectfn != NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, pptp_nat_expected);
+ return 0;
+}
+
+static void __exit nf_nat_helper_pptp_fini(void)
+{
+ RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, NULL);
+ synchronize_rcu();
+}
+
+module_init(nf_nat_helper_pptp_init);
+module_exit(nf_nat_helper_pptp_fini);
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
new file mode 100644
index 000000000..9414923f1
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -0,0 +1,149 @@
+/*
+ * nf_nat_proto_gre.c
+ *
+ * NAT protocol helper module for GRE.
+ *
+ * GRE is a generic encapsulation protocol, which is generally not very
+ * suited for NAT, as it has no protocol-specific part as port numbers.
+ *
+ * It has an optional key field, which may help us distinguishing two
+ * connections between the same two hosts.
+ *
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
+ *
+ * PPTP is built on top of a modified version of GRE, and has a mandatory
+ * field called "CallID", which serves us for the same purpose as the key
+ * field in plain GRE.
+ *
+ * Documentation about PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+#include <linux/netfilter/nf_conntrack_proto_gre.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
+
+/* generate unique tuple ... */
+static void
+gre_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
+{
+ static u_int16_t key;
+ __be16 *keyptr;
+ unsigned int min, i, range_size;
+
+ /* If there is no master conntrack we are not PPTP,
+ do not change tuples */
+ if (!ct->master)
+ return;
+
+ if (maniptype == NF_NAT_MANIP_SRC)
+ keyptr = &tuple->src.u.gre.key;
+ else
+ keyptr = &tuple->dst.u.gre.key;
+
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
+ pr_debug("%p: NATing GRE PPTP\n", ct);
+ min = 1;
+ range_size = 0xffff;
+ } else {
+ min = ntohs(range->min_proto.gre.key);
+ range_size = ntohs(range->max_proto.gre.key) - min + 1;
+ }
+
+ pr_debug("min = %u, range_size = %u\n", min, range_size);
+
+ for (i = 0; ; ++key) {
+ *keyptr = htons(min + key % range_size);
+ if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
+ return;
+ }
+
+ pr_debug("%p: no NAT mapping\n", ct);
+ return;
+}
+
+/* manipulate a GRE packet according to maniptype */
+static bool
+gre_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ const struct gre_hdr *greh;
+ struct gre_hdr_pptp *pgreh;
+
+ /* pgreh includes two optional 32bit fields which are not required
+ * to be there. That's where the magic '8' comes from */
+ if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
+ return false;
+
+ greh = (void *)skb->data + hdroff;
+ pgreh = (struct gre_hdr_pptp *)greh;
+
+ /* we only have destination manip of a packet, since 'source key'
+ * is not present in the packet itself */
+ if (maniptype != NF_NAT_MANIP_DST)
+ return true;
+ switch (greh->version) {
+ case GRE_VERSION_1701:
+ /* We do not currently NAT any GREv0 packets.
+ * Try to behave like "nf_nat_proto_unknown" */
+ break;
+ case GRE_VERSION_PPTP:
+ pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
+ pgreh->call_id = tuple->dst.u.gre.key;
+ break;
+ default:
+ pr_debug("can't nat unknown GRE version\n");
+ return false;
+ }
+ return true;
+}
+
+static const struct nf_nat_l4proto gre = {
+ .l4proto = IPPROTO_GRE,
+ .manip_pkt = gre_manip_pkt,
+ .in_range = nf_nat_l4proto_in_range,
+ .unique_tuple = gre_unique_tuple,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
+#endif
+};
+
+static int __init nf_nat_proto_gre_init(void)
+{
+ return nf_nat_l4proto_register(NFPROTO_IPV4, &gre);
+}
+
+static void __exit nf_nat_proto_gre_fini(void)
+{
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &gre);
+}
+
+module_init(nf_nat_proto_gre_init);
+module_exit(nf_nat_proto_gre_fini);
+
+void nf_nat_need_gre(void)
+{
+ return;
+}
+EXPORT_SYMBOL_GPL(nf_nat_need_gre);
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
new file mode 100644
index 000000000..4557b4ab8
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -0,0 +1,83 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static bool
+icmp_in_range(const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype,
+ const union nf_conntrack_man_proto *min,
+ const union nf_conntrack_man_proto *max)
+{
+ return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
+ ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
+}
+
+static void
+icmp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
+{
+ static u_int16_t id;
+ unsigned int range_size;
+ unsigned int i;
+
+ range_size = ntohs(range->max_proto.icmp.id) -
+ ntohs(range->min_proto.icmp.id) + 1;
+ /* If no range specified... */
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
+ range_size = 0xFFFF;
+
+ for (i = 0; ; ++id) {
+ tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
+ (id % range_size));
+ if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
+ return;
+ }
+ return;
+}
+
+static bool
+icmp_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ struct icmphdr *hdr;
+
+ if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ return false;
+
+ hdr = (struct icmphdr *)(skb->data + hdroff);
+ inet_proto_csum_replace2(&hdr->checksum, skb,
+ hdr->un.echo.id, tuple->src.u.icmp.id, 0);
+ hdr->un.echo.id = tuple->src.u.icmp.id;
+ return true;
+}
+
+const struct nf_nat_l4proto nf_nat_l4proto_icmp = {
+ .l4proto = IPPROTO_ICMP,
+ .manip_pkt = icmp_manip_pkt,
+ .in_range = icmp_in_range,
+ .unique_tuple = icmp_unique_tuple,
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+ .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
+#endif
+};
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
new file mode 100644
index 000000000..7c6766713
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -0,0 +1,1313 @@
+/*
+ * nf_nat_snmp_basic.c
+ *
+ * Basic SNMP Application Layer Gateway
+ *
+ * This IP NAT module is intended for use with SNMP network
+ * discovery and monitoring applications where target networks use
+ * conflicting private address realms.
+ *
+ * Static NAT is used to remap the networks from the view of the network
+ * management system at the IP layer, and this module remaps some application
+ * layer addresses to match.
+ *
+ * The simplest form of ALG is performed, where only tagged IP addresses
+ * are modified. The module does not need to be MIB aware and only scans
+ * messages at the ASN.1/BER level.
+ *
+ * Currently, only SNMPv1 and SNMPv2 are supported.
+ *
+ * More information on ALG and associated issues can be found in
+ * RFC 2962
+ *
+ * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory
+ * McLean & Jochen Friedrich, stripped down for use in the kernel.
+ *
+ * Copyright (c) 2000 RP Internet (www.rpi.net.au).
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: James Morris <jmorris@intercode.com.au>
+ *
+ * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <net/checksum.h>
+#include <net/udp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <linux/netfilter/nf_conntrack_snmp.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
+MODULE_ALIAS("ip_nat_snmp_basic");
+
+#define SNMP_PORT 161
+#define SNMP_TRAP_PORT 162
+#define NOCT1(n) (*(u8 *)(n))
+
+static int debug;
+static DEFINE_SPINLOCK(snmp_lock);
+
+/*
+ * Application layer address mapping mimics the NAT mapping, but
+ * only for the first octet in this case (a more flexible system
+ * can be implemented if needed).
+ */
+struct oct1_map
+{
+ u_int8_t from;
+ u_int8_t to;
+};
+
+
+/*****************************************************************************
+ *
+ * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse)
+ *
+ *****************************************************************************/
+
+/* Class */
+#define ASN1_UNI 0 /* Universal */
+#define ASN1_APL 1 /* Application */
+#define ASN1_CTX 2 /* Context */
+#define ASN1_PRV 3 /* Private */
+
+/* Tag */
+#define ASN1_EOC 0 /* End Of Contents */
+#define ASN1_BOL 1 /* Boolean */
+#define ASN1_INT 2 /* Integer */
+#define ASN1_BTS 3 /* Bit String */
+#define ASN1_OTS 4 /* Octet String */
+#define ASN1_NUL 5 /* Null */
+#define ASN1_OJI 6 /* Object Identifier */
+#define ASN1_OJD 7 /* Object Description */
+#define ASN1_EXT 8 /* External */
+#define ASN1_SEQ 16 /* Sequence */
+#define ASN1_SET 17 /* Set */
+#define ASN1_NUMSTR 18 /* Numerical String */
+#define ASN1_PRNSTR 19 /* Printable String */
+#define ASN1_TEXSTR 20 /* Teletext String */
+#define ASN1_VIDSTR 21 /* Video String */
+#define ASN1_IA5STR 22 /* IA5 String */
+#define ASN1_UNITIM 23 /* Universal Time */
+#define ASN1_GENTIM 24 /* General Time */
+#define ASN1_GRASTR 25 /* Graphical String */
+#define ASN1_VISSTR 26 /* Visible String */
+#define ASN1_GENSTR 27 /* General String */
+
+/* Primitive / Constructed methods*/
+#define ASN1_PRI 0 /* Primitive */
+#define ASN1_CON 1 /* Constructed */
+
+/*
+ * Error codes.
+ */
+#define ASN1_ERR_NOERROR 0
+#define ASN1_ERR_DEC_EMPTY 2
+#define ASN1_ERR_DEC_EOC_MISMATCH 3
+#define ASN1_ERR_DEC_LENGTH_MISMATCH 4
+#define ASN1_ERR_DEC_BADVALUE 5
+
+/*
+ * ASN.1 context.
+ */
+struct asn1_ctx
+{
+ int error; /* Error condition */
+ unsigned char *pointer; /* Octet just to be decoded */
+ unsigned char *begin; /* First octet */
+ unsigned char *end; /* Octet after last octet */
+};
+
+/*
+ * Octet string (not null terminated)
+ */
+struct asn1_octstr
+{
+ unsigned char *data;
+ unsigned int len;
+};
+
+static void asn1_open(struct asn1_ctx *ctx,
+ unsigned char *buf,
+ unsigned int len)
+{
+ ctx->begin = buf;
+ ctx->end = buf + len;
+ ctx->pointer = buf;
+ ctx->error = ASN1_ERR_NOERROR;
+}
+
+static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
+{
+ if (ctx->pointer >= ctx->end) {
+ ctx->error = ASN1_ERR_DEC_EMPTY;
+ return 0;
+ }
+ *ch = *(ctx->pointer)++;
+ return 1;
+}
+
+static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
+{
+ unsigned char ch;
+
+ *tag = 0;
+
+ do
+ {
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+ *tag <<= 7;
+ *tag |= ch & 0x7F;
+ } while ((ch & 0x80) == 0x80);
+ return 1;
+}
+
+static unsigned char asn1_id_decode(struct asn1_ctx *ctx,
+ unsigned int *cls,
+ unsigned int *con,
+ unsigned int *tag)
+{
+ unsigned char ch;
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ *cls = (ch & 0xC0) >> 6;
+ *con = (ch & 0x20) >> 5;
+ *tag = (ch & 0x1F);
+
+ if (*tag == 0x1F) {
+ if (!asn1_tag_decode(ctx, tag))
+ return 0;
+ }
+ return 1;
+}
+
+static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
+ unsigned int *def,
+ unsigned int *len)
+{
+ unsigned char ch, cnt;
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ if (ch == 0x80)
+ *def = 0;
+ else {
+ *def = 1;
+
+ if (ch < 0x80)
+ *len = ch;
+ else {
+ cnt = ch & 0x7F;
+ *len = 0;
+
+ while (cnt > 0) {
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+ *len <<= 8;
+ *len |= ch;
+ cnt--;
+ }
+ }
+ }
+
+ /* don't trust len bigger than ctx buffer */
+ if (*len > ctx->end - ctx->pointer)
+ return 0;
+
+ return 1;
+}
+
+static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
+ unsigned char **eoc,
+ unsigned int *cls,
+ unsigned int *con,
+ unsigned int *tag)
+{
+ unsigned int def, len;
+
+ if (!asn1_id_decode(ctx, cls, con, tag))
+ return 0;
+
+ def = len = 0;
+ if (!asn1_length_decode(ctx, &def, &len))
+ return 0;
+
+ /* primitive shall be definite, indefinite shall be constructed */
+ if (*con == ASN1_PRI && !def)
+ return 0;
+
+ if (def)
+ *eoc = ctx->pointer + len;
+ else
+ *eoc = NULL;
+ return 1;
+}
+
+static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
+{
+ unsigned char ch;
+
+ if (eoc == NULL) {
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ if (ch != 0x00) {
+ ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
+ return 0;
+ }
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ if (ch != 0x00) {
+ ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
+ return 0;
+ }
+ return 1;
+ } else {
+ if (ctx->pointer != eoc) {
+ ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
+ return 0;
+ }
+ return 1;
+ }
+}
+
+static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc)
+{
+ ctx->pointer = eoc;
+ return 1;
+}
+
+static unsigned char asn1_long_decode(struct asn1_ctx *ctx,
+ unsigned char *eoc,
+ long *integer)
+{
+ unsigned char ch;
+ unsigned int len;
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ *integer = (signed char) ch;
+ len = 1;
+
+ while (ctx->pointer < eoc) {
+ if (++len > sizeof (long)) {
+ ctx->error = ASN1_ERR_DEC_BADVALUE;
+ return 0;
+ }
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ *integer <<= 8;
+ *integer |= ch;
+ }
+ return 1;
+}
+
+static unsigned char asn1_uint_decode(struct asn1_ctx *ctx,
+ unsigned char *eoc,
+ unsigned int *integer)
+{
+ unsigned char ch;
+ unsigned int len;
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ *integer = ch;
+ if (ch == 0) len = 0;
+ else len = 1;
+
+ while (ctx->pointer < eoc) {
+ if (++len > sizeof (unsigned int)) {
+ ctx->error = ASN1_ERR_DEC_BADVALUE;
+ return 0;
+ }
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ *integer <<= 8;
+ *integer |= ch;
+ }
+ return 1;
+}
+
+static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx,
+ unsigned char *eoc,
+ unsigned long *integer)
+{
+ unsigned char ch;
+ unsigned int len;
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ *integer = ch;
+ if (ch == 0) len = 0;
+ else len = 1;
+
+ while (ctx->pointer < eoc) {
+ if (++len > sizeof (unsigned long)) {
+ ctx->error = ASN1_ERR_DEC_BADVALUE;
+ return 0;
+ }
+
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ *integer <<= 8;
+ *integer |= ch;
+ }
+ return 1;
+}
+
+static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
+ unsigned char *eoc,
+ unsigned char **octets,
+ unsigned int *len)
+{
+ unsigned char *ptr;
+
+ *len = 0;
+
+ *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
+ if (*octets == NULL)
+ return 0;
+
+ ptr = *octets;
+ while (ctx->pointer < eoc) {
+ if (!asn1_octet_decode(ctx, ptr++)) {
+ kfree(*octets);
+ *octets = NULL;
+ return 0;
+ }
+ (*len)++;
+ }
+ return 1;
+}
+
+static unsigned char asn1_subid_decode(struct asn1_ctx *ctx,
+ unsigned long *subid)
+{
+ unsigned char ch;
+
+ *subid = 0;
+
+ do {
+ if (!asn1_octet_decode(ctx, &ch))
+ return 0;
+
+ *subid <<= 7;
+ *subid |= ch & 0x7F;
+ } while ((ch & 0x80) == 0x80);
+ return 1;
+}
+
+static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
+ unsigned char *eoc,
+ unsigned long **oid,
+ unsigned int *len)
+{
+ unsigned long subid;
+ unsigned long *optr;
+ size_t size;
+
+ size = eoc - ctx->pointer + 1;
+
+ /* first subid actually encodes first two subids */
+ if (size < 2 || size > ULONG_MAX/sizeof(unsigned long))
+ return 0;
+
+ *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
+ if (*oid == NULL)
+ return 0;
+
+ optr = *oid;
+
+ if (!asn1_subid_decode(ctx, &subid)) {
+ kfree(*oid);
+ *oid = NULL;
+ return 0;
+ }
+
+ if (subid < 40) {
+ optr[0] = 0;
+ optr[1] = subid;
+ } else if (subid < 80) {
+ optr[0] = 1;
+ optr[1] = subid - 40;
+ } else {
+ optr[0] = 2;
+ optr[1] = subid - 80;
+ }
+
+ *len = 2;
+ optr += 2;
+
+ while (ctx->pointer < eoc) {
+ if (++(*len) > size) {
+ ctx->error = ASN1_ERR_DEC_BADVALUE;
+ kfree(*oid);
+ *oid = NULL;
+ return 0;
+ }
+
+ if (!asn1_subid_decode(ctx, optr++)) {
+ kfree(*oid);
+ *oid = NULL;
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/*****************************************************************************
+ *
+ * SNMP decoding routines (gxsnmp author Dirk Wisse)
+ *
+ *****************************************************************************/
+
+/* SNMP Versions */
+#define SNMP_V1 0
+#define SNMP_V2C 1
+#define SNMP_V2 2
+#define SNMP_V3 3
+
+/* Default Sizes */
+#define SNMP_SIZE_COMM 256
+#define SNMP_SIZE_OBJECTID 128
+#define SNMP_SIZE_BUFCHR 256
+#define SNMP_SIZE_BUFINT 128
+#define SNMP_SIZE_SMALLOBJECTID 16
+
+/* Requests */
+#define SNMP_PDU_GET 0
+#define SNMP_PDU_NEXT 1
+#define SNMP_PDU_RESPONSE 2
+#define SNMP_PDU_SET 3
+#define SNMP_PDU_TRAP1 4
+#define SNMP_PDU_BULK 5
+#define SNMP_PDU_INFORM 6
+#define SNMP_PDU_TRAP2 7
+
+/* Errors */
+#define SNMP_NOERROR 0
+#define SNMP_TOOBIG 1
+#define SNMP_NOSUCHNAME 2
+#define SNMP_BADVALUE 3
+#define SNMP_READONLY 4
+#define SNMP_GENERROR 5
+#define SNMP_NOACCESS 6
+#define SNMP_WRONGTYPE 7
+#define SNMP_WRONGLENGTH 8
+#define SNMP_WRONGENCODING 9
+#define SNMP_WRONGVALUE 10
+#define SNMP_NOCREATION 11
+#define SNMP_INCONSISTENTVALUE 12
+#define SNMP_RESOURCEUNAVAILABLE 13
+#define SNMP_COMMITFAILED 14
+#define SNMP_UNDOFAILED 15
+#define SNMP_AUTHORIZATIONERROR 16
+#define SNMP_NOTWRITABLE 17
+#define SNMP_INCONSISTENTNAME 18
+
+/* General SNMP V1 Traps */
+#define SNMP_TRAP_COLDSTART 0
+#define SNMP_TRAP_WARMSTART 1
+#define SNMP_TRAP_LINKDOWN 2
+#define SNMP_TRAP_LINKUP 3
+#define SNMP_TRAP_AUTFAILURE 4
+#define SNMP_TRAP_EQPNEIGHBORLOSS 5
+#define SNMP_TRAP_ENTSPECIFIC 6
+
+/* SNMPv1 Types */
+#define SNMP_NULL 0
+#define SNMP_INTEGER 1 /* l */
+#define SNMP_OCTETSTR 2 /* c */
+#define SNMP_DISPLAYSTR 2 /* c */
+#define SNMP_OBJECTID 3 /* ul */
+#define SNMP_IPADDR 4 /* uc */
+#define SNMP_COUNTER 5 /* ul */
+#define SNMP_GAUGE 6 /* ul */
+#define SNMP_TIMETICKS 7 /* ul */
+#define SNMP_OPAQUE 8 /* c */
+
+/* Additional SNMPv2 Types */
+#define SNMP_UINTEGER 5 /* ul */
+#define SNMP_BITSTR 9 /* uc */
+#define SNMP_NSAP 10 /* uc */
+#define SNMP_COUNTER64 11 /* ul */
+#define SNMP_NOSUCHOBJECT 12
+#define SNMP_NOSUCHINSTANCE 13
+#define SNMP_ENDOFMIBVIEW 14
+
+union snmp_syntax
+{
+ unsigned char uc[0]; /* 8 bit unsigned */
+ char c[0]; /* 8 bit signed */
+ unsigned long ul[0]; /* 32 bit unsigned */
+ long l[0]; /* 32 bit signed */
+};
+
+struct snmp_object
+{
+ unsigned long *id;
+ unsigned int id_len;
+ unsigned short type;
+ unsigned int syntax_len;
+ union snmp_syntax syntax;
+};
+
+struct snmp_request
+{
+ unsigned long id;
+ unsigned int error_status;
+ unsigned int error_index;
+};
+
+struct snmp_v1_trap
+{
+ unsigned long *id;
+ unsigned int id_len;
+ unsigned long ip_address; /* pointer */
+ unsigned int general;
+ unsigned int specific;
+ unsigned long time;
+};
+
+/* SNMP types */
+#define SNMP_IPA 0
+#define SNMP_CNT 1
+#define SNMP_GGE 2
+#define SNMP_TIT 3
+#define SNMP_OPQ 4
+#define SNMP_C64 6
+
+/* SNMP errors */
+#define SERR_NSO 0
+#define SERR_NSI 1
+#define SERR_EOM 2
+
+static inline void mangle_address(unsigned char *begin,
+ unsigned char *addr,
+ const struct oct1_map *map,
+ __sum16 *check);
+struct snmp_cnv
+{
+ unsigned int class;
+ unsigned int tag;
+ int syntax;
+};
+
+static const struct snmp_cnv snmp_conv[] = {
+ {ASN1_UNI, ASN1_NUL, SNMP_NULL},
+ {ASN1_UNI, ASN1_INT, SNMP_INTEGER},
+ {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
+ {ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR},
+ {ASN1_UNI, ASN1_OJI, SNMP_OBJECTID},
+ {ASN1_APL, SNMP_IPA, SNMP_IPADDR},
+ {ASN1_APL, SNMP_CNT, SNMP_COUNTER}, /* Counter32 */
+ {ASN1_APL, SNMP_GGE, SNMP_GAUGE}, /* Gauge32 == Unsigned32 */
+ {ASN1_APL, SNMP_TIT, SNMP_TIMETICKS},
+ {ASN1_APL, SNMP_OPQ, SNMP_OPAQUE},
+
+ /* SNMPv2 data types and errors */
+ {ASN1_UNI, ASN1_BTS, SNMP_BITSTR},
+ {ASN1_APL, SNMP_C64, SNMP_COUNTER64},
+ {ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT},
+ {ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE},
+ {ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW},
+ {0, 0, -1}
+};
+
+static unsigned char snmp_tag_cls2syntax(unsigned int tag,
+ unsigned int cls,
+ unsigned short *syntax)
+{
+ const struct snmp_cnv *cnv;
+
+ cnv = snmp_conv;
+
+ while (cnv->syntax != -1) {
+ if (cnv->tag == tag && cnv->class == cls) {
+ *syntax = cnv->syntax;
+ return 1;
+ }
+ cnv++;
+ }
+ return 0;
+}
+
+static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
+ struct snmp_object **obj)
+{
+ unsigned int cls, con, tag, len, idlen;
+ unsigned short type;
+ unsigned char *eoc, *end, *p;
+ unsigned long *lp, *id;
+ unsigned long ul;
+ long l;
+
+ *obj = NULL;
+ id = NULL;
+
+ if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
+ return 0;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
+ return 0;
+
+ if (!asn1_oid_decode(ctx, end, &id, &idlen))
+ return 0;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) {
+ kfree(id);
+ return 0;
+ }
+
+ if (con != ASN1_PRI) {
+ kfree(id);
+ return 0;
+ }
+
+ type = 0;
+ if (!snmp_tag_cls2syntax(tag, cls, &type)) {
+ kfree(id);
+ return 0;
+ }
+
+ l = 0;
+ switch (type) {
+ case SNMP_INTEGER:
+ len = sizeof(long);
+ if (!asn1_long_decode(ctx, end, &l)) {
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ return 0;
+ }
+ (*obj)->syntax.l[0] = l;
+ break;
+ case SNMP_OCTETSTR:
+ case SNMP_OPAQUE:
+ if (!asn1_octets_decode(ctx, end, &p, &len)) {
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(p);
+ kfree(id);
+ return 0;
+ }
+ memcpy((*obj)->syntax.c, p, len);
+ kfree(p);
+ break;
+ case SNMP_NULL:
+ case SNMP_NOSUCHOBJECT:
+ case SNMP_NOSUCHINSTANCE:
+ case SNMP_ENDOFMIBVIEW:
+ len = 0;
+ *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ return 0;
+ }
+ if (!asn1_null_decode(ctx, end)) {
+ kfree(id);
+ kfree(*obj);
+ *obj = NULL;
+ return 0;
+ }
+ break;
+ case SNMP_OBJECTID:
+ if (!asn1_oid_decode(ctx, end, &lp, &len)) {
+ kfree(id);
+ return 0;
+ }
+ len *= sizeof(unsigned long);
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(lp);
+ kfree(id);
+ return 0;
+ }
+ memcpy((*obj)->syntax.ul, lp, len);
+ kfree(lp);
+ break;
+ case SNMP_IPADDR:
+ if (!asn1_octets_decode(ctx, end, &p, &len)) {
+ kfree(id);
+ return 0;
+ }
+ if (len != 4) {
+ kfree(p);
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(p);
+ kfree(id);
+ return 0;
+ }
+ memcpy((*obj)->syntax.uc, p, len);
+ kfree(p);
+ break;
+ case SNMP_COUNTER:
+ case SNMP_GAUGE:
+ case SNMP_TIMETICKS:
+ len = sizeof(unsigned long);
+ if (!asn1_ulong_decode(ctx, end, &ul)) {
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ return 0;
+ }
+ (*obj)->syntax.ul[0] = ul;
+ break;
+ default:
+ kfree(id);
+ return 0;
+ }
+
+ (*obj)->syntax_len = len;
+ (*obj)->type = type;
+ (*obj)->id = id;
+ (*obj)->id_len = idlen;
+
+ if (!asn1_eoc_decode(ctx, eoc)) {
+ kfree(id);
+ kfree(*obj);
+ *obj = NULL;
+ return 0;
+ }
+ return 1;
+}
+
+static unsigned char snmp_request_decode(struct asn1_ctx *ctx,
+ struct snmp_request *request)
+{
+ unsigned int cls, con, tag;
+ unsigned char *end;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ return 0;
+
+ if (!asn1_ulong_decode(ctx, end, &request->id))
+ return 0;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ return 0;
+
+ if (!asn1_uint_decode(ctx, end, &request->error_status))
+ return 0;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ return 0;
+
+ if (!asn1_uint_decode(ctx, end, &request->error_index))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Fast checksum update for possibly oddly-aligned UDP byte, from the
+ * code example in the draft.
+ */
+static void fast_csum(__sum16 *csum,
+ const unsigned char *optr,
+ const unsigned char *nptr,
+ int offset)
+{
+ unsigned char s[4];
+
+ if (offset & 1) {
+ s[0] = ~0;
+ s[1] = ~*optr;
+ s[2] = 0;
+ s[3] = *nptr;
+ } else {
+ s[0] = ~*optr;
+ s[1] = ~0;
+ s[2] = *nptr;
+ s[3] = 0;
+ }
+
+ *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum)));
+}
+
+/*
+ * Mangle IP address.
+ * - begin points to the start of the snmp messgae
+ * - addr points to the start of the address
+ */
+static inline void mangle_address(unsigned char *begin,
+ unsigned char *addr,
+ const struct oct1_map *map,
+ __sum16 *check)
+{
+ if (map->from == NOCT1(addr)) {
+ u_int32_t old;
+
+ if (debug)
+ memcpy(&old, addr, sizeof(old));
+
+ *addr = map->to;
+
+ /* Update UDP checksum if being used */
+ if (*check) {
+ fast_csum(check,
+ &map->from, &map->to, addr - begin);
+
+ }
+
+ if (debug)
+ printk(KERN_DEBUG "bsalg: mapped %pI4 to %pI4\n",
+ &old, addr);
+ }
+}
+
+static unsigned char snmp_trap_decode(struct asn1_ctx *ctx,
+ struct snmp_v1_trap *trap,
+ const struct oct1_map *map,
+ __sum16 *check)
+{
+ unsigned int cls, con, tag, len;
+ unsigned char *end;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
+ return 0;
+
+ if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len))
+ return 0;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ goto err_id_free;
+
+ if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) ||
+ (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS)))
+ goto err_id_free;
+
+ if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len))
+ goto err_id_free;
+
+ /* IPv4 only */
+ if (len != 4)
+ goto err_addr_free;
+
+ mangle_address(ctx->begin, ctx->pointer - 4, map, check);
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ goto err_addr_free;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ goto err_addr_free;
+
+ if (!asn1_uint_decode(ctx, end, &trap->general))
+ goto err_addr_free;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ goto err_addr_free;
+
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ goto err_addr_free;
+
+ if (!asn1_uint_decode(ctx, end, &trap->specific))
+ goto err_addr_free;
+
+ if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
+ goto err_addr_free;
+
+ if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) ||
+ (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT)))
+ goto err_addr_free;
+
+ if (!asn1_ulong_decode(ctx, end, &trap->time))
+ goto err_addr_free;
+
+ return 1;
+
+err_addr_free:
+ kfree((unsigned long *)trap->ip_address);
+
+err_id_free:
+ kfree(trap->id);
+
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ * Misc. routines
+ *
+ *****************************************************************************/
+
+static void hex_dump(const unsigned char *buf, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++) {
+ if (i && !(i % 16))
+ printk("\n");
+ printk("%02x ", *(buf + i));
+ }
+ printk("\n");
+}
+
+/*
+ * Parse and mangle SNMP message according to mapping.
+ * (And this is the fucking 'basic' method).
+ */
+static int snmp_parse_mangle(unsigned char *msg,
+ u_int16_t len,
+ const struct oct1_map *map,
+ __sum16 *check)
+{
+ unsigned char *eoc, *end;
+ unsigned int cls, con, tag, vers, pdutype;
+ struct asn1_ctx ctx;
+ struct asn1_octstr comm;
+ struct snmp_object *obj;
+
+ if (debug > 1)
+ hex_dump(msg, len);
+
+ asn1_open(&ctx, msg, len);
+
+ /*
+ * Start of SNMP message.
+ */
+ if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
+ return 0;
+ if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
+ return 0;
+
+ /*
+ * Version 1 or 2 handled.
+ */
+ if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag))
+ return 0;
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
+ return 0;
+ if (!asn1_uint_decode (&ctx, end, &vers))
+ return 0;
+ if (debug > 1)
+ printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1);
+ if (vers > 1)
+ return 1;
+
+ /*
+ * Community.
+ */
+ if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag))
+ return 0;
+ if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS)
+ return 0;
+ if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len))
+ return 0;
+ if (debug > 1) {
+ unsigned int i;
+
+ printk(KERN_DEBUG "bsalg: community: ");
+ for (i = 0; i < comm.len; i++)
+ printk("%c", comm.data[i]);
+ printk("\n");
+ }
+ kfree(comm.data);
+
+ /*
+ * PDU type
+ */
+ if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype))
+ return 0;
+ if (cls != ASN1_CTX || con != ASN1_CON)
+ return 0;
+ if (debug > 1) {
+ static const unsigned char *const pdus[] = {
+ [SNMP_PDU_GET] = "get",
+ [SNMP_PDU_NEXT] = "get-next",
+ [SNMP_PDU_RESPONSE] = "response",
+ [SNMP_PDU_SET] = "set",
+ [SNMP_PDU_TRAP1] = "trapv1",
+ [SNMP_PDU_BULK] = "bulk",
+ [SNMP_PDU_INFORM] = "inform",
+ [SNMP_PDU_TRAP2] = "trapv2"
+ };
+
+ if (pdutype > SNMP_PDU_TRAP2)
+ printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype);
+ else
+ printk(KERN_DEBUG "bsalg: pdu: %s\n", pdus[pdutype]);
+ }
+ if (pdutype != SNMP_PDU_RESPONSE &&
+ pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2)
+ return 1;
+
+ /*
+ * Request header or v1 trap
+ */
+ if (pdutype == SNMP_PDU_TRAP1) {
+ struct snmp_v1_trap trap;
+ unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check);
+
+ if (ret) {
+ kfree(trap.id);
+ kfree((unsigned long *)trap.ip_address);
+ } else
+ return ret;
+
+ } else {
+ struct snmp_request req;
+
+ if (!snmp_request_decode(&ctx, &req))
+ return 0;
+
+ if (debug > 1)
+ printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u "
+ "error_index=%u\n", req.id, req.error_status,
+ req.error_index);
+ }
+
+ /*
+ * Loop through objects, look for IP addresses to mangle.
+ */
+ if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
+ return 0;
+
+ if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
+ return 0;
+
+ while (!asn1_eoc_decode(&ctx, eoc)) {
+ unsigned int i;
+
+ if (!snmp_object_decode(&ctx, &obj)) {
+ if (obj) {
+ kfree(obj->id);
+ kfree(obj);
+ }
+ return 0;
+ }
+
+ if (debug > 1) {
+ printk(KERN_DEBUG "bsalg: object: ");
+ for (i = 0; i < obj->id_len; i++) {
+ if (i > 0)
+ printk(".");
+ printk("%lu", obj->id[i]);
+ }
+ printk(": type=%u\n", obj->type);
+
+ }
+
+ if (obj->type == SNMP_IPADDR)
+ mangle_address(ctx.begin, ctx.pointer - 4 , map, check);
+
+ kfree(obj->id);
+ kfree(obj);
+ }
+
+ if (!asn1_eoc_decode(&ctx, eoc))
+ return 0;
+
+ return 1;
+}
+
+/*****************************************************************************
+ *
+ * NAT routines.
+ *
+ *****************************************************************************/
+
+/*
+ * SNMP translation routine.
+ */
+static int snmp_translate(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ struct sk_buff *skb)
+{
+ struct iphdr *iph = ip_hdr(skb);
+ struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
+ u_int16_t udplen = ntohs(udph->len);
+ u_int16_t paylen = udplen - sizeof(struct udphdr);
+ int dir = CTINFO2DIR(ctinfo);
+ struct oct1_map map;
+
+ /*
+ * Determine mappping for application layer addresses based
+ * on NAT manipulations for the packet.
+ */
+ if (dir == IP_CT_DIR_ORIGINAL) {
+ /* SNAT traps */
+ map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip);
+ map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
+ } else {
+ /* DNAT replies */
+ map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip);
+ map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip);
+ }
+
+ if (map.from == map.to)
+ return NF_ACCEPT;
+
+ if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr),
+ paylen, &map, &udph->check)) {
+ net_warn_ratelimited("bsalg: parser failed\n");
+ return NF_DROP;
+ }
+ return NF_ACCEPT;
+}
+
+/* We don't actually set up expectations, just adjust internal IP
+ * addresses if this is being NATted */
+static int help(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ unsigned int ret;
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
+
+ /* SNMP replies and originating SNMP traps get mangled */
+ if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
+ return NF_ACCEPT;
+ if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
+ return NF_ACCEPT;
+
+ /* No NAT? */
+ if (!(ct->status & IPS_NAT_MASK))
+ return NF_ACCEPT;
+
+ /*
+ * Make sure the packet length is ok. So far, we were only guaranteed
+ * to have a valid length IP header plus 8 bytes, which means we have
+ * enough room for a UDP header. Just verify the UDP length field so we
+ * can mess around with the payload.
+ */
+ if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) {
+ net_warn_ratelimited("SNMP: dropping malformed packet src=%pI4 dst=%pI4\n",
+ &iph->saddr, &iph->daddr);
+ return NF_DROP;
+ }
+
+ if (!skb_make_writable(skb, skb->len))
+ return NF_DROP;
+
+ spin_lock_bh(&snmp_lock);
+ ret = snmp_translate(ct, ctinfo, skb);
+ spin_unlock_bh(&snmp_lock);
+ return ret;
+}
+
+static const struct nf_conntrack_expect_policy snmp_exp_policy = {
+ .max_expected = 0,
+ .timeout = 180,
+};
+
+static struct nf_conntrack_helper snmp_helper __read_mostly = {
+ .me = THIS_MODULE,
+ .help = help,
+ .expect_policy = &snmp_exp_policy,
+ .name = "snmp",
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
+};
+
+static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
+ .me = THIS_MODULE,
+ .help = help,
+ .expect_policy = &snmp_exp_policy,
+ .name = "snmp_trap",
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
+};
+
+/*****************************************************************************
+ *
+ * Module stuff.
+ *
+ *****************************************************************************/
+
+static int __init nf_nat_snmp_basic_init(void)
+{
+ int ret = 0;
+
+ BUG_ON(nf_nat_snmp_hook != NULL);
+ RCU_INIT_POINTER(nf_nat_snmp_hook, help);
+
+ ret = nf_conntrack_helper_register(&snmp_trap_helper);
+ if (ret < 0) {
+ nf_conntrack_helper_unregister(&snmp_helper);
+ return ret;
+ }
+ return ret;
+}
+
+static void __exit nf_nat_snmp_basic_fini(void)
+{
+ RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
+ nf_conntrack_helper_unregister(&snmp_trap_helper);
+}
+
+module_init(nf_nat_snmp_basic_init);
+module_exit(nf_nat_snmp_basic_fini);
+
+module_param(debug, int, 0600);
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
new file mode 100644
index 000000000..3262e41ff
--- /dev/null
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -0,0 +1,192 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/route.h>
+#include <net/dst.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_bridge.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+
+const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
+ struct tcphdr *_oth, int hook)
+{
+ const struct tcphdr *oth;
+
+ /* IP header checks: fragment. */
+ if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
+ return NULL;
+
+ oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
+ sizeof(struct tcphdr), _oth);
+ if (oth == NULL)
+ return NULL;
+
+ /* No RST for RST. */
+ if (oth->rst)
+ return NULL;
+
+ /* Check checksum */
+ if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
+ return NULL;
+
+ return oth;
+}
+EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_get);
+
+struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb,
+ const struct sk_buff *oldskb,
+ __u8 protocol, int ttl)
+{
+ struct iphdr *niph, *oiph = ip_hdr(oldskb);
+
+ skb_reset_network_header(nskb);
+ niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
+ niph->version = 4;
+ niph->ihl = sizeof(struct iphdr) / 4;
+ niph->tos = 0;
+ niph->id = 0;
+ niph->frag_off = htons(IP_DF);
+ niph->protocol = protocol;
+ niph->check = 0;
+ niph->saddr = oiph->daddr;
+ niph->daddr = oiph->saddr;
+ niph->ttl = ttl;
+
+ nskb->protocol = htons(ETH_P_IP);
+
+ return niph;
+}
+EXPORT_SYMBOL_GPL(nf_reject_iphdr_put);
+
+void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb,
+ const struct tcphdr *oth)
+{
+ struct iphdr *niph = ip_hdr(nskb);
+ struct tcphdr *tcph;
+
+ skb_reset_transport_header(nskb);
+ tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
+ memset(tcph, 0, sizeof(*tcph));
+ tcph->source = oth->dest;
+ tcph->dest = oth->source;
+ tcph->doff = sizeof(struct tcphdr) / 4;
+
+ if (oth->ack) {
+ tcph->seq = oth->ack_seq;
+ } else {
+ tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
+ oldskb->len - ip_hdrlen(oldskb) -
+ (oth->doff << 2));
+ tcph->ack = 1;
+ }
+
+ tcph->rst = 1;
+ tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
+ niph->daddr, 0);
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+ nskb->csum_start = (unsigned char *)tcph - nskb->head;
+ nskb->csum_offset = offsetof(struct tcphdr, check);
+}
+EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put);
+
+/* Send RST reply */
+void nf_send_reset(struct sk_buff *oldskb, int hook)
+{
+ struct sk_buff *nskb;
+ const struct iphdr *oiph;
+ struct iphdr *niph;
+ const struct tcphdr *oth;
+ struct tcphdr _oth;
+
+ oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook);
+ if (!oth)
+ return;
+
+ if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+ return;
+
+ oiph = ip_hdr(oldskb);
+
+ nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
+ LL_MAX_HEADER, GFP_ATOMIC);
+ if (!nskb)
+ return;
+
+ /* ip_route_me_harder expects skb->dst to be set */
+ skb_dst_set_noref(nskb, skb_dst(oldskb));
+
+ skb_reserve(nskb, LL_MAX_HEADER);
+ niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
+ ip4_dst_hoplimit(skb_dst(nskb)));
+ nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
+
+ if (ip_route_me_harder(nskb, RTN_UNSPEC))
+ goto free_nskb;
+
+ /* "Never happens" */
+ if (nskb->len > dst_mtu(skb_dst(nskb)))
+ goto free_nskb;
+
+ nf_ct_attach(nskb, oldskb);
+
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ /* If we use ip_local_out for bridged traffic, the MAC source on
+ * the RST will be ours, instead of the destination's. This confuses
+ * some routers/firewalls, and they drop the packet. So we need to
+ * build the eth header using the original destination's MAC as the
+ * source, and send the RST packet directly.
+ */
+ if (oldskb->nf_bridge) {
+ struct ethhdr *oeth = eth_hdr(oldskb);
+
+ nskb->dev = nf_bridge_get_physindev(oldskb);
+ niph->tot_len = htons(nskb->len);
+ ip_send_check(niph);
+ if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
+ oeth->h_source, oeth->h_dest, nskb->len) < 0)
+ goto free_nskb;
+ dev_queue_xmit(nskb);
+ } else
+#endif
+ ip_local_out(nskb);
+
+ return;
+
+ free_nskb:
+ kfree_skb(nskb);
+}
+EXPORT_SYMBOL_GPL(nf_send_reset);
+
+void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
+{
+ struct iphdr *iph = ip_hdr(skb_in);
+ u8 proto;
+
+ if (skb_in->csum_bad || iph->frag_off & htons(IP_OFFSET))
+ return;
+
+ if (skb_csum_unnecessary(skb_in)) {
+ icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
+ return;
+ }
+
+ if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP)
+ proto = iph->protocol;
+ else
+ proto = 0;
+
+ if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0)
+ icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
+}
+EXPORT_SYMBOL_GPL(nf_send_unreach);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
new file mode 100644
index 000000000..8412268bb
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2008-2010 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netfilter_arp.h>
+#include <net/netfilter/nf_tables.h>
+
+static unsigned int
+nft_do_chain_arp(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, ops, skb, state);
+
+ return nft_do_chain(&pkt, ops);
+}
+
+static struct nft_af_info nft_af_arp __read_mostly = {
+ .family = NFPROTO_ARP,
+ .nhooks = NF_ARP_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .nops = 1,
+ .hooks = {
+ [NF_ARP_IN] = nft_do_chain_arp,
+ [NF_ARP_OUT] = nft_do_chain_arp,
+ [NF_ARP_FORWARD] = nft_do_chain_arp,
+ },
+};
+
+static int nf_tables_arp_init_net(struct net *net)
+{
+ net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+ if (net->nft.arp== NULL)
+ return -ENOMEM;
+
+ memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp));
+
+ if (nft_register_afinfo(net, net->nft.arp) < 0)
+ goto err;
+
+ return 0;
+err:
+ kfree(net->nft.arp);
+ return -ENOMEM;
+}
+
+static void nf_tables_arp_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.arp);
+ kfree(net->nft.arp);
+}
+
+static struct pernet_operations nf_tables_arp_net_ops = {
+ .init = nf_tables_arp_init_net,
+ .exit = nf_tables_arp_exit_net,
+};
+
+static const struct nf_chain_type filter_arp = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_ARP,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_ARP_IN) |
+ (1 << NF_ARP_OUT) |
+ (1 << NF_ARP_FORWARD),
+};
+
+static int __init nf_tables_arp_init(void)
+{
+ int ret;
+
+ nft_register_chain_type(&filter_arp);
+ ret = register_pernet_subsys(&nf_tables_arp_net_ops);
+ if (ret < 0)
+ nft_unregister_chain_type(&filter_arp);
+
+ return ret;
+}
+
+static void __exit nf_tables_arp_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_arp_net_ops);
+ nft_unregister_chain_type(&filter_arp);
+}
+
+module_init(nf_tables_arp_init);
+module_exit(nf_tables_arp_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
new file mode 100644
index 000000000..aa180d3a6
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/net_namespace.h>
+#include <net/ip.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+
+static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, state);
+
+ return nft_do_chain(&pkt, ops);
+}
+
+static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ if (unlikely(skb->len < sizeof(struct iphdr) ||
+ ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
+ if (net_ratelimit())
+ pr_info("nf_tables_ipv4: ignoring short SOCK_RAW "
+ "packet\n");
+ return NF_ACCEPT;
+ }
+
+ return nft_do_chain_ipv4(ops, skb, state);
+}
+
+struct nft_af_info nft_af_ipv4 __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .nops = 1,
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
+ [NF_INET_LOCAL_OUT] = nft_ipv4_output,
+ [NF_INET_FORWARD] = nft_do_chain_ipv4,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
+ },
+};
+EXPORT_SYMBOL_GPL(nft_af_ipv4);
+
+static int nf_tables_ipv4_init_net(struct net *net)
+{
+ net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+ if (net->nft.ipv4 == NULL)
+ return -ENOMEM;
+
+ memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4));
+
+ if (nft_register_afinfo(net, net->nft.ipv4) < 0)
+ goto err;
+
+ return 0;
+err:
+ kfree(net->nft.ipv4);
+ return -ENOMEM;
+}
+
+static void nf_tables_ipv4_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.ipv4);
+ kfree(net->nft.ipv4);
+}
+
+static struct pernet_operations nf_tables_ipv4_net_ops = {
+ .init = nf_tables_ipv4_init_net,
+ .exit = nf_tables_ipv4_exit_net,
+};
+
+static const struct nf_chain_type filter_ipv4 = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_IPV4,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
+};
+
+static int __init nf_tables_ipv4_init(void)
+{
+ int ret;
+
+ nft_register_chain_type(&filter_ipv4);
+ ret = register_pernet_subsys(&nf_tables_ipv4_net_ops);
+ if (ret < 0)
+ nft_unregister_chain_type(&filter_ipv4);
+
+ return ret;
+}
+
+static void __exit nf_tables_ipv4_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_ipv4_net_ops);
+ nft_unregister_chain_type(&filter_ipv4);
+}
+
+module_init(nf_tables_ipv4_init);
+module_exit(nf_tables_ipv4_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET);
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
new file mode 100644
index 000000000..bf5c30ae1
--- /dev/null
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ * Copyright (c) 2012 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ip.h>
+
+static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ struct nf_conn *ct)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, state);
+
+ return nft_do_chain(&pkt, ops);
+}
+
+static unsigned int nft_nat_ipv4_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return nf_nat_ipv4_fn(ops, skb, state, nft_nat_do_chain);
+}
+
+static unsigned int nft_nat_ipv4_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return nf_nat_ipv4_in(ops, skb, state, nft_nat_do_chain);
+}
+
+static unsigned int nft_nat_ipv4_out(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return nf_nat_ipv4_out(ops, skb, state, nft_nat_do_chain);
+}
+
+static unsigned int nft_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ return nf_nat_ipv4_local_fn(ops, skb, state, nft_nat_do_chain);
+}
+
+static const struct nf_chain_type nft_chain_nat_ipv4 = {
+ .name = "nat",
+ .type = NFT_CHAIN_T_NAT,
+ .family = NFPROTO_IPV4,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .hooks = {
+ [NF_INET_PRE_ROUTING] = nft_nat_ipv4_in,
+ [NF_INET_POST_ROUTING] = nft_nat_ipv4_out,
+ [NF_INET_LOCAL_OUT] = nft_nat_ipv4_local_fn,
+ [NF_INET_LOCAL_IN] = nft_nat_ipv4_fn,
+ },
+};
+
+static int __init nft_chain_nat_init(void)
+{
+ int err;
+
+ err = nft_register_chain_type(&nft_chain_nat_ipv4);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static void __exit nft_chain_nat_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_nat_ipv4);
+}
+
+module_init(nft_chain_nat_init);
+module_exit(nft_chain_nat_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
new file mode 100644
index 000000000..e335b0afd
--- /dev/null
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ unsigned int ret;
+ struct nft_pktinfo pkt;
+ u32 mark;
+ __be32 saddr, daddr;
+ u_int8_t tos;
+ const struct iphdr *iph;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, state);
+
+ mark = skb->mark;
+ iph = ip_hdr(skb);
+ saddr = iph->saddr;
+ daddr = iph->daddr;
+ tos = iph->tos;
+
+ ret = nft_do_chain(&pkt, ops);
+ if (ret != NF_DROP && ret != NF_QUEUE) {
+ iph = ip_hdr(skb);
+
+ if (iph->saddr != saddr ||
+ iph->daddr != daddr ||
+ skb->mark != mark ||
+ iph->tos != tos)
+ if (ip_route_me_harder(skb, RTN_UNSPEC))
+ ret = NF_DROP;
+ }
+ return ret;
+}
+
+static const struct nf_chain_type nft_chain_route_ipv4 = {
+ .name = "route",
+ .type = NFT_CHAIN_T_ROUTE,
+ .family = NFPROTO_IPV4,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_LOCAL_OUT),
+ .hooks = {
+ [NF_INET_LOCAL_OUT] = nf_route_table_hook,
+ },
+};
+
+static int __init nft_chain_route_init(void)
+{
+ return nft_register_chain_type(&nft_chain_route_ipv4);
+}
+
+static void __exit nft_chain_route_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_route_ipv4);
+}
+
+module_init(nft_chain_route_init);
+module_exit(nft_chain_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "route");
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
new file mode 100644
index 000000000..40e414c4c
--- /dev/null
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_masq.h>
+#include <net/netfilter/ipv4/nf_nat_masquerade.h>
+
+static void nft_masq_ipv4_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_masq *priv = nft_expr_priv(expr);
+ struct nf_nat_range range;
+
+ memset(&range, 0, sizeof(range));
+ range.flags = priv->flags;
+
+ regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum,
+ &range, pkt->out);
+}
+
+static struct nft_expr_type nft_masq_ipv4_type;
+static const struct nft_expr_ops nft_masq_ipv4_ops = {
+ .type = &nft_masq_ipv4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+ .eval = nft_masq_ipv4_eval,
+ .init = nft_masq_init,
+ .dump = nft_masq_dump,
+ .validate = nft_masq_validate,
+};
+
+static struct nft_expr_type nft_masq_ipv4_type __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .name = "masq",
+ .ops = &nft_masq_ipv4_ops,
+ .policy = nft_masq_policy,
+ .maxattr = NFTA_MASQ_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_masq_ipv4_module_init(void)
+{
+ int ret;
+
+ ret = nft_register_expr(&nft_masq_ipv4_type);
+ if (ret < 0)
+ return ret;
+
+ nf_nat_masquerade_ipv4_register_notifier();
+
+ return ret;
+}
+
+static void __exit nft_masq_ipv4_module_exit(void)
+{
+ nft_unregister_expr(&nft_masq_ipv4_type);
+ nf_nat_masquerade_ipv4_unregister_notifier();
+}
+
+module_init(nft_masq_ipv4_module_init);
+module_exit(nft_masq_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq");
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c
new file mode 100644
index 000000000..d8d795df9
--- /dev/null
+++ b/net/ipv4/netfilter/nft_redir_ipv4.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_redirect.h>
+#include <net/netfilter/nft_redir.h>
+
+static void nft_redir_ipv4_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_redir *priv = nft_expr_priv(expr);
+ struct nf_nat_ipv4_multi_range_compat mr;
+
+ memset(&mr, 0, sizeof(mr));
+ if (priv->sreg_proto_min) {
+ mr.range[0].min.all =
+ *(__be16 *)&regs->data[priv->sreg_proto_min];
+ mr.range[0].max.all =
+ *(__be16 *)&regs->data[priv->sreg_proto_max];
+ mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ }
+
+ mr.range[0].flags |= priv->flags;
+
+ regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr,
+ pkt->ops->hooknum);
+}
+
+static struct nft_expr_type nft_redir_ipv4_type;
+static const struct nft_expr_ops nft_redir_ipv4_ops = {
+ .type = &nft_redir_ipv4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)),
+ .eval = nft_redir_ipv4_eval,
+ .init = nft_redir_init,
+ .dump = nft_redir_dump,
+ .validate = nft_redir_validate,
+};
+
+static struct nft_expr_type nft_redir_ipv4_type __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .name = "redir",
+ .ops = &nft_redir_ipv4_ops,
+ .policy = nft_redir_policy,
+ .maxattr = NFTA_REDIR_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_redir_ipv4_module_init(void)
+{
+ return nft_register_expr(&nft_redir_ipv4_type);
+}
+
+static void __exit nft_redir_ipv4_module_exit(void)
+{
+ nft_unregister_expr(&nft_redir_ipv4_type);
+}
+
+module_init(nft_redir_ipv4_module_init);
+module_exit(nft_redir_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir");
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644
index 000000000..b07e58b51
--- /dev/null
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/nft_reject.h>
+
+static void nft_reject_ipv4_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach(pkt->skb, priv->icmp_code,
+ pkt->ops->hooknum);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset(pkt->skb, pkt->ops->hooknum);
+ break;
+ default:
+ break;
+ }
+
+ regs->verdict.code = NF_DROP;
+}
+
+static struct nft_expr_type nft_reject_ipv4_type;
+static const struct nft_expr_ops nft_reject_ipv4_ops = {
+ .type = &nft_reject_ipv4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_ipv4_eval,
+ .init = nft_reject_init,
+ .dump = nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_ipv4_type __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .name = "reject",
+ .ops = &nft_reject_ipv4_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_reject_ipv4_module_init(void)
+{
+ return nft_register_expr(&nft_reject_ipv4_type);
+}
+
+static void __exit nft_reject_ipv4_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_ipv4_type);
+}
+
+module_init(nft_reject_ipv4_module_init);
+module_exit(nft_reject_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject");