From 89bc7a0f64de7bed2e0bc68a23d75699a610cd37 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 4 Jan 2010 16:26:03 +0100
Subject: netfilter: xt_recent: save 8 bytes per htable

Moving rnd_inited into the hole after the uint8 lets go of the uint32
rnd_inited was using, plus the padding that would follow the int group.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_hashlimit.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index dd16e404424f..5bdc1fbf3ad7 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -81,13 +81,13 @@ struct xt_hashlimit_htable {
 	struct hlist_node node;		/* global list of all htables */
 	atomic_t use;
 	u_int8_t family;
+	bool rnd_initialized;
 
 	struct hashlimit_cfg1 cfg;	/* config */
 
 	/* used internally */
 	spinlock_t lock;		/* lock for list_head */
 	u_int32_t rnd;			/* random seed for hash */
-	int rnd_initialized;
 	unsigned int count;		/* number entries in table */
 	struct timer_list timer;	/* timer for gc */
 
@@ -150,7 +150,7 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht,
 	 * the first hashtable entry */
 	if (!ht->rnd_initialized) {
 		get_random_bytes(&ht->rnd, sizeof(ht->rnd));
-		ht->rnd_initialized = 1;
+		ht->rnd_initialized = true;
 	}
 
 	if (ht->cfg.max && ht->count >= ht->cfg.max) {
@@ -235,7 +235,7 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family)
 	atomic_set(&hinfo->use, 1);
 	hinfo->count = 0;
 	hinfo->family = family;
-	hinfo->rnd_initialized = 0;
+	hinfo->rnd_initialized = false;
 	spin_lock_init(&hinfo->lock);
 	hinfo->pde = proc_create_data(minfo->name, 0,
 		(family == NFPROTO_IPV4) ?
@@ -296,7 +296,7 @@ static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family)
 	atomic_set(&hinfo->use, 1);
 	hinfo->count = 0;
 	hinfo->family = family;
-	hinfo->rnd_initialized = 0;
+	hinfo->rnd_initialized = false;
 	spin_lock_init(&hinfo->lock);
 
 	hinfo->pde = proc_create_data(minfo->name, 0,
-- 
cgit v1.2.3


From 5191d50192ec1281e51cbcb5248cb2667ff4d896 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 4 Jan 2010 16:27:25 +0100
Subject: netfilter: xtables: do not grab random bytes at __init

"It is deliberately not done in the init function, since we might not
have sufficient random while booting."

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_NFQUEUE.c | 6 +++++-
 net/netfilter/xt_RATEEST.c | 7 ++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index f28f6a5fc02d..12dcd7007c3e 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -28,6 +28,7 @@ MODULE_ALIAS("ip6t_NFQUEUE");
 MODULE_ALIAS("arpt_NFQUEUE");
 
 static u32 jhash_initval __read_mostly;
+static bool rnd_inited __read_mostly;
 
 static unsigned int
 nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
@@ -90,6 +91,10 @@ static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
 	const struct xt_NFQ_info_v1 *info = par->targinfo;
 	u32 maxid;
 
+	if (unlikely(!rnd_inited)) {
+		get_random_bytes(&jhash_initval, sizeof(jhash_initval));
+		rnd_inited = true;
+	}
 	if (info->queues_total == 0) {
 		pr_err("NFQUEUE: number of total queues is 0\n");
 		return false;
@@ -135,7 +140,6 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
 
 static int __init nfqueue_tg_init(void)
 {
-	get_random_bytes(&jhash_initval, sizeof(jhash_initval));
 	return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
 }
 
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index d80b8192e0d4..87ae97e5516f 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -23,6 +23,7 @@ static DEFINE_MUTEX(xt_rateest_mutex);
 #define RATEEST_HSIZE	16
 static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly;
 static unsigned int jhash_rnd __read_mostly;
+static bool rnd_inited __read_mostly;
 
 static unsigned int xt_rateest_hash(const char *name)
 {
@@ -93,6 +94,11 @@ static bool xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 		struct gnet_estimator	est;
 	} cfg;
 
+	if (unlikely(!rnd_inited)) {
+		get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
+		rnd_inited = true;
+	}
+
 	est = xt_rateest_lookup(info->name);
 	if (est) {
 		/*
@@ -164,7 +170,6 @@ static int __init xt_rateest_tg_init(void)
 	for (i = 0; i < ARRAY_SIZE(rateest_hash); i++)
 		INIT_HLIST_HEAD(&rateest_hash[i]);
 
-	get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
 	return xt_register_target(&xt_rateest_tg_reg);
 }
 
-- 
cgit v1.2.3


From 294188ae32f984a072c64c959354b2f6f52f80a7 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 4 Jan 2010 16:28:38 +0100
Subject: netfilter: xtables: obtain random bytes earlier, in checkentry

We can initialize the random hash bytes on checkentry. This is
preferable since it is outside the hot path.

Reference: http://bugzilla.netfilter.org/show_bug.cgi?id=621
Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_connlimit.c | 17 ++++++-----------
 net/netfilter/xt_recent.c    | 20 ++++++++------------
 2 files changed, 14 insertions(+), 23 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 38f03f75a636..8103bef78e44 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -40,15 +40,11 @@ struct xt_connlimit_data {
 	spinlock_t lock;
 };
 
-static u_int32_t connlimit_rnd;
-static bool connlimit_rnd_inited;
+static u_int32_t connlimit_rnd __read_mostly;
+static bool connlimit_rnd_inited __read_mostly;
 
 static inline unsigned int connlimit_iphash(__be32 addr)
 {
-	if (unlikely(!connlimit_rnd_inited)) {
-		get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd));
-		connlimit_rnd_inited = true;
-	}
 	return jhash_1word((__force __u32)addr, connlimit_rnd) & 0xFF;
 }
 
@@ -59,11 +55,6 @@ connlimit_iphash6(const union nf_inet_addr *addr,
 	union nf_inet_addr res;
 	unsigned int i;
 
-	if (unlikely(!connlimit_rnd_inited)) {
-		get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd));
-		connlimit_rnd_inited = true;
-	}
-
 	for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i)
 		res.ip6[i] = addr->ip6[i] & mask->ip6[i];
 
@@ -226,6 +217,10 @@ static bool connlimit_mt_check(const struct xt_mtchk_param *par)
 	struct xt_connlimit_info *info = par->matchinfo;
 	unsigned int i;
 
+	if (unlikely(!connlimit_rnd_inited)) {
+		get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd));
+		connlimit_rnd_inited = true;
+	}
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "cannot load conntrack support for "
 		       "address family %u\n", par->family);
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index fc70a49c0afd..768d01ff1fea 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -90,25 +90,17 @@ static struct proc_dir_entry *recent_proc_dir;
 static const struct file_operations recent_old_fops, recent_mt_fops;
 #endif
 
-static u_int32_t hash_rnd;
-static bool hash_rnd_initted;
+static u_int32_t hash_rnd __read_mostly;
+static bool hash_rnd_inited __read_mostly;
 
-static unsigned int recent_entry_hash4(const union nf_inet_addr *addr)
+static inline unsigned int recent_entry_hash4(const union nf_inet_addr *addr)
 {
-	if (!hash_rnd_initted) {
-		get_random_bytes(&hash_rnd, sizeof(hash_rnd));
-		hash_rnd_initted = true;
-	}
 	return jhash_1word((__force u32)addr->ip, hash_rnd) &
 	       (ip_list_hash_size - 1);
 }
 
-static unsigned int recent_entry_hash6(const union nf_inet_addr *addr)
+static inline unsigned int recent_entry_hash6(const union nf_inet_addr *addr)
 {
-	if (!hash_rnd_initted) {
-		get_random_bytes(&hash_rnd, sizeof(hash_rnd));
-		hash_rnd_initted = true;
-	}
 	return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), hash_rnd) &
 	       (ip_list_hash_size - 1);
 }
@@ -287,6 +279,10 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 	unsigned i;
 	bool ret = false;
 
+	if (unlikely(!hash_rnd_inited)) {
+		get_random_bytes(&hash_rnd, sizeof(hash_rnd));
+		hash_rnd_inited = true;
+	}
 	if (hweight8(info->check_set &
 		     (XT_RECENT_SET | XT_RECENT_REMOVE |
 		      XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1)
-- 
cgit v1.2.3


From 6f7edb4881bf51300060e89915926e070ace8c4d Mon Sep 17 00:00:00 2001
From: "Catalin(ux) M. BOIE" <catab@embedromix.ro>
Date: Tue, 5 Jan 2010 05:50:24 +0100
Subject: IPVS: Allow boot time change of hash size

I was very frustrated about the fact that I have to recompile the kernel
to change the hash size. So, I created this patch.

If IPVS is built-in you can append ip_vs.conn_tab_bits=?? to kernel
command line, or, if you built IPVS as modules, you can add
options ip_vs conn_tab_bits=??.

To keep everything backward compatible, you still can select the size at
compile time, and that will be used as default.

It has been about a year since this patch was originally posted
and subsequently dropped on the basis of insufficient test data.

Mark Bergsma has provided the following test results which seem
to strongly support the need for larger hash table sizes:

We do however run into the same problem with the default setting (212 =
4096 entries), as most of our LVS balancers handle around a million
connections/SLAB entries at any point in time (around 100-150 kpps
load). With only 4096 hash table entries this implies that each entry
consists of a linked list of 256 connections *on average*.

To provide some statistics, I did an oprofile run on an 2.6.31 kernel,
with both the default 4096 table size, and the same kernel recompiled
with IP_VS_CONN_TAB_BITS set to 18 (218 = 262144 entries). I built a
quick test setup with a part of Wikimedia/Wikipedia's live traffic
mirrored by the switch to the test host.

With the default setting, at ~ 120 kpps packet load we saw a typical %si
CPU usage of around 30-35%, and oprofile reported a hot spot in
ip_vs_conn_in_get:

samples  %        image name               app name
symbol name
1719761  42.3741  ip_vs.ko                 ip_vs.ko      ip_vs_conn_in_get
302577    7.4554  bnx2                     bnx2          /bnx2
181984    4.4840  vmlinux                  vmlinux       __ticket_spin_lock
128636    3.1695  vmlinux                  vmlinux       ip_route_input
74345     1.8318  ip_vs.ko                 ip_vs.ko      ip_vs_conn_out_get
68482     1.6874  vmlinux                  vmlinux       mwait_idle

After loading the recompiled kernel with 218 entries, %si CPU usage
dropped in half to around 12-18%, and oprofile looks much healthier,
with only 7% spent in ip_vs_conn_in_get:

samples  %        image name               app name
symbol name
265641   14.4616  bnx2                     bnx2         /bnx2
143251    7.7986  vmlinux                  vmlinux      __ticket_spin_lock
140661    7.6576  ip_vs.ko                 ip_vs.ko     ip_vs_conn_in_get
94364     5.1372  vmlinux                  vmlinux      mwait_idle
86267     4.6964  vmlinux                  vmlinux      ip_route_input

[ horms@verge.net.au: trivial up-port and minor style fixes ]
Signed-off-by: Catalin(ux) M. BOIE <catab@embedromix.ro>
Cc: Mark Bergsma <mark@wikimedia.org>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipvs/Kconfig      |  4 ++++
 net/netfilter/ipvs/ip_vs_conn.c | 42 ++++++++++++++++++++++++++++++-----------
 net/netfilter/ipvs/ip_vs_ctl.c  |  8 ++++----
 3 files changed, 39 insertions(+), 15 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 79a698052218..c71e543d2549 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -68,6 +68,10 @@ config	IP_VS_TAB_BITS
 	  each hash entry uses 8 bytes, so you can estimate how much memory is
 	  needed for your box.
 
+	  You can overwrite this number setting conn_tab_bits module parameter
+	  or by appending ip_vs.conn_tab_bits=? to the kernel command line
+	  if IP VS was compiled built-in.
+
 comment "IPVS transport protocol load balancing support"
 
 config	IP_VS_PROTO_TCP
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 27c30cf933da..60bb41a8d8d4 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -40,6 +40,21 @@
 #include <net/ip_vs.h>
 
 
+#ifndef CONFIG_IP_VS_TAB_BITS
+#define CONFIG_IP_VS_TAB_BITS	12
+#endif
+
+/*
+ * Connection hash size. Default is what was selected at compile time.
+*/
+int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
+MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
+
+/* size and mask values */
+int ip_vs_conn_tab_size;
+int ip_vs_conn_tab_mask;
+
 /*
  *  Connection hash table: for input and output packets lookups of IPVS
  */
@@ -125,11 +140,11 @@ static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
 	if (af == AF_INET6)
 		return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
 				    (__force u32)port, proto, ip_vs_conn_rnd)
-			& IP_VS_CONN_TAB_MASK;
+			& ip_vs_conn_tab_mask;
 #endif
 	return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
 			    ip_vs_conn_rnd)
-		& IP_VS_CONN_TAB_MASK;
+		& ip_vs_conn_tab_mask;
 }
 
 
@@ -760,7 +775,7 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
 	int idx;
 	struct ip_vs_conn *cp;
 
-	for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
+	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
 		ct_read_lock_bh(idx);
 		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
 			if (pos-- == 0) {
@@ -797,7 +812,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	idx = l - ip_vs_conn_tab;
 	ct_read_unlock_bh(idx);
 
-	while (++idx < IP_VS_CONN_TAB_SIZE) {
+	while (++idx < ip_vs_conn_tab_size) {
 		ct_read_lock_bh(idx);
 		list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
 			seq->private = &ip_vs_conn_tab[idx];
@@ -976,8 +991,8 @@ void ip_vs_random_dropentry(void)
 	/*
 	 * Randomly scan 1/32 of the whole table every second
 	 */
-	for (idx = 0; idx < (IP_VS_CONN_TAB_SIZE>>5); idx++) {
-		unsigned hash = net_random() & IP_VS_CONN_TAB_MASK;
+	for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) {
+		unsigned hash = net_random() & ip_vs_conn_tab_mask;
 
 		/*
 		 *  Lock is actually needed in this loop.
@@ -1029,7 +1044,7 @@ static void ip_vs_conn_flush(void)
 	struct ip_vs_conn *cp;
 
   flush_again:
-	for (idx=0; idx<IP_VS_CONN_TAB_SIZE; idx++) {
+	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
 		/*
 		 *  Lock is actually needed in this loop.
 		 */
@@ -1060,10 +1075,15 @@ int __init ip_vs_conn_init(void)
 {
 	int idx;
 
+	/* Compute size and mask */
+	ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
+	ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;
+
 	/*
 	 * Allocate the connection hash table and initialize its list heads
 	 */
-	ip_vs_conn_tab = vmalloc(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head));
+	ip_vs_conn_tab = vmalloc(ip_vs_conn_tab_size *
+				 sizeof(struct list_head));
 	if (!ip_vs_conn_tab)
 		return -ENOMEM;
 
@@ -1078,12 +1098,12 @@ int __init ip_vs_conn_init(void)
 
 	pr_info("Connection hash table configured "
 		"(size=%d, memory=%ldKbytes)\n",
-		IP_VS_CONN_TAB_SIZE,
-		(long)(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head))/1024);
+		ip_vs_conn_tab_size,
+		(long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024);
 	IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n",
 		  sizeof(struct ip_vs_conn));
 
-	for (idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
+	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
 		INIT_LIST_HEAD(&ip_vs_conn_tab[idx]);
 	}
 
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 6bde12da2fe0..93420ea8b0bc 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1843,7 +1843,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 	if (v == SEQ_START_TOKEN) {
 		seq_printf(seq,
 			"IP Virtual Server version %d.%d.%d (size=%d)\n",
-			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
+			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
 		seq_puts(seq,
 			 "Prot LocalAddress:Port Scheduler Flags\n");
 		seq_puts(seq,
@@ -2374,7 +2374,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		char buf[64];
 
 		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
-			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
+			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
 		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
 			ret = -EFAULT;
 			goto out;
@@ -2387,7 +2387,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 	{
 		struct ip_vs_getinfo info;
 		info.version = IP_VS_VERSION_CODE;
-		info.size = IP_VS_CONN_TAB_SIZE;
+		info.size = ip_vs_conn_tab_size;
 		info.num_services = ip_vs_num_services;
 		if (copy_to_user(user, &info, sizeof(info)) != 0)
 			ret = -EFAULT;
@@ -3231,7 +3231,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
 	case IPVS_CMD_GET_INFO:
 		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
 		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
-			    IP_VS_CONN_TAB_SIZE);
+			    ip_vs_conn_tab_size);
 		break;
 	}
 
-- 
cgit v1.2.3


From a79e7ac4ad77e1833e8f69e99113204d03018255 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 11 Jan 2010 11:53:31 +0100
Subject: ipvs: use standardized format in sprintf

Use the same format string as net/ipv4/netfilter/nf_nat_ftp.c
to encode an ipv4 address and port.

Both uses should be a single common function.

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipvs/ip_vs_ftp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 33e2c799cba7..73f38ea98f25 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -208,7 +208,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		 */
 		from.ip = n_cp->vaddr.ip;
 		port = n_cp->vport;
-		sprintf(buf, "%d,%d,%d,%d,%d,%d", NIPQUAD(from.ip),
+		sprintf(buf, "%u,%u,%u,%u,%u,%u", NIPQUAD(from.ip),
 			(ntohs(port)>>8)&255, ntohs(port)&255);
 		buf_len = strlen(buf);
 
-- 
cgit v1.2.3


From 7f635d0d1bf84ad7a0032cbce9d902b9384c48b7 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 11 Jan 2010 11:55:36 +0100
Subject: netfilter: xt_osf: change %pi4 to %pI4

commit 8a27f7c90ffcb791eed7574922b51fb60b08fc89
changed the output style of %pi4 to use fixed
width leading zero IP addresses "001.002.003.004".

It's useful when printing multiple lines of
addresses, but was a change in output style for
some existing uses.

Using %pI4 restores the previous output style.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_osf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 4d1a41bbd5d7..4169e200588d 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -334,7 +334,7 @@ static bool xt_osf_match_packet(const struct sk_buff *skb,
 			if (info->flags & XT_OSF_LOG)
 				nf_log_packet(p->family, p->hooknum, skb,
 					p->in, p->out, NULL,
-					"%s [%s:%s] : %pi4:%d -> %pi4:%d hops=%d\n",
+					"%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
 					f->genre, f->version, f->subtype,
 					&ip->saddr, ntohs(tcp->source),
 					&ip->daddr, ntohs(tcp->dest),
@@ -349,7 +349,7 @@ static bool xt_osf_match_packet(const struct sk_buff *skb,
 
 	if (!fcount && (info->flags & XT_OSF_LOG))
 		nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL,
-			"Remote OS is not known: %pi4:%u -> %pi4:%u\n",
+			"Remote OS is not known: %pI4:%u -> %pI4:%u\n",
 				&ip->saddr, ntohs(tcp->source),
 				&ip->daddr, ntohs(tcp->dest));
 
-- 
cgit v1.2.3


From cd8c20b650f49354722b8cc1f03320b004815a0a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 13 Jan 2010 16:02:14 +0100
Subject: netfilter: nfnetlink: netns support

Make nfnl socket per-petns.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_netlink.c | 13 ++++----
 net/netfilter/nfnetlink.c            | 65 +++++++++++++++++++++++-------------
 net/netfilter/nfnetlink_log.c        |  3 +-
 net/netfilter/nfnetlink_queue.c      |  2 +-
 4 files changed, 52 insertions(+), 31 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 59d8064eb522..d4c5d06677f9 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -482,7 +482,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	} else
 		return 0;
 
-	if (!item->report && !nfnetlink_has_listeners(group))
+	if (!item->report && !nfnetlink_has_listeners(&init_net, group))
 		return 0;
 
 	skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC);
@@ -559,7 +559,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	rcu_read_unlock();
 
 	nlmsg_end(skb, nlh);
-	err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
+	err = nfnetlink_send(skb, &init_net, item->pid, group, item->report,
+			     GFP_ATOMIC);
 	if (err == -ENOBUFS || err == -EAGAIN)
 		return -ENOBUFS;
 
@@ -571,7 +572,7 @@ nla_put_failure:
 nlmsg_failure:
 	kfree_skb(skb);
 errout:
-	nfnetlink_set_err(0, group, -ENOBUFS);
+	nfnetlink_set_err(&init_net, 0, group, -ENOBUFS);
 	return 0;
 }
 #endif /* CONFIG_NF_CONNTRACK_EVENTS */
@@ -1539,7 +1540,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 		return 0;
 
 	if (!item->report &&
-	    !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
+	    !nfnetlink_has_listeners(&init_net, NFNLGRP_CONNTRACK_EXP_NEW))
 		return 0;
 
 	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
@@ -1562,7 +1563,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 	rcu_read_unlock();
 
 	nlmsg_end(skb, nlh);
-	nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW,
+	nfnetlink_send(skb, &init_net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW,
 		       item->report, GFP_ATOMIC);
 	return 0;
 
@@ -1572,7 +1573,7 @@ nla_put_failure:
 nlmsg_failure:
 	kfree_skb(skb);
 errout:
-	nfnetlink_set_err(0, 0, -ENOBUFS);
+	nfnetlink_set_err(&init_net, 0, 0, -ENOBUFS);
 	return 0;
 }
 #endif
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index eedc0c1ac7a4..8eb0cc23ada3 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -40,7 +40,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
 
 static char __initdata nfversion[] = "0.30";
 
-static struct sock *nfnl = NULL;
 static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
 static DEFINE_MUTEX(nfnl_mutex);
 
@@ -101,34 +100,35 @@ nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss)
 	return &ss->cb[cb_id];
 }
 
-int nfnetlink_has_listeners(unsigned int group)
+int nfnetlink_has_listeners(struct net *net, unsigned int group)
 {
-	return netlink_has_listeners(nfnl, group);
+	return netlink_has_listeners(net->nfnl, group);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
 
-int nfnetlink_send(struct sk_buff *skb, u32 pid,
+int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid,
 		   unsigned group, int echo, gfp_t flags)
 {
-	return nlmsg_notify(nfnl, skb, pid, group, echo, flags);
+	return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_send);
 
-void nfnetlink_set_err(u32 pid, u32 group, int error)
+void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error)
 {
-	netlink_set_err(nfnl, pid, group, error);
+	netlink_set_err(net->nfnl, pid, group, error);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_set_err);
 
-int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags)
+int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags)
 {
-	return netlink_unicast(nfnl, skb, pid, flags);
+	return netlink_unicast(net->nfnl, skb, pid, flags);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_unicast);
 
 /* Process one complete nfnetlink message. */
 static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
+	struct net *net = sock_net(skb->sk);
 	const struct nfnl_callback *nc;
 	const struct nfnetlink_subsystem *ss;
 	int type, err;
@@ -170,7 +170,7 @@ replay:
 		if (err < 0)
 			return err;
 
-		err = nc->call(nfnl, skb, nlh, (const struct nlattr **)cda);
+		err = nc->call(net->nfnl, skb, nlh, (const struct nlattr **)cda);
 		if (err == -EAGAIN)
 			goto replay;
 		return err;
@@ -184,26 +184,45 @@ static void nfnetlink_rcv(struct sk_buff *skb)
 	nfnl_unlock();
 }
 
-static void __exit nfnetlink_exit(void)
+static int __net_init nfnetlink_net_init(struct net *net)
 {
-	printk("Removing netfilter NETLINK layer.\n");
-	netlink_kernel_release(nfnl);
-	return;
+	struct sock *nfnl;
+
+	nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, NFNLGRP_MAX,
+				     nfnetlink_rcv, NULL, THIS_MODULE);
+	if (!nfnl)
+		return -ENOMEM;
+	net->nfnl_stash = nfnl;
+	rcu_assign_pointer(net->nfnl, nfnl);
+	return 0;
 }
 
-static int __init nfnetlink_init(void)
+static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list)
 {
-	printk("Netfilter messages via NETLINK v%s.\n", nfversion);
+	struct net *net;
 
-	nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX,
-				     nfnetlink_rcv, NULL, THIS_MODULE);
-	if (!nfnl) {
-		printk(KERN_ERR "cannot initialize nfnetlink!\n");
-		return -ENOMEM;
-	}
+	list_for_each_entry(net, net_exit_list, exit_list)
+		rcu_assign_pointer(net->nfnl, NULL);
+	synchronize_net();
+	list_for_each_entry(net, net_exit_list, exit_list)
+		netlink_kernel_release(net->nfnl_stash);
+}
 
-	return 0;
+static struct pernet_operations nfnetlink_net_ops = {
+	.init		= nfnetlink_net_init,
+	.exit_batch	= nfnetlink_net_exit_batch,
+};
+
+static int __init nfnetlink_init(void)
+{
+	printk("Netfilter messages via NETLINK v%s.\n", nfversion);
+	return register_pernet_subsys(&nfnetlink_net_ops);
 }
 
+static void __exit nfnetlink_exit(void)
+{
+	printk("Removing netfilter NETLINK layer.\n");
+	unregister_pernet_subsys(&nfnetlink_net_ops);
+}
 module_init(nfnetlink_init);
 module_exit(nfnetlink_exit);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 9de0470d557e..285e9029a9ff 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -323,7 +323,8 @@ __nfulnl_send(struct nfulnl_instance *inst)
 			  NLMSG_DONE,
 			  sizeof(struct nfgenmsg));
 
-	status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT);
+	status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid,
+				   MSG_DONTWAIT);
 
 	inst->qlen = 0;
 	inst->skb = NULL;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 7e3fa410641e..5c589b27d6eb 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -420,7 +420,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 	}
 
 	/* nfnetlink_unicast will either free the nskb or add it to a socket */
-	err = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT);
+	err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT);
 	if (err < 0) {
 		queue->queue_user_dropped++;
 		goto err_out_unlock;
-- 
cgit v1.2.3


From 9592a5c01e79dbc59eb56fa26b124e94ffcd0962 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 13 Jan 2010 16:04:18 +0100
Subject: netfilter: ctnetlink: netns support

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_netlink.c | 65 +++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 26 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d4c5d06677f9..79478dfba27e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -30,6 +30,7 @@
 
 #include <linux/netfilter.h>
 #include <net/netlink.h>
+#include <net/sock.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_expect.h>
@@ -456,6 +457,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
 static int
 ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 {
+	struct net *net;
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 	struct nlattr *nest_parms;
@@ -482,7 +484,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	} else
 		return 0;
 
-	if (!item->report && !nfnetlink_has_listeners(&init_net, group))
+	net = nf_ct_net(ct);
+	if (!item->report && !nfnetlink_has_listeners(net, group))
 		return 0;
 
 	skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC);
@@ -559,7 +562,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	rcu_read_unlock();
 
 	nlmsg_end(skb, nlh);
-	err = nfnetlink_send(skb, &init_net, item->pid, group, item->report,
+	err = nfnetlink_send(skb, net, item->pid, group, item->report,
 			     GFP_ATOMIC);
 	if (err == -ENOBUFS || err == -EAGAIN)
 		return -ENOBUFS;
@@ -572,7 +575,7 @@ nla_put_failure:
 nlmsg_failure:
 	kfree_skb(skb);
 errout:
-	nfnetlink_set_err(&init_net, 0, group, -ENOBUFS);
+	nfnetlink_set_err(net, 0, group, -ENOBUFS);
 	return 0;
 }
 #endif /* CONFIG_NF_CONNTRACK_EVENTS */
@@ -587,6 +590,7 @@ static int ctnetlink_done(struct netlink_callback *cb)
 static int
 ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct net *net = sock_net(skb->sk);
 	struct nf_conn *ct, *last;
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
@@ -597,7 +601,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	last = (struct nf_conn *)cb->args[1];
 	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
 restart:
-		hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]],
+		hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[cb->args[0]],
 					 hnnode) {
 			if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
@@ -769,6 +773,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			const struct nlmsghdr *nlh,
 			const struct nlattr * const cda[])
 {
+	struct net *net = sock_net(ctnl);
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conn *ct;
@@ -782,7 +787,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
 	else {
 		/* Flush the whole table */
-		nf_conntrack_flush_report(&init_net,
+		nf_conntrack_flush_report(net,
 					 NETLINK_CB(skb).pid,
 					 nlmsg_report(nlh));
 		return 0;
@@ -791,7 +796,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(&init_net, &tuple);
+	h = nf_conntrack_find_get(net, &tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -829,6 +834,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			const struct nlmsghdr *nlh,
 			const struct nlattr * const cda[])
 {
+	struct net *net = sock_net(ctnl);
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conn *ct;
@@ -851,7 +857,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(&init_net, &tuple);
+	h = nf_conntrack_find_get(net, &tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -1176,7 +1182,8 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
 }
 
 static struct nf_conn *
-ctnetlink_create_conntrack(const struct nlattr * const cda[],
+ctnetlink_create_conntrack(struct net *net,
+			   const struct nlattr * const cda[],
 			   struct nf_conntrack_tuple *otuple,
 			   struct nf_conntrack_tuple *rtuple,
 			   u8 u3)
@@ -1185,7 +1192,7 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[],
 	int err = -EINVAL;
 	struct nf_conntrack_helper *helper;
 
-	ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC);
+	ct = nf_conntrack_alloc(net, otuple, rtuple, GFP_ATOMIC);
 	if (IS_ERR(ct))
 		return ERR_PTR(-ENOMEM);
 
@@ -1286,7 +1293,7 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[],
 		if (err < 0)
 			goto err2;
 
-		master_h = nf_conntrack_find_get(&init_net, &master);
+		master_h = nf_conntrack_find_get(net, &master);
 		if (master_h == NULL) {
 			err = -ENOENT;
 			goto err2;
@@ -1314,6 +1321,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			const struct nlmsghdr *nlh,
 			const struct nlattr * const cda[])
 {
+	struct net *net = sock_net(ctnl);
 	struct nf_conntrack_tuple otuple, rtuple;
 	struct nf_conntrack_tuple_hash *h = NULL;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -1334,9 +1342,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 
 	spin_lock_bh(&nf_conntrack_lock);
 	if (cda[CTA_TUPLE_ORIG])
-		h = __nf_conntrack_find(&init_net, &otuple);
+		h = __nf_conntrack_find(net, &otuple);
 	else if (cda[CTA_TUPLE_REPLY])
-		h = __nf_conntrack_find(&init_net, &rtuple);
+		h = __nf_conntrack_find(net, &rtuple);
 
 	if (h == NULL) {
 		err = -ENOENT;
@@ -1344,7 +1352,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			struct nf_conn *ct;
 			enum ip_conntrack_events events;
 
-			ct = ctnetlink_create_conntrack(cda, &otuple,
+			ct = ctnetlink_create_conntrack(net, cda, &otuple,
 							&rtuple, u3);
 			if (IS_ERR(ct)) {
 				err = PTR_ERR(ct);
@@ -1526,9 +1534,10 @@ nla_put_failure:
 static int
 ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 {
+	struct nf_conntrack_expect *exp = item->exp;
+	struct net *net = nf_ct_exp_net(exp);
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
-	struct nf_conntrack_expect *exp = item->exp;
 	struct sk_buff *skb;
 	unsigned int type;
 	int flags = 0;
@@ -1540,7 +1549,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 		return 0;
 
 	if (!item->report &&
-	    !nfnetlink_has_listeners(&init_net, NFNLGRP_CONNTRACK_EXP_NEW))
+	    !nfnetlink_has_listeners(net, NFNLGRP_CONNTRACK_EXP_NEW))
 		return 0;
 
 	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
@@ -1563,7 +1572,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 	rcu_read_unlock();
 
 	nlmsg_end(skb, nlh);
-	nfnetlink_send(skb, &init_net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW,
+	nfnetlink_send(skb, net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW,
 		       item->report, GFP_ATOMIC);
 	return 0;
 
@@ -1573,7 +1582,7 @@ nla_put_failure:
 nlmsg_failure:
 	kfree_skb(skb);
 errout:
-	nfnetlink_set_err(&init_net, 0, 0, -ENOBUFS);
+	nfnetlink_set_err(net, 0, 0, -ENOBUFS);
 	return 0;
 }
 #endif
@@ -1587,7 +1596,7 @@ static int ctnetlink_exp_done(struct netlink_callback *cb)
 static int
 ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct net *net = &init_net;
+	struct net *net = sock_net(skb->sk);
 	struct nf_conntrack_expect *exp, *last;
 	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
 	struct hlist_node *n;
@@ -1640,6 +1649,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 		     const struct nlmsghdr *nlh,
 		     const struct nlattr * const cda[])
 {
+	struct net *net = sock_net(ctnl);
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_expect *exp;
 	struct sk_buff *skb2;
@@ -1661,7 +1671,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	exp = nf_ct_expect_find_get(&init_net, &tuple);
+	exp = nf_ct_expect_find_get(net, &tuple);
 	if (!exp)
 		return -ENOENT;
 
@@ -1701,6 +1711,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 		     const struct nlmsghdr *nlh,
 		     const struct nlattr * const cda[])
 {
+	struct net *net = sock_net(ctnl);
 	struct nf_conntrack_expect *exp;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_helper *h;
@@ -1717,7 +1728,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 			return err;
 
 		/* bump usage count to 2 */
-		exp = nf_ct_expect_find_get(&init_net, &tuple);
+		exp = nf_ct_expect_find_get(net, &tuple);
 		if (!exp)
 			return -ENOENT;
 
@@ -1747,7 +1758,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 		}
 		for (i = 0; i < nf_ct_expect_hsize; i++) {
 			hlist_for_each_entry_safe(exp, n, next,
-						  &init_net.ct.expect_hash[i],
+						  &net->ct.expect_hash[i],
 						  hnode) {
 				m_help = nfct_help(exp->master);
 				if (m_help->helper == h
@@ -1763,7 +1774,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 		spin_lock_bh(&nf_conntrack_lock);
 		for (i = 0; i < nf_ct_expect_hsize; i++) {
 			hlist_for_each_entry_safe(exp, n, next,
-						  &init_net.ct.expect_hash[i],
+						  &net->ct.expect_hash[i],
 						  hnode) {
 				if (del_timer(&exp->timeout)) {
 					nf_ct_unlink_expect(exp);
@@ -1784,7 +1795,8 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x,
 }
 
 static int
-ctnetlink_create_expect(const struct nlattr * const cda[], u_int8_t u3,
+ctnetlink_create_expect(struct net *net, const struct nlattr * const cda[],
+			u_int8_t u3,
 			u32 pid, int report)
 {
 	struct nf_conntrack_tuple tuple, mask, master_tuple;
@@ -1806,7 +1818,7 @@ ctnetlink_create_expect(const struct nlattr * const cda[], u_int8_t u3,
 		return err;
 
 	/* Look for master conntrack of this expectation */
-	h = nf_conntrack_find_get(&init_net, &master_tuple);
+	h = nf_conntrack_find_get(net, &master_tuple);
 	if (!h)
 		return -ENOENT;
 	ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1846,6 +1858,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 		     const struct nlmsghdr *nlh,
 		     const struct nlattr * const cda[])
 {
+	struct net *net = sock_net(ctnl);
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_expect *exp;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
@@ -1862,13 +1875,13 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 		return err;
 
 	spin_lock_bh(&nf_conntrack_lock);
-	exp = __nf_ct_expect_find(&init_net, &tuple);
+	exp = __nf_ct_expect_find(net, &tuple);
 
 	if (!exp) {
 		spin_unlock_bh(&nf_conntrack_lock);
 		err = -ENOENT;
 		if (nlh->nlmsg_flags & NLM_F_CREATE) {
-			err = ctnetlink_create_expect(cda,
+			err = ctnetlink_create_expect(net, cda,
 						      u3,
 						      NETLINK_CB(skb).pid,
 						      nlmsg_report(nlh));
-- 
cgit v1.2.3


From 83fc81024bd8572f31db784f8c0079e999a4fa44 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 18 Jan 2010 08:07:50 +0100
Subject: netfilter: xt_connlimit: netns support

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_connlimit.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 8103bef78e44..0d9d18ea2b09 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -90,7 +90,8 @@ same_source_net(const union nf_inet_addr *addr,
 	}
 }
 
-static int count_them(struct xt_connlimit_data *data,
+static int count_them(struct net *net,
+		      struct xt_connlimit_data *data,
 		      const struct nf_conntrack_tuple *tuple,
 		      const union nf_inet_addr *addr,
 		      const union nf_inet_addr *mask,
@@ -113,7 +114,7 @@ static int count_them(struct xt_connlimit_data *data,
 
 	/* check the saved connections */
 	list_for_each_entry_safe(conn, tmp, hash, list) {
-		found    = nf_conntrack_find_get(&init_net, &conn->tuple);
+		found    = nf_conntrack_find_get(net, &conn->tuple);
 		found_ct = NULL;
 
 		if (found != NULL)
@@ -171,6 +172,7 @@ static int count_them(struct xt_connlimit_data *data,
 static bool
 connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
+	struct net *net = dev_net(par->in ? par->in : par->out);
 	const struct xt_connlimit_info *info = par->matchinfo;
 	union nf_inet_addr addr;
 	struct nf_conntrack_tuple tuple;
@@ -195,7 +197,7 @@ connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	}
 
 	spin_lock_bh(&info->data->lock);
-	connections = count_them(info->data, tuple_ptr, &addr,
+	connections = count_them(net, info->data, tuple_ptr, &addr,
 	                         &info->mask, par->family);
 	spin_unlock_bh(&info->data->lock);
 
-- 
cgit v1.2.3


From a1004d8e3d463012f231bab104325ecb15637f78 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 18 Jan 2010 08:14:50 +0100
Subject: netfilter: xt_hashlimit: simplify seqfile code

Simply pass hashtable to seqfile iterators, proc entry itself is not needed.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_hashlimit.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 5bdc1fbf3ad7..944fd11c8989 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -841,8 +841,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 static void *dl_seq_start(struct seq_file *s, loff_t *pos)
 	__acquires(htable->lock)
 {
-	struct proc_dir_entry *pde = s->private;
-	struct xt_hashlimit_htable *htable = pde->data;
+	struct xt_hashlimit_htable *htable = s->private;
 	unsigned int *bucket;
 
 	spin_lock_bh(&htable->lock);
@@ -859,8 +858,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos)
 
 static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
-	struct proc_dir_entry *pde = s->private;
-	struct xt_hashlimit_htable *htable = pde->data;
+	struct xt_hashlimit_htable *htable = s->private;
 	unsigned int *bucket = (unsigned int *)v;
 
 	*pos = ++(*bucket);
@@ -874,8 +872,7 @@ static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
 static void dl_seq_stop(struct seq_file *s, void *v)
 	__releases(htable->lock)
 {
-	struct proc_dir_entry *pde = s->private;
-	struct xt_hashlimit_htable *htable = pde->data;
+	struct xt_hashlimit_htable *htable = s->private;
 	unsigned int *bucket = (unsigned int *)v;
 
 	kfree(bucket);
@@ -917,8 +914,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 
 static int dl_seq_show(struct seq_file *s, void *v)
 {
-	struct proc_dir_entry *pde = s->private;
-	struct xt_hashlimit_htable *htable = pde->data;
+	struct xt_hashlimit_htable *htable = s->private;
 	unsigned int *bucket = (unsigned int *)v;
 	struct dsthash_ent *ent;
 	struct hlist_node *pos;
@@ -944,7 +940,7 @@ static int dl_proc_open(struct inode *inode, struct file *file)
 
 	if (!ret) {
 		struct seq_file *sf = file->private_data;
-		sf->private = PDE(inode);
+		sf->private = PDE(inode)->data;
 	}
 	return ret;
 }
-- 
cgit v1.2.3


From 7d07d5632b672c892a65882c2a119345fd9596c9 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 18 Jan 2010 08:31:00 +0100
Subject: netfilter: xt_recent: netns support

Make recent table list per-netns.
Make proc files per-netns.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_recent.c | 136 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 95 insertions(+), 41 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 768d01ff1fea..203333107367 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -28,6 +28,7 @@
 #include <linux/skbuff.h>
 #include <linux/inet.h>
 #include <net/net_namespace.h>
+#include <net/netns/generic.h>
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_recent.h>
@@ -78,15 +79,26 @@ struct recent_table {
 	struct list_head	iphash[0];
 };
 
-static LIST_HEAD(tables);
+struct recent_net {
+	struct list_head	tables;
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry	*xt_recent;
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+	struct proc_dir_entry	*ipt_recent;
+#endif
+#endif
+};
+
+static int recent_net_id;
+static inline struct recent_net *recent_pernet(struct net *net)
+{
+	return net_generic(net, recent_net_id);
+}
+
 static DEFINE_SPINLOCK(recent_lock);
 static DEFINE_MUTEX(recent_mutex);
 
 #ifdef CONFIG_PROC_FS
-#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-static struct proc_dir_entry *proc_old_dir;
-#endif
-static struct proc_dir_entry *recent_proc_dir;
 static const struct file_operations recent_old_fops, recent_mt_fops;
 #endif
 
@@ -172,11 +184,12 @@ static void recent_entry_update(struct recent_table *t, struct recent_entry *e)
 	list_move_tail(&e->lru_list, &t->lru_list);
 }
 
-static struct recent_table *recent_table_lookup(const char *name)
+static struct recent_table *recent_table_lookup(struct recent_net *recent_net,
+						const char *name)
 {
 	struct recent_table *t;
 
-	list_for_each_entry(t, &tables, list)
+	list_for_each_entry(t, &recent_net->tables, list)
 		if (!strcmp(t->name, name))
 			return t;
 	return NULL;
@@ -195,6 +208,8 @@ static void recent_table_flush(struct recent_table *t)
 static bool
 recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
+	struct net *net = dev_net(par->in ? par->in : par->out);
+	struct recent_net *recent_net = recent_pernet(net);
 	const struct xt_recent_mtinfo *info = par->matchinfo;
 	struct recent_table *t;
 	struct recent_entry *e;
@@ -227,7 +242,7 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 		ttl++;
 
 	spin_lock_bh(&recent_lock);
-	t = recent_table_lookup(info->name);
+	t = recent_table_lookup(recent_net, info->name);
 	e = recent_entry_lookup(t, &addr, par->match->family,
 				(info->check_set & XT_RECENT_TTL) ? ttl : 0);
 	if (e == NULL) {
@@ -271,6 +286,7 @@ out:
 
 static bool recent_mt_check(const struct xt_mtchk_param *par)
 {
+	struct recent_net *recent_net = recent_pernet(par->net);
 	const struct xt_recent_mtinfo *info = par->matchinfo;
 	struct recent_table *t;
 #ifdef CONFIG_PROC_FS
@@ -297,7 +313,7 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 		return false;
 
 	mutex_lock(&recent_mutex);
-	t = recent_table_lookup(info->name);
+	t = recent_table_lookup(recent_net, info->name);
 	if (t != NULL) {
 		t->refcnt++;
 		ret = true;
@@ -314,7 +330,7 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 	for (i = 0; i < ip_list_hash_size; i++)
 		INIT_LIST_HEAD(&t->iphash[i]);
 #ifdef CONFIG_PROC_FS
-	pde = proc_create_data(t->name, ip_list_perms, recent_proc_dir,
+	pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent,
 		  &recent_mt_fops, t);
 	if (pde == NULL) {
 		kfree(t);
@@ -323,10 +339,10 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 	pde->uid = ip_list_uid;
 	pde->gid = ip_list_gid;
 #ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-	pde = proc_create_data(t->name, ip_list_perms, proc_old_dir,
+	pde = proc_create_data(t->name, ip_list_perms, recent_net->ipt_recent,
 		      &recent_old_fops, t);
 	if (pde == NULL) {
-		remove_proc_entry(t->name, proc_old_dir);
+		remove_proc_entry(t->name, recent_net->xt_recent);
 		kfree(t);
 		goto out;
 	}
@@ -335,7 +351,7 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 #endif
 #endif
 	spin_lock_bh(&recent_lock);
-	list_add_tail(&t->list, &tables);
+	list_add_tail(&t->list, &recent_net->tables);
 	spin_unlock_bh(&recent_lock);
 	ret = true;
 out:
@@ -345,20 +361,21 @@ out:
 
 static void recent_mt_destroy(const struct xt_mtdtor_param *par)
 {
+	struct recent_net *recent_net = recent_pernet(par->net);
 	const struct xt_recent_mtinfo *info = par->matchinfo;
 	struct recent_table *t;
 
 	mutex_lock(&recent_mutex);
-	t = recent_table_lookup(info->name);
+	t = recent_table_lookup(recent_net, info->name);
 	if (--t->refcnt == 0) {
 		spin_lock_bh(&recent_lock);
 		list_del(&t->list);
 		spin_unlock_bh(&recent_lock);
 #ifdef CONFIG_PROC_FS
 #ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-		remove_proc_entry(t->name, proc_old_dir);
+		remove_proc_entry(t->name, recent_net->ipt_recent);
 #endif
-		remove_proc_entry(t->name, recent_proc_dir);
+		remove_proc_entry(t->name, recent_net->xt_recent);
 #endif
 		recent_table_flush(t);
 		kfree(t);
@@ -607,8 +624,65 @@ static const struct file_operations recent_mt_fops = {
 	.release = seq_release_private,
 	.owner   = THIS_MODULE,
 };
+
+static int __net_init recent_proc_net_init(struct net *net)
+{
+	struct recent_net *recent_net = recent_pernet(net);
+
+	recent_net->xt_recent = proc_mkdir("xt_recent", net->proc_net);
+	if (!recent_net->xt_recent)
+		return -ENOMEM;
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+	recent_net->ipt_recent = proc_mkdir("ipt_recent", net->proc_net);
+	if (!recent_net->ipt_recent) {
+		proc_net_remove(net, "xt_recent");
+		return -ENOMEM;
+	}
+#endif
+	return 0;
+}
+
+static void __net_exit recent_proc_net_exit(struct net *net)
+{
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+	proc_net_remove(net, "ipt_recent");
+#endif
+	proc_net_remove(net, "xt_recent");
+}
+#else
+static inline int recent_proc_net_init(struct net *net)
+{
+	return 0;
+}
+
+static inline void recent_proc_net_exit(struct net *net)
+{
+}
 #endif /* CONFIG_PROC_FS */
 
+static int __net_init recent_net_init(struct net *net)
+{
+	struct recent_net *recent_net = recent_pernet(net);
+
+	INIT_LIST_HEAD(&recent_net->tables);
+	return recent_proc_net_init(net);
+}
+
+static void __net_exit recent_net_exit(struct net *net)
+{
+	struct recent_net *recent_net = recent_pernet(net);
+
+	BUG_ON(!list_empty(&recent_net->tables));
+	recent_proc_net_exit(net);
+}
+
+static struct pernet_operations recent_net_ops = {
+	.init	= recent_net_init,
+	.exit	= recent_net_exit,
+	.id	= &recent_net_id,
+	.size	= sizeof(struct recent_net),
+};
+
 static struct xt_match recent_mt_reg[] __read_mostly = {
 	{
 		.name       = "recent",
@@ -640,39 +714,19 @@ static int __init recent_mt_init(void)
 		return -EINVAL;
 	ip_list_hash_size = 1 << fls(ip_list_tot);
 
-	err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
-#ifdef CONFIG_PROC_FS
+	err = register_pernet_subsys(&recent_net_ops);
 	if (err)
 		return err;
-	recent_proc_dir = proc_mkdir("xt_recent", init_net.proc_net);
-	if (recent_proc_dir == NULL) {
-		xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
-		err = -ENOMEM;
-	}
-#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-	if (err < 0)
-		return err;
-	proc_old_dir = proc_mkdir("ipt_recent", init_net.proc_net);
-	if (proc_old_dir == NULL) {
-		remove_proc_entry("xt_recent", init_net.proc_net);
-		xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
-		err = -ENOMEM;
-	}
-#endif
-#endif
+	err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
+	if (err)
+		unregister_pernet_subsys(&recent_net_ops);
 	return err;
 }
 
 static void __exit recent_mt_exit(void)
 {
-	BUG_ON(!list_empty(&tables));
 	xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
-#ifdef CONFIG_PROC_FS
-#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-	remove_proc_entry("ipt_recent", init_net.proc_net);
-#endif
-	remove_proc_entry("xt_recent", init_net.proc_net);
-#endif
+	unregister_pernet_subsys(&recent_net_ops);
 }
 
 module_init(recent_mt_init);
-- 
cgit v1.2.3


From e89fc3f1b06d9241f65e580b002789abaa6d11ac Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 18 Jan 2010 08:33:28 +0100
Subject: netfilter: xt_hashlimit: netns support

Make hashtable per-netns.
Make proc files per-netns.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_hashlimit.c | 141 ++++++++++++++++++++++++++++++-------------
 1 file changed, 98 insertions(+), 43 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 944fd11c8989..fb7fcb773a3f 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -26,6 +26,7 @@
 #endif
 
 #include <net/net_namespace.h>
+#include <net/netns/generic.h>
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
@@ -40,9 +41,19 @@ MODULE_DESCRIPTION("Xtables: per hash-bucket rate-limit match");
 MODULE_ALIAS("ipt_hashlimit");
 MODULE_ALIAS("ip6t_hashlimit");
 
+struct hashlimit_net {
+	struct hlist_head	htables;
+	struct proc_dir_entry	*ipt_hashlimit;
+	struct proc_dir_entry	*ip6t_hashlimit;
+};
+
+static int hashlimit_net_id;
+static inline struct hashlimit_net *hashlimit_pernet(struct net *net)
+{
+	return net_generic(net, hashlimit_net_id);
+}
+
 /* need to declare this at the top */
-static struct proc_dir_entry *hashlimit_procdir4;
-static struct proc_dir_entry *hashlimit_procdir6;
 static const struct file_operations dl_file_ops;
 
 /* hash table crap */
@@ -93,13 +104,13 @@ struct xt_hashlimit_htable {
 
 	/* seq_file stuff */
 	struct proc_dir_entry *pde;
+	struct net *net;
 
 	struct hlist_head hash[0];	/* hashtable itself */
 };
 
 static DEFINE_SPINLOCK(hashlimit_lock);	/* protects htables list */
 static DEFINE_MUTEX(hlimit_mutex);	/* additional checkentry protection */
-static HLIST_HEAD(hashlimit_htables);
 static struct kmem_cache *hashlimit_cachep __read_mostly;
 
 static inline bool dst_cmp(const struct dsthash_ent *ent,
@@ -185,8 +196,9 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
 }
 static void htable_gc(unsigned long htlong);
 
-static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family)
+static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_int8_t family)
 {
+	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
 	struct xt_hashlimit_htable *hinfo;
 	unsigned int size;
 	unsigned int i;
@@ -239,26 +251,29 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family)
 	spin_lock_init(&hinfo->lock);
 	hinfo->pde = proc_create_data(minfo->name, 0,
 		(family == NFPROTO_IPV4) ?
-		hashlimit_procdir4 : hashlimit_procdir6,
+		hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
 		&dl_file_ops, hinfo);
 	if (!hinfo->pde) {
 		vfree(hinfo);
 		return -1;
 	}
+	hinfo->net = net;
 
 	setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo);
 	hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
 	add_timer(&hinfo->timer);
 
 	spin_lock_bh(&hashlimit_lock);
-	hlist_add_head(&hinfo->node, &hashlimit_htables);
+	hlist_add_head(&hinfo->node, &hashlimit_net->htables);
 	spin_unlock_bh(&hashlimit_lock);
 
 	return 0;
 }
 
-static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family)
+static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
+			 u_int8_t family)
 {
+	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
 	struct xt_hashlimit_htable *hinfo;
 	unsigned int size;
 	unsigned int i;
@@ -301,19 +316,20 @@ static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family)
 
 	hinfo->pde = proc_create_data(minfo->name, 0,
 		(family == NFPROTO_IPV4) ?
-		hashlimit_procdir4 : hashlimit_procdir6,
+		hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
 		&dl_file_ops, hinfo);
 	if (hinfo->pde == NULL) {
 		vfree(hinfo);
 		return -1;
 	}
+	hinfo->net = net;
 
 	setup_timer(&hinfo->timer, htable_gc, (unsigned long)hinfo);
 	hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
 	add_timer(&hinfo->timer);
 
 	spin_lock_bh(&hashlimit_lock);
-	hlist_add_head(&hinfo->node, &hashlimit_htables);
+	hlist_add_head(&hinfo->node, &hashlimit_net->htables);
 	spin_unlock_bh(&hashlimit_lock);
 
 	return 0;
@@ -364,24 +380,30 @@ static void htable_gc(unsigned long htlong)
 
 static void htable_destroy(struct xt_hashlimit_htable *hinfo)
 {
+	struct hashlimit_net *hashlimit_net = hashlimit_pernet(hinfo->net);
+	struct proc_dir_entry *parent;
+
 	del_timer_sync(&hinfo->timer);
 
-	/* remove proc entry */
-	remove_proc_entry(hinfo->pde->name,
-			  hinfo->family == NFPROTO_IPV4 ? hashlimit_procdir4 :
-						     hashlimit_procdir6);
+	if (hinfo->family == NFPROTO_IPV4)
+		parent = hashlimit_net->ipt_hashlimit;
+	else
+		parent = hashlimit_net->ip6t_hashlimit;
+	remove_proc_entry(hinfo->pde->name, parent);
 	htable_selective_cleanup(hinfo, select_all);
 	vfree(hinfo);
 }
 
-static struct xt_hashlimit_htable *htable_find_get(const char *name,
+static struct xt_hashlimit_htable *htable_find_get(struct net *net,
+						   const char *name,
 						   u_int8_t family)
 {
+	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
 	struct xt_hashlimit_htable *hinfo;
 	struct hlist_node *pos;
 
 	spin_lock_bh(&hashlimit_lock);
-	hlist_for_each_entry(hinfo, pos, &hashlimit_htables, node) {
+	hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) {
 		if (!strcmp(name, hinfo->pde->name) &&
 		    hinfo->family == family) {
 			atomic_inc(&hinfo->use);
@@ -665,6 +687,7 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 
 static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
 {
+	struct net *net = par->net;
 	struct xt_hashlimit_info *r = par->matchinfo;
 
 	/* Check for overflow. */
@@ -694,8 +717,8 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
 	 * the list of htable's in htable_create(), since then we would
 	 * create duplicate proc files. -HW */
 	mutex_lock(&hlimit_mutex);
-	r->hinfo = htable_find_get(r->name, par->match->family);
-	if (!r->hinfo && htable_create_v0(r, par->match->family) != 0) {
+	r->hinfo = htable_find_get(net, r->name, par->match->family);
+	if (!r->hinfo && htable_create_v0(net, r, par->match->family) != 0) {
 		mutex_unlock(&hlimit_mutex);
 		return false;
 	}
@@ -706,6 +729,7 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
 
 static bool hashlimit_mt_check(const struct xt_mtchk_param *par)
 {
+	struct net *net = par->net;
 	struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 
 	/* Check for overflow. */
@@ -735,8 +759,8 @@ static bool hashlimit_mt_check(const struct xt_mtchk_param *par)
 	 * the list of htable's in htable_create(), since then we would
 	 * create duplicate proc files. -HW */
 	mutex_lock(&hlimit_mutex);
-	info->hinfo = htable_find_get(info->name, par->match->family);
-	if (!info->hinfo && htable_create(info, par->match->family) != 0) {
+	info->hinfo = htable_find_get(net, info->name, par->match->family);
+	if (!info->hinfo && htable_create(net, info, par->match->family) != 0) {
 		mutex_unlock(&hlimit_mutex);
 		return false;
 	}
@@ -953,10 +977,61 @@ static const struct file_operations dl_file_ops = {
 	.release = seq_release
 };
 
+static int __net_init hashlimit_proc_net_init(struct net *net)
+{
+	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
+
+	hashlimit_net->ipt_hashlimit = proc_mkdir("ipt_hashlimit", net->proc_net);
+	if (!hashlimit_net->ipt_hashlimit)
+		return -ENOMEM;
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+	hashlimit_net->ip6t_hashlimit = proc_mkdir("ip6t_hashlimit", net->proc_net);
+	if (!hashlimit_net->ip6t_hashlimit) {
+		proc_net_remove(net, "ipt_hashlimit");
+		return -ENOMEM;
+	}
+#endif
+	return 0;
+}
+
+static void __net_exit hashlimit_proc_net_exit(struct net *net)
+{
+	proc_net_remove(net, "ipt_hashlimit");
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+	proc_net_remove(net, "ip6t_hashlimit");
+#endif
+}
+
+static int __net_init hashlimit_net_init(struct net *net)
+{
+	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
+
+	INIT_HLIST_HEAD(&hashlimit_net->htables);
+	return hashlimit_proc_net_init(net);
+}
+
+static void __net_exit hashlimit_net_exit(struct net *net)
+{
+	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
+
+	BUG_ON(!hlist_empty(&hashlimit_net->htables));
+	hashlimit_proc_net_exit(net);
+}
+
+static struct pernet_operations hashlimit_net_ops = {
+	.init	= hashlimit_net_init,
+	.exit	= hashlimit_net_exit,
+	.id	= &hashlimit_net_id,
+	.size	= sizeof(struct hashlimit_net),
+};
+
 static int __init hashlimit_mt_init(void)
 {
 	int err;
 
+	err = register_pernet_subsys(&hashlimit_net_ops);
+	if (err < 0)
+		return err;
 	err = xt_register_matches(hashlimit_mt_reg,
 	      ARRAY_SIZE(hashlimit_mt_reg));
 	if (err < 0)
@@ -970,41 +1045,21 @@ static int __init hashlimit_mt_init(void)
 		printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n");
 		goto err2;
 	}
-	hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", init_net.proc_net);
-	if (!hashlimit_procdir4) {
-		printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
-				"entry\n");
-		goto err3;
-	}
-	err = 0;
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-	hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net);
-	if (!hashlimit_procdir6) {
-		printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
-				"entry\n");
-		err = -ENOMEM;
-	}
-#endif
-	if (!err)
-		return 0;
-	remove_proc_entry("ipt_hashlimit", init_net.proc_net);
-err3:
-	kmem_cache_destroy(hashlimit_cachep);
+	return 0;
+
 err2:
 	xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
 err1:
+	unregister_pernet_subsys(&hashlimit_net_ops);
 	return err;
 
 }
 
 static void __exit hashlimit_mt_exit(void)
 {
-	remove_proc_entry("ipt_hashlimit", init_net.proc_net);
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-	remove_proc_entry("ip6t_hashlimit", init_net.proc_net);
-#endif
 	kmem_cache_destroy(hashlimit_cachep);
 	xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
+	unregister_pernet_subsys(&hashlimit_net_ops);
 }
 
 module_init(hashlimit_mt_init);
-- 
cgit v1.2.3


From a5d896adf019143adf72d08826fe5359b6a8762c Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@inl.fr>
Date: Mon, 18 Jan 2010 09:44:39 +0100
Subject: netfilter: nfnetlink_queue: simplify warning message

This patch remove variable part from a debug message to have
message concatenation from syslog.

Signed-off-by: Eric Leblond <eric@inl.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nfnetlink_queue.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 5c589b27d6eb..90cf36decea3 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -414,8 +414,8 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 		queue->queue_dropped++;
 		if (net_ratelimit())
 			  printk(KERN_WARNING "nf_queue: full at %d entries, "
-				 "dropping packets(s). Dropped: %d\n",
-				 queue->queue_total, queue->queue_dropped);
+				 "dropping packets(s).\n",
+				 queue->queue_total);
 		goto err_out_free_nskb;
 	}
 
-- 
cgit v1.2.3


From 10a199394b8f9b4c4e0be6e14a61109a7d891b1b Mon Sep 17 00:00:00 2001
From: Simon Arlott <simon@fire.lp0.eu>
Date: Tue, 2 Feb 2010 15:33:38 +0100
Subject: netfilter: xt_TCPMSS: SYN packets are allowed to contain data

The TCPMSS target is dropping SYN packets where:
  1) There is data, or
  2) The data offset makes the TCP header larger than the packet.

Both of these result in an error level printk. This printk has been
removed.

This change avoids dropping SYN packets containing data. If there
is also no MSS option (as well as data), one will not be added
because of possible complications due to the increased packet size.

Signed-off-by: Simon Arlott <simon@fire.lp0.eu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_TCPMSS.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index eda64c1cb1e5..6f21b4377dbb 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -60,17 +60,9 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 	tcplen = skb->len - tcphoff;
 	tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
 
-	/* Since it passed flags test in tcp match, we know it is is
-	   not a fragment, and has data >= tcp header length.  SYN
-	   packets should not contain data: if they did, then we risk
-	   running over MTU, sending Frag Needed and breaking things
-	   badly. --RR */
-	if (tcplen != tcph->doff*4) {
-		if (net_ratelimit())
-			printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n",
-			       skb->len);
+	/* Header cannot be larger than the packet */
+	if (tcplen < tcph->doff*4)
 		return -1;
-	}
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
 		if (dst_mtu(skb_dst(skb)) <= minlen) {
@@ -115,6 +107,12 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 		}
 	}
 
+	/* There is data after the header so the option can't be added
+	   without moving it, and doing so may make the SYN packet
+	   itself too large. Accept the packet unmodified instead. */
+	if (tcplen > tcph->doff*4)
+		return 0;
+
 	/*
 	 * MSS Option not found ?! add it..
 	 */
-- 
cgit v1.2.3


From 2eff25c18c3d332d3c4dd98f2ac9b7114e9771b0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 3 Feb 2010 13:24:54 +0100
Subject: netfilter: xt_hashlimit: fix race condition and simplify locking

As noticed by Shin Hong <hongshin@gmail.com>, there is a race between
htable_find_get() and htable_put():

htable_put():				htable_find_get():

					spin_lock_bh(&hashlimit_lock);
					<search entry>
atomic_dec_and_test(&hinfo->use)
					atomic_inc(&hinfo->use)
					spin_unlock_bh(&hashlimit_lock)
					return hinfo;
spin_lock_bh(&hashlimit_lock);
hlist_del(&hinfo->node);
spin_unlock_bh(&hashlimit_lock);
htable_destroy(hinfo);

The entire locking concept is overly complicated, tables are only
created/referenced and released in process context, so a single
mutex works just fine. Remove the hashinfo_spinlock and atomic
reference count and use the mutex to protect table lookups/creation
and reference count changes.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_hashlimit.c | 52 +++++++++++++++-----------------------------
 1 file changed, 18 insertions(+), 34 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index fb7fcb773a3f..017c95966aa8 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -90,7 +90,7 @@ struct dsthash_ent {
 
 struct xt_hashlimit_htable {
 	struct hlist_node node;		/* global list of all htables */
-	atomic_t use;
+	int use;
 	u_int8_t family;
 	bool rnd_initialized;
 
@@ -109,8 +109,7 @@ struct xt_hashlimit_htable {
 	struct hlist_head hash[0];	/* hashtable itself */
 };
 
-static DEFINE_SPINLOCK(hashlimit_lock);	/* protects htables list */
-static DEFINE_MUTEX(hlimit_mutex);	/* additional checkentry protection */
+static DEFINE_MUTEX(hashlimit_mutex);	/* protects htables list */
 static struct kmem_cache *hashlimit_cachep __read_mostly;
 
 static inline bool dst_cmp(const struct dsthash_ent *ent,
@@ -244,7 +243,7 @@ static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_
 	for (i = 0; i < hinfo->cfg.size; i++)
 		INIT_HLIST_HEAD(&hinfo->hash[i]);
 
-	atomic_set(&hinfo->use, 1);
+	hinfo->use = 1;
 	hinfo->count = 0;
 	hinfo->family = family;
 	hinfo->rnd_initialized = false;
@@ -263,9 +262,9 @@ static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_
 	hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
 	add_timer(&hinfo->timer);
 
-	spin_lock_bh(&hashlimit_lock);
+	mutex_lock(&hashlimit_mutex);
 	hlist_add_head(&hinfo->node, &hashlimit_net->htables);
-	spin_unlock_bh(&hashlimit_lock);
+	mutex_unlock(&hashlimit_mutex);
 
 	return 0;
 }
@@ -308,7 +307,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
 	for (i = 0; i < hinfo->cfg.size; i++)
 		INIT_HLIST_HEAD(&hinfo->hash[i]);
 
-	atomic_set(&hinfo->use, 1);
+	hinfo->use = 1;
 	hinfo->count = 0;
 	hinfo->family = family;
 	hinfo->rnd_initialized = false;
@@ -328,9 +327,9 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
 	hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
 	add_timer(&hinfo->timer);
 
-	spin_lock_bh(&hashlimit_lock);
+	mutex_lock(&hashlimit_mutex);
 	hlist_add_head(&hinfo->node, &hashlimit_net->htables);
-	spin_unlock_bh(&hashlimit_lock);
+	mutex_unlock(&hashlimit_mutex);
 
 	return 0;
 }
@@ -402,27 +401,24 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net,
 	struct xt_hashlimit_htable *hinfo;
 	struct hlist_node *pos;
 
-	spin_lock_bh(&hashlimit_lock);
 	hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) {
 		if (!strcmp(name, hinfo->pde->name) &&
 		    hinfo->family == family) {
-			atomic_inc(&hinfo->use);
-			spin_unlock_bh(&hashlimit_lock);
+			hinfo->use++;
 			return hinfo;
 		}
 	}
-	spin_unlock_bh(&hashlimit_lock);
 	return NULL;
 }
 
 static void htable_put(struct xt_hashlimit_htable *hinfo)
 {
-	if (atomic_dec_and_test(&hinfo->use)) {
-		spin_lock_bh(&hashlimit_lock);
+	mutex_lock(&hashlimit_mutex);
+	if (--hinfo->use == 0) {
 		hlist_del(&hinfo->node);
-		spin_unlock_bh(&hashlimit_lock);
 		htable_destroy(hinfo);
 	}
+	mutex_unlock(&hashlimit_mutex);
 }
 
 /* The algorithm used is the Simple Token Bucket Filter (TBF)
@@ -710,19 +706,13 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
 	if (r->name[sizeof(r->name) - 1] != '\0')
 		return false;
 
-	/* This is the best we've got: We cannot release and re-grab lock,
-	 * since checkentry() is called before x_tables.c grabs xt_mutex.
-	 * We also cannot grab the hashtable spinlock, since htable_create will
-	 * call vmalloc, and that can sleep.  And we cannot just re-search
-	 * the list of htable's in htable_create(), since then we would
-	 * create duplicate proc files. -HW */
-	mutex_lock(&hlimit_mutex);
+	mutex_lock(&hashlimit_mutex);
 	r->hinfo = htable_find_get(net, r->name, par->match->family);
 	if (!r->hinfo && htable_create_v0(net, r, par->match->family) != 0) {
-		mutex_unlock(&hlimit_mutex);
+		mutex_unlock(&hashlimit_mutex);
 		return false;
 	}
-	mutex_unlock(&hlimit_mutex);
+	mutex_unlock(&hashlimit_mutex);
 
 	return true;
 }
@@ -752,19 +742,13 @@ static bool hashlimit_mt_check(const struct xt_mtchk_param *par)
 			return false;
 	}
 
-	/* This is the best we've got: We cannot release and re-grab lock,
-	 * since checkentry() is called before x_tables.c grabs xt_mutex.
-	 * We also cannot grab the hashtable spinlock, since htable_create will
-	 * call vmalloc, and that can sleep.  And we cannot just re-search
-	 * the list of htable's in htable_create(), since then we would
-	 * create duplicate proc files. -HW */
-	mutex_lock(&hlimit_mutex);
+	mutex_lock(&hashlimit_mutex);
 	info->hinfo = htable_find_get(net, info->name, par->match->family);
 	if (!info->hinfo && htable_create(net, info, par->match->family) != 0) {
-		mutex_unlock(&hlimit_mutex);
+		mutex_unlock(&hashlimit_mutex);
 		return false;
 	}
-	mutex_unlock(&hlimit_mutex);
+	mutex_unlock(&hashlimit_mutex);
 	return true;
 }
 
-- 
cgit v1.2.3


From 794e68716bab578ae8f8912dc934496d7c7abc90 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 3 Feb 2010 13:41:29 +0100
Subject: netfilter: ctnetlink: only assign helpers for matching protocols

Make sure not to assign a helper for a different network or transport
layer protocol to a connection.

Additionally change expectation deletion by helper to compare the name
directly - there might be multiple helper registrations using the same
name, currently one of them is chosen in an unpredictable manner and
only those expectations are removed.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_helper.c  |  8 +++++---
 net/netfilter/nf_conntrack_netlink.c | 23 +++++++++++------------
 2 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 65c2a7bc3afc..c0e461f466ae 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -65,7 +65,7 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
 }
 
 struct nf_conntrack_helper *
-__nf_conntrack_helper_find_byname(const char *name)
+__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum)
 {
 	struct nf_conntrack_helper *h;
 	struct hlist_node *n;
@@ -73,13 +73,15 @@ __nf_conntrack_helper_find_byname(const char *name)
 
 	for (i = 0; i < nf_ct_helper_hsize; i++) {
 		hlist_for_each_entry_rcu(h, n, &nf_ct_helper_hash[i], hnode) {
-			if (!strcmp(h->name, name))
+			if (!strcmp(h->name, name) &&
+			    h->tuple.src.l3num == l3num &&
+			    h->tuple.dst.protonum == protonum)
 				return h;
 		}
 	}
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find_byname);
+EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find);
 
 struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
 {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 79478dfba27e..16f86d61e5d1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1001,7 +1001,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[])
 		return 0;
 	}
 
-	helper = __nf_conntrack_helper_find_byname(helpname);
+	helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
+					    nf_ct_protonum(ct));
 	if (helper == NULL) {
 #ifdef CONFIG_MODULES
 		spin_unlock_bh(&nf_conntrack_lock);
@@ -1012,7 +1013,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[])
 		}
 
 		spin_lock_bh(&nf_conntrack_lock);
-		helper = __nf_conntrack_helper_find_byname(helpname);
+		helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
+						    nf_ct_protonum(ct));
 		if (helper)
 			return -EAGAIN;
 #endif
@@ -1211,7 +1213,8 @@ ctnetlink_create_conntrack(struct net *net,
  		if (err < 0)
 			goto err2;
 
-		helper = __nf_conntrack_helper_find_byname(helpname);
+		helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
+						    nf_ct_protonum(ct));
 		if (helper == NULL) {
 			rcu_read_unlock();
 #ifdef CONFIG_MODULES
@@ -1221,7 +1224,9 @@ ctnetlink_create_conntrack(struct net *net,
 			}
 
 			rcu_read_lock();
-			helper = __nf_conntrack_helper_find_byname(helpname);
+			helper = __nf_conntrack_helper_find(helpname,
+							    nf_ct_l3num(ct),
+							    nf_ct_protonum(ct));
 			if (helper) {
 				err = -EAGAIN;
 				goto err2;
@@ -1714,7 +1719,6 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 	struct net *net = sock_net(ctnl);
 	struct nf_conntrack_expect *exp;
 	struct nf_conntrack_tuple tuple;
-	struct nf_conntrack_helper *h;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	struct hlist_node *n, *next;
 	u_int8_t u3 = nfmsg->nfgen_family;
@@ -1751,18 +1755,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 
 		/* delete all expectations for this helper */
 		spin_lock_bh(&nf_conntrack_lock);
-		h = __nf_conntrack_helper_find_byname(name);
-		if (!h) {
-			spin_unlock_bh(&nf_conntrack_lock);
-			return -EOPNOTSUPP;
-		}
 		for (i = 0; i < nf_ct_expect_hsize; i++) {
 			hlist_for_each_entry_safe(exp, n, next,
 						  &net->ct.expect_hash[i],
 						  hnode) {
 				m_help = nfct_help(exp->master);
-				if (m_help->helper == h
-				    && del_timer(&exp->timeout)) {
+				if (!strcmp(m_help->helper->name, name) &&
+				    del_timer(&exp->timeout)) {
 					nf_ct_unlink_expect(exp);
 					nf_ct_expect_put(exp);
 				}
-- 
cgit v1.2.3


From 858b31330054a9ad259feceea0ad1ce5385c47f0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 3 Feb 2010 13:48:53 +0100
Subject: netfilter: nf_conntrack: split up IPCT_STATUS event

Split up the IPCT_STATUS event into an IPCT_REPLY event, which is generated
when the IPS_SEEN_REPLY bit is set, and an IPCT_ASSURED event, which is
generated when the IPS_ASSURED bit is set.

In combination with a following patch to support selective event delivery,
this can be used for "sparse" conntrack replication: start replicating the
conntrack entry after it reached the ASSURED state and that way it's SYN-flood
resistant.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_core.c          | 2 +-
 net/netfilter/nf_conntrack_netlink.c       | 6 ++++--
 net/netfilter/nf_conntrack_proto_gre.c     | 2 +-
 net/netfilter/nf_conntrack_proto_sctp.c    | 2 +-
 net/netfilter/nf_conntrack_proto_tcp.c     | 2 +-
 net/netfilter/nf_conntrack_proto_udp.c     | 2 +-
 net/netfilter/nf_conntrack_proto_udplite.c | 2 +-
 7 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0e98c3282d42..091ff770eb7b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -825,7 +825,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 	}
 
 	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
-		nf_conntrack_event_cache(IPCT_STATUS, ct);
+		nf_conntrack_event_cache(IPCT_REPLY, ct);
 
 	return ret;
 }
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 16f86d61e5d1..ff594eb138c1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1371,7 +1371,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			else
 				events = IPCT_NEW;
 
-			nf_conntrack_eventmask_report((1 << IPCT_STATUS) |
+			nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
+						      (1 << IPCT_ASSURED) |
 						      (1 << IPCT_HELPER) |
 						      (1 << IPCT_PROTOINFO) |
 						      (1 << IPCT_NATSEQADJ) |
@@ -1396,7 +1397,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		if (err == 0) {
 			nf_conntrack_get(&ct->ct_general);
 			spin_unlock_bh(&nf_conntrack_lock);
-			nf_conntrack_eventmask_report((1 << IPCT_STATUS) |
+			nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
+						      (1 << IPCT_ASSURED) |
 						      (1 << IPCT_HELPER) |
 						      (1 << IPCT_PROTOINFO) |
 						      (1 << IPCT_NATSEQADJ) |
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index c99cfba64ddc..d899b1a69940 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -241,7 +241,7 @@ static int gre_packet(struct nf_conn *ct,
 				   ct->proto.gre.stream_timeout);
 		/* Also, more likely to be important, and not a probe. */
 		set_bit(IPS_ASSURED_BIT, &ct->status);
-		nf_conntrack_event_cache(IPCT_STATUS, ct);
+		nf_conntrack_event_cache(IPCT_ASSURED, ct);
 	} else
 		nf_ct_refresh_acct(ct, ctinfo, skb,
 				   ct->proto.gre.timeout);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index f9d930f80276..b68ff15ed979 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -377,7 +377,7 @@ static int sctp_packet(struct nf_conn *ct,
 	    new_state == SCTP_CONNTRACK_ESTABLISHED) {
 		pr_debug("Setting assured bit\n");
 		set_bit(IPS_ASSURED_BIT, &ct->status);
-		nf_conntrack_event_cache(IPCT_STATUS, ct);
+		nf_conntrack_event_cache(IPCT_ASSURED, ct);
 	}
 
 	return NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 3c96437b45ad..ad118053971a 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1045,7 +1045,7 @@ static int tcp_packet(struct nf_conn *ct,
 		   after SYN_RECV or a valid answer for a picked up
 		   connection. */
 		set_bit(IPS_ASSURED_BIT, &ct->status);
-		nf_conntrack_event_cache(IPCT_STATUS, ct);
+		nf_conntrack_event_cache(IPCT_ASSURED, ct);
 	}
 	nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
 
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 5c5518bedb4b..8d38f9a4bed8 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -77,7 +77,7 @@ static int udp_packet(struct nf_conn *ct,
 		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream);
 		/* Also, more likely to be important, and not a probe */
 		if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
-			nf_conntrack_event_cache(IPCT_STATUS, ct);
+			nf_conntrack_event_cache(IPCT_ASSURED, ct);
 	} else
 		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout);
 
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 458655bb2106..0b1bc9ba6678 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -75,7 +75,7 @@ static int udplite_packet(struct nf_conn *ct,
 				   nf_ct_udplite_timeout_stream);
 		/* Also, more likely to be important, and not a probe */
 		if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
-			nf_conntrack_event_cache(IPCT_STATUS, ct);
+			nf_conntrack_event_cache(IPCT_ASSURED, ct);
 	} else
 		nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout);
 
-- 
cgit v1.2.3


From 0cebe4b4163b6373c9d24c1a192939777bc27e55 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 3 Feb 2010 13:51:51 +0100
Subject: netfilter: ctnetlink: support selective event delivery

Add two masks for conntrack end expectation events to struct nf_conntrack_ecache
and use them to filter events. Their default value is "all events" when the
event sysctl is on and "no events" when it is off. A following patch will add
specific initializations. Expectation events depend on the ecache struct of
their master conntrack.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_core.c    | 2 +-
 net/netfilter/nf_conntrack_netlink.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 091ff770eb7b..53b8da6ad6b7 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -648,7 +648,7 @@ init_conntrack(struct net *net,
 	}
 
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
-	nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
+	nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
 
 	spin_lock_bh(&nf_conntrack_lock);
 	exp = nf_ct_find_expectation(net, tuple);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index ff594eb138c1..f5c0b09e12f1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1281,7 +1281,7 @@ ctnetlink_create_conntrack(struct net *net,
 	}
 
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
-	nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
+	nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
 
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	if (cda[CTA_MARK])
-- 
cgit v1.2.3


From b2a15a604d379af323645e330638e2cfcc696aff Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 3 Feb 2010 14:13:03 +0100
Subject: netfilter: nf_conntrack: support conntrack templates

Support initializing selected parameters of new conntrack entries from a
"conntrack template", which is a specially marked conntrack entry attached
to the skb.

Currently the helper and the event delivery masks can be initialized this
way.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_core.c    | 50 ++++++++++++++++++++++++------------
 net/netfilter/nf_conntrack_helper.c  | 17 +++++++++---
 net/netfilter/nf_conntrack_netlink.c |  2 +-
 3 files changed, 48 insertions(+), 21 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 53b8da6ad6b7..471e2a79d26f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -618,7 +618,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
 /* Allocate a new conntrack: we return -ENOMEM if classification
    failed due to stress.  Otherwise it really is unclassifiable. */
 static struct nf_conntrack_tuple_hash *
-init_conntrack(struct net *net,
+init_conntrack(struct net *net, struct nf_conn *tmpl,
 	       const struct nf_conntrack_tuple *tuple,
 	       struct nf_conntrack_l3proto *l3proto,
 	       struct nf_conntrack_l4proto *l4proto,
@@ -628,6 +628,7 @@ init_conntrack(struct net *net,
 	struct nf_conn *ct;
 	struct nf_conn_help *help;
 	struct nf_conntrack_tuple repl_tuple;
+	struct nf_conntrack_ecache *ecache;
 	struct nf_conntrack_expect *exp;
 
 	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
@@ -648,7 +649,11 @@ init_conntrack(struct net *net,
 	}
 
 	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
-	nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
+
+	ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
+	nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
+				 ecache ? ecache->expmask : 0,
+			     GFP_ATOMIC);
 
 	spin_lock_bh(&nf_conntrack_lock);
 	exp = nf_ct_find_expectation(net, tuple);
@@ -673,7 +678,7 @@ init_conntrack(struct net *net,
 		nf_conntrack_get(&ct->master->ct_general);
 		NF_CT_STAT_INC(net, expect_new);
 	} else {
-		__nf_ct_try_assign_helper(ct, GFP_ATOMIC);
+		__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
 		NF_CT_STAT_INC(net, new);
 	}
 
@@ -694,7 +699,7 @@ init_conntrack(struct net *net,
 
 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
 static inline struct nf_conn *
-resolve_normal_ct(struct net *net,
+resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 		  struct sk_buff *skb,
 		  unsigned int dataoff,
 		  u_int16_t l3num,
@@ -718,7 +723,8 @@ resolve_normal_ct(struct net *net,
 	/* look for tuple match */
 	h = nf_conntrack_find_get(net, &tuple);
 	if (!h) {
-		h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff);
+		h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
+				   skb, dataoff);
 		if (!h)
 			return NULL;
 		if (IS_ERR(h))
@@ -755,7 +761,7 @@ unsigned int
 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 		struct sk_buff *skb)
 {
-	struct nf_conn *ct;
+	struct nf_conn *ct, *tmpl = NULL;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conntrack_l3proto *l3proto;
 	struct nf_conntrack_l4proto *l4proto;
@@ -764,10 +770,14 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 	int set_reply = 0;
 	int ret;
 
-	/* Previously seen (loopback or untracked)?  Ignore. */
 	if (skb->nfct) {
-		NF_CT_STAT_INC_ATOMIC(net, ignore);
-		return NF_ACCEPT;
+		/* Previously seen (loopback or untracked)?  Ignore. */
+		tmpl = (struct nf_conn *)skb->nfct;
+		if (!nf_ct_is_template(tmpl)) {
+			NF_CT_STAT_INC_ATOMIC(net, ignore);
+			return NF_ACCEPT;
+		}
+		skb->nfct = NULL;
 	}
 
 	/* rcu_read_lock()ed by nf_hook_slow */
@@ -778,7 +788,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 		pr_debug("not prepared to track yet or error occured\n");
 		NF_CT_STAT_INC_ATOMIC(net, error);
 		NF_CT_STAT_INC_ATOMIC(net, invalid);
-		return -ret;
+		ret = -ret;
+		goto out;
 	}
 
 	l4proto = __nf_ct_l4proto_find(pf, protonum);
@@ -791,22 +802,25 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 		if (ret <= 0) {
 			NF_CT_STAT_INC_ATOMIC(net, error);
 			NF_CT_STAT_INC_ATOMIC(net, invalid);
-			return -ret;
+			ret = -ret;
+			goto out;
 		}
 	}
 
-	ct = resolve_normal_ct(net, skb, dataoff, pf, protonum,
+	ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
 			       l3proto, l4proto, &set_reply, &ctinfo);
 	if (!ct) {
 		/* Not valid part of a connection */
 		NF_CT_STAT_INC_ATOMIC(net, invalid);
-		return NF_ACCEPT;
+		ret = NF_ACCEPT;
+		goto out;
 	}
 
 	if (IS_ERR(ct)) {
 		/* Too stressed to deal. */
 		NF_CT_STAT_INC_ATOMIC(net, drop);
-		return NF_DROP;
+		ret = NF_DROP;
+		goto out;
 	}
 
 	NF_CT_ASSERT(skb->nfct);
@@ -821,11 +835,15 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 		NF_CT_STAT_INC_ATOMIC(net, invalid);
 		if (ret == -NF_DROP)
 			NF_CT_STAT_INC_ATOMIC(net, drop);
-		return -ret;
+		ret = -ret;
+		goto out;
 	}
 
 	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
 		nf_conntrack_event_cache(IPCT_REPLY, ct);
+out:
+	if (tmpl)
+		nf_ct_put(tmpl);
 
 	return ret;
 }
@@ -864,7 +882,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
 		return;
 
 	rcu_read_lock();
-	__nf_ct_try_assign_helper(ct, GFP_ATOMIC);
+	__nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC);
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index c0e461f466ae..8144b0da5515 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -96,13 +96,22 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
 }
 EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
 
-int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags)
+int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
+			      gfp_t flags)
 {
+	struct nf_conntrack_helper *helper = NULL;
+	struct nf_conn_help *help;
 	int ret = 0;
-	struct nf_conntrack_helper *helper;
-	struct nf_conn_help *help = nfct_help(ct);
 
-	helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	if (tmpl != NULL) {
+		help = nfct_help(tmpl);
+		if (help != NULL)
+			helper = help->helper;
+	}
+
+	help = nfct_help(ct);
+	if (helper == NULL)
+		helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 	if (helper == NULL) {
 		if (help)
 			rcu_assign_pointer(help->helper, NULL);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index f5c0b09e12f1..09044f9f4b2e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1249,7 +1249,7 @@ ctnetlink_create_conntrack(struct net *net,
 		}
 	} else {
 		/* try an implicit helper assignation */
-		err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC);
+		err = __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC);
 		if (err < 0)
 			goto err2;
 	}
-- 
cgit v1.2.3


From 84f3bb9ae9db90f7fb15d98b55279a58ab1b2363 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 3 Feb 2010 17:17:06 +0100
Subject: netfilter: xtables: add CT target

Add a new target for the raw table, which can be used to specify conntrack
parameters for specific connections, f.i. the conntrack helper.

The target attaches a "template" connection tracking entry to the skb, which
is used by the conntrack core when initializing a new conntrack.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/Kconfig               |  12 +++
 net/netfilter/Makefile              |   1 +
 net/netfilter/nf_conntrack_helper.c |  19 +++++
 net/netfilter/xt_CT.c               | 158 ++++++++++++++++++++++++++++++++++++
 4 files changed, 190 insertions(+)
 create mode 100644 net/netfilter/xt_CT.c

(limited to 'net/netfilter')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 634d14affc8d..4469d45261f4 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -341,6 +341,18 @@ config NETFILTER_XT_TARGET_CONNSECMARK
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_CT
+	tristate '"CT" target support'
+	depends on NF_CONNTRACK
+	depends on IP_NF_RAW || IP6_NF_RAW
+	depends on NETFILTER_ADVANCED
+	help
+	  This options adds a `CT' target, which allows to specify initial
+	  connection tracking parameters like events to be delivered and
+	  the helper to be used.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_DSCP
 	tristate '"DSCP" and "TOS" target support'
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 49f62ee4e9ff..f873644f02f6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 8144b0da5515..a74a5769877b 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -83,6 +83,25 @@ __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum)
 }
 EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find);
 
+struct nf_conntrack_helper *
+nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
+{
+	struct nf_conntrack_helper *h;
+
+	h = __nf_conntrack_helper_find(name, l3num, protonum);
+#ifdef CONFIG_MODULES
+	if (h == NULL) {
+		if (request_module("nfct-helper-%s", name) == 0)
+			h = __nf_conntrack_helper_find(name, l3num, protonum);
+	}
+#endif
+	if (h != NULL && !try_module_get(h->me))
+		h = NULL;
+
+	return h;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get);
+
 struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
 {
 	struct nf_conn_help *help;
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
new file mode 100644
index 000000000000..8183a054256f
--- /dev/null
+++ b/net/netfilter/xt_CT.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2010 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/selinux.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_CT.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+
+static unsigned int xt_ct_target(struct sk_buff *skb,
+				 const struct xt_target_param *par)
+{
+	const struct xt_ct_target_info *info = par->targinfo;
+	struct nf_conn *ct = info->ct;
+
+	/* Previously seen (loopback)? Ignore. */
+	if (skb->nfct != NULL)
+		return XT_CONTINUE;
+
+	atomic_inc(&ct->ct_general.use);
+	skb->nfct = &ct->ct_general;
+	skb->nfctinfo = IP_CT_NEW;
+
+	return XT_CONTINUE;
+}
+
+static u8 xt_ct_find_proto(const struct xt_tgchk_param *par)
+{
+	if (par->family == AF_INET) {
+		const struct ipt_entry *e = par->entryinfo;
+
+		if (e->ip.invflags & IPT_INV_PROTO)
+			return 0;
+		return e->ip.proto;
+	} else if (par->family == AF_INET6) {
+		const struct ip6t_entry *e = par->entryinfo;
+
+		if (e->ipv6.invflags & IP6T_INV_PROTO)
+			return 0;
+		return e->ipv6.proto;
+	} else
+		return 0;
+}
+
+static bool xt_ct_tg_check(const struct xt_tgchk_param *par)
+{
+	struct xt_ct_target_info *info = par->targinfo;
+	struct nf_conntrack_tuple t;
+	struct nf_conn_help *help;
+	struct nf_conn *ct;
+	u8 proto;
+
+	if (info->flags & ~XT_CT_NOTRACK)
+		return false;
+
+	if (info->flags & XT_CT_NOTRACK) {
+		ct = &nf_conntrack_untracked;
+		atomic_inc(&ct->ct_general.use);
+		goto out;
+	}
+
+	if (nf_ct_l3proto_try_module_get(par->family) < 0)
+		goto err1;
+
+	memset(&t, 0, sizeof(t));
+	ct = nf_conntrack_alloc(par->net, &t, &t, GFP_KERNEL);
+	if (IS_ERR(ct))
+		goto err2;
+
+	if ((info->ct_events || info->exp_events) &&
+	    !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events,
+				  GFP_KERNEL))
+		goto err3;
+
+	if (info->helper[0]) {
+		proto = xt_ct_find_proto(par);
+		if (!proto)
+			goto err3;
+
+		help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
+		if (help == NULL)
+			goto err3;
+
+		help->helper = nf_conntrack_helper_try_module_get(info->helper,
+								  par->family,
+								  proto);
+		if (help->helper == NULL)
+			goto err3;
+	}
+
+	__set_bit(IPS_TEMPLATE_BIT, &ct->status);
+	__set_bit(IPS_CONFIRMED_BIT, &ct->status);
+out:
+	info->ct = ct;
+	return true;
+
+err3:
+	nf_conntrack_free(ct);
+err2:
+	nf_ct_l3proto_module_put(par->family);
+err1:
+	return false;
+}
+
+static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	struct xt_ct_target_info *info = par->targinfo;
+	struct nf_conn *ct = info->ct;
+	struct nf_conn_help *help;
+
+	if (ct != &nf_conntrack_untracked) {
+		help = nfct_help(ct);
+		if (help)
+			module_put(help->helper->me);
+
+		nf_ct_l3proto_module_put(par->family);
+	}
+	nf_ct_put(info->ct);
+}
+
+static struct xt_target xt_ct_tg __read_mostly = {
+	.name		= "CT",
+	.family		= NFPROTO_UNSPEC,
+	.targetsize	= XT_ALIGN(sizeof(struct xt_ct_target_info)),
+	.checkentry	= xt_ct_tg_check,
+	.destroy	= xt_ct_tg_destroy,
+	.target		= xt_ct_target,
+	.table		= "raw",
+	.me		= THIS_MODULE,
+};
+
+static int __init xt_ct_tg_init(void)
+{
+	return xt_register_target(&xt_ct_tg);
+}
+
+static void __exit xt_ct_tg_exit(void)
+{
+	xt_unregister_target(&xt_ct_tg);
+}
+
+module_init(xt_ct_tg_init);
+module_exit(xt_ct_tg_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: connection tracking target");
+MODULE_ALIAS("ipt_CT");
+MODULE_ALIAS("ip6t_CT");
-- 
cgit v1.2.3


From 3ad2f3fbb961429d2aa627465ae4829758bc7e07 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Wed, 3 Feb 2010 08:01:28 +0800
Subject: tree-wide: Assorted spelling fixes

In particular, several occurances of funny versions of 'success',
'unknown', 'therefore', 'acknowledge', 'argument', 'achieve', 'address',
'beginning', 'desirable', 'separate' and 'necessary' are fixed.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Joe Perches <joe@perches.com>
Cc: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 net/netfilter/nf_conntrack_sip.c | 4 ++--
 net/netfilter/xt_hashlimit.c     | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 023966b569bf..fbe94adee7ac 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -276,7 +276,7 @@ EXPORT_SYMBOL_GPL(ct_sip_parse_request);
  * tabs, spaces and continuation lines, which are treated as a single whitespace
  * character.
  *
- * Some headers may appear multiple times. A comma seperated list of values is
+ * Some headers may appear multiple times. A comma separated list of values is
  * equivalent to multiple headers.
  */
 static const struct sip_header ct_sip_hdrs[] = {
@@ -412,7 +412,7 @@ int ct_sip_get_header(const struct nf_conn *ct, const char *dptr,
 }
 EXPORT_SYMBOL_GPL(ct_sip_get_header);
 
-/* Get next header field in a list of comma seperated values */
+/* Get next header field in a list of comma separated values */
 static int ct_sip_next_header(const struct nf_conn *ct, const char *dptr,
 			      unsigned int dataoff, unsigned int datalen,
 			      enum sip_header_types type,
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index dd16e404424f..cbaac92dad59 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -1,6 +1,6 @@
 /*
  *	xt_hashlimit - Netfilter module to limit the number of packets per time
- *	seperately for each hashbucket (sourceip/sourceport/dstip/dstport)
+ *	separately for each hashbucket (sourceip/sourceport/dstip/dstport)
  *
  *	(C) 2003-2004 by Harald Welte <laforge@netfilter.org>
  *	Copyright © CC Computer Consultants GmbH, 2007 - 2008
-- 
cgit v1.2.3


From 42107f5009da223daa800d6da6904d77297ae829 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 10 Feb 2010 15:03:27 +0100
Subject: netfilter: xtables: symmetric COMPAT_XT_ALIGN definition

Rewrite COMPAT_XT_ALIGN in terms of dummy structure hack.
Compat counters logically have nothing to do with it.
Use ALIGN() macro while I'm at it for same types.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/x_tables.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index f01955cce314..5c564ff10a3b 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -364,7 +364,7 @@ int xt_check_match(struct xt_mtchk_param *par,
 		 * ebt_among is exempt from centralized matchsize checking
 		 * because it uses a dynamic-size data set.
 		 */
-		pr_err("%s_tables: %s match: invalid size %Zu != %u\n",
+		pr_err("%s_tables: %s match: invalid size %u != %u\n",
 		       xt_prefix[par->family], par->match->name,
 		       XT_ALIGN(par->match->matchsize), size);
 		return -EINVAL;
@@ -514,7 +514,7 @@ int xt_check_target(struct xt_tgchk_param *par,
 		    unsigned int size, u_int8_t proto, bool inv_proto)
 {
 	if (XT_ALIGN(par->target->targetsize) != size) {
-		pr_err("%s_tables: %s target: invalid size %Zu != %u\n",
+		pr_err("%s_tables: %s target: invalid size %u != %u\n",
 		       xt_prefix[par->family], par->target->name,
 		       XT_ALIGN(par->target->targetsize), size);
 		return -EINVAL;
-- 
cgit v1.2.3


From d0b0268fddea3235a8df35e52167c3b206bf2f5a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 10 Feb 2010 15:38:33 +0100
Subject: netfilter: ctnetlink: add missing netlink attribute policies

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_netlink.c | 41 +++++++++++++++++++++++++++++++-----
 1 file changed, 36 insertions(+), 5 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 327c5174440c..5c103b8c7df0 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -708,6 +708,11 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr,
 	return ret;
 }
 
+static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
+	[CTA_TUPLE_IP]		= { .type = NLA_NESTED },
+	[CTA_TUPLE_PROTO]	= { .type = NLA_NESTED },
+};
+
 static int
 ctnetlink_parse_tuple(const struct nlattr * const cda[],
 		      struct nf_conntrack_tuple *tuple,
@@ -718,7 +723,7 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
 
 	memset(tuple, 0, sizeof(*tuple));
 
-	nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], NULL);
+	nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy);
 
 	if (!tb[CTA_TUPLE_IP])
 		return -EINVAL;
@@ -745,12 +750,16 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
 	return 0;
 }
 
+static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = {
+	[CTA_HELP_NAME]		= { .type = NLA_NUL_STRING },
+};
+
 static inline int
 ctnetlink_parse_help(const struct nlattr *attr, char **helper_name)
 {
 	struct nlattr *tb[CTA_HELP_MAX+1];
 
-	nla_parse_nested(tb, CTA_HELP_MAX, attr, NULL);
+	nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy);
 
 	if (!tb[CTA_HELP_NAME])
 		return -EINVAL;
@@ -761,11 +770,17 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name)
 }
 
 static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
+	[CTA_TUPLE_ORIG]	= { .type = NLA_NESTED },
+	[CTA_TUPLE_REPLY]	= { .type = NLA_NESTED },
 	[CTA_STATUS] 		= { .type = NLA_U32 },
+	[CTA_PROTOINFO]		= { .type = NLA_NESTED },
+	[CTA_HELP]		= { .type = NLA_NESTED },
+	[CTA_NAT_SRC]		= { .type = NLA_NESTED },
 	[CTA_TIMEOUT] 		= { .type = NLA_U32 },
 	[CTA_MARK]		= { .type = NLA_U32 },
-	[CTA_USE]		= { .type = NLA_U32 },
 	[CTA_ID]		= { .type = NLA_U32 },
+	[CTA_NAT_DST]		= { .type = NLA_NESTED },
+	[CTA_TUPLE_MASTER]	= { .type = NLA_NESTED },
 };
 
 static int
@@ -1053,6 +1068,12 @@ ctnetlink_change_timeout(struct nf_conn *ct, const struct nlattr * const cda[])
 	return 0;
 }
 
+static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = {
+	[CTA_PROTOINFO_TCP]	= { .type = NLA_NESTED },
+	[CTA_PROTOINFO_DCCP]	= { .type = NLA_NESTED },
+	[CTA_PROTOINFO_SCTP]	= { .type = NLA_NESTED },
+};
+
 static inline int
 ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[])
 {
@@ -1061,7 +1082,7 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]
 	struct nf_conntrack_l4proto *l4proto;
 	int err = 0;
 
-	nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, NULL);
+	nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy);
 
 	rcu_read_lock();
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
@@ -1073,12 +1094,18 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]
 }
 
 #ifdef CONFIG_NF_NAT_NEEDED
+static const struct nla_policy nat_seq_policy[CTA_NAT_SEQ_MAX+1] = {
+	[CTA_NAT_SEQ_CORRECTION_POS]	= { .type = NLA_U32 },
+	[CTA_NAT_SEQ_OFFSET_BEFORE]	= { .type = NLA_U32 },
+	[CTA_NAT_SEQ_OFFSET_AFTER]	= { .type = NLA_U32 },
+};
+
 static inline int
 change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr)
 {
 	struct nlattr *cda[CTA_NAT_SEQ_MAX+1];
 
-	nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, NULL);
+	nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy);
 
 	if (!cda[CTA_NAT_SEQ_CORRECTION_POS])
 		return -EINVAL;
@@ -1648,8 +1675,12 @@ out:
 }
 
 static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
+	[CTA_EXPECT_MASTER]	= { .type = NLA_NESTED },
+	[CTA_EXPECT_TUPLE]	= { .type = NLA_NESTED },
+	[CTA_EXPECT_MASK]	= { .type = NLA_NESTED },
 	[CTA_EXPECT_TIMEOUT]	= { .type = NLA_U32 },
 	[CTA_EXPECT_ID]		= { .type = NLA_U32 },
+	[CTA_EXPECT_HELP_NAME]	= { .type = NLA_NUL_STRING },
 };
 
 static int
-- 
cgit v1.2.3


From 2b95efe7f6bb750256a702cc32d33b0cb2cd8223 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 17 Jun 2009 13:57:48 +0200
Subject: netfilter: xtables: use xt_table for hook instantiation

The respective xt_table structures already have most of the metadata
needed for hook setup. Add a 'priority' field to struct xt_table so
that xt_hook_link() can be called with a reduced number of arguments.

So should we be having more tables in the future, it comes at no
static cost (only runtime, as before) - space saved:
6807373->6806555.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/x_tables.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'net/netfilter')

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index f01955cce314..b51cb0d7234a 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1091,6 +1091,60 @@ static const struct file_operations xt_target_ops = {
 
 #endif /* CONFIG_PROC_FS */
 
+/**
+ * xt_hook_link - set up hooks for a new table
+ * @table:	table with metadata needed to set up hooks
+ * @fn:		Hook function
+ *
+ * This function will take care of creating and registering the necessary
+ * Netfilter hooks for XT tables.
+ */
+struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
+{
+	unsigned int hook_mask = table->valid_hooks;
+	uint8_t i, num_hooks = hweight32(hook_mask);
+	uint8_t hooknum;
+	struct nf_hook_ops *ops;
+	int ret;
+
+	ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL);
+	if (ops == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0, hooknum = 0; i < num_hooks && hook_mask != 0;
+	     hook_mask >>= 1, ++hooknum) {
+		if (!(hook_mask & 1))
+			continue;
+		ops[i].hook     = fn;
+		ops[i].owner    = table->me;
+		ops[i].pf       = table->af;
+		ops[i].hooknum  = hooknum;
+		ops[i].priority = table->priority;
+		++i;
+	}
+
+	ret = nf_register_hooks(ops, num_hooks);
+	if (ret < 0) {
+		kfree(ops);
+		return ERR_PTR(ret);
+	}
+
+	return ops;
+}
+EXPORT_SYMBOL_GPL(xt_hook_link);
+
+/**
+ * xt_hook_unlink - remove hooks for a table
+ * @ops:	nf_hook_ops array as returned by nf_hook_link
+ * @hook_mask:	the very same mask that was passed to nf_hook_link
+ */
+void xt_hook_unlink(const struct xt_table *table, struct nf_hook_ops *ops)
+{
+	nf_unregister_hooks(ops, hweight32(table->valid_hooks));
+	kfree(ops);
+}
+EXPORT_SYMBOL_GPL(xt_hook_unlink);
+
 int xt_proto_init(struct net *net, u_int8_t af)
 {
 #ifdef CONFIG_PROC_FS
-- 
cgit v1.2.3


From e3eaa9910b380530cfd2c0670fcd3f627674da8a Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 17 Jun 2009 22:14:54 +0200
Subject: netfilter: xtables: generate initial table on-demand

The static initial tables are pretty large, and after the net
namespace has been instantiated, they just hang around for nothing.
This commit removes them and creates tables on-demand at runtime when
needed.

Size shrinks by 7735 bytes (x86_64).

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/x_tables.c    |  4 +++-
 net/netfilter/xt_repldata.h | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 net/netfilter/xt_repldata.h

(limited to 'net/netfilter')

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index b51cb0d7234a..dc2e05cb54c0 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -26,7 +26,9 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_arp.h>
-
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_arp/arp_tables.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h
new file mode 100644
index 000000000000..6efe4e5a81c6
--- /dev/null
+++ b/net/netfilter/xt_repldata.h
@@ -0,0 +1,35 @@
+/*
+ * Today's hack: quantum tunneling in structs
+ *
+ * 'entries' and 'term' are never anywhere referenced by word in code. In fact,
+ * they serve as the hanging-off data accessed through repl.data[].
+ */
+
+#define xt_alloc_initial_table(type, typ2) ({ \
+	unsigned int hook_mask = info->valid_hooks; \
+	unsigned int nhooks = hweight32(hook_mask); \
+	unsigned int bytes = 0, hooknum = 0, i = 0; \
+	struct { \
+		struct type##_replace repl; \
+		struct type##_standard entries[nhooks]; \
+		struct type##_error term; \
+	} *tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); \
+	if (tbl == NULL) \
+		return NULL; \
+	strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \
+	tbl->term = (struct type##_error)typ2##_ERROR_INIT;  \
+	tbl->repl.valid_hooks = hook_mask; \
+	tbl->repl.num_entries = nhooks + 1; \
+	tbl->repl.size = nhooks * sizeof(struct type##_standard) + \
+	                 sizeof(struct type##_error); \
+	for (; hook_mask != 0; hook_mask >>= 1, ++hooknum) { \
+		if (!(hook_mask & 1)) \
+			continue; \
+		tbl->repl.hook_entry[hooknum] = bytes; \
+		tbl->repl.underflow[hooknum]  = bytes; \
+		tbl->entries[i++] = (struct type##_standard) \
+			typ2##_STANDARD_INIT(NF_ACCEPT); \
+		bytes += sizeof(struct type##_standard); \
+	} \
+	tbl; \
+})
-- 
cgit v1.2.3


From d1e7a03f4fee4059ee3fa7ce0edb7c48c1a75fcf Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 11 Feb 2010 12:22:28 +0100
Subject: netfilter: ctnetlink: dump expectation helper name

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_netlink.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 5c103b8c7df0..db35edac307b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1512,6 +1512,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
 			  const struct nf_conntrack_expect *exp)
 {
 	struct nf_conn *master = exp->master;
+	struct nf_conntrack_helper *helper;
 	long timeout = (exp->timeout.expires - jiffies) / HZ;
 
 	if (timeout < 0)
@@ -1528,6 +1529,9 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
 
 	NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout));
 	NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp));
+	helper = rcu_dereference(nfct_help(master)->helper);
+	if (helper)
+		NLA_PUT_STRING(skb, CTA_EXPECT_HELP_NAME, helper->name);
 
 	return 0;
 
-- 
cgit v1.2.3


From b87921bdf25485afd8f5a5f25e86b5acef32a9cf Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 11 Feb 2010 12:22:48 +0100
Subject: netfilter: nf_conntrack: show helper and class in
 /proc/net/nf_conntrack_expect

Make the output a bit more informative by showing the helper an expectation
belongs to and the expectation class.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_expect.c | 9 +++++++++
 net/netfilter/nf_conntrack_sip.c    | 3 +++
 2 files changed, 12 insertions(+)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 2f25ff610982..33b85f834c06 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -500,6 +500,7 @@ static void exp_seq_stop(struct seq_file *seq, void *v)
 static int exp_seq_show(struct seq_file *s, void *v)
 {
 	struct nf_conntrack_expect *expect;
+	struct nf_conntrack_helper *helper;
 	struct hlist_node *n = v;
 	char *delim = "";
 
@@ -525,6 +526,14 @@ static int exp_seq_show(struct seq_file *s, void *v)
 	if (expect->flags & NF_CT_EXPECT_INACTIVE)
 		seq_printf(s, "%sINACTIVE", delim);
 
+	helper = rcu_dereference(nfct_help(expect->master)->helper);
+	if (helper) {
+		seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
+		if (helper->expect_policy[expect->class].name)
+			seq_printf(s, "/%s",
+				   helper->expect_policy[expect->class].name);
+	}
+
 	return seq_putc(s, '\n');
 }
 
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 023966b569bf..419c5cabb332 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1305,14 +1305,17 @@ static char sip_names[MAX_PORTS][2][sizeof("sip-65535")] __read_mostly;
 
 static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = {
 	[SIP_EXPECT_SIGNALLING] = {
+		.name		= "signalling",
 		.max_expected	= 1,
 		.timeout	= 3 * 60,
 	},
 	[SIP_EXPECT_AUDIO] = {
+		.name		= "audio",
 		.max_expected	= 2 * IP_CT_DIR_MAX,
 		.timeout	= 3 * 60,
 	},
 	[SIP_EXPECT_VIDEO] = {
+		.name		= "video",
 		.max_expected	= 2 * IP_CT_DIR_MAX,
 		.timeout	= 3 * 60,
 	},
-- 
cgit v1.2.3


From 54101f4f3bed87fa968b57f77f8ff3b09fd88bcf Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 11 Feb 2010 12:23:12 +0100
Subject: netfilter: nf_conntrack_sip: fix ct_sip_parse_request() REGISTER
 request parsing

When requests are parsed, the "sip:" part of the SIP URI should be skipped.

Usually this doesn't matter because address parsing skips forward until after
the username part, but in case REGISTER requests it doesn't contain a username
and the address can not be parsed.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_sip.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 419c5cabb332..0ca2f2b5c2fa 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -236,12 +236,13 @@ int ct_sip_parse_request(const struct nf_conn *ct,
 		return 0;
 
 	/* Find SIP URI */
-	limit -= strlen("sip:");
-	for (; dptr < limit; dptr++) {
+	for (; dptr < limit - strlen("sip:"); dptr++) {
 		if (*dptr == '\r' || *dptr == '\n')
 			return -1;
-		if (strnicmp(dptr, "sip:", strlen("sip:")) == 0)
+		if (strnicmp(dptr, "sip:", strlen("sip:")) == 0) {
+			dptr += strlen("sip:");
 			break;
+		}
 	}
 	if (!skp_epaddr_len(ct, dptr, limit, &shift))
 		return 0;
-- 
cgit v1.2.3


From 3b6b9fab42fe98358d70735cf98d43fc18dc79c9 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 11 Feb 2010 12:23:53 +0100
Subject: netfilter: nf_conntrack_sip: pass data offset to NAT functions

When using TCP multiple SIP messages might be present in a single packet.
A following patch will parse them by setting the dptr to the beginning of
each message. The NAT helper needs to reload the dptr value after mangling
the packet however, so it needs to know the offset of the message to the
beginning of the packet.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_sip.c | 82 +++++++++++++++++++++-------------------
 1 file changed, 44 insertions(+), 38 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 0ca2f2b5c2fa..0ec37d6a2df7 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -50,12 +50,13 @@ module_param(sip_direct_media, int, 0600);
 MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling "
 				   "endpoints only (default 1)");
 
-unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb,
+unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int dataoff,
 				const char **dptr,
 				unsigned int *datalen) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
 
 unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
+				       unsigned int dataoff,
 				       const char **dptr,
 				       unsigned int *datalen,
 				       struct nf_conntrack_expect *exp,
@@ -63,17 +64,17 @@ unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
 				       unsigned int matchlen) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook);
 
-unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb,
+unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int dataoff,
 				     const char **dptr,
-				     unsigned int dataoff,
 				     unsigned int *datalen,
+				     unsigned int sdpoff,
 				     enum sdp_header_types type,
 				     enum sdp_header_types term,
 				     const union nf_inet_addr *addr)
 				     __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook);
 
-unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb,
+unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int dataoff,
 				     const char **dptr,
 				     unsigned int *datalen,
 				     unsigned int matchoff,
@@ -82,14 +83,15 @@ unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb,
 EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook);
 
 unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb,
-					const char **dptr,
 					unsigned int dataoff,
+					const char **dptr,
 					unsigned int *datalen,
+					unsigned int sdpoff,
 					const union nf_inet_addr *addr)
 					__read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook);
 
-unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb,
+unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int dataoff,
 				      const char **dptr,
 				      unsigned int *datalen,
 				      struct nf_conntrack_expect *rtp_exp,
@@ -729,7 +731,7 @@ static void flush_expectations(struct nf_conn *ct, bool media)
 	spin_unlock_bh(&nf_conntrack_lock);
 }
 
-static int set_expected_rtp_rtcp(struct sk_buff *skb,
+static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff,
 				 const char **dptr, unsigned int *datalen,
 				 union nf_inet_addr *daddr, __be16 port,
 				 enum sip_expectation_classes class,
@@ -806,7 +808,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
 	if (direct_rtp) {
 		nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook);
 		if (nf_nat_sdp_port &&
-		    !nf_nat_sdp_port(skb, dptr, datalen,
+		    !nf_nat_sdp_port(skb, dataoff, dptr, datalen,
 				     mediaoff, medialen, ntohs(rtp_port)))
 			goto err1;
 	}
@@ -828,7 +830,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
 
 	nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook);
 	if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp)
-		ret = nf_nat_sdp_media(skb, dptr, datalen, rtp_exp, rtcp_exp,
+		ret = nf_nat_sdp_media(skb, dataoff, dptr, datalen,
+				       rtp_exp, rtcp_exp,
 				       mediaoff, medialen, daddr);
 	else {
 		if (nf_ct_expect_related(rtp_exp) == 0) {
@@ -867,7 +870,7 @@ static const struct sdp_media_type *sdp_media_type(const char *dptr,
 	return NULL;
 }
 
-static int process_sdp(struct sk_buff *skb,
+static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
 		       const char **dptr, unsigned int *datalen,
 		       unsigned int cseq)
 {
@@ -942,7 +945,7 @@ static int process_sdp(struct sk_buff *skb,
 		else
 			return NF_DROP;
 
-		ret = set_expected_rtp_rtcp(skb, dptr, datalen,
+		ret = set_expected_rtp_rtcp(skb, dataoff, dptr, datalen,
 					    &rtp_addr, htons(port), t->class,
 					    mediaoff, medialen);
 		if (ret != NF_ACCEPT)
@@ -950,8 +953,9 @@ static int process_sdp(struct sk_buff *skb,
 
 		/* Update media connection address if present */
 		if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) {
-			ret = nf_nat_sdp_addr(skb, dptr, mediaoff, datalen,
-					      c_hdr, SDP_HDR_MEDIA, &rtp_addr);
+			ret = nf_nat_sdp_addr(skb, dataoff, dptr, datalen,
+					      mediaoff, c_hdr, SDP_HDR_MEDIA,
+					      &rtp_addr);
 			if (ret != NF_ACCEPT)
 				return ret;
 		}
@@ -961,14 +965,15 @@ static int process_sdp(struct sk_buff *skb,
 	/* Update session connection and owner addresses */
 	nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook);
 	if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK)
-		ret = nf_nat_sdp_session(skb, dptr, sdpoff, datalen, &rtp_addr);
+		ret = nf_nat_sdp_session(skb, dataoff, dptr, datalen, sdpoff,
+					 &rtp_addr);
 
 	if (ret == NF_ACCEPT && i > 0)
 		help->help.ct_sip_info.invite_cseq = cseq;
 
 	return ret;
 }
-static int process_invite_response(struct sk_buff *skb,
+static int process_invite_response(struct sk_buff *skb, unsigned int dataoff,
 				   const char **dptr, unsigned int *datalen,
 				   unsigned int cseq, unsigned int code)
 {
@@ -978,13 +983,13 @@ static int process_invite_response(struct sk_buff *skb,
 
 	if ((code >= 100 && code <= 199) ||
 	    (code >= 200 && code <= 299))
-		return process_sdp(skb, dptr, datalen, cseq);
+		return process_sdp(skb, dataoff, dptr, datalen, cseq);
 	else if (help->help.ct_sip_info.invite_cseq == cseq)
 		flush_expectations(ct, true);
 	return NF_ACCEPT;
 }
 
-static int process_update_response(struct sk_buff *skb,
+static int process_update_response(struct sk_buff *skb, unsigned int dataoff,
 				   const char **dptr, unsigned int *datalen,
 				   unsigned int cseq, unsigned int code)
 {
@@ -994,13 +999,13 @@ static int process_update_response(struct sk_buff *skb,
 
 	if ((code >= 100 && code <= 199) ||
 	    (code >= 200 && code <= 299))
-		return process_sdp(skb, dptr, datalen, cseq);
+		return process_sdp(skb, dataoff, dptr, datalen, cseq);
 	else if (help->help.ct_sip_info.invite_cseq == cseq)
 		flush_expectations(ct, true);
 	return NF_ACCEPT;
 }
 
-static int process_prack_response(struct sk_buff *skb,
+static int process_prack_response(struct sk_buff *skb, unsigned int dataoff,
 				  const char **dptr, unsigned int *datalen,
 				  unsigned int cseq, unsigned int code)
 {
@@ -1010,13 +1015,13 @@ static int process_prack_response(struct sk_buff *skb,
 
 	if ((code >= 100 && code <= 199) ||
 	    (code >= 200 && code <= 299))
-		return process_sdp(skb, dptr, datalen, cseq);
+		return process_sdp(skb, dataoff, dptr, datalen, cseq);
 	else if (help->help.ct_sip_info.invite_cseq == cseq)
 		flush_expectations(ct, true);
 	return NF_ACCEPT;
 }
 
-static int process_bye_request(struct sk_buff *skb,
+static int process_bye_request(struct sk_buff *skb, unsigned int dataoff,
 			       const char **dptr, unsigned int *datalen,
 			       unsigned int cseq)
 {
@@ -1031,7 +1036,7 @@ static int process_bye_request(struct sk_buff *skb,
  * signalling connections. The expectation is marked inactive and is activated
  * when receiving a response indicating success from the registrar.
  */
-static int process_register_request(struct sk_buff *skb,
+static int process_register_request(struct sk_buff *skb, unsigned int dataoff,
 				    const char **dptr, unsigned int *datalen,
 				    unsigned int cseq)
 {
@@ -1101,7 +1106,7 @@ static int process_register_request(struct sk_buff *skb,
 
 	nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook);
 	if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK)
-		ret = nf_nat_sip_expect(skb, dptr, datalen, exp,
+		ret = nf_nat_sip_expect(skb, dataoff, dptr, datalen, exp,
 					matchoff, matchlen);
 	else {
 		if (nf_ct_expect_related(exp) != 0)
@@ -1117,7 +1122,7 @@ store_cseq:
 	return ret;
 }
 
-static int process_register_response(struct sk_buff *skb,
+static int process_register_response(struct sk_buff *skb, unsigned int dataoff,
 				     const char **dptr, unsigned int *datalen,
 				     unsigned int cseq, unsigned int code)
 {
@@ -1127,7 +1132,7 @@ static int process_register_response(struct sk_buff *skb,
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	union nf_inet_addr addr;
 	__be16 port;
-	unsigned int matchoff, matchlen, dataoff = 0;
+	unsigned int matchoff, matchlen, coff = 0;
 	unsigned int expires = 0;
 	int in_contact = 0, ret;
 
@@ -1154,7 +1159,7 @@ static int process_register_response(struct sk_buff *skb,
 	while (1) {
 		unsigned int c_expires = expires;
 
-		ret = ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen,
+		ret = ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen,
 					      SIP_HDR_CONTACT, &in_contact,
 					      &matchoff, &matchlen,
 					      &addr, &port);
@@ -1193,13 +1198,13 @@ static const struct sip_handler sip_handlers[] = {
 	SIP_HANDLER("REGISTER", process_register_request, process_register_response),
 };
 
-static int process_sip_response(struct sk_buff *skb,
+static int process_sip_response(struct sk_buff *skb, unsigned int dataoff,
 				const char **dptr, unsigned int *datalen)
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-	unsigned int matchoff, matchlen;
-	unsigned int code, cseq, dataoff, i;
+	unsigned int matchoff, matchlen, matchend;
+	unsigned int code, cseq, i;
 
 	if (*datalen < strlen("SIP/2.0 200"))
 		return NF_ACCEPT;
@@ -1213,7 +1218,7 @@ static int process_sip_response(struct sk_buff *skb,
 	cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
 	if (!cseq)
 		return NF_DROP;
-	dataoff = matchoff + matchlen + 1;
+	matchend = matchoff + matchlen + 1;
 
 	for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) {
 		const struct sip_handler *handler;
@@ -1221,15 +1226,16 @@ static int process_sip_response(struct sk_buff *skb,
 		handler = &sip_handlers[i];
 		if (handler->response == NULL)
 			continue;
-		if (*datalen < dataoff + handler->len ||
-		    strnicmp(*dptr + dataoff, handler->method, handler->len))
+		if (*datalen < matchend + handler->len ||
+		    strnicmp(*dptr + matchend, handler->method, handler->len))
 			continue;
-		return handler->response(skb, dptr, datalen, cseq, code);
+		return handler->response(skb, dataoff, dptr, datalen,
+					 cseq, code);
 	}
 	return NF_ACCEPT;
 }
 
-static int process_sip_request(struct sk_buff *skb,
+static int process_sip_request(struct sk_buff *skb, unsigned int dataoff,
 			       const char **dptr, unsigned int *datalen)
 {
 	enum ip_conntrack_info ctinfo;
@@ -1254,7 +1260,7 @@ static int process_sip_request(struct sk_buff *skb,
 		if (!cseq)
 			return NF_DROP;
 
-		return handler->request(skb, dptr, datalen, cseq);
+		return handler->request(skb, dataoff, dptr, datalen, cseq);
 	}
 	return NF_ACCEPT;
 }
@@ -1288,13 +1294,13 @@ static int sip_help(struct sk_buff *skb,
 		return NF_ACCEPT;
 
 	if (strnicmp(dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0)
-		ret = process_sip_request(skb, &dptr, &datalen);
+		ret = process_sip_request(skb, dataoff, &dptr, &datalen);
 	else
-		ret = process_sip_response(skb, &dptr, &datalen);
+		ret = process_sip_response(skb, dataoff, &dptr, &datalen);
 
 	if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
 		nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
-		if (nf_nat_sip && !nf_nat_sip(skb, &dptr, &datalen))
+		if (nf_nat_sip && !nf_nat_sip(skb, dataoff, &dptr, &datalen))
 			ret = NF_DROP;
 	}
 
-- 
cgit v1.2.3


From f5b321bd37fbec9188feb1f721ab46a5ac0b35da Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 11 Feb 2010 12:26:19 +0100
Subject: netfilter: nf_conntrack_sip: add TCP support

Add TCP support, which is mandated by RFC3261 for all SIP elements.

SIP over TCP is similar to UDP, except that messages are delimited
by Content-Length: headers and multiple messages may appear in one
packet.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_sip.c | 205 +++++++++++++++++++++++++++++++++------
 1 file changed, 176 insertions(+), 29 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 0ec37d6a2df7..1cc75c5a822b 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -16,6 +16,7 @@
 #include <linux/inet.h>
 #include <linux/in.h>
 #include <linux/udp.h>
+#include <linux/tcp.h>
 #include <linux/netfilter.h>
 
 #include <net/netfilter/nf_conntrack.h>
@@ -287,7 +288,8 @@ static const struct sip_header ct_sip_hdrs[] = {
 	[SIP_HDR_FROM]			= SIP_HDR("From", "f", "sip:", skp_epaddr_len),
 	[SIP_HDR_TO]			= SIP_HDR("To", "t", "sip:", skp_epaddr_len),
 	[SIP_HDR_CONTACT]		= SIP_HDR("Contact", "m", "sip:", skp_epaddr_len),
-	[SIP_HDR_VIA]			= SIP_HDR("Via", "v", "UDP ", epaddr_len),
+	[SIP_HDR_VIA_UDP]		= SIP_HDR("Via", "v", "UDP ", epaddr_len),
+	[SIP_HDR_VIA_TCP]		= SIP_HDR("Via", "v", "TCP ", epaddr_len),
 	[SIP_HDR_EXPIRES]		= SIP_HDR("Expires", NULL, NULL, digits_len),
 	[SIP_HDR_CONTENT_LENGTH]	= SIP_HDR("Content-Length", "l", NULL, digits_len),
 };
@@ -519,6 +521,33 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr,
 }
 EXPORT_SYMBOL_GPL(ct_sip_parse_header_uri);
 
+static int ct_sip_parse_param(const struct nf_conn *ct, const char *dptr,
+			      unsigned int dataoff, unsigned int datalen,
+			      const char *name,
+			      unsigned int *matchoff, unsigned int *matchlen)
+{
+	const char *limit = dptr + datalen;
+	const char *start;
+	const char *end;
+
+	limit = ct_sip_header_search(dptr + dataoff, limit, ",", strlen(","));
+	if (!limit)
+		limit = dptr + datalen;
+
+	start = ct_sip_header_search(dptr + dataoff, limit, name, strlen(name));
+	if (!start)
+		return 0;
+	start += strlen(name);
+
+	end = ct_sip_header_search(start, limit, ";", strlen(";"));
+	if (!end)
+		end = limit;
+
+	*matchoff = start - dptr;
+	*matchlen = end - start;
+	return 1;
+}
+
 /* Parse address from header parameter and return address, offset and length */
 int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
 			       unsigned int dataoff, unsigned int datalen,
@@ -577,6 +606,29 @@ int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr,
 }
 EXPORT_SYMBOL_GPL(ct_sip_parse_numerical_param);
 
+static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr,
+				  unsigned int dataoff, unsigned int datalen,
+				  u8 *proto)
+{
+	unsigned int matchoff, matchlen;
+
+	if (ct_sip_parse_param(ct, dptr, dataoff, datalen, "transport=",
+			       &matchoff, &matchlen)) {
+		if (!strnicmp(dptr + matchoff, "TCP", strlen("TCP")))
+			*proto = IPPROTO_TCP;
+		else if (!strnicmp(dptr + matchoff, "UDP", strlen("UDP")))
+			*proto = IPPROTO_UDP;
+		else
+			return 0;
+
+		if (*proto != nf_ct_protonum(ct))
+			return 0;
+	} else
+		*proto = nf_ct_protonum(ct);
+
+	return 1;
+}
+
 /* SDP header parsing: a SDP session description contains an ordered set of
  * headers, starting with a section containing general session parameters,
  * optionally followed by multiple media descriptions.
@@ -685,7 +737,7 @@ static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr,
 
 static int refresh_signalling_expectation(struct nf_conn *ct,
 					  union nf_inet_addr *addr,
-					  __be16 port,
+					  u8 proto, __be16 port,
 					  unsigned int expires)
 {
 	struct nf_conn_help *help = nfct_help(ct);
@@ -697,6 +749,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct,
 	hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
 		if (exp->class != SIP_EXPECT_SIGNALLING ||
 		    !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) ||
+		    exp->tuple.dst.protonum != proto ||
 		    exp->tuple.dst.u.udp.port != port)
 			continue;
 		if (!del_timer(&exp->timeout))
@@ -1048,6 +1101,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff,
 	struct nf_conntrack_expect *exp;
 	union nf_inet_addr *saddr, daddr;
 	__be16 port;
+	u8 proto;
 	unsigned int expires = 0;
 	int ret;
 	typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect;
@@ -1080,6 +1134,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff,
 	if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, &daddr))
 		return NF_ACCEPT;
 
+	if (ct_sip_parse_transport(ct, *dptr, matchoff + matchlen, *datalen,
+				   &proto) == 0)
+		return NF_ACCEPT;
+
 	if (ct_sip_parse_numerical_param(ct, *dptr,
 					 matchoff + matchlen, *datalen,
 					 "expires=", NULL, NULL, &expires) < 0)
@@ -1099,7 +1157,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff,
 		saddr = &ct->tuplehash[!dir].tuple.src.u3;
 
 	nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct),
-			  saddr, &daddr, IPPROTO_UDP, NULL, &port);
+			  saddr, &daddr, proto, NULL, &port);
 	exp->timeout.expires = sip_timeout * HZ;
 	exp->helper = nfct_help(ct)->helper;
 	exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
@@ -1132,6 +1190,7 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff,
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	union nf_inet_addr addr;
 	__be16 port;
+	u8 proto;
 	unsigned int matchoff, matchlen, coff = 0;
 	unsigned int expires = 0;
 	int in_contact = 0, ret;
@@ -1172,6 +1231,10 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff,
 		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &addr))
 			continue;
 
+		if (ct_sip_parse_transport(ct, *dptr, matchoff + matchlen,
+					   *datalen, &proto) == 0)
+			continue;
+
 		ret = ct_sip_parse_numerical_param(ct, *dptr,
 						   matchoff + matchlen,
 						   *datalen, "expires=",
@@ -1180,7 +1243,8 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff,
 			return NF_DROP;
 		if (c_expires == 0)
 			break;
-		if (refresh_signalling_expectation(ct, &addr, port, c_expires))
+		if (refresh_signalling_expectation(ct, &addr, proto, port,
+						   c_expires))
 			return NF_ACCEPT;
 	}
 
@@ -1265,50 +1329,123 @@ static int process_sip_request(struct sk_buff *skb, unsigned int dataoff,
 	return NF_ACCEPT;
 }
 
-static int sip_help(struct sk_buff *skb,
-		    unsigned int protoff,
-		    struct nf_conn *ct,
-		    enum ip_conntrack_info ctinfo)
+static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct,
+			   unsigned int dataoff, const char **dptr,
+			   unsigned int *datalen)
+{
+	typeof(nf_nat_sip_hook) nf_nat_sip;
+	int ret;
+
+	if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0)
+		ret = process_sip_request(skb, dataoff, dptr, datalen);
+	else
+		ret = process_sip_response(skb, dataoff, dptr, datalen);
+
+	if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
+		nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
+		if (nf_nat_sip && !nf_nat_sip(skb, dataoff, dptr, datalen))
+			ret = NF_DROP;
+	}
+
+	return ret;
+}
+
+static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
+			struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
+	struct tcphdr *th, _tcph;
 	unsigned int dataoff, datalen;
-	const char *dptr;
+	unsigned int matchoff, matchlen, clen;
+	unsigned int msglen, origlen;
+	const char *dptr, *end;
+	s16 diff, tdiff = 0;
 	int ret;
-	typeof(nf_nat_sip_hook) nf_nat_sip;
+
+	if (ctinfo != IP_CT_ESTABLISHED &&
+	    ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY)
+		return NF_ACCEPT;
 
 	/* No Data ? */
-	dataoff = protoff + sizeof(struct udphdr);
+	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+	if (th == NULL)
+		return NF_ACCEPT;
+	dataoff = protoff + th->doff * 4;
 	if (dataoff >= skb->len)
 		return NF_ACCEPT;
 
 	nf_ct_refresh(ct, skb, sip_timeout * HZ);
 
-	if (!skb_is_nonlinear(skb))
-		dptr = skb->data + dataoff;
-	else {
+	if (skb_is_nonlinear(skb)) {
 		pr_debug("Copy of skbuff not supported yet.\n");
 		return NF_ACCEPT;
 	}
 
+	dptr = skb->data + dataoff;
 	datalen = skb->len - dataoff;
 	if (datalen < strlen("SIP/2.0 200"))
 		return NF_ACCEPT;
 
-	if (strnicmp(dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0)
-		ret = process_sip_request(skb, dataoff, &dptr, &datalen);
-	else
-		ret = process_sip_response(skb, dataoff, &dptr, &datalen);
+	while (1) {
+		if (ct_sip_get_header(ct, dptr, 0, datalen,
+				      SIP_HDR_CONTENT_LENGTH,
+				      &matchoff, &matchlen) <= 0)
+			break;
 
-	if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
-		nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
-		if (nf_nat_sip && !nf_nat_sip(skb, dataoff, &dptr, &datalen))
-			ret = NF_DROP;
+		clen = simple_strtoul(dptr + matchoff, (char **)&end, 10);
+		if (dptr + matchoff == end)
+			break;
+
+		if (end + strlen("\r\n\r\n") > dptr + datalen)
+			break;
+		if (end[0] != '\r' || end[1] != '\n' ||
+		    end[2] != '\r' || end[3] != '\n')
+			break;
+		end += strlen("\r\n\r\n") + clen;
+
+		msglen = origlen = end - dptr;
+
+		ret = process_sip_msg(skb, ct, dataoff, &dptr, &msglen);
+		if (ret != NF_ACCEPT)
+			break;
+		diff     = msglen - origlen;
+		tdiff   += diff;
+
+		dataoff += msglen;
+		dptr    += msglen;
+		datalen  = datalen + diff - msglen;
 	}
 
 	return ret;
 }
 
-static struct nf_conntrack_helper sip[MAX_PORTS][2] __read_mostly;
-static char sip_names[MAX_PORTS][2][sizeof("sip-65535")] __read_mostly;
+static int sip_help_udp(struct sk_buff *skb, unsigned int protoff,
+			struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+	unsigned int dataoff, datalen;
+	const char *dptr;
+
+	/* No Data ? */
+	dataoff = protoff + sizeof(struct udphdr);
+	if (dataoff >= skb->len)
+		return NF_ACCEPT;
+
+	nf_ct_refresh(ct, skb, sip_timeout * HZ);
+
+	if (skb_is_nonlinear(skb)) {
+		pr_debug("Copy of skbuff not supported yet.\n");
+		return NF_ACCEPT;
+	}
+
+	dptr = skb->data + dataoff;
+	datalen = skb->len - dataoff;
+	if (datalen < strlen("SIP/2.0 200"))
+		return NF_ACCEPT;
+
+	return process_sip_msg(skb, ct, dataoff, &dptr, &datalen);
+}
+
+static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly;
+static char sip_names[MAX_PORTS][4][sizeof("sip-65535")] __read_mostly;
 
 static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = {
 	[SIP_EXPECT_SIGNALLING] = {
@@ -1333,7 +1470,7 @@ static void nf_conntrack_sip_fini(void)
 	int i, j;
 
 	for (i = 0; i < ports_c; i++) {
-		for (j = 0; j < 2; j++) {
+		for (j = 0; j < ARRAY_SIZE(sip[i]); j++) {
 			if (sip[i][j].me == NULL)
 				continue;
 			nf_conntrack_helper_unregister(&sip[i][j]);
@@ -1353,14 +1490,24 @@ static int __init nf_conntrack_sip_init(void)
 		memset(&sip[i], 0, sizeof(sip[i]));
 
 		sip[i][0].tuple.src.l3num = AF_INET;
-		sip[i][1].tuple.src.l3num = AF_INET6;
-		for (j = 0; j < 2; j++) {
-			sip[i][j].tuple.dst.protonum = IPPROTO_UDP;
+		sip[i][0].tuple.dst.protonum = IPPROTO_UDP;
+		sip[i][0].help = sip_help_udp;
+		sip[i][1].tuple.src.l3num = AF_INET;
+		sip[i][1].tuple.dst.protonum = IPPROTO_TCP;
+		sip[i][1].help = sip_help_tcp;
+
+		sip[i][2].tuple.src.l3num = AF_INET6;
+		sip[i][2].tuple.dst.protonum = IPPROTO_UDP;
+		sip[i][2].help = sip_help_udp;
+		sip[i][3].tuple.src.l3num = AF_INET6;
+		sip[i][3].tuple.dst.protonum = IPPROTO_TCP;
+		sip[i][3].help = sip_help_tcp;
+
+		for (j = 0; j < ARRAY_SIZE(sip[i]); j++) {
 			sip[i][j].tuple.src.u.udp.port = htons(ports[i]);
 			sip[i][j].expect_policy = sip_exp_policy;
 			sip[i][j].expect_class_max = SIP_EXPECT_MAX;
 			sip[i][j].me = THIS_MODULE;
-			sip[i][j].help = sip_help;
 
 			tmpname = &sip_names[i][j][0];
 			if (ports[i] == SIP_PORT)
-- 
cgit v1.2.3


From 48f8ac26537c1b7b1a2422f5232f45d06c945348 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 11 Feb 2010 12:29:38 +0100
Subject: netfilter: nf_nat_sip: add TCP support

Add support for mangling TCP SIP packets.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_sip.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 1cc75c5a822b..3bb3aaff76e9 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -56,6 +56,9 @@ unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int dataoff,
 				unsigned int *datalen) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
 
+void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, s16 off) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook);
+
 unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
 				       unsigned int dataoff,
 				       const char **dptr,
@@ -1360,6 +1363,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
 	const char *dptr, *end;
 	s16 diff, tdiff = 0;
 	int ret;
+	typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust;
 
 	if (ctinfo != IP_CT_ESTABLISHED &&
 	    ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY)
@@ -1415,6 +1419,12 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
 		datalen  = datalen + diff - msglen;
 	}
 
+	if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
+		nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook);
+		if (nf_nat_sip_seq_adjust)
+			nf_nat_sip_seq_adjust(skb, tdiff);
+	}
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From 9d288dffe3a276e1f06ba556845c456d696c5a4f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 11 Feb 2010 12:30:21 +0100
Subject: netfilter: nf_conntrack_sip: add T.38 FAX support

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_sip.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 3bb3aaff76e9..fbe8ff5a420a 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -907,6 +907,7 @@ err1:
 static const struct sdp_media_type sdp_media_types[] = {
 	SDP_MEDIA_TYPE("audio ", SIP_EXPECT_AUDIO),
 	SDP_MEDIA_TYPE("video ", SIP_EXPECT_VIDEO),
+	SDP_MEDIA_TYPE("image ", SIP_EXPECT_IMAGE),
 };
 
 static const struct sdp_media_type *sdp_media_type(const char *dptr,
@@ -932,7 +933,6 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
 {
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-	struct nf_conn_help *help = nfct_help(ct);
 	unsigned int matchoff, matchlen;
 	unsigned int mediaoff, medialen;
 	unsigned int sdpoff;
@@ -1024,9 +1024,6 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
 		ret = nf_nat_sdp_session(skb, dataoff, dptr, datalen, sdpoff,
 					 &rtp_addr);
 
-	if (ret == NF_ACCEPT && i > 0)
-		help->help.ct_sip_info.invite_cseq = cseq;
-
 	return ret;
 }
 static int process_invite_response(struct sk_buff *skb, unsigned int dataoff,
@@ -1077,6 +1074,22 @@ static int process_prack_response(struct sk_buff *skb, unsigned int dataoff,
 	return NF_ACCEPT;
 }
 
+static int process_invite_request(struct sk_buff *skb, unsigned int dataoff,
+				  const char **dptr, unsigned int *datalen,
+				  unsigned int cseq)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct nf_conn_help *help = nfct_help(ct);
+	unsigned int ret;
+
+	flush_expectations(ct, true);
+	ret = process_sdp(skb, dataoff, dptr, datalen, cseq);
+	if (ret == NF_ACCEPT)
+		help->help.ct_sip_info.invite_cseq = cseq;
+	return ret;
+}
+
 static int process_bye_request(struct sk_buff *skb, unsigned int dataoff,
 			       const char **dptr, unsigned int *datalen,
 			       unsigned int cseq)
@@ -1257,7 +1270,7 @@ flush:
 }
 
 static const struct sip_handler sip_handlers[] = {
-	SIP_HANDLER("INVITE", process_sdp, process_invite_response),
+	SIP_HANDLER("INVITE", process_invite_request, process_invite_response),
 	SIP_HANDLER("UPDATE", process_sdp, process_update_response),
 	SIP_HANDLER("ACK", process_sdp, NULL),
 	SIP_HANDLER("PRACK", process_sdp, process_prack_response),
@@ -1473,6 +1486,11 @@ static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1
 		.max_expected	= 2 * IP_CT_DIR_MAX,
 		.timeout	= 3 * 60,
 	},
+	[SIP_EXPECT_IMAGE] = {
+		.name		= "image",
+		.max_expected	= IP_CT_DIR_MAX,
+		.timeout	= 3 * 60,
+	},
 };
 
 static void nf_conntrack_sip_fini(void)
-- 
cgit v1.2.3


From ca1c2e2da9637c131436bf6d6ae41b58f5353afe Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 12 Feb 2010 06:25:36 +0100
Subject: netfilter: don't use INIT_RCU_HEAD()

call_rcu() will unconditionally reinitialize RCU head anyway.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_expect.c | 1 -
 net/netfilter/nf_conntrack_extend.c | 1 -
 net/netfilter/nfnetlink_queue.c     | 1 -
 3 files changed, 3 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 33b85f834c06..6182fb1b55de 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -232,7 +232,6 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
 
 	new->master = me;
 	atomic_set(&new->use, 1);
-	INIT_RCU_HEAD(&new->rcu);
 	return new;
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index fef95be334bd..fdc8fb4ae10f 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -59,7 +59,6 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp)
 	if (!*ext)
 		return NULL;
 
-	INIT_RCU_HEAD(&(*ext)->rcu);
 	(*ext)->offset[id] = off;
 	(*ext)->len = len;
 
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 90cf36decea3..7ba4abc405c9 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -112,7 +112,6 @@ instance_create(u_int16_t queue_num, int pid)
 	inst->copy_mode = NFQNL_COPY_NONE;
 	spin_lock_init(&inst->lock);
 	INIT_LIST_HEAD(&inst->queue_list);
-	INIT_RCU_HEAD(&inst->rcu);
 
 	if (!try_module_get(THIS_MODULE)) {
 		err = -EAGAIN;
-- 
cgit v1.2.3


From 98e6d2d5ee26bf56850a10eb64139c68fb09ba19 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 15 Feb 2010 16:31:35 +0100
Subject: netfilter: xt_recent: inform user when hitcount is too large

It is one of these things that iptables cannot catch and which can
cause "Invalid argument" to be printed. Without a hint in dmesg, it is
not going to be helpful.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_recent.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 203333107367..132cfaa84cdc 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -53,7 +53,7 @@ module_param(ip_list_perms, uint, 0400);
 module_param(ip_list_uid, uint, 0400);
 module_param(ip_list_gid, uint, 0400);
 MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
-MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)");
+MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)");
 MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
 MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files");
 MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/xt_recent/* files");
@@ -306,8 +306,12 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 	if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) &&
 	    (info->seconds || info->hit_count))
 		return false;
-	if (info->hit_count > ip_pkt_list_tot)
+	if (info->hit_count > ip_pkt_list_tot) {
+		pr_info(KBUILD_MODNAME ": hitcount (%u) is larger than "
+			"packets to be remembered (%u)\n",
+			info->hit_count, ip_pkt_list_tot);
 		return false;
+	}
 	if (info->name[0] == '\0' ||
 	    strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN)
 		return false;
-- 
cgit v1.2.3


From b402405d71beed8e4df354844353f66b4e18269f Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 25 Jun 2009 18:32:12 +0200
Subject: netfilter: xtables: print details on size mismatch

Print which revision has been used and which size are which
(kernel/user) for easier debugging.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/x_tables.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 255ab0657ce8..12503199826f 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -366,8 +366,10 @@ int xt_check_match(struct xt_mtchk_param *par,
 		 * ebt_among is exempt from centralized matchsize checking
 		 * because it uses a dynamic-size data set.
 		 */
-		pr_err("%s_tables: %s match: invalid size %u != %u\n",
+		pr_err("%s_tables: %s.%u match: invalid size "
+		       "%u (kernel) != (user) %u\n",
 		       xt_prefix[par->family], par->match->name,
+		       par->match->revision,
 		       XT_ALIGN(par->match->matchsize), size);
 		return -EINVAL;
 	}
@@ -516,8 +518,10 @@ int xt_check_target(struct xt_tgchk_param *par,
 		    unsigned int size, u_int8_t proto, bool inv_proto)
 {
 	if (XT_ALIGN(par->target->targetsize) != size) {
-		pr_err("%s_tables: %s target: invalid size %u != %u\n",
+		pr_err("%s_tables: %s.%u target: invalid size "
+		       "%u (kernel) != (user) %u\n",
 		       xt_prefix[par->family], par->target->name,
+		       par->target->revision,
 		       XT_ALIGN(par->target->targetsize), size);
 		return -EINVAL;
 	}
-- 
cgit v1.2.3


From 739674fb7febf116e7d647031fab16989a08a965 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 26 Jun 2009 08:23:19 +0200
Subject: netfilter: xtables: constify args in compat copying functions

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/x_tables.c     | 8 ++++----
 net/netfilter/xt_hashlimit.c | 4 ++--
 net/netfilter/xt_limit.c     | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 12503199826f..69c56287d518 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -485,8 +485,8 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
 }
 EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
 
-int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr,
-			    unsigned int *size)
+int xt_compat_match_to_user(const struct xt_entry_match *m,
+			    void __user **dstptr, unsigned int *size)
 {
 	const struct xt_match *match = m->u.kernel.match;
 	struct compat_xt_entry_match __user *cm = *dstptr;
@@ -588,8 +588,8 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
 }
 EXPORT_SYMBOL_GPL(xt_compat_target_from_user);
 
-int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr,
-			     unsigned int *size)
+int xt_compat_target_to_user(const struct xt_entry_target *t,
+			     void __user **dstptr, unsigned int *size)
 {
 	const struct xt_target *target = t->u.kernel.target;
 	struct compat_xt_entry_target __user *ct = *dstptr;
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 017c95966aa8..e47fb805ffb4 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -775,7 +775,7 @@ struct compat_xt_hashlimit_info {
 	compat_uptr_t master;
 };
 
-static void hashlimit_mt_compat_from_user(void *dst, void *src)
+static void hashlimit_mt_compat_from_user(void *dst, const void *src)
 {
 	int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
 
@@ -783,7 +783,7 @@ static void hashlimit_mt_compat_from_user(void *dst, void *src)
 	memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off);
 }
 
-static int hashlimit_mt_compat_to_user(void __user *dst, void *src)
+static int hashlimit_mt_compat_to_user(void __user *dst, const void *src)
 {
 	int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
 
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 2773be6a71dd..a0ca5339af41 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -148,7 +148,7 @@ struct compat_xt_rateinfo {
 
 /* To keep the full "prev" timestamp, the upper 32 bits are stored in the
  * master pointer, which does not need to be preserved. */
-static void limit_mt_compat_from_user(void *dst, void *src)
+static void limit_mt_compat_from_user(void *dst, const void *src)
 {
 	const struct compat_xt_rateinfo *cm = src;
 	struct xt_rateinfo m = {
@@ -162,7 +162,7 @@ static void limit_mt_compat_from_user(void *dst, void *src)
 	memcpy(dst, &m, sizeof(m));
 }
 
-static int limit_mt_compat_to_user(void __user *dst, void *src)
+static int limit_mt_compat_to_user(void __user *dst, const void *src)
 {
 	const struct xt_rateinfo *m = src;
 	struct compat_xt_rateinfo cm = {
-- 
cgit v1.2.3


From 8fea97ec1772bbf553d89187340ef624d548e115 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 15 Feb 2010 17:45:08 +0100
Subject: netfilter: nf_conntrack: pass template to l4proto ->error() handler

The error handlers might need the template to get the conntrack zone
introduced in the next patches to perform a conntrack lookup.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_core.c          | 3 ++-
 net/netfilter/nf_conntrack_proto_dccp.c    | 5 +++--
 net/netfilter/nf_conntrack_proto_tcp.c     | 2 +-
 net/netfilter/nf_conntrack_proto_udp.c     | 4 ++--
 net/netfilter/nf_conntrack_proto_udplite.c | 2 +-
 5 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index bd831410a396..65351ed5d815 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -799,7 +799,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 	 * inverse of the return code tells to the netfilter
 	 * core what to do with the packet. */
 	if (l4proto->error != NULL) {
-		ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum);
+		ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo,
+				     pf, hooknum);
 		if (ret <= 0) {
 			NF_CT_STAT_INC_ATOMIC(net, error);
 			NF_CT_STAT_INC_ATOMIC(net, invalid);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index dd375500dccc..9a2815549375 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -561,8 +561,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
 	return NF_ACCEPT;
 }
 
-static int dccp_error(struct net *net, struct sk_buff *skb,
-		      unsigned int dataoff, enum ip_conntrack_info *ctinfo,
+static int dccp_error(struct net *net, struct nf_conn *tmpl,
+		      struct sk_buff *skb, unsigned int dataoff,
+		      enum ip_conntrack_info *ctinfo,
 		      u_int8_t pf, unsigned int hooknum)
 {
 	struct dccp_hdr _dh, *dh;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index ad118053971a..9dd8cd4fb6e6 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -760,7 +760,7 @@ static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
 };
 
 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
-static int tcp_error(struct net *net,
+static int tcp_error(struct net *net, struct nf_conn *tmpl,
 		     struct sk_buff *skb,
 		     unsigned int dataoff,
 		     enum ip_conntrack_info *ctinfo,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 8d38f9a4bed8..8289088b8218 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -91,8 +91,8 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	return true;
 }
 
-static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
-		     enum ip_conntrack_info *ctinfo,
+static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
+		     unsigned int dataoff, enum ip_conntrack_info *ctinfo,
 		     u_int8_t pf,
 		     unsigned int hooknum)
 {
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 0b1bc9ba6678..263b5a72588d 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -89,7 +89,7 @@ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb,
 	return true;
 }
 
-static int udplite_error(struct net *net,
+static int udplite_error(struct net *net, struct nf_conn *tmpl,
 			 struct sk_buff *skb,
 			 unsigned int dataoff,
 			 enum ip_conntrack_info *ctinfo,
-- 
cgit v1.2.3


From 5d0aa2ccd4699a01cfdf14886191c249d7b45a01 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 15 Feb 2010 18:13:33 +0100
Subject: netfilter: nf_conntrack: add support for "conntrack zones"

Normally, each connection needs a unique identity. Conntrack zones allow
to specify a numerical zone using the CT target, connections in different
zones can use the same identity.

Example:

iptables -t raw -A PREROUTING -i veth0 -j CT --zone 1
iptables -t raw -A OUTPUT -o veth1 -j CT --zone 1

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/Kconfig                   |  13 ++++
 net/netfilter/nf_conntrack_core.c       | 109 ++++++++++++++++++++++++--------
 net/netfilter/nf_conntrack_expect.c     |  21 ++++--
 net/netfilter/nf_conntrack_h323_main.c  |   3 +-
 net/netfilter/nf_conntrack_netlink.c    |  20 +++---
 net/netfilter/nf_conntrack_pptp.c       |  14 ++--
 net/netfilter/nf_conntrack_sip.c        |   3 +-
 net/netfilter/nf_conntrack_standalone.c |   6 ++
 net/netfilter/xt_CT.c                   |   8 ++-
 net/netfilter/xt_connlimit.c            |   4 +-
 10 files changed, 147 insertions(+), 54 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 4469d45261f4..18d77b5c351a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -83,6 +83,19 @@ config NF_CONNTRACK_SECMARK
 
 	  If unsure, say 'N'.
 
+config NF_CONNTRACK_ZONES
+	bool  'Connection tracking zones'
+	depends on NETFILTER_ADVANCED
+	depends on NETFILTER_XT_TARGET_CT
+	help
+	  This option enables support for connection tracking zones.
+	  Normally, each connection needs to have a unique system wide
+	  identity. Connection tracking zones allow to have multiple
+	  connections using the same identity, as long as they are
+	  contained in different zones.
+
+	  If unsure, say `N'.
+
 config NF_CONNTRACK_EVENTS
 	bool "Connection tracking events"
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 65351ed5d815..0c9bbe93cc16 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -42,6 +42,7 @@
 #include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
 
@@ -68,7 +69,7 @@ static int nf_conntrack_hash_rnd_initted;
 static unsigned int nf_conntrack_hash_rnd;
 
 static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
-				  unsigned int size, unsigned int rnd)
+				  u16 zone, unsigned int size, unsigned int rnd)
 {
 	unsigned int n;
 	u_int32_t h;
@@ -79,16 +80,16 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
 	 */
 	n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
 	h = jhash2((u32 *)tuple, n,
-		   rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
-			  tuple->dst.protonum));
+		   zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
+				 tuple->dst.protonum));
 
 	return ((u64)h * size) >> 32;
 }
 
-static inline u_int32_t hash_conntrack(const struct net *net,
+static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
 				       const struct nf_conntrack_tuple *tuple)
 {
-	return __hash_conntrack(tuple, net->ct.htable_size,
+	return __hash_conntrack(tuple, zone, net->ct.htable_size,
 				nf_conntrack_hash_rnd);
 }
 
@@ -292,11 +293,12 @@ static void death_by_timeout(unsigned long ul_conntrack)
  * - Caller must lock nf_conntrack_lock before calling this function
  */
 struct nf_conntrack_tuple_hash *
-__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
+__nf_conntrack_find(struct net *net, u16 zone,
+		    const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
-	unsigned int hash = hash_conntrack(net, tuple);
+	unsigned int hash = hash_conntrack(net, zone, tuple);
 
 	/* Disable BHs the entire time since we normally need to disable them
 	 * at least once for the stats anyway.
@@ -304,7 +306,8 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
 	local_bh_disable();
 begin:
 	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
-		if (nf_ct_tuple_equal(tuple, &h->tuple)) {
+		if (nf_ct_tuple_equal(tuple, &h->tuple) &&
+		    nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
 			NF_CT_STAT_INC(net, found);
 			local_bh_enable();
 			return h;
@@ -326,21 +329,23 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find);
 
 /* Find a connection corresponding to a tuple. */
 struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
+nf_conntrack_find_get(struct net *net, u16 zone,
+		      const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 
 	rcu_read_lock();
 begin:
-	h = __nf_conntrack_find(net, tuple);
+	h = __nf_conntrack_find(net, zone, tuple);
 	if (h) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
 		if (unlikely(nf_ct_is_dying(ct) ||
 			     !atomic_inc_not_zero(&ct->ct_general.use)))
 			h = NULL;
 		else {
-			if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) {
+			if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) ||
+				     nf_ct_zone(ct) != zone)) {
 				nf_ct_put(ct);
 				goto begin;
 			}
@@ -368,9 +373,11 @@ void nf_conntrack_hash_insert(struct nf_conn *ct)
 {
 	struct net *net = nf_ct_net(ct);
 	unsigned int hash, repl_hash;
+	u16 zone;
 
-	hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	zone = nf_ct_zone(ct);
+	hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
 	__nf_conntrack_hash_insert(ct, hash, repl_hash);
 }
@@ -387,6 +394,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	struct hlist_nulls_node *n;
 	enum ip_conntrack_info ctinfo;
 	struct net *net;
+	u16 zone;
 
 	ct = nf_ct_get(skb, &ctinfo);
 	net = nf_ct_net(ct);
@@ -398,8 +406,9 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
 		return NF_ACCEPT;
 
-	hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	zone = nf_ct_zone(ct);
+	hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
 	/* We're not in hash table, and we refuse to set up related
 	   connections for unconfirmed conns.  But packet copies and
@@ -418,11 +427,13 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	   not in the hash.  If there is, we lost race. */
 	hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
 		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-				      &h->tuple))
+				      &h->tuple) &&
+		    zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
 			goto out;
 	hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
 		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
-				      &h->tuple))
+				      &h->tuple) &&
+		    zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
 			goto out;
 
 	/* Remove from unconfirmed list */
@@ -469,15 +480,19 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 	struct net *net = nf_ct_net(ignored_conntrack);
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
-	unsigned int hash = hash_conntrack(net, tuple);
+	struct nf_conn *ct;
+	u16 zone = nf_ct_zone(ignored_conntrack);
+	unsigned int hash = hash_conntrack(net, zone, tuple);
 
 	/* Disable BHs the entire time since we need to disable them at
 	 * least once for the stats anyway.
 	 */
 	rcu_read_lock_bh();
 	hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
-		if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
-		    nf_ct_tuple_equal(tuple, &h->tuple)) {
+		ct = nf_ct_tuplehash_to_ctrack(h);
+		if (ct != ignored_conntrack &&
+		    nf_ct_tuple_equal(tuple, &h->tuple) &&
+		    nf_ct_zone(ct) == zone) {
 			NF_CT_STAT_INC(net, found);
 			rcu_read_unlock_bh();
 			return 1;
@@ -540,7 +555,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 	return dropped;
 }
 
-struct nf_conn *nf_conntrack_alloc(struct net *net,
+struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
 				   const struct nf_conntrack_tuple *orig,
 				   const struct nf_conntrack_tuple *repl,
 				   gfp_t gfp)
@@ -558,7 +573,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
 
 	if (nf_conntrack_max &&
 	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
-		unsigned int hash = hash_conntrack(net, orig);
+		unsigned int hash = hash_conntrack(net, zone, orig);
 		if (!early_drop(net, hash)) {
 			atomic_dec(&net->ct.count);
 			if (net_ratelimit())
@@ -595,13 +610,28 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
 #ifdef CONFIG_NET_NS
 	ct->ct_net = net;
 #endif
-
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+	if (zone) {
+		struct nf_conntrack_zone *nf_ct_zone;
+
+		nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC);
+		if (!nf_ct_zone)
+			goto out_free;
+		nf_ct_zone->id = zone;
+	}
+#endif
 	/*
 	 * changes to lookup keys must be done before setting refcnt to 1
 	 */
 	smp_wmb();
 	atomic_set(&ct->ct_general.use, 1);
 	return ct;
+
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+out_free:
+	kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+	return ERR_PTR(-ENOMEM);
+#endif
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
 
@@ -631,13 +661,14 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 	struct nf_conntrack_tuple repl_tuple;
 	struct nf_conntrack_ecache *ecache;
 	struct nf_conntrack_expect *exp;
+	u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
 
 	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
 		pr_debug("Can't invert tuple.\n");
 		return NULL;
 	}
 
-	ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC);
+	ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC);
 	if (IS_ERR(ct)) {
 		pr_debug("Can't allocate conntrack.\n");
 		return (struct nf_conntrack_tuple_hash *)ct;
@@ -657,7 +688,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 			     GFP_ATOMIC);
 
 	spin_lock_bh(&nf_conntrack_lock);
-	exp = nf_ct_find_expectation(net, tuple);
+	exp = nf_ct_find_expectation(net, zone, tuple);
 	if (exp) {
 		pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
 			 ct, exp);
@@ -713,6 +744,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
+	u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
 
 	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
 			     dataoff, l3num, protonum, &tuple, l3proto,
@@ -722,7 +754,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 	}
 
 	/* look for tuple match */
-	h = nf_conntrack_find_get(net, &tuple);
+	h = nf_conntrack_find_get(net, zone, &tuple);
 	if (!h) {
 		h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
 				   skb, dataoff);
@@ -958,6 +990,14 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
 }
 EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
 
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = {
+	.len	= sizeof(struct nf_conntrack_zone),
+	.align	= __alignof__(struct nf_conntrack_zone),
+	.id	= NF_CT_EXT_ZONE,
+};
+#endif
+
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 
 #include <linux/netfilter/nfnetlink.h>
@@ -1139,6 +1179,9 @@ static void nf_conntrack_cleanup_init_net(void)
 
 	nf_conntrack_helper_fini();
 	nf_conntrack_proto_fini();
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+	nf_ct_extend_unregister(&nf_ct_zone_extend);
+#endif
 }
 
 static void nf_conntrack_cleanup_net(struct net *net)
@@ -1214,6 +1257,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	unsigned int hashsize, old_size;
 	struct hlist_nulls_head *hash, *old_hash;
 	struct nf_conntrack_tuple_hash *h;
+	struct nf_conn *ct;
 
 	if (current->nsproxy->net_ns != &init_net)
 		return -EOPNOTSUPP;
@@ -1240,8 +1284,10 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 		while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
 			h = hlist_nulls_entry(init_net.ct.hash[i].first,
 					struct nf_conntrack_tuple_hash, hnnode);
+			ct = nf_ct_tuplehash_to_ctrack(h);
 			hlist_nulls_del_rcu(&h->hnnode);
-			bucket = __hash_conntrack(&h->tuple, hashsize,
+			bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
+						  hashsize,
 						  nf_conntrack_hash_rnd);
 			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
 		}
@@ -1299,6 +1345,11 @@ static int nf_conntrack_init_init_net(void)
 	if (ret < 0)
 		goto err_helper;
 
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+	ret = nf_ct_extend_register(&nf_ct_zone_extend);
+	if (ret < 0)
+		goto err_extend;
+#endif
 	/* Set up fake conntrack: to never be deleted, not in any hashes */
 #ifdef CONFIG_NET_NS
 	nf_conntrack_untracked.ct_net = &init_net;
@@ -1309,6 +1360,10 @@ static int nf_conntrack_init_init_net(void)
 
 	return 0;
 
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+err_extend:
+	nf_conntrack_helper_fini();
+#endif
 err_helper:
 	nf_conntrack_proto_fini();
 err_proto:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 6182fb1b55de..acb29ccaa41f 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -27,6 +27,7 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 
 unsigned int nf_ct_expect_hsize __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
@@ -84,7 +85,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple
 }
 
 struct nf_conntrack_expect *
-__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
+__nf_ct_expect_find(struct net *net, u16 zone,
+		    const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i;
 	struct hlist_node *n;
@@ -95,7 +97,8 @@ __nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
 
 	h = nf_ct_expect_dst_hash(tuple);
 	hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
-		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
+		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
+		    nf_ct_zone(i->master) == zone)
 			return i;
 	}
 	return NULL;
@@ -104,12 +107,13 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
 
 /* Just find a expectation corresponding to a tuple. */
 struct nf_conntrack_expect *
-nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
+nf_ct_expect_find_get(struct net *net, u16 zone,
+		      const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i;
 
 	rcu_read_lock();
-	i = __nf_ct_expect_find(net, tuple);
+	i = __nf_ct_expect_find(net, zone, tuple);
 	if (i && !atomic_inc_not_zero(&i->use))
 		i = NULL;
 	rcu_read_unlock();
@@ -121,7 +125,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
 /* If an expectation for this connection is found, it gets delete from
  * global list then returned. */
 struct nf_conntrack_expect *
-nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
+nf_ct_find_expectation(struct net *net, u16 zone,
+		       const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_expect *i, *exp = NULL;
 	struct hlist_node *n;
@@ -133,7 +138,8 @@ nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
 	h = nf_ct_expect_dst_hash(tuple);
 	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
 		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
-		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
+		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
+		    nf_ct_zone(i->master) == zone) {
 			exp = i;
 			break;
 		}
@@ -204,7 +210,8 @@ static inline int expect_matches(const struct nf_conntrack_expect *a,
 {
 	return a->master == b->master && a->class == b->class &&
 		nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
-		nf_ct_tuple_mask_equal(&a->mask, &b->mask);
+		nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
+		nf_ct_zone(a->master) == nf_ct_zone(b->master);
 }
 
 /* Generally a bad idea to call this: could have matched already. */
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 66369490230e..a1c8dd917e12 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -29,6 +29,7 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_conntrack_h323.h>
 
 /* Parameters */
@@ -1216,7 +1217,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
 	tuple.dst.u.tcp.port = port;
 	tuple.dst.protonum = IPPROTO_TCP;
 
-	exp = __nf_ct_expect_find(net, &tuple);
+	exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
 	if (exp && exp->master == ct)
 		return exp;
 	return NULL;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index db35edac307b..51089cfe1167 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -811,7 +811,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(net, &tuple);
+	h = nf_conntrack_find_get(net, 0, &tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -872,7 +872,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(net, &tuple);
+	h = nf_conntrack_find_get(net, 0, &tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -1221,7 +1221,7 @@ ctnetlink_create_conntrack(struct net *net,
 	int err = -EINVAL;
 	struct nf_conntrack_helper *helper;
 
-	ct = nf_conntrack_alloc(net, otuple, rtuple, GFP_ATOMIC);
+	ct = nf_conntrack_alloc(net, 0, otuple, rtuple, GFP_ATOMIC);
 	if (IS_ERR(ct))
 		return ERR_PTR(-ENOMEM);
 
@@ -1325,7 +1325,7 @@ ctnetlink_create_conntrack(struct net *net,
 		if (err < 0)
 			goto err2;
 
-		master_h = nf_conntrack_find_get(net, &master);
+		master_h = nf_conntrack_find_get(net, 0, &master);
 		if (master_h == NULL) {
 			err = -ENOENT;
 			goto err2;
@@ -1374,9 +1374,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 
 	spin_lock_bh(&nf_conntrack_lock);
 	if (cda[CTA_TUPLE_ORIG])
-		h = __nf_conntrack_find(net, &otuple);
+		h = __nf_conntrack_find(net, 0, &otuple);
 	else if (cda[CTA_TUPLE_REPLY])
-		h = __nf_conntrack_find(net, &rtuple);
+		h = __nf_conntrack_find(net, 0, &rtuple);
 
 	if (h == NULL) {
 		err = -ENOENT;
@@ -1714,7 +1714,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	exp = nf_ct_expect_find_get(net, &tuple);
+	exp = nf_ct_expect_find_get(net, 0, &tuple);
 	if (!exp)
 		return -ENOENT;
 
@@ -1770,7 +1770,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 			return err;
 
 		/* bump usage count to 2 */
-		exp = nf_ct_expect_find_get(net, &tuple);
+		exp = nf_ct_expect_find_get(net, 0, &tuple);
 		if (!exp)
 			return -ENOENT;
 
@@ -1855,7 +1855,7 @@ ctnetlink_create_expect(struct net *net, const struct nlattr * const cda[],
 		return err;
 
 	/* Look for master conntrack of this expectation */
-	h = nf_conntrack_find_get(net, &master_tuple);
+	h = nf_conntrack_find_get(net, 0, &master_tuple);
 	if (!h)
 		return -ENOENT;
 	ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1912,7 +1912,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 		return err;
 
 	spin_lock_bh(&nf_conntrack_lock);
-	exp = __nf_ct_expect_find(net, &tuple);
+	exp = __nf_ct_expect_find(net, 0, &tuple);
 
 	if (!exp) {
 		spin_unlock_bh(&nf_conntrack_lock);
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 3807ac7faf4c..088944824e13 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -28,6 +28,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_conntrack_proto_gre.h>
 #include <linux/netfilter/nf_conntrack_pptp.h>
 
@@ -123,7 +124,7 @@ static void pptp_expectfn(struct nf_conn *ct,
 		pr_debug("trying to unexpect other dir: ");
 		nf_ct_dump_tuple(&inv_t);
 
-		exp_other = nf_ct_expect_find_get(net, &inv_t);
+		exp_other = nf_ct_expect_find_get(net, nf_ct_zone(ct), &inv_t);
 		if (exp_other) {
 			/* delete other expectation.  */
 			pr_debug("found\n");
@@ -136,17 +137,18 @@ static void pptp_expectfn(struct nf_conn *ct,
 	rcu_read_unlock();
 }
 
-static int destroy_sibling_or_exp(struct net *net,
+static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
 				  const struct nf_conntrack_tuple *t)
 {
 	const struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_expect *exp;
 	struct nf_conn *sibling;
+	u16 zone = nf_ct_zone(ct);
 
 	pr_debug("trying to timeout ct or exp for tuple ");
 	nf_ct_dump_tuple(t);
 
-	h = nf_conntrack_find_get(net, t);
+	h = nf_conntrack_find_get(net, zone, t);
 	if (h)  {
 		sibling = nf_ct_tuplehash_to_ctrack(h);
 		pr_debug("setting timeout of conntrack %p to 0\n", sibling);
@@ -157,7 +159,7 @@ static int destroy_sibling_or_exp(struct net *net,
 		nf_ct_put(sibling);
 		return 1;
 	} else {
-		exp = nf_ct_expect_find_get(net, t);
+		exp = nf_ct_expect_find_get(net, zone, t);
 		if (exp) {
 			pr_debug("unexpect_related of expect %p\n", exp);
 			nf_ct_unexpect_related(exp);
@@ -182,7 +184,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct)
 	t.dst.protonum = IPPROTO_GRE;
 	t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id;
 	t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id;
-	if (!destroy_sibling_or_exp(net, &t))
+	if (!destroy_sibling_or_exp(net, ct, &t))
 		pr_debug("failed to timeout original pns->pac ct/exp\n");
 
 	/* try reply (pac->pns) tuple */
@@ -190,7 +192,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct)
 	t.dst.protonum = IPPROTO_GRE;
 	t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id;
 	t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id;
-	if (!destroy_sibling_or_exp(net, &t))
+	if (!destroy_sibling_or_exp(net, ct, &t))
 		pr_debug("failed to timeout reply pac->pns ct/exp\n");
 }
 
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index fbe8ff5a420a..8dd75d90efc0 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -23,6 +23,7 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #include <linux/netfilter/nf_conntrack_sip.h>
 
 MODULE_LICENSE("GPL");
@@ -836,7 +837,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff,
 
 	rcu_read_lock();
 	do {
-		exp = __nf_ct_expect_find(net, &tuple);
+		exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
 
 		if (!exp || exp->master == ct ||
 		    nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index e310f1561bb2..24a42efe62ef 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -26,6 +26,7 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 
 MODULE_LICENSE("GPL");
 
@@ -171,6 +172,11 @@ static int ct_seq_show(struct seq_file *s, void *v)
 		goto release;
 #endif
 
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+	if (seq_printf(s, "zone=%u ", nf_ct_zone(ct)))
+		goto release;
+#endif
+
 	if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
 		goto release;
 
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 8183a054256f..61c50fa84703 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -16,6 +16,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 
 static unsigned int xt_ct_target(struct sk_buff *skb,
 				 const struct xt_target_param *par)
@@ -69,11 +70,16 @@ static bool xt_ct_tg_check(const struct xt_tgchk_param *par)
 		goto out;
 	}
 
+#ifndef CONFIG_NF_CONNTRACK_ZONES
+	if (info->zone)
+		goto err1;
+#endif
+
 	if (nf_ct_l3proto_try_module_get(par->family) < 0)
 		goto err1;
 
 	memset(&t, 0, sizeof(t));
-	ct = nf_conntrack_alloc(par->net, &t, &t, GFP_KERNEL);
+	ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL);
 	if (IS_ERR(ct))
 		goto err2;
 
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 0d9d18ea2b09..26997ce90e48 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -28,6 +28,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 
 /* we will save the tuples of all connections we care about */
 struct xt_connlimit_conn {
@@ -114,7 +115,8 @@ static int count_them(struct net *net,
 
 	/* check the saved connections */
 	list_for_each_entry_safe(conn, tmp, hash, list) {
-		found    = nf_conntrack_find_get(net, &conn->tuple);
+		found    = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE,
+						 &conn->tuple);
 		found_ct = NULL;
 
 		if (found != NULL)
-- 
cgit v1.2.3


From ef00f89f1eb7e056aab9dfe068521e6f2320c94a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 15 Feb 2010 18:14:57 +0100
Subject: netfilter: ctnetlink: add zone support

Parse and dump the conntrack zone in ctnetlink.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_netlink.c | 92 ++++++++++++++++++++++++++++--------
 1 file changed, 73 insertions(+), 19 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 51089cfe1167..8b05f364b2f2 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -39,6 +39,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_protocol.h>
@@ -379,6 +380,9 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 		goto nla_put_failure;
 	nla_nest_end(skb, nest_parms);
 
+	if (nf_ct_zone(ct))
+		NLA_PUT_BE16(skb, CTA_ZONE, htons(nf_ct_zone(ct)));
+
 	if (ctnetlink_dump_status(skb, ct) < 0 ||
 	    ctnetlink_dump_timeout(skb, ct) < 0 ||
 	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
@@ -517,6 +521,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 		goto nla_put_failure;
 	nla_nest_end(skb, nest_parms);
 
+	if (nf_ct_zone(ct))
+		NLA_PUT_BE16(skb, CTA_ZONE, htons(nf_ct_zone(ct)));
+
 	if (ctnetlink_dump_id(skb, ct) < 0)
 		goto nla_put_failure;
 
@@ -750,6 +757,21 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
 	return 0;
 }
 
+static int
+ctnetlink_parse_zone(const struct nlattr *attr, u16 *zone)
+{
+	if (attr)
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+		*zone = ntohs(nla_get_be16(attr));
+#else
+		return -EOPNOTSUPP;
+#endif
+	else
+		*zone = 0;
+
+	return 0;
+}
+
 static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = {
 	[CTA_HELP_NAME]		= { .type = NLA_NUL_STRING },
 };
@@ -781,6 +803,7 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
 	[CTA_ID]		= { .type = NLA_U32 },
 	[CTA_NAT_DST]		= { .type = NLA_NESTED },
 	[CTA_TUPLE_MASTER]	= { .type = NLA_NESTED },
+	[CTA_ZONE]		= { .type = NLA_U16 },
 };
 
 static int
@@ -794,7 +817,12 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	struct nf_conn *ct;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u_int8_t u3 = nfmsg->nfgen_family;
-	int err = 0;
+	u16 zone;
+	int err;
+
+	err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
+	if (err < 0)
+		return err;
 
 	if (cda[CTA_TUPLE_ORIG])
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
@@ -811,7 +839,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(net, 0, &tuple);
+	h = nf_conntrack_find_get(net, zone, &tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -856,12 +884,17 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	struct sk_buff *skb2 = NULL;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u_int8_t u3 = nfmsg->nfgen_family;
-	int err = 0;
+	u16 zone;
+	int err;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP)
 		return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table,
 					  ctnetlink_done);
 
+	err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
+	if (err < 0)
+		return err;
+
 	if (cda[CTA_TUPLE_ORIG])
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
 	else if (cda[CTA_TUPLE_REPLY])
@@ -872,7 +905,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	h = nf_conntrack_find_get(net, 0, &tuple);
+	h = nf_conntrack_find_get(net, zone, &tuple);
 	if (!h)
 		return -ENOENT;
 
@@ -1211,7 +1244,7 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
 }
 
 static struct nf_conn *
-ctnetlink_create_conntrack(struct net *net,
+ctnetlink_create_conntrack(struct net *net, u16 zone,
 			   const struct nlattr * const cda[],
 			   struct nf_conntrack_tuple *otuple,
 			   struct nf_conntrack_tuple *rtuple,
@@ -1221,7 +1254,7 @@ ctnetlink_create_conntrack(struct net *net,
 	int err = -EINVAL;
 	struct nf_conntrack_helper *helper;
 
-	ct = nf_conntrack_alloc(net, 0, otuple, rtuple, GFP_ATOMIC);
+	ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC);
 	if (IS_ERR(ct))
 		return ERR_PTR(-ENOMEM);
 
@@ -1325,7 +1358,7 @@ ctnetlink_create_conntrack(struct net *net,
 		if (err < 0)
 			goto err2;
 
-		master_h = nf_conntrack_find_get(net, 0, &master);
+		master_h = nf_conntrack_find_get(net, zone, &master);
 		if (master_h == NULL) {
 			err = -ENOENT;
 			goto err2;
@@ -1358,7 +1391,12 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	struct nf_conntrack_tuple_hash *h = NULL;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u_int8_t u3 = nfmsg->nfgen_family;
-	int err = 0;
+	u16 zone;
+	int err;
+
+	err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
+	if (err < 0)
+		return err;
 
 	if (cda[CTA_TUPLE_ORIG]) {
 		err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3);
@@ -1374,9 +1412,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 
 	spin_lock_bh(&nf_conntrack_lock);
 	if (cda[CTA_TUPLE_ORIG])
-		h = __nf_conntrack_find(net, 0, &otuple);
+		h = __nf_conntrack_find(net, zone, &otuple);
 	else if (cda[CTA_TUPLE_REPLY])
-		h = __nf_conntrack_find(net, 0, &rtuple);
+		h = __nf_conntrack_find(net, zone, &rtuple);
 
 	if (h == NULL) {
 		err = -ENOENT;
@@ -1384,7 +1422,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 			struct nf_conn *ct;
 			enum ip_conntrack_events events;
 
-			ct = ctnetlink_create_conntrack(net, cda, &otuple,
+			ct = ctnetlink_create_conntrack(net, zone, cda, &otuple,
 							&rtuple, u3);
 			if (IS_ERR(ct)) {
 				err = PTR_ERR(ct);
@@ -1698,7 +1736,8 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	struct sk_buff *skb2;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u_int8_t u3 = nfmsg->nfgen_family;
-	int err = 0;
+	u16 zone;
+	int err;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		return netlink_dump_start(ctnl, skb, nlh,
@@ -1706,6 +1745,10 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 					  ctnetlink_exp_done);
 	}
 
+	err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+	if (err < 0)
+		return err;
+
 	if (cda[CTA_EXPECT_MASTER])
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
 	else
@@ -1714,7 +1757,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	exp = nf_ct_expect_find_get(net, 0, &tuple);
+	exp = nf_ct_expect_find_get(net, zone, &tuple);
 	if (!exp)
 		return -ENOENT;
 
@@ -1761,16 +1804,21 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 	struct hlist_node *n, *next;
 	u_int8_t u3 = nfmsg->nfgen_family;
 	unsigned int i;
+	u16 zone;
 	int err;
 
 	if (cda[CTA_EXPECT_TUPLE]) {
 		/* delete a single expect by tuple */
+		err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+		if (err < 0)
+			return err;
+
 		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
 		if (err < 0)
 			return err;
 
 		/* bump usage count to 2 */
-		exp = nf_ct_expect_find_get(net, 0, &tuple);
+		exp = nf_ct_expect_find_get(net, zone, &tuple);
 		if (!exp)
 			return -ENOENT;
 
@@ -1832,7 +1880,8 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x,
 }
 
 static int
-ctnetlink_create_expect(struct net *net, const struct nlattr * const cda[],
+ctnetlink_create_expect(struct net *net, u16 zone,
+			const struct nlattr * const cda[],
 			u_int8_t u3,
 			u32 pid, int report)
 {
@@ -1855,7 +1904,7 @@ ctnetlink_create_expect(struct net *net, const struct nlattr * const cda[],
 		return err;
 
 	/* Look for master conntrack of this expectation */
-	h = nf_conntrack_find_get(net, 0, &master_tuple);
+	h = nf_conntrack_find_get(net, zone, &master_tuple);
 	if (!h)
 		return -ENOENT;
 	ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1900,25 +1949,30 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 	struct nf_conntrack_expect *exp;
 	struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	u_int8_t u3 = nfmsg->nfgen_family;
-	int err = 0;
+	u16 zone;
+	int err;
 
 	if (!cda[CTA_EXPECT_TUPLE]
 	    || !cda[CTA_EXPECT_MASK]
 	    || !cda[CTA_EXPECT_MASTER])
 		return -EINVAL;
 
+	err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+	if (err < 0)
+		return err;
+
 	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
 	if (err < 0)
 		return err;
 
 	spin_lock_bh(&nf_conntrack_lock);
-	exp = __nf_ct_expect_find(net, 0, &tuple);
+	exp = __nf_ct_expect_find(net, zone, &tuple);
 
 	if (!exp) {
 		spin_unlock_bh(&nf_conntrack_lock);
 		err = -ENOENT;
 		if (nlh->nlmsg_flags & NLM_F_CREATE) {
-			err = ctnetlink_create_expect(net, cda,
+			err = ctnetlink_create_expect(net, zone, cda,
 						      u3,
 						      NETLINK_CB(skb).pid,
 						      nlmsg_report(nlh));
-- 
cgit v1.2.3


From 3e5e524ffb5fcf2447eb5dd9f8e54ad22dd9baa7 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fwestphal@astaro.com>
Date: Mon, 15 Feb 2010 18:17:10 +0100
Subject: netfilter: CONFIG_COMPAT: allow delta to exceed 32767

with 32 bit userland and 64 bit kernels, it is unlikely but possible
that insertion of new rules fails even tough there are only about 2000
iptables rules.

This happens because the compat delta is using a short int.
Easily reproducible via "iptables -m limit" ; after about 2050
rules inserting new ones fails with -ELOOP.

Note that compat_delta included 2 bytes of padding on x86_64, so
structure size remains the same.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/x_tables.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 69c56287d518..0a12cedfe9e3 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -39,7 +39,7 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
 struct compat_delta {
 	struct compat_delta *next;
 	unsigned int offset;
-	short delta;
+	int delta;
 };
 
 struct xt_af {
@@ -439,10 +439,10 @@ void xt_compat_flush_offsets(u_int8_t af)
 }
 EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
 
-short xt_compat_calc_jump(u_int8_t af, unsigned int offset)
+int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
 {
 	struct compat_delta *tmp;
-	short delta;
+	int delta;
 
 	for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next)
 		if (tmp->offset < offset)
-- 
cgit v1.2.3


From 8a5ce54562f296b1996813a413b0f2307ec9351a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 17 Feb 2010 06:43:47 +0000
Subject: xt_hashlimit: fix locking

Commit 2eff25c18c3d332d3c4dd98f2ac9b7114e9771b0
(netfilter: xt_hashlimit: fix race condition and simplify locking)
added a mutex deadlock :
htable_create() is called with hashlimit_mutex already locked

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_hashlimit.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index e47fb805ffb4..d952806b6469 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -262,9 +262,7 @@ static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_
 	hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
 	add_timer(&hinfo->timer);
 
-	mutex_lock(&hashlimit_mutex);
 	hlist_add_head(&hinfo->node, &hashlimit_net->htables);
-	mutex_unlock(&hashlimit_mutex);
 
 	return 0;
 }
@@ -327,9 +325,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
 	hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
 	add_timer(&hinfo->timer);
 
-	mutex_lock(&hashlimit_mutex);
 	hlist_add_head(&hinfo->node, &hashlimit_net->htables);
-	mutex_unlock(&hashlimit_mutex);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 2906f66a5682e5670a5eefe991843689b8d8563f Mon Sep 17 00:00:00 2001
From: Venkata Mohan Reddy <mohanreddykv@gmail.com>
Date: Thu, 18 Feb 2010 12:31:05 +0100
Subject: ipvs: SCTP Trasport Loadbalancing Support

Enhance IPVS to load balance SCTP transport protocol packets. This is done
based on the SCTP rfc 4960. All possible control chunks have been taken
care. The state machine used in this code looks some what lengthy. I tried
to make the state machine easy to understand.

Signed-off-by: Venkata Mohan Reddy Koppula <mohanreddykv@gmail.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipvs/Kconfig            |    7 +
 net/netfilter/ipvs/Makefile           |    1 +
 net/netfilter/ipvs/ip_vs_core.c       |   62 +-
 net/netfilter/ipvs/ip_vs_ctl.c        |    5 +-
 net/netfilter/ipvs/ip_vs_proto.c      |    3 +
 net/netfilter/ipvs/ip_vs_proto_sctp.c | 1183 +++++++++++++++++++++++++++++++++
 net/netfilter/ipvs/ip_vs_sync.c       |   14 +
 7 files changed, 1264 insertions(+), 11 deletions(-)
 create mode 100644 net/netfilter/ipvs/ip_vs_proto_sctp.c

(limited to 'net/netfilter')

diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 817a8898203b..712ccad13344 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -104,6 +104,13 @@ config	IP_VS_PROTO_AH
 	  This option enables support for load balancing AH (Authentication
 	  Header) transport protocol. Say Y if unsure.
 
+config  IP_VS_PROTO_SCTP
+	bool "SCTP load balancing support"
+	select LIBCRC32C
+	---help---
+	  This option enables support for load balancing SCTP transport
+	  protocol. Say Y if unsure.
+
 comment "IPVS scheduler"
 
 config	IP_VS_RR
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index 73a46fe1fe4c..e3baefd7066e 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -7,6 +7,7 @@ ip_vs_proto-objs-y :=
 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o
 
 ip_vs-objs :=	ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o	   \
 		ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o	   		   \
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 847ffca40184..72e96d823ebf 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -31,6 +31,7 @@
 #include <linux/kernel.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
+#include <linux/sctp.h>
 #include <linux/icmp.h>
 
 #include <net/ip.h>
@@ -81,6 +82,8 @@ const char *ip_vs_proto_name(unsigned proto)
 		return "UDP";
 	case IPPROTO_TCP:
 		return "TCP";
+	case IPPROTO_SCTP:
+		return "SCTP";
 	case IPPROTO_ICMP:
 		return "ICMP";
 #ifdef CONFIG_IP_VS_IPV6
@@ -589,8 +592,9 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		ip_send_check(ciph);
 	}
 
-	/* the TCP/UDP port */
-	if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) {
+	/* the TCP/UDP/SCTP port */
+	if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol ||
+	    IPPROTO_SCTP == ciph->protocol) {
 		__be16 *ports = (void *)ciph + ciph->ihl*4;
 
 		if (inout)
@@ -630,8 +634,9 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		ciph->saddr = cp->daddr.in6;
 	}
 
-	/* the TCP/UDP port */
-	if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) {
+	/* the TCP/UDP/SCTP port */
+	if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr ||
+	    IPPROTO_SCTP == ciph->nexthdr) {
 		__be16 *ports = (void *)ciph + sizeof(struct ipv6hdr);
 
 		if (inout)
@@ -679,7 +684,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 		goto out;
 	}
 
-	if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol)
+	if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
+	    IPPROTO_SCTP == protocol)
 		offset += 2 * sizeof(__u16);
 	if (!skb_make_writable(skb, offset))
 		goto out;
@@ -857,6 +863,21 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
 }
 #endif
 
+/*
+ * Check if sctp chunc is ABORT chunk
+ */
+static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len)
+{
+	sctp_chunkhdr_t *sch, schunk;
+	sch = skb_header_pointer(skb, nh_len + sizeof(sctp_sctphdr_t),
+			sizeof(schunk), &schunk);
+	if (sch == NULL)
+		return 0;
+	if (sch->type == SCTP_CID_ABORT)
+		return 1;
+	return 0;
+}
+
 static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
 {
 	struct tcphdr _tcph, *th;
@@ -999,7 +1020,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 	if (unlikely(!cp)) {
 		if (sysctl_ip_vs_nat_icmp_send &&
 		    (pp->protocol == IPPROTO_TCP ||
-		     pp->protocol == IPPROTO_UDP)) {
+		     pp->protocol == IPPROTO_UDP ||
+		     pp->protocol == IPPROTO_SCTP)) {
 			__be16 _ports[2], *pptr;
 
 			pptr = skb_header_pointer(skb, iph.len,
@@ -1014,8 +1036,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 				 * existing entry if it is not RST
 				 * packet or not TCP packet.
 				 */
-				if (iph.protocol != IPPROTO_TCP
-				    || !is_tcp_reset(skb, iph.len)) {
+				if ((iph.protocol != IPPROTO_TCP &&
+				     iph.protocol != IPPROTO_SCTP)
+				     || ((iph.protocol == IPPROTO_TCP
+					  && !is_tcp_reset(skb, iph.len))
+					 || (iph.protocol == IPPROTO_SCTP
+						&& !is_sctp_abort(skb,
+							iph.len)))) {
 #ifdef CONFIG_IP_VS_IPV6
 					if (af == AF_INET6)
 						icmpv6_send(skb,
@@ -1235,7 +1262,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 
 	/* do the statistics and put it back */
 	ip_vs_in_stats(cp, skb);
-	if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr)
+	if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr ||
+	    IPPROTO_SCTP == cih->nexthdr)
 		offset += 2 * sizeof(__u16);
 	verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
 	/* do not touch skb anymore */
@@ -1358,6 +1386,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	 * encorage the standby servers to update the connections timeout
 	 */
 	pkts = atomic_add_return(1, &cp->in_pkts);
+	if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+	    cp->protocol == IPPROTO_SCTP) {
+		if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
+			(atomic_read(&cp->in_pkts) %
+			 sysctl_ip_vs_sync_threshold[1]
+			 == sysctl_ip_vs_sync_threshold[0])) ||
+				(cp->old_state != cp->state &&
+				 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
+				  (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
+				  (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
+			ip_vs_sync_conn(cp);
+			goto out;
+		}
+	}
+
 	if (af == AF_INET &&
 	    (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
 	    (((cp->protocol != IPPROTO_TCP ||
@@ -1370,6 +1413,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	       (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
 	       (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
 		ip_vs_sync_conn(cp);
+out:
 	cp->old_state = cp->state;
 
 	ip_vs_conn_put(cp);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 00d0b152db39..7ee9c3426f44 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2132,8 +2132,9 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 		}
 	}
 
-	/* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
-	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
+	/* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
+	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
+	    usvc.protocol != IPPROTO_SCTP) {
 		pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
 		       usvc.protocol, &usvc.addr.ip,
 		       ntohs(usvc.port), usvc.sched_name);
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 3e7671674549..0e584553819d 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -257,6 +257,9 @@ int __init ip_vs_protocol_init(void)
 #ifdef CONFIG_IP_VS_PROTO_UDP
 	REGISTER_PROTOCOL(&ip_vs_protocol_udp);
 #endif
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+	REGISTER_PROTOCOL(&ip_vs_protocol_sctp);
+#endif
 #ifdef CONFIG_IP_VS_PROTO_AH
 	REGISTER_PROTOCOL(&ip_vs_protocol_ah);
 #endif
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
new file mode 100644
index 000000000000..c9a3f7a21d53
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -0,0 +1,1183 @@
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/sctp.h>
+#include <net/ip.h>
+#include <net/ip6_checksum.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/sctp/checksum.h>
+#include <net/ip_vs.h>
+
+
+static struct ip_vs_conn *
+sctp_conn_in_get(int af,
+		 const struct sk_buff *skb,
+		 struct ip_vs_protocol *pp,
+		 const struct ip_vs_iphdr *iph,
+		 unsigned int proto_off,
+		 int inverse)
+{
+	__be16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse)) 
+		return ip_vs_conn_in_get(af, iph->protocol,
+					 &iph->saddr, pptr[0],
+					 &iph->daddr, pptr[1]);
+	else 
+		return ip_vs_conn_in_get(af, iph->protocol,
+					 &iph->daddr, pptr[1],
+					 &iph->saddr, pptr[0]);
+}
+
+static struct ip_vs_conn *
+sctp_conn_out_get(int af,
+		  const struct sk_buff *skb,
+		  struct ip_vs_protocol *pp,
+		  const struct ip_vs_iphdr *iph,
+		  unsigned int proto_off,
+		  int inverse)
+{
+	__be16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse)) 
+		return ip_vs_conn_out_get(af, iph->protocol,
+					  &iph->saddr, pptr[0],
+					  &iph->daddr, pptr[1]);
+	else 
+		return ip_vs_conn_out_get(af, iph->protocol,
+					  &iph->daddr, pptr[1],
+					  &iph->saddr, pptr[0]);
+}
+
+static int
+sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+		   int *verdict, struct ip_vs_conn **cpp)
+{
+	struct ip_vs_service *svc;
+	sctp_chunkhdr_t _schunkh, *sch;
+	sctp_sctphdr_t *sh, _sctph;
+	struct ip_vs_iphdr iph;
+
+	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+
+	sh = skb_header_pointer(skb, iph.len, sizeof(_sctph), &_sctph);
+	if (sh == NULL)
+		return 0;
+
+	sch = skb_header_pointer(skb, iph.len + sizeof(sctp_sctphdr_t),
+				 sizeof(_schunkh), &_schunkh);
+	if (sch == NULL)
+		return 0;
+
+	if ((sch->type == SCTP_CID_INIT) &&
+	    (svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+				     &iph.daddr, sh->dest))) {
+		if (ip_vs_todrop()) {
+			/*
+			 * It seems that we are very loaded.
+			 * We have to drop this packet :(
+			 */
+			ip_vs_service_put(svc);
+			*verdict = NF_DROP;
+			return 0;
+		}
+		/*
+		 * Let the virtual server select a real server for the
+		 * incoming connection, and create a connection entry.
+		 */
+		*cpp = ip_vs_schedule(svc, skb);
+		if (!*cpp) {
+			*verdict = ip_vs_leave(svc, skb, pp);
+			return 0;
+		}
+		ip_vs_service_put(svc);
+	}
+
+	return 1;
+}
+
+static int
+sctp_snat_handler(struct sk_buff *skb,
+		  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+	sctp_sctphdr_t *sctph;
+	unsigned int sctphoff;
+	__be32 crc32;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		sctphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		sctphoff = ip_hdrlen(skb);
+
+	/* csum_check requires unshared skb */
+	if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+			return 0;
+
+		/* Call application helper if needed */
+		if (!ip_vs_app_pkt_out(cp, skb))
+			return 0;
+	}
+
+	sctph = (void *) skb_network_header(skb) + sctphoff;
+	sctph->source = cp->vport;
+
+	/* Calculate the checksum */
+	crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff);
+	for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next)
+		crc32 = sctp_update_cksum((u8 *) skb->data, skb_headlen(skb),
+				          crc32);
+	crc32 = sctp_end_cksum(crc32);
+	sctph->checksum = crc32;
+
+	return 1;
+}
+
+static int
+sctp_dnat_handler(struct sk_buff *skb,
+		  struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+
+	sctp_sctphdr_t *sctph;
+	unsigned int sctphoff;
+	__be32 crc32;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (cp->af == AF_INET6)
+		sctphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		sctphoff = ip_hdrlen(skb);
+
+	/* csum_check requires unshared skb */
+	if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
+		return 0;
+
+	if (unlikely(cp->app != NULL)) {
+		/* Some checks before mangling */
+		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+			return 0;
+
+		/* Call application helper if needed */
+		if (!ip_vs_app_pkt_out(cp, skb))
+			return 0;
+	}
+
+	sctph = (void *) skb_network_header(skb) + sctphoff;
+	sctph->dest = cp->dport;
+
+	/* Calculate the checksum */
+	crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff);
+	for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next)
+		crc32 = sctp_update_cksum((u8 *) skb->data, skb_headlen(skb),
+					  crc32);
+	crc32 = sctp_end_cksum(crc32);
+	sctph->checksum = crc32;
+
+	return 1;
+}
+
+static int
+sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+	struct sk_buff *list = skb_shinfo(skb)->frag_list;
+	unsigned int sctphoff;
+	struct sctphdr *sh, _sctph;
+	__le32 cmp;
+	__le32 val;
+	__u32 tmp;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		sctphoff = sizeof(struct ipv6hdr);
+	else
+#endif
+		sctphoff = ip_hdrlen(skb);
+
+	sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph);
+	if (sh == NULL)
+		return 0;
+
+	cmp = sh->checksum;
+
+	tmp = sctp_start_cksum((__u8 *) sh, skb_headlen(skb));
+	for (; list; list = list->next)
+		tmp = sctp_update_cksum((__u8 *) list->data,
+					skb_headlen(list), tmp);
+
+	val = sctp_end_cksum(tmp);
+
+	if (val != cmp) {
+		/* CRC failure, dump it. */
+		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+				"Failed checksum for");
+		return 0;
+	}
+	return 1;
+}
+
+struct ipvs_sctp_nextstate {
+	int next_state;
+};
+enum ipvs_sctp_event_t {
+	IP_VS_SCTP_EVE_DATA_CLI,
+	IP_VS_SCTP_EVE_DATA_SER,
+	IP_VS_SCTP_EVE_INIT_CLI,
+	IP_VS_SCTP_EVE_INIT_SER,
+	IP_VS_SCTP_EVE_INIT_ACK_CLI,
+	IP_VS_SCTP_EVE_INIT_ACK_SER,
+	IP_VS_SCTP_EVE_COOKIE_ECHO_CLI,
+	IP_VS_SCTP_EVE_COOKIE_ECHO_SER,
+	IP_VS_SCTP_EVE_COOKIE_ACK_CLI,
+	IP_VS_SCTP_EVE_COOKIE_ACK_SER,
+	IP_VS_SCTP_EVE_ABORT_CLI,
+	IP_VS_SCTP_EVE__ABORT_SER,
+	IP_VS_SCTP_EVE_SHUT_CLI,
+	IP_VS_SCTP_EVE_SHUT_SER,
+	IP_VS_SCTP_EVE_SHUT_ACK_CLI,
+	IP_VS_SCTP_EVE_SHUT_ACK_SER,
+	IP_VS_SCTP_EVE_SHUT_COM_CLI,
+	IP_VS_SCTP_EVE_SHUT_COM_SER,
+	IP_VS_SCTP_EVE_LAST
+};
+
+static enum ipvs_sctp_event_t sctp_events[255] = {
+	IP_VS_SCTP_EVE_DATA_CLI,
+	IP_VS_SCTP_EVE_INIT_CLI,
+	IP_VS_SCTP_EVE_INIT_ACK_CLI,
+	IP_VS_SCTP_EVE_DATA_CLI,
+	IP_VS_SCTP_EVE_DATA_CLI,
+	IP_VS_SCTP_EVE_DATA_CLI,
+	IP_VS_SCTP_EVE_ABORT_CLI,
+	IP_VS_SCTP_EVE_SHUT_CLI,
+	IP_VS_SCTP_EVE_SHUT_ACK_CLI,
+	IP_VS_SCTP_EVE_DATA_CLI,
+	IP_VS_SCTP_EVE_COOKIE_ECHO_CLI,
+	IP_VS_SCTP_EVE_COOKIE_ACK_CLI,
+	IP_VS_SCTP_EVE_DATA_CLI,
+	IP_VS_SCTP_EVE_DATA_CLI,
+	IP_VS_SCTP_EVE_SHUT_COM_CLI,
+};
+
+static struct ipvs_sctp_nextstate
+ sctp_states_table[IP_VS_SCTP_S_LAST][IP_VS_SCTP_EVE_LAST] = {
+	/*
+	 * STATE : IP_VS_SCTP_S_NONE
+	 */
+	/*next state *//*event */
+	{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
+	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ },
+	 },
+	/*
+	 * STATE : IP_VS_SCTP_S_INIT_CLI
+	 * Cient sent INIT and is waiting for reply from server(In ECHO_WAIT)
+	 */
+	{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
+	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ECHO_CLI */ },
+	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_ECHO_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+	 },
+	/*
+	 * State : IP_VS_SCTP_S_INIT_SER
+	 * Server sent INIT and waiting for INIT ACK from the client
+	 */
+	{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
+	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+	 {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+	 },
+	/*
+	 * State : IP_VS_SCTP_S_INIT_ACK_CLI
+	 * Client sent INIT ACK and waiting for ECHO from the server
+	 */
+	{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
+	 /*
+	  * We have got an INIT from client. From the spec.“Upon receipt of
+	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+	  * an INIT ACK using the same parameters it sent in its  original
+	  * INIT chunk (including its Initiate Tag, unchanged”).
+	  */
+	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+	 /*
+	  * INIT_ACK has been resent by the client, let us stay is in
+	  * the same state
+	  */
+	 {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+	 /*
+	  * INIT_ACK sent by the server, close the connection
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+	 /*
+	  * ECHO by client, it should not happen, close the connection
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+	 /*
+	  * ECHO by server, this is what we are expecting, move to ECHO_SER
+	  */
+	 {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+	 /*
+	  * COOKIE ACK from client, it should not happen, close the connection
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+	 /*
+	  * Unexpected COOKIE ACK from server, staty in the same state
+	  */
+	 {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+	 },
+	/*
+	 * State : IP_VS_SCTP_S_INIT_ACK_SER
+	 * Server sent INIT ACK and waiting for ECHO from the client
+	 */
+	{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
+	 /*
+	  * We have got an INIT from client. From the spec.“Upon receipt of
+	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+	  * an INIT ACK using the same parameters it sent in its  original
+	  * INIT chunk (including its Initiate Tag, unchanged”).
+	  */
+	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+	 /*
+	  * Unexpected INIT_ACK by the client, let us close the connection
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+	 /*
+	  * INIT_ACK resent by the server, let us move to same state
+	  */
+	 {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+	 /*
+	  * Client send the ECHO, this is what we are expecting,
+	  * move to ECHO_CLI
+	  */
+	 {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+	 /*
+	  * ECHO received from the server, Not sure what to do,
+	  * let us close it
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+	 /*
+	  * COOKIE ACK from client, let us stay in the same state
+	  */
+	 {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+	 /*
+	  * COOKIE ACK from server, hmm... this should not happen, lets close
+	  * the connection.
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+	 },
+	/*
+	 * State : IP_VS_SCTP_S_ECHO_CLI
+	 * Cient  sent ECHO and waiting COOKEI ACK from the Server
+	 */
+	{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
+	 /*
+	  * We have got an INIT from client. From the spec.“Upon receipt of
+	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+	  * an INIT ACK using the same parameters it sent in its  original
+	  * INIT chunk (including its Initiate Tag, unchanged”).
+	  */
+	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+	 /*
+	  * INIT_ACK has been by the client, let us close the connection
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+	 /*
+	  * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
+	  * “If an INIT ACK is received by an endpoint in any state other
+	  * than the COOKIE-WAIT state, the endpoint should discard the
+	  * INIT ACK chunk”. Stay in the same state
+	  */
+	 {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+	 /*
+	  * Client resent the ECHO, let us stay in the same state
+	  */
+	 {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+	 /*
+	  * ECHO received from the server, Not sure what to do,
+	  * let us close it
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+	 /*
+	  * COOKIE ACK from client, this shoud not happen, let's close the
+	  * connection
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+	 /*
+	  * COOKIE ACK from server, this is what we are awaiting,lets move to
+	  * ESTABLISHED.
+	  */
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+	 },
+	/*
+	 * State : IP_VS_SCTP_S_ECHO_SER
+	 * Server sent ECHO and waiting COOKEI ACK from the client
+	 */
+	{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
+	 /*
+	  * We have got an INIT from client. From the spec.“Upon receipt of
+	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+	  * an INIT ACK using the same parameters it sent in its  original
+	  * INIT chunk (including its Initiate Tag, unchanged”).
+	  */
+	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+	 /*
+	  * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
+	  * “If an INIT ACK is received by an endpoint in any state other
+	  * than the COOKIE-WAIT state, the endpoint should discard the
+	  * INIT ACK chunk”. Stay in the same state
+	  */
+	 {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+	 /*
+	  * INIT_ACK has been by the server, let us close the connection
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+	 /*
+	  * Client sent the ECHO, not sure what to do, let's close the
+	  * connection.
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+	 /*
+	  * ECHO resent by the server, stay in the same state
+	  */
+	 {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+	 /*
+	  * COOKIE ACK from client, this is what we are expecting, let's move
+	  * to ESTABLISHED.
+	  */
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+	 /*
+	  * COOKIE ACK from server, this should not happen, lets close the
+	  * connection.
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+	 },
+	/*
+	 * State : IP_VS_SCTP_S_ESTABLISHED
+	 * Association established
+	 */
+	{{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_SER */ },
+	 /*
+	  * We have got an INIT from client. From the spec.“Upon receipt of
+	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+	  * an INIT ACK using the same parameters it sent in its  original
+	  * INIT chunk (including its Initiate Tag, unchanged”).
+	  */
+	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+	 /*
+	  * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
+	  * “If an INIT ACK is received by an endpoint in any state other
+	  * than the COOKIE-WAIT state, the endpoint should discard the
+	  * INIT ACK chunk”. Stay in the same state
+	  */
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+	 /*
+	  * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
+	  * peer and peer shall move to the ESTABISHED. if it doesn't handle
+	  * it will send ERROR chunk. So, stay in the same state
+	  */
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+	 /*
+	  * COOKIE ACK from client, not sure what to do stay in the same state
+	  */
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+	 /*
+	  * SHUTDOWN from the client, move to SHUDDOWN_CLI
+	  */
+	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+	 /*
+	  * SHUTDOWN from the server, move to SHUTDOWN_SER
+	  */
+	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
+	 /*
+	  * client sent SHUDTDOWN_ACK, this should not happen, let's close
+	  * the connection
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+	 },
+	/*
+	 * State : IP_VS_SCTP_S_SHUT_CLI
+	 * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server
+	 */
+	/*
+	 * We recieved the data chuck, keep the state unchanged. I assume
+	 * that still data chuncks  can be received by both the peers in
+	 * SHUDOWN state
+	 */
+
+	{{IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ },
+	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_SER */ },
+	 /*
+	  * We have got an INIT from client. From the spec.“Upon receipt of
+	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+	  * an INIT ACK using the same parameters it sent in its  original
+	  * INIT chunk (including its Initiate Tag, unchanged”).
+	  */
+	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+	 /*
+	  * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
+	  * “If an INIT ACK is received by an endpoint in any state other
+	  * than the COOKIE-WAIT state, the endpoint should discard the
+	  * INIT ACK chunk”. Stay in the same state
+	  */
+	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+	 /*
+	  * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
+	  * peer and peer shall move to the ESTABISHED. if it doesn't handle
+	  * it will send ERROR chunk. So, stay in the same state
+	  */
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+	 /*
+	  * COOKIE ACK from client, not sure what to do stay in the same state
+	  */
+	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+	 /*
+	  * SHUTDOWN resent from the client, move to SHUDDOWN_CLI
+	  */
+	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+	 /*
+	  * SHUTDOWN from the server, move to SHUTDOWN_SER
+	  */
+	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
+	 /*
+	  * client sent SHUDTDOWN_ACK, this should not happen, let's close
+	  * the connection
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+	 /*
+	  * Server sent SHUTDOWN ACK, this is what we are expecting, let's move
+	  * to SHUDOWN_ACK_SER
+	  */
+	 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+	 /*
+	  * SHUTDOWN COM from client, this should not happen, let's close the
+	  * connection
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+	 },
+	/*
+	 * State : IP_VS_SCTP_S_SHUT_SER
+	 * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client
+	 */
+	/*
+	 * We recieved the data chuck, keep the state unchanged. I assume
+	 * that still data chuncks  can be received by both the peers in
+	 * SHUDOWN state
+	 */
+
+	{{IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_CLI */ },
+	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_SER */ },
+	 /*
+	  * We have got an INIT from client. From the spec.“Upon receipt of
+	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+	  * an INIT ACK using the same parameters it sent in its  original
+	  * INIT chunk (including its Initiate Tag, unchanged”).
+	  */
+	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+	 /*
+	  * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
+	  * “If an INIT ACK is received by an endpoint in any state other
+	  * than the COOKIE-WAIT state, the endpoint should discard the
+	  * INIT ACK chunk”. Stay in the same state
+	  */
+	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+	 /*
+	  * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
+	  * peer and peer shall move to the ESTABISHED. if it doesn't handle
+	  * it will send ERROR chunk. So, stay in the same state
+	  */
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+	 /*
+	  * COOKIE ACK from client, not sure what to do stay in the same state
+	  */
+	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+	 /*
+	  * SHUTDOWN resent from the client, move to SHUDDOWN_CLI
+	  */
+	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+	 /*
+	  * SHUTDOWN resent from the server, move to SHUTDOWN_SER
+	  */
+	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
+	 /*
+	  * client sent SHUDTDOWN_ACK, this is what we are expecting, let's
+	  * move to SHUT_ACK_CLI
+	  */
+	 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+	 /*
+	  * Server sent SHUTDOWN ACK, this should not happen, let's close the
+	  * connection
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+	 /*
+	  * SHUTDOWN COM from client, this should not happen, let's close the
+	  * connection
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+	 },
+
+	/*
+	 * State : IP_VS_SCTP_S_SHUT_ACK_CLI
+	 * SHUTDOWN ACK from the client, awaiting for SHUTDOWN COM from server
+	 */
+	/*
+	 * We recieved the data chuck, keep the state unchanged. I assume
+	 * that still data chuncks  can be received by both the peers in
+	 * SHUDOWN state
+	 */
+
+	{{IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ },
+	 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_SER */ },
+	 /*
+	  * We have got an INIT from client. From the spec.“Upon receipt of
+	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+	  * an INIT ACK using the same parameters it sent in its  original
+	  * INIT chunk (including its Initiate Tag, unchanged”).
+	  */
+	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+	 /*
+	  * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
+	  * “If an INIT ACK is received by an endpoint in any state other
+	  * than the COOKIE-WAIT state, the endpoint should discard the
+	  * INIT ACK chunk”. Stay in the same state
+	  */
+	 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+	 /*
+	  * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
+	  * peer and peer shall move to the ESTABISHED. if it doesn't handle
+	  * it will send ERROR chunk. So, stay in the same state
+	  */
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+	 /*
+	  * COOKIE ACK from client, not sure what to do stay in the same state
+	  */
+	 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+	 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+	 /*
+	  * SHUTDOWN sent from the client, move to SHUDDOWN_CLI
+	  */
+	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+	 /*
+	  * SHUTDOWN sent from the server, move to SHUTDOWN_SER
+	  */
+	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
+	 /*
+	  * client resent SHUDTDOWN_ACK, let's stay in the same state
+	  */
+	 {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+	 /*
+	  * Server sent SHUTDOWN ACK, this should not happen, let's close the
+	  * connection
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+	 /*
+	  * SHUTDOWN COM from client, this should not happen, let's close the
+	  * connection
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+	 /*
+	  * SHUTDOWN COMPLETE from server this is what we are expecting.
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+	 },
+
+	/*
+	 * State : IP_VS_SCTP_S_SHUT_ACK_SER
+	 * SHUTDOWN ACK from the server, awaiting for SHUTDOWN COM from client
+	 */
+	/*
+	 * We recieved the data chuck, keep the state unchanged. I assume
+	 * that still data chuncks  can be received by both the peers in
+	 * SHUDOWN state
+	 */
+
+	{{IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_CLI */ },
+	 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_SER */ },
+	 /*
+	  * We have got an INIT from client. From the spec.“Upon receipt of
+	  * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+	  * an INIT ACK using the same parameters it sent in its  original
+	  * INIT chunk (including its Initiate Tag, unchanged”).
+	  */
+	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+	 /*
+	  * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
+	  * “If an INIT ACK is received by an endpoint in any state other
+	  * than the COOKIE-WAIT state, the endpoint should discard the
+	  * INIT ACK chunk”. Stay in the same state
+	  */
+	 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+	 /*
+	  * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
+	  * peer and peer shall move to the ESTABISHED. if it doesn't handle
+	  * it will send ERROR chunk. So, stay in the same state
+	  */
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+	 {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+	 /*
+	  * COOKIE ACK from client, not sure what to do stay in the same state
+	  */
+	 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+	 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+	 /*
+	  * SHUTDOWN sent from the client, move to SHUDDOWN_CLI
+	  */
+	 {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+	 /*
+	  * SHUTDOWN sent from the server, move to SHUTDOWN_SER
+	  */
+	 {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
+	 /*
+	  * client sent SHUDTDOWN_ACK, this should not happen let's close
+	  * the connection.
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+	 /*
+	  * Server resent SHUTDOWN ACK, stay in the same state
+	  */
+	 {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+	 /*
+	  * SHUTDOWN COM from client, this what we are expecting, let's close
+	  * the connection
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+	 /*
+	  * SHUTDOWN COMPLETE from server this should not happen.
+	  */
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+	 },
+	/*
+	 * State : IP_VS_SCTP_S_CLOSED
+	 */
+	{{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
+	 {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+	 {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+	 {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+	 }
+};
+
+/*
+ *      Timeout table[state]
+ */
+static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
+	[IP_VS_SCTP_S_NONE]         =     2 * HZ,
+	[IP_VS_SCTP_S_INIT_CLI]     =     1 * 60 * HZ,
+	[IP_VS_SCTP_S_INIT_SER]     =     1 * 60 * HZ,
+	[IP_VS_SCTP_S_INIT_ACK_CLI] =     1 * 60 * HZ,
+	[IP_VS_SCTP_S_INIT_ACK_SER] =     1 * 60 * HZ,
+	[IP_VS_SCTP_S_ECHO_CLI]     =     1 * 60 * HZ,
+	[IP_VS_SCTP_S_ECHO_SER]     =     1 * 60 * HZ,
+	[IP_VS_SCTP_S_ESTABLISHED]  =    15 * 60 * HZ,
+	[IP_VS_SCTP_S_SHUT_CLI]     =     1 * 60 * HZ,
+	[IP_VS_SCTP_S_SHUT_SER]     =     1 * 60 * HZ,
+	[IP_VS_SCTP_S_SHUT_ACK_CLI] =     1 * 60 * HZ,
+	[IP_VS_SCTP_S_SHUT_ACK_SER] =     1 * 60 * HZ,
+	[IP_VS_SCTP_S_CLOSED]       =    10 * HZ,
+	[IP_VS_SCTP_S_LAST]         =     2 * HZ,
+};
+
+static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
+	[IP_VS_SCTP_S_NONE]         =    "NONE",
+	[IP_VS_SCTP_S_INIT_CLI]     =    "INIT_CLI",
+	[IP_VS_SCTP_S_INIT_SER]     =    "INIT_SER",
+	[IP_VS_SCTP_S_INIT_ACK_CLI] =    "INIT_ACK_CLI",
+	[IP_VS_SCTP_S_INIT_ACK_SER] =    "INIT_ACK_SER",
+	[IP_VS_SCTP_S_ECHO_CLI]     =    "COOKIE_ECHO_CLI",
+	[IP_VS_SCTP_S_ECHO_SER]     =    "COOKIE_ECHO_SER",
+	[IP_VS_SCTP_S_ESTABLISHED]  =    "ESTABISHED",
+	[IP_VS_SCTP_S_SHUT_CLI]     =    "SHUTDOWN_CLI",
+	[IP_VS_SCTP_S_SHUT_SER]     =    "SHUTDOWN_SER",
+	[IP_VS_SCTP_S_SHUT_ACK_CLI] =    "SHUTDOWN_ACK_CLI",
+	[IP_VS_SCTP_S_SHUT_ACK_SER] =    "SHUTDOWN_ACK_SER",
+	[IP_VS_SCTP_S_CLOSED]       =    "CLOSED",
+	[IP_VS_SCTP_S_LAST]         =    "BUG!"
+};
+
+
+static const char *sctp_state_name(int state)
+{
+	if (state >= IP_VS_SCTP_S_LAST)
+		return "ERR!";
+	if (sctp_state_name_table[state])
+		return sctp_state_name_table[state];
+	return "?";
+}
+
+static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags)
+{
+}
+
+static int
+sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
+{
+
+return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST,
+				sctp_state_name_table, sname, to);
+}
+
+static inline int
+set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+		int direction, const struct sk_buff *skb)
+{
+	sctp_chunkhdr_t _sctpch, *sch;
+	unsigned char chunk_type;
+	int event, next_state;
+	int ihl;
+
+#ifdef CONFIG_IP_VS_IPV6
+	ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
+#else
+	ihl = ip_hdrlen(skb);
+#endif
+
+	sch = skb_header_pointer(skb, ihl + sizeof(sctp_sctphdr_t),
+				sizeof(_sctpch), &_sctpch);
+	if (sch == NULL)
+		return 0;
+
+	chunk_type = sch->type;
+	/*
+	 * Section 3: Multiple chunks can be bundled into one SCTP packet
+	 * up to the MTU size, except for the INIT, INIT ACK, and
+	 * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with
+	 * any other chunk in a packet.
+	 *
+	 * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control
+	 * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be
+	 * bundled with an ABORT, but they MUST be placed before the ABORT
+	 * in the SCTP packet or they will be ignored by the receiver.
+	 */
+	if ((sch->type == SCTP_CID_COOKIE_ECHO) ||
+	    (sch->type == SCTP_CID_COOKIE_ACK)) {
+		sch = skb_header_pointer(skb, (ihl + sizeof(sctp_sctphdr_t) +
+				sch->length), sizeof(_sctpch), &_sctpch);
+		if (sch) {
+			if (sch->type == SCTP_CID_ABORT)
+				chunk_type = sch->type;
+		}
+	}
+
+	event = sctp_events[chunk_type];
+
+	/*
+	 *  If the direction is IP_VS_DIR_OUTPUT, this event is from server
+	 */
+	if (direction == IP_VS_DIR_OUTPUT)
+		event++;
+	/*
+	 * get next state
+	 */
+	next_state = sctp_states_table[cp->state][event].next_state;
+
+	if (next_state != cp->state) {
+		struct ip_vs_dest *dest = cp->dest;
+
+		IP_VS_DBG_BUF(8, "%s %s  %s:%d->"
+				"%s:%d state: %s->%s conn->refcnt:%d\n",
+				pp->name,
+				((direction == IP_VS_DIR_OUTPUT) ?
+				 "output " : "input "),
+				IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+				ntohs(cp->dport),
+				IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+				ntohs(cp->cport),
+				sctp_state_name(cp->state),
+				sctp_state_name(next_state),
+				atomic_read(&cp->refcnt));
+		if (dest) {
+			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+				(next_state != IP_VS_SCTP_S_ESTABLISHED)) {
+				atomic_dec(&dest->activeconns);
+				atomic_inc(&dest->inactconns);
+				cp->flags |= IP_VS_CONN_F_INACTIVE;
+			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+				   (next_state == IP_VS_SCTP_S_ESTABLISHED)) {
+				atomic_inc(&dest->activeconns);
+				atomic_dec(&dest->inactconns);
+				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+			}
+		}
+	}
+
+	 cp->timeout = pp->timeout_table[cp->state = next_state];
+
+	 return 1;
+}
+
+static int
+sctp_state_transition(struct ip_vs_conn *cp, int direction,
+		const struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+	int ret = 0;
+
+	spin_lock(&cp->lock);
+	ret = set_sctp_state(pp, cp, direction, skb);
+	spin_unlock(&cp->lock);
+
+	return ret;
+}
+
+/*
+ *      Hash table for SCTP application incarnations
+ */
+#define SCTP_APP_TAB_BITS        4
+#define SCTP_APP_TAB_SIZE        (1 << SCTP_APP_TAB_BITS)
+#define SCTP_APP_TAB_MASK        (SCTP_APP_TAB_SIZE - 1)
+
+static struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
+static DEFINE_SPINLOCK(sctp_app_lock);
+
+static inline __u16 sctp_app_hashkey(__be16 port)
+{
+	return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
+		& SCTP_APP_TAB_MASK;
+}
+
+static int sctp_register_app(struct ip_vs_app *inc)
+{
+	struct ip_vs_app *i;
+	__u16 hash;
+	__be16 port = inc->port;
+	int ret = 0;
+
+	hash = sctp_app_hashkey(port);
+
+	spin_lock_bh(&sctp_app_lock);
+	list_for_each_entry(i, &sctp_apps[hash], p_list) {
+		if (i->port == port) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+	list_add(&inc->p_list, &sctp_apps[hash]);
+	atomic_inc(&ip_vs_protocol_sctp.appcnt);
+out:
+	spin_unlock_bh(&sctp_app_lock);
+
+	return ret;
+}
+
+static void sctp_unregister_app(struct ip_vs_app *inc)
+{
+	spin_lock_bh(&sctp_app_lock);
+	atomic_dec(&ip_vs_protocol_sctp.appcnt);
+	list_del(&inc->p_list);
+	spin_unlock_bh(&sctp_app_lock);
+}
+
+static int sctp_app_conn_bind(struct ip_vs_conn *cp)
+{
+	int hash;
+	struct ip_vs_app *inc;
+	int result = 0;
+
+	/* Default binding: bind app only for NAT */
+	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+		return 0;
+	/* Lookup application incarnations and bind the right one */
+	hash = sctp_app_hashkey(cp->vport);
+
+	spin_lock(&sctp_app_lock);
+	list_for_each_entry(inc, &sctp_apps[hash], p_list) {
+		if (inc->port == cp->vport) {
+			if (unlikely(!ip_vs_app_inc_get(inc)))
+				break;
+			spin_unlock(&sctp_app_lock);
+
+			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
+					"%s:%u to app %s on port %u\n",
+					__func__,
+					IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+					ntohs(cp->cport),
+					IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+					ntohs(cp->vport),
+					inc->name, ntohs(inc->port));
+			cp->app = inc;
+			if (inc->init_conn)
+				result = inc->init_conn(inc, cp);
+			goto out;
+		}
+	}
+	spin_unlock(&sctp_app_lock);
+out:
+	return result;
+}
+
+static void ip_vs_sctp_init(struct ip_vs_protocol *pp)
+{
+	IP_VS_INIT_HASH_TABLE(sctp_apps);
+	pp->timeout_table = sctp_timeouts;
+}
+
+
+static void ip_vs_sctp_exit(struct ip_vs_protocol *pp)
+{
+
+}
+
+struct ip_vs_protocol ip_vs_protocol_sctp = {
+	.name = "SCTP",
+	.protocol = IPPROTO_SCTP,
+	.num_states = IP_VS_SCTP_S_LAST,
+	.dont_defrag = 0,
+	.appcnt = ATOMIC_INIT(0),
+	.init = ip_vs_sctp_init,
+	.exit = ip_vs_sctp_exit,
+	.register_app = sctp_register_app,
+	.unregister_app = sctp_unregister_app,
+	.conn_schedule = sctp_conn_schedule,
+	.conn_in_get = sctp_conn_in_get,
+	.conn_out_get = sctp_conn_out_get,
+	.snat_handler = sctp_snat_handler,
+	.dnat_handler = sctp_dnat_handler,
+	.csum_check = sctp_csum_check,
+	.state_name = sctp_state_name,
+	.state_transition = sctp_state_transition,
+	.app_conn_bind = sctp_app_conn_bind,
+	.debug_packet = ip_vs_tcpudp_debug_packet,
+	.timeout_change = sctp_timeout_change,
+	.set_state_timeout = sctp_set_state_timeout,
+};
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index e177f0dc2084..8fb0ae616761 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -400,6 +400,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 					flags |= IP_VS_CONN_F_INACTIVE;
 				else
 					flags &= ~IP_VS_CONN_F_INACTIVE;
+			} else if (s->protocol == IPPROTO_SCTP) {
+				if (state != IP_VS_SCTP_S_ESTABLISHED)
+					flags |= IP_VS_CONN_F_INACTIVE;
+				else
+					flags &= ~IP_VS_CONN_F_INACTIVE;
 			}
 			cp = ip_vs_conn_new(AF_INET, s->protocol,
 					    (union nf_inet_addr *)&s->caddr,
@@ -434,6 +439,15 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 				atomic_dec(&dest->inactconns);
 				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
 			}
+		} else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
+			   (cp->state != state)) {
+			dest = cp->dest;
+			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+			     (state != IP_VS_SCTP_S_ESTABLISHED)) {
+			    atomic_dec(&dest->activeconns);
+			    atomic_inc(&dest->inactconns);
+			    cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+			}
 		}
 
 		if (opt)
-- 
cgit v1.2.3


From 3ffe533c87281b68d469b279ff3a5056f9c75862 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 18 Feb 2010 08:25:24 +0000
Subject: ipv6: drop unused "dev" arg of icmpv6_send()

Dunno, what was the idea, it wasn't used for a long time.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/ipvs/ip_vs_core.c |  5 ++---
 net/netfilter/ipvs/ip_vs_xmit.c | 10 +++++-----
 2 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 72e96d823ebf..44590887a92c 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -515,8 +515,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	 */
 #ifdef CONFIG_IP_VS_IPV6
 	if (svc->af == AF_INET6)
-		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0,
-			    skb->dev);
+		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
 	else
 #endif
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
@@ -1048,7 +1047,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 						icmpv6_send(skb,
 							    ICMPV6_DEST_UNREACH,
 							    ICMPV6_PORT_UNREACH,
-							    0, skb->dev);
+							    0);
 					else
 #endif
 						icmp_send(skb,
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 30b3189bd29c..223b5018c7dc 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -311,7 +311,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	mtu = dst_mtu(&rt->u.dst);
 	if (skb->len > mtu) {
 		dst_release(&rt->u.dst);
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
 	}
@@ -454,7 +454,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	mtu = dst_mtu(&rt->u.dst);
 	if (skb->len > mtu) {
 		dst_release(&rt->u.dst);
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
 				 "ip_vs_nat_xmit_v6(): frag needed for");
 		goto tx_error;
@@ -672,7 +672,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 
 	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		dst_release(&rt->u.dst);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
@@ -814,7 +814,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* MTU checking */
 	mtu = dst_mtu(&rt->u.dst);
 	if (skb->len > mtu) {
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		dst_release(&rt->u.dst);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
@@ -965,7 +965,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	mtu = dst_mtu(&rt->u.dst);
 	if (skb->len > mtu) {
 		dst_release(&rt->u.dst);
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
 	}
-- 
cgit v1.2.3


From a88e22adf5aad79b6e2ddd1bf0109c2ba8b46b0e Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 19 Feb 2010 14:24:39 +0100
Subject: netfilter: ctnetlink: fix creation of conntrack with helpers

This patch fixes a bug that triggers an assertion if you create
a conntrack entry with a helper and netfilter debugging is enabled.
Basically, we hit the assertion because the confirmation flag is
set before the conntrack extensions are added. To fix this, we
move the extension addition before the aforementioned flag is
set.

This patch also removes the possibility of setting a helper for
existing conntracks. This operation would also trigger the
assertion since we are not allowed to add new extensions for
existing conntracks. We know noone that could benefit from
this operation sanely.

Thanks to Eric Dumazet for initial posting a preliminary patch
to address this issue.

Reported-by: David Ramblewski <David.Ramblewski@atosorigin.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_netlink.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 8b05f364b2f2..2b2af631d2b8 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1077,9 +1077,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[])
 		/* need to zero data of old helper */
 		memset(&help->help, 0, sizeof(help->help));
 	} else {
-		help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
-		if (help == NULL)
-			return -ENOMEM;
+		/* we cannot set a helper for an existing conntrack */
+		return -EOPNOTSUPP;
 	}
 
 	rcu_assign_pointer(help->helper, helper);
@@ -1263,7 +1262,6 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 	ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
 
 	ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
-	ct->status |= IPS_CONFIRMED;
 
 	rcu_read_lock();
  	if (cda[CTA_HELP]) {
@@ -1314,14 +1312,19 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 			goto err2;
 	}
 
-	if (cda[CTA_STATUS]) {
-		err = ctnetlink_change_status(ct, cda);
+	if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
+		err = ctnetlink_change_nat(ct, cda);
 		if (err < 0)
 			goto err2;
 	}
 
-	if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
-		err = ctnetlink_change_nat(ct, cda);
+	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+	nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
+	/* we must add conntrack extensions before confirmation. */
+	ct->status |= IPS_CONFIRMED;
+
+	if (cda[CTA_STATUS]) {
+		err = ctnetlink_change_status(ct, cda);
 		if (err < 0)
 			goto err2;
 	}
@@ -1340,9 +1343,6 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 			goto err2;
 	}
 
-	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
-	nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
-
 #if defined(CONFIG_NF_CONNTRACK_MARK)
 	if (cda[CTA_MARK])
 		ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
-- 
cgit v1.2.3


From 64507fdbc29c3a622180378210ecea8659b14e40 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 19 Feb 2010 15:28:38 +0100
Subject: netfilter: nf_queue: fix NF_STOLEN skb leak

commit 3bc38712e3a6e059 (handle NF_STOP and unknown verdicts in
nf_reinject) was a partial fix to packet leaks.

If user asks NF_STOLEN status, we must free the skb as well.

Reported-by: Afi Gjermund <afigjermund@gmail.com>
Signed-off-by: Eric DUmazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 3a6fd77f7761..ba095fd014e5 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -265,7 +265,6 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 		local_bh_disable();
 		entry->okfn(skb);
 		local_bh_enable();
-	case NF_STOLEN:
 		break;
 	case NF_QUEUE:
 		if (!__nf_queue(skb, elem, entry->pf, entry->hook,
@@ -273,6 +272,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 				verdict >> NF_VERDICT_BITS))
 			goto next_hook;
 		break;
+	case NF_STOLEN:
 	default:
 		kfree_skb(skb);
 	}
-- 
cgit v1.2.3


From 2c08522e5d2f0af2d6f05be558946dcbf8173683 Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Tue, 23 Feb 2010 14:55:21 +0100
Subject: netfilter: xt_recent: fix buffer overflow

e->index overflows e->stamps[] every ip_pkt_list_tot packets.

Consider the case when ip_pkt_list_tot==1; the first packet received is stored
in e->stamps[0] and e->index is initialized to 1. The next received packet
timestamp is then stored at e->stamps[1] in recent_entry_update(),
a buffer overflow because the maximum e->stamps[] index is 0.

Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_recent.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 132cfaa84cdc..1278f0aa7434 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -177,10 +177,10 @@ recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr,
 
 static void recent_entry_update(struct recent_table *t, struct recent_entry *e)
 {
+	e->index %= ip_pkt_list_tot;
 	e->stamps[e->index++] = jiffies;
 	if (e->index > e->nstamps)
 		e->nstamps = e->index;
-	e->index %= ip_pkt_list_tot;
 	list_move_tail(&e->lru_list, &t->lru_list);
 }
 
-- 
cgit v1.2.3


From 8ccb92ad41cb311e52ad1b1fe77992c7f47a3b63 Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Tue, 23 Feb 2010 14:59:12 +0100
Subject: netfilter: xt_recent: fix false match

A rule with a zero hit_count will always match.

Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_recent.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 1278f0aa7434..7073dbb8100c 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -267,7 +267,7 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 		for (i = 0; i < e->nstamps; i++) {
 			if (info->seconds && time_after(time, e->stamps[i]))
 				continue;
-			if (++hits >= info->hit_count) {
+			if (info->hit_count && ++hits >= info->hit_count) {
 				ret = !ret;
 				break;
 			}
-- 
cgit v1.2.3


From dcea992aca82cb08b4674c4c783e325835408d1e Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 24 Feb 2010 18:34:48 +0100
Subject: netfilter: xtables: replace XT_MATCH_ITERATE macro

The macro is replaced by a list.h-like foreach loop. This makes
the code more inspectable.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_TCPMSS.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 6f21b4377dbb..0e357ac9a2a8 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -239,6 +239,7 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_tcpmss_info *info = par->targinfo;
 	const struct ipt_entry *e = par->entryinfo;
+	const struct xt_entry_match *ematch;
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
 	    (par->hook_mask & ~((1 << NF_INET_FORWARD) |
@@ -248,8 +249,9 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par)
 		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
 		return false;
 	}
-	if (IPT_MATCH_ITERATE(e, find_syn_match))
-		return true;
+	xt_ematch_foreach(ematch, e)
+		if (find_syn_match(ematch))
+			return true;
 	printk("xt_TCPMSS: Only works on TCP SYN packets\n");
 	return false;
 }
@@ -259,6 +261,7 @@ static bool tcpmss_tg6_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_tcpmss_info *info = par->targinfo;
 	const struct ip6t_entry *e = par->entryinfo;
+	const struct xt_entry_match *ematch;
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
 	    (par->hook_mask & ~((1 << NF_INET_FORWARD) |
@@ -268,8 +271,9 @@ static bool tcpmss_tg6_check(const struct xt_tgchk_param *par)
 		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
 		return false;
 	}
-	if (IP6T_MATCH_ITERATE(e, find_syn_match))
-		return true;
+	xt_ematch_foreach(ematch, e)
+		if (find_syn_match(ematch))
+			return true;
 	printk("xt_TCPMSS: Only works on TCP SYN packets\n");
 	return false;
 }
-- 
cgit v1.2.3


From 51f0bc78680edccb6574ef56bd32f9e2939c8a5a Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Fri, 26 Feb 2010 17:45:14 +0100
Subject: IPVS: ip_vs_lblcr: use list headA

Use list_head rather than a custom list implementation.

Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipvs/ip_vs_lblcr.c | 44 +++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 23 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index f7476b95ab46..caa58fa1438a 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -45,6 +45,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/jiffies.h>
+#include <linux/list.h>
 
 /* for sysctl */
 #include <linux/fs.h>
@@ -85,25 +86,25 @@ static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
 /*
  *      IPVS destination set structure and operations
  */
-struct ip_vs_dest_list {
-	struct ip_vs_dest_list  *next;          /* list link */
+struct ip_vs_dest_set_elem {
+	struct list_head	list;          /* list link */
 	struct ip_vs_dest       *dest;          /* destination server */
 };
 
 struct ip_vs_dest_set {
 	atomic_t                size;           /* set size */
 	unsigned long           lastmod;        /* last modified time */
-	struct ip_vs_dest_list  *list;          /* destination list */
+	struct list_head	list;           /* destination list */
 	rwlock_t	        lock;           /* lock for this list */
 };
 
 
-static struct ip_vs_dest_list *
+static struct ip_vs_dest_set_elem *
 ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
 {
-	struct ip_vs_dest_list *e;
+	struct ip_vs_dest_set_elem *e;
 
-	for (e=set->list; e!=NULL; e=e->next) {
+	list_for_each_entry(e, &set->list, list) {
 		if (e->dest == dest)
 			/* already existed */
 			return NULL;
@@ -118,9 +119,7 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
 	atomic_inc(&dest->refcnt);
 	e->dest = dest;
 
-	/* link it to the list */
-	e->next = set->list;
-	set->list = e;
+	list_add(&e->list, &set->list);
 	atomic_inc(&set->size);
 
 	set->lastmod = jiffies;
@@ -130,34 +129,33 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
 static void
 ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
 {
-	struct ip_vs_dest_list *e, **ep;
+	struct ip_vs_dest_set_elem *e;
 
-	for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
+	list_for_each_entry(e, &set->list, list) {
 		if (e->dest == dest) {
 			/* HIT */
-			*ep = e->next;
 			atomic_dec(&set->size);
 			set->lastmod = jiffies;
 			atomic_dec(&e->dest->refcnt);
+			list_del(&e->list);
 			kfree(e);
 			break;
 		}
-		ep = &e->next;
 	}
 }
 
 static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
 {
-	struct ip_vs_dest_list *e, **ep;
+	struct ip_vs_dest_set_elem *e, *ep;
 
 	write_lock(&set->lock);
-	for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
-		*ep = e->next;
+	list_for_each_entry_safe(e, ep, &set->list, list) {
 		/*
 		 * We don't kfree dest because it is refered either
 		 * by its service or by the trash dest list.
 		 */
 		atomic_dec(&e->dest->refcnt);
+		list_del(&e->list);
 		kfree(e);
 	}
 	write_unlock(&set->lock);
@@ -166,7 +164,7 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
 /* get weighted least-connection node in the destination set */
 static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 {
-	register struct ip_vs_dest_list *e;
+	register struct ip_vs_dest_set_elem *e;
 	struct ip_vs_dest *dest, *least;
 	int loh, doh;
 
@@ -174,7 +172,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 		return NULL;
 
 	/* select the first destination server, whose weight > 0 */
-	for (e=set->list; e!=NULL; e=e->next) {
+	list_for_each_entry(e, &set->list, list) {
 		least = e->dest;
 		if (least->flags & IP_VS_DEST_F_OVERLOAD)
 			continue;
@@ -190,7 +188,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 
 	/* find the destination with the weighted least load */
   nextstage:
-	for (e=e->next; e!=NULL; e=e->next) {
+	list_for_each_entry(e, &set->list, list) {
 		dest = e->dest;
 		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
 			continue;
@@ -220,7 +218,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 /* get weighted most-connection node in the destination set */
 static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
 {
-	register struct ip_vs_dest_list *e;
+	register struct ip_vs_dest_set_elem *e;
 	struct ip_vs_dest *dest, *most;
 	int moh, doh;
 
@@ -228,7 +226,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
 		return NULL;
 
 	/* select the first destination server, whose weight > 0 */
-	for (e=set->list; e!=NULL; e=e->next) {
+	list_for_each_entry(e, &set->list, list) {
 		most = e->dest;
 		if (atomic_read(&most->weight) > 0) {
 			moh = atomic_read(&most->activeconns) * 50
@@ -240,7 +238,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
 
 	/* find the destination with the weighted most load */
   nextstage:
-	for (e=e->next; e!=NULL; e=e->next) {
+	list_for_each_entry(e, &set->list, list) {
 		dest = e->dest;
 		doh = atomic_read(&dest->activeconns) * 50
 			+ atomic_read(&dest->inactconns);
@@ -389,7 +387,7 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
 
 		/* initilize its dest set */
 		atomic_set(&(en->set.size), 0);
-		en->set.list = NULL;
+		INIT_LIST_HEAD(&en->set.list);
 		rwlock_init(&en->set.lock);
 
 		ip_vs_lblcr_hash(tbl, en);
-- 
cgit v1.2.3


From a49c65037146bfb2fe300b8277b10b4479fea5fc Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 26 Feb 2010 17:48:40 +0100
Subject: netfilter: nfnetlink_log: fix silly refcount leak

Quick fix for memory/module refcount leak.
Reference count of listener instance never reaches 0.

Start/stop of ulogd2 is enough to trigger this bug!

Now, refcounting there looks very fishy in particular this code:

 	if (!try_module_get(THIS_MODULE)) {
		...

and creation of listener instance with refcount 2,
so it may very well be ripped and redone.  :-)

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nfnetlink_log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 285e9029a9ff..d9b8fb8ab340 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -768,7 +768,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 			}
 
 			instance_destroy(inst);
-			goto out;
+			goto out_put;
 		default:
 			ret = -ENOTSUPP;
 			break;
-- 
cgit v1.2.3


From 37b7ef7203240b3aba577bb1ff6765fe15225976 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 16 Mar 2010 13:30:21 +0000
Subject: netfilter: ctnetlink: fix reliable event delivery if message building
 fails

This patch fixes a bug that allows to lose events when reliable
event delivery mode is used, ie. if NETLINK_BROADCAST_SEND_ERROR
and NETLINK_RECV_NO_ENOBUFS socket options are set.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 4 +++-
 net/netfilter/nfnetlink.c            | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2b2af631d2b8..569410a85953 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -582,7 +582,9 @@ nla_put_failure:
 nlmsg_failure:
 	kfree_skb(skb);
 errout:
-	nfnetlink_set_err(net, 0, group, -ENOBUFS);
+	if (nfnetlink_set_err(net, 0, group, -ENOBUFS) > 0)
+		return -ENOBUFS;
+
 	return 0;
 }
 #endif /* CONFIG_NF_CONNTRACK_EVENTS */
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 8eb0cc23ada3..6afa3d52ea5f 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -113,9 +113,9 @@ int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid,
 }
 EXPORT_SYMBOL_GPL(nfnetlink_send);
 
-void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error)
+int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error)
 {
-	netlink_set_err(net->nfnl, pid, group, error);
+	return netlink_set_err(net->nfnl, pid, group, error);
 }
 EXPORT_SYMBOL_GPL(nfnetlink_set_err);
 
-- 
cgit v1.2.3


From ef1691504c83ba3eb636c0cfd3ed33f7a6d0b4ee Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 22 Mar 2010 18:25:20 +0100
Subject: netfilter: xt_recent: fix regression in rules using a zero hit_count

Commit 8ccb92ad (netfilter: xt_recent: fix false match) fixed supposedly
false matches in rules using a zero hit_count. As it turns out there is
nothing false about these matches and people are actually using entries
with a hit_count of zero to make rules dependant on addresses inserted
manually through /proc.

Since this slipped past the eyes of three reviewers, instead of
reverting the commit in question, this patch explicitly checks
for a hit_count of zero to make the intentions more clear.

Reported-by: Thomas Jarosch <thomas.jarosch@intra2net.com>
Tested-by: Thomas Jarosch <thomas.jarosch@intra2net.com>
Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_recent.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 7073dbb8100c..971d172afece 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -267,7 +267,7 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 		for (i = 0; i < e->nstamps; i++) {
 			if (info->seconds && time_after(time, e->stamps[i]))
 				continue;
-			if (info->hit_count && ++hits >= info->hit_count) {
+			if (!info->hit_count || ++hits >= info->hit_count) {
 				ret = !ret;
 				break;
 			}
-- 
cgit v1.2.3


From 55e0d7cf279177dfe320f54816320558bc370f24 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 25 Mar 2010 11:00:22 +0100
Subject: netfilter: xt_hashlimit: dl_seq_stop() fix

If dl_seq_start() memory allocation fails, we crash later in
dl_seq_stop(), trying to kfree(ERR_PTR(-ENOMEM))

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_hashlimit.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 9e9c48963942..70d561a2d9e0 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -879,7 +879,8 @@ static void dl_seq_stop(struct seq_file *s, void *v)
 	struct xt_hashlimit_htable *htable = s->private;
 	unsigned int *bucket = (unsigned int *)v;
 
-	kfree(bucket);
+	if (!IS_ERR(bucket))
+		kfree(bucket);
 	spin_unlock_bh(&htable->lock);
 }
 
-- 
cgit v1.2.3


From 8f5992291457c8e6de2f5fe39849de6756be1a96 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 25 Mar 2010 17:25:11 +0100
Subject: netfilter: xt_hashlimit: IPV6 bugfix

A missing break statement in hashlimit_ipv6_mask(), and masks
between /64 and /95 are not working at all...

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_hashlimit.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 70d561a2d9e0..215a64835de8 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -493,6 +493,7 @@ static void hashlimit_ipv6_mask(__be32 *i, unsigned int p)
 	case 64 ... 95:
 		i[2] = maskl(i[2], p - 64);
 		i[3] = 0;
+		break;
 	case 96 ... 127:
 		i[3] = maskl(i[3], p - 96);
 		break;
-- 
cgit v1.2.3


From 5a0e3ad6af8660be21ca98a971cd00f331318c05 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 24 Mar 2010 17:04:11 +0900
Subject: include cleanup: Update gfp.h and slab.h includes to prepare for
 breaking implicit slab.h inclusion from percpu.h

percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files.  percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.

percpu.h -> slab.h dependency is about to be removed.  Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability.  As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.

  http://userweb.kernel.org/~tj/misc/slabh-sweep.py

The script does the followings.

* Scan files for gfp and slab usages and update includes such that
  only the necessary includes are there.  ie. if only gfp is used,
  gfp.h, if slab is used, slab.h.

* When the script inserts a new include, it looks at the include
  blocks and try to put the new include such that its order conforms
  to its surrounding.  It's put in the include block which contains
  core kernel includes, in the same order that the rest are ordered -
  alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
  doesn't seem to be any matching order.

* If the script can't find a place to put a new include (mostly
  because the file doesn't have fitting include block), it prints out
  an error message indicating which .h file needs to be added to the
  file.

The conversion was done in the following steps.

1. The initial automatic conversion of all .c files updated slightly
   over 4000 files, deleting around 700 includes and adding ~480 gfp.h
   and ~3000 slab.h inclusions.  The script emitted errors for ~400
   files.

2. Each error was manually checked.  Some didn't need the inclusion,
   some needed manual addition while adding it to implementation .h or
   embedding .c file was more appropriate for others.  This step added
   inclusions to around 150 files.

3. The script was run again and the output was compared to the edits
   from #2 to make sure no file was left behind.

4. Several build tests were done and a couple of problems were fixed.
   e.g. lib/decompress_*.c used malloc/free() wrappers around slab
   APIs requiring slab.h to be added manually.

5. The script was run on all .h files but without automatically
   editing them as sprinkling gfp.h and slab.h inclusions around .h
   files could easily lead to inclusion dependency hell.  Most gfp.h
   inclusion directives were ignored as stuff from gfp.h was usually
   wildly available and often used in preprocessor macros.  Each
   slab.h inclusion directive was examined and added manually as
   necessary.

6. percpu.h was updated not to include slab.h.

7. Build test were done on the following configurations and failures
   were fixed.  CONFIG_GCOV_KERNEL was turned off for all tests (as my
   distributed build env didn't work with gcov compiles) and a few
   more options had to be turned off depending on archs to make things
   build (like ipr on powerpc/64 which failed due to missing writeq).

   * x86 and x86_64 UP and SMP allmodconfig and a custom test config.
   * powerpc and powerpc64 SMP allmodconfig
   * sparc and sparc64 SMP allmodconfig
   * ia64 SMP allmodconfig
   * s390 SMP allmodconfig
   * alpha SMP allmodconfig
   * um on x86_64 SMP allmodconfig

8. percpu.h modifications were reverted so that it could be applied as
   a separate patch and serve as bisection point.

Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable easily on most builds of
the specific arch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
---
 net/netfilter/core.c                    | 1 +
 net/netfilter/ipvs/ip_vs_app.c          | 1 +
 net/netfilter/ipvs/ip_vs_conn.c         | 1 +
 net/netfilter/ipvs/ip_vs_core.c         | 1 +
 net/netfilter/ipvs/ip_vs_ctl.c          | 1 +
 net/netfilter/ipvs/ip_vs_dh.c           | 1 +
 net/netfilter/ipvs/ip_vs_est.c          | 1 -
 net/netfilter/ipvs/ip_vs_ftp.c          | 1 +
 net/netfilter/ipvs/ip_vs_lblc.c         | 1 +
 net/netfilter/ipvs/ip_vs_lblcr.c        | 1 +
 net/netfilter/ipvs/ip_vs_proto.c        | 1 +
 net/netfilter/ipvs/ip_vs_sh.c           | 1 +
 net/netfilter/ipvs/ip_vs_wrr.c          | 1 +
 net/netfilter/ipvs/ip_vs_xmit.c         | 1 +
 net/netfilter/nf_conntrack_acct.c       | 1 +
 net/netfilter/nf_conntrack_amanda.c     | 1 +
 net/netfilter/nf_conntrack_ecache.c     | 1 +
 net/netfilter/nf_conntrack_ftp.c        | 1 +
 net/netfilter/nf_conntrack_h323_main.c  | 1 +
 net/netfilter/nf_conntrack_helper.c     | 1 -
 net/netfilter/nf_conntrack_irc.c        | 1 +
 net/netfilter/nf_conntrack_netlink.c    | 1 +
 net/netfilter/nf_conntrack_proto.c      | 1 +
 net/netfilter/nf_conntrack_proto_dccp.c | 1 +
 net/netfilter/nf_conntrack_proto_gre.c  | 1 +
 net/netfilter/nf_conntrack_sane.c       | 1 +
 net/netfilter/nf_conntrack_standalone.c | 1 +
 net/netfilter/nf_queue.c                | 1 +
 net/netfilter/nfnetlink_log.c           | 1 +
 net/netfilter/nfnetlink_queue.c         | 1 +
 net/netfilter/x_tables.c                | 1 +
 net/netfilter/xt_CT.c                   | 1 +
 net/netfilter/xt_LED.c                  | 1 +
 net/netfilter/xt_RATEEST.c              | 1 +
 net/netfilter/xt_TCPMSS.c               | 1 +
 net/netfilter/xt_connlimit.c            | 1 +
 net/netfilter/xt_dccp.c                 | 1 +
 net/netfilter/xt_limit.c                | 1 +
 net/netfilter/xt_quota.c                | 1 +
 net/netfilter/xt_recent.c               | 1 +
 net/netfilter/xt_statistic.c            | 1 +
 net/netfilter/xt_string.c               | 1 +
 42 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 60ec4e4badaa..78b505d33bfb 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -19,6 +19,7 @@
 #include <linux/inetdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/mutex.h>
+#include <linux/slab.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 3c7e42735b60..1cb0e834f8ff 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -27,6 +27,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/netfilter.h>
+#include <linux/slab.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
 #include <net/tcp.h>
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 60bb41a8d8d4..d8f7e8ef67b4 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -32,6 +32,7 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/proc_fs.h>		/* for proc_net_* */
+#include <linux/slab.h>
 #include <linux/seq_file.h>
 #include <linux/jhash.h>
 #include <linux/random.h>
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 44590887a92c..1cd6e3fd058b 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -33,6 +33,7 @@
 #include <linux/tcp.h>
 #include <linux/sctp.h>
 #include <linux/icmp.h>
+#include <linux/slab.h>
 
 #include <net/ip.h>
 #include <net/tcp.h>
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 7ee9c3426f44..36dc1d88c2fa 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -31,6 +31,7 @@
 #include <linux/workqueue.h>
 #include <linux/swap.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index fe3e18834b91..95fd0d14200b 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -39,6 +39,7 @@
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
 #include <linux/ip.h>
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 702b53ca937c..ff28801962e0 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -17,7 +17,6 @@
 
 #include <linux/kernel.h>
 #include <linux/jiffies.h>
-#include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/interrupt.h>
 #include <linux/sysctl.h>
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 73f38ea98f25..2c7f185dfae4 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -32,6 +32,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/netfilter.h>
+#include <linux/gfp.h>
 #include <net/protocol.h>
 #include <net/tcp.h>
 #include <asm/unaligned.h>
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 1b9370db2305..94a45213faa6 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -43,6 +43,7 @@
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
 #include <linux/ip.h>
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index caa58fa1438a..535dc2b419d8 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -46,6 +46,7 @@
 #include <linux/skbuff.h>
 #include <linux/jiffies.h>
 #include <linux/list.h>
+#include <linux/slab.h>
 
 /* for sysctl */
 #include <linux/fs.h>
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 0e584553819d..7fc49f4cf5ad 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -19,6 +19,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
+#include <linux/gfp.h>
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <net/protocol.h>
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 8e6cfd36e6f0..e6cc174fbc06 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -36,6 +36,7 @@
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
 #include <linux/ip.h>
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 3c115fc19784..30db633f88f1 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -23,6 +23,7 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <linux/net.h>
 #include <linux/gcd.h>
 
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 223b5018c7dc..e450cd6f4eb5 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -17,6 +17,7 @@
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <linux/tcp.h>                  /* for tcphdr */
 #include <net/ip.h>
 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 018f90db511c..ab81b380eae6 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/netfilter.h>
+#include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/moduleparam.h>
 
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 07d9d8857e5d..372e80f07a81 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -16,6 +16,7 @@
 #include <linux/in.h>
 #include <linux/udp.h>
 #include <linux/netfilter.h>
+#include <linux/gfp.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_expect.h>
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index d5a9bcd7d61b..f516961a83b4 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -18,6 +18,7 @@
 #include <linux/percpu.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/slab.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index f0732aa18e4f..2ae3169e7633 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -13,6 +13,7 @@
 #include <linux/moduleparam.h>
 #include <linux/netfilter.h>
 #include <linux/ip.h>
+#include <linux/slab.h>
 #include <linux/ipv6.h>
 #include <linux/ctype.h>
 #include <linux/inet.h>
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index a1c8dd917e12..a487c8038044 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -17,6 +17,7 @@
 #include <linux/inet.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <linux/slab.h>
 #include <linux/udp.h>
 #include <linux/tcp.h>
 #include <linux/skbuff.h>
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 4509fa6726f8..59e1a4cd4e8b 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -15,7 +15,6 @@
 #include <linux/skbuff.h>
 #include <linux/vmalloc.h>
 #include <linux/stddef.h>
-#include <linux/slab.h>
 #include <linux/random.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 8bd98c84f77e..7673930ca342 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -15,6 +15,7 @@
 #include <linux/ip.h>
 #include <linux/tcp.h>
 #include <linux/netfilter.h>
+#include <linux/slab.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_expect.h>
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 569410a85953..afc52f2ee4ac 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -27,6 +27,7 @@
 #include <linux/netlink.h>
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
+#include <linux/slab.h>
 
 #include <linux/netfilter.h>
 #include <net/netlink.h>
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 1a4568bf7ea5..a44fa75b5178 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <linux/netfilter.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/skbuff.h>
 #include <linux/vmalloc.h>
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 9a2815549375..5292560d6d4a 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -15,6 +15,7 @@
 #include <linux/spinlock.h>
 #include <linux/skbuff.h>
 #include <linux/dccp.h>
+#include <linux/slab.h>
 
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index d899b1a69940..cf616e55ca41 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -31,6 +31,7 @@
 #include <linux/in.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
+#include <linux/slab.h>
 #include <net/dst.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index dcfecbb81c46..d9e27734b2a2 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/netfilter.h>
+#include <linux/slab.h>
 #include <linux/in.h>
 #include <linux/tcp.h>
 #include <net/netfilter/nf_conntrack.h>
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 24a42efe62ef..faa8eb3722b9 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -8,6 +8,7 @@
 
 #include <linux/types.h>
 #include <linux/netfilter.h>
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/proc_fs.h>
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index ba095fd014e5..c49ef219899e 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -1,4 +1,5 @@
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/proc_fs.h>
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index d9b8fb8ab340..203643fb2c52 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -28,6 +28,7 @@
 #include <linux/list.h>
 #include <linux/jhash.h>
 #include <linux/random.h>
+#include <linux/slab.h>
 #include <net/sock.h>
 #include <net/netfilter/nf_log.h>
 #include <net/netfilter/nfnetlink_log.h>
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 7ba4abc405c9..e70a6ef1f4f2 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -18,6 +18,7 @@
 #include <linux/skbuff.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
+#include <linux/slab.h>
 #include <linux/notifier.h>
 #include <linux/netdevice.h>
 #include <linux/netfilter.h>
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 0a12cedfe9e3..665f5beef6ad 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -22,6 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/mutex.h>
 #include <linux/mm.h>
+#include <linux/slab.h>
 #include <net/net_namespace.h>
 
 #include <linux/netfilter/x_tables.h>
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 61c50fa84703..ee18b231b950 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/gfp.h>
 #include <linux/skbuff.h>
 #include <linux/selinux.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 8ff7843bb921..3271c8e52153 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter/x_tables.h>
+#include <linux/slab.h>
 #include <linux/leds.h>
 #include <linux/mutex.h>
 
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 87ae97e5516f..d16d55df4f61 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -11,6 +11,7 @@
 #include <linux/jhash.h>
 #include <linux/rtnetlink.h>
 #include <linux/random.h>
+#include <linux/slab.h>
 #include <net/gen_stats.h>
 #include <net/netlink.h>
 
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 0e357ac9a2a8..c5f4b9919e9a 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
+#include <linux/gfp.h>
 #include <linux/ipv6.h>
 #include <linux/tcp.h>
 #include <net/dst.h>
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 26997ce90e48..388ca4596098 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -17,6 +17,7 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/jhash.h>
+#include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/random.h>
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 0989f29ade2e..395af5943ffd 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -10,6 +10,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <net/ip.h>
 #include <linux/dccp.h>
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index a0ca5339af41..e5d7e1ffb1a4 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -6,6 +6,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 390b7d09fe51..2d5562498c43 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -4,6 +4,7 @@
  * Sam Johnston <samj@samj.net>
  */
 #include <linux/skbuff.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
 
 #include <linux/netfilter/x_tables.h>
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 971d172afece..834b736857cb 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -27,6 +27,7 @@
 #include <linux/bitops.h>
 #include <linux/skbuff.h>
 #include <linux/inet.h>
+#include <linux/slab.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index d8c0f8f1a78e..937ce0633e99 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -12,6 +12,7 @@
 #include <linux/spinlock.h>
 #include <linux/skbuff.h>
 #include <linux/net.h>
+#include <linux/slab.h>
 
 #include <linux/netfilter/xt_statistic.h>
 #include <linux/netfilter/x_tables.h>
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index b4d774111311..96801ffd8af8 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -7,6 +7,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-- 
cgit v1.2.3