From 131ad62d8fc06d9d0a5c61d9526876352c2f2bbd Mon Sep 17 00:00:00 2001
From: Mr Dash Four <mr.dash.four@googlemail.com>
Date: Thu, 30 Jun 2011 13:31:57 +0200
Subject: netfilter: add SELinux context support to AUDIT target

In this revision the conversion of secid to SELinux context and adding it
to the audit log is moved from xt_AUDIT.c to audit.c with the aid of a
separate helper function - audit_log_secctx - which does both the conversion
and logging of SELinux context, thus also preventing internal secid number
being leaked to userspace. If conversion is not successful an error is raised.

With the introduction of this helper function the work done in xt_AUDIT.c is
much more simplified. It also opens the possibility of this helper function
being used by other modules (including auditd itself), if desired. With this
addition, typical (raw auditd) output after applying the patch would be:

type=NETFILTER_PKT msg=audit(1305852240.082:31012): action=0 hook=1 len=52 inif=? outif=eth0 saddr=10.1.1.7 daddr=10.1.2.1 ipid=16312 proto=6 sport=56150 dport=22 obj=system_u:object_r:ssh_client_packet_t:s0
type=NETFILTER_PKT msg=audit(1306772064.079:56): action=0 hook=3 len=48 inif=eth0 outif=? smac=00:05:5d:7c:27:0b dmac=00:02:b3:0a:7f:81 macproto=0x0800 saddr=10.1.2.1 daddr=10.1.1.7 ipid=462 proto=6 sport=22 dport=3561 obj=system_u:object_r:ssh_server_packet_t:s0

Acked-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Mr Dash Four <mr.dash.four@googlemail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_AUDIT.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net/netfilter')

diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index 363a99ec0637..4bca15a0c385 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -163,6 +163,11 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
 		break;
 	}
 
+#ifdef CONFIG_NETWORK_SECMARK
+	if (skb->secmark)
+		audit_log_secctx(ab, skb->secmark);
+#endif
+
 	audit_log_end(ab);
 
 errout:
-- 
cgit v1.2.3


From 6b75e3e8d664a9a1b99d31a7f4976ae70d1d090a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 18 Jul 2011 16:08:07 +0200
Subject: netfilter: nfnetlink: add RCU in nfnetlink_rcv_msg()

Goal of this patch is to permit nfnetlink providers not mandate
nfnl_mutex being held while nfnetlink_rcv_msg() calls them.

If struct nfnl_callback contains a non NULL call_rcu(), then
nfnetlink_rcv_msg() will use it instead of call() field, holding
rcu_read_lock instead of nfnl_mutex

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Florian Westphal <fw@strlen.de>
CC: Eric Leblond <eric@regit.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nfnetlink.c | 40 ++++++++++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 10 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index b4a4532823e8..1905976b5135 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -37,7 +37,7 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
 
 static char __initdata nfversion[] = "0.30";
 
-static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
+static const struct nfnetlink_subsystem __rcu *subsys_table[NFNL_SUBSYS_COUNT];
 static DEFINE_MUTEX(nfnl_mutex);
 
 void nfnl_lock(void)
@@ -59,7 +59,7 @@ int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
 		nfnl_unlock();
 		return -EBUSY;
 	}
-	subsys_table[n->subsys_id] = n;
+	rcu_assign_pointer(subsys_table[n->subsys_id], n);
 	nfnl_unlock();
 
 	return 0;
@@ -71,7 +71,7 @@ int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n)
 	nfnl_lock();
 	subsys_table[n->subsys_id] = NULL;
 	nfnl_unlock();
-
+	synchronize_rcu();
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
@@ -83,7 +83,7 @@ static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t t
 	if (subsys_id >= NFNL_SUBSYS_COUNT)
 		return NULL;
 
-	return subsys_table[subsys_id];
+	return rcu_dereference(subsys_table[subsys_id]);
 }
 
 static inline const struct nfnl_callback *
@@ -139,21 +139,27 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 	type = nlh->nlmsg_type;
 replay:
+	rcu_read_lock();
 	ss = nfnetlink_get_subsys(type);
 	if (!ss) {
 #ifdef CONFIG_MODULES
-		nfnl_unlock();
+		rcu_read_unlock();
 		request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type));
-		nfnl_lock();
+		rcu_read_lock();
 		ss = nfnetlink_get_subsys(type);
 		if (!ss)
 #endif
+		{
+			rcu_read_unlock();
 			return -EINVAL;
+		}
 	}
 
 	nc = nfnetlink_find_client(type, ss);
-	if (!nc)
+	if (!nc) {
+		rcu_read_unlock();
 		return -EINVAL;
+	}
 
 	{
 		int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
@@ -167,7 +173,23 @@ replay:
 		if (err < 0)
 			return err;
 
-		err = nc->call(net->nfnl, skb, nlh, (const struct nlattr **)cda);
+		if (nc->call_rcu) {
+			err = nc->call_rcu(net->nfnl, skb, nlh,
+					   (const struct nlattr **)cda);
+			rcu_read_unlock();
+		} else {
+			rcu_read_unlock();
+			nfnl_lock();
+			if (rcu_dereference_protected(
+					subsys_table[NFNL_SUBSYS_ID(type)],
+					lockdep_is_held(&nfnl_mutex)) != ss ||
+			    nfnetlink_find_client(type, ss) != nc)
+				err = -EAGAIN;
+			else
+				err = nc->call(net->nfnl, skb, nlh,
+						   (const struct nlattr **)cda);
+			nfnl_unlock();
+		}
 		if (err == -EAGAIN)
 			goto replay;
 		return err;
@@ -176,9 +198,7 @@ replay:
 
 static void nfnetlink_rcv(struct sk_buff *skb)
 {
-	nfnl_lock();
 	netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
-	nfnl_unlock();
 }
 
 static int __net_init nfnetlink_net_init(struct net *net)
-- 
cgit v1.2.3


From 84a797dd0b9f7130357b70577fcbda8e638c71a7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 18 Jul 2011 16:08:27 +0200
Subject: netfilter: nfnetlink_queue: provide rcu enabled callbacks

nenetlink_queue operations on SMP are not efficent if several queues are
used, because of nfnl_mutex contention when applications give packet
verdict.

Use new call_rcu field in struct nfnl_callback to advertize a callback
that is called under rcu_read_lock instead of nfnl_mutex.

On my 2x4x2 machine, I was able to reach 2.000.000 pps going through
user land returning NF_ACCEPT verdicts without losses, instead of less
than 500.000 pps before patch.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Florian Westphal <fw@strlen.de>
CC: Eric Leblond <eric@regit.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nfnetlink_queue.c | 41 ++++++++++++-----------------------------
 1 file changed, 12 insertions(+), 29 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index b83123f12b42..c645b87915b8 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -619,39 +619,26 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 	struct nfqnl_instance *queue;
 	unsigned int verdict;
 	struct nf_queue_entry *entry;
-	int err;
 
-	rcu_read_lock();
 	queue = instance_lookup(queue_num);
-	if (!queue) {
-		err = -ENODEV;
-		goto err_out_unlock;
-	}
+	if (!queue)
+		return -ENODEV;
 
-	if (queue->peer_pid != NETLINK_CB(skb).pid) {
-		err = -EPERM;
-		goto err_out_unlock;
-	}
+	if (queue->peer_pid != NETLINK_CB(skb).pid)
+		return -EPERM;
 
-	if (!nfqa[NFQA_VERDICT_HDR]) {
-		err = -EINVAL;
-		goto err_out_unlock;
-	}
+	if (!nfqa[NFQA_VERDICT_HDR])
+		return -EINVAL;
 
 	vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
 	verdict = ntohl(vhdr->verdict);
 
-	if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) {
-		err = -EINVAL;
-		goto err_out_unlock;
-	}
+	if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT)
+		return -EINVAL;
 
 	entry = find_dequeue_entry(queue, ntohl(vhdr->id));
-	if (entry == NULL) {
-		err = -ENOENT;
-		goto err_out_unlock;
-	}
-	rcu_read_unlock();
+	if (entry == NULL)
+		return -ENOENT;
 
 	if (nfqa[NFQA_PAYLOAD]) {
 		if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]),
@@ -664,10 +651,6 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 
 	nf_reinject(entry, verdict);
 	return 0;
-
-err_out_unlock:
-	rcu_read_unlock();
-	return err;
 }
 
 static int
@@ -780,9 +763,9 @@ err_out_unlock:
 }
 
 static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
-	[NFQNL_MSG_PACKET]	= { .call = nfqnl_recv_unsupp,
+	[NFQNL_MSG_PACKET]	= { .call_rcu = nfqnl_recv_unsupp,
 				    .attr_count = NFQA_MAX, },
-	[NFQNL_MSG_VERDICT]	= { .call = nfqnl_recv_verdict,
+	[NFQNL_MSG_VERDICT]	= { .call_rcu = nfqnl_recv_verdict,
 				    .attr_count = NFQA_MAX,
 				    .policy = nfqa_verdict_policy },
 	[NFQNL_MSG_CONFIG]	= { .call = nfqnl_recv_config,
-- 
cgit v1.2.3


From 5863702a3421b0d2a63a473cf96afeb9fe09070d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 19 Jul 2011 11:44:17 +0200
Subject: netfilter: nfnetlink_queue: assert monotonic packet ids

Packet identifier is currently setup in nfqnl_build_packet_message(),
using one atomic_inc_return().

Problem is that since several cpus might concurrently call
nfqnl_enqueue_packet() for the same queue, we can deliver packets to
consumer in non monotonic way (packet N+1 being delivered after packet
N)

This patch moves the packet id setup from nfqnl_build_packet_message()
to nfqnl_enqueue_packet() to guarantee correct delivery order.

This also removes one atomic operation.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Florian Westphal <fw@strlen.de>
CC: Pablo Neira Ayuso <pablo@netfilter.org>
CC: Eric Leblond <eric@regit.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nfnetlink_queue.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index c645b87915b8..3b2af8cb7de9 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -58,7 +58,7 @@ struct nfqnl_instance {
  */
 	spinlock_t	lock;
 	unsigned int	queue_total;
-	atomic_t	id_sequence;		/* 'sequence' of pkt ids */
+	unsigned int	id_sequence;		/* 'sequence' of pkt ids */
 	struct list_head queue_list;		/* packets in queue */
 };
 
@@ -213,13 +213,15 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
 
 static struct sk_buff *
 nfqnl_build_packet_message(struct nfqnl_instance *queue,
-			   struct nf_queue_entry *entry)
+			   struct nf_queue_entry *entry,
+			   __be32 **packet_id_ptr)
 {
 	sk_buff_data_t old_tail;
 	size_t size;
 	size_t data_len = 0;
 	struct sk_buff *skb;
-	struct nfqnl_msg_packet_hdr pmsg;
+	struct nlattr *nla;
+	struct nfqnl_msg_packet_hdr *pmsg;
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 	struct sk_buff *entskb = entry->skb;
@@ -272,12 +274,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	nfmsg->version = NFNETLINK_V0;
 	nfmsg->res_id = htons(queue->queue_num);
 
-	entry->id = atomic_inc_return(&queue->id_sequence);
-	pmsg.packet_id 		= htonl(entry->id);
-	pmsg.hw_protocol	= entskb->protocol;
-	pmsg.hook		= entry->hook;
-
-	NLA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg);
+	nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg));
+	pmsg = nla_data(nla);
+	pmsg->hw_protocol	= entskb->protocol;
+	pmsg->hook		= entry->hook;
+	*packet_id_ptr		= &pmsg->packet_id;
 
 	indev = entry->indev;
 	if (indev) {
@@ -388,6 +389,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 	struct sk_buff *nskb;
 	struct nfqnl_instance *queue;
 	int err = -ENOBUFS;
+	__be32 *packet_id_ptr;
 
 	/* rcu_read_lock()ed by nf_hook_slow() */
 	queue = instance_lookup(queuenum);
@@ -401,7 +403,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 		goto err_out;
 	}
 
-	nskb = nfqnl_build_packet_message(queue, entry);
+	nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr);
 	if (nskb == NULL) {
 		err = -ENOMEM;
 		goto err_out;
@@ -420,6 +422,8 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 				 queue->queue_total);
 		goto err_out_free_nskb;
 	}
+	entry->id = ++queue->id_sequence;
+	*packet_id_ptr = htonl(entry->id);
 
 	/* nfnetlink_unicast will either free the nskb or add it to a socket */
 	err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT);
@@ -852,7 +856,7 @@ static int seq_show(struct seq_file *s, void *v)
 			  inst->peer_pid, inst->queue_total,
 			  inst->copy_mode, inst->copy_range,
 			  inst->queue_dropped, inst->queue_user_dropped,
-			  atomic_read(&inst->id_sequence), 1);
+			  inst->id_sequence, 1);
 }
 
 static const struct seq_operations nfqnl_seq_ops = {
-- 
cgit v1.2.3


From 97d32cf9440d2111a12471740446d4d63231b79a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 19 Jul 2011 11:46:33 +0200
Subject: netfilter: nfnetlink_queue: batch verdict support

Introduces a new nfnetlink type that applies a given
verdict to all queued packets with an id <= the id in the verdict
message.

If a mark is provided it is applied to all matched packets.

This reduces the number of verdicts that have to be sent.
Applications that make use of this feature need to maintain
a timeout to send a batchverdict periodically to avoid starvation.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nfnetlink_queue.c | 115 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 103 insertions(+), 12 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 3b2af8cb7de9..fbfcd834140b 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -171,6 +171,13 @@ __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
        queue->queue_total++;
 }
 
+static void
+__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
+{
+	list_del(&entry->list);
+	queue->queue_total--;
+}
+
 static struct nf_queue_entry *
 find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
 {
@@ -185,10 +192,8 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
 		}
 	}
 
-	if (entry) {
-		list_del(&entry->list);
-		queue->queue_total--;
-	}
+	if (entry)
+		__dequeue_entry(queue, entry);
 
 	spin_unlock_bh(&queue->lock);
 
@@ -611,6 +616,92 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
 	[NFQA_PAYLOAD]		= { .type = NLA_UNSPEC },
 };
 
+static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
+	[NFQA_VERDICT_HDR]	= { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
+	[NFQA_MARK]		= { .type = NLA_U32 },
+};
+
+static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid)
+{
+	struct nfqnl_instance *queue;
+
+	queue = instance_lookup(queue_num);
+	if (!queue)
+		return ERR_PTR(-ENODEV);
+
+	if (queue->peer_pid != nlpid)
+		return ERR_PTR(-EPERM);
+
+	return queue;
+}
+
+static struct nfqnl_msg_verdict_hdr*
+verdicthdr_get(const struct nlattr * const nfqa[])
+{
+	struct nfqnl_msg_verdict_hdr *vhdr;
+	unsigned int verdict;
+
+	if (!nfqa[NFQA_VERDICT_HDR])
+		return NULL;
+
+	vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
+	verdict = ntohl(vhdr->verdict);
+	if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT)
+		return NULL;
+	return vhdr;
+}
+
+static int nfq_id_after(unsigned int id, unsigned int max)
+{
+	return (int)(id - max) > 0;
+}
+
+static int
+nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
+		   const struct nlmsghdr *nlh,
+		   const struct nlattr * const nfqa[])
+{
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	struct nf_queue_entry *entry, *tmp;
+	unsigned int verdict, maxid;
+	struct nfqnl_msg_verdict_hdr *vhdr;
+	struct nfqnl_instance *queue;
+	LIST_HEAD(batch_list);
+	u16 queue_num = ntohs(nfmsg->res_id);
+
+	queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid);
+	if (IS_ERR(queue))
+		return PTR_ERR(queue);
+
+	vhdr = verdicthdr_get(nfqa);
+	if (!vhdr)
+		return -EINVAL;
+
+	verdict = ntohl(vhdr->verdict);
+	maxid = ntohl(vhdr->id);
+
+	spin_lock_bh(&queue->lock);
+
+	list_for_each_entry_safe(entry, tmp, &queue->queue_list, list) {
+		if (nfq_id_after(entry->id, maxid))
+			break;
+		__dequeue_entry(queue, entry);
+		list_add_tail(&entry->list, &batch_list);
+	}
+
+	spin_unlock_bh(&queue->lock);
+
+	if (list_empty(&batch_list))
+		return -ENOENT;
+
+	list_for_each_entry_safe(entry, tmp, &batch_list, list) {
+		if (nfqa[NFQA_MARK])
+			entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
+		nf_reinject(entry, verdict);
+	}
+	return 0;
+}
+
 static int
 nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 		   const struct nlmsghdr *nlh,
@@ -626,20 +717,17 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 
 	queue = instance_lookup(queue_num);
 	if (!queue)
-		return -ENODEV;
 
-	if (queue->peer_pid != NETLINK_CB(skb).pid)
-		return -EPERM;
+	queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid);
+	if (IS_ERR(queue))
+		return PTR_ERR(queue);
 
-	if (!nfqa[NFQA_VERDICT_HDR])
+	vhdr = verdicthdr_get(nfqa);
+	if (!vhdr)
 		return -EINVAL;
 
-	vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
 	verdict = ntohl(vhdr->verdict);
 
-	if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT)
-		return -EINVAL;
-
 	entry = find_dequeue_entry(queue, ntohl(vhdr->id));
 	if (entry == NULL)
 		return -ENOENT;
@@ -775,6 +863,9 @@ static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
 	[NFQNL_MSG_CONFIG]	= { .call = nfqnl_recv_config,
 				    .attr_count = NFQA_CFG_MAX,
 				    .policy = nfqa_cfg_policy },
+	[NFQNL_MSG_VERDICT_BATCH]={ .call_rcu = nfqnl_recv_verdict_batch,
+				    .attr_count = NFQA_MAX,
+				    .policy = nfqa_verdict_batch_policy },
 };
 
 static const struct nfnetlink_subsystem nfqnl_subsys = {
-- 
cgit v1.2.3


From 89dc79b787d20e4b6c4077dcee1c5b1be4ab55b8 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Thu, 21 Jul 2011 12:06:18 +0200
Subject: netfilter: ipset: hash:net,iface fixed to handle overlapping nets
 behind different interfaces

If overlapping networks with different interfaces was added to
the set, the type did not handle it properly. Example

    ipset create test hash:net,iface
    ipset add test 192.168.0.0/16,eth0
    ipset add test 192.168.0.0/24,eth1

Now, if a packet was sent from 192.168.0.0/24,eth0, the type returned
a match.

In the patch the algorithm is fixed in order to correctly handle
overlapping networks.

Limitation: the same network cannot be stored with more than 64 different
interfaces in a single set.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipset/ip_set_hash_ip.c        |  6 +++--
 net/netfilter/ipset/ip_set_hash_ipport.c    |  6 +++--
 net/netfilter/ipset/ip_set_hash_ipportip.c  |  6 +++--
 net/netfilter/ipset/ip_set_hash_ipportnet.c |  6 +++--
 net/netfilter/ipset/ip_set_hash_net.c       |  6 +++--
 net/netfilter/ipset/ip_set_hash_netiface.c  | 40 +++++++++++++++++++++++------
 net/netfilter/ipset/ip_set_hash_netport.c   |  6 +++--
 7 files changed, 56 insertions(+), 20 deletions(-)

(limited to 'net/netfilter')

diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index fa80bb9b9c81..f2d576e6b769 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -53,7 +53,8 @@ struct hash_ip4_telem {
 
 static inline bool
 hash_ip4_data_equal(const struct hash_ip4_elem *ip1,
-		    const struct hash_ip4_elem *ip2)
+		    const struct hash_ip4_elem *ip2,
+		    u32 *multi)
 {
 	return ip1->ip == ip2->ip;
 }
@@ -225,7 +226,8 @@ struct hash_ip6_telem {
 
 static inline bool
 hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
-		    const struct hash_ip6_elem *ip2)
+		    const struct hash_ip6_elem *ip2,
+		    u32 *multi)
 {
 	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0;
 }
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index bbf51b67b170..6ee10f5d59bd 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -60,7 +60,8 @@ struct hash_ipport4_telem {
 
 static inline bool
 hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1,
-			const struct hash_ipport4_elem *ip2)
+			const struct hash_ipport4_elem *ip2,
+			u32 *multi)
 {
 	return ip1->ip == ip2->ip &&
 	       ip1->port == ip2->port &&
@@ -276,7 +277,8 @@ struct hash_ipport6_telem {
 
 static inline bool
 hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,
-			const struct hash_ipport6_elem *ip2)
+			const struct hash_ipport6_elem *ip2,
+			u32 *multi)
 {
 	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
 	       ip1->port == ip2->port &&
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 96525f529a54..fb90e344e907 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -62,7 +62,8 @@ struct hash_ipportip4_telem {
 
 static inline bool
 hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
-			  const struct hash_ipportip4_elem *ip2)
+			  const struct hash_ipportip4_elem *ip2,
+			  u32 *multi)
 {
 	return ip1->ip == ip2->ip &&
 	       ip1->ip2 == ip2->ip2 &&
@@ -286,7 +287,8 @@ struct hash_ipportip6_telem {
 
 static inline bool
 hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,
-			  const struct hash_ipportip6_elem *ip2)
+			  const struct hash_ipportip6_elem *ip2,
+			  u32 *multi)
 {
 	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
 	       ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 &&
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index d2d6ab89f087..deb3e3dfa5fc 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -62,7 +62,8 @@ struct hash_ipportnet4_telem {
 
 static inline bool
 hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
-			   const struct hash_ipportnet4_elem *ip2)
+			   const struct hash_ipportnet4_elem *ip2,
+			   u32 *multi)
 {
 	return ip1->ip == ip2->ip &&
 	       ip1->ip2 == ip2->ip2 &&
@@ -335,7 +336,8 @@ struct hash_ipportnet6_telem {
 
 static inline bool
 hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
-			   const struct hash_ipportnet6_elem *ip2)
+			   const struct hash_ipportnet6_elem *ip2,
+			   u32 *multi)
 {
 	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
 	       ipv6_addr_cmp(&ip1->ip2.in6, &ip2->ip2.in6) == 0 &&
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 2d4b1f48e8c9..60d016541c58 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -58,7 +58,8 @@ struct hash_net4_telem {
 
 static inline bool
 hash_net4_data_equal(const struct hash_net4_elem *ip1,
-		    const struct hash_net4_elem *ip2)
+		     const struct hash_net4_elem *ip2,
+		     u32 *multi)
 {
 	return ip1->ip == ip2->ip && ip1->cidr == ip2->cidr;
 }
@@ -249,7 +250,8 @@ struct hash_net6_telem {
 
 static inline bool
 hash_net6_data_equal(const struct hash_net6_elem *ip1,
-		     const struct hash_net6_elem *ip2)
+		     const struct hash_net6_elem *ip2,
+		     u32 *multi)
 {
 	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
 	       ip1->cidr == ip2->cidr;
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 3d6c53b6211a..e13095deb50d 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -99,7 +99,7 @@ iface_test(struct rb_root *root, const char **iface)
 
 	while (n) {
 		const char *d = iface_data(n);
-		int res = ifname_compare(*iface, d);
+		long res = ifname_compare(*iface, d);
 
 		if (res < 0)
 			n = n->rb_left;
@@ -121,7 +121,7 @@ iface_add(struct rb_root *root, const char **iface)
 
 	while (*n) {
 		char *ifname = iface_data(*n);
-		int res = ifname_compare(*iface, ifname);
+		long res = ifname_compare(*iface, ifname);
 
 		p = *n;
 		if (res < 0)
@@ -159,31 +159,42 @@ hash_netiface_same_set(const struct ip_set *a, const struct ip_set *b);
 
 /* The type variant functions: IPv4 */
 
+struct hash_netiface4_elem_hashed {
+	__be32 ip;
+	u8 physdev;
+	u8 cidr;
+	u16 padding;
+};
+
+#define HKEY_DATALEN	sizeof(struct hash_netiface4_elem_hashed)
+
 /* Member elements without timeout */
 struct hash_netiface4_elem {
 	__be32 ip;
-	const char *iface;
 	u8 physdev;
 	u8 cidr;
 	u16 padding;
+	const char *iface;
 };
 
 /* Member elements with timeout support */
 struct hash_netiface4_telem {
 	__be32 ip;
-	const char *iface;
 	u8 physdev;
 	u8 cidr;
 	u16 padding;
+	const char *iface;
 	unsigned long timeout;
 };
 
 static inline bool
 hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
-			  const struct hash_netiface4_elem *ip2)
+			  const struct hash_netiface4_elem *ip2,
+			  u32 *multi)
 {
 	return ip1->ip == ip2->ip &&
 	       ip1->cidr == ip2->cidr &&
+	       (++*multi) &&
 	       ip1->physdev == ip2->physdev &&
 	       ip1->iface == ip2->iface;
 }
@@ -257,6 +268,7 @@ nla_put_failure:
 
 #define IP_SET_HASH_WITH_NETS
 #define IP_SET_HASH_WITH_RBTREE
+#define IP_SET_HASH_WITH_MULTI
 
 #define PF		4
 #define HOST_MASK	32
@@ -424,29 +436,40 @@ hash_netiface_same_set(const struct ip_set *a, const struct ip_set *b)
 
 /* The type variant functions: IPv6 */
 
+struct hash_netiface6_elem_hashed {
+	union nf_inet_addr ip;
+	u8 physdev;
+	u8 cidr;
+	u16 padding;
+};
+
+#define HKEY_DATALEN	sizeof(struct hash_netiface6_elem_hashed)
+
 struct hash_netiface6_elem {
 	union nf_inet_addr ip;
-	const char *iface;
 	u8 physdev;
 	u8 cidr;
 	u16 padding;
+	const char *iface;
 };
 
 struct hash_netiface6_telem {
 	union nf_inet_addr ip;
-	const char *iface;
 	u8 physdev;
 	u8 cidr;
 	u16 padding;
+	const char *iface;
 	unsigned long timeout;
 };
 
 static inline bool
 hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
-			  const struct hash_netiface6_elem *ip2)
+			  const struct hash_netiface6_elem *ip2,
+			  u32 *multi)
 {
 	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
 	       ip1->cidr == ip2->cidr &&
+	       (++*multi) &&
 	       ip1->physdev == ip2->physdev &&
 	       ip1->iface == ip2->iface;
 }
@@ -681,6 +704,7 @@ hash_netiface_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 	h->maxelem = maxelem;
 	get_random_bytes(&h->initval, sizeof(h->initval));
 	h->timeout = IPSET_NO_TIMEOUT;
+	h->ahash_max = AHASH_MAX_SIZE;
 
 	hbits = htable_bits(hashsize);
 	h->table = ip_set_alloc(
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index fe203d12f56b..8f9de7207ec9 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -59,7 +59,8 @@ struct hash_netport4_telem {
 
 static inline bool
 hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
-			 const struct hash_netport4_elem *ip2)
+			 const struct hash_netport4_elem *ip2,
+			 u32 *multi)
 {
 	return ip1->ip == ip2->ip &&
 	       ip1->port == ip2->port &&
@@ -300,7 +301,8 @@ struct hash_netport6_telem {
 
 static inline bool
 hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
-			 const struct hash_netport6_elem *ip2)
+			 const struct hash_netport6_elem *ip2,
+			 u32 *multi)
 {
 	return ipv6_addr_cmp(&ip1->ip.in6, &ip2->ip.in6) == 0 &&
 	       ip1->port == ip2->port &&
-- 
cgit v1.2.3