summaryrefslogtreecommitdiff
path: root/net/netfilter/nf_conntrack_core.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-07-24 22:02:36 -0700
committerDavid S. Miller <davem@davemloft.net>2016-07-24 22:02:36 -0700
commitc42d7121fbee1ee30fd9221d594e9c5a4bc1fed6 (patch)
tree37ccf6ebfc28dc4aad92e9d3831d8825eafb7a5a /net/netfilter/nf_conntrack_core.c
parentde0ba9a0d8909996f9e293d311c2cc459fa77d67 (diff)
parent4b512e1c1f8de6b9ceb796ecef8658e0a083cab7 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter/IPVS updates for net-next The following patchset contains Netfilter/IPVS updates for net-next, they are: 1) Count pre-established connections as active in "least connection" schedulers such that pre-established connections to avoid overloading backend servers on peak demands, from Michal Kubecek via Simon Horman. 2) Address a race condition when resizing the conntrack table by caching the bucket size when fulling iterating over the hashtable in these three possible scenarios: 1) dump via /proc/net/nf_conntrack, 2) unlinking userspace helper and 3) unlinking custom conntrack timeout. From Liping Zhang. 3) Revisit early_drop() path to perform lockless traversal on conntrack eviction under stress, use del_timer() as synchronization point to avoid two CPUs evicting the same entry, from Florian Westphal. 4) Move NAT hlist_head to nf_conn object, this simplifies the existing NAT extension and it doesn't increase size since recent patches to align nf_conn, from Florian. 5) Use rhashtable for the by-source NAT hashtable, also from Florian. 6) Don't allow --physdev-is-out from OUTPUT chain, just like --physdev-out is not either, from Hangbin Liu. 7) Automagically set on nf_conntrack counters if the user tries to match ct bytes/packets from nftables, from Liping Zhang. 8) Remove possible_net_t fields in nf_tables set objects since we just simply pass the net pointer to the backend set type implementations. 9) Fix possible off-by-one in h323, from Toby DiPasquale. 10) early_drop() may be called from ctnetlink patch, so we must hold rcu read size lock from them too, this amends Florian's patch #3 coming in this batch, from Liping Zhang. 11) Use binary search to validate jump offset in x_tables, this addresses the O(n!) validation that was introduced recently resolve security issues with unpriviledge namespaces, from Florian. 12) Fix reference leak to connlabel in error path of nft_ct, from Zhang. 13) Three updates for nft_log: Fix log prefix leak in error path. Bail out on loglevel larger than debug in nft_log and set on the new NF_LOG_F_COPY_LEN flag when snaplen is specified. Again from Zhang. 14) Allow to filter rule dumps in nf_tables based on table and chain names. 15) Simplify connlabel to always use 128 bits to store labels and get rid of unused function in xt_connlabel, from Florian. 16) Replace set_expect_timeout() by mod_timer() from the h323 conntrack helper, by Gao Feng. 17) Put back x_tables module reference in nft_compat on error, from Liping Zhang. 18) Add a reference count to the x_tables extensions cache in nft_compat, so we can remove them when unused and avoid a crash if the extensions are rmmod, again from Zhang. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netfilter/nf_conntrack_core.c')
-rw-r--r--net/netfilter/nf_conntrack_core.c115
1 files changed, 67 insertions, 48 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0ad936814fa2..9198e690e692 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -460,6 +460,23 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
net_eq(net, nf_ct_net(ct));
}
+/* must be called with rcu read lock held */
+void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+{
+ struct hlist_nulls_head *hptr;
+ unsigned int sequence, hsz;
+
+ do {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+ hsz = nf_conntrack_htable_size;
+ hptr = nf_conntrack_hash;
+ } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+ *hash = hptr;
+ *hsize = hsz;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_get_ht);
+
/*
* Warning :
* - Caller must take a reference on returned object
@@ -818,67 +835,69 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
/* There's a small race here where we may free a just-assured
connection. Too bad: we're in trouble anyway. */
-static noinline int early_drop(struct net *net, unsigned int _hash)
+static unsigned int early_drop_list(struct net *net,
+ struct hlist_nulls_head *head)
{
- /* Use oldest entry, which is roughly LRU */
struct nf_conntrack_tuple_hash *h;
- struct nf_conn *tmp;
struct hlist_nulls_node *n;
- unsigned int i, hash, sequence;
- struct nf_conn *ct = NULL;
- spinlock_t *lockp;
- bool ret = false;
+ unsigned int drops = 0;
+ struct nf_conn *tmp;
- i = 0;
+ hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
+ tmp = nf_ct_tuplehash_to_ctrack(h);
- local_bh_disable();
-restart:
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- for (; i < NF_CT_EVICTION_RANGE; i++) {
- hash = scale_hash(_hash++);
- lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
- nf_conntrack_lock(lockp);
- if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
- spin_unlock(lockp);
- goto restart;
- }
- hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
- hnnode) {
- tmp = nf_ct_tuplehash_to_ctrack(h);
-
- if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
- !net_eq(nf_ct_net(tmp), net) ||
- nf_ct_is_dying(tmp))
- continue;
-
- if (atomic_inc_not_zero(&tmp->ct_general.use)) {
- ct = tmp;
- break;
- }
- }
+ if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
+ !net_eq(nf_ct_net(tmp), net) ||
+ nf_ct_is_dying(tmp))
+ continue;
- spin_unlock(lockp);
- if (ct)
- break;
+ if (!atomic_inc_not_zero(&tmp->ct_general.use))
+ continue;
+
+ /* kill only if still in same netns -- might have moved due to
+ * SLAB_DESTROY_BY_RCU rules.
+ *
+ * We steal the timer reference. If that fails timer has
+ * already fired or someone else deleted it. Just drop ref
+ * and move to next entry.
+ */
+ if (net_eq(nf_ct_net(tmp), net) &&
+ nf_ct_is_confirmed(tmp) &&
+ del_timer(&tmp->timeout) &&
+ nf_ct_delete(tmp, 0, 0))
+ drops++;
+
+ nf_ct_put(tmp);
}
- local_bh_enable();
+ return drops;
+}
- if (!ct)
- return false;
+static noinline int early_drop(struct net *net, unsigned int _hash)
+{
+ unsigned int i;
- /* kill only if in same netns -- might have moved due to
- * SLAB_DESTROY_BY_RCU rules
- */
- if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) {
- if (nf_ct_delete(ct, 0, 0)) {
- NF_CT_STAT_INC_ATOMIC(net, early_drop);
- ret = true;
+ for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
+ struct hlist_nulls_head *ct_hash;
+ unsigned hash, sequence, drops;
+
+ rcu_read_lock();
+ do {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+ hash = scale_hash(_hash++);
+ ct_hash = nf_conntrack_hash;
+ } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+ drops = early_drop_list(net, &ct_hash[hash]);
+ rcu_read_unlock();
+
+ if (drops) {
+ NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops);
+ return true;
}
}
- nf_ct_put(ct);
- return ret;
+ return false;
}
static struct nf_conn *