summaryrefslogtreecommitdiff
path: root/net/netfilter/nf_conntrack_expect.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter/nf_conntrack_expect.c')
-rw-r--r--net/netfilter/nf_conntrack_expect.c370
1 files changed, 245 insertions, 125 deletions
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 504fb6c083f9..3ac64e25f10c 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -19,6 +19,7 @@
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
+#include <linux/jhash.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -26,11 +27,20 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>
-LIST_HEAD(nf_conntrack_expect_list);
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_list);
+struct hlist_head *nf_ct_expect_hash __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
-struct kmem_cache *nf_conntrack_expect_cachep __read_mostly;
-static unsigned int nf_conntrack_expect_next_id;
+unsigned int nf_ct_expect_hsize __read_mostly;
+EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
+
+static unsigned int nf_ct_expect_hash_rnd __read_mostly;
+static unsigned int nf_ct_expect_count;
+unsigned int nf_ct_expect_max __read_mostly;
+static int nf_ct_expect_hash_rnd_initted __read_mostly;
+static int nf_ct_expect_vmalloc;
+
+static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
+static unsigned int nf_ct_expect_next_id;
/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
@@ -40,60 +50,83 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
NF_CT_ASSERT(master_help);
NF_CT_ASSERT(!timer_pending(&exp->timeout));
- list_del(&exp->list);
- NF_CT_STAT_INC(expect_delete);
+ hlist_del(&exp->hnode);
+ nf_ct_expect_count--;
+
+ hlist_del(&exp->lnode);
master_help->expecting--;
- nf_conntrack_expect_put(exp);
+ nf_ct_expect_put(exp);
+
+ NF_CT_STAT_INC(expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
-static void expectation_timed_out(unsigned long ul_expect)
+static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
struct nf_conntrack_expect *exp = (void *)ul_expect;
write_lock_bh(&nf_conntrack_lock);
nf_ct_unlink_expect(exp);
write_unlock_bh(&nf_conntrack_lock);
- nf_conntrack_expect_put(exp);
+ nf_ct_expect_put(exp);
+}
+
+static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
+{
+ if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
+ get_random_bytes(&nf_ct_expect_hash_rnd, 4);
+ nf_ct_expect_hash_rnd_initted = 1;
+ }
+
+ return jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
+ (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
+ (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd) %
+ nf_ct_expect_hsize;
}
struct nf_conntrack_expect *
-__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
+__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i;
+ struct hlist_node *n;
+ unsigned int h;
+
+ if (!nf_ct_expect_count)
+ return NULL;
- list_for_each_entry(i, &nf_conntrack_expect_list, list) {
+ h = nf_ct_expect_dst_hash(tuple);
+ hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
return i;
}
return NULL;
}
-EXPORT_SYMBOL_GPL(__nf_conntrack_expect_find);
+EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
/* Just find a expectation corresponding to a tuple. */
struct nf_conntrack_expect *
-nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple)
+nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i;
read_lock_bh(&nf_conntrack_lock);
- i = __nf_conntrack_expect_find(tuple);
+ i = __nf_ct_expect_find(tuple);
if (i)
atomic_inc(&i->use);
read_unlock_bh(&nf_conntrack_lock);
return i;
}
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_find_get);
+EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
/* If an expectation for this connection is found, it gets delete from
* global list then returned. */
struct nf_conntrack_expect *
-find_expectation(const struct nf_conntrack_tuple *tuple)
+nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *exp;
- exp = __nf_conntrack_expect_find(tuple);
+ exp = __nf_ct_expect_find(tuple);
if (!exp)
return NULL;
@@ -119,17 +152,18 @@ find_expectation(const struct nf_conntrack_tuple *tuple)
/* delete all expectations for this conntrack */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
- struct nf_conntrack_expect *i, *tmp;
struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_expect *exp;
+ struct hlist_node *n, *next;
/* Optimization: most connection never expect any others. */
if (!help || help->expecting == 0)
return;
- list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
- if (i->master == ct && del_timer(&i->timeout)) {
- nf_ct_unlink_expect(i);
- nf_conntrack_expect_put(i);
+ hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
+ if (del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
}
}
}
@@ -141,25 +175,16 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
{
/* Part covered by intersection of masks must be unequal,
otherwise they clash */
- struct nf_conntrack_tuple intersect_mask;
+ struct nf_conntrack_tuple_mask intersect_mask;
int count;
- intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
- intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
- intersect_mask.dst.protonum = a->mask.dst.protonum
- & b->mask.dst.protonum;
for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
intersect_mask.src.u3.all[count] =
a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
}
- for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
- intersect_mask.dst.u3.all[count] =
- a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
- }
-
return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}
@@ -168,36 +193,29 @@ static inline int expect_matches(const struct nf_conntrack_expect *a,
{
return a->master == b->master
&& nf_ct_tuple_equal(&a->tuple, &b->tuple)
- && nf_ct_tuple_equal(&a->mask, &b->mask);
+ && nf_ct_tuple_mask_equal(&a->mask, &b->mask);
}
/* Generally a bad idea to call this: could have matched already. */
-void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
+void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
- struct nf_conntrack_expect *i;
-
write_lock_bh(&nf_conntrack_lock);
- /* choose the oldest expectation to evict */
- list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
- if (expect_matches(i, exp) && del_timer(&i->timeout)) {
- nf_ct_unlink_expect(i);
- write_unlock_bh(&nf_conntrack_lock);
- nf_conntrack_expect_put(i);
- return;
- }
+ if (del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
}
write_unlock_bh(&nf_conntrack_lock);
}
-EXPORT_SYMBOL_GPL(nf_conntrack_unexpect_related);
+EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
/* We don't increase the master conntrack refcount for non-fulfilled
* conntracks. During the conntrack destruction, the expectations are
* always killed before the conntrack itself */
-struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
+struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
struct nf_conntrack_expect *new;
- new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
+ new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
if (!new)
return NULL;
@@ -205,12 +223,12 @@ struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
atomic_set(&new->use, 1);
return new;
}
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_alloc);
+EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
-void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family,
- union nf_conntrack_address *saddr,
- union nf_conntrack_address *daddr,
- u_int8_t proto, __be16 *src, __be16 *dst)
+void nf_ct_expect_init(struct nf_conntrack_expect *exp, int family,
+ union nf_conntrack_address *saddr,
+ union nf_conntrack_address *daddr,
+ u_int8_t proto, __be16 *src, __be16 *dst)
{
int len;
@@ -224,8 +242,6 @@ void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family,
exp->helper = NULL;
exp->tuple.src.l3num = family;
exp->tuple.dst.protonum = proto;
- exp->mask.src.l3num = 0xFFFF;
- exp->mask.dst.protonum = 0xFF;
if (saddr) {
memcpy(&exp->tuple.src.u3, saddr, len);
@@ -242,59 +258,50 @@ void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family,
memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
}
- if (daddr) {
- memcpy(&exp->tuple.dst.u3, daddr, len);
- if (sizeof(exp->tuple.dst.u3) > len)
- /* address needs to be cleared for nf_ct_tuple_equal */
- memset((void *)&exp->tuple.dst.u3 + len, 0x00,
- sizeof(exp->tuple.dst.u3) - len);
- memset(&exp->mask.dst.u3, 0xFF, len);
- if (sizeof(exp->mask.dst.u3) > len)
- memset((void *)&exp->mask.dst.u3 + len, 0x00,
- sizeof(exp->mask.dst.u3) - len);
- } else {
- memset(&exp->tuple.dst.u3, 0x00, sizeof(exp->tuple.dst.u3));
- memset(&exp->mask.dst.u3, 0x00, sizeof(exp->mask.dst.u3));
- }
-
if (src) {
- exp->tuple.src.u.all = (__force u16)*src;
- exp->mask.src.u.all = 0xFFFF;
+ exp->tuple.src.u.all = *src;
+ exp->mask.src.u.all = htons(0xFFFF);
} else {
exp->tuple.src.u.all = 0;
exp->mask.src.u.all = 0;
}
- if (dst) {
- exp->tuple.dst.u.all = (__force u16)*dst;
- exp->mask.dst.u.all = 0xFFFF;
- } else {
- exp->tuple.dst.u.all = 0;
- exp->mask.dst.u.all = 0;
- }
+ memcpy(&exp->tuple.dst.u3, daddr, len);
+ if (sizeof(exp->tuple.dst.u3) > len)
+ /* address needs to be cleared for nf_ct_tuple_equal */
+ memset((void *)&exp->tuple.dst.u3 + len, 0x00,
+ sizeof(exp->tuple.dst.u3) - len);
+
+ exp->tuple.dst.u.all = *dst;
}
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_init);
+EXPORT_SYMBOL_GPL(nf_ct_expect_init);
-void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
+void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
if (atomic_dec_and_test(&exp->use))
- kmem_cache_free(nf_conntrack_expect_cachep, exp);
+ kmem_cache_free(nf_ct_expect_cachep, exp);
}
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_put);
+EXPORT_SYMBOL_GPL(nf_ct_expect_put);
-static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
+static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
struct nf_conn_help *master_help = nfct_help(exp->master);
+ unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
atomic_inc(&exp->use);
+
+ hlist_add_head(&exp->lnode, &master_help->expectations);
master_help->expecting++;
- list_add(&exp->list, &nf_conntrack_expect_list);
- setup_timer(&exp->timeout, expectation_timed_out, (unsigned long)exp);
+ hlist_add_head(&exp->hnode, &nf_ct_expect_hash[h]);
+ nf_ct_expect_count++;
+
+ setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
+ (unsigned long)exp);
exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
add_timer(&exp->timeout);
- exp->id = ++nf_conntrack_expect_next_id;
+ exp->id = ++nf_ct_expect_next_id;
atomic_inc(&exp->use);
NF_CT_STAT_INC(expect_create);
}
@@ -302,16 +309,16 @@ static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master)
{
- struct nf_conntrack_expect *i;
+ struct nf_conn_help *master_help = nfct_help(master);
+ struct nf_conntrack_expect *exp = NULL;
+ struct hlist_node *n;
- list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
- if (i->master == master) {
- if (del_timer(&i->timeout)) {
- nf_ct_unlink_expect(i);
- nf_conntrack_expect_put(i);
- }
- break;
- }
+ hlist_for_each_entry(exp, n, &master_help->expectations, lnode)
+ ; /* nothing */
+
+ if (exp && del_timer(&exp->timeout)) {
+ nf_ct_unlink_expect(exp);
+ nf_ct_expect_put(exp);
}
}
@@ -327,11 +334,13 @@ static inline int refresh_timer(struct nf_conntrack_expect *i)
return 1;
}
-int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
+int nf_ct_expect_related(struct nf_conntrack_expect *expect)
{
struct nf_conntrack_expect *i;
struct nf_conn *master = expect->master;
struct nf_conn_help *master_help = nfct_help(master);
+ struct hlist_node *n;
+ unsigned int h;
int ret;
NF_CT_ASSERT(master_help);
@@ -341,7 +350,8 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
ret = -ESHUTDOWN;
goto out;
}
- list_for_each_entry(i, &nf_conntrack_expect_list, list) {
+ h = nf_ct_expect_dst_hash(&expect->tuple);
+ hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
if (expect_matches(i, expect)) {
/* Refresh timer: if it's dying, ignore.. */
if (refresh_timer(i)) {
@@ -358,57 +368,86 @@ int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
master_help->expecting >= master_help->helper->max_expected)
evict_oldest_expect(master);
- nf_conntrack_expect_insert(expect);
- nf_conntrack_expect_event(IPEXP_NEW, expect);
+ if (nf_ct_expect_count >= nf_ct_expect_max) {
+ if (net_ratelimit())
+ printk(KERN_WARNING
+ "nf_conntrack: expectation table full");
+ ret = -EMFILE;
+ goto out;
+ }
+
+ nf_ct_expect_insert(expect);
+ nf_ct_expect_event(IPEXP_NEW, expect);
ret = 0;
out:
write_unlock_bh(&nf_conntrack_lock);
return ret;
}
-EXPORT_SYMBOL_GPL(nf_conntrack_expect_related);
+EXPORT_SYMBOL_GPL(nf_ct_expect_related);
#ifdef CONFIG_PROC_FS
-static void *exp_seq_start(struct seq_file *s, loff_t *pos)
+struct ct_expect_iter_state {
+ unsigned int bucket;
+};
+
+static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
- struct list_head *e = &nf_conntrack_expect_list;
- loff_t i;
+ struct ct_expect_iter_state *st = seq->private;
- /* strange seq_file api calls stop even if we fail,
- * thus we need to grab lock since stop unlocks */
- read_lock_bh(&nf_conntrack_lock);
+ for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
+ if (!hlist_empty(&nf_ct_expect_hash[st->bucket]))
+ return nf_ct_expect_hash[st->bucket].first;
+ }
+ return NULL;
+}
- if (list_empty(e))
- return NULL;
+static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
+ struct hlist_node *head)
+{
+ struct ct_expect_iter_state *st = seq->private;
- for (i = 0; i <= *pos; i++) {
- e = e->next;
- if (e == &nf_conntrack_expect_list)
+ head = head->next;
+ while (head == NULL) {
+ if (++st->bucket >= nf_ct_expect_hsize)
return NULL;
+ head = nf_ct_expect_hash[st->bucket].first;
}
- return e;
+ return head;
}
-static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
+static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
- struct list_head *e = v;
+ struct hlist_node *head = ct_expect_get_first(seq);
- ++*pos;
- e = e->next;
+ if (head)
+ while (pos && (head = ct_expect_get_next(seq, head)))
+ pos--;
+ return pos ? NULL : head;
+}
- if (e == &nf_conntrack_expect_list)
- return NULL;
+static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ read_lock_bh(&nf_conntrack_lock);
+ return ct_expect_get_idx(seq, *pos);
+}
- return e;
+static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return ct_expect_get_next(seq, v);
}
-static void exp_seq_stop(struct seq_file *s, void *v)
+static void exp_seq_stop(struct seq_file *seq, void *v)
{
read_unlock_bh(&nf_conntrack_lock);
}
static int exp_seq_show(struct seq_file *s, void *v)
{
- struct nf_conntrack_expect *expect = v;
+ struct nf_conntrack_expect *expect;
+ struct hlist_node *n = v;
+
+ expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
if (expect->timeout.function)
seq_printf(s, "%ld ", timer_pending(&expect->timeout)
@@ -425,7 +464,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
return seq_putc(s, '\n');
}
-static struct seq_operations exp_seq_ops = {
+static const struct seq_operations exp_seq_ops = {
.start = exp_seq_start,
.next = exp_seq_next,
.stop = exp_seq_stop,
@@ -434,14 +473,95 @@ static struct seq_operations exp_seq_ops = {
static int exp_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &exp_seq_ops);
+ struct seq_file *seq;
+ struct ct_expect_iter_state *st;
+ int ret;
+
+ st = kzalloc(sizeof(struct ct_expect_iter_state), GFP_KERNEL);
+ if (!st)
+ return -ENOMEM;
+ ret = seq_open(file, &exp_seq_ops);
+ if (ret)
+ goto out_free;
+ seq = file->private_data;
+ seq->private = st;
+ return ret;
+out_free:
+ kfree(st);
+ return ret;
}
-const struct file_operations exp_file_ops = {
+static const struct file_operations exp_file_ops = {
.owner = THIS_MODULE,
.open = exp_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release
+ .release = seq_release_private,
};
#endif /* CONFIG_PROC_FS */
+
+static int __init exp_proc_init(void)
+{
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *proc;
+
+ proc = proc_net_fops_create("nf_conntrack_expect", 0440, &exp_file_ops);
+ if (!proc)
+ return -ENOMEM;
+#endif /* CONFIG_PROC_FS */
+ return 0;
+}
+
+static void exp_proc_remove(void)
+{
+#ifdef CONFIG_PROC_FS
+ proc_net_remove("nf_conntrack_expect");
+#endif /* CONFIG_PROC_FS */
+}
+
+module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
+
+int __init nf_conntrack_expect_init(void)
+{
+ int err = -ENOMEM;
+
+ if (!nf_ct_expect_hsize) {
+ nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
+ if (!nf_ct_expect_hsize)
+ nf_ct_expect_hsize = 1;
+ }
+ nf_ct_expect_max = nf_ct_expect_hsize * 4;
+
+ nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
+ &nf_ct_expect_vmalloc);
+ if (nf_ct_expect_hash == NULL)
+ goto err1;
+
+ nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
+ sizeof(struct nf_conntrack_expect),
+ 0, 0, NULL);
+ if (!nf_ct_expect_cachep)
+ goto err2;
+
+ err = exp_proc_init();
+ if (err < 0)
+ goto err3;
+
+ return 0;
+
+err3:
+ nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
+ nf_ct_expect_hsize);
+err2:
+ kmem_cache_destroy(nf_ct_expect_cachep);
+err1:
+ return err;
+}
+
+void nf_conntrack_expect_fini(void)
+{
+ exp_proc_remove();
+ kmem_cache_destroy(nf_ct_expect_cachep);
+ nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
+ nf_ct_expect_hsize);
+}