summaryrefslogtreecommitdiff
path: root/net/netfilter
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2010-05-03 09:17:01 +0200
committerIngo Molnar <mingo@elte.hu>2010-05-03 09:17:01 +0200
commit53ba4f2fa73225113a488584df0d85d3cba52943 (patch)
treed85b984d9818abc3ccc0237eb53b710d9e96c39e /net/netfilter
parentbd6d29c25bb1a24a4c160ec5de43e0004e01f72b (diff)
parent66f41d4c5c8a5deed66fdcc84509376c9a0bf9d8 (diff)
Merge commit 'v2.6.34-rc6' into core/locking
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/Kconfig25
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/core.c1
-rw-r--r--net/netfilter/ipvs/Kconfig11
-rw-r--r--net/netfilter/ipvs/Makefile1
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c43
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c68
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c14
-rw-r--r--net/netfilter/ipvs/ip_vs_dh.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c45
-rw-r--r--net/netfilter/ipvs/ip_vs_proto.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c1183
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c14
-rw-r--r--net/netfilter/ipvs/ip_vs_wrr.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c11
-rw-r--r--net/netfilter/nf_conntrack_acct.c1
-rw-r--r--net/netfilter/nf_conntrack_amanda.c1
-rw-r--r--net/netfilter/nf_conntrack_core.c164
-rw-r--r--net/netfilter/nf_conntrack_ecache.c1
-rw-r--r--net/netfilter/nf_conntrack_expect.c31
-rw-r--r--net/netfilter/nf_conntrack_extend.c1
-rw-r--r--net/netfilter/nf_conntrack_ftp.c1
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c4
-rw-r--r--net/netfilter/nf_conntrack_helper.c45
-rw-r--r--net/netfilter/nf_conntrack_irc.c1
-rw-r--r--net/netfilter/nf_conntrack_netlink.c233
-rw-r--r--net/netfilter/nf_conntrack_pptp.c14
-rw-r--r--net/netfilter/nf_conntrack_proto.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c6
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c6
-rw-r--r--net/netfilter/nf_conntrack_proto_udplite.c4
-rw-r--r--net/netfilter/nf_conntrack_sane.c1
-rw-r--r--net/netfilter/nf_conntrack_sip.c334
-rw-r--r--net/netfilter/nf_conntrack_standalone.c7
-rw-r--r--net/netfilter/nf_queue.c3
-rw-r--r--net/netfilter/nfnetlink.c65
-rw-r--r--net/netfilter/nfnetlink_log.c6
-rw-r--r--net/netfilter/nfnetlink_queue.c8
-rw-r--r--net/netfilter/x_tables.c81
-rw-r--r--net/netfilter/xt_CT.c165
-rw-r--r--net/netfilter/xt_LED.c1
-rw-r--r--net/netfilter/xt_NFQUEUE.c6
-rw-r--r--net/netfilter/xt_RATEEST.c8
-rw-r--r--net/netfilter/xt_TCPMSS.c31
-rw-r--r--net/netfilter/xt_connlimit.c28
-rw-r--r--net/netfilter/xt_dccp.c1
-rw-r--r--net/netfilter/xt_hashlimit.c221
-rw-r--r--net/netfilter/xt_limit.c5
-rw-r--r--net/netfilter/xt_osf.c4
-rw-r--r--net/netfilter/xt_quota.c1
-rw-r--r--net/netfilter/xt_recent.c169
-rw-r--r--net/netfilter/xt_repldata.h35
-rw-r--r--net/netfilter/xt_statistic.c1
-rw-r--r--net/netfilter/xt_string.c1
62 files changed, 2634 insertions, 501 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 634d14affc8d..18d77b5c351a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -83,6 +83,19 @@ config NF_CONNTRACK_SECMARK
If unsure, say 'N'.
+config NF_CONNTRACK_ZONES
+ bool 'Connection tracking zones'
+ depends on NETFILTER_ADVANCED
+ depends on NETFILTER_XT_TARGET_CT
+ help
+ This option enables support for connection tracking zones.
+ Normally, each connection needs to have a unique system wide
+ identity. Connection tracking zones allow to have multiple
+ connections using the same identity, as long as they are
+ contained in different zones.
+
+ If unsure, say `N'.
+
config NF_CONNTRACK_EVENTS
bool "Connection tracking events"
depends on NETFILTER_ADVANCED
@@ -341,6 +354,18 @@ config NETFILTER_XT_TARGET_CONNSECMARK
To compile it as a module, choose M here. If unsure, say N.
+config NETFILTER_XT_TARGET_CT
+ tristate '"CT" target support'
+ depends on NF_CONNTRACK
+ depends on IP_NF_RAW || IP6_NF_RAW
+ depends on NETFILTER_ADVANCED
+ help
+ This options adds a `CT' target, which allows to specify initial
+ connection tracking parameters like events to be delivered and
+ the helper to be used.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NETFILTER_XT_TARGET_DSCP
tristate '"DSCP" and "TOS" target support'
depends on IP_NF_MANGLE || IP6_NF_MANGLE
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 49f62ee4e9ff..f873644f02f6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 60ec4e4badaa..78b505d33bfb 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -19,6 +19,7 @@
#include <linux/inetdevice.h>
#include <linux/proc_fs.h>
#include <linux/mutex.h>
+#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/sock.h>
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index f2d76238b9b5..712ccad13344 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -68,6 +68,10 @@ config IP_VS_TAB_BITS
each hash entry uses 8 bytes, so you can estimate how much memory is
needed for your box.
+ You can overwrite this number setting conn_tab_bits module parameter
+ or by appending ip_vs.conn_tab_bits=? to the kernel command line
+ if IP VS was compiled built-in.
+
comment "IPVS transport protocol load balancing support"
config IP_VS_PROTO_TCP
@@ -100,6 +104,13 @@ config IP_VS_PROTO_AH
This option enables support for load balancing AH (Authentication
Header) transport protocol. Say Y if unsure.
+config IP_VS_PROTO_SCTP
+ bool "SCTP load balancing support"
+ select LIBCRC32C
+ ---help---
+ This option enables support for load balancing SCTP transport
+ protocol. Say Y if unsure.
+
comment "IPVS scheduler"
config IP_VS_RR
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index 73a46fe1fe4c..e3baefd7066e 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -7,6 +7,7 @@ ip_vs_proto-objs-y :=
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o
ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 3c7e42735b60..1cb0e834f8ff 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -27,6 +27,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
+#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/tcp.h>
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 27c30cf933da..d8f7e8ef67b4 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -32,6 +32,7 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/proc_fs.h> /* for proc_net_* */
+#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/jhash.h>
#include <linux/random.h>
@@ -40,6 +41,21 @@
#include <net/ip_vs.h>
+#ifndef CONFIG_IP_VS_TAB_BITS
+#define CONFIG_IP_VS_TAB_BITS 12
+#endif
+
+/*
+ * Connection hash size. Default is what was selected at compile time.
+*/
+int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
+MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
+
+/* size and mask values */
+int ip_vs_conn_tab_size;
+int ip_vs_conn_tab_mask;
+
/*
* Connection hash table: for input and output packets lookups of IPVS
*/
@@ -125,11 +141,11 @@ static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
if (af == AF_INET6)
return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
(__force u32)port, proto, ip_vs_conn_rnd)
- & IP_VS_CONN_TAB_MASK;
+ & ip_vs_conn_tab_mask;
#endif
return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
ip_vs_conn_rnd)
- & IP_VS_CONN_TAB_MASK;
+ & ip_vs_conn_tab_mask;
}
@@ -760,7 +776,7 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
int idx;
struct ip_vs_conn *cp;
- for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
+ for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
ct_read_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
if (pos-- == 0) {
@@ -797,7 +813,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
idx = l - ip_vs_conn_tab;
ct_read_unlock_bh(idx);
- while (++idx < IP_VS_CONN_TAB_SIZE) {
+ while (++idx < ip_vs_conn_tab_size) {
ct_read_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
seq->private = &ip_vs_conn_tab[idx];
@@ -976,8 +992,8 @@ void ip_vs_random_dropentry(void)
/*
* Randomly scan 1/32 of the whole table every second
*/
- for (idx = 0; idx < (IP_VS_CONN_TAB_SIZE>>5); idx++) {
- unsigned hash = net_random() & IP_VS_CONN_TAB_MASK;
+ for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) {
+ unsigned hash = net_random() & ip_vs_conn_tab_mask;
/*
* Lock is actually needed in this loop.
@@ -1029,7 +1045,7 @@ static void ip_vs_conn_flush(void)
struct ip_vs_conn *cp;
flush_again:
- for (idx=0; idx<IP_VS_CONN_TAB_SIZE; idx++) {
+ for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
/*
* Lock is actually needed in this loop.
*/
@@ -1060,10 +1076,15 @@ int __init ip_vs_conn_init(void)
{
int idx;
+ /* Compute size and mask */
+ ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
+ ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;
+
/*
* Allocate the connection hash table and initialize its list heads
*/
- ip_vs_conn_tab = vmalloc(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head));
+ ip_vs_conn_tab = vmalloc(ip_vs_conn_tab_size *
+ sizeof(struct list_head));
if (!ip_vs_conn_tab)
return -ENOMEM;
@@ -1078,12 +1099,12 @@ int __init ip_vs_conn_init(void)
pr_info("Connection hash table configured "
"(size=%d, memory=%ldKbytes)\n",
- IP_VS_CONN_TAB_SIZE,
- (long)(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head))/1024);
+ ip_vs_conn_tab_size,
+ (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024);
IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n",
sizeof(struct ip_vs_conn));
- for (idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
+ for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
INIT_LIST_HEAD(&ip_vs_conn_tab[idx]);
}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 847ffca40184..1cd6e3fd058b 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -31,7 +31,9 @@
#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/tcp.h>
+#include <linux/sctp.h>
#include <linux/icmp.h>
+#include <linux/slab.h>
#include <net/ip.h>
#include <net/tcp.h>
@@ -81,6 +83,8 @@ const char *ip_vs_proto_name(unsigned proto)
return "UDP";
case IPPROTO_TCP:
return "TCP";
+ case IPPROTO_SCTP:
+ return "SCTP";
case IPPROTO_ICMP:
return "ICMP";
#ifdef CONFIG_IP_VS_IPV6
@@ -512,8 +516,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
*/
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
- icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0,
- skb->dev);
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
else
#endif
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
@@ -589,8 +592,9 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
ip_send_check(ciph);
}
- /* the TCP/UDP port */
- if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) {
+ /* the TCP/UDP/SCTP port */
+ if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol ||
+ IPPROTO_SCTP == ciph->protocol) {
__be16 *ports = (void *)ciph + ciph->ihl*4;
if (inout)
@@ -630,8 +634,9 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
ciph->saddr = cp->daddr.in6;
}
- /* the TCP/UDP port */
- if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) {
+ /* the TCP/UDP/SCTP port */
+ if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr ||
+ IPPROTO_SCTP == ciph->nexthdr) {
__be16 *ports = (void *)ciph + sizeof(struct ipv6hdr);
if (inout)
@@ -679,7 +684,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
goto out;
}
- if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol)
+ if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
+ IPPROTO_SCTP == protocol)
offset += 2 * sizeof(__u16);
if (!skb_make_writable(skb, offset))
goto out;
@@ -857,6 +863,21 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
}
#endif
+/*
+ * Check if sctp chunc is ABORT chunk
+ */
+static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len)
+{
+ sctp_chunkhdr_t *sch, schunk;
+ sch = skb_header_pointer(skb, nh_len + sizeof(sctp_sctphdr_t),
+ sizeof(schunk), &schunk);
+ if (sch == NULL)
+ return 0;
+ if (sch->type == SCTP_CID_ABORT)
+ return 1;
+ return 0;
+}
+
static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
{
struct tcphdr _tcph, *th;
@@ -999,7 +1020,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
if (unlikely(!cp)) {
if (sysctl_ip_vs_nat_icmp_send &&
(pp->protocol == IPPROTO_TCP ||
- pp->protocol == IPPROTO_UDP)) {
+ pp->protocol == IPPROTO_UDP ||
+ pp->protocol == IPPROTO_SCTP)) {
__be16 _ports[2], *pptr;
pptr = skb_header_pointer(skb, iph.len,
@@ -1014,14 +1036,19 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
* existing entry if it is not RST
* packet or not TCP packet.
*/
- if (iph.protocol != IPPROTO_TCP
- || !is_tcp_reset(skb, iph.len)) {
+ if ((iph.protocol != IPPROTO_TCP &&
+ iph.protocol != IPPROTO_SCTP)
+ || ((iph.protocol == IPPROTO_TCP
+ && !is_tcp_reset(skb, iph.len))
+ || (iph.protocol == IPPROTO_SCTP
+ && !is_sctp_abort(skb,
+ iph.len)))) {
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
icmpv6_send(skb,
ICMPV6_DEST_UNREACH,
ICMPV6_PORT_UNREACH,
- 0, skb->dev);
+ 0);
else
#endif
icmp_send(skb,
@@ -1235,7 +1262,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
- if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr)
+ if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr ||
+ IPPROTO_SCTP == cih->nexthdr)
offset += 2 * sizeof(__u16);
verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
/* do not touch skb anymore */
@@ -1358,6 +1386,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
* encorage the standby servers to update the connections timeout
*/
pkts = atomic_add_return(1, &cp->in_pkts);
+ if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+ cp->protocol == IPPROTO_SCTP) {
+ if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
+ (atomic_read(&cp->in_pkts) %
+ sysctl_ip_vs_sync_threshold[1]
+ == sysctl_ip_vs_sync_threshold[0])) ||
+ (cp->old_state != cp->state &&
+ ((cp->state == IP_VS_SCTP_S_CLOSED) ||
+ (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
+ (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
+ ip_vs_sync_conn(cp);
+ goto out;
+ }
+ }
+
if (af == AF_INET &&
(ip_vs_sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
@@ -1370,6 +1413,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
(cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
(cp->state == IP_VS_TCP_S_TIME_WAIT)))))
ip_vs_sync_conn(cp);
+out:
cp->old_state = cp->state;
ip_vs_conn_put(cp);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c37ac2d7bec4..36dc1d88c2fa 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -31,6 +31,7 @@
#include <linux/workqueue.h>
#include <linux/swap.h>
#include <linux/seq_file.h>
+#include <linux/slab.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
@@ -1843,7 +1844,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN) {
seq_printf(seq,
"IP Virtual Server version %d.%d.%d (size=%d)\n",
- NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
+ NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
seq_puts(seq,
"Prot LocalAddress:Port Scheduler Flags\n");
seq_puts(seq,
@@ -2132,8 +2133,9 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
}
}
- /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
- if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
+ /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
+ if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
+ usvc.protocol != IPPROTO_SCTP) {
pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
usvc.protocol, &usvc.addr.ip,
ntohs(usvc.port), usvc.sched_name);
@@ -2386,7 +2388,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
char buf[64];
sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
- NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
+ NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
ret = -EFAULT;
goto out;
@@ -2399,7 +2401,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
struct ip_vs_getinfo info;
info.version = IP_VS_VERSION_CODE;
- info.size = IP_VS_CONN_TAB_SIZE;
+ info.size = ip_vs_conn_tab_size;
info.num_services = ip_vs_num_services;
if (copy_to_user(user, &info, sizeof(info)) != 0)
ret = -EFAULT;
@@ -3243,7 +3245,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
case IPVS_CMD_GET_INFO:
NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
- IP_VS_CONN_TAB_SIZE);
+ ip_vs_conn_tab_size);
break;
}
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index fe3e18834b91..95fd0d14200b 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -39,6 +39,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/ip.h>
+#include <linux/slab.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 702b53ca937c..ff28801962e0 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -17,7 +17,6 @@
#include <linux/kernel.h>
#include <linux/jiffies.h>
-#include <linux/slab.h>
#include <linux/types.h>
#include <linux/interrupt.h>
#include <linux/sysctl.h>
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 33e2c799cba7..2c7f185dfae4 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -32,6 +32,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
+#include <linux/gfp.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <asm/unaligned.h>
@@ -208,7 +209,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
*/
from.ip = n_cp->vaddr.ip;
port = n_cp->vport;
- sprintf(buf, "%d,%d,%d,%d,%d,%d", NIPQUAD(from.ip),
+ sprintf(buf, "%u,%u,%u,%u,%u,%u", NIPQUAD(from.ip),
(ntohs(port)>>8)&255, ntohs(port)&255);
buf_len = strlen(buf);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 1b9370db2305..94a45213faa6 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -43,6 +43,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/ip.h>
+#include <linux/slab.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index f7476b95ab46..535dc2b419d8 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -45,6 +45,8 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/slab.h>
/* for sysctl */
#include <linux/fs.h>
@@ -85,25 +87,25 @@ static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
/*
* IPVS destination set structure and operations
*/
-struct ip_vs_dest_list {
- struct ip_vs_dest_list *next; /* list link */
+struct ip_vs_dest_set_elem {
+ struct list_head list; /* list link */
struct ip_vs_dest *dest; /* destination server */
};
struct ip_vs_dest_set {
atomic_t size; /* set size */
unsigned long lastmod; /* last modified time */
- struct ip_vs_dest_list *list; /* destination list */
+ struct list_head list; /* destination list */
rwlock_t lock; /* lock for this list */
};
-static struct ip_vs_dest_list *
+static struct ip_vs_dest_set_elem *
ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
{
- struct ip_vs_dest_list *e;
+ struct ip_vs_dest_set_elem *e;
- for (e=set->list; e!=NULL; e=e->next) {
+ list_for_each_entry(e, &set->list, list) {
if (e->dest == dest)
/* already existed */
return NULL;
@@ -118,9 +120,7 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
atomic_inc(&dest->refcnt);
e->dest = dest;
- /* link it to the list */
- e->next = set->list;
- set->list = e;
+ list_add(&e->list, &set->list);
atomic_inc(&set->size);
set->lastmod = jiffies;
@@ -130,34 +130,33 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
static void
ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
{
- struct ip_vs_dest_list *e, **ep;
+ struct ip_vs_dest_set_elem *e;
- for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
+ list_for_each_entry(e, &set->list, list) {
if (e->dest == dest) {
/* HIT */
- *ep = e->next;
atomic_dec(&set->size);
set->lastmod = jiffies;
atomic_dec(&e->dest->refcnt);
+ list_del(&e->list);
kfree(e);
break;
}
- ep = &e->next;
}
}
static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
{
- struct ip_vs_dest_list *e, **ep;
+ struct ip_vs_dest_set_elem *e, *ep;
write_lock(&set->lock);
- for (ep=&set->list, e=*ep; e!=NULL; e=*ep) {
- *ep = e->next;
+ list_for_each_entry_safe(e, ep, &set->list, list) {
/*
* We don't kfree dest because it is refered either
* by its service or by the trash dest list.
*/
atomic_dec(&e->dest->refcnt);
+ list_del(&e->list);
kfree(e);
}
write_unlock(&set->lock);
@@ -166,7 +165,7 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
/* get weighted least-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
{
- register struct ip_vs_dest_list *e;
+ register struct ip_vs_dest_set_elem *e;
struct ip_vs_dest *dest, *least;
int loh, doh;
@@ -174,7 +173,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
return NULL;
/* select the first destination server, whose weight > 0 */
- for (e=set->list; e!=NULL; e=e->next) {
+ list_for_each_entry(e, &set->list, list) {
least = e->dest;
if (least->flags & IP_VS_DEST_F_OVERLOAD)
continue;
@@ -190,7 +189,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
/* find the destination with the weighted least load */
nextstage:
- for (e=e->next; e!=NULL; e=e->next) {
+ list_for_each_entry(e, &set->list, list) {
dest = e->dest;
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
@@ -220,7 +219,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
/* get weighted most-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
{
- register struct ip_vs_dest_list *e;
+ register struct ip_vs_dest_set_elem *e;
struct ip_vs_dest *dest, *most;
int moh, doh;
@@ -228,7 +227,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
return NULL;
/* select the first destination server, whose weight > 0 */
- for (e=set->list; e!=NULL; e=e->next) {
+ list_for_each_entry(e, &set->list, list) {
most = e->dest;
if (atomic_read(&most->weight) > 0) {
moh = atomic_read(&most->activeconns) * 50
@@ -240,7 +239,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
/* find the destination with the weighted most load */
nextstage:
- for (e=e->next; e!=NULL; e=e->next) {
+ list_for_each_entry(e, &set->list, list) {
dest = e->dest;
doh = atomic_read(&dest->activeconns) * 50
+ atomic_read(&dest->inactconns);
@@ -389,7 +388,7 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
/* initilize its dest set */
atomic_set(&(en->set.size), 0);
- en->set.list = NULL;
+ INIT_LIST_HEAD(&en->set.list);
rwlock_init(&en->set.lock);
ip_vs_lblcr_hash(tbl, en);
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 3e7671674549..7fc49f4cf5ad 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -19,6 +19,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
+#include <linux/gfp.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <net/protocol.h>
@@ -257,6 +258,9 @@ int __init ip_vs_protocol_init(void)
#ifdef CONFIG_IP_VS_PROTO_UDP
REGISTER_PROTOCOL(&ip_vs_protocol_udp);
#endif
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+ REGISTER_PROTOCOL(&ip_vs_protocol_sctp);
+#endif
#ifdef CONFIG_IP_VS_PROTO_AH
REGISTER_PROTOCOL(&ip_vs_protocol_ah);
#endif
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
new file mode 100644
index 000000000000..c9a3f7a21d53
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -0,0 +1,1183 @@
+#include <linux/kernel.h>
+#include <linux/ip.h>
+#include <linux/sctp.h>
+#include <net/ip.h>
+#include <net/ip6_checksum.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/sctp/checksum.h>
+#include <net/ip_vs.h>
+
+
+static struct ip_vs_conn *
+sctp_conn_in_get(int af,
+ const struct sk_buff *skb,
+ struct ip_vs_protocol *pp,
+ const struct ip_vs_iphdr *iph,
+ unsigned int proto_off,
+ int inverse)
+{
+ __be16 _ports[2], *pptr;
+
+ pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+ if (pptr == NULL)
+ return NULL;
+
+ if (likely(!inverse))
+ return ip_vs_conn_in_get(af, iph->protocol,
+ &iph->saddr, pptr[0],
+ &iph->daddr, pptr[1]);
+ else
+ return ip_vs_conn_in_get(af, iph->protocol,
+ &iph->daddr, pptr[1],
+ &iph->saddr, pptr[0]);
+}
+
+static struct ip_vs_conn *
+sctp_conn_out_get(int af,
+ const struct sk_buff *skb,
+ struct ip_vs_protocol *pp,
+ const struct ip_vs_iphdr *iph,
+ unsigned int proto_off,
+ int inverse)
+{
+ __be16 _ports[2], *pptr;
+
+ pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+ if (pptr == NULL)
+ return NULL;
+
+ if (likely(!inverse))
+ return ip_vs_conn_out_get(af, iph->protocol,
+ &iph->saddr, pptr[0],
+ &iph->daddr, pptr[1]);
+ else
+ return ip_vs_conn_out_get(af, iph->protocol,
+ &iph->daddr, pptr[1],
+ &iph->saddr, pptr[0]);
+}
+
+static int
+sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+ int *verdict, struct ip_vs_conn **cpp)
+{
+ struct ip_vs_service *svc;
+ sctp_chunkhdr_t _schunkh, *sch;
+ sctp_sctphdr_t *sh, _sctph;
+ struct ip_vs_iphdr iph;
+
+ ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
+
+ sh = skb_header_pointer(skb, iph.len, sizeof(_sctph), &_sctph);
+ if (sh == NULL)
+ return 0;
+
+ sch = skb_header_pointer(skb, iph.len + sizeof(sctp_sctphdr_t),
+ sizeof(_schunkh), &_schunkh);
+ if (sch == NULL)
+ return 0;
+
+ if ((sch->type == SCTP_CID_INIT) &&
+ (svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+ &iph.daddr, sh->dest))) {
+ if (ip_vs_todrop()) {
+ /*
+ * It seems that we are very loaded.
+ * We have to drop this packet :(
+ */
+ ip_vs_service_put(svc);
+ *verdict = NF_DROP;
+ return 0;
+ }
+ /*
+ * Let the virtual server select a real server for the
+ * incoming connection, and create a connection entry.
+ */
+ *cpp = ip_vs_schedule(svc, skb);
+ if (!*cpp) {
+ *verdict = ip_vs_leave(svc, skb, pp);
+ return 0;
+ }
+ ip_vs_service_put(svc);
+ }
+
+ return 1;
+}
+
+static int
+sctp_snat_handler(struct sk_buff *skb,
+ struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+ sctp_sctphdr_t *sctph;
+ unsigned int sctphoff;
+ __be32 crc32;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ sctphoff = sizeof(struct ipv6hdr);
+ else
+#endif
+ sctphoff = ip_hdrlen(skb);
+
+ /* csum_check requires unshared skb */
+ if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
+ return 0;
+
+ if (unlikely(cp->app != NULL)) {
+ /* Some checks before mangling */
+ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+ return 0;
+
+ /* Call application helper if needed */
+ if (!ip_vs_app_pkt_out(cp, skb))
+ return 0;
+ }
+
+ sctph = (void *) skb_network_header(skb) + sctphoff;
+ sctph->source = cp->vport;
+
+ /* Calculate the checksum */
+ crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff);
+ for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next)
+ crc32 = sctp_update_cksum((u8 *) skb->data, skb_headlen(skb),
+ crc32);
+ crc32 = sctp_end_cksum(crc32);
+ sctph->checksum = crc32;
+
+ return 1;
+}
+
+static int
+sctp_dnat_handler(struct sk_buff *skb,
+ struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
+{
+
+ sctp_sctphdr_t *sctph;
+ unsigned int sctphoff;
+ __be32 crc32;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ sctphoff = sizeof(struct ipv6hdr);
+ else
+#endif
+ sctphoff = ip_hdrlen(skb);
+
+ /* csum_check requires unshared skb */
+ if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
+ return 0;
+
+ if (unlikely(cp->app != NULL)) {
+ /* Some checks before mangling */
+ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+ return 0;
+
+ /* Call application helper if needed */
+ if (!ip_vs_app_pkt_out(cp, skb))
+ return 0;
+ }
+
+ sctph = (void *) skb_network_header(skb) + sctphoff;
+ sctph->dest = cp->dport;
+
+ /* Calculate the checksum */
+ crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff);
+ for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next)
+ crc32 = sctp_update_cksum((u8 *) skb->data, skb_headlen(skb),
+ crc32);
+ crc32 = sctp_end_cksum(crc32);
+ sctph->checksum = crc32;
+
+ return 1;
+}
+
+static int
+sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+ unsigned int sctphoff;
+ struct sctphdr *sh, _sctph;
+ __le32 cmp;
+ __le32 val;
+ __u32 tmp;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ sctphoff = sizeof(struct ipv6hdr);
+ else
+#endif
+ sctphoff = ip_hdrlen(skb);
+
+ sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph);
+ if (sh == NULL)
+ return 0;
+
+ cmp = sh->checksum;
+
+ tmp = sctp_start_cksum((__u8 *) sh, skb_headlen(skb));
+ for (; list; list = list->next)
+ tmp = sctp_update_cksum((__u8 *) list->data,
+ skb_headlen(list), tmp);
+
+ val = sctp_end_cksum(tmp);
+
+ if (val != cmp) {
+ /* CRC failure, dump it. */
+ IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+ "Failed checksum for");
+ return 0;
+ }
+ return 1;
+}
+
+struct ipvs_sctp_nextstate {
+ int next_state;
+};
+enum ipvs_sctp_event_t {
+ IP_VS_SCTP_EVE_DATA_CLI,
+ IP_VS_SCTP_EVE_DATA_SER,
+ IP_VS_SCTP_EVE_INIT_CLI,
+ IP_VS_SCTP_EVE_INIT_SER,
+ IP_VS_SCTP_EVE_INIT_ACK_CLI,
+ IP_VS_SCTP_EVE_INIT_ACK_SER,
+ IP_VS_SCTP_EVE_COOKIE_ECHO_CLI,
+ IP_VS_SCTP_EVE_COOKIE_ECHO_SER,
+ IP_VS_SCTP_EVE_COOKIE_ACK_CLI,
+ IP_VS_SCTP_EVE_COOKIE_ACK_SER,
+ IP_VS_SCTP_EVE_ABORT_CLI,
+ IP_VS_SCTP_EVE__ABORT_SER,
+ IP_VS_SCTP_EVE_SHUT_CLI,
+ IP_VS_SCTP_EVE_SHUT_SER,
+ IP_VS_SCTP_EVE_SHUT_ACK_CLI,
+ IP_VS_SCTP_EVE_SHUT_ACK_SER,
+ IP_VS_SCTP_EVE_SHUT_COM_CLI,
+ IP_VS_SCTP_EVE_SHUT_COM_SER,
+ IP_VS_SCTP_EVE_LAST
+};
+
+static enum ipvs_sctp_event_t sctp_events[255] = {
+ IP_VS_SCTP_EVE_DATA_CLI,
+ IP_VS_SCTP_EVE_INIT_CLI,
+ IP_VS_SCTP_EVE_INIT_ACK_CLI,
+ IP_VS_SCTP_EVE_DATA_CLI,
+ IP_VS_SCTP_EVE_DATA_CLI,
+ IP_VS_SCTP_EVE_DATA_CLI,
+ IP_VS_SCTP_EVE_ABORT_CLI,
+ IP_VS_SCTP_EVE_SHUT_CLI,
+ IP_VS_SCTP_EVE_SHUT_ACK_CLI,
+ IP_VS_SCTP_EVE_DATA_CLI,
+ IP_VS_SCTP_EVE_COOKIE_ECHO_CLI,
+ IP_VS_SCTP_EVE_COOKIE_ACK_CLI,
+ IP_VS_SCTP_EVE_DATA_CLI,
+ IP_VS_SCTP_EVE_DATA_CLI,
+ IP_VS_SCTP_EVE_SHUT_COM_CLI,
+};
+
+static struct ipvs_sctp_nextstate
+ sctp_states_table[IP_VS_SCTP_S_LAST][IP_VS_SCTP_EVE_LAST] = {
+ /*
+ * STATE : IP_VS_SCTP_S_NONE
+ */
+ /*next state *//*event */
+ {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
+ {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+ {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ },
+ },
+ /*
+ * STATE : IP_VS_SCTP_S_INIT_CLI
+ * Cient sent INIT and is waiting for reply from server(In ECHO_WAIT)
+ */
+ {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
+ {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+ {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+ {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ECHO_CLI */ },
+ {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_ECHO_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+ {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+ },
+ /*
+ * State : IP_VS_SCTP_S_INIT_SER
+ * Server sent INIT and waiting for INIT ACK from the client
+ */
+ {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
+ {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+ {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+ {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+ {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+ {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+ },
+ /*
+ * State : IP_VS_SCTP_S_INIT_ACK_CLI
+ * Client sent INIT ACK and waiting for ECHO from the server
+ */
+ {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
+ /*
+ * We have got an INIT from client. From the spec.“Upon receipt of
+ * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+ * an INIT ACK using the same parameters it sent in its original
+ * INIT chunk (including its Initiate Tag, unchanged”).
+ */
+ {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+ {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+ /*
+ * INIT_ACK has been resent by the client, let us stay is in
+ * the same state
+ */
+ {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+ /*
+ * INIT_ACK sent by the server, close the connection
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+ /*
+ * ECHO by client, it should not happen, close the connection
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+ /*
+ * ECHO by server, this is what we are expecting, move to ECHO_SER
+ */
+ {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+ /*
+ * COOKIE ACK from client, it should not happen, close the connection
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+ /*
+ * Unexpected COOKIE ACK from server, staty in the same state
+ */
+ {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+ },
+ /*
+ * State : IP_VS_SCTP_S_INIT_ACK_SER
+ * Server sent INIT ACK and waiting for ECHO from the client
+ */
+ {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
+ /*
+ * We have got an INIT from client. From the spec.“Upon receipt of
+ * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+ * an INIT ACK using the same parameters it sent in its original
+ * INIT chunk (including its Initiate Tag, unchanged”).
+ */
+ {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+ {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+ /*
+ * Unexpected INIT_ACK by the client, let us close the connection
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+ /*
+ * INIT_ACK resent by the server, let us move to same state
+ */
+ {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+ /*
+ * Client send the ECHO, this is what we are expecting,
+ * move to ECHO_CLI
+ */
+ {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+ /*
+ * ECHO received from the server, Not sure what to do,
+ * let us close it
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+ /*
+ * COOKIE ACK from client, let us stay in the same state
+ */
+ {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+ /*
+ * COOKIE ACK from server, hmm... this should not happen, lets close
+ * the connection.
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+ },
+ /*
+ * State : IP_VS_SCTP_S_ECHO_CLI
+ * Cient sent ECHO and waiting COOKEI ACK from the Server
+ */
+ {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
+ /*
+ * We have got an INIT from client. From the spec.“Upon receipt of
+ * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+ * an INIT ACK using the same parameters it sent in its original
+ * INIT chunk (including its Initiate Tag, unchanged”).
+ */
+ {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+ {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+ /*
+ * INIT_ACK has been by the client, let us close the connection
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+ /*
+ * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
+ * “If an INIT ACK is received by an endpoint in any state other
+ * than the COOKIE-WAIT state, the endpoint should discard the
+ * INIT ACK chunk”. Stay in the same state
+ */
+ {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+ /*
+ * Client resent the ECHO, let us stay in the same state
+ */
+ {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+ /*
+ * ECHO received from the server, Not sure what to do,
+ * let us close it
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+ /*
+ * COOKIE ACK from client, this shoud not happen, let's close the
+ * connection
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+ /*
+ * COOKIE ACK from server, this is what we are awaiting,lets move to
+ * ESTABLISHED.
+ */
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+ },
+ /*
+ * State : IP_VS_SCTP_S_ECHO_SER
+ * Server sent ECHO and waiting COOKEI ACK from the client
+ */
+ {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
+ /*
+ * We have got an INIT from client. From the spec.“Upon receipt of
+ * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+ * an INIT ACK using the same parameters it sent in its original
+ * INIT chunk (including its Initiate Tag, unchanged”).
+ */
+ {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+ {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+ /*
+ * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
+ * “If an INIT ACK is received by an endpoint in any state other
+ * than the COOKIE-WAIT state, the endpoint should discard the
+ * INIT ACK chunk”. Stay in the same state
+ */
+ {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+ /*
+ * INIT_ACK has been by the server, let us close the connection
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+ /*
+ * Client sent the ECHO, not sure what to do, let's close the
+ * connection.
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+ /*
+ * ECHO resent by the server, stay in the same state
+ */
+ {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+ /*
+ * COOKIE ACK from client, this is what we are expecting, let's move
+ * to ESTABLISHED.
+ */
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+ /*
+ * COOKIE ACK from server, this should not happen, lets close the
+ * connection.
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+ },
+ /*
+ * State : IP_VS_SCTP_S_ESTABLISHED
+ * Association established
+ */
+ {{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_SER */ },
+ /*
+ * We have got an INIT from client. From the spec.“Upon receipt of
+ * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+ * an INIT ACK using the same parameters it sent in its original
+ * INIT chunk (including its Initiate Tag, unchanged”).
+ */
+ {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+ {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+ /*
+ * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
+ * “If an INIT ACK is received by an endpoint in any state other
+ * than the COOKIE-WAIT state, the endpoint should discard the
+ * INIT ACK chunk”. Stay in the same state
+ */
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+ /*
+ * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
+ * peer and peer shall move to the ESTABISHED. if it doesn't handle
+ * it will send ERROR chunk. So, stay in the same state
+ */
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+ /*
+ * COOKIE ACK from client, not sure what to do stay in the same state
+ */
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+ /*
+ * SHUTDOWN from the client, move to SHUDDOWN_CLI
+ */
+ {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+ /*
+ * SHUTDOWN from the server, move to SHUTDOWN_SER
+ */
+ {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
+ /*
+ * client sent SHUDTDOWN_ACK, this should not happen, let's close
+ * the connection
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+ },
+ /*
+ * State : IP_VS_SCTP_S_SHUT_CLI
+ * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server
+ */
+ /*
+ * We recieved the data chuck, keep the state unchanged. I assume
+ * that still data chuncks can be received by both the peers in
+ * SHUDOWN state
+ */
+
+ {{IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ },
+ {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_SER */ },
+ /*
+ * We have got an INIT from client. From the spec.“Upon receipt of
+ * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+ * an INIT ACK using the same parameters it sent in its original
+ * INIT chunk (including its Initiate Tag, unchanged”).
+ */
+ {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+ {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+ /*
+ * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
+ * “If an INIT ACK is received by an endpoint in any state other
+ * than the COOKIE-WAIT state, the endpoint should discard the
+ * INIT ACK chunk”. Stay in the same state
+ */
+ {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+ {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+ /*
+ * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
+ * peer and peer shall move to the ESTABISHED. if it doesn't handle
+ * it will send ERROR chunk. So, stay in the same state
+ */
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+ /*
+ * COOKIE ACK from client, not sure what to do stay in the same state
+ */
+ {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+ {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+ /*
+ * SHUTDOWN resent from the client, move to SHUDDOWN_CLI
+ */
+ {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+ /*
+ * SHUTDOWN from the server, move to SHUTDOWN_SER
+ */
+ {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
+ /*
+ * client sent SHUDTDOWN_ACK, this should not happen, let's close
+ * the connection
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+ /*
+ * Server sent SHUTDOWN ACK, this is what we are expecting, let's move
+ * to SHUDOWN_ACK_SER
+ */
+ {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+ /*
+ * SHUTDOWN COM from client, this should not happen, let's close the
+ * connection
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+ },
+ /*
+ * State : IP_VS_SCTP_S_SHUT_SER
+ * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client
+ */
+ /*
+ * We recieved the data chuck, keep the state unchanged. I assume
+ * that still data chuncks can be received by both the peers in
+ * SHUDOWN state
+ */
+
+ {{IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_CLI */ },
+ {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_SER */ },
+ /*
+ * We have got an INIT from client. From the spec.“Upon receipt of
+ * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+ * an INIT ACK using the same parameters it sent in its original
+ * INIT chunk (including its Initiate Tag, unchanged”).
+ */
+ {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+ {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+ /*
+ * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
+ * “If an INIT ACK is received by an endpoint in any state other
+ * than the COOKIE-WAIT state, the endpoint should discard the
+ * INIT ACK chunk”. Stay in the same state
+ */
+ {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+ {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+ /*
+ * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
+ * peer and peer shall move to the ESTABISHED. if it doesn't handle
+ * it will send ERROR chunk. So, stay in the same state
+ */
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+ /*
+ * COOKIE ACK from client, not sure what to do stay in the same state
+ */
+ {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+ {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+ /*
+ * SHUTDOWN resent from the client, move to SHUDDOWN_CLI
+ */
+ {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+ /*
+ * SHUTDOWN resent from the server, move to SHUTDOWN_SER
+ */
+ {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
+ /*
+ * client sent SHUDTDOWN_ACK, this is what we are expecting, let's
+ * move to SHUT_ACK_CLI
+ */
+ {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+ /*
+ * Server sent SHUTDOWN ACK, this should not happen, let's close the
+ * connection
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+ /*
+ * SHUTDOWN COM from client, this should not happen, let's close the
+ * connection
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+ },
+
+ /*
+ * State : IP_VS_SCTP_S_SHUT_ACK_CLI
+ * SHUTDOWN ACK from the client, awaiting for SHUTDOWN COM from server
+ */
+ /*
+ * We recieved the data chuck, keep the state unchanged. I assume
+ * that still data chuncks can be received by both the peers in
+ * SHUDOWN state
+ */
+
+ {{IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ },
+ {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_SER */ },
+ /*
+ * We have got an INIT from client. From the spec.“Upon receipt of
+ * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+ * an INIT ACK using the same parameters it sent in its original
+ * INIT chunk (including its Initiate Tag, unchanged”).
+ */
+ {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+ {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+ /*
+ * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
+ * “If an INIT ACK is received by an endpoint in any state other
+ * than the COOKIE-WAIT state, the endpoint should discard the
+ * INIT ACK chunk”. Stay in the same state
+ */
+ {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+ {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+ /*
+ * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
+ * peer and peer shall move to the ESTABISHED. if it doesn't handle
+ * it will send ERROR chunk. So, stay in the same state
+ */
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+ /*
+ * COOKIE ACK from client, not sure what to do stay in the same state
+ */
+ {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+ {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+ /*
+ * SHUTDOWN sent from the client, move to SHUDDOWN_CLI
+ */
+ {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+ /*
+ * SHUTDOWN sent from the server, move to SHUTDOWN_SER
+ */
+ {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
+ /*
+ * client resent SHUDTDOWN_ACK, let's stay in the same state
+ */
+ {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+ /*
+ * Server sent SHUTDOWN ACK, this should not happen, let's close the
+ * connection
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+ /*
+ * SHUTDOWN COM from client, this should not happen, let's close the
+ * connection
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+ /*
+ * SHUTDOWN COMPLETE from server this is what we are expecting.
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+ },
+
+ /*
+ * State : IP_VS_SCTP_S_SHUT_ACK_SER
+ * SHUTDOWN ACK from the server, awaiting for SHUTDOWN COM from client
+ */
+ /*
+ * We recieved the data chuck, keep the state unchanged. I assume
+ * that still data chuncks can be received by both the peers in
+ * SHUDOWN state
+ */
+
+ {{IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_CLI */ },
+ {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_SER */ },
+ /*
+ * We have got an INIT from client. From the spec.“Upon receipt of
+ * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with
+ * an INIT ACK using the same parameters it sent in its original
+ * INIT chunk (including its Initiate Tag, unchanged”).
+ */
+ {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+ {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+ /*
+ * INIT_ACK sent by the server, Unexpected INIT ACK, spec says,
+ * “If an INIT ACK is received by an endpoint in any state other
+ * than the COOKIE-WAIT state, the endpoint should discard the
+ * INIT ACK chunk”. Stay in the same state
+ */
+ {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+ {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+ /*
+ * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the
+ * peer and peer shall move to the ESTABISHED. if it doesn't handle
+ * it will send ERROR chunk. So, stay in the same state
+ */
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+ {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+ /*
+ * COOKIE ACK from client, not sure what to do stay in the same state
+ */
+ {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+ {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+ /*
+ * SHUTDOWN sent from the client, move to SHUDDOWN_CLI
+ */
+ {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+ /*
+ * SHUTDOWN sent from the server, move to SHUTDOWN_SER
+ */
+ {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ },
+ /*
+ * client sent SHUDTDOWN_ACK, this should not happen let's close
+ * the connection.
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+ /*
+ * Server resent SHUTDOWN ACK, stay in the same state
+ */
+ {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+ /*
+ * SHUTDOWN COM from client, this what we are expecting, let's close
+ * the connection
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+ /*
+ * SHUTDOWN COMPLETE from server this should not happen.
+ */
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+ },
+ /*
+ * State : IP_VS_SCTP_S_CLOSED
+ */
+ {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ },
+ {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ },
+ {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ },
+ {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }
+ }
+};
+
+/*
+ * Timeout table[state]
+ */
+static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
+ [IP_VS_SCTP_S_NONE] = 2 * HZ,
+ [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ,
+ [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ,
+ [IP_VS_SCTP_S_INIT_ACK_CLI] = 1 * 60 * HZ,
+ [IP_VS_SCTP_S_INIT_ACK_SER] = 1 * 60 * HZ,
+ [IP_VS_SCTP_S_ECHO_CLI] = 1 * 60 * HZ,
+ [IP_VS_SCTP_S_ECHO_SER] = 1 * 60 * HZ,
+ [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ,
+ [IP_VS_SCTP_S_SHUT_CLI] = 1 * 60 * HZ,
+ [IP_VS_SCTP_S_SHUT_SER] = 1 * 60 * HZ,
+ [IP_VS_SCTP_S_SHUT_ACK_CLI] = 1 * 60 * HZ,
+ [IP_VS_SCTP_S_SHUT_ACK_SER] = 1 * 60 * HZ,
+ [IP_VS_SCTP_S_CLOSED] = 10 * HZ,
+ [IP_VS_SCTP_S_LAST] = 2 * HZ,
+};
+
+static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
+ [IP_VS_SCTP_S_NONE] = "NONE",
+ [IP_VS_SCTP_S_INIT_CLI] = "INIT_CLI",
+ [IP_VS_SCTP_S_INIT_SER] = "INIT_SER",
+ [IP_VS_SCTP_S_INIT_ACK_CLI] = "INIT_ACK_CLI",
+ [IP_VS_SCTP_S_INIT_ACK_SER] = "INIT_ACK_SER",
+ [IP_VS_SCTP_S_ECHO_CLI] = "COOKIE_ECHO_CLI",
+ [IP_VS_SCTP_S_ECHO_SER] = "COOKIE_ECHO_SER",
+ [IP_VS_SCTP_S_ESTABLISHED] = "ESTABISHED",
+ [IP_VS_SCTP_S_SHUT_CLI] = "SHUTDOWN_CLI",
+ [IP_VS_SCTP_S_SHUT_SER] = "SHUTDOWN_SER",
+ [IP_VS_SCTP_S_SHUT_ACK_CLI] = "SHUTDOWN_ACK_CLI",
+ [IP_VS_SCTP_S_SHUT_ACK_SER] = "SHUTDOWN_ACK_SER",
+ [IP_VS_SCTP_S_CLOSED] = "CLOSED",
+ [IP_VS_SCTP_S_LAST] = "BUG!"
+};
+
+
+static const char *sctp_state_name(int state)
+{
+ if (state >= IP_VS_SCTP_S_LAST)
+ return "ERR!";
+ if (sctp_state_name_table[state])
+ return sctp_state_name_table[state];
+ return "?";
+}
+
+static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags)
+{
+}
+
+static int
+sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
+{
+
+return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST,
+ sctp_state_name_table, sname, to);
+}
+
+static inline int
+set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+ int direction, const struct sk_buff *skb)
+{
+ sctp_chunkhdr_t _sctpch, *sch;
+ unsigned char chunk_type;
+ int event, next_state;
+ int ihl;
+
+#ifdef CONFIG_IP_VS_IPV6
+ ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
+#else
+ ihl = ip_hdrlen(skb);
+#endif
+
+ sch = skb_header_pointer(skb, ihl + sizeof(sctp_sctphdr_t),
+ sizeof(_sctpch), &_sctpch);
+ if (sch == NULL)
+ return 0;
+
+ chunk_type = sch->type;
+ /*
+ * Section 3: Multiple chunks can be bundled into one SCTP packet
+ * up to the MTU size, except for the INIT, INIT ACK, and
+ * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with
+ * any other chunk in a packet.
+ *
+ * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control
+ * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be
+ * bundled with an ABORT, but they MUST be placed before the ABORT
+ * in the SCTP packet or they will be ignored by the receiver.
+ */
+ if ((sch->type == SCTP_CID_COOKIE_ECHO) ||
+ (sch->type == SCTP_CID_COOKIE_ACK)) {
+ sch = skb_header_pointer(skb, (ihl + sizeof(sctp_sctphdr_t) +
+ sch->length), sizeof(_sctpch), &_sctpch);
+ if (sch) {
+ if (sch->type == SCTP_CID_ABORT)
+ chunk_type = sch->type;
+ }
+ }
+
+ event = sctp_events[chunk_type];
+
+ /*
+ * If the direction is IP_VS_DIR_OUTPUT, this event is from server
+ */
+ if (direction == IP_VS_DIR_OUTPUT)
+ event++;
+ /*
+ * get next state
+ */
+ next_state = sctp_states_table[cp->state][event].next_state;
+
+ if (next_state != cp->state) {
+ struct ip_vs_dest *dest = cp->dest;
+
+ IP_VS_DBG_BUF(8, "%s %s %s:%d->"
+ "%s:%d state: %s->%s conn->refcnt:%d\n",
+ pp->name,
+ ((direction == IP_VS_DIR_OUTPUT) ?
+ "output " : "input "),
+ IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+ ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ sctp_state_name(cp->state),
+ sctp_state_name(next_state),
+ atomic_read(&cp->refcnt));
+ if (dest) {
+ if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (next_state != IP_VS_SCTP_S_ESTABLISHED)) {
+ atomic_dec(&dest->activeconns);
+ atomic_inc(&dest->inactconns);
+ cp->flags |= IP_VS_CONN_F_INACTIVE;
+ } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (next_state == IP_VS_SCTP_S_ESTABLISHED)) {
+ atomic_inc(&dest->activeconns);
+ atomic_dec(&dest->inactconns);
+ cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+ }
+ }
+ }
+
+ cp->timeout = pp->timeout_table[cp->state = next_state];
+
+ return 1;
+}
+
+static int
+sctp_state_transition(struct ip_vs_conn *cp, int direction,
+ const struct sk_buff *skb, struct ip_vs_protocol *pp)
+{
+ int ret = 0;
+
+ spin_lock(&cp->lock);
+ ret = set_sctp_state(pp, cp, direction, skb);
+ spin_unlock(&cp->lock);
+
+ return ret;
+}
+
+/*
+ * Hash table for SCTP application incarnations
+ */
+#define SCTP_APP_TAB_BITS 4
+#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
+#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
+
+static struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
+static DEFINE_SPINLOCK(sctp_app_lock);
+
+static inline __u16 sctp_app_hashkey(__be16 port)
+{
+ return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
+ & SCTP_APP_TAB_MASK;
+}
+
+static int sctp_register_app(struct ip_vs_app *inc)
+{
+ struct ip_vs_app *i;
+ __u16 hash;
+ __be16 port = inc->port;
+ int ret = 0;
+
+ hash = sctp_app_hashkey(port);
+
+ spin_lock_bh(&sctp_app_lock);
+ list_for_each_entry(i, &sctp_apps[hash], p_list) {
+ if (i->port == port) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
+ list_add(&inc->p_list, &sctp_apps[hash]);
+ atomic_inc(&ip_vs_protocol_sctp.appcnt);
+out:
+ spin_unlock_bh(&sctp_app_lock);
+
+ return ret;
+}
+
+static void sctp_unregister_app(struct ip_vs_app *inc)
+{
+ spin_lock_bh(&sctp_app_lock);
+ atomic_dec(&ip_vs_protocol_sctp.appcnt);
+ list_del(&inc->p_list);
+ spin_unlock_bh(&sctp_app_lock);
+}
+
+static int sctp_app_conn_bind(struct ip_vs_conn *cp)
+{
+ int hash;
+ struct ip_vs_app *inc;
+ int result = 0;
+
+ /* Default binding: bind app only for NAT */
+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+ return 0;
+ /* Lookup application incarnations and bind the right one */
+ hash = sctp_app_hashkey(cp->vport);
+
+ spin_lock(&sctp_app_lock);
+ list_for_each_entry(inc, &sctp_apps[hash], p_list) {
+ if (inc->port == cp->vport) {
+ if (unlikely(!ip_vs_app_inc_get(inc)))
+ break;
+ spin_unlock(&sctp_app_lock);
+
+ IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
+ "%s:%u to app %s on port %u\n",
+ __func__,
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr),
+ ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+ ntohs(cp->vport),
+ inc->name, ntohs(inc->port));
+ cp->app = inc;
+ if (inc->init_conn)
+ result = inc->init_conn(inc, cp);
+ goto out;
+ }
+ }
+ spin_unlock(&sctp_app_lock);
+out:
+ return result;
+}
+
+static void ip_vs_sctp_init(struct ip_vs_protocol *pp)
+{
+ IP_VS_INIT_HASH_TABLE(sctp_apps);
+ pp->timeout_table = sctp_timeouts;
+}
+
+
+static void ip_vs_sctp_exit(struct ip_vs_protocol *pp)
+{
+
+}
+
+struct ip_vs_protocol ip_vs_protocol_sctp = {
+ .name = "SCTP",
+ .protocol = IPPROTO_SCTP,
+ .num_states = IP_VS_SCTP_S_LAST,
+ .dont_defrag = 0,
+ .appcnt = ATOMIC_INIT(0),
+ .init = ip_vs_sctp_init,
+ .exit = ip_vs_sctp_exit,
+ .register_app = sctp_register_app,
+ .unregister_app = sctp_unregister_app,
+ .conn_schedule = sctp_conn_schedule,
+ .conn_in_get = sctp_conn_in_get,
+ .conn_out_get = sctp_conn_out_get,
+ .snat_handler = sctp_snat_handler,
+ .dnat_handler = sctp_dnat_handler,
+ .csum_check = sctp_csum_check,
+ .state_name = sctp_state_name,
+ .state_transition = sctp_state_transition,
+ .app_conn_bind = sctp_app_conn_bind,
+ .debug_packet = ip_vs_tcpudp_debug_packet,
+ .timeout_change = sctp_timeout_change,
+ .set_state_timeout = sctp_set_state_timeout,
+};
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 8e6cfd36e6f0..e6cc174fbc06 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -36,6 +36,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/ip.h>
+#include <linux/slab.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index e177f0dc2084..8fb0ae616761 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -400,6 +400,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
flags |= IP_VS_CONN_F_INACTIVE;
else
flags &= ~IP_VS_CONN_F_INACTIVE;
+ } else if (s->protocol == IPPROTO_SCTP) {
+ if (state != IP_VS_SCTP_S_ESTABLISHED)
+ flags |= IP_VS_CONN_F_INACTIVE;
+ else
+ flags &= ~IP_VS_CONN_F_INACTIVE;
}
cp = ip_vs_conn_new(AF_INET, s->protocol,
(union nf_inet_addr *)&s->caddr,
@@ -434,6 +439,15 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
atomic_dec(&dest->inactconns);
cp->flags &= ~IP_VS_CONN_F_INACTIVE;
}
+ } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
+ (cp->state != state)) {
+ dest = cp->dest;
+ if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (state != IP_VS_SCTP_S_ESTABLISHED)) {
+ atomic_dec(&dest->activeconns);
+ atomic_inc(&dest->inactconns);
+ cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+ }
}
if (opt)
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 3c115fc19784..30db633f88f1 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -23,6 +23,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/slab.h>
#include <linux/net.h>
#include <linux/gcd.h>
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 30b3189bd29c..e450cd6f4eb5 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -17,6 +17,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/kernel.h>
+#include <linux/slab.h>
#include <linux/tcp.h> /* for tcphdr */
#include <net/ip.h>
#include <net/tcp.h> /* for csum_tcpudp_magic */
@@ -311,7 +312,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
mtu = dst_mtu(&rt->u.dst);
if (skb->len > mtu) {
dst_release(&rt->u.dst);
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error;
}
@@ -454,7 +455,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
mtu = dst_mtu(&rt->u.dst);
if (skb->len > mtu) {
dst_release(&rt->u.dst);
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
IP_VS_DBG_RL_PKT(0, pp, skb, 0,
"ip_vs_nat_xmit_v6(): frag needed for");
goto tx_error;
@@ -672,7 +673,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
dst_release(&rt->u.dst);
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error;
@@ -814,7 +815,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->u.dst);
if (skb->len > mtu) {
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
dst_release(&rt->u.dst);
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error;
@@ -965,7 +966,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
mtu = dst_mtu(&rt->u.dst);
if (skb->len > mtu) {
dst_release(&rt->u.dst);
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error;
}
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 018f90db511c..ab81b380eae6 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -9,6 +9,7 @@
*/
#include <linux/netfilter.h>
+#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/moduleparam.h>
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 07d9d8857e5d..372e80f07a81 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -16,6 +16,7 @@
#include <linux/in.h>
#include <linux/udp.h>
#include <linux/netfilter.h>
+#include <linux/gfp.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_expect.h>
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 4d79e3c1616c..0c9bbe93cc16 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -42,6 +42,7 @@
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
@@ -68,7 +69,7 @@ static int nf_conntrack_hash_rnd_initted;
static unsigned int nf_conntrack_hash_rnd;
static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
- unsigned int size, unsigned int rnd)
+ u16 zone, unsigned int size, unsigned int rnd)
{
unsigned int n;
u_int32_t h;
@@ -79,16 +80,16 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
*/
n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
h = jhash2((u32 *)tuple, n,
- rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
- tuple->dst.protonum));
+ zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
+ tuple->dst.protonum));
return ((u64)h * size) >> 32;
}
-static inline u_int32_t hash_conntrack(const struct net *net,
+static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple)
{
- return __hash_conntrack(tuple, net->ct.htable_size,
+ return __hash_conntrack(tuple, zone, net->ct.htable_size,
nf_conntrack_hash_rnd);
}
@@ -292,11 +293,12 @@ static void death_by_timeout(unsigned long ul_conntrack)
* - Caller must lock nf_conntrack_lock before calling this function
*/
struct nf_conntrack_tuple_hash *
-__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
+__nf_conntrack_find(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
- unsigned int hash = hash_conntrack(net, tuple);
+ unsigned int hash = hash_conntrack(net, zone, tuple);
/* Disable BHs the entire time since we normally need to disable them
* at least once for the stats anyway.
@@ -304,7 +306,8 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
local_bh_disable();
begin:
hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
- if (nf_ct_tuple_equal(tuple, &h->tuple)) {
+ if (nf_ct_tuple_equal(tuple, &h->tuple) &&
+ nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
NF_CT_STAT_INC(net, found);
local_bh_enable();
return h;
@@ -326,21 +329,23 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find);
/* Find a connection corresponding to a tuple. */
struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
+nf_conntrack_find_get(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
rcu_read_lock();
begin:
- h = __nf_conntrack_find(net, tuple);
+ h = __nf_conntrack_find(net, zone, tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (unlikely(nf_ct_is_dying(ct) ||
!atomic_inc_not_zero(&ct->ct_general.use)))
h = NULL;
else {
- if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) {
+ if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) ||
+ nf_ct_zone(ct) != zone)) {
nf_ct_put(ct);
goto begin;
}
@@ -368,9 +373,11 @@ void nf_conntrack_hash_insert(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
unsigned int hash, repl_hash;
+ u16 zone;
- hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ zone = nf_ct_zone(ct);
+ hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
__nf_conntrack_hash_insert(ct, hash, repl_hash);
}
@@ -387,6 +394,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
struct hlist_nulls_node *n;
enum ip_conntrack_info ctinfo;
struct net *net;
+ u16 zone;
ct = nf_ct_get(skb, &ctinfo);
net = nf_ct_net(ct);
@@ -398,8 +406,9 @@ __nf_conntrack_confirm(struct sk_buff *skb)
if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
return NF_ACCEPT;
- hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ zone = nf_ct_zone(ct);
+ hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
/* We're not in hash table, and we refuse to set up related
connections for unconfirmed conns. But packet copies and
@@ -418,11 +427,13 @@ __nf_conntrack_confirm(struct sk_buff *skb)
not in the hash. If there is, we lost race. */
hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- &h->tuple))
+ &h->tuple) &&
+ zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
goto out;
hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
- &h->tuple))
+ &h->tuple) &&
+ zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
goto out;
/* Remove from unconfirmed list */
@@ -469,15 +480,19 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
struct net *net = nf_ct_net(ignored_conntrack);
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
- unsigned int hash = hash_conntrack(net, tuple);
+ struct nf_conn *ct;
+ u16 zone = nf_ct_zone(ignored_conntrack);
+ unsigned int hash = hash_conntrack(net, zone, tuple);
/* Disable BHs the entire time since we need to disable them at
* least once for the stats anyway.
*/
rcu_read_lock_bh();
hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
- if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
- nf_ct_tuple_equal(tuple, &h->tuple)) {
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (ct != ignored_conntrack &&
+ nf_ct_tuple_equal(tuple, &h->tuple) &&
+ nf_ct_zone(ct) == zone) {
NF_CT_STAT_INC(net, found);
rcu_read_unlock_bh();
return 1;
@@ -540,7 +555,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
return dropped;
}
-struct nf_conn *nf_conntrack_alloc(struct net *net,
+struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_tuple *repl,
gfp_t gfp)
@@ -558,7 +573,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
if (nf_conntrack_max &&
unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
- unsigned int hash = hash_conntrack(net, orig);
+ unsigned int hash = hash_conntrack(net, zone, orig);
if (!early_drop(net, hash)) {
atomic_dec(&net->ct.count);
if (net_ratelimit())
@@ -595,13 +610,28 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
#ifdef CONFIG_NET_NS
ct->ct_net = net;
#endif
-
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ if (zone) {
+ struct nf_conntrack_zone *nf_ct_zone;
+
+ nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC);
+ if (!nf_ct_zone)
+ goto out_free;
+ nf_ct_zone->id = zone;
+ }
+#endif
/*
* changes to lookup keys must be done before setting refcnt to 1
*/
smp_wmb();
atomic_set(&ct->ct_general.use, 1);
return ct;
+
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+out_free:
+ kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+ return ERR_PTR(-ENOMEM);
+#endif
}
EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
@@ -619,7 +649,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
/* Allocate a new conntrack: we return -ENOMEM if classification
failed due to stress. Otherwise it really is unclassifiable. */
static struct nf_conntrack_tuple_hash *
-init_conntrack(struct net *net,
+init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_l3proto *l3proto,
struct nf_conntrack_l4proto *l4proto,
@@ -629,14 +659,16 @@ init_conntrack(struct net *net,
struct nf_conn *ct;
struct nf_conn_help *help;
struct nf_conntrack_tuple repl_tuple;
+ struct nf_conntrack_ecache *ecache;
struct nf_conntrack_expect *exp;
+ u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
pr_debug("Can't invert tuple.\n");
return NULL;
}
- ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC);
+ ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC);
if (IS_ERR(ct)) {
pr_debug("Can't allocate conntrack.\n");
return (struct nf_conntrack_tuple_hash *)ct;
@@ -649,10 +681,14 @@ init_conntrack(struct net *net,
}
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
- nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
+
+ ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
+ nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
+ ecache ? ecache->expmask : 0,
+ GFP_ATOMIC);
spin_lock_bh(&nf_conntrack_lock);
- exp = nf_ct_find_expectation(net, tuple);
+ exp = nf_ct_find_expectation(net, zone, tuple);
if (exp) {
pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
ct, exp);
@@ -674,7 +710,7 @@ init_conntrack(struct net *net,
nf_conntrack_get(&ct->master->ct_general);
NF_CT_STAT_INC(net, expect_new);
} else {
- __nf_ct_try_assign_helper(ct, GFP_ATOMIC);
+ __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
NF_CT_STAT_INC(net, new);
}
@@ -695,7 +731,7 @@ init_conntrack(struct net *net,
/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct nf_conn *
-resolve_normal_ct(struct net *net,
+resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
u_int16_t l3num,
@@ -708,6 +744,7 @@ resolve_normal_ct(struct net *net,
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
+ u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, l3num, protonum, &tuple, l3proto,
@@ -717,9 +754,10 @@ resolve_normal_ct(struct net *net,
}
/* look for tuple match */
- h = nf_conntrack_find_get(net, &tuple);
+ h = nf_conntrack_find_get(net, zone, &tuple);
if (!h) {
- h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff);
+ h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
+ skb, dataoff);
if (!h)
return NULL;
if (IS_ERR(h))
@@ -756,7 +794,7 @@ unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
{
- struct nf_conn *ct;
+ struct nf_conn *ct, *tmpl = NULL;
enum ip_conntrack_info ctinfo;
struct nf_conntrack_l3proto *l3proto;
struct nf_conntrack_l4proto *l4proto;
@@ -765,10 +803,14 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
int set_reply = 0;
int ret;
- /* Previously seen (loopback or untracked)? Ignore. */
if (skb->nfct) {
- NF_CT_STAT_INC_ATOMIC(net, ignore);
- return NF_ACCEPT;
+ /* Previously seen (loopback or untracked)? Ignore. */
+ tmpl = (struct nf_conn *)skb->nfct;
+ if (!nf_ct_is_template(tmpl)) {
+ NF_CT_STAT_INC_ATOMIC(net, ignore);
+ return NF_ACCEPT;
+ }
+ skb->nfct = NULL;
}
/* rcu_read_lock()ed by nf_hook_slow */
@@ -779,7 +821,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
pr_debug("not prepared to track yet or error occured\n");
NF_CT_STAT_INC_ATOMIC(net, error);
NF_CT_STAT_INC_ATOMIC(net, invalid);
- return -ret;
+ ret = -ret;
+ goto out;
}
l4proto = __nf_ct_l4proto_find(pf, protonum);
@@ -788,26 +831,30 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
* inverse of the return code tells to the netfilter
* core what to do with the packet. */
if (l4proto->error != NULL) {
- ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum);
+ ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo,
+ pf, hooknum);
if (ret <= 0) {
NF_CT_STAT_INC_ATOMIC(net, error);
NF_CT_STAT_INC_ATOMIC(net, invalid);
- return -ret;
+ ret = -ret;
+ goto out;
}
}
- ct = resolve_normal_ct(net, skb, dataoff, pf, protonum,
+ ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
l3proto, l4proto, &set_reply, &ctinfo);
if (!ct) {
/* Not valid part of a connection */
NF_CT_STAT_INC_ATOMIC(net, invalid);
- return NF_ACCEPT;
+ ret = NF_ACCEPT;
+ goto out;
}
if (IS_ERR(ct)) {
/* Too stressed to deal. */
NF_CT_STAT_INC_ATOMIC(net, drop);
- return NF_DROP;
+ ret = NF_DROP;
+ goto out;
}
NF_CT_ASSERT(skb->nfct);
@@ -822,11 +869,15 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
NF_CT_STAT_INC_ATOMIC(net, invalid);
if (ret == -NF_DROP)
NF_CT_STAT_INC_ATOMIC(net, drop);
- return -ret;
+ ret = -ret;
+ goto out;
}
if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
- nf_conntrack_event_cache(IPCT_STATUS, ct);
+ nf_conntrack_event_cache(IPCT_REPLY, ct);
+out:
+ if (tmpl)
+ nf_ct_put(tmpl);
return ret;
}
@@ -865,7 +916,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
return;
rcu_read_lock();
- __nf_ct_try_assign_helper(ct, GFP_ATOMIC);
+ __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC);
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
@@ -939,6 +990,14 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
}
EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = {
+ .len = sizeof(struct nf_conntrack_zone),
+ .align = __alignof__(struct nf_conntrack_zone),
+ .id = NF_CT_EXT_ZONE,
+};
+#endif
+
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
#include <linux/netfilter/nfnetlink.h>
@@ -1120,6 +1179,9 @@ static void nf_conntrack_cleanup_init_net(void)
nf_conntrack_helper_fini();
nf_conntrack_proto_fini();
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ nf_ct_extend_unregister(&nf_ct_zone_extend);
+#endif
}
static void nf_conntrack_cleanup_net(struct net *net)
@@ -1195,6 +1257,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
unsigned int hashsize, old_size;
struct hlist_nulls_head *hash, *old_hash;
struct nf_conntrack_tuple_hash *h;
+ struct nf_conn *ct;
if (current->nsproxy->net_ns != &init_net)
return -EOPNOTSUPP;
@@ -1221,8 +1284,10 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
h = hlist_nulls_entry(init_net.ct.hash[i].first,
struct nf_conntrack_tuple_hash, hnnode);
+ ct = nf_ct_tuplehash_to_ctrack(h);
hlist_nulls_del_rcu(&h->hnnode);
- bucket = __hash_conntrack(&h->tuple, hashsize,
+ bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
+ hashsize,
nf_conntrack_hash_rnd);
hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
}
@@ -1280,6 +1345,11 @@ static int nf_conntrack_init_init_net(void)
if (ret < 0)
goto err_helper;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ ret = nf_ct_extend_register(&nf_ct_zone_extend);
+ if (ret < 0)
+ goto err_extend;
+#endif
/* Set up fake conntrack: to never be deleted, not in any hashes */
#ifdef CONFIG_NET_NS
nf_conntrack_untracked.ct_net = &init_net;
@@ -1290,6 +1360,10 @@ static int nf_conntrack_init_init_net(void)
return 0;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+err_extend:
+ nf_conntrack_helper_fini();
+#endif
err_helper:
nf_conntrack_proto_fini();
err_proto:
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index d5a9bcd7d61b..f516961a83b4 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -18,6 +18,7 @@
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
+#include <linux/slab.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 2f25ff610982..acb29ccaa41f 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -27,6 +27,7 @@
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
@@ -84,7 +85,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple
}
struct nf_conntrack_expect *
-__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
+__nf_ct_expect_find(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i;
struct hlist_node *n;
@@ -95,7 +97,8 @@ __nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
h = nf_ct_expect_dst_hash(tuple);
hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
- if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
+ if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
+ nf_ct_zone(i->master) == zone)
return i;
}
return NULL;
@@ -104,12 +107,13 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
/* Just find a expectation corresponding to a tuple. */
struct nf_conntrack_expect *
-nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
+nf_ct_expect_find_get(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i;
rcu_read_lock();
- i = __nf_ct_expect_find(net, tuple);
+ i = __nf_ct_expect_find(net, zone, tuple);
if (i && !atomic_inc_not_zero(&i->use))
i = NULL;
rcu_read_unlock();
@@ -121,7 +125,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
/* If an expectation for this connection is found, it gets delete from
* global list then returned. */
struct nf_conntrack_expect *
-nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
+nf_ct_find_expectation(struct net *net, u16 zone,
+ const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i, *exp = NULL;
struct hlist_node *n;
@@ -133,7 +138,8 @@ nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
h = nf_ct_expect_dst_hash(tuple);
hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
- nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
+ nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
+ nf_ct_zone(i->master) == zone) {
exp = i;
break;
}
@@ -204,7 +210,8 @@ static inline int expect_matches(const struct nf_conntrack_expect *a,
{
return a->master == b->master && a->class == b->class &&
nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
- nf_ct_tuple_mask_equal(&a->mask, &b->mask);
+ nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
+ nf_ct_zone(a->master) == nf_ct_zone(b->master);
}
/* Generally a bad idea to call this: could have matched already. */
@@ -232,7 +239,6 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
new->master = me;
atomic_set(&new->use, 1);
- INIT_RCU_HEAD(&new->rcu);
return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
@@ -500,6 +506,7 @@ static void exp_seq_stop(struct seq_file *seq, void *v)
static int exp_seq_show(struct seq_file *s, void *v)
{
struct nf_conntrack_expect *expect;
+ struct nf_conntrack_helper *helper;
struct hlist_node *n = v;
char *delim = "";
@@ -525,6 +532,14 @@ static int exp_seq_show(struct seq_file *s, void *v)
if (expect->flags & NF_CT_EXPECT_INACTIVE)
seq_printf(s, "%sINACTIVE", delim);
+ helper = rcu_dereference(nfct_help(expect->master)->helper);
+ if (helper) {
+ seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
+ if (helper->expect_policy[expect->class].name)
+ seq_printf(s, "/%s",
+ helper->expect_policy[expect->class].name);
+ }
+
return seq_putc(s, '\n');
}
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index fef95be334bd..fdc8fb4ae10f 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -59,7 +59,6 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp)
if (!*ext)
return NULL;
- INIT_RCU_HEAD(&(*ext)->rcu);
(*ext)->offset[id] = off;
(*ext)->len = len;
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index f0732aa18e4f..2ae3169e7633 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -13,6 +13,7 @@
#include <linux/moduleparam.h>
#include <linux/netfilter.h>
#include <linux/ip.h>
+#include <linux/slab.h>
#include <linux/ipv6.h>
#include <linux/ctype.h>
#include <linux/inet.h>
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 66369490230e..a487c8038044 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -17,6 +17,7 @@
#include <linux/inet.h>
#include <linux/in.h>
#include <linux/ip.h>
+#include <linux/slab.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/skbuff.h>
@@ -29,6 +30,7 @@
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_conntrack_h323.h>
/* Parameters */
@@ -1216,7 +1218,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
tuple.dst.u.tcp.port = port;
tuple.dst.protonum = IPPROTO_TCP;
- exp = __nf_ct_expect_find(net, &tuple);
+ exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
if (exp && exp->master == ct)
return exp;
return NULL;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 4b1a56bd074c..59e1a4cd4e8b 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -15,7 +15,6 @@
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
-#include <linux/slab.h>
#include <linux/random.h>
#include <linux/err.h>
#include <linux/kernel.h>
@@ -65,7 +64,7 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
}
struct nf_conntrack_helper *
-__nf_conntrack_helper_find_byname(const char *name)
+__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum)
{
struct nf_conntrack_helper *h;
struct hlist_node *n;
@@ -73,13 +72,34 @@ __nf_conntrack_helper_find_byname(const char *name)
for (i = 0; i < nf_ct_helper_hsize; i++) {
hlist_for_each_entry_rcu(h, n, &nf_ct_helper_hash[i], hnode) {
- if (!strcmp(h->name, name))
+ if (!strcmp(h->name, name) &&
+ h->tuple.src.l3num == l3num &&
+ h->tuple.dst.protonum == protonum)
return h;
}
}
return NULL;
}
-EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find_byname);
+EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find);
+
+struct nf_conntrack_helper *
+nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
+{
+ struct nf_conntrack_helper *h;
+
+ h = __nf_conntrack_helper_find(name, l3num, protonum);
+#ifdef CONFIG_MODULES
+ if (h == NULL) {
+ if (request_module("nfct-helper-%s", name) == 0)
+ h = __nf_conntrack_helper_find(name, l3num, protonum);
+ }
+#endif
+ if (h != NULL && !try_module_get(h->me))
+ h = NULL;
+
+ return h;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get);
struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
{
@@ -94,13 +114,22 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
}
EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
-int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags)
+int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
+ gfp_t flags)
{
+ struct nf_conntrack_helper *helper = NULL;
+ struct nf_conn_help *help;
int ret = 0;
- struct nf_conntrack_helper *helper;
- struct nf_conn_help *help = nfct_help(ct);
- helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ if (tmpl != NULL) {
+ help = nfct_help(tmpl);
+ if (help != NULL)
+ helper = help->helper;
+ }
+
+ help = nfct_help(ct);
+ if (helper == NULL)
+ helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
if (helper == NULL) {
if (help)
rcu_assign_pointer(help->helper, NULL);
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 8bd98c84f77e..7673930ca342 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -15,6 +15,7 @@
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/netfilter.h>
+#include <linux/slab.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_expect.h>
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 0ffe689dfe97..afc52f2ee4ac 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -27,9 +27,11 @@
#include <linux/netlink.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
+#include <linux/slab.h>
#include <linux/netfilter.h>
#include <net/netlink.h>
+#include <net/sock.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
@@ -38,6 +40,7 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_zones.h>
#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_protocol.h>
@@ -378,6 +381,9 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
goto nla_put_failure;
nla_nest_end(skb, nest_parms);
+ if (nf_ct_zone(ct))
+ NLA_PUT_BE16(skb, CTA_ZONE, htons(nf_ct_zone(ct)));
+
if (ctnetlink_dump_status(skb, ct) < 0 ||
ctnetlink_dump_timeout(skb, ct) < 0 ||
ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
@@ -456,6 +462,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
static int
ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
{
+ struct net *net;
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
@@ -482,7 +489,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
} else
return 0;
- if (!item->report && !nfnetlink_has_listeners(group))
+ net = nf_ct_net(ct);
+ if (!item->report && !nfnetlink_has_listeners(net, group))
return 0;
skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC);
@@ -514,6 +522,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
goto nla_put_failure;
nla_nest_end(skb, nest_parms);
+ if (nf_ct_zone(ct))
+ NLA_PUT_BE16(skb, CTA_ZONE, htons(nf_ct_zone(ct)));
+
if (ctnetlink_dump_id(skb, ct) < 0)
goto nla_put_failure;
@@ -559,7 +570,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
rcu_read_unlock();
nlmsg_end(skb, nlh);
- err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
+ err = nfnetlink_send(skb, net, item->pid, group, item->report,
+ GFP_ATOMIC);
if (err == -ENOBUFS || err == -EAGAIN)
return -ENOBUFS;
@@ -571,7 +583,9 @@ nla_put_failure:
nlmsg_failure:
kfree_skb(skb);
errout:
- nfnetlink_set_err(0, group, -ENOBUFS);
+ if (nfnetlink_set_err(net, 0, group, -ENOBUFS) > 0)
+ return -ENOBUFS;
+
return 0;
}
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
@@ -586,6 +600,7 @@ static int ctnetlink_done(struct netlink_callback *cb)
static int
ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = sock_net(skb->sk);
struct nf_conn *ct, *last;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
@@ -594,9 +609,9 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
last = (struct nf_conn *)cb->args[1];
- for (; cb->args[0] < init_net.ct.htable_size; cb->args[0]++) {
+ for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) {
restart:
- hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]],
+ hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[cb->args[0]],
hnnode) {
if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
continue;
@@ -703,6 +718,11 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr,
return ret;
}
+static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
+ [CTA_TUPLE_IP] = { .type = NLA_NESTED },
+ [CTA_TUPLE_PROTO] = { .type = NLA_NESTED },
+};
+
static int
ctnetlink_parse_tuple(const struct nlattr * const cda[],
struct nf_conntrack_tuple *tuple,
@@ -713,7 +733,7 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
memset(tuple, 0, sizeof(*tuple));
- nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], NULL);
+ nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy);
if (!tb[CTA_TUPLE_IP])
return -EINVAL;
@@ -740,12 +760,31 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
return 0;
}
+static int
+ctnetlink_parse_zone(const struct nlattr *attr, u16 *zone)
+{
+ if (attr)
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ *zone = ntohs(nla_get_be16(attr));
+#else
+ return -EOPNOTSUPP;
+#endif
+ else
+ *zone = 0;
+
+ return 0;
+}
+
+static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = {
+ [CTA_HELP_NAME] = { .type = NLA_NUL_STRING },
+};
+
static inline int
ctnetlink_parse_help(const struct nlattr *attr, char **helper_name)
{
struct nlattr *tb[CTA_HELP_MAX+1];
- nla_parse_nested(tb, CTA_HELP_MAX, attr, NULL);
+ nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy);
if (!tb[CTA_HELP_NAME])
return -EINVAL;
@@ -756,11 +795,18 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name)
}
static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
+ [CTA_TUPLE_ORIG] = { .type = NLA_NESTED },
+ [CTA_TUPLE_REPLY] = { .type = NLA_NESTED },
[CTA_STATUS] = { .type = NLA_U32 },
+ [CTA_PROTOINFO] = { .type = NLA_NESTED },
+ [CTA_HELP] = { .type = NLA_NESTED },
+ [CTA_NAT_SRC] = { .type = NLA_NESTED },
[CTA_TIMEOUT] = { .type = NLA_U32 },
[CTA_MARK] = { .type = NLA_U32 },
- [CTA_USE] = { .type = NLA_U32 },
[CTA_ID] = { .type = NLA_U32 },
+ [CTA_NAT_DST] = { .type = NLA_NESTED },
+ [CTA_TUPLE_MASTER] = { .type = NLA_NESTED },
+ [CTA_ZONE] = { .type = NLA_U16 },
};
static int
@@ -768,12 +814,18 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
+ struct net *net = sock_net(ctnl);
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
struct nf_conn *ct;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
- int err = 0;
+ u16 zone;
+ int err;
+
+ err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
+ if (err < 0)
+ return err;
if (cda[CTA_TUPLE_ORIG])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
@@ -781,7 +833,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
else {
/* Flush the whole table */
- nf_conntrack_flush_report(&init_net,
+ nf_conntrack_flush_report(net,
NETLINK_CB(skb).pid,
nlmsg_report(nlh));
return 0;
@@ -790,7 +842,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- h = nf_conntrack_find_get(&init_net, &tuple);
+ h = nf_conntrack_find_get(net, zone, &tuple);
if (!h)
return -ENOENT;
@@ -828,18 +880,24 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
+ struct net *net = sock_net(ctnl);
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
struct nf_conn *ct;
struct sk_buff *skb2 = NULL;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
- int err = 0;
+ u16 zone;
+ int err;
if (nlh->nlmsg_flags & NLM_F_DUMP)
return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table,
ctnetlink_done);
+ err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
+ if (err < 0)
+ return err;
+
if (cda[CTA_TUPLE_ORIG])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
else if (cda[CTA_TUPLE_REPLY])
@@ -850,7 +908,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- h = nf_conntrack_find_get(&init_net, &tuple);
+ h = nf_conntrack_find_get(net, zone, &tuple);
if (!h)
return -ENOENT;
@@ -994,7 +1052,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[])
return 0;
}
- helper = __nf_conntrack_helper_find_byname(helpname);
+ helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
+ nf_ct_protonum(ct));
if (helper == NULL) {
#ifdef CONFIG_MODULES
spin_unlock_bh(&nf_conntrack_lock);
@@ -1005,7 +1064,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[])
}
spin_lock_bh(&nf_conntrack_lock);
- helper = __nf_conntrack_helper_find_byname(helpname);
+ helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
+ nf_ct_protonum(ct));
if (helper)
return -EAGAIN;
#endif
@@ -1020,9 +1080,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[])
/* need to zero data of old helper */
memset(&help->help, 0, sizeof(help->help));
} else {
- help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
- if (help == NULL)
- return -ENOMEM;
+ /* we cannot set a helper for an existing conntrack */
+ return -EOPNOTSUPP;
}
rcu_assign_pointer(help->helper, helper);
@@ -1044,6 +1103,12 @@ ctnetlink_change_timeout(struct nf_conn *ct, const struct nlattr * const cda[])
return 0;
}
+static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = {
+ [CTA_PROTOINFO_TCP] = { .type = NLA_NESTED },
+ [CTA_PROTOINFO_DCCP] = { .type = NLA_NESTED },
+ [CTA_PROTOINFO_SCTP] = { .type = NLA_NESTED },
+};
+
static inline int
ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[])
{
@@ -1052,7 +1117,7 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]
struct nf_conntrack_l4proto *l4proto;
int err = 0;
- nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, NULL);
+ nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy);
rcu_read_lock();
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
@@ -1064,12 +1129,18 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]
}
#ifdef CONFIG_NF_NAT_NEEDED
+static const struct nla_policy nat_seq_policy[CTA_NAT_SEQ_MAX+1] = {
+ [CTA_NAT_SEQ_CORRECTION_POS] = { .type = NLA_U32 },
+ [CTA_NAT_SEQ_OFFSET_BEFORE] = { .type = NLA_U32 },
+ [CTA_NAT_SEQ_OFFSET_AFTER] = { .type = NLA_U32 },
+};
+
static inline int
change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr)
{
struct nlattr *cda[CTA_NAT_SEQ_MAX+1];
- nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, NULL);
+ nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy);
if (!cda[CTA_NAT_SEQ_CORRECTION_POS])
return -EINVAL;
@@ -1175,7 +1246,8 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
}
static struct nf_conn *
-ctnetlink_create_conntrack(const struct nlattr * const cda[],
+ctnetlink_create_conntrack(struct net *net, u16 zone,
+ const struct nlattr * const cda[],
struct nf_conntrack_tuple *otuple,
struct nf_conntrack_tuple *rtuple,
u8 u3)
@@ -1184,7 +1256,7 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[],
int err = -EINVAL;
struct nf_conntrack_helper *helper;
- ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC);
+ ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC);
if (IS_ERR(ct))
return ERR_PTR(-ENOMEM);
@@ -1193,7 +1265,6 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[],
ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
- ct->status |= IPS_CONFIRMED;
rcu_read_lock();
if (cda[CTA_HELP]) {
@@ -1203,7 +1274,8 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[],
if (err < 0)
goto err2;
- helper = __nf_conntrack_helper_find_byname(helpname);
+ helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
+ nf_ct_protonum(ct));
if (helper == NULL) {
rcu_read_unlock();
#ifdef CONFIG_MODULES
@@ -1213,7 +1285,9 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[],
}
rcu_read_lock();
- helper = __nf_conntrack_helper_find_byname(helpname);
+ helper = __nf_conntrack_helper_find(helpname,
+ nf_ct_l3num(ct),
+ nf_ct_protonum(ct));
if (helper) {
err = -EAGAIN;
goto err2;
@@ -1236,19 +1310,24 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[],
}
} else {
/* try an implicit helper assignation */
- err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC);
+ err = __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC);
if (err < 0)
goto err2;
}
- if (cda[CTA_STATUS]) {
- err = ctnetlink_change_status(ct, cda);
+ if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
+ err = ctnetlink_change_nat(ct, cda);
if (err < 0)
goto err2;
}
- if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
- err = ctnetlink_change_nat(ct, cda);
+ nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+ nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
+ /* we must add conntrack extensions before confirmation. */
+ ct->status |= IPS_CONFIRMED;
+
+ if (cda[CTA_STATUS]) {
+ err = ctnetlink_change_status(ct, cda);
if (err < 0)
goto err2;
}
@@ -1267,9 +1346,6 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[],
goto err2;
}
- nf_ct_acct_ext_add(ct, GFP_ATOMIC);
- nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
-
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK])
ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
@@ -1285,7 +1361,7 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[],
if (err < 0)
goto err2;
- master_h = nf_conntrack_find_get(&init_net, &master);
+ master_h = nf_conntrack_find_get(net, zone, &master);
if (master_h == NULL) {
err = -ENOENT;
goto err2;
@@ -1313,11 +1389,17 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
+ struct net *net = sock_net(ctnl);
struct nf_conntrack_tuple otuple, rtuple;
struct nf_conntrack_tuple_hash *h = NULL;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
- int err = 0;
+ u16 zone;
+ int err;
+
+ err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
+ if (err < 0)
+ return err;
if (cda[CTA_TUPLE_ORIG]) {
err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3);
@@ -1333,9 +1415,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
spin_lock_bh(&nf_conntrack_lock);
if (cda[CTA_TUPLE_ORIG])
- h = __nf_conntrack_find(&init_net, &otuple);
+ h = __nf_conntrack_find(net, zone, &otuple);
else if (cda[CTA_TUPLE_REPLY])
- h = __nf_conntrack_find(&init_net, &rtuple);
+ h = __nf_conntrack_find(net, zone, &rtuple);
if (h == NULL) {
err = -ENOENT;
@@ -1343,7 +1425,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
struct nf_conn *ct;
enum ip_conntrack_events events;
- ct = ctnetlink_create_conntrack(cda, &otuple,
+ ct = ctnetlink_create_conntrack(net, zone, cda, &otuple,
&rtuple, u3);
if (IS_ERR(ct)) {
err = PTR_ERR(ct);
@@ -1357,7 +1439,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
else
events = IPCT_NEW;
- nf_conntrack_eventmask_report((1 << IPCT_STATUS) |
+ nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
+ (1 << IPCT_ASSURED) |
(1 << IPCT_HELPER) |
(1 << IPCT_PROTOINFO) |
(1 << IPCT_NATSEQADJ) |
@@ -1382,7 +1465,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
if (err == 0) {
nf_conntrack_get(&ct->ct_general);
spin_unlock_bh(&nf_conntrack_lock);
- nf_conntrack_eventmask_report((1 << IPCT_STATUS) |
+ nf_conntrack_eventmask_report((1 << IPCT_REPLY) |
+ (1 << IPCT_ASSURED) |
(1 << IPCT_HELPER) |
(1 << IPCT_PROTOINFO) |
(1 << IPCT_NATSEQADJ) |
@@ -1469,6 +1553,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
const struct nf_conntrack_expect *exp)
{
struct nf_conn *master = exp->master;
+ struct nf_conntrack_helper *helper;
long timeout = (exp->timeout.expires - jiffies) / HZ;
if (timeout < 0)
@@ -1485,6 +1570,9 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout));
NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp));
+ helper = rcu_dereference(nfct_help(master)->helper);
+ if (helper)
+ NLA_PUT_STRING(skb, CTA_EXPECT_HELP_NAME, helper->name);
return 0;
@@ -1526,9 +1614,10 @@ nla_put_failure:
static int
ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
{
+ struct nf_conntrack_expect *exp = item->exp;
+ struct net *net = nf_ct_exp_net(exp);
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- struct nf_conntrack_expect *exp = item->exp;
struct sk_buff *skb;
unsigned int type;
int flags = 0;
@@ -1540,7 +1629,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
return 0;
if (!item->report &&
- !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
+ !nfnetlink_has_listeners(net, NFNLGRP_CONNTRACK_EXP_NEW))
return 0;
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
@@ -1563,7 +1652,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
rcu_read_unlock();
nlmsg_end(skb, nlh);
- nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW,
+ nfnetlink_send(skb, net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW,
item->report, GFP_ATOMIC);
return 0;
@@ -1573,7 +1662,7 @@ nla_put_failure:
nlmsg_failure:
kfree_skb(skb);
errout:
- nfnetlink_set_err(0, 0, -ENOBUFS);
+ nfnetlink_set_err(net, 0, 0, -ENOBUFS);
return 0;
}
#endif
@@ -1587,7 +1676,7 @@ static int ctnetlink_exp_done(struct netlink_callback *cb)
static int
ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = &init_net;
+ struct net *net = sock_net(skb->sk);
struct nf_conntrack_expect *exp, *last;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
struct hlist_node *n;
@@ -1631,8 +1720,12 @@ out:
}
static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
+ [CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
+ [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
+ [CTA_EXPECT_MASK] = { .type = NLA_NESTED },
[CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
[CTA_EXPECT_ID] = { .type = NLA_U32 },
+ [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING },
};
static int
@@ -1640,12 +1733,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
+ struct net *net = sock_net(ctnl);
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
struct sk_buff *skb2;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
- int err = 0;
+ u16 zone;
+ int err;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
return netlink_dump_start(ctnl, skb, nlh,
@@ -1653,6 +1748,10 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
ctnetlink_exp_done);
}
+ err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+ if (err < 0)
+ return err;
+
if (cda[CTA_EXPECT_MASTER])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
else
@@ -1661,7 +1760,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- exp = nf_ct_expect_find_get(&init_net, &tuple);
+ exp = nf_ct_expect_find_get(net, zone, &tuple);
if (!exp)
return -ENOENT;
@@ -1701,23 +1800,28 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
+ struct net *net = sock_net(ctnl);
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple tuple;
- struct nf_conntrack_helper *h;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct hlist_node *n, *next;
u_int8_t u3 = nfmsg->nfgen_family;
unsigned int i;
+ u16 zone;
int err;
if (cda[CTA_EXPECT_TUPLE]) {
/* delete a single expect by tuple */
+ err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+ if (err < 0)
+ return err;
+
err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
if (err < 0)
return err;
/* bump usage count to 2 */
- exp = nf_ct_expect_find_get(&init_net, &tuple);
+ exp = nf_ct_expect_find_get(net, zone, &tuple);
if (!exp)
return -ENOENT;
@@ -1740,18 +1844,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
/* delete all expectations for this helper */
spin_lock_bh(&nf_conntrack_lock);
- h = __nf_conntrack_helper_find_byname(name);
- if (!h) {
- spin_unlock_bh(&nf_conntrack_lock);
- return -EOPNOTSUPP;
- }
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, n, next,
- &init_net.ct.expect_hash[i],
+ &net->ct.expect_hash[i],
hnode) {
m_help = nfct_help(exp->master);
- if (m_help->helper == h
- && del_timer(&exp->timeout)) {
+ if (!strcmp(m_help->helper->name, name) &&
+ del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
nf_ct_expect_put(exp);
}
@@ -1763,7 +1862,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
spin_lock_bh(&nf_conntrack_lock);
for (i = 0; i < nf_ct_expect_hsize; i++) {
hlist_for_each_entry_safe(exp, n, next,
- &init_net.ct.expect_hash[i],
+ &net->ct.expect_hash[i],
hnode) {
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect(exp);
@@ -1784,7 +1883,9 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x,
}
static int
-ctnetlink_create_expect(const struct nlattr * const cda[], u_int8_t u3,
+ctnetlink_create_expect(struct net *net, u16 zone,
+ const struct nlattr * const cda[],
+ u_int8_t u3,
u32 pid, int report)
{
struct nf_conntrack_tuple tuple, mask, master_tuple;
@@ -1806,7 +1907,7 @@ ctnetlink_create_expect(const struct nlattr * const cda[], u_int8_t u3,
return err;
/* Look for master conntrack of this expectation */
- h = nf_conntrack_find_get(&init_net, &master_tuple);
+ h = nf_conntrack_find_get(net, zone, &master_tuple);
if (!h)
return -ENOENT;
ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1846,29 +1947,35 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
+ struct net *net = sock_net(ctnl);
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
- int err = 0;
+ u16 zone;
+ int err;
if (!cda[CTA_EXPECT_TUPLE]
|| !cda[CTA_EXPECT_MASK]
|| !cda[CTA_EXPECT_MASTER])
return -EINVAL;
+ err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+ if (err < 0)
+ return err;
+
err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
if (err < 0)
return err;
spin_lock_bh(&nf_conntrack_lock);
- exp = __nf_ct_expect_find(&init_net, &tuple);
+ exp = __nf_ct_expect_find(net, zone, &tuple);
if (!exp) {
spin_unlock_bh(&nf_conntrack_lock);
err = -ENOENT;
if (nlh->nlmsg_flags & NLM_F_CREATE) {
- err = ctnetlink_create_expect(cda,
+ err = ctnetlink_create_expect(net, zone, cda,
u3,
NETLINK_CB(skb).pid,
nlmsg_report(nlh));
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 3807ac7faf4c..088944824e13 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -28,6 +28,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_conntrack_proto_gre.h>
#include <linux/netfilter/nf_conntrack_pptp.h>
@@ -123,7 +124,7 @@ static void pptp_expectfn(struct nf_conn *ct,
pr_debug("trying to unexpect other dir: ");
nf_ct_dump_tuple(&inv_t);
- exp_other = nf_ct_expect_find_get(net, &inv_t);
+ exp_other = nf_ct_expect_find_get(net, nf_ct_zone(ct), &inv_t);
if (exp_other) {
/* delete other expectation. */
pr_debug("found\n");
@@ -136,17 +137,18 @@ static void pptp_expectfn(struct nf_conn *ct,
rcu_read_unlock();
}
-static int destroy_sibling_or_exp(struct net *net,
+static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
const struct nf_conntrack_tuple *t)
{
const struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_expect *exp;
struct nf_conn *sibling;
+ u16 zone = nf_ct_zone(ct);
pr_debug("trying to timeout ct or exp for tuple ");
nf_ct_dump_tuple(t);
- h = nf_conntrack_find_get(net, t);
+ h = nf_conntrack_find_get(net, zone, t);
if (h) {
sibling = nf_ct_tuplehash_to_ctrack(h);
pr_debug("setting timeout of conntrack %p to 0\n", sibling);
@@ -157,7 +159,7 @@ static int destroy_sibling_or_exp(struct net *net,
nf_ct_put(sibling);
return 1;
} else {
- exp = nf_ct_expect_find_get(net, t);
+ exp = nf_ct_expect_find_get(net, zone, t);
if (exp) {
pr_debug("unexpect_related of expect %p\n", exp);
nf_ct_unexpect_related(exp);
@@ -182,7 +184,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct)
t.dst.protonum = IPPROTO_GRE;
t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id;
t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id;
- if (!destroy_sibling_or_exp(net, &t))
+ if (!destroy_sibling_or_exp(net, ct, &t))
pr_debug("failed to timeout original pns->pac ct/exp\n");
/* try reply (pac->pns) tuple */
@@ -190,7 +192,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct)
t.dst.protonum = IPPROTO_GRE;
t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id;
t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id;
- if (!destroy_sibling_or_exp(net, &t))
+ if (!destroy_sibling_or_exp(net, ct, &t))
pr_debug("failed to timeout reply pac->pns ct/exp\n");
}
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 1a4568bf7ea5..a44fa75b5178 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -12,6 +12,7 @@
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index dd375500dccc..5292560d6d4a 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -15,6 +15,7 @@
#include <linux/spinlock.h>
#include <linux/skbuff.h>
#include <linux/dccp.h>
+#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -561,8 +562,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
return NF_ACCEPT;
}
-static int dccp_error(struct net *net, struct sk_buff *skb,
- unsigned int dataoff, enum ip_conntrack_info *ctinfo,
+static int dccp_error(struct net *net, struct nf_conn *tmpl,
+ struct sk_buff *skb, unsigned int dataoff,
+ enum ip_conntrack_info *ctinfo,
u_int8_t pf, unsigned int hooknum)
{
struct dccp_hdr _dh, *dh;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index c99cfba64ddc..cf616e55ca41 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -31,6 +31,7 @@
#include <linux/in.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
+#include <linux/slab.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -241,7 +242,7 @@ static int gre_packet(struct nf_conn *ct,
ct->proto.gre.stream_timeout);
/* Also, more likely to be important, and not a probe. */
set_bit(IPS_ASSURED_BIT, &ct->status);
- nf_conntrack_event_cache(IPCT_STATUS, ct);
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
} else
nf_ct_refresh_acct(ct, ctinfo, skb,
ct->proto.gre.timeout);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index f9d930f80276..b68ff15ed979 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -377,7 +377,7 @@ static int sctp_packet(struct nf_conn *ct,
new_state == SCTP_CONNTRACK_ESTABLISHED) {
pr_debug("Setting assured bit\n");
set_bit(IPS_ASSURED_BIT, &ct->status);
- nf_conntrack_event_cache(IPCT_STATUS, ct);
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
}
return NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 3c96437b45ad..9dd8cd4fb6e6 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -760,7 +760,7 @@ static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
};
/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
-static int tcp_error(struct net *net,
+static int tcp_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info *ctinfo,
@@ -1045,7 +1045,7 @@ static int tcp_packet(struct nf_conn *ct,
after SYN_RECV or a valid answer for a picked up
connection. */
set_bit(IPS_ASSURED_BIT, &ct->status);
- nf_conntrack_event_cache(IPCT_STATUS, ct);
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
}
nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 5c5518bedb4b..8289088b8218 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -77,7 +77,7 @@ static int udp_packet(struct nf_conn *ct,
nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream);
/* Also, more likely to be important, and not a probe */
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
- nf_conntrack_event_cache(IPCT_STATUS, ct);
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
} else
nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout);
@@ -91,8 +91,8 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
}
-static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
- enum ip_conntrack_info *ctinfo,
+static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
+ unsigned int dataoff, enum ip_conntrack_info *ctinfo,
u_int8_t pf,
unsigned int hooknum)
{
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 458655bb2106..263b5a72588d 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -75,7 +75,7 @@ static int udplite_packet(struct nf_conn *ct,
nf_ct_udplite_timeout_stream);
/* Also, more likely to be important, and not a probe */
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
- nf_conntrack_event_cache(IPCT_STATUS, ct);
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
} else
nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout);
@@ -89,7 +89,7 @@ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
}
-static int udplite_error(struct net *net,
+static int udplite_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info *ctinfo,
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index dcfecbb81c46..d9e27734b2a2 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -20,6 +20,7 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/netfilter.h>
+#include <linux/slab.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <net/netfilter/nf_conntrack.h>
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 023966b569bf..c6cd1b84eddd 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -16,12 +16,14 @@
#include <linux/inet.h>
#include <linux/in.h>
#include <linux/udp.h>
+#include <linux/tcp.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_conntrack_sip.h>
MODULE_LICENSE("GPL");
@@ -50,12 +52,16 @@ module_param(sip_direct_media, int, 0600);
MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling "
"endpoints only (default 1)");
-unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb,
+unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int dataoff,
const char **dptr,
unsigned int *datalen) __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
+void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, s16 off) __read_mostly;
+EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook);
+
unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
+ unsigned int dataoff,
const char **dptr,
unsigned int *datalen,
struct nf_conntrack_expect *exp,
@@ -63,17 +69,17 @@ unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
unsigned int matchlen) __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook);
-unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb,
+unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int dataoff,
const char **dptr,
- unsigned int dataoff,
unsigned int *datalen,
+ unsigned int sdpoff,
enum sdp_header_types type,
enum sdp_header_types term,
const union nf_inet_addr *addr)
__read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook);
-unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb,
+unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int dataoff,
const char **dptr,
unsigned int *datalen,
unsigned int matchoff,
@@ -82,14 +88,15 @@ unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook);
unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb,
- const char **dptr,
unsigned int dataoff,
+ const char **dptr,
unsigned int *datalen,
+ unsigned int sdpoff,
const union nf_inet_addr *addr)
__read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook);
-unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb,
+unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int dataoff,
const char **dptr,
unsigned int *datalen,
struct nf_conntrack_expect *rtp_exp,
@@ -236,12 +243,13 @@ int ct_sip_parse_request(const struct nf_conn *ct,
return 0;
/* Find SIP URI */
- limit -= strlen("sip:");
- for (; dptr < limit; dptr++) {
+ for (; dptr < limit - strlen("sip:"); dptr++) {
if (*dptr == '\r' || *dptr == '\n')
return -1;
- if (strnicmp(dptr, "sip:", strlen("sip:")) == 0)
+ if (strnicmp(dptr, "sip:", strlen("sip:")) == 0) {
+ dptr += strlen("sip:");
break;
+ }
}
if (!skp_epaddr_len(ct, dptr, limit, &shift))
return 0;
@@ -276,7 +284,7 @@ EXPORT_SYMBOL_GPL(ct_sip_parse_request);
* tabs, spaces and continuation lines, which are treated as a single whitespace
* character.
*
- * Some headers may appear multiple times. A comma seperated list of values is
+ * Some headers may appear multiple times. A comma separated list of values is
* equivalent to multiple headers.
*/
static const struct sip_header ct_sip_hdrs[] = {
@@ -284,7 +292,8 @@ static const struct sip_header ct_sip_hdrs[] = {
[SIP_HDR_FROM] = SIP_HDR("From", "f", "sip:", skp_epaddr_len),
[SIP_HDR_TO] = SIP_HDR("To", "t", "sip:", skp_epaddr_len),
[SIP_HDR_CONTACT] = SIP_HDR("Contact", "m", "sip:", skp_epaddr_len),
- [SIP_HDR_VIA] = SIP_HDR("Via", "v", "UDP ", epaddr_len),
+ [SIP_HDR_VIA_UDP] = SIP_HDR("Via", "v", "UDP ", epaddr_len),
+ [SIP_HDR_VIA_TCP] = SIP_HDR("Via", "v", "TCP ", epaddr_len),
[SIP_HDR_EXPIRES] = SIP_HDR("Expires", NULL, NULL, digits_len),
[SIP_HDR_CONTENT_LENGTH] = SIP_HDR("Content-Length", "l", NULL, digits_len),
};
@@ -412,7 +421,7 @@ int ct_sip_get_header(const struct nf_conn *ct, const char *dptr,
}
EXPORT_SYMBOL_GPL(ct_sip_get_header);
-/* Get next header field in a list of comma seperated values */
+/* Get next header field in a list of comma separated values */
static int ct_sip_next_header(const struct nf_conn *ct, const char *dptr,
unsigned int dataoff, unsigned int datalen,
enum sip_header_types type,
@@ -516,6 +525,33 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr,
}
EXPORT_SYMBOL_GPL(ct_sip_parse_header_uri);
+static int ct_sip_parse_param(const struct nf_conn *ct, const char *dptr,
+ unsigned int dataoff, unsigned int datalen,
+ const char *name,
+ unsigned int *matchoff, unsigned int *matchlen)
+{
+ const char *limit = dptr + datalen;
+ const char *start;
+ const char *end;
+
+ limit = ct_sip_header_search(dptr + dataoff, limit, ",", strlen(","));
+ if (!limit)
+ limit = dptr + datalen;
+
+ start = ct_sip_header_search(dptr + dataoff, limit, name, strlen(name));
+ if (!start)
+ return 0;
+ start += strlen(name);
+
+ end = ct_sip_header_search(start, limit, ";", strlen(";"));
+ if (!end)
+ end = limit;
+
+ *matchoff = start - dptr;
+ *matchlen = end - start;
+ return 1;
+}
+
/* Parse address from header parameter and return address, offset and length */
int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
unsigned int dataoff, unsigned int datalen,
@@ -574,6 +610,29 @@ int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr,
}
EXPORT_SYMBOL_GPL(ct_sip_parse_numerical_param);
+static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr,
+ unsigned int dataoff, unsigned int datalen,
+ u8 *proto)
+{
+ unsigned int matchoff, matchlen;
+
+ if (ct_sip_parse_param(ct, dptr, dataoff, datalen, "transport=",
+ &matchoff, &matchlen)) {
+ if (!strnicmp(dptr + matchoff, "TCP", strlen("TCP")))
+ *proto = IPPROTO_TCP;
+ else if (!strnicmp(dptr + matchoff, "UDP", strlen("UDP")))
+ *proto = IPPROTO_UDP;
+ else
+ return 0;
+
+ if (*proto != nf_ct_protonum(ct))
+ return 0;
+ } else
+ *proto = nf_ct_protonum(ct);
+
+ return 1;
+}
+
/* SDP header parsing: a SDP session description contains an ordered set of
* headers, starting with a section containing general session parameters,
* optionally followed by multiple media descriptions.
@@ -682,7 +741,7 @@ static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr,
static int refresh_signalling_expectation(struct nf_conn *ct,
union nf_inet_addr *addr,
- __be16 port,
+ u8 proto, __be16 port,
unsigned int expires)
{
struct nf_conn_help *help = nfct_help(ct);
@@ -694,6 +753,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct,
hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
if (exp->class != SIP_EXPECT_SIGNALLING ||
!nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) ||
+ exp->tuple.dst.protonum != proto ||
exp->tuple.dst.u.udp.port != port)
continue;
if (!del_timer(&exp->timeout))
@@ -728,7 +788,7 @@ static void flush_expectations(struct nf_conn *ct, bool media)
spin_unlock_bh(&nf_conntrack_lock);
}
-static int set_expected_rtp_rtcp(struct sk_buff *skb,
+static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff,
const char **dptr, unsigned int *datalen,
union nf_inet_addr *daddr, __be16 port,
enum sip_expectation_classes class,
@@ -777,7 +837,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
rcu_read_lock();
do {
- exp = __nf_ct_expect_find(net, &tuple);
+ exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple);
if (!exp || exp->master == ct ||
nfct_help(exp->master)->helper != nfct_help(ct)->helper ||
@@ -805,7 +865,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
if (direct_rtp) {
nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook);
if (nf_nat_sdp_port &&
- !nf_nat_sdp_port(skb, dptr, datalen,
+ !nf_nat_sdp_port(skb, dataoff, dptr, datalen,
mediaoff, medialen, ntohs(rtp_port)))
goto err1;
}
@@ -827,7 +887,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb,
nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook);
if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp)
- ret = nf_nat_sdp_media(skb, dptr, datalen, rtp_exp, rtcp_exp,
+ ret = nf_nat_sdp_media(skb, dataoff, dptr, datalen,
+ rtp_exp, rtcp_exp,
mediaoff, medialen, daddr);
else {
if (nf_ct_expect_related(rtp_exp) == 0) {
@@ -847,6 +908,7 @@ err1:
static const struct sdp_media_type sdp_media_types[] = {
SDP_MEDIA_TYPE("audio ", SIP_EXPECT_AUDIO),
SDP_MEDIA_TYPE("video ", SIP_EXPECT_VIDEO),
+ SDP_MEDIA_TYPE("image ", SIP_EXPECT_IMAGE),
};
static const struct sdp_media_type *sdp_media_type(const char *dptr,
@@ -866,13 +928,12 @@ static const struct sdp_media_type *sdp_media_type(const char *dptr,
return NULL;
}
-static int process_sdp(struct sk_buff *skb,
+static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int cseq)
{
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- struct nf_conn_help *help = nfct_help(ct);
unsigned int matchoff, matchlen;
unsigned int mediaoff, medialen;
unsigned int sdpoff;
@@ -941,7 +1002,7 @@ static int process_sdp(struct sk_buff *skb,
else
return NF_DROP;
- ret = set_expected_rtp_rtcp(skb, dptr, datalen,
+ ret = set_expected_rtp_rtcp(skb, dataoff, dptr, datalen,
&rtp_addr, htons(port), t->class,
mediaoff, medialen);
if (ret != NF_ACCEPT)
@@ -949,8 +1010,9 @@ static int process_sdp(struct sk_buff *skb,
/* Update media connection address if present */
if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) {
- ret = nf_nat_sdp_addr(skb, dptr, mediaoff, datalen,
- c_hdr, SDP_HDR_MEDIA, &rtp_addr);
+ ret = nf_nat_sdp_addr(skb, dataoff, dptr, datalen,
+ mediaoff, c_hdr, SDP_HDR_MEDIA,
+ &rtp_addr);
if (ret != NF_ACCEPT)
return ret;
}
@@ -960,14 +1022,12 @@ static int process_sdp(struct sk_buff *skb,
/* Update session connection and owner addresses */
nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook);
if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK)
- ret = nf_nat_sdp_session(skb, dptr, sdpoff, datalen, &rtp_addr);
-
- if (ret == NF_ACCEPT && i > 0)
- help->help.ct_sip_info.invite_cseq = cseq;
+ ret = nf_nat_sdp_session(skb, dataoff, dptr, datalen, sdpoff,
+ &rtp_addr);
return ret;
}
-static int process_invite_response(struct sk_buff *skb,
+static int process_invite_response(struct sk_buff *skb, unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int cseq, unsigned int code)
{
@@ -977,13 +1037,13 @@ static int process_invite_response(struct sk_buff *skb,
if ((code >= 100 && code <= 199) ||
(code >= 200 && code <= 299))
- return process_sdp(skb, dptr, datalen, cseq);
+ return process_sdp(skb, dataoff, dptr, datalen, cseq);
else if (help->help.ct_sip_info.invite_cseq == cseq)
flush_expectations(ct, true);
return NF_ACCEPT;
}
-static int process_update_response(struct sk_buff *skb,
+static int process_update_response(struct sk_buff *skb, unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int cseq, unsigned int code)
{
@@ -993,13 +1053,13 @@ static int process_update_response(struct sk_buff *skb,
if ((code >= 100 && code <= 199) ||
(code >= 200 && code <= 299))
- return process_sdp(skb, dptr, datalen, cseq);
+ return process_sdp(skb, dataoff, dptr, datalen, cseq);
else if (help->help.ct_sip_info.invite_cseq == cseq)
flush_expectations(ct, true);
return NF_ACCEPT;
}
-static int process_prack_response(struct sk_buff *skb,
+static int process_prack_response(struct sk_buff *skb, unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int cseq, unsigned int code)
{
@@ -1009,13 +1069,29 @@ static int process_prack_response(struct sk_buff *skb,
if ((code >= 100 && code <= 199) ||
(code >= 200 && code <= 299))
- return process_sdp(skb, dptr, datalen, cseq);
+ return process_sdp(skb, dataoff, dptr, datalen, cseq);
else if (help->help.ct_sip_info.invite_cseq == cseq)
flush_expectations(ct, true);
return NF_ACCEPT;
}
-static int process_bye_request(struct sk_buff *skb,
+static int process_invite_request(struct sk_buff *skb, unsigned int dataoff,
+ const char **dptr, unsigned int *datalen,
+ unsigned int cseq)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn_help *help = nfct_help(ct);
+ unsigned int ret;
+
+ flush_expectations(ct, true);
+ ret = process_sdp(skb, dataoff, dptr, datalen, cseq);
+ if (ret == NF_ACCEPT)
+ help->help.ct_sip_info.invite_cseq = cseq;
+ return ret;
+}
+
+static int process_bye_request(struct sk_buff *skb, unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int cseq)
{
@@ -1030,7 +1106,7 @@ static int process_bye_request(struct sk_buff *skb,
* signalling connections. The expectation is marked inactive and is activated
* when receiving a response indicating success from the registrar.
*/
-static int process_register_request(struct sk_buff *skb,
+static int process_register_request(struct sk_buff *skb, unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int cseq)
{
@@ -1042,6 +1118,7 @@ static int process_register_request(struct sk_buff *skb,
struct nf_conntrack_expect *exp;
union nf_inet_addr *saddr, daddr;
__be16 port;
+ u8 proto;
unsigned int expires = 0;
int ret;
typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect;
@@ -1074,6 +1151,10 @@ static int process_register_request(struct sk_buff *skb,
if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, &daddr))
return NF_ACCEPT;
+ if (ct_sip_parse_transport(ct, *dptr, matchoff + matchlen, *datalen,
+ &proto) == 0)
+ return NF_ACCEPT;
+
if (ct_sip_parse_numerical_param(ct, *dptr,
matchoff + matchlen, *datalen,
"expires=", NULL, NULL, &expires) < 0)
@@ -1093,14 +1174,14 @@ static int process_register_request(struct sk_buff *skb,
saddr = &ct->tuplehash[!dir].tuple.src.u3;
nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct),
- saddr, &daddr, IPPROTO_UDP, NULL, &port);
+ saddr, &daddr, proto, NULL, &port);
exp->timeout.expires = sip_timeout * HZ;
exp->helper = nfct_help(ct)->helper;
exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook);
if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK)
- ret = nf_nat_sip_expect(skb, dptr, datalen, exp,
+ ret = nf_nat_sip_expect(skb, dataoff, dptr, datalen, exp,
matchoff, matchlen);
else {
if (nf_ct_expect_related(exp) != 0)
@@ -1116,7 +1197,7 @@ store_cseq:
return ret;
}
-static int process_register_response(struct sk_buff *skb,
+static int process_register_response(struct sk_buff *skb, unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int cseq, unsigned int code)
{
@@ -1126,7 +1207,8 @@ static int process_register_response(struct sk_buff *skb,
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
union nf_inet_addr addr;
__be16 port;
- unsigned int matchoff, matchlen, dataoff = 0;
+ u8 proto;
+ unsigned int matchoff, matchlen, coff = 0;
unsigned int expires = 0;
int in_contact = 0, ret;
@@ -1153,7 +1235,7 @@ static int process_register_response(struct sk_buff *skb,
while (1) {
unsigned int c_expires = expires;
- ret = ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen,
+ ret = ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen,
SIP_HDR_CONTACT, &in_contact,
&matchoff, &matchlen,
&addr, &port);
@@ -1166,6 +1248,10 @@ static int process_register_response(struct sk_buff *skb,
if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &addr))
continue;
+ if (ct_sip_parse_transport(ct, *dptr, matchoff + matchlen,
+ *datalen, &proto) == 0)
+ continue;
+
ret = ct_sip_parse_numerical_param(ct, *dptr,
matchoff + matchlen,
*datalen, "expires=",
@@ -1174,7 +1260,8 @@ static int process_register_response(struct sk_buff *skb,
return NF_DROP;
if (c_expires == 0)
break;
- if (refresh_signalling_expectation(ct, &addr, port, c_expires))
+ if (refresh_signalling_expectation(ct, &addr, proto, port,
+ c_expires))
return NF_ACCEPT;
}
@@ -1184,7 +1271,7 @@ flush:
}
static const struct sip_handler sip_handlers[] = {
- SIP_HANDLER("INVITE", process_sdp, process_invite_response),
+ SIP_HANDLER("INVITE", process_invite_request, process_invite_response),
SIP_HANDLER("UPDATE", process_sdp, process_update_response),
SIP_HANDLER("ACK", process_sdp, NULL),
SIP_HANDLER("PRACK", process_sdp, process_prack_response),
@@ -1192,13 +1279,13 @@ static const struct sip_handler sip_handlers[] = {
SIP_HANDLER("REGISTER", process_register_request, process_register_response),
};
-static int process_sip_response(struct sk_buff *skb,
+static int process_sip_response(struct sk_buff *skb, unsigned int dataoff,
const char **dptr, unsigned int *datalen)
{
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- unsigned int matchoff, matchlen;
- unsigned int code, cseq, dataoff, i;
+ unsigned int matchoff, matchlen, matchend;
+ unsigned int code, cseq, i;
if (*datalen < strlen("SIP/2.0 200"))
return NF_ACCEPT;
@@ -1212,7 +1299,7 @@ static int process_sip_response(struct sk_buff *skb,
cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
if (!cseq)
return NF_DROP;
- dataoff = matchoff + matchlen + 1;
+ matchend = matchoff + matchlen + 1;
for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) {
const struct sip_handler *handler;
@@ -1220,15 +1307,16 @@ static int process_sip_response(struct sk_buff *skb,
handler = &sip_handlers[i];
if (handler->response == NULL)
continue;
- if (*datalen < dataoff + handler->len ||
- strnicmp(*dptr + dataoff, handler->method, handler->len))
+ if (*datalen < matchend + handler->len ||
+ strnicmp(*dptr + matchend, handler->method, handler->len))
continue;
- return handler->response(skb, dptr, datalen, cseq, code);
+ return handler->response(skb, dataoff, dptr, datalen,
+ cseq, code);
}
return NF_ACCEPT;
}
-static int process_sip_request(struct sk_buff *skb,
+static int process_sip_request(struct sk_buff *skb, unsigned int dataoff,
const char **dptr, unsigned int *datalen)
{
enum ip_conntrack_info ctinfo;
@@ -1253,69 +1341,157 @@ static int process_sip_request(struct sk_buff *skb,
if (!cseq)
return NF_DROP;
- return handler->request(skb, dptr, datalen, cseq);
+ return handler->request(skb, dataoff, dptr, datalen, cseq);
}
return NF_ACCEPT;
}
-static int sip_help(struct sk_buff *skb,
- unsigned int protoff,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
+static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct,
+ unsigned int dataoff, const char **dptr,
+ unsigned int *datalen)
+{
+ typeof(nf_nat_sip_hook) nf_nat_sip;
+ int ret;
+
+ if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0)
+ ret = process_sip_request(skb, dataoff, dptr, datalen);
+ else
+ ret = process_sip_response(skb, dataoff, dptr, datalen);
+
+ if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
+ nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
+ if (nf_nat_sip && !nf_nat_sip(skb, dataoff, dptr, datalen))
+ ret = NF_DROP;
+ }
+
+ return ret;
+}
+
+static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
+ struct tcphdr *th, _tcph;
unsigned int dataoff, datalen;
- const char *dptr;
+ unsigned int matchoff, matchlen, clen;
+ unsigned int msglen, origlen;
+ const char *dptr, *end;
+ s16 diff, tdiff = 0;
int ret;
- typeof(nf_nat_sip_hook) nf_nat_sip;
+ typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust;
+
+ if (ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY)
+ return NF_ACCEPT;
/* No Data ? */
- dataoff = protoff + sizeof(struct udphdr);
+ th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return NF_ACCEPT;
+ dataoff = protoff + th->doff * 4;
if (dataoff >= skb->len)
return NF_ACCEPT;
nf_ct_refresh(ct, skb, sip_timeout * HZ);
- if (!skb_is_nonlinear(skb))
- dptr = skb->data + dataoff;
- else {
+ if (skb_is_nonlinear(skb)) {
pr_debug("Copy of skbuff not supported yet.\n");
return NF_ACCEPT;
}
+ dptr = skb->data + dataoff;
datalen = skb->len - dataoff;
if (datalen < strlen("SIP/2.0 200"))
return NF_ACCEPT;
- if (strnicmp(dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0)
- ret = process_sip_request(skb, &dptr, &datalen);
- else
- ret = process_sip_response(skb, &dptr, &datalen);
+ while (1) {
+ if (ct_sip_get_header(ct, dptr, 0, datalen,
+ SIP_HDR_CONTENT_LENGTH,
+ &matchoff, &matchlen) <= 0)
+ break;
+
+ clen = simple_strtoul(dptr + matchoff, (char **)&end, 10);
+ if (dptr + matchoff == end)
+ break;
+
+ if (end + strlen("\r\n\r\n") > dptr + datalen)
+ break;
+ if (end[0] != '\r' || end[1] != '\n' ||
+ end[2] != '\r' || end[3] != '\n')
+ break;
+ end += strlen("\r\n\r\n") + clen;
+
+ msglen = origlen = end - dptr;
+
+ ret = process_sip_msg(skb, ct, dataoff, &dptr, &msglen);
+ if (ret != NF_ACCEPT)
+ break;
+ diff = msglen - origlen;
+ tdiff += diff;
+
+ dataoff += msglen;
+ dptr += msglen;
+ datalen = datalen + diff - msglen;
+ }
if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
- nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
- if (nf_nat_sip && !nf_nat_sip(skb, &dptr, &datalen))
- ret = NF_DROP;
+ nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook);
+ if (nf_nat_sip_seq_adjust)
+ nf_nat_sip_seq_adjust(skb, tdiff);
}
return ret;
}
-static struct nf_conntrack_helper sip[MAX_PORTS][2] __read_mostly;
-static char sip_names[MAX_PORTS][2][sizeof("sip-65535")] __read_mostly;
+static int sip_help_udp(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+ unsigned int dataoff, datalen;
+ const char *dptr;
+
+ /* No Data ? */
+ dataoff = protoff + sizeof(struct udphdr);
+ if (dataoff >= skb->len)
+ return NF_ACCEPT;
+
+ nf_ct_refresh(ct, skb, sip_timeout * HZ);
+
+ if (skb_is_nonlinear(skb)) {
+ pr_debug("Copy of skbuff not supported yet.\n");
+ return NF_ACCEPT;
+ }
+
+ dptr = skb->data + dataoff;
+ datalen = skb->len - dataoff;
+ if (datalen < strlen("SIP/2.0 200"))
+ return NF_ACCEPT;
+
+ return process_sip_msg(skb, ct, dataoff, &dptr, &datalen);
+}
+
+static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly;
+static char sip_names[MAX_PORTS][4][sizeof("sip-65535")] __read_mostly;
static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = {
[SIP_EXPECT_SIGNALLING] = {
+ .name = "signalling",
.max_expected = 1,
.timeout = 3 * 60,
},
[SIP_EXPECT_AUDIO] = {
+ .name = "audio",
.max_expected = 2 * IP_CT_DIR_MAX,
.timeout = 3 * 60,
},
[SIP_EXPECT_VIDEO] = {
+ .name = "video",
.max_expected = 2 * IP_CT_DIR_MAX,
.timeout = 3 * 60,
},
+ [SIP_EXPECT_IMAGE] = {
+ .name = "image",
+ .max_expected = IP_CT_DIR_MAX,
+ .timeout = 3 * 60,
+ },
};
static void nf_conntrack_sip_fini(void)
@@ -1323,7 +1499,7 @@ static void nf_conntrack_sip_fini(void)
int i, j;
for (i = 0; i < ports_c; i++) {
- for (j = 0; j < 2; j++) {
+ for (j = 0; j < ARRAY_SIZE(sip[i]); j++) {
if (sip[i][j].me == NULL)
continue;
nf_conntrack_helper_unregister(&sip[i][j]);
@@ -1343,14 +1519,24 @@ static int __init nf_conntrack_sip_init(void)
memset(&sip[i], 0, sizeof(sip[i]));
sip[i][0].tuple.src.l3num = AF_INET;
- sip[i][1].tuple.src.l3num = AF_INET6;
- for (j = 0; j < 2; j++) {
- sip[i][j].tuple.dst.protonum = IPPROTO_UDP;
+ sip[i][0].tuple.dst.protonum = IPPROTO_UDP;
+ sip[i][0].help = sip_help_udp;
+ sip[i][1].tuple.src.l3num = AF_INET;
+ sip[i][1].tuple.dst.protonum = IPPROTO_TCP;
+ sip[i][1].help = sip_help_tcp;
+
+ sip[i][2].tuple.src.l3num = AF_INET6;
+ sip[i][2].tuple.dst.protonum = IPPROTO_UDP;
+ sip[i][2].help = sip_help_udp;
+ sip[i][3].tuple.src.l3num = AF_INET6;
+ sip[i][3].tuple.dst.protonum = IPPROTO_TCP;
+ sip[i][3].help = sip_help_tcp;
+
+ for (j = 0; j < ARRAY_SIZE(sip[i]); j++) {
sip[i][j].tuple.src.u.udp.port = htons(ports[i]);
sip[i][j].expect_policy = sip_exp_policy;
sip[i][j].expect_class_max = SIP_EXPECT_MAX;
sip[i][j].me = THIS_MODULE;
- sip[i][j].help = sip_help;
tmpname = &sip_names[i][j][0];
if (ports[i] == SIP_PORT)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index e310f1561bb2..faa8eb3722b9 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -8,6 +8,7 @@
#include <linux/types.h>
#include <linux/netfilter.h>
+#include <linux/slab.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
@@ -26,6 +27,7 @@
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_zones.h>
MODULE_LICENSE("GPL");
@@ -171,6 +173,11 @@ static int ct_seq_show(struct seq_file *s, void *v)
goto release;
#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ if (seq_printf(s, "zone=%u ", nf_ct_zone(ct)))
+ goto release;
+#endif
+
if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
goto release;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 3a6fd77f7761..c49ef219899e 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -1,4 +1,5 @@
#include <linux/kernel.h>
+#include <linux/slab.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
@@ -265,7 +266,6 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
local_bh_disable();
entry->okfn(skb);
local_bh_enable();
- case NF_STOLEN:
break;
case NF_QUEUE:
if (!__nf_queue(skb, elem, entry->pf, entry->hook,
@@ -273,6 +273,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
verdict >> NF_VERDICT_BITS))
goto next_hook;
break;
+ case NF_STOLEN:
default:
kfree_skb(skb);
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index eedc0c1ac7a4..6afa3d52ea5f 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -40,7 +40,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
static char __initdata nfversion[] = "0.30";
-static struct sock *nfnl = NULL;
static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
static DEFINE_MUTEX(nfnl_mutex);
@@ -101,34 +100,35 @@ nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss)
return &ss->cb[cb_id];
}
-int nfnetlink_has_listeners(unsigned int group)
+int nfnetlink_has_listeners(struct net *net, unsigned int group)
{
- return netlink_has_listeners(nfnl, group);
+ return netlink_has_listeners(net->nfnl, group);
}
EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
-int nfnetlink_send(struct sk_buff *skb, u32 pid,
+int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid,
unsigned group, int echo, gfp_t flags)
{
- return nlmsg_notify(nfnl, skb, pid, group, echo, flags);
+ return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags);
}
EXPORT_SYMBOL_GPL(nfnetlink_send);
-void nfnetlink_set_err(u32 pid, u32 group, int error)
+int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error)
{
- netlink_set_err(nfnl, pid, group, error);
+ return netlink_set_err(net->nfnl, pid, group, error);
}
EXPORT_SYMBOL_GPL(nfnetlink_set_err);
-int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags)
+int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags)
{
- return netlink_unicast(nfnl, skb, pid, flags);
+ return netlink_unicast(net->nfnl, skb, pid, flags);
}
EXPORT_SYMBOL_GPL(nfnetlink_unicast);
/* Process one complete nfnetlink message. */
static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
+ struct net *net = sock_net(skb->sk);
const struct nfnl_callback *nc;
const struct nfnetlink_subsystem *ss;
int type, err;
@@ -170,7 +170,7 @@ replay:
if (err < 0)
return err;
- err = nc->call(nfnl, skb, nlh, (const struct nlattr **)cda);
+ err = nc->call(net->nfnl, skb, nlh, (const struct nlattr **)cda);
if (err == -EAGAIN)
goto replay;
return err;
@@ -184,26 +184,45 @@ static void nfnetlink_rcv(struct sk_buff *skb)
nfnl_unlock();
}
-static void __exit nfnetlink_exit(void)
+static int __net_init nfnetlink_net_init(struct net *net)
{
- printk("Removing netfilter NETLINK layer.\n");
- netlink_kernel_release(nfnl);
- return;
+ struct sock *nfnl;
+
+ nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, NFNLGRP_MAX,
+ nfnetlink_rcv, NULL, THIS_MODULE);
+ if (!nfnl)
+ return -ENOMEM;
+ net->nfnl_stash = nfnl;
+ rcu_assign_pointer(net->nfnl, nfnl);
+ return 0;
}
-static int __init nfnetlink_init(void)
+static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list)
{
- printk("Netfilter messages via NETLINK v%s.\n", nfversion);
+ struct net *net;
- nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX,
- nfnetlink_rcv, NULL, THIS_MODULE);
- if (!nfnl) {
- printk(KERN_ERR "cannot initialize nfnetlink!\n");
- return -ENOMEM;
- }
+ list_for_each_entry(net, net_exit_list, exit_list)
+ rcu_assign_pointer(net->nfnl, NULL);
+ synchronize_net();
+ list_for_each_entry(net, net_exit_list, exit_list)
+ netlink_kernel_release(net->nfnl_stash);
+}
- return 0;
+static struct pernet_operations nfnetlink_net_ops = {
+ .init = nfnetlink_net_init,
+ .exit_batch = nfnetlink_net_exit_batch,
+};
+
+static int __init nfnetlink_init(void)
+{
+ printk("Netfilter messages via NETLINK v%s.\n", nfversion);
+ return register_pernet_subsys(&nfnetlink_net_ops);
}
+static void __exit nfnetlink_exit(void)
+{
+ printk("Removing netfilter NETLINK layer.\n");
+ unregister_pernet_subsys(&nfnetlink_net_ops);
+}
module_init(nfnetlink_init);
module_exit(nfnetlink_exit);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 9de0470d557e..203643fb2c52 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -28,6 +28,7 @@
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/random.h>
+#include <linux/slab.h>
#include <net/sock.h>
#include <net/netfilter/nf_log.h>
#include <net/netfilter/nfnetlink_log.h>
@@ -323,7 +324,8 @@ __nfulnl_send(struct nfulnl_instance *inst)
NLMSG_DONE,
sizeof(struct nfgenmsg));
- status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT);
+ status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid,
+ MSG_DONTWAIT);
inst->qlen = 0;
inst->skb = NULL;
@@ -767,7 +769,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
}
instance_destroy(inst);
- goto out;
+ goto out_put;
default:
ret = -ENOTSUPP;
break;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 7e3fa410641e..e70a6ef1f4f2 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -18,6 +18,7 @@
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/spinlock.h>
+#include <linux/slab.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
@@ -112,7 +113,6 @@ instance_create(u_int16_t queue_num, int pid)
inst->copy_mode = NFQNL_COPY_NONE;
spin_lock_init(&inst->lock);
INIT_LIST_HEAD(&inst->queue_list);
- INIT_RCU_HEAD(&inst->rcu);
if (!try_module_get(THIS_MODULE)) {
err = -EAGAIN;
@@ -414,13 +414,13 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
queue->queue_dropped++;
if (net_ratelimit())
printk(KERN_WARNING "nf_queue: full at %d entries, "
- "dropping packets(s). Dropped: %d\n",
- queue->queue_total, queue->queue_dropped);
+ "dropping packets(s).\n",
+ queue->queue_total);
goto err_out_free_nskb;
}
/* nfnetlink_unicast will either free the nskb or add it to a socket */
- err = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT);
+ err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT);
if (err < 0) {
queue->queue_user_dropped++;
goto err_out_unlock;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index f01955cce314..665f5beef6ad 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -22,11 +22,14 @@
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/mm.h>
+#include <linux/slab.h>
#include <net/net_namespace.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_arp.h>
-
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_arp/arp_tables.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
@@ -37,7 +40,7 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
struct compat_delta {
struct compat_delta *next;
unsigned int offset;
- short delta;
+ int delta;
};
struct xt_af {
@@ -364,8 +367,10 @@ int xt_check_match(struct xt_mtchk_param *par,
* ebt_among is exempt from centralized matchsize checking
* because it uses a dynamic-size data set.
*/
- pr_err("%s_tables: %s match: invalid size %Zu != %u\n",
+ pr_err("%s_tables: %s.%u match: invalid size "
+ "%u (kernel) != (user) %u\n",
xt_prefix[par->family], par->match->name,
+ par->match->revision,
XT_ALIGN(par->match->matchsize), size);
return -EINVAL;
}
@@ -435,10 +440,10 @@ void xt_compat_flush_offsets(u_int8_t af)
}
EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
-short xt_compat_calc_jump(u_int8_t af, unsigned int offset)
+int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
{
struct compat_delta *tmp;
- short delta;
+ int delta;
for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next)
if (tmp->offset < offset)
@@ -481,8 +486,8 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
}
EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
-int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr,
- unsigned int *size)
+int xt_compat_match_to_user(const struct xt_entry_match *m,
+ void __user **dstptr, unsigned int *size)
{
const struct xt_match *match = m->u.kernel.match;
struct compat_xt_entry_match __user *cm = *dstptr;
@@ -514,8 +519,10 @@ int xt_check_target(struct xt_tgchk_param *par,
unsigned int size, u_int8_t proto, bool inv_proto)
{
if (XT_ALIGN(par->target->targetsize) != size) {
- pr_err("%s_tables: %s target: invalid size %Zu != %u\n",
+ pr_err("%s_tables: %s.%u target: invalid size "
+ "%u (kernel) != (user) %u\n",
xt_prefix[par->family], par->target->name,
+ par->target->revision,
XT_ALIGN(par->target->targetsize), size);
return -EINVAL;
}
@@ -582,8 +589,8 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
}
EXPORT_SYMBOL_GPL(xt_compat_target_from_user);
-int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr,
- unsigned int *size)
+int xt_compat_target_to_user(const struct xt_entry_target *t,
+ void __user **dstptr, unsigned int *size)
{
const struct xt_target *target = t->u.kernel.target;
struct compat_xt_entry_target __user *ct = *dstptr;
@@ -1091,6 +1098,60 @@ static const struct file_operations xt_target_ops = {
#endif /* CONFIG_PROC_FS */
+/**
+ * xt_hook_link - set up hooks for a new table
+ * @table: table with metadata needed to set up hooks
+ * @fn: Hook function
+ *
+ * This function will take care of creating and registering the necessary
+ * Netfilter hooks for XT tables.
+ */
+struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
+{
+ unsigned int hook_mask = table->valid_hooks;
+ uint8_t i, num_hooks = hweight32(hook_mask);
+ uint8_t hooknum;
+ struct nf_hook_ops *ops;
+ int ret;
+
+ ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL);
+ if (ops == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0, hooknum = 0; i < num_hooks && hook_mask != 0;
+ hook_mask >>= 1, ++hooknum) {
+ if (!(hook_mask & 1))
+ continue;
+ ops[i].hook = fn;
+ ops[i].owner = table->me;
+ ops[i].pf = table->af;
+ ops[i].hooknum = hooknum;
+ ops[i].priority = table->priority;
+ ++i;
+ }
+
+ ret = nf_register_hooks(ops, num_hooks);
+ if (ret < 0) {
+ kfree(ops);
+ return ERR_PTR(ret);
+ }
+
+ return ops;
+}
+EXPORT_SYMBOL_GPL(xt_hook_link);
+
+/**
+ * xt_hook_unlink - remove hooks for a table
+ * @ops: nf_hook_ops array as returned by nf_hook_link
+ * @hook_mask: the very same mask that was passed to nf_hook_link
+ */
+void xt_hook_unlink(const struct xt_table *table, struct nf_hook_ops *ops)
+{
+ nf_unregister_hooks(ops, hweight32(table->valid_hooks));
+ kfree(ops);
+}
+EXPORT_SYMBOL_GPL(xt_hook_unlink);
+
int xt_proto_init(struct net *net, u_int8_t af)
{
#ifdef CONFIG_PROC_FS
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
new file mode 100644
index 000000000000..ee18b231b950
--- /dev/null
+++ b/net/netfilter/xt_CT.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2010 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/gfp.h>
+#include <linux/skbuff.h>
+#include <linux/selinux.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_CT.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+static unsigned int xt_ct_target(struct sk_buff *skb,
+ const struct xt_target_param *par)
+{
+ const struct xt_ct_target_info *info = par->targinfo;
+ struct nf_conn *ct = info->ct;
+
+ /* Previously seen (loopback)? Ignore. */
+ if (skb->nfct != NULL)
+ return XT_CONTINUE;
+
+ atomic_inc(&ct->ct_general.use);
+ skb->nfct = &ct->ct_general;
+ skb->nfctinfo = IP_CT_NEW;
+
+ return XT_CONTINUE;
+}
+
+static u8 xt_ct_find_proto(const struct xt_tgchk_param *par)
+{
+ if (par->family == AF_INET) {
+ const struct ipt_entry *e = par->entryinfo;
+
+ if (e->ip.invflags & IPT_INV_PROTO)
+ return 0;
+ return e->ip.proto;
+ } else if (par->family == AF_INET6) {
+ const struct ip6t_entry *e = par->entryinfo;
+
+ if (e->ipv6.invflags & IP6T_INV_PROTO)
+ return 0;
+ return e->ipv6.proto;
+ } else
+ return 0;
+}
+
+static bool xt_ct_tg_check(const struct xt_tgchk_param *par)
+{
+ struct xt_ct_target_info *info = par->targinfo;
+ struct nf_conntrack_tuple t;
+ struct nf_conn_help *help;
+ struct nf_conn *ct;
+ u8 proto;
+
+ if (info->flags & ~XT_CT_NOTRACK)
+ return false;
+
+ if (info->flags & XT_CT_NOTRACK) {
+ ct = &nf_conntrack_untracked;
+ atomic_inc(&ct->ct_general.use);
+ goto out;
+ }
+
+#ifndef CONFIG_NF_CONNTRACK_ZONES
+ if (info->zone)
+ goto err1;
+#endif
+
+ if (nf_ct_l3proto_try_module_get(par->family) < 0)
+ goto err1;
+
+ memset(&t, 0, sizeof(t));
+ ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL);
+ if (IS_ERR(ct))
+ goto err2;
+
+ if ((info->ct_events || info->exp_events) &&
+ !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events,
+ GFP_KERNEL))
+ goto err3;
+
+ if (info->helper[0]) {
+ proto = xt_ct_find_proto(par);
+ if (!proto)
+ goto err3;
+
+ help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
+ if (help == NULL)
+ goto err3;
+
+ help->helper = nf_conntrack_helper_try_module_get(info->helper,
+ par->family,
+ proto);
+ if (help->helper == NULL)
+ goto err3;
+ }
+
+ __set_bit(IPS_TEMPLATE_BIT, &ct->status);
+ __set_bit(IPS_CONFIRMED_BIT, &ct->status);
+out:
+ info->ct = ct;
+ return true;
+
+err3:
+ nf_conntrack_free(ct);
+err2:
+ nf_ct_l3proto_module_put(par->family);
+err1:
+ return false;
+}
+
+static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
+{
+ struct xt_ct_target_info *info = par->targinfo;
+ struct nf_conn *ct = info->ct;
+ struct nf_conn_help *help;
+
+ if (ct != &nf_conntrack_untracked) {
+ help = nfct_help(ct);
+ if (help)
+ module_put(help->helper->me);
+
+ nf_ct_l3proto_module_put(par->family);
+ }
+ nf_ct_put(info->ct);
+}
+
+static struct xt_target xt_ct_tg __read_mostly = {
+ .name = "CT",
+ .family = NFPROTO_UNSPEC,
+ .targetsize = XT_ALIGN(sizeof(struct xt_ct_target_info)),
+ .checkentry = xt_ct_tg_check,
+ .destroy = xt_ct_tg_destroy,
+ .target = xt_ct_target,
+ .table = "raw",
+ .me = THIS_MODULE,
+};
+
+static int __init xt_ct_tg_init(void)
+{
+ return xt_register_target(&xt_ct_tg);
+}
+
+static void __exit xt_ct_tg_exit(void)
+{
+ xt_unregister_target(&xt_ct_tg);
+}
+
+module_init(xt_ct_tg_init);
+module_exit(xt_ct_tg_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: connection tracking target");
+MODULE_ALIAS("ipt_CT");
+MODULE_ALIAS("ip6t_CT");
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 8ff7843bb921..3271c8e52153 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -22,6 +22,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>
+#include <linux/slab.h>
#include <linux/leds.h>
#include <linux/mutex.h>
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index f28f6a5fc02d..12dcd7007c3e 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -28,6 +28,7 @@ MODULE_ALIAS("ip6t_NFQUEUE");
MODULE_ALIAS("arpt_NFQUEUE");
static u32 jhash_initval __read_mostly;
+static bool rnd_inited __read_mostly;
static unsigned int
nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
@@ -90,6 +91,10 @@ static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
const struct xt_NFQ_info_v1 *info = par->targinfo;
u32 maxid;
+ if (unlikely(!rnd_inited)) {
+ get_random_bytes(&jhash_initval, sizeof(jhash_initval));
+ rnd_inited = true;
+ }
if (info->queues_total == 0) {
pr_err("NFQUEUE: number of total queues is 0\n");
return false;
@@ -135,7 +140,6 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
static int __init nfqueue_tg_init(void)
{
- get_random_bytes(&jhash_initval, sizeof(jhash_initval));
return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
}
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index d80b8192e0d4..d16d55df4f61 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -11,6 +11,7 @@
#include <linux/jhash.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
+#include <linux/slab.h>
#include <net/gen_stats.h>
#include <net/netlink.h>
@@ -23,6 +24,7 @@ static DEFINE_MUTEX(xt_rateest_mutex);
#define RATEEST_HSIZE 16
static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly;
static unsigned int jhash_rnd __read_mostly;
+static bool rnd_inited __read_mostly;
static unsigned int xt_rateest_hash(const char *name)
{
@@ -93,6 +95,11 @@ static bool xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
struct gnet_estimator est;
} cfg;
+ if (unlikely(!rnd_inited)) {
+ get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
+ rnd_inited = true;
+ }
+
est = xt_rateest_lookup(info->name);
if (est) {
/*
@@ -164,7 +171,6 @@ static int __init xt_rateest_tg_init(void)
for (i = 0; i < ARRAY_SIZE(rateest_hash); i++)
INIT_HLIST_HEAD(&rateest_hash[i]);
- get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
return xt_register_target(&xt_rateest_tg_reg);
}
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index eda64c1cb1e5..c5f4b9919e9a 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
+#include <linux/gfp.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <net/dst.h>
@@ -60,17 +61,9 @@ tcpmss_mangle_packet(struct sk_buff *skb,
tcplen = skb->len - tcphoff;
tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
- /* Since it passed flags test in tcp match, we know it is is
- not a fragment, and has data >= tcp header length. SYN
- packets should not contain data: if they did, then we risk
- running over MTU, sending Frag Needed and breaking things
- badly. --RR */
- if (tcplen != tcph->doff*4) {
- if (net_ratelimit())
- printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n",
- skb->len);
+ /* Header cannot be larger than the packet */
+ if (tcplen < tcph->doff*4)
return -1;
- }
if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
if (dst_mtu(skb_dst(skb)) <= minlen) {
@@ -115,6 +108,12 @@ tcpmss_mangle_packet(struct sk_buff *skb,
}
}
+ /* There is data after the header so the option can't be added
+ without moving it, and doing so may make the SYN packet
+ itself too large. Accept the packet unmodified instead. */
+ if (tcplen > tcph->doff*4)
+ return 0;
+
/*
* MSS Option not found ?! add it..
*/
@@ -241,6 +240,7 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par)
{
const struct xt_tcpmss_info *info = par->targinfo;
const struct ipt_entry *e = par->entryinfo;
+ const struct xt_entry_match *ematch;
if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
(par->hook_mask & ~((1 << NF_INET_FORWARD) |
@@ -250,8 +250,9 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par)
"FORWARD, OUTPUT and POSTROUTING hooks\n");
return false;
}
- if (IPT_MATCH_ITERATE(e, find_syn_match))
- return true;
+ xt_ematch_foreach(ematch, e)
+ if (find_syn_match(ematch))
+ return true;
printk("xt_TCPMSS: Only works on TCP SYN packets\n");
return false;
}
@@ -261,6 +262,7 @@ static bool tcpmss_tg6_check(const struct xt_tgchk_param *par)
{
const struct xt_tcpmss_info *info = par->targinfo;
const struct ip6t_entry *e = par->entryinfo;
+ const struct xt_entry_match *ematch;
if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
(par->hook_mask & ~((1 << NF_INET_FORWARD) |
@@ -270,8 +272,9 @@ static bool tcpmss_tg6_check(const struct xt_tgchk_param *par)
"FORWARD, OUTPUT and POSTROUTING hooks\n");
return false;
}
- if (IP6T_MATCH_ITERATE(e, find_syn_match))
- return true;
+ xt_ematch_foreach(ematch, e)
+ if (find_syn_match(ematch))
+ return true;
printk("xt_TCPMSS: Only works on TCP SYN packets\n");
return false;
}
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 38f03f75a636..388ca4596098 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -17,6 +17,7 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/jhash.h>
+#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/random.h>
@@ -28,6 +29,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
/* we will save the tuples of all connections we care about */
struct xt_connlimit_conn {
@@ -40,15 +42,11 @@ struct xt_connlimit_data {
spinlock_t lock;
};
-static u_int32_t connlimit_rnd;
-static bool connlimit_rnd_inited;
+static u_int32_t connlimit_rnd __read_mostly;
+static bool connlimit_rnd_inited __read_mostly;
static inline unsigned int connlimit_iphash(__be32 addr)
{
- if (unlikely(!connlimit_rnd_inited)) {
- get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd));
- connlimit_rnd_inited = true;
- }
return jhash_1word((__force __u32)addr, connlimit_rnd) & 0xFF;
}
@@ -59,11 +57,6 @@ connlimit_iphash6(const union nf_inet_addr *addr,
union nf_inet_addr res;
unsigned int i;
- if (unlikely(!connlimit_rnd_inited)) {
- get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd));
- connlimit_rnd_inited = true;
- }
-
for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i)
res.ip6[i] = addr->ip6[i] & mask->ip6[i];
@@ -99,7 +92,8 @@ same_source_net(const union nf_inet_addr *addr,
}
}
-static int count_them(struct xt_connlimit_data *data,
+static int count_them(struct net *net,
+ struct xt_connlimit_data *data,
const struct nf_conntrack_tuple *tuple,
const union nf_inet_addr *addr,
const union nf_inet_addr *mask,
@@ -122,7 +116,8 @@ static int count_them(struct xt_connlimit_data *data,
/* check the saved connections */
list_for_each_entry_safe(conn, tmp, hash, list) {
- found = nf_conntrack_find_get(&init_net, &conn->tuple);
+ found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE,
+ &conn->tuple);
found_ct = NULL;
if (found != NULL)
@@ -180,6 +175,7 @@ static int count_them(struct xt_connlimit_data *data,
static bool
connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
{
+ struct net *net = dev_net(par->in ? par->in : par->out);
const struct xt_connlimit_info *info = par->matchinfo;
union nf_inet_addr addr;
struct nf_conntrack_tuple tuple;
@@ -204,7 +200,7 @@ connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
}
spin_lock_bh(&info->data->lock);
- connections = count_them(info->data, tuple_ptr, &addr,
+ connections = count_them(net, info->data, tuple_ptr, &addr,
&info->mask, par->family);
spin_unlock_bh(&info->data->lock);
@@ -226,6 +222,10 @@ static bool connlimit_mt_check(const struct xt_mtchk_param *par)
struct xt_connlimit_info *info = par->matchinfo;
unsigned int i;
+ if (unlikely(!connlimit_rnd_inited)) {
+ get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd));
+ connlimit_rnd_inited = true;
+ }
if (nf_ct_l3proto_try_module_get(par->family) < 0) {
printk(KERN_WARNING "cannot load conntrack support for "
"address family %u\n", par->family);
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 0989f29ade2e..395af5943ffd 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
+#include <linux/slab.h>
#include <linux/spinlock.h>
#include <net/ip.h>
#include <linux/dccp.h>
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index dd16e404424f..215a64835de8 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -1,6 +1,6 @@
/*
* xt_hashlimit - Netfilter module to limit the number of packets per time
- * seperately for each hashbucket (sourceip/sourceport/dstip/dstport)
+ * separately for each hashbucket (sourceip/sourceport/dstip/dstport)
*
* (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
* Copyright © CC Computer Consultants GmbH, 2007 - 2008
@@ -26,6 +26,7 @@
#endif
#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
@@ -40,9 +41,19 @@ MODULE_DESCRIPTION("Xtables: per hash-bucket rate-limit match");
MODULE_ALIAS("ipt_hashlimit");
MODULE_ALIAS("ip6t_hashlimit");
+struct hashlimit_net {
+ struct hlist_head htables;
+ struct proc_dir_entry *ipt_hashlimit;
+ struct proc_dir_entry *ip6t_hashlimit;
+};
+
+static int hashlimit_net_id;
+static inline struct hashlimit_net *hashlimit_pernet(struct net *net)
+{
+ return net_generic(net, hashlimit_net_id);
+}
+
/* need to declare this at the top */
-static struct proc_dir_entry *hashlimit_procdir4;
-static struct proc_dir_entry *hashlimit_procdir6;
static const struct file_operations dl_file_ops;
/* hash table crap */
@@ -79,27 +90,26 @@ struct dsthash_ent {
struct xt_hashlimit_htable {
struct hlist_node node; /* global list of all htables */
- atomic_t use;
+ int use;
u_int8_t family;
+ bool rnd_initialized;
struct hashlimit_cfg1 cfg; /* config */
/* used internally */
spinlock_t lock; /* lock for list_head */
u_int32_t rnd; /* random seed for hash */
- int rnd_initialized;
unsigned int count; /* number entries in table */
struct timer_list timer; /* timer for gc */
/* seq_file stuff */
struct proc_dir_entry *pde;
+ struct net *net;
struct hlist_head hash[0]; /* hashtable itself */
};
-static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */
-static DEFINE_MUTEX(hlimit_mutex); /* additional checkentry protection */
-static HLIST_HEAD(hashlimit_htables);
+static DEFINE_MUTEX(hashlimit_mutex); /* protects htables list */
static struct kmem_cache *hashlimit_cachep __read_mostly;
static inline bool dst_cmp(const struct dsthash_ent *ent,
@@ -150,7 +160,7 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht,
* the first hashtable entry */
if (!ht->rnd_initialized) {
get_random_bytes(&ht->rnd, sizeof(ht->rnd));
- ht->rnd_initialized = 1;
+ ht->rnd_initialized = true;
}
if (ht->cfg.max && ht->count >= ht->cfg.max) {
@@ -185,8 +195,9 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
}
static void htable_gc(unsigned long htlong);
-static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family)
+static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_int8_t family)
{
+ struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
struct xt_hashlimit_htable *hinfo;
unsigned int size;
unsigned int i;
@@ -232,33 +243,34 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family)
for (i = 0; i < hinfo->cfg.size; i++)
INIT_HLIST_HEAD(&hinfo->hash[i]);
- atomic_set(&hinfo->use, 1);
+ hinfo->use = 1;
hinfo->count = 0;
hinfo->family = family;
- hinfo->rnd_initialized = 0;
+ hinfo->rnd_initialized = false;
spin_lock_init(&hinfo->lock);
hinfo->pde = proc_create_data(minfo->name, 0,
(family == NFPROTO_IPV4) ?
- hashlimit_procdir4 : hashlimit_procdir6,
+ hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
&dl_file_ops, hinfo);
if (!hinfo->pde) {
vfree(hinfo);
return -1;
}
+ hinfo->net = net;
setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo);
hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
add_timer(&hinfo->timer);
- spin_lock_bh(&hashlimit_lock);
- hlist_add_head(&hinfo->node, &hashlimit_htables);
- spin_unlock_bh(&hashlimit_lock);
+ hlist_add_head(&hinfo->node, &hashlimit_net->htables);
return 0;
}
-static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family)
+static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
+ u_int8_t family)
{
+ struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
struct xt_hashlimit_htable *hinfo;
unsigned int size;
unsigned int i;
@@ -293,28 +305,27 @@ static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family)
for (i = 0; i < hinfo->cfg.size; i++)
INIT_HLIST_HEAD(&hinfo->hash[i]);
- atomic_set(&hinfo->use, 1);
+ hinfo->use = 1;
hinfo->count = 0;
hinfo->family = family;
- hinfo->rnd_initialized = 0;
+ hinfo->rnd_initialized = false;
spin_lock_init(&hinfo->lock);
hinfo->pde = proc_create_data(minfo->name, 0,
(family == NFPROTO_IPV4) ?
- hashlimit_procdir4 : hashlimit_procdir6,
+ hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
&dl_file_ops, hinfo);
if (hinfo->pde == NULL) {
vfree(hinfo);
return -1;
}
+ hinfo->net = net;
setup_timer(&hinfo->timer, htable_gc, (unsigned long)hinfo);
hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
add_timer(&hinfo->timer);
- spin_lock_bh(&hashlimit_lock);
- hlist_add_head(&hinfo->node, &hashlimit_htables);
- spin_unlock_bh(&hashlimit_lock);
+ hlist_add_head(&hinfo->node, &hashlimit_net->htables);
return 0;
}
@@ -364,43 +375,46 @@ static void htable_gc(unsigned long htlong)
static void htable_destroy(struct xt_hashlimit_htable *hinfo)
{
+ struct hashlimit_net *hashlimit_net = hashlimit_pernet(hinfo->net);
+ struct proc_dir_entry *parent;
+
del_timer_sync(&hinfo->timer);
- /* remove proc entry */
- remove_proc_entry(hinfo->pde->name,
- hinfo->family == NFPROTO_IPV4 ? hashlimit_procdir4 :
- hashlimit_procdir6);
+ if (hinfo->family == NFPROTO_IPV4)
+ parent = hashlimit_net->ipt_hashlimit;
+ else
+ parent = hashlimit_net->ip6t_hashlimit;
+ remove_proc_entry(hinfo->pde->name, parent);
htable_selective_cleanup(hinfo, select_all);
vfree(hinfo);
}
-static struct xt_hashlimit_htable *htable_find_get(const char *name,
+static struct xt_hashlimit_htable *htable_find_get(struct net *net,
+ const char *name,
u_int8_t family)
{
+ struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
struct xt_hashlimit_htable *hinfo;
struct hlist_node *pos;
- spin_lock_bh(&hashlimit_lock);
- hlist_for_each_entry(hinfo, pos, &hashlimit_htables, node) {
+ hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) {
if (!strcmp(name, hinfo->pde->name) &&
hinfo->family == family) {
- atomic_inc(&hinfo->use);
- spin_unlock_bh(&hashlimit_lock);
+ hinfo->use++;
return hinfo;
}
}
- spin_unlock_bh(&hashlimit_lock);
return NULL;
}
static void htable_put(struct xt_hashlimit_htable *hinfo)
{
- if (atomic_dec_and_test(&hinfo->use)) {
- spin_lock_bh(&hashlimit_lock);
+ mutex_lock(&hashlimit_mutex);
+ if (--hinfo->use == 0) {
hlist_del(&hinfo->node);
- spin_unlock_bh(&hashlimit_lock);
htable_destroy(hinfo);
}
+ mutex_unlock(&hashlimit_mutex);
}
/* The algorithm used is the Simple Token Bucket Filter (TBF)
@@ -479,6 +493,7 @@ static void hashlimit_ipv6_mask(__be32 *i, unsigned int p)
case 64 ... 95:
i[2] = maskl(i[2], p - 64);
i[3] = 0;
+ break;
case 96 ... 127:
i[3] = maskl(i[3], p - 96);
break;
@@ -665,6 +680,7 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
{
+ struct net *net = par->net;
struct xt_hashlimit_info *r = par->matchinfo;
/* Check for overflow. */
@@ -687,25 +703,20 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
if (r->name[sizeof(r->name) - 1] != '\0')
return false;
- /* This is the best we've got: We cannot release and re-grab lock,
- * since checkentry() is called before x_tables.c grabs xt_mutex.
- * We also cannot grab the hashtable spinlock, since htable_create will
- * call vmalloc, and that can sleep. And we cannot just re-search
- * the list of htable's in htable_create(), since then we would
- * create duplicate proc files. -HW */
- mutex_lock(&hlimit_mutex);
- r->hinfo = htable_find_get(r->name, par->match->family);
- if (!r->hinfo && htable_create_v0(r, par->match->family) != 0) {
- mutex_unlock(&hlimit_mutex);
+ mutex_lock(&hashlimit_mutex);
+ r->hinfo = htable_find_get(net, r->name, par->match->family);
+ if (!r->hinfo && htable_create_v0(net, r, par->match->family) != 0) {
+ mutex_unlock(&hashlimit_mutex);
return false;
}
- mutex_unlock(&hlimit_mutex);
+ mutex_unlock(&hashlimit_mutex);
return true;
}
static bool hashlimit_mt_check(const struct xt_mtchk_param *par)
{
+ struct net *net = par->net;
struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
/* Check for overflow. */
@@ -728,19 +739,13 @@ static bool hashlimit_mt_check(const struct xt_mtchk_param *par)
return false;
}
- /* This is the best we've got: We cannot release and re-grab lock,
- * since checkentry() is called before x_tables.c grabs xt_mutex.
- * We also cannot grab the hashtable spinlock, since htable_create will
- * call vmalloc, and that can sleep. And we cannot just re-search
- * the list of htable's in htable_create(), since then we would
- * create duplicate proc files. -HW */
- mutex_lock(&hlimit_mutex);
- info->hinfo = htable_find_get(info->name, par->match->family);
- if (!info->hinfo && htable_create(info, par->match->family) != 0) {
- mutex_unlock(&hlimit_mutex);
+ mutex_lock(&hashlimit_mutex);
+ info->hinfo = htable_find_get(net, info->name, par->match->family);
+ if (!info->hinfo && htable_create(net, info, par->match->family) != 0) {
+ mutex_unlock(&hashlimit_mutex);
return false;
}
- mutex_unlock(&hlimit_mutex);
+ mutex_unlock(&hashlimit_mutex);
return true;
}
@@ -767,7 +772,7 @@ struct compat_xt_hashlimit_info {
compat_uptr_t master;
};
-static void hashlimit_mt_compat_from_user(void *dst, void *src)
+static void hashlimit_mt_compat_from_user(void *dst, const void *src)
{
int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
@@ -775,7 +780,7 @@ static void hashlimit_mt_compat_from_user(void *dst, void *src)
memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off);
}
-static int hashlimit_mt_compat_to_user(void __user *dst, void *src)
+static int hashlimit_mt_compat_to_user(void __user *dst, const void *src)
{
int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
@@ -841,8 +846,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
static void *dl_seq_start(struct seq_file *s, loff_t *pos)
__acquires(htable->lock)
{
- struct proc_dir_entry *pde = s->private;
- struct xt_hashlimit_htable *htable = pde->data;
+ struct xt_hashlimit_htable *htable = s->private;
unsigned int *bucket;
spin_lock_bh(&htable->lock);
@@ -859,8 +863,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos)
static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
- struct proc_dir_entry *pde = s->private;
- struct xt_hashlimit_htable *htable = pde->data;
+ struct xt_hashlimit_htable *htable = s->private;
unsigned int *bucket = (unsigned int *)v;
*pos = ++(*bucket);
@@ -874,11 +877,11 @@ static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
static void dl_seq_stop(struct seq_file *s, void *v)
__releases(htable->lock)
{
- struct proc_dir_entry *pde = s->private;
- struct xt_hashlimit_htable *htable = pde->data;
+ struct xt_hashlimit_htable *htable = s->private;
unsigned int *bucket = (unsigned int *)v;
- kfree(bucket);
+ if (!IS_ERR(bucket))
+ kfree(bucket);
spin_unlock_bh(&htable->lock);
}
@@ -917,8 +920,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
static int dl_seq_show(struct seq_file *s, void *v)
{
- struct proc_dir_entry *pde = s->private;
- struct xt_hashlimit_htable *htable = pde->data;
+ struct xt_hashlimit_htable *htable = s->private;
unsigned int *bucket = (unsigned int *)v;
struct dsthash_ent *ent;
struct hlist_node *pos;
@@ -944,7 +946,7 @@ static int dl_proc_open(struct inode *inode, struct file *file)
if (!ret) {
struct seq_file *sf = file->private_data;
- sf->private = PDE(inode);
+ sf->private = PDE(inode)->data;
}
return ret;
}
@@ -957,10 +959,61 @@ static const struct file_operations dl_file_ops = {
.release = seq_release
};
+static int __net_init hashlimit_proc_net_init(struct net *net)
+{
+ struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
+
+ hashlimit_net->ipt_hashlimit = proc_mkdir("ipt_hashlimit", net->proc_net);
+ if (!hashlimit_net->ipt_hashlimit)
+ return -ENOMEM;
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+ hashlimit_net->ip6t_hashlimit = proc_mkdir("ip6t_hashlimit", net->proc_net);
+ if (!hashlimit_net->ip6t_hashlimit) {
+ proc_net_remove(net, "ipt_hashlimit");
+ return -ENOMEM;
+ }
+#endif
+ return 0;
+}
+
+static void __net_exit hashlimit_proc_net_exit(struct net *net)
+{
+ proc_net_remove(net, "ipt_hashlimit");
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+ proc_net_remove(net, "ip6t_hashlimit");
+#endif
+}
+
+static int __net_init hashlimit_net_init(struct net *net)
+{
+ struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
+
+ INIT_HLIST_HEAD(&hashlimit_net->htables);
+ return hashlimit_proc_net_init(net);
+}
+
+static void __net_exit hashlimit_net_exit(struct net *net)
+{
+ struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
+
+ BUG_ON(!hlist_empty(&hashlimit_net->htables));
+ hashlimit_proc_net_exit(net);
+}
+
+static struct pernet_operations hashlimit_net_ops = {
+ .init = hashlimit_net_init,
+ .exit = hashlimit_net_exit,
+ .id = &hashlimit_net_id,
+ .size = sizeof(struct hashlimit_net),
+};
+
static int __init hashlimit_mt_init(void)
{
int err;
+ err = register_pernet_subsys(&hashlimit_net_ops);
+ if (err < 0)
+ return err;
err = xt_register_matches(hashlimit_mt_reg,
ARRAY_SIZE(hashlimit_mt_reg));
if (err < 0)
@@ -974,41 +1027,21 @@ static int __init hashlimit_mt_init(void)
printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n");
goto err2;
}
- hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", init_net.proc_net);
- if (!hashlimit_procdir4) {
- printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
- "entry\n");
- goto err3;
- }
- err = 0;
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
- hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net);
- if (!hashlimit_procdir6) {
- printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
- "entry\n");
- err = -ENOMEM;
- }
-#endif
- if (!err)
- return 0;
- remove_proc_entry("ipt_hashlimit", init_net.proc_net);
-err3:
- kmem_cache_destroy(hashlimit_cachep);
+ return 0;
+
err2:
xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
err1:
+ unregister_pernet_subsys(&hashlimit_net_ops);
return err;
}
static void __exit hashlimit_mt_exit(void)
{
- remove_proc_entry("ipt_hashlimit", init_net.proc_net);
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
- remove_proc_entry("ip6t_hashlimit", init_net.proc_net);
-#endif
kmem_cache_destroy(hashlimit_cachep);
xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
+ unregister_pernet_subsys(&hashlimit_net_ops);
}
module_init(hashlimit_mt_init);
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 2773be6a71dd..e5d7e1ffb1a4 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -6,6 +6,7 @@
* published by the Free Software Foundation.
*/
+#include <linux/slab.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
@@ -148,7 +149,7 @@ struct compat_xt_rateinfo {
/* To keep the full "prev" timestamp, the upper 32 bits are stored in the
* master pointer, which does not need to be preserved. */
-static void limit_mt_compat_from_user(void *dst, void *src)
+static void limit_mt_compat_from_user(void *dst, const void *src)
{
const struct compat_xt_rateinfo *cm = src;
struct xt_rateinfo m = {
@@ -162,7 +163,7 @@ static void limit_mt_compat_from_user(void *dst, void *src)
memcpy(dst, &m, sizeof(m));
}
-static int limit_mt_compat_to_user(void __user *dst, void *src)
+static int limit_mt_compat_to_user(void __user *dst, const void *src)
{
const struct xt_rateinfo *m = src;
struct compat_xt_rateinfo cm = {
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 4d1a41bbd5d7..4169e200588d 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -334,7 +334,7 @@ static bool xt_osf_match_packet(const struct sk_buff *skb,
if (info->flags & XT_OSF_LOG)
nf_log_packet(p->family, p->hooknum, skb,
p->in, p->out, NULL,
- "%s [%s:%s] : %pi4:%d -> %pi4:%d hops=%d\n",
+ "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
f->genre, f->version, f->subtype,
&ip->saddr, ntohs(tcp->source),
&ip->daddr, ntohs(tcp->dest),
@@ -349,7 +349,7 @@ static bool xt_osf_match_packet(const struct sk_buff *skb,
if (!fcount && (info->flags & XT_OSF_LOG))
nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL,
- "Remote OS is not known: %pi4:%u -> %pi4:%u\n",
+ "Remote OS is not known: %pI4:%u -> %pI4:%u\n",
&ip->saddr, ntohs(tcp->source),
&ip->daddr, ntohs(tcp->dest));
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 390b7d09fe51..2d5562498c43 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -4,6 +4,7 @@
* Sam Johnston <samj@samj.net>
*/
#include <linux/skbuff.h>
+#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/netfilter/x_tables.h>
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index fc70a49c0afd..834b736857cb 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -27,7 +27,9 @@
#include <linux/bitops.h>
#include <linux/skbuff.h>
#include <linux/inet.h>
+#include <linux/slab.h>
#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_recent.h>
@@ -52,7 +54,7 @@ module_param(ip_list_perms, uint, 0400);
module_param(ip_list_uid, uint, 0400);
module_param(ip_list_gid, uint, 0400);
MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
-MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)");
+MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)");
MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files");
MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/xt_recent/* files");
@@ -78,37 +80,40 @@ struct recent_table {
struct list_head iphash[0];
};
-static LIST_HEAD(tables);
+struct recent_net {
+ struct list_head tables;
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *xt_recent;
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+ struct proc_dir_entry *ipt_recent;
+#endif
+#endif
+};
+
+static int recent_net_id;
+static inline struct recent_net *recent_pernet(struct net *net)
+{
+ return net_generic(net, recent_net_id);
+}
+
static DEFINE_SPINLOCK(recent_lock);
static DEFINE_MUTEX(recent_mutex);
#ifdef CONFIG_PROC_FS
-#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-static struct proc_dir_entry *proc_old_dir;
-#endif
-static struct proc_dir_entry *recent_proc_dir;
static const struct file_operations recent_old_fops, recent_mt_fops;
#endif
-static u_int32_t hash_rnd;
-static bool hash_rnd_initted;
+static u_int32_t hash_rnd __read_mostly;
+static bool hash_rnd_inited __read_mostly;
-static unsigned int recent_entry_hash4(const union nf_inet_addr *addr)
+static inline unsigned int recent_entry_hash4(const union nf_inet_addr *addr)
{
- if (!hash_rnd_initted) {
- get_random_bytes(&hash_rnd, sizeof(hash_rnd));
- hash_rnd_initted = true;
- }
return jhash_1word((__force u32)addr->ip, hash_rnd) &
(ip_list_hash_size - 1);
}
-static unsigned int recent_entry_hash6(const union nf_inet_addr *addr)
+static inline unsigned int recent_entry_hash6(const union nf_inet_addr *addr)
{
- if (!hash_rnd_initted) {
- get_random_bytes(&hash_rnd, sizeof(hash_rnd));
- hash_rnd_initted = true;
- }
return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), hash_rnd) &
(ip_list_hash_size - 1);
}
@@ -173,18 +178,19 @@ recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr,
static void recent_entry_update(struct recent_table *t, struct recent_entry *e)
{
+ e->index %= ip_pkt_list_tot;
e->stamps[e->index++] = jiffies;
if (e->index > e->nstamps)
e->nstamps = e->index;
- e->index %= ip_pkt_list_tot;
list_move_tail(&e->lru_list, &t->lru_list);
}
-static struct recent_table *recent_table_lookup(const char *name)
+static struct recent_table *recent_table_lookup(struct recent_net *recent_net,
+ const char *name)
{
struct recent_table *t;
- list_for_each_entry(t, &tables, list)
+ list_for_each_entry(t, &recent_net->tables, list)
if (!strcmp(t->name, name))
return t;
return NULL;
@@ -203,6 +209,8 @@ static void recent_table_flush(struct recent_table *t)
static bool
recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
{
+ struct net *net = dev_net(par->in ? par->in : par->out);
+ struct recent_net *recent_net = recent_pernet(net);
const struct xt_recent_mtinfo *info = par->matchinfo;
struct recent_table *t;
struct recent_entry *e;
@@ -235,7 +243,7 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
ttl++;
spin_lock_bh(&recent_lock);
- t = recent_table_lookup(info->name);
+ t = recent_table_lookup(recent_net, info->name);
e = recent_entry_lookup(t, &addr, par->match->family,
(info->check_set & XT_RECENT_TTL) ? ttl : 0);
if (e == NULL) {
@@ -260,7 +268,7 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
for (i = 0; i < e->nstamps; i++) {
if (info->seconds && time_after(time, e->stamps[i]))
continue;
- if (++hits >= info->hit_count) {
+ if (!info->hit_count || ++hits >= info->hit_count) {
ret = !ret;
break;
}
@@ -279,6 +287,7 @@ out:
static bool recent_mt_check(const struct xt_mtchk_param *par)
{
+ struct recent_net *recent_net = recent_pernet(par->net);
const struct xt_recent_mtinfo *info = par->matchinfo;
struct recent_table *t;
#ifdef CONFIG_PROC_FS
@@ -287,6 +296,10 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
unsigned i;
bool ret = false;
+ if (unlikely(!hash_rnd_inited)) {
+ get_random_bytes(&hash_rnd, sizeof(hash_rnd));
+ hash_rnd_inited = true;
+ }
if (hweight8(info->check_set &
(XT_RECENT_SET | XT_RECENT_REMOVE |
XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1)
@@ -294,14 +307,18 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) &&
(info->seconds || info->hit_count))
return false;
- if (info->hit_count > ip_pkt_list_tot)
+ if (info->hit_count > ip_pkt_list_tot) {
+ pr_info(KBUILD_MODNAME ": hitcount (%u) is larger than "
+ "packets to be remembered (%u)\n",
+ info->hit_count, ip_pkt_list_tot);
return false;
+ }
if (info->name[0] == '\0' ||
strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN)
return false;
mutex_lock(&recent_mutex);
- t = recent_table_lookup(info->name);
+ t = recent_table_lookup(recent_net, info->name);
if (t != NULL) {
t->refcnt++;
ret = true;
@@ -318,7 +335,7 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
for (i = 0; i < ip_list_hash_size; i++)
INIT_LIST_HEAD(&t->iphash[i]);
#ifdef CONFIG_PROC_FS
- pde = proc_create_data(t->name, ip_list_perms, recent_proc_dir,
+ pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent,
&recent_mt_fops, t);
if (pde == NULL) {
kfree(t);
@@ -327,10 +344,10 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
pde->uid = ip_list_uid;
pde->gid = ip_list_gid;
#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
- pde = proc_create_data(t->name, ip_list_perms, proc_old_dir,
+ pde = proc_create_data(t->name, ip_list_perms, recent_net->ipt_recent,
&recent_old_fops, t);
if (pde == NULL) {
- remove_proc_entry(t->name, proc_old_dir);
+ remove_proc_entry(t->name, recent_net->xt_recent);
kfree(t);
goto out;
}
@@ -339,7 +356,7 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
#endif
#endif
spin_lock_bh(&recent_lock);
- list_add_tail(&t->list, &tables);
+ list_add_tail(&t->list, &recent_net->tables);
spin_unlock_bh(&recent_lock);
ret = true;
out:
@@ -349,20 +366,21 @@ out:
static void recent_mt_destroy(const struct xt_mtdtor_param *par)
{
+ struct recent_net *recent_net = recent_pernet(par->net);
const struct xt_recent_mtinfo *info = par->matchinfo;
struct recent_table *t;
mutex_lock(&recent_mutex);
- t = recent_table_lookup(info->name);
+ t = recent_table_lookup(recent_net, info->name);
if (--t->refcnt == 0) {
spin_lock_bh(&recent_lock);
list_del(&t->list);
spin_unlock_bh(&recent_lock);
#ifdef CONFIG_PROC_FS
#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
- remove_proc_entry(t->name, proc_old_dir);
+ remove_proc_entry(t->name, recent_net->ipt_recent);
#endif
- remove_proc_entry(t->name, recent_proc_dir);
+ remove_proc_entry(t->name, recent_net->xt_recent);
#endif
recent_table_flush(t);
kfree(t);
@@ -611,8 +629,65 @@ static const struct file_operations recent_mt_fops = {
.release = seq_release_private,
.owner = THIS_MODULE,
};
+
+static int __net_init recent_proc_net_init(struct net *net)
+{
+ struct recent_net *recent_net = recent_pernet(net);
+
+ recent_net->xt_recent = proc_mkdir("xt_recent", net->proc_net);
+ if (!recent_net->xt_recent)
+ return -ENOMEM;
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+ recent_net->ipt_recent = proc_mkdir("ipt_recent", net->proc_net);
+ if (!recent_net->ipt_recent) {
+ proc_net_remove(net, "xt_recent");
+ return -ENOMEM;
+ }
+#endif
+ return 0;
+}
+
+static void __net_exit recent_proc_net_exit(struct net *net)
+{
+#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
+ proc_net_remove(net, "ipt_recent");
+#endif
+ proc_net_remove(net, "xt_recent");
+}
+#else
+static inline int recent_proc_net_init(struct net *net)
+{
+ return 0;
+}
+
+static inline void recent_proc_net_exit(struct net *net)
+{
+}
#endif /* CONFIG_PROC_FS */
+static int __net_init recent_net_init(struct net *net)
+{
+ struct recent_net *recent_net = recent_pernet(net);
+
+ INIT_LIST_HEAD(&recent_net->tables);
+ return recent_proc_net_init(net);
+}
+
+static void __net_exit recent_net_exit(struct net *net)
+{
+ struct recent_net *recent_net = recent_pernet(net);
+
+ BUG_ON(!list_empty(&recent_net->tables));
+ recent_proc_net_exit(net);
+}
+
+static struct pernet_operations recent_net_ops = {
+ .init = recent_net_init,
+ .exit = recent_net_exit,
+ .id = &recent_net_id,
+ .size = sizeof(struct recent_net),
+};
+
static struct xt_match recent_mt_reg[] __read_mostly = {
{
.name = "recent",
@@ -644,39 +719,19 @@ static int __init recent_mt_init(void)
return -EINVAL;
ip_list_hash_size = 1 << fls(ip_list_tot);
- err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
-#ifdef CONFIG_PROC_FS
+ err = register_pernet_subsys(&recent_net_ops);
if (err)
return err;
- recent_proc_dir = proc_mkdir("xt_recent", init_net.proc_net);
- if (recent_proc_dir == NULL) {
- xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
- err = -ENOMEM;
- }
-#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
- if (err < 0)
- return err;
- proc_old_dir = proc_mkdir("ipt_recent", init_net.proc_net);
- if (proc_old_dir == NULL) {
- remove_proc_entry("xt_recent", init_net.proc_net);
- xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
- err = -ENOMEM;
- }
-#endif
-#endif
+ err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
+ if (err)
+ unregister_pernet_subsys(&recent_net_ops);
return err;
}
static void __exit recent_mt_exit(void)
{
- BUG_ON(!list_empty(&tables));
xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg));
-#ifdef CONFIG_PROC_FS
-#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
- remove_proc_entry("ipt_recent", init_net.proc_net);
-#endif
- remove_proc_entry("xt_recent", init_net.proc_net);
-#endif
+ unregister_pernet_subsys(&recent_net_ops);
}
module_init(recent_mt_init);
diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h
new file mode 100644
index 000000000000..6efe4e5a81c6
--- /dev/null
+++ b/net/netfilter/xt_repldata.h
@@ -0,0 +1,35 @@
+/*
+ * Today's hack: quantum tunneling in structs
+ *
+ * 'entries' and 'term' are never anywhere referenced by word in code. In fact,
+ * they serve as the hanging-off data accessed through repl.data[].
+ */
+
+#define xt_alloc_initial_table(type, typ2) ({ \
+ unsigned int hook_mask = info->valid_hooks; \
+ unsigned int nhooks = hweight32(hook_mask); \
+ unsigned int bytes = 0, hooknum = 0, i = 0; \
+ struct { \
+ struct type##_replace repl; \
+ struct type##_standard entries[nhooks]; \
+ struct type##_error term; \
+ } *tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); \
+ if (tbl == NULL) \
+ return NULL; \
+ strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \
+ tbl->term = (struct type##_error)typ2##_ERROR_INIT; \
+ tbl->repl.valid_hooks = hook_mask; \
+ tbl->repl.num_entries = nhooks + 1; \
+ tbl->repl.size = nhooks * sizeof(struct type##_standard) + \
+ sizeof(struct type##_error); \
+ for (; hook_mask != 0; hook_mask >>= 1, ++hooknum) { \
+ if (!(hook_mask & 1)) \
+ continue; \
+ tbl->repl.hook_entry[hooknum] = bytes; \
+ tbl->repl.underflow[hooknum] = bytes; \
+ tbl->entries[i++] = (struct type##_standard) \
+ typ2##_STANDARD_INIT(NF_ACCEPT); \
+ bytes += sizeof(struct type##_standard); \
+ } \
+ tbl; \
+})
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index d8c0f8f1a78e..937ce0633e99 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -12,6 +12,7 @@
#include <linux/spinlock.h>
#include <linux/skbuff.h>
#include <linux/net.h>
+#include <linux/slab.h>
#include <linux/netfilter/xt_statistic.h>
#include <linux/netfilter/x_tables.h>
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index b4d774111311..96801ffd8af8 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -7,6 +7,7 @@
* published by the Free Software Foundation.
*/
+#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>