diff options
author | Ingo Molnar <mingo@elte.hu> | 2010-05-03 09:17:01 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-05-03 09:17:01 +0200 |
commit | 53ba4f2fa73225113a488584df0d85d3cba52943 (patch) | |
tree | d85b984d9818abc3ccc0237eb53b710d9e96c39e /net/netfilter | |
parent | bd6d29c25bb1a24a4c160ec5de43e0004e01f72b (diff) | |
parent | 66f41d4c5c8a5deed66fdcc84509376c9a0bf9d8 (diff) |
Merge commit 'v2.6.34-rc6' into core/locking
Diffstat (limited to 'net/netfilter')
62 files changed, 2634 insertions, 501 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 634d14affc8d..18d77b5c351a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -83,6 +83,19 @@ config NF_CONNTRACK_SECMARK If unsure, say 'N'. +config NF_CONNTRACK_ZONES + bool 'Connection tracking zones' + depends on NETFILTER_ADVANCED + depends on NETFILTER_XT_TARGET_CT + help + This option enables support for connection tracking zones. + Normally, each connection needs to have a unique system wide + identity. Connection tracking zones allow to have multiple + connections using the same identity, as long as they are + contained in different zones. + + If unsure, say `N'. + config NF_CONNTRACK_EVENTS bool "Connection tracking events" depends on NETFILTER_ADVANCED @@ -341,6 +354,18 @@ config NETFILTER_XT_TARGET_CONNSECMARK To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_CT + tristate '"CT" target support' + depends on NF_CONNTRACK + depends on IP_NF_RAW || IP6_NF_RAW + depends on NETFILTER_ADVANCED + help + This options adds a `CT' target, which allows to specify initial + connection tracking parameters like events to be delivered and + the helper to be used. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_DSCP tristate '"DSCP" and "TOS" target support' depends on IP_NF_MANGLE || IP6_NF_MANGLE diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 49f62ee4e9ff..f873644f02f6 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 60ec4e4badaa..78b505d33bfb 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -19,6 +19,7 @@ #include <linux/inetdevice.h> #include <linux/proc_fs.h> #include <linux/mutex.h> +#include <linux/slab.h> #include <net/net_namespace.h> #include <net/sock.h> diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index f2d76238b9b5..712ccad13344 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -68,6 +68,10 @@ config IP_VS_TAB_BITS each hash entry uses 8 bytes, so you can estimate how much memory is needed for your box. + You can overwrite this number setting conn_tab_bits module parameter + or by appending ip_vs.conn_tab_bits=? to the kernel command line + if IP VS was compiled built-in. + comment "IPVS transport protocol load balancing support" config IP_VS_PROTO_TCP @@ -100,6 +104,13 @@ config IP_VS_PROTO_AH This option enables support for load balancing AH (Authentication Header) transport protocol. Say Y if unsure. +config IP_VS_PROTO_SCTP + bool "SCTP load balancing support" + select LIBCRC32C + ---help--- + This option enables support for load balancing SCTP transport + protocol. Say Y if unsure. + comment "IPVS scheduler" config IP_VS_RR diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile index 73a46fe1fe4c..e3baefd7066e 100644 --- a/net/netfilter/ipvs/Makefile +++ b/net/netfilter/ipvs/Makefile @@ -7,6 +7,7 @@ ip_vs_proto-objs-y := ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o +ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 3c7e42735b60..1cb0e834f8ff 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -27,6 +27,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/netfilter.h> +#include <linux/slab.h> #include <net/net_namespace.h> #include <net/protocol.h> #include <net/tcp.h> diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 27c30cf933da..d8f7e8ef67b4 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -32,6 +32,7 @@ #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/proc_fs.h> /* for proc_net_* */ +#include <linux/slab.h> #include <linux/seq_file.h> #include <linux/jhash.h> #include <linux/random.h> @@ -40,6 +41,21 @@ #include <net/ip_vs.h> +#ifndef CONFIG_IP_VS_TAB_BITS +#define CONFIG_IP_VS_TAB_BITS 12 +#endif + +/* + * Connection hash size. Default is what was selected at compile time. +*/ +int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; +module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444); +MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size"); + +/* size and mask values */ +int ip_vs_conn_tab_size; +int ip_vs_conn_tab_mask; + /* * Connection hash table: for input and output packets lookups of IPVS */ @@ -125,11 +141,11 @@ static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, if (af == AF_INET6) return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), (__force u32)port, proto, ip_vs_conn_rnd) - & IP_VS_CONN_TAB_MASK; + & ip_vs_conn_tab_mask; #endif return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, ip_vs_conn_rnd) - & IP_VS_CONN_TAB_MASK; + & ip_vs_conn_tab_mask; } @@ -760,7 +776,7 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) int idx; struct ip_vs_conn *cp; - for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) { + for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { ct_read_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { if (pos-- == 0) { @@ -797,7 +813,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) idx = l - ip_vs_conn_tab; ct_read_unlock_bh(idx); - while (++idx < IP_VS_CONN_TAB_SIZE) { + while (++idx < ip_vs_conn_tab_size) { ct_read_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { seq->private = &ip_vs_conn_tab[idx]; @@ -976,8 +992,8 @@ void ip_vs_random_dropentry(void) /* * Randomly scan 1/32 of the whole table every second */ - for (idx = 0; idx < (IP_VS_CONN_TAB_SIZE>>5); idx++) { - unsigned hash = net_random() & IP_VS_CONN_TAB_MASK; + for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { + unsigned hash = net_random() & ip_vs_conn_tab_mask; /* * Lock is actually needed in this loop. @@ -1029,7 +1045,7 @@ static void ip_vs_conn_flush(void) struct ip_vs_conn *cp; flush_again: - for (idx=0; idx<IP_VS_CONN_TAB_SIZE; idx++) { + for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { /* * Lock is actually needed in this loop. */ @@ -1060,10 +1076,15 @@ int __init ip_vs_conn_init(void) { int idx; + /* Compute size and mask */ + ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; + ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1; + /* * Allocate the connection hash table and initialize its list heads */ - ip_vs_conn_tab = vmalloc(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head)); + ip_vs_conn_tab = vmalloc(ip_vs_conn_tab_size * + sizeof(struct list_head)); if (!ip_vs_conn_tab) return -ENOMEM; @@ -1078,12 +1099,12 @@ int __init ip_vs_conn_init(void) pr_info("Connection hash table configured " "(size=%d, memory=%ldKbytes)\n", - IP_VS_CONN_TAB_SIZE, - (long)(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head))/1024); + ip_vs_conn_tab_size, + (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024); IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n", sizeof(struct ip_vs_conn)); - for (idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) { + for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { INIT_LIST_HEAD(&ip_vs_conn_tab[idx]); } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 847ffca40184..1cd6e3fd058b 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -31,7 +31,9 @@ #include <linux/kernel.h> #include <linux/ip.h> #include <linux/tcp.h> +#include <linux/sctp.h> #include <linux/icmp.h> +#include <linux/slab.h> #include <net/ip.h> #include <net/tcp.h> @@ -81,6 +83,8 @@ const char *ip_vs_proto_name(unsigned proto) return "UDP"; case IPPROTO_TCP: return "TCP"; + case IPPROTO_SCTP: + return "SCTP"; case IPPROTO_ICMP: return "ICMP"; #ifdef CONFIG_IP_VS_IPV6 @@ -512,8 +516,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, - skb->dev); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); else #endif icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); @@ -589,8 +592,9 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, ip_send_check(ciph); } - /* the TCP/UDP port */ - if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) { + /* the TCP/UDP/SCTP port */ + if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol || + IPPROTO_SCTP == ciph->protocol) { __be16 *ports = (void *)ciph + ciph->ihl*4; if (inout) @@ -630,8 +634,9 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, ciph->saddr = cp->daddr.in6; } - /* the TCP/UDP port */ - if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) { + /* the TCP/UDP/SCTP port */ + if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr || + IPPROTO_SCTP == ciph->nexthdr) { __be16 *ports = (void *)ciph + sizeof(struct ipv6hdr); if (inout) @@ -679,7 +684,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, goto out; } - if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol) + if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol || + IPPROTO_SCTP == protocol) offset += 2 * sizeof(__u16); if (!skb_make_writable(skb, offset)) goto out; @@ -857,6 +863,21 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) } #endif +/* + * Check if sctp chunc is ABORT chunk + */ +static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len) +{ + sctp_chunkhdr_t *sch, schunk; + sch = skb_header_pointer(skb, nh_len + sizeof(sctp_sctphdr_t), + sizeof(schunk), &schunk); + if (sch == NULL) + return 0; + if (sch->type == SCTP_CID_ABORT) + return 1; + return 0; +} + static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) { struct tcphdr _tcph, *th; @@ -999,7 +1020,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, if (unlikely(!cp)) { if (sysctl_ip_vs_nat_icmp_send && (pp->protocol == IPPROTO_TCP || - pp->protocol == IPPROTO_UDP)) { + pp->protocol == IPPROTO_UDP || + pp->protocol == IPPROTO_SCTP)) { __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, iph.len, @@ -1014,14 +1036,19 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, * existing entry if it is not RST * packet or not TCP packet. */ - if (iph.protocol != IPPROTO_TCP - || !is_tcp_reset(skb, iph.len)) { + if ((iph.protocol != IPPROTO_TCP && + iph.protocol != IPPROTO_SCTP) + || ((iph.protocol == IPPROTO_TCP + && !is_tcp_reset(skb, iph.len)) + || (iph.protocol == IPPROTO_SCTP + && !is_sctp_abort(skb, + iph.len)))) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, - 0, skb->dev); + 0); else #endif icmp_send(skb, @@ -1235,7 +1262,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); - if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr) + if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr || + IPPROTO_SCTP == cih->nexthdr) offset += 2 * sizeof(__u16); verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset); /* do not touch skb anymore */ @@ -1358,6 +1386,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, * encorage the standby servers to update the connections timeout */ pkts = atomic_add_return(1, &cp->in_pkts); + if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && + cp->protocol == IPPROTO_SCTP) { + if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && + (atomic_read(&cp->in_pkts) % + sysctl_ip_vs_sync_threshold[1] + == sysctl_ip_vs_sync_threshold[0])) || + (cp->old_state != cp->state && + ((cp->state == IP_VS_SCTP_S_CLOSED) || + (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || + (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) { + ip_vs_sync_conn(cp); + goto out; + } + } + if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || @@ -1370,6 +1413,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || (cp->state == IP_VS_TCP_S_TIME_WAIT))))) ip_vs_sync_conn(cp); +out: cp->old_state = cp->state; ip_vs_conn_put(cp); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c37ac2d7bec4..36dc1d88c2fa 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -31,6 +31,7 @@ #include <linux/workqueue.h> #include <linux/swap.h> #include <linux/seq_file.h> +#include <linux/slab.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> @@ -1843,7 +1844,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) { seq_printf(seq, "IP Virtual Server version %d.%d.%d (size=%d)\n", - NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE); + NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); seq_puts(seq, "Prot LocalAddress:Port Scheduler Flags\n"); seq_puts(seq, @@ -2132,8 +2133,9 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) } } - /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */ - if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) { + /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */ + if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP && + usvc.protocol != IPPROTO_SCTP) { pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n", usvc.protocol, &usvc.addr.ip, ntohs(usvc.port), usvc.sched_name); @@ -2386,7 +2388,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) char buf[64]; sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)", - NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE); + NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); if (copy_to_user(user, buf, strlen(buf)+1) != 0) { ret = -EFAULT; goto out; @@ -2399,7 +2401,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct ip_vs_getinfo info; info.version = IP_VS_VERSION_CODE; - info.size = IP_VS_CONN_TAB_SIZE; + info.size = ip_vs_conn_tab_size; info.num_services = ip_vs_num_services; if (copy_to_user(user, &info, sizeof(info)) != 0) ret = -EFAULT; @@ -3243,7 +3245,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) case IPVS_CMD_GET_INFO: NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE); NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE, - IP_VS_CONN_TAB_SIZE); + ip_vs_conn_tab_size); break; } diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index fe3e18834b91..95fd0d14200b 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -39,6 +39,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/ip.h> +#include <linux/slab.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 702b53ca937c..ff28801962e0 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -17,7 +17,6 @@ #include <linux/kernel.h> #include <linux/jiffies.h> -#include <linux/slab.h> #include <linux/types.h> #include <linux/interrupt.h> #include <linux/sysctl.h> diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 33e2c799cba7..2c7f185dfae4 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -32,6 +32,7 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/netfilter.h> +#include <linux/gfp.h> #include <net/protocol.h> #include <net/tcp.h> #include <asm/unaligned.h> @@ -208,7 +209,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, */ from.ip = n_cp->vaddr.ip; port = n_cp->vport; - sprintf(buf, "%d,%d,%d,%d,%d,%d", NIPQUAD(from.ip), + sprintf(buf, "%u,%u,%u,%u,%u,%u", NIPQUAD(from.ip), (ntohs(port)>>8)&255, ntohs(port)&255); buf_len = strlen(buf); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 1b9370db2305..94a45213faa6 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -43,6 +43,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/ip.h> +#include <linux/slab.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index f7476b95ab46..535dc2b419d8 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -45,6 +45,8 @@ #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/jiffies.h> +#include <linux/list.h> +#include <linux/slab.h> /* for sysctl */ #include <linux/fs.h> @@ -85,25 +87,25 @@ static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ; /* * IPVS destination set structure and operations */ -struct ip_vs_dest_list { - struct ip_vs_dest_list *next; /* list link */ +struct ip_vs_dest_set_elem { + struct list_head list; /* list link */ struct ip_vs_dest *dest; /* destination server */ }; struct ip_vs_dest_set { atomic_t size; /* set size */ unsigned long lastmod; /* last modified time */ - struct ip_vs_dest_list *list; /* destination list */ + struct list_head list; /* destination list */ rwlock_t lock; /* lock for this list */ }; -static struct ip_vs_dest_list * +static struct ip_vs_dest_set_elem * ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) { - struct ip_vs_dest_list *e; + struct ip_vs_dest_set_elem *e; - for (e=set->list; e!=NULL; e=e->next) { + list_for_each_entry(e, &set->list, list) { if (e->dest == dest) /* already existed */ return NULL; @@ -118,9 +120,7 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) atomic_inc(&dest->refcnt); e->dest = dest; - /* link it to the list */ - e->next = set->list; - set->list = e; + list_add(&e->list, &set->list); atomic_inc(&set->size); set->lastmod = jiffies; @@ -130,34 +130,33 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) static void ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) { - struct ip_vs_dest_list *e, **ep; + struct ip_vs_dest_set_elem *e; - for (ep=&set->list, e=*ep; e!=NULL; e=*ep) { + list_for_each_entry(e, &set->list, list) { if (e->dest == dest) { /* HIT */ - *ep = e->next; atomic_dec(&set->size); set->lastmod = jiffies; atomic_dec(&e->dest->refcnt); + list_del(&e->list); kfree(e); break; } - ep = &e->next; } } static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) { - struct ip_vs_dest_list *e, **ep; + struct ip_vs_dest_set_elem *e, *ep; write_lock(&set->lock); - for (ep=&set->list, e=*ep; e!=NULL; e=*ep) { - *ep = e->next; + list_for_each_entry_safe(e, ep, &set->list, list) { /* * We don't kfree dest because it is refered either * by its service or by the trash dest list. */ atomic_dec(&e->dest->refcnt); + list_del(&e->list); kfree(e); } write_unlock(&set->lock); @@ -166,7 +165,7 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) /* get weighted least-connection node in the destination set */ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) { - register struct ip_vs_dest_list *e; + register struct ip_vs_dest_set_elem *e; struct ip_vs_dest *dest, *least; int loh, doh; @@ -174,7 +173,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) return NULL; /* select the first destination server, whose weight > 0 */ - for (e=set->list; e!=NULL; e=e->next) { + list_for_each_entry(e, &set->list, list) { least = e->dest; if (least->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -190,7 +189,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) /* find the destination with the weighted least load */ nextstage: - for (e=e->next; e!=NULL; e=e->next) { + list_for_each_entry(e, &set->list, list) { dest = e->dest; if (dest->flags & IP_VS_DEST_F_OVERLOAD) continue; @@ -220,7 +219,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) /* get weighted most-connection node in the destination set */ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) { - register struct ip_vs_dest_list *e; + register struct ip_vs_dest_set_elem *e; struct ip_vs_dest *dest, *most; int moh, doh; @@ -228,7 +227,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) return NULL; /* select the first destination server, whose weight > 0 */ - for (e=set->list; e!=NULL; e=e->next) { + list_for_each_entry(e, &set->list, list) { most = e->dest; if (atomic_read(&most->weight) > 0) { moh = atomic_read(&most->activeconns) * 50 @@ -240,7 +239,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) /* find the destination with the weighted most load */ nextstage: - for (e=e->next; e!=NULL; e=e->next) { + list_for_each_entry(e, &set->list, list) { dest = e->dest; doh = atomic_read(&dest->activeconns) * 50 + atomic_read(&dest->inactconns); @@ -389,7 +388,7 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, /* initilize its dest set */ atomic_set(&(en->set.size), 0); - en->set.list = NULL; + INIT_LIST_HEAD(&en->set.list); rwlock_init(&en->set.lock); ip_vs_lblcr_hash(tbl, en); diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 3e7671674549..7fc49f4cf5ad 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -19,6 +19,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> +#include <linux/gfp.h> #include <linux/in.h> #include <linux/ip.h> #include <net/protocol.h> @@ -257,6 +258,9 @@ int __init ip_vs_protocol_init(void) #ifdef CONFIG_IP_VS_PROTO_UDP REGISTER_PROTOCOL(&ip_vs_protocol_udp); #endif +#ifdef CONFIG_IP_VS_PROTO_SCTP + REGISTER_PROTOCOL(&ip_vs_protocol_sctp); +#endif #ifdef CONFIG_IP_VS_PROTO_AH REGISTER_PROTOCOL(&ip_vs_protocol_ah); #endif diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c new file mode 100644 index 000000000000..c9a3f7a21d53 --- /dev/null +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -0,0 +1,1183 @@ +#include <linux/kernel.h> +#include <linux/ip.h> +#include <linux/sctp.h> +#include <net/ip.h> +#include <net/ip6_checksum.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <net/sctp/checksum.h> +#include <net/ip_vs.h> + + +static struct ip_vs_conn * +sctp_conn_in_get(int af, + const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, + int inverse) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) + return NULL; + + if (likely(!inverse)) + return ip_vs_conn_in_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); + else + return ip_vs_conn_in_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); +} + +static struct ip_vs_conn * +sctp_conn_out_get(int af, + const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, + int inverse) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) + return NULL; + + if (likely(!inverse)) + return ip_vs_conn_out_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); + else + return ip_vs_conn_out_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); +} + +static int +sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, + int *verdict, struct ip_vs_conn **cpp) +{ + struct ip_vs_service *svc; + sctp_chunkhdr_t _schunkh, *sch; + sctp_sctphdr_t *sh, _sctph; + struct ip_vs_iphdr iph; + + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + + sh = skb_header_pointer(skb, iph.len, sizeof(_sctph), &_sctph); + if (sh == NULL) + return 0; + + sch = skb_header_pointer(skb, iph.len + sizeof(sctp_sctphdr_t), + sizeof(_schunkh), &_schunkh); + if (sch == NULL) + return 0; + + if ((sch->type == SCTP_CID_INIT) && + (svc = ip_vs_service_get(af, skb->mark, iph.protocol, + &iph.daddr, sh->dest))) { + if (ip_vs_todrop()) { + /* + * It seems that we are very loaded. + * We have to drop this packet :( + */ + ip_vs_service_put(svc); + *verdict = NF_DROP; + return 0; + } + /* + * Let the virtual server select a real server for the + * incoming connection, and create a connection entry. + */ + *cpp = ip_vs_schedule(svc, skb); + if (!*cpp) { + *verdict = ip_vs_leave(svc, skb, pp); + return 0; + } + ip_vs_service_put(svc); + } + + return 1; +} + +static int +sctp_snat_handler(struct sk_buff *skb, + struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +{ + sctp_sctphdr_t *sctph; + unsigned int sctphoff; + __be32 crc32; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + sctphoff = sizeof(struct ipv6hdr); + else +#endif + sctphoff = ip_hdrlen(skb); + + /* csum_check requires unshared skb */ + if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) + return 0; + + if (unlikely(cp->app != NULL)) { + /* Some checks before mangling */ + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + return 0; + + /* Call application helper if needed */ + if (!ip_vs_app_pkt_out(cp, skb)) + return 0; + } + + sctph = (void *) skb_network_header(skb) + sctphoff; + sctph->source = cp->vport; + + /* Calculate the checksum */ + crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff); + for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next) + crc32 = sctp_update_cksum((u8 *) skb->data, skb_headlen(skb), + crc32); + crc32 = sctp_end_cksum(crc32); + sctph->checksum = crc32; + + return 1; +} + +static int +sctp_dnat_handler(struct sk_buff *skb, + struct ip_vs_protocol *pp, struct ip_vs_conn *cp) +{ + + sctp_sctphdr_t *sctph; + unsigned int sctphoff; + __be32 crc32; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + sctphoff = sizeof(struct ipv6hdr); + else +#endif + sctphoff = ip_hdrlen(skb); + + /* csum_check requires unshared skb */ + if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) + return 0; + + if (unlikely(cp->app != NULL)) { + /* Some checks before mangling */ + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) + return 0; + + /* Call application helper if needed */ + if (!ip_vs_app_pkt_out(cp, skb)) + return 0; + } + + sctph = (void *) skb_network_header(skb) + sctphoff; + sctph->dest = cp->dport; + + /* Calculate the checksum */ + crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff); + for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next) + crc32 = sctp_update_cksum((u8 *) skb->data, skb_headlen(skb), + crc32); + crc32 = sctp_end_cksum(crc32); + sctph->checksum = crc32; + + return 1; +} + +static int +sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) +{ + struct sk_buff *list = skb_shinfo(skb)->frag_list; + unsigned int sctphoff; + struct sctphdr *sh, _sctph; + __le32 cmp; + __le32 val; + __u32 tmp; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + sctphoff = sizeof(struct ipv6hdr); + else +#endif + sctphoff = ip_hdrlen(skb); + + sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph); + if (sh == NULL) + return 0; + + cmp = sh->checksum; + + tmp = sctp_start_cksum((__u8 *) sh, skb_headlen(skb)); + for (; list; list = list->next) + tmp = sctp_update_cksum((__u8 *) list->data, + skb_headlen(list), tmp); + + val = sctp_end_cksum(tmp); + + if (val != cmp) { + /* CRC failure, dump it. */ + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "Failed checksum for"); + return 0; + } + return 1; +} + +struct ipvs_sctp_nextstate { + int next_state; +}; +enum ipvs_sctp_event_t { + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_DATA_SER, + IP_VS_SCTP_EVE_INIT_CLI, + IP_VS_SCTP_EVE_INIT_SER, + IP_VS_SCTP_EVE_INIT_ACK_CLI, + IP_VS_SCTP_EVE_INIT_ACK_SER, + IP_VS_SCTP_EVE_COOKIE_ECHO_CLI, + IP_VS_SCTP_EVE_COOKIE_ECHO_SER, + IP_VS_SCTP_EVE_COOKIE_ACK_CLI, + IP_VS_SCTP_EVE_COOKIE_ACK_SER, + IP_VS_SCTP_EVE_ABORT_CLI, + IP_VS_SCTP_EVE__ABORT_SER, + IP_VS_SCTP_EVE_SHUT_CLI, + IP_VS_SCTP_EVE_SHUT_SER, + IP_VS_SCTP_EVE_SHUT_ACK_CLI, + IP_VS_SCTP_EVE_SHUT_ACK_SER, + IP_VS_SCTP_EVE_SHUT_COM_CLI, + IP_VS_SCTP_EVE_SHUT_COM_SER, + IP_VS_SCTP_EVE_LAST +}; + +static enum ipvs_sctp_event_t sctp_events[255] = { + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_INIT_CLI, + IP_VS_SCTP_EVE_INIT_ACK_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_ABORT_CLI, + IP_VS_SCTP_EVE_SHUT_CLI, + IP_VS_SCTP_EVE_SHUT_ACK_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_COOKIE_ECHO_CLI, + IP_VS_SCTP_EVE_COOKIE_ACK_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_DATA_CLI, + IP_VS_SCTP_EVE_SHUT_COM_CLI, +}; + +static struct ipvs_sctp_nextstate + sctp_states_table[IP_VS_SCTP_S_LAST][IP_VS_SCTP_EVE_LAST] = { + /* + * STATE : IP_VS_SCTP_S_NONE + */ + /*next state *//*event */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }, + }, + /* + * STATE : IP_VS_SCTP_S_INIT_CLI + * Cient sent INIT and is waiting for reply from server(In ECHO_WAIT) + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ECHO_CLI */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_ECHO_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_INIT_SER + * Server sent INIT and waiting for INIT ACK from the client + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_INIT_ACK_CLI + * Client sent INIT ACK and waiting for ECHO from the server + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK has been resent by the client, let us stay is in + * the same state + */ + {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + /* + * INIT_ACK sent by the server, close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * ECHO by client, it should not happen, close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + /* + * ECHO by server, this is what we are expecting, move to ECHO_SER + */ + {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, it should not happen, close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + /* + * Unexpected COOKIE ACK from server, staty in the same state + */ + {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_INIT_ACK_SER + * Server sent INIT ACK and waiting for ECHO from the client + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * Unexpected INIT_ACK by the client, let us close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + /* + * INIT_ACK resent by the server, let us move to same state + */ + {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client send the ECHO, this is what we are expecting, + * move to ECHO_CLI + */ + {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + /* + * ECHO received from the server, Not sure what to do, + * let us close it + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, let us stay in the same state + */ + {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + /* + * COOKIE ACK from server, hmm... this should not happen, lets close + * the connection. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_ECHO_CLI + * Cient sent ECHO and waiting COOKEI ACK from the Server + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK has been by the client, let us close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client resent the ECHO, let us stay in the same state + */ + {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + /* + * ECHO received from the server, Not sure what to do, + * let us close it + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, this shoud not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + /* + * COOKIE ACK from server, this is what we are awaiting,lets move to + * ESTABLISHED. + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_ECHO_SER + * Server sent ECHO and waiting COOKEI ACK from the client + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + /* + * INIT_ACK has been by the server, let us close the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent the ECHO, not sure what to do, let's close the + * connection. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + /* + * ECHO resent by the server, stay in the same state + */ + {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, this is what we are expecting, let's move + * to ESTABLISHED. + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + /* + * COOKIE ACK from server, this should not happen, lets close the + * connection. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_ESTABLISHED + * Association established + */ + {{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the + * peer and peer shall move to the ESTABISHED. if it doesn't handle + * it will send ERROR chunk. So, stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, not sure what to do stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + /* + * SHUTDOWN from the client, move to SHUDDOWN_CLI + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + /* + * SHUTDOWN from the server, move to SHUTDOWN_SER + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, + /* + * client sent SHUDTDOWN_ACK, this should not happen, let's close + * the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_SHUT_CLI + * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server + */ + /* + * We recieved the data chuck, keep the state unchanged. I assume + * that still data chuncks can be received by both the peers in + * SHUDOWN state + */ + + {{IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the + * peer and peer shall move to the ESTABISHED. if it doesn't handle + * it will send ERROR chunk. So, stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, not sure what to do stay in the same state + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + /* + * SHUTDOWN resent from the client, move to SHUDDOWN_CLI + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + /* + * SHUTDOWN from the server, move to SHUTDOWN_SER + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, + /* + * client sent SHUDTDOWN_ACK, this should not happen, let's close + * the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + /* + * Server sent SHUTDOWN ACK, this is what we are expecting, let's move + * to SHUDOWN_ACK_SER + */ + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + /* + * SHUTDOWN COM from client, this should not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_SHUT_SER + * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client + */ + /* + * We recieved the data chuck, keep the state unchanged. I assume + * that still data chuncks can be received by both the peers in + * SHUDOWN state + */ + + {{IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the + * peer and peer shall move to the ESTABISHED. if it doesn't handle + * it will send ERROR chunk. So, stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, not sure what to do stay in the same state + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + /* + * SHUTDOWN resent from the client, move to SHUDDOWN_CLI + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + /* + * SHUTDOWN resent from the server, move to SHUTDOWN_SER + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, + /* + * client sent SHUDTDOWN_ACK, this is what we are expecting, let's + * move to SHUT_ACK_CLI + */ + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + /* + * Server sent SHUTDOWN ACK, this should not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + /* + * SHUTDOWN COM from client, this should not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + + /* + * State : IP_VS_SCTP_S_SHUT_ACK_CLI + * SHUTDOWN ACK from the client, awaiting for SHUTDOWN COM from server + */ + /* + * We recieved the data chuck, keep the state unchanged. I assume + * that still data chuncks can be received by both the peers in + * SHUDOWN state + */ + + {{IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the + * peer and peer shall move to the ESTABISHED. if it doesn't handle + * it will send ERROR chunk. So, stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, not sure what to do stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + /* + * SHUTDOWN sent from the client, move to SHUDDOWN_CLI + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + /* + * SHUTDOWN sent from the server, move to SHUTDOWN_SER + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, + /* + * client resent SHUDTDOWN_ACK, let's stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + /* + * Server sent SHUTDOWN ACK, this should not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + /* + * SHUTDOWN COM from client, this should not happen, let's close the + * connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + /* + * SHUTDOWN COMPLETE from server this is what we are expecting. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + + /* + * State : IP_VS_SCTP_S_SHUT_ACK_SER + * SHUTDOWN ACK from the server, awaiting for SHUTDOWN COM from client + */ + /* + * We recieved the data chuck, keep the state unchanged. I assume + * that still data chuncks can be received by both the peers in + * SHUDOWN state + */ + + {{IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_SER */ }, + /* + * We have got an INIT from client. From the spec.“Upon receipt of + * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with + * an INIT ACK using the same parameters it sent in its original + * INIT chunk (including its Initiate Tag, unchanged”). + */ + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + /* + * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, + * “If an INIT ACK is received by an endpoint in any state other + * than the COOKIE-WAIT state, the endpoint should discard the + * INIT ACK chunk”. Stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + /* + * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the + * peer and peer shall move to the ESTABISHED. if it doesn't handle + * it will send ERROR chunk. So, stay in the same state + */ + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + /* + * COOKIE ACK from client, not sure what to do stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + /* + * SHUTDOWN sent from the client, move to SHUDDOWN_CLI + */ + {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + /* + * SHUTDOWN sent from the server, move to SHUTDOWN_SER + */ + {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, + /* + * client sent SHUDTDOWN_ACK, this should not happen let's close + * the connection. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + /* + * Server resent SHUTDOWN ACK, stay in the same state + */ + {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + /* + * SHUTDOWN COM from client, this what we are expecting, let's close + * the connection + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + /* + * SHUTDOWN COMPLETE from server this should not happen. + */ + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + }, + /* + * State : IP_VS_SCTP_S_CLOSED + */ + {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, + {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, + {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, + {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } + } +}; + +/* + * Timeout table[state] + */ +static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { + [IP_VS_SCTP_S_NONE] = 2 * HZ, + [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, + [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, + [IP_VS_SCTP_S_INIT_ACK_CLI] = 1 * 60 * HZ, + [IP_VS_SCTP_S_INIT_ACK_SER] = 1 * 60 * HZ, + [IP_VS_SCTP_S_ECHO_CLI] = 1 * 60 * HZ, + [IP_VS_SCTP_S_ECHO_SER] = 1 * 60 * HZ, + [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ, + [IP_VS_SCTP_S_SHUT_CLI] = 1 * 60 * HZ, + [IP_VS_SCTP_S_SHUT_SER] = 1 * 60 * HZ, + [IP_VS_SCTP_S_SHUT_ACK_CLI] = 1 * 60 * HZ, + [IP_VS_SCTP_S_SHUT_ACK_SER] = 1 * 60 * HZ, + [IP_VS_SCTP_S_CLOSED] = 10 * HZ, + [IP_VS_SCTP_S_LAST] = 2 * HZ, +}; + +static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = { + [IP_VS_SCTP_S_NONE] = "NONE", + [IP_VS_SCTP_S_INIT_CLI] = "INIT_CLI", + [IP_VS_SCTP_S_INIT_SER] = "INIT_SER", + [IP_VS_SCTP_S_INIT_ACK_CLI] = "INIT_ACK_CLI", + [IP_VS_SCTP_S_INIT_ACK_SER] = "INIT_ACK_SER", + [IP_VS_SCTP_S_ECHO_CLI] = "COOKIE_ECHO_CLI", + [IP_VS_SCTP_S_ECHO_SER] = "COOKIE_ECHO_SER", + [IP_VS_SCTP_S_ESTABLISHED] = "ESTABISHED", + [IP_VS_SCTP_S_SHUT_CLI] = "SHUTDOWN_CLI", + [IP_VS_SCTP_S_SHUT_SER] = "SHUTDOWN_SER", + [IP_VS_SCTP_S_SHUT_ACK_CLI] = "SHUTDOWN_ACK_CLI", + [IP_VS_SCTP_S_SHUT_ACK_SER] = "SHUTDOWN_ACK_SER", + [IP_VS_SCTP_S_CLOSED] = "CLOSED", + [IP_VS_SCTP_S_LAST] = "BUG!" +}; + + +static const char *sctp_state_name(int state) +{ + if (state >= IP_VS_SCTP_S_LAST) + return "ERR!"; + if (sctp_state_name_table[state]) + return sctp_state_name_table[state]; + return "?"; +} + +static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags) +{ +} + +static int +sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) +{ + +return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST, + sctp_state_name_table, sname, to); +} + +static inline int +set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, + int direction, const struct sk_buff *skb) +{ + sctp_chunkhdr_t _sctpch, *sch; + unsigned char chunk_type; + int event, next_state; + int ihl; + +#ifdef CONFIG_IP_VS_IPV6 + ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); +#else + ihl = ip_hdrlen(skb); +#endif + + sch = skb_header_pointer(skb, ihl + sizeof(sctp_sctphdr_t), + sizeof(_sctpch), &_sctpch); + if (sch == NULL) + return 0; + + chunk_type = sch->type; + /* + * Section 3: Multiple chunks can be bundled into one SCTP packet + * up to the MTU size, except for the INIT, INIT ACK, and + * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with + * any other chunk in a packet. + * + * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control + * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be + * bundled with an ABORT, but they MUST be placed before the ABORT + * in the SCTP packet or they will be ignored by the receiver. + */ + if ((sch->type == SCTP_CID_COOKIE_ECHO) || + (sch->type == SCTP_CID_COOKIE_ACK)) { + sch = skb_header_pointer(skb, (ihl + sizeof(sctp_sctphdr_t) + + sch->length), sizeof(_sctpch), &_sctpch); + if (sch) { + if (sch->type == SCTP_CID_ABORT) + chunk_type = sch->type; + } + } + + event = sctp_events[chunk_type]; + + /* + * If the direction is IP_VS_DIR_OUTPUT, this event is from server + */ + if (direction == IP_VS_DIR_OUTPUT) + event++; + /* + * get next state + */ + next_state = sctp_states_table[cp->state][event].next_state; + + if (next_state != cp->state) { + struct ip_vs_dest *dest = cp->dest; + + IP_VS_DBG_BUF(8, "%s %s %s:%d->" + "%s:%d state: %s->%s conn->refcnt:%d\n", + pp->name, + ((direction == IP_VS_DIR_OUTPUT) ? + "output " : "input "), + IP_VS_DBG_ADDR(cp->af, &cp->daddr), + ntohs(cp->dport), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + sctp_state_name(cp->state), + sctp_state_name(next_state), + atomic_read(&cp->refcnt)); + if (dest) { + if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && + (next_state != IP_VS_SCTP_S_ESTABLISHED)) { + atomic_dec(&dest->activeconns); + atomic_inc(&dest->inactconns); + cp->flags |= IP_VS_CONN_F_INACTIVE; + } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && + (next_state == IP_VS_SCTP_S_ESTABLISHED)) { + atomic_inc(&dest->activeconns); + atomic_dec(&dest->inactconns); + cp->flags &= ~IP_VS_CONN_F_INACTIVE; + } + } + } + + cp->timeout = pp->timeout_table[cp->state = next_state]; + + return 1; +} + +static int +sctp_state_transition(struct ip_vs_conn *cp, int direction, + const struct sk_buff *skb, struct ip_vs_protocol *pp) +{ + int ret = 0; + + spin_lock(&cp->lock); + ret = set_sctp_state(pp, cp, direction, skb); + spin_unlock(&cp->lock); + + return ret; +} + +/* + * Hash table for SCTP application incarnations + */ +#define SCTP_APP_TAB_BITS 4 +#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS) +#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) + +static struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; +static DEFINE_SPINLOCK(sctp_app_lock); + +static inline __u16 sctp_app_hashkey(__be16 port) +{ + return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port) + & SCTP_APP_TAB_MASK; +} + +static int sctp_register_app(struct ip_vs_app *inc) +{ + struct ip_vs_app *i; + __u16 hash; + __be16 port = inc->port; + int ret = 0; + + hash = sctp_app_hashkey(port); + + spin_lock_bh(&sctp_app_lock); + list_for_each_entry(i, &sctp_apps[hash], p_list) { + if (i->port == port) { + ret = -EEXIST; + goto out; + } + } + list_add(&inc->p_list, &sctp_apps[hash]); + atomic_inc(&ip_vs_protocol_sctp.appcnt); +out: + spin_unlock_bh(&sctp_app_lock); + + return ret; +} + +static void sctp_unregister_app(struct ip_vs_app *inc) +{ + spin_lock_bh(&sctp_app_lock); + atomic_dec(&ip_vs_protocol_sctp.appcnt); + list_del(&inc->p_list); + spin_unlock_bh(&sctp_app_lock); +} + +static int sctp_app_conn_bind(struct ip_vs_conn *cp) +{ + int hash; + struct ip_vs_app *inc; + int result = 0; + + /* Default binding: bind app only for NAT */ + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + return 0; + /* Lookup application incarnations and bind the right one */ + hash = sctp_app_hashkey(cp->vport); + + spin_lock(&sctp_app_lock); + list_for_each_entry(inc, &sctp_apps[hash], p_list) { + if (inc->port == cp->vport) { + if (unlikely(!ip_vs_app_inc_get(inc))) + break; + spin_unlock(&sctp_app_lock); + + IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" + "%s:%u to app %s on port %u\n", + __func__, + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport), + inc->name, ntohs(inc->port)); + cp->app = inc; + if (inc->init_conn) + result = inc->init_conn(inc, cp); + goto out; + } + } + spin_unlock(&sctp_app_lock); +out: + return result; +} + +static void ip_vs_sctp_init(struct ip_vs_protocol *pp) +{ + IP_VS_INIT_HASH_TABLE(sctp_apps); + pp->timeout_table = sctp_timeouts; +} + + +static void ip_vs_sctp_exit(struct ip_vs_protocol *pp) +{ + +} + +struct ip_vs_protocol ip_vs_protocol_sctp = { + .name = "SCTP", + .protocol = IPPROTO_SCTP, + .num_states = IP_VS_SCTP_S_LAST, + .dont_defrag = 0, + .appcnt = ATOMIC_INIT(0), + .init = ip_vs_sctp_init, + .exit = ip_vs_sctp_exit, + .register_app = sctp_register_app, + .unregister_app = sctp_unregister_app, + .conn_schedule = sctp_conn_schedule, + .conn_in_get = sctp_conn_in_get, + .conn_out_get = sctp_conn_out_get, + .snat_handler = sctp_snat_handler, + .dnat_handler = sctp_dnat_handler, + .csum_check = sctp_csum_check, + .state_name = sctp_state_name, + .state_transition = sctp_state_transition, + .app_conn_bind = sctp_app_conn_bind, + .debug_packet = ip_vs_tcpudp_debug_packet, + .timeout_change = sctp_timeout_change, + .set_state_timeout = sctp_set_state_timeout, +}; diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 8e6cfd36e6f0..e6cc174fbc06 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -36,6 +36,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/ip.h> +#include <linux/slab.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index e177f0dc2084..8fb0ae616761 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -400,6 +400,11 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) flags |= IP_VS_CONN_F_INACTIVE; else flags &= ~IP_VS_CONN_F_INACTIVE; + } else if (s->protocol == IPPROTO_SCTP) { + if (state != IP_VS_SCTP_S_ESTABLISHED) + flags |= IP_VS_CONN_F_INACTIVE; + else + flags &= ~IP_VS_CONN_F_INACTIVE; } cp = ip_vs_conn_new(AF_INET, s->protocol, (union nf_inet_addr *)&s->caddr, @@ -434,6 +439,15 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) atomic_dec(&dest->inactconns); cp->flags &= ~IP_VS_CONN_F_INACTIVE; } + } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && + (cp->state != state)) { + dest = cp->dest; + if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && + (state != IP_VS_SCTP_S_ESTABLISHED)) { + atomic_dec(&dest->activeconns); + atomic_inc(&dest->inactconns); + cp->flags &= ~IP_VS_CONN_F_INACTIVE; + } } if (opt) diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 3c115fc19784..30db633f88f1 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -23,6 +23,7 @@ #include <linux/module.h> #include <linux/kernel.h> +#include <linux/slab.h> #include <linux/net.h> #include <linux/gcd.h> diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 30b3189bd29c..e450cd6f4eb5 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -17,6 +17,7 @@ #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/kernel.h> +#include <linux/slab.h> #include <linux/tcp.h> /* for tcphdr */ #include <net/ip.h> #include <net/tcp.h> /* for csum_tcpudp_magic */ @@ -311,7 +312,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, mtu = dst_mtu(&rt->u.dst); if (skb->len > mtu) { dst_release(&rt->u.dst); - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -454,7 +455,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, mtu = dst_mtu(&rt->u.dst); if (skb->len > mtu) { dst_release(&rt->u.dst); - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit_v6(): frag needed for"); goto tx_error; @@ -672,7 +673,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); dst_release(&rt->u.dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -814,7 +815,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->u.dst); if (skb->len > mtu) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); dst_release(&rt->u.dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -965,7 +966,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, mtu = dst_mtu(&rt->u.dst); if (skb->len > mtu) { dst_release(&rt->u.dst); - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 018f90db511c..ab81b380eae6 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -9,6 +9,7 @@ */ #include <linux/netfilter.h> +#include <linux/slab.h> #include <linux/kernel.h> #include <linux/moduleparam.h> diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 07d9d8857e5d..372e80f07a81 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -16,6 +16,7 @@ #include <linux/in.h> #include <linux/udp.h> #include <linux/netfilter.h> +#include <linux/gfp.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_expect.h> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 4d79e3c1616c..0c9bbe93cc16 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -42,6 +42,7 @@ #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/nf_conntrack_ecache.h> +#include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> @@ -68,7 +69,7 @@ static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, - unsigned int size, unsigned int rnd) + u16 zone, unsigned int size, unsigned int rnd) { unsigned int n; u_int32_t h; @@ -79,16 +80,16 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, */ n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); h = jhash2((u32 *)tuple, n, - rnd ^ (((__force __u16)tuple->dst.u.all << 16) | - tuple->dst.protonum)); + zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) | + tuple->dst.protonum)); return ((u64)h * size) >> 32; } -static inline u_int32_t hash_conntrack(const struct net *net, +static inline u_int32_t hash_conntrack(const struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, net->ct.htable_size, + return __hash_conntrack(tuple, zone, net->ct.htable_size, nf_conntrack_hash_rnd); } @@ -292,11 +293,12 @@ static void death_by_timeout(unsigned long ul_conntrack) * - Caller must lock nf_conntrack_lock before calling this function */ struct nf_conntrack_tuple_hash * -__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) +__nf_conntrack_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(net, tuple); + unsigned int hash = hash_conntrack(net, zone, tuple); /* Disable BHs the entire time since we normally need to disable them * at least once for the stats anyway. @@ -304,7 +306,8 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) local_bh_disable(); begin: hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { - if (nf_ct_tuple_equal(tuple, &h->tuple)) { + if (nf_ct_tuple_equal(tuple, &h->tuple) && + nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) { NF_CT_STAT_INC(net, found); local_bh_enable(); return h; @@ -326,21 +329,23 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find); /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) +nf_conntrack_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; rcu_read_lock(); begin: - h = __nf_conntrack_find(net, tuple); + h = __nf_conntrack_find(net, zone, tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); if (unlikely(nf_ct_is_dying(ct) || !atomic_inc_not_zero(&ct->ct_general.use))) h = NULL; else { - if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) { + if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) || + nf_ct_zone(ct) != zone)) { nf_ct_put(ct); goto begin; } @@ -368,9 +373,11 @@ void nf_conntrack_hash_insert(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); unsigned int hash, repl_hash; + u16 zone; - hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + zone = nf_ct_zone(ct); + hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); __nf_conntrack_hash_insert(ct, hash, repl_hash); } @@ -387,6 +394,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct hlist_nulls_node *n; enum ip_conntrack_info ctinfo; struct net *net; + u16 zone; ct = nf_ct_get(skb, &ctinfo); net = nf_ct_net(ct); @@ -398,8 +406,9 @@ __nf_conntrack_confirm(struct sk_buff *skb) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return NF_ACCEPT; - hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + zone = nf_ct_zone(ct); + hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); /* We're not in hash table, and we refuse to set up related connections for unconfirmed conns. But packet copies and @@ -418,11 +427,13 @@ __nf_conntrack_confirm(struct sk_buff *skb) not in the hash. If there is, we lost race. */ hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &h->tuple)) + &h->tuple) && + zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, - &h->tuple)) + &h->tuple) && + zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; /* Remove from unconfirmed list */ @@ -469,15 +480,19 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(net, tuple); + struct nf_conn *ct; + u16 zone = nf_ct_zone(ignored_conntrack); + unsigned int hash = hash_conntrack(net, zone, tuple); /* Disable BHs the entire time since we need to disable them at * least once for the stats anyway. */ rcu_read_lock_bh(); hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { - if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && - nf_ct_tuple_equal(tuple, &h->tuple)) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (ct != ignored_conntrack && + nf_ct_tuple_equal(tuple, &h->tuple) && + nf_ct_zone(ct) == zone) { NF_CT_STAT_INC(net, found); rcu_read_unlock_bh(); return 1; @@ -540,7 +555,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) return dropped; } -struct nf_conn *nf_conntrack_alloc(struct net *net, +struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp) @@ -558,7 +573,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { - unsigned int hash = hash_conntrack(net, orig); + unsigned int hash = hash_conntrack(net, zone, orig); if (!early_drop(net, hash)) { atomic_dec(&net->ct.count); if (net_ratelimit()) @@ -595,13 +610,28 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, #ifdef CONFIG_NET_NS ct->ct_net = net; #endif - +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (zone) { + struct nf_conntrack_zone *nf_ct_zone; + + nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC); + if (!nf_ct_zone) + goto out_free; + nf_ct_zone->id = zone; + } +#endif /* * changes to lookup keys must be done before setting refcnt to 1 */ smp_wmb(); atomic_set(&ct->ct_general.use, 1); return ct; + +#ifdef CONFIG_NF_CONNTRACK_ZONES +out_free: + kmem_cache_free(net->ct.nf_conntrack_cachep, ct); + return ERR_PTR(-ENOMEM); +#endif } EXPORT_SYMBOL_GPL(nf_conntrack_alloc); @@ -619,7 +649,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free); /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * -init_conntrack(struct net *net, +init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, @@ -629,14 +659,16 @@ init_conntrack(struct net *net, struct nf_conn *ct; struct nf_conn_help *help; struct nf_conntrack_tuple repl_tuple; + struct nf_conntrack_ecache *ecache; struct nf_conntrack_expect *exp; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { pr_debug("Can't invert tuple.\n"); return NULL; } - ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC); + ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC); if (IS_ERR(ct)) { pr_debug("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)ct; @@ -649,10 +681,14 @@ init_conntrack(struct net *net, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, GFP_ATOMIC); + + ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; + nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, + ecache ? ecache->expmask : 0, + GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); - exp = nf_ct_find_expectation(net, tuple); + exp = nf_ct_find_expectation(net, zone, tuple); if (exp) { pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", ct, exp); @@ -674,7 +710,7 @@ init_conntrack(struct net *net, nf_conntrack_get(&ct->master->ct_general); NF_CT_STAT_INC(net, expect_new); } else { - __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); NF_CT_STAT_INC(net, new); } @@ -695,7 +731,7 @@ init_conntrack(struct net *net, /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ static inline struct nf_conn * -resolve_normal_ct(struct net *net, +resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u_int16_t l3num, @@ -708,6 +744,7 @@ resolve_normal_ct(struct net *net, struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, protonum, &tuple, l3proto, @@ -717,9 +754,10 @@ resolve_normal_ct(struct net *net, } /* look for tuple match */ - h = nf_conntrack_find_get(net, &tuple); + h = nf_conntrack_find_get(net, zone, &tuple); if (!h) { - h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff); + h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, + skb, dataoff); if (!h) return NULL; if (IS_ERR(h)) @@ -756,7 +794,7 @@ unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, struct sk_buff *skb) { - struct nf_conn *ct; + struct nf_conn *ct, *tmpl = NULL; enum ip_conntrack_info ctinfo; struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; @@ -765,10 +803,14 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, int set_reply = 0; int ret; - /* Previously seen (loopback or untracked)? Ignore. */ if (skb->nfct) { - NF_CT_STAT_INC_ATOMIC(net, ignore); - return NF_ACCEPT; + /* Previously seen (loopback or untracked)? Ignore. */ + tmpl = (struct nf_conn *)skb->nfct; + if (!nf_ct_is_template(tmpl)) { + NF_CT_STAT_INC_ATOMIC(net, ignore); + return NF_ACCEPT; + } + skb->nfct = NULL; } /* rcu_read_lock()ed by nf_hook_slow */ @@ -779,7 +821,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, pr_debug("not prepared to track yet or error occured\n"); NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); - return -ret; + ret = -ret; + goto out; } l4proto = __nf_ct_l4proto_find(pf, protonum); @@ -788,26 +831,30 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, * inverse of the return code tells to the netfilter * core what to do with the packet. */ if (l4proto->error != NULL) { - ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum); + ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo, + pf, hooknum); if (ret <= 0) { NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); - return -ret; + ret = -ret; + goto out; } } - ct = resolve_normal_ct(net, skb, dataoff, pf, protonum, + ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ NF_CT_STAT_INC_ATOMIC(net, invalid); - return NF_ACCEPT; + ret = NF_ACCEPT; + goto out; } if (IS_ERR(ct)) { /* Too stressed to deal. */ NF_CT_STAT_INC_ATOMIC(net, drop); - return NF_DROP; + ret = NF_DROP; + goto out; } NF_CT_ASSERT(skb->nfct); @@ -822,11 +869,15 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_STAT_INC_ATOMIC(net, invalid); if (ret == -NF_DROP) NF_CT_STAT_INC_ATOMIC(net, drop); - return -ret; + ret = -ret; + goto out; } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_REPLY, ct); +out: + if (tmpl) + nf_ct_put(tmpl); return ret; } @@ -865,7 +916,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, return; rcu_read_lock(); - __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); @@ -939,6 +990,14 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, } EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); +#ifdef CONFIG_NF_CONNTRACK_ZONES +static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = { + .len = sizeof(struct nf_conntrack_zone), + .align = __alignof__(struct nf_conntrack_zone), + .id = NF_CT_EXT_ZONE, +}; +#endif + #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) #include <linux/netfilter/nfnetlink.h> @@ -1120,6 +1179,9 @@ static void nf_conntrack_cleanup_init_net(void) nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); +#ifdef CONFIG_NF_CONNTRACK_ZONES + nf_ct_extend_unregister(&nf_ct_zone_extend); +#endif } static void nf_conntrack_cleanup_net(struct net *net) @@ -1195,6 +1257,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) unsigned int hashsize, old_size; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; if (current->nsproxy->net_ns != &init_net) return -EOPNOTSUPP; @@ -1221,8 +1284,10 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) while (!hlist_nulls_empty(&init_net.ct.hash[i])) { h = hlist_nulls_entry(init_net.ct.hash[i].first, struct nf_conntrack_tuple_hash, hnnode); + ct = nf_ct_tuplehash_to_ctrack(h); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, hashsize, + bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct), + hashsize, nf_conntrack_hash_rnd); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } @@ -1280,6 +1345,11 @@ static int nf_conntrack_init_init_net(void) if (ret < 0) goto err_helper; +#ifdef CONFIG_NF_CONNTRACK_ZONES + ret = nf_ct_extend_register(&nf_ct_zone_extend); + if (ret < 0) + goto err_extend; +#endif /* Set up fake conntrack: to never be deleted, not in any hashes */ #ifdef CONFIG_NET_NS nf_conntrack_untracked.ct_net = &init_net; @@ -1290,6 +1360,10 @@ static int nf_conntrack_init_init_net(void) return 0; +#ifdef CONFIG_NF_CONNTRACK_ZONES +err_extend: + nf_conntrack_helper_fini(); +#endif err_helper: nf_conntrack_proto_fini(); err_proto: diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index d5a9bcd7d61b..f516961a83b4 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -18,6 +18,7 @@ #include <linux/percpu.h> #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/slab.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 2f25ff610982..acb29ccaa41f 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -27,6 +27,7 @@ #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_tuple.h> +#include <net/netfilter/nf_conntrack_zones.h> unsigned int nf_ct_expect_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); @@ -84,7 +85,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple } struct nf_conntrack_expect * -__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple) +__nf_ct_expect_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; struct hlist_node *n; @@ -95,7 +97,8 @@ __nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple) h = nf_ct_expect_dst_hash(tuple); hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) { - if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) + if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && + nf_ct_zone(i->master) == zone) return i; } return NULL; @@ -104,12 +107,13 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find); /* Just find a expectation corresponding to a tuple. */ struct nf_conntrack_expect * -nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) +nf_ct_expect_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; rcu_read_lock(); - i = __nf_ct_expect_find(net, tuple); + i = __nf_ct_expect_find(net, zone, tuple); if (i && !atomic_inc_not_zero(&i->use)) i = NULL; rcu_read_unlock(); @@ -121,7 +125,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get); /* If an expectation for this connection is found, it gets delete from * global list then returned. */ struct nf_conntrack_expect * -nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple) +nf_ct_find_expectation(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i, *exp = NULL; struct hlist_node *n; @@ -133,7 +138,8 @@ nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple) h = nf_ct_expect_dst_hash(tuple); hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && - nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { + nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && + nf_ct_zone(i->master) == zone) { exp = i; break; } @@ -204,7 +210,8 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, { return a->master == b->master && a->class == b->class && nf_ct_tuple_equal(&a->tuple, &b->tuple) && - nf_ct_tuple_mask_equal(&a->mask, &b->mask); + nf_ct_tuple_mask_equal(&a->mask, &b->mask) && + nf_ct_zone(a->master) == nf_ct_zone(b->master); } /* Generally a bad idea to call this: could have matched already. */ @@ -232,7 +239,6 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) new->master = me; atomic_set(&new->use, 1); - INIT_RCU_HEAD(&new->rcu); return new; } EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); @@ -500,6 +506,7 @@ static void exp_seq_stop(struct seq_file *seq, void *v) static int exp_seq_show(struct seq_file *s, void *v) { struct nf_conntrack_expect *expect; + struct nf_conntrack_helper *helper; struct hlist_node *n = v; char *delim = ""; @@ -525,6 +532,14 @@ static int exp_seq_show(struct seq_file *s, void *v) if (expect->flags & NF_CT_EXPECT_INACTIVE) seq_printf(s, "%sINACTIVE", delim); + helper = rcu_dereference(nfct_help(expect->master)->helper); + if (helper) { + seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name); + if (helper->expect_policy[expect->class].name) + seq_printf(s, "/%s", + helper->expect_policy[expect->class].name); + } + return seq_putc(s, '\n'); } diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index fef95be334bd..fdc8fb4ae10f 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -59,7 +59,6 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) if (!*ext) return NULL; - INIT_RCU_HEAD(&(*ext)->rcu); (*ext)->offset[id] = off; (*ext)->len = len; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index f0732aa18e4f..2ae3169e7633 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -13,6 +13,7 @@ #include <linux/moduleparam.h> #include <linux/netfilter.h> #include <linux/ip.h> +#include <linux/slab.h> #include <linux/ipv6.h> #include <linux/ctype.h> #include <linux/inet.h> diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 66369490230e..a487c8038044 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -17,6 +17,7 @@ #include <linux/inet.h> #include <linux/in.h> #include <linux/ip.h> +#include <linux/slab.h> #include <linux/udp.h> #include <linux/tcp.h> #include <linux/skbuff.h> @@ -29,6 +30,7 @@ #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_zones.h> #include <linux/netfilter/nf_conntrack_h323.h> /* Parameters */ @@ -1216,7 +1218,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, tuple.dst.u.tcp.port = port; tuple.dst.protonum = IPPROTO_TCP; - exp = __nf_ct_expect_find(net, &tuple); + exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (exp && exp->master == ct) return exp; return NULL; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 4b1a56bd074c..59e1a4cd4e8b 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -15,7 +15,6 @@ #include <linux/skbuff.h> #include <linux/vmalloc.h> #include <linux/stddef.h> -#include <linux/slab.h> #include <linux/random.h> #include <linux/err.h> #include <linux/kernel.h> @@ -65,7 +64,7 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) } struct nf_conntrack_helper * -__nf_conntrack_helper_find_byname(const char *name) +__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; struct hlist_node *n; @@ -73,13 +72,34 @@ __nf_conntrack_helper_find_byname(const char *name) for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry_rcu(h, n, &nf_ct_helper_hash[i], hnode) { - if (!strcmp(h->name, name)) + if (!strcmp(h->name, name) && + h->tuple.src.l3num == l3num && + h->tuple.dst.protonum == protonum) return h; } } return NULL; } -EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find_byname); +EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find); + +struct nf_conntrack_helper * +nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) +{ + struct nf_conntrack_helper *h; + + h = __nf_conntrack_helper_find(name, l3num, protonum); +#ifdef CONFIG_MODULES + if (h == NULL) { + if (request_module("nfct-helper-%s", name) == 0) + h = __nf_conntrack_helper_find(name, l3num, protonum); + } +#endif + if (h != NULL && !try_module_get(h->me)) + h = NULL; + + return h; +} +EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) { @@ -94,13 +114,22 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) } EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); -int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags) +int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, + gfp_t flags) { + struct nf_conntrack_helper *helper = NULL; + struct nf_conn_help *help; int ret = 0; - struct nf_conntrack_helper *helper; - struct nf_conn_help *help = nfct_help(ct); - helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + if (tmpl != NULL) { + help = nfct_help(tmpl); + if (help != NULL) + helper = help->helper; + } + + help = nfct_help(ct); + if (helper == NULL) + helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); if (helper == NULL) { if (help) rcu_assign_pointer(help->helper, NULL); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 8bd98c84f77e..7673930ca342 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -15,6 +15,7 @@ #include <linux/ip.h> #include <linux/tcp.h> #include <linux/netfilter.h> +#include <linux/slab.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_expect.h> diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 0ffe689dfe97..afc52f2ee4ac 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -27,9 +27,11 @@ #include <linux/netlink.h> #include <linux/spinlock.h> #include <linux/interrupt.h> +#include <linux/slab.h> #include <linux/netfilter.h> #include <net/netlink.h> +#include <net/sock.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_expect.h> @@ -38,6 +40,7 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_acct.h> +#include <net/netfilter/nf_conntrack_zones.h> #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_protocol.h> @@ -378,6 +381,9 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, goto nla_put_failure; nla_nest_end(skb, nest_parms); + if (nf_ct_zone(ct)) + NLA_PUT_BE16(skb, CTA_ZONE, htons(nf_ct_zone(ct))); + if (ctnetlink_dump_status(skb, ct) < 0 || ctnetlink_dump_timeout(skb, ct) < 0 || ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || @@ -456,6 +462,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct) static int ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) { + struct net *net; struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; @@ -482,7 +489,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) } else return 0; - if (!item->report && !nfnetlink_has_listeners(group)) + net = nf_ct_net(ct); + if (!item->report && !nfnetlink_has_listeners(net, group)) return 0; skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC); @@ -514,6 +522,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; nla_nest_end(skb, nest_parms); + if (nf_ct_zone(ct)) + NLA_PUT_BE16(skb, CTA_ZONE, htons(nf_ct_zone(ct))); + if (ctnetlink_dump_id(skb, ct) < 0) goto nla_put_failure; @@ -559,7 +570,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC); + err = nfnetlink_send(skb, net, item->pid, group, item->report, + GFP_ATOMIC); if (err == -ENOBUFS || err == -EAGAIN) return -ENOBUFS; @@ -571,7 +583,9 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(0, group, -ENOBUFS); + if (nfnetlink_set_err(net, 0, group, -ENOBUFS) > 0) + return -ENOBUFS; + return 0; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -586,6 +600,7 @@ static int ctnetlink_done(struct netlink_callback *cb) static int ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; @@ -594,9 +609,9 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); last = (struct nf_conn *)cb->args[1]; - for (; cb->args[0] < init_net.ct.htable_size; cb->args[0]++) { + for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) { restart: - hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], + hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[cb->args[0]], hnnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; @@ -703,6 +718,11 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr, return ret; } +static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = { + [CTA_TUPLE_IP] = { .type = NLA_NESTED }, + [CTA_TUPLE_PROTO] = { .type = NLA_NESTED }, +}; + static int ctnetlink_parse_tuple(const struct nlattr * const cda[], struct nf_conntrack_tuple *tuple, @@ -713,7 +733,7 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], memset(tuple, 0, sizeof(*tuple)); - nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], NULL); + nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy); if (!tb[CTA_TUPLE_IP]) return -EINVAL; @@ -740,12 +760,31 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], return 0; } +static int +ctnetlink_parse_zone(const struct nlattr *attr, u16 *zone) +{ + if (attr) +#ifdef CONFIG_NF_CONNTRACK_ZONES + *zone = ntohs(nla_get_be16(attr)); +#else + return -EOPNOTSUPP; +#endif + else + *zone = 0; + + return 0; +} + +static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = { + [CTA_HELP_NAME] = { .type = NLA_NUL_STRING }, +}; + static inline int ctnetlink_parse_help(const struct nlattr *attr, char **helper_name) { struct nlattr *tb[CTA_HELP_MAX+1]; - nla_parse_nested(tb, CTA_HELP_MAX, attr, NULL); + nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy); if (!tb[CTA_HELP_NAME]) return -EINVAL; @@ -756,11 +795,18 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name) } static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { + [CTA_TUPLE_ORIG] = { .type = NLA_NESTED }, + [CTA_TUPLE_REPLY] = { .type = NLA_NESTED }, [CTA_STATUS] = { .type = NLA_U32 }, + [CTA_PROTOINFO] = { .type = NLA_NESTED }, + [CTA_HELP] = { .type = NLA_NESTED }, + [CTA_NAT_SRC] = { .type = NLA_NESTED }, [CTA_TIMEOUT] = { .type = NLA_U32 }, [CTA_MARK] = { .type = NLA_U32 }, - [CTA_USE] = { .type = NLA_U32 }, [CTA_ID] = { .type = NLA_U32 }, + [CTA_NAT_DST] = { .type = NLA_NESTED }, + [CTA_TUPLE_MASTER] = { .type = NLA_NESTED }, + [CTA_ZONE] = { .type = NLA_U16 }, }; static int @@ -768,12 +814,18 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; struct nf_conn *ct; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - int err = 0; + u16 zone; + int err; + + err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); + if (err < 0) + return err; if (cda[CTA_TUPLE_ORIG]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); @@ -781,7 +833,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { /* Flush the whole table */ - nf_conntrack_flush_report(&init_net, + nf_conntrack_flush_report(net, NETLINK_CB(skb).pid, nlmsg_report(nlh)); return 0; @@ -790,7 +842,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(&init_net, &tuple); + h = nf_conntrack_find_get(net, zone, &tuple); if (!h) return -ENOENT; @@ -828,18 +880,24 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; struct nf_conn *ct; struct sk_buff *skb2 = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - int err = 0; + u16 zone; + int err; if (nlh->nlmsg_flags & NLM_F_DUMP) return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table, ctnetlink_done); + err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); + if (err < 0) + return err; + if (cda[CTA_TUPLE_ORIG]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); else if (cda[CTA_TUPLE_REPLY]) @@ -850,7 +908,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(&init_net, &tuple); + h = nf_conntrack_find_get(net, zone, &tuple); if (!h) return -ENOENT; @@ -994,7 +1052,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) return 0; } - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper == NULL) { #ifdef CONFIG_MODULES spin_unlock_bh(&nf_conntrack_lock); @@ -1005,7 +1064,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) } spin_lock_bh(&nf_conntrack_lock); - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper) return -EAGAIN; #endif @@ -1020,9 +1080,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) /* need to zero data of old helper */ memset(&help->help, 0, sizeof(help->help)); } else { - help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); - if (help == NULL) - return -ENOMEM; + /* we cannot set a helper for an existing conntrack */ + return -EOPNOTSUPP; } rcu_assign_pointer(help->helper, helper); @@ -1044,6 +1103,12 @@ ctnetlink_change_timeout(struct nf_conn *ct, const struct nlattr * const cda[]) return 0; } +static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = { + [CTA_PROTOINFO_TCP] = { .type = NLA_NESTED }, + [CTA_PROTOINFO_DCCP] = { .type = NLA_NESTED }, + [CTA_PROTOINFO_SCTP] = { .type = NLA_NESTED }, +}; + static inline int ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]) { @@ -1052,7 +1117,7 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[] struct nf_conntrack_l4proto *l4proto; int err = 0; - nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, NULL); + nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy); rcu_read_lock(); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); @@ -1064,12 +1129,18 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[] } #ifdef CONFIG_NF_NAT_NEEDED +static const struct nla_policy nat_seq_policy[CTA_NAT_SEQ_MAX+1] = { + [CTA_NAT_SEQ_CORRECTION_POS] = { .type = NLA_U32 }, + [CTA_NAT_SEQ_OFFSET_BEFORE] = { .type = NLA_U32 }, + [CTA_NAT_SEQ_OFFSET_AFTER] = { .type = NLA_U32 }, +}; + static inline int change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr) { struct nlattr *cda[CTA_NAT_SEQ_MAX+1]; - nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, NULL); + nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy); if (!cda[CTA_NAT_SEQ_CORRECTION_POS]) return -EINVAL; @@ -1175,7 +1246,8 @@ ctnetlink_change_conntrack(struct nf_conn *ct, } static struct nf_conn * -ctnetlink_create_conntrack(const struct nlattr * const cda[], +ctnetlink_create_conntrack(struct net *net, u16 zone, + const struct nlattr * const cda[], struct nf_conntrack_tuple *otuple, struct nf_conntrack_tuple *rtuple, u8 u3) @@ -1184,7 +1256,7 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[], int err = -EINVAL; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC); + ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC); if (IS_ERR(ct)) return ERR_PTR(-ENOMEM); @@ -1193,7 +1265,6 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[], ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); ct->timeout.expires = jiffies + ct->timeout.expires * HZ; - ct->status |= IPS_CONFIRMED; rcu_read_lock(); if (cda[CTA_HELP]) { @@ -1203,7 +1274,8 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[], if (err < 0) goto err2; - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper == NULL) { rcu_read_unlock(); #ifdef CONFIG_MODULES @@ -1213,7 +1285,9 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[], } rcu_read_lock(); - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, + nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper) { err = -EAGAIN; goto err2; @@ -1236,19 +1310,24 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[], } } else { /* try an implicit helper assignation */ - err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + err = __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); if (err < 0) goto err2; } - if (cda[CTA_STATUS]) { - err = ctnetlink_change_status(ct, cda); + if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { + err = ctnetlink_change_nat(ct, cda); if (err < 0) goto err2; } - if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { - err = ctnetlink_change_nat(ct, cda); + nf_ct_acct_ext_add(ct, GFP_ATOMIC); + nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); + /* we must add conntrack extensions before confirmation. */ + ct->status |= IPS_CONFIRMED; + + if (cda[CTA_STATUS]) { + err = ctnetlink_change_status(ct, cda); if (err < 0) goto err2; } @@ -1267,9 +1346,6 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[], goto err2; } - nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, GFP_ATOMIC); - #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); @@ -1285,7 +1361,7 @@ ctnetlink_create_conntrack(const struct nlattr * const cda[], if (err < 0) goto err2; - master_h = nf_conntrack_find_get(&init_net, &master); + master_h = nf_conntrack_find_get(net, zone, &master); if (master_h == NULL) { err = -ENOENT; goto err2; @@ -1313,11 +1389,17 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - int err = 0; + u16 zone; + int err; + + err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); + if (err < 0) + return err; if (cda[CTA_TUPLE_ORIG]) { err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3); @@ -1333,9 +1415,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(&init_net, &otuple); + h = __nf_conntrack_find(net, zone, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(&init_net, &rtuple); + h = __nf_conntrack_find(net, zone, &rtuple); if (h == NULL) { err = -ENOENT; @@ -1343,7 +1425,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conn *ct; enum ip_conntrack_events events; - ct = ctnetlink_create_conntrack(cda, &otuple, + ct = ctnetlink_create_conntrack(net, zone, cda, &otuple, &rtuple, u3); if (IS_ERR(ct)) { err = PTR_ERR(ct); @@ -1357,7 +1439,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, else events = IPCT_NEW; - nf_conntrack_eventmask_report((1 << IPCT_STATUS) | + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | + (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | @@ -1382,7 +1465,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err == 0) { nf_conntrack_get(&ct->ct_general); spin_unlock_bh(&nf_conntrack_lock); - nf_conntrack_eventmask_report((1 << IPCT_STATUS) | + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | + (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | @@ -1469,6 +1553,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) { struct nf_conn *master = exp->master; + struct nf_conntrack_helper *helper; long timeout = (exp->timeout.expires - jiffies) / HZ; if (timeout < 0) @@ -1485,6 +1570,9 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)); NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)); + helper = rcu_dereference(nfct_help(master)->helper); + if (helper) + NLA_PUT_STRING(skb, CTA_EXPECT_HELP_NAME, helper->name); return 0; @@ -1526,9 +1614,10 @@ nla_put_failure: static int ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) { + struct nf_conntrack_expect *exp = item->exp; + struct net *net = nf_ct_exp_net(exp); struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - struct nf_conntrack_expect *exp = item->exp; struct sk_buff *skb; unsigned int type; int flags = 0; @@ -1540,7 +1629,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) return 0; if (!item->report && - !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) + !nfnetlink_has_listeners(net, NFNLGRP_CONNTRACK_EXP_NEW)) return 0; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); @@ -1563,7 +1652,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, + nfnetlink_send(skb, net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report, GFP_ATOMIC); return 0; @@ -1573,7 +1662,7 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(0, 0, -ENOBUFS); + nfnetlink_set_err(net, 0, 0, -ENOBUFS); return 0; } #endif @@ -1587,7 +1676,7 @@ static int ctnetlink_exp_done(struct netlink_callback *cb) static int ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = &init_net; + struct net *net = sock_net(skb->sk); struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct hlist_node *n; @@ -1631,8 +1720,12 @@ out: } static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { + [CTA_EXPECT_MASTER] = { .type = NLA_NESTED }, + [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED }, + [CTA_EXPECT_MASK] = { .type = NLA_NESTED }, [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 }, [CTA_EXPECT_ID] = { .type = NLA_U32 }, + [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING }, }; static int @@ -1640,12 +1733,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; struct sk_buff *skb2; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - int err = 0; + u16 zone; + int err; if (nlh->nlmsg_flags & NLM_F_DUMP) { return netlink_dump_start(ctnl, skb, nlh, @@ -1653,6 +1748,10 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, ctnetlink_exp_done); } + err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); + if (err < 0) + return err; + if (cda[CTA_EXPECT_MASTER]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); else @@ -1661,7 +1760,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = nf_ct_expect_find_get(&init_net, &tuple); + exp = nf_ct_expect_find_get(net, zone, &tuple); if (!exp) return -ENOENT; @@ -1701,23 +1800,28 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; - struct nf_conntrack_helper *h; struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct hlist_node *n, *next; u_int8_t u3 = nfmsg->nfgen_family; unsigned int i; + u16 zone; int err; if (cda[CTA_EXPECT_TUPLE]) { /* delete a single expect by tuple */ + err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); + if (err < 0) + return err; + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); if (err < 0) return err; /* bump usage count to 2 */ - exp = nf_ct_expect_find_get(&init_net, &tuple); + exp = nf_ct_expect_find_get(net, zone, &tuple); if (!exp) return -ENOENT; @@ -1740,18 +1844,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* delete all expectations for this helper */ spin_lock_bh(&nf_conntrack_lock); - h = __nf_conntrack_helper_find_byname(name); - if (!h) { - spin_unlock_bh(&nf_conntrack_lock); - return -EOPNOTSUPP; - } for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &init_net.ct.expect_hash[i], + &net->ct.expect_hash[i], hnode) { m_help = nfct_help(exp->master); - if (m_help->helper == h - && del_timer(&exp->timeout)) { + if (!strcmp(m_help->helper->name, name) && + del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); } @@ -1763,7 +1862,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &init_net.ct.expect_hash[i], + &net->ct.expect_hash[i], hnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); @@ -1784,7 +1883,9 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x, } static int -ctnetlink_create_expect(const struct nlattr * const cda[], u_int8_t u3, +ctnetlink_create_expect(struct net *net, u16 zone, + const struct nlattr * const cda[], + u_int8_t u3, u32 pid, int report) { struct nf_conntrack_tuple tuple, mask, master_tuple; @@ -1806,7 +1907,7 @@ ctnetlink_create_expect(const struct nlattr * const cda[], u_int8_t u3, return err; /* Look for master conntrack of this expectation */ - h = nf_conntrack_find_get(&init_net, &master_tuple); + h = nf_conntrack_find_get(net, zone, &master_tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); @@ -1846,29 +1947,35 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[]) { + struct net *net = sock_net(ctnl); struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - int err = 0; + u16 zone; + int err; if (!cda[CTA_EXPECT_TUPLE] || !cda[CTA_EXPECT_MASK] || !cda[CTA_EXPECT_MASTER]) return -EINVAL; + err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); + if (err < 0) + return err; + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); if (err < 0) return err; spin_lock_bh(&nf_conntrack_lock); - exp = __nf_ct_expect_find(&init_net, &tuple); + exp = __nf_ct_expect_find(net, zone, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { - err = ctnetlink_create_expect(cda, + err = ctnetlink_create_expect(net, zone, cda, u3, NETLINK_CB(skb).pid, nlmsg_report(nlh)); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 3807ac7faf4c..088944824e13 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -28,6 +28,7 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_zones.h> #include <linux/netfilter/nf_conntrack_proto_gre.h> #include <linux/netfilter/nf_conntrack_pptp.h> @@ -123,7 +124,7 @@ static void pptp_expectfn(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple(&inv_t); - exp_other = nf_ct_expect_find_get(net, &inv_t); + exp_other = nf_ct_expect_find_get(net, nf_ct_zone(ct), &inv_t); if (exp_other) { /* delete other expectation. */ pr_debug("found\n"); @@ -136,17 +137,18 @@ static void pptp_expectfn(struct nf_conn *ct, rcu_read_unlock(); } -static int destroy_sibling_or_exp(struct net *net, +static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, const struct nf_conntrack_tuple *t) { const struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; struct nf_conn *sibling; + u16 zone = nf_ct_zone(ct); pr_debug("trying to timeout ct or exp for tuple "); nf_ct_dump_tuple(t); - h = nf_conntrack_find_get(net, t); + h = nf_conntrack_find_get(net, zone, t); if (h) { sibling = nf_ct_tuplehash_to_ctrack(h); pr_debug("setting timeout of conntrack %p to 0\n", sibling); @@ -157,7 +159,7 @@ static int destroy_sibling_or_exp(struct net *net, nf_ct_put(sibling); return 1; } else { - exp = nf_ct_expect_find_get(net, t); + exp = nf_ct_expect_find_get(net, zone, t); if (exp) { pr_debug("unexpect_related of expect %p\n", exp); nf_ct_unexpect_related(exp); @@ -182,7 +184,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.dst.protonum = IPPROTO_GRE; t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id; - if (!destroy_sibling_or_exp(net, &t)) + if (!destroy_sibling_or_exp(net, ct, &t)) pr_debug("failed to timeout original pns->pac ct/exp\n"); /* try reply (pac->pns) tuple */ @@ -190,7 +192,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.dst.protonum = IPPROTO_GRE; t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id; - if (!destroy_sibling_or_exp(net, &t)) + if (!destroy_sibling_or_exp(net, ct, &t)) pr_debug("failed to timeout reply pac->pns ct/exp\n"); } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 1a4568bf7ea5..a44fa75b5178 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -12,6 +12,7 @@ #include <linux/types.h> #include <linux/netfilter.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/mutex.h> #include <linux/skbuff.h> #include <linux/vmalloc.h> diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index dd375500dccc..5292560d6d4a 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -15,6 +15,7 @@ #include <linux/spinlock.h> #include <linux/skbuff.h> #include <linux/dccp.h> +#include <linux/slab.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -561,8 +562,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, return NF_ACCEPT; } -static int dccp_error(struct net *net, struct sk_buff *skb, - unsigned int dataoff, enum ip_conntrack_info *ctinfo, +static int dccp_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { struct dccp_hdr _dh, *dh; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index c99cfba64ddc..cf616e55ca41 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -31,6 +31,7 @@ #include <linux/in.h> #include <linux/netdevice.h> #include <linux/skbuff.h> +#include <linux/slab.h> #include <net/dst.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -241,7 +242,7 @@ static int gre_packet(struct nf_conn *ct, ct->proto.gre.stream_timeout); /* Also, more likely to be important, and not a probe. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.timeout); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index f9d930f80276..b68ff15ed979 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -377,7 +377,7 @@ static int sctp_packet(struct nf_conn *ct, new_state == SCTP_CONNTRACK_ESTABLISHED) { pr_debug("Setting assured bit\n"); set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } return NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 3c96437b45ad..9dd8cd4fb6e6 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -760,7 +760,7 @@ static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = }; /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ -static int tcp_error(struct net *net, +static int tcp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, @@ -1045,7 +1045,7 @@ static int tcp_packet(struct nf_conn *ct, after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } nf_ct_refresh_acct(ct, ctinfo, skb, timeout); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 5c5518bedb4b..8289088b8218 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -77,7 +77,7 @@ static int udp_packet(struct nf_conn *ct, nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout); @@ -91,8 +91,8 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, +static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 458655bb2106..263b5a72588d 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -75,7 +75,7 @@ static int udplite_packet(struct nf_conn *ct, nf_ct_udplite_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout); @@ -89,7 +89,7 @@ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udplite_error(struct net *net, +static int udplite_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index dcfecbb81c46..d9e27734b2a2 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -20,6 +20,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/netfilter.h> +#include <linux/slab.h> #include <linux/in.h> #include <linux/tcp.h> #include <net/netfilter/nf_conntrack.h> diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 023966b569bf..c6cd1b84eddd 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -16,12 +16,14 @@ #include <linux/inet.h> #include <linux/in.h> #include <linux/udp.h> +#include <linux/tcp.h> #include <linux/netfilter.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_zones.h> #include <linux/netfilter/nf_conntrack_sip.h> MODULE_LICENSE("GPL"); @@ -50,12 +52,16 @@ module_param(sip_direct_media, int, 0600); MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling " "endpoints only (default 1)"); -unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_hook); +void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, s16 off) __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook); + unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, + unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *exp, @@ -63,17 +69,17 @@ unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, unsigned int matchlen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook); -unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, - unsigned int dataoff, unsigned int *datalen, + unsigned int sdpoff, enum sdp_header_types type, enum sdp_header_types term, const union nf_inet_addr *addr) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook); -unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, @@ -82,14 +88,15 @@ unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook); unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, - const char **dptr, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, const union nf_inet_addr *addr) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook); -unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *rtp_exp, @@ -236,12 +243,13 @@ int ct_sip_parse_request(const struct nf_conn *ct, return 0; /* Find SIP URI */ - limit -= strlen("sip:"); - for (; dptr < limit; dptr++) { + for (; dptr < limit - strlen("sip:"); dptr++) { if (*dptr == '\r' || *dptr == '\n') return -1; - if (strnicmp(dptr, "sip:", strlen("sip:")) == 0) + if (strnicmp(dptr, "sip:", strlen("sip:")) == 0) { + dptr += strlen("sip:"); break; + } } if (!skp_epaddr_len(ct, dptr, limit, &shift)) return 0; @@ -276,7 +284,7 @@ EXPORT_SYMBOL_GPL(ct_sip_parse_request); * tabs, spaces and continuation lines, which are treated as a single whitespace * character. * - * Some headers may appear multiple times. A comma seperated list of values is + * Some headers may appear multiple times. A comma separated list of values is * equivalent to multiple headers. */ static const struct sip_header ct_sip_hdrs[] = { @@ -284,7 +292,8 @@ static const struct sip_header ct_sip_hdrs[] = { [SIP_HDR_FROM] = SIP_HDR("From", "f", "sip:", skp_epaddr_len), [SIP_HDR_TO] = SIP_HDR("To", "t", "sip:", skp_epaddr_len), [SIP_HDR_CONTACT] = SIP_HDR("Contact", "m", "sip:", skp_epaddr_len), - [SIP_HDR_VIA] = SIP_HDR("Via", "v", "UDP ", epaddr_len), + [SIP_HDR_VIA_UDP] = SIP_HDR("Via", "v", "UDP ", epaddr_len), + [SIP_HDR_VIA_TCP] = SIP_HDR("Via", "v", "TCP ", epaddr_len), [SIP_HDR_EXPIRES] = SIP_HDR("Expires", NULL, NULL, digits_len), [SIP_HDR_CONTENT_LENGTH] = SIP_HDR("Content-Length", "l", NULL, digits_len), }; @@ -412,7 +421,7 @@ int ct_sip_get_header(const struct nf_conn *ct, const char *dptr, } EXPORT_SYMBOL_GPL(ct_sip_get_header); -/* Get next header field in a list of comma seperated values */ +/* Get next header field in a list of comma separated values */ static int ct_sip_next_header(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, enum sip_header_types type, @@ -516,6 +525,33 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, } EXPORT_SYMBOL_GPL(ct_sip_parse_header_uri); +static int ct_sip_parse_param(const struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + const char *name, + unsigned int *matchoff, unsigned int *matchlen) +{ + const char *limit = dptr + datalen; + const char *start; + const char *end; + + limit = ct_sip_header_search(dptr + dataoff, limit, ",", strlen(",")); + if (!limit) + limit = dptr + datalen; + + start = ct_sip_header_search(dptr + dataoff, limit, name, strlen(name)); + if (!start) + return 0; + start += strlen(name); + + end = ct_sip_header_search(start, limit, ";", strlen(";")); + if (!end) + end = limit; + + *matchoff = start - dptr; + *matchlen = end - start; + return 1; +} + /* Parse address from header parameter and return address, offset and length */ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, @@ -574,6 +610,29 @@ int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr, } EXPORT_SYMBOL_GPL(ct_sip_parse_numerical_param); +static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + u8 *proto) +{ + unsigned int matchoff, matchlen; + + if (ct_sip_parse_param(ct, dptr, dataoff, datalen, "transport=", + &matchoff, &matchlen)) { + if (!strnicmp(dptr + matchoff, "TCP", strlen("TCP"))) + *proto = IPPROTO_TCP; + else if (!strnicmp(dptr + matchoff, "UDP", strlen("UDP"))) + *proto = IPPROTO_UDP; + else + return 0; + + if (*proto != nf_ct_protonum(ct)) + return 0; + } else + *proto = nf_ct_protonum(ct); + + return 1; +} + /* SDP header parsing: a SDP session description contains an ordered set of * headers, starting with a section containing general session parameters, * optionally followed by multiple media descriptions. @@ -682,7 +741,7 @@ static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr, static int refresh_signalling_expectation(struct nf_conn *ct, union nf_inet_addr *addr, - __be16 port, + u8 proto, __be16 port, unsigned int expires) { struct nf_conn_help *help = nfct_help(ct); @@ -694,6 +753,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct, hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { if (exp->class != SIP_EXPECT_SIGNALLING || !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) || + exp->tuple.dst.protonum != proto || exp->tuple.dst.u.udp.port != port) continue; if (!del_timer(&exp->timeout)) @@ -728,7 +788,7 @@ static void flush_expectations(struct nf_conn *ct, bool media) spin_unlock_bh(&nf_conntrack_lock); } -static int set_expected_rtp_rtcp(struct sk_buff *skb, +static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, union nf_inet_addr *daddr, __be16 port, enum sip_expectation_classes class, @@ -777,7 +837,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, rcu_read_lock(); do { - exp = __nf_ct_expect_find(net, &tuple); + exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (!exp || exp->master == ct || nfct_help(exp->master)->helper != nfct_help(ct)->helper || @@ -805,7 +865,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, if (direct_rtp) { nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook); if (nf_nat_sdp_port && - !nf_nat_sdp_port(skb, dptr, datalen, + !nf_nat_sdp_port(skb, dataoff, dptr, datalen, mediaoff, medialen, ntohs(rtp_port))) goto err1; } @@ -827,7 +887,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook); if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp) - ret = nf_nat_sdp_media(skb, dptr, datalen, rtp_exp, rtcp_exp, + ret = nf_nat_sdp_media(skb, dataoff, dptr, datalen, + rtp_exp, rtcp_exp, mediaoff, medialen, daddr); else { if (nf_ct_expect_related(rtp_exp) == 0) { @@ -847,6 +908,7 @@ err1: static const struct sdp_media_type sdp_media_types[] = { SDP_MEDIA_TYPE("audio ", SIP_EXPECT_AUDIO), SDP_MEDIA_TYPE("video ", SIP_EXPECT_VIDEO), + SDP_MEDIA_TYPE("image ", SIP_EXPECT_IMAGE), }; static const struct sdp_media_type *sdp_media_type(const char *dptr, @@ -866,13 +928,12 @@ static const struct sdp_media_type *sdp_media_type(const char *dptr, return NULL; } -static int process_sdp(struct sk_buff *skb, +static int process_sdp(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); unsigned int matchoff, matchlen; unsigned int mediaoff, medialen; unsigned int sdpoff; @@ -941,7 +1002,7 @@ static int process_sdp(struct sk_buff *skb, else return NF_DROP; - ret = set_expected_rtp_rtcp(skb, dptr, datalen, + ret = set_expected_rtp_rtcp(skb, dataoff, dptr, datalen, &rtp_addr, htons(port), t->class, mediaoff, medialen); if (ret != NF_ACCEPT) @@ -949,8 +1010,9 @@ static int process_sdp(struct sk_buff *skb, /* Update media connection address if present */ if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { - ret = nf_nat_sdp_addr(skb, dptr, mediaoff, datalen, - c_hdr, SDP_HDR_MEDIA, &rtp_addr); + ret = nf_nat_sdp_addr(skb, dataoff, dptr, datalen, + mediaoff, c_hdr, SDP_HDR_MEDIA, + &rtp_addr); if (ret != NF_ACCEPT) return ret; } @@ -960,14 +1022,12 @@ static int process_sdp(struct sk_buff *skb, /* Update session connection and owner addresses */ nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook); if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp_session(skb, dptr, sdpoff, datalen, &rtp_addr); - - if (ret == NF_ACCEPT && i > 0) - help->help.ct_sip_info.invite_cseq = cseq; + ret = nf_nat_sdp_session(skb, dataoff, dptr, datalen, sdpoff, + &rtp_addr); return ret; } -static int process_invite_response(struct sk_buff *skb, +static int process_invite_response(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -977,13 +1037,13 @@ static int process_invite_response(struct sk_buff *skb, if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) - return process_sdp(skb, dptr, datalen, cseq); + return process_sdp(skb, dataoff, dptr, datalen, cseq); else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } -static int process_update_response(struct sk_buff *skb, +static int process_update_response(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -993,13 +1053,13 @@ static int process_update_response(struct sk_buff *skb, if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) - return process_sdp(skb, dptr, datalen, cseq); + return process_sdp(skb, dataoff, dptr, datalen, cseq); else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } -static int process_prack_response(struct sk_buff *skb, +static int process_prack_response(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -1009,13 +1069,29 @@ static int process_prack_response(struct sk_buff *skb, if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) - return process_sdp(skb, dptr, datalen, cseq); + return process_sdp(skb, dataoff, dptr, datalen, cseq); else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } -static int process_bye_request(struct sk_buff *skb, +static int process_invite_request(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int cseq) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); + unsigned int ret; + + flush_expectations(ct, true); + ret = process_sdp(skb, dataoff, dptr, datalen, cseq); + if (ret == NF_ACCEPT) + help->help.ct_sip_info.invite_cseq = cseq; + return ret; +} + +static int process_bye_request(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { @@ -1030,7 +1106,7 @@ static int process_bye_request(struct sk_buff *skb, * signalling connections. The expectation is marked inactive and is activated * when receiving a response indicating success from the registrar. */ -static int process_register_request(struct sk_buff *skb, +static int process_register_request(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { @@ -1042,6 +1118,7 @@ static int process_register_request(struct sk_buff *skb, struct nf_conntrack_expect *exp; union nf_inet_addr *saddr, daddr; __be16 port; + u8 proto; unsigned int expires = 0; int ret; typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect; @@ -1074,6 +1151,10 @@ static int process_register_request(struct sk_buff *skb, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, &daddr)) return NF_ACCEPT; + if (ct_sip_parse_transport(ct, *dptr, matchoff + matchlen, *datalen, + &proto) == 0) + return NF_ACCEPT; + if (ct_sip_parse_numerical_param(ct, *dptr, matchoff + matchlen, *datalen, "expires=", NULL, NULL, &expires) < 0) @@ -1093,14 +1174,14 @@ static int process_register_request(struct sk_buff *skb, saddr = &ct->tuplehash[!dir].tuple.src.u3; nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct), - saddr, &daddr, IPPROTO_UDP, NULL, &port); + saddr, &daddr, proto, NULL, &port); exp->timeout.expires = sip_timeout * HZ; exp->helper = nfct_help(ct)->helper; exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook); if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK) - ret = nf_nat_sip_expect(skb, dptr, datalen, exp, + ret = nf_nat_sip_expect(skb, dataoff, dptr, datalen, exp, matchoff, matchlen); else { if (nf_ct_expect_related(exp) != 0) @@ -1116,7 +1197,7 @@ store_cseq: return ret; } -static int process_register_response(struct sk_buff *skb, +static int process_register_response(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -1126,7 +1207,8 @@ static int process_register_response(struct sk_buff *skb, enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); union nf_inet_addr addr; __be16 port; - unsigned int matchoff, matchlen, dataoff = 0; + u8 proto; + unsigned int matchoff, matchlen, coff = 0; unsigned int expires = 0; int in_contact = 0, ret; @@ -1153,7 +1235,7 @@ static int process_register_response(struct sk_buff *skb, while (1) { unsigned int c_expires = expires; - ret = ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen, + ret = ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen, SIP_HDR_CONTACT, &in_contact, &matchoff, &matchlen, &addr, &port); @@ -1166,6 +1248,10 @@ static int process_register_response(struct sk_buff *skb, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &addr)) continue; + if (ct_sip_parse_transport(ct, *dptr, matchoff + matchlen, + *datalen, &proto) == 0) + continue; + ret = ct_sip_parse_numerical_param(ct, *dptr, matchoff + matchlen, *datalen, "expires=", @@ -1174,7 +1260,8 @@ static int process_register_response(struct sk_buff *skb, return NF_DROP; if (c_expires == 0) break; - if (refresh_signalling_expectation(ct, &addr, port, c_expires)) + if (refresh_signalling_expectation(ct, &addr, proto, port, + c_expires)) return NF_ACCEPT; } @@ -1184,7 +1271,7 @@ flush: } static const struct sip_handler sip_handlers[] = { - SIP_HANDLER("INVITE", process_sdp, process_invite_response), + SIP_HANDLER("INVITE", process_invite_request, process_invite_response), SIP_HANDLER("UPDATE", process_sdp, process_update_response), SIP_HANDLER("ACK", process_sdp, NULL), SIP_HANDLER("PRACK", process_sdp, process_prack_response), @@ -1192,13 +1279,13 @@ static const struct sip_handler sip_handlers[] = { SIP_HANDLER("REGISTER", process_register_request, process_register_response), }; -static int process_sip_response(struct sk_buff *skb, +static int process_sip_response(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - unsigned int matchoff, matchlen; - unsigned int code, cseq, dataoff, i; + unsigned int matchoff, matchlen, matchend; + unsigned int code, cseq, i; if (*datalen < strlen("SIP/2.0 200")) return NF_ACCEPT; @@ -1212,7 +1299,7 @@ static int process_sip_response(struct sk_buff *skb, cseq = simple_strtoul(*dptr + matchoff, NULL, 10); if (!cseq) return NF_DROP; - dataoff = matchoff + matchlen + 1; + matchend = matchoff + matchlen + 1; for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { const struct sip_handler *handler; @@ -1220,15 +1307,16 @@ static int process_sip_response(struct sk_buff *skb, handler = &sip_handlers[i]; if (handler->response == NULL) continue; - if (*datalen < dataoff + handler->len || - strnicmp(*dptr + dataoff, handler->method, handler->len)) + if (*datalen < matchend + handler->len || + strnicmp(*dptr + matchend, handler->method, handler->len)) continue; - return handler->response(skb, dptr, datalen, cseq, code); + return handler->response(skb, dataoff, dptr, datalen, + cseq, code); } return NF_ACCEPT; } -static int process_sip_request(struct sk_buff *skb, +static int process_sip_request(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; @@ -1253,69 +1341,157 @@ static int process_sip_request(struct sk_buff *skb, if (!cseq) return NF_DROP; - return handler->request(skb, dptr, datalen, cseq); + return handler->request(skb, dataoff, dptr, datalen, cseq); } return NF_ACCEPT; } -static int sip_help(struct sk_buff *skb, - unsigned int protoff, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) +static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, + unsigned int dataoff, const char **dptr, + unsigned int *datalen) +{ + typeof(nf_nat_sip_hook) nf_nat_sip; + int ret; + + if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) + ret = process_sip_request(skb, dataoff, dptr, datalen); + else + ret = process_sip_response(skb, dataoff, dptr, datalen); + + if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { + nf_nat_sip = rcu_dereference(nf_nat_sip_hook); + if (nf_nat_sip && !nf_nat_sip(skb, dataoff, dptr, datalen)) + ret = NF_DROP; + } + + return ret; +} + +static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) { + struct tcphdr *th, _tcph; unsigned int dataoff, datalen; - const char *dptr; + unsigned int matchoff, matchlen, clen; + unsigned int msglen, origlen; + const char *dptr, *end; + s16 diff, tdiff = 0; int ret; - typeof(nf_nat_sip_hook) nf_nat_sip; + typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; + + if (ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + return NF_ACCEPT; /* No Data ? */ - dataoff = protoff + sizeof(struct udphdr); + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); + if (th == NULL) + return NF_ACCEPT; + dataoff = protoff + th->doff * 4; if (dataoff >= skb->len) return NF_ACCEPT; nf_ct_refresh(ct, skb, sip_timeout * HZ); - if (!skb_is_nonlinear(skb)) - dptr = skb->data + dataoff; - else { + if (skb_is_nonlinear(skb)) { pr_debug("Copy of skbuff not supported yet.\n"); return NF_ACCEPT; } + dptr = skb->data + dataoff; datalen = skb->len - dataoff; if (datalen < strlen("SIP/2.0 200")) return NF_ACCEPT; - if (strnicmp(dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) - ret = process_sip_request(skb, &dptr, &datalen); - else - ret = process_sip_response(skb, &dptr, &datalen); + while (1) { + if (ct_sip_get_header(ct, dptr, 0, datalen, + SIP_HDR_CONTENT_LENGTH, + &matchoff, &matchlen) <= 0) + break; + + clen = simple_strtoul(dptr + matchoff, (char **)&end, 10); + if (dptr + matchoff == end) + break; + + if (end + strlen("\r\n\r\n") > dptr + datalen) + break; + if (end[0] != '\r' || end[1] != '\n' || + end[2] != '\r' || end[3] != '\n') + break; + end += strlen("\r\n\r\n") + clen; + + msglen = origlen = end - dptr; + + ret = process_sip_msg(skb, ct, dataoff, &dptr, &msglen); + if (ret != NF_ACCEPT) + break; + diff = msglen - origlen; + tdiff += diff; + + dataoff += msglen; + dptr += msglen; + datalen = datalen + diff - msglen; + } if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { - nf_nat_sip = rcu_dereference(nf_nat_sip_hook); - if (nf_nat_sip && !nf_nat_sip(skb, &dptr, &datalen)) - ret = NF_DROP; + nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook); + if (nf_nat_sip_seq_adjust) + nf_nat_sip_seq_adjust(skb, tdiff); } return ret; } -static struct nf_conntrack_helper sip[MAX_PORTS][2] __read_mostly; -static char sip_names[MAX_PORTS][2][sizeof("sip-65535")] __read_mostly; +static int sip_help_udp(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + unsigned int dataoff, datalen; + const char *dptr; + + /* No Data ? */ + dataoff = protoff + sizeof(struct udphdr); + if (dataoff >= skb->len) + return NF_ACCEPT; + + nf_ct_refresh(ct, skb, sip_timeout * HZ); + + if (skb_is_nonlinear(skb)) { + pr_debug("Copy of skbuff not supported yet.\n"); + return NF_ACCEPT; + } + + dptr = skb->data + dataoff; + datalen = skb->len - dataoff; + if (datalen < strlen("SIP/2.0 200")) + return NF_ACCEPT; + + return process_sip_msg(skb, ct, dataoff, &dptr, &datalen); +} + +static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly; +static char sip_names[MAX_PORTS][4][sizeof("sip-65535")] __read_mostly; static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = { [SIP_EXPECT_SIGNALLING] = { + .name = "signalling", .max_expected = 1, .timeout = 3 * 60, }, [SIP_EXPECT_AUDIO] = { + .name = "audio", .max_expected = 2 * IP_CT_DIR_MAX, .timeout = 3 * 60, }, [SIP_EXPECT_VIDEO] = { + .name = "video", .max_expected = 2 * IP_CT_DIR_MAX, .timeout = 3 * 60, }, + [SIP_EXPECT_IMAGE] = { + .name = "image", + .max_expected = IP_CT_DIR_MAX, + .timeout = 3 * 60, + }, }; static void nf_conntrack_sip_fini(void) @@ -1323,7 +1499,7 @@ static void nf_conntrack_sip_fini(void) int i, j; for (i = 0; i < ports_c; i++) { - for (j = 0; j < 2; j++) { + for (j = 0; j < ARRAY_SIZE(sip[i]); j++) { if (sip[i][j].me == NULL) continue; nf_conntrack_helper_unregister(&sip[i][j]); @@ -1343,14 +1519,24 @@ static int __init nf_conntrack_sip_init(void) memset(&sip[i], 0, sizeof(sip[i])); sip[i][0].tuple.src.l3num = AF_INET; - sip[i][1].tuple.src.l3num = AF_INET6; - for (j = 0; j < 2; j++) { - sip[i][j].tuple.dst.protonum = IPPROTO_UDP; + sip[i][0].tuple.dst.protonum = IPPROTO_UDP; + sip[i][0].help = sip_help_udp; + sip[i][1].tuple.src.l3num = AF_INET; + sip[i][1].tuple.dst.protonum = IPPROTO_TCP; + sip[i][1].help = sip_help_tcp; + + sip[i][2].tuple.src.l3num = AF_INET6; + sip[i][2].tuple.dst.protonum = IPPROTO_UDP; + sip[i][2].help = sip_help_udp; + sip[i][3].tuple.src.l3num = AF_INET6; + sip[i][3].tuple.dst.protonum = IPPROTO_TCP; + sip[i][3].help = sip_help_tcp; + + for (j = 0; j < ARRAY_SIZE(sip[i]); j++) { sip[i][j].tuple.src.u.udp.port = htons(ports[i]); sip[i][j].expect_policy = sip_exp_policy; sip[i][j].expect_class_max = SIP_EXPECT_MAX; sip[i][j].me = THIS_MODULE; - sip[i][j].help = sip_help; tmpname = &sip_names[i][j][0]; if (ports[i] == SIP_PORT) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e310f1561bb2..faa8eb3722b9 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -8,6 +8,7 @@ #include <linux/types.h> #include <linux/netfilter.h> +#include <linux/slab.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/proc_fs.h> @@ -26,6 +27,7 @@ #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_acct.h> +#include <net/netfilter/nf_conntrack_zones.h> MODULE_LICENSE("GPL"); @@ -171,6 +173,11 @@ static int ct_seq_show(struct seq_file *s, void *v) goto release; #endif +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (seq_printf(s, "zone=%u ", nf_ct_zone(ct))) + goto release; +#endif + if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) goto release; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 3a6fd77f7761..c49ef219899e 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -1,4 +1,5 @@ #include <linux/kernel.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/module.h> #include <linux/proc_fs.h> @@ -265,7 +266,6 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) local_bh_disable(); entry->okfn(skb); local_bh_enable(); - case NF_STOLEN: break; case NF_QUEUE: if (!__nf_queue(skb, elem, entry->pf, entry->hook, @@ -273,6 +273,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) verdict >> NF_VERDICT_BITS)) goto next_hook; break; + case NF_STOLEN: default: kfree_skb(skb); } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index eedc0c1ac7a4..6afa3d52ea5f 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -40,7 +40,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; -static struct sock *nfnl = NULL; static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; static DEFINE_MUTEX(nfnl_mutex); @@ -101,34 +100,35 @@ nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss) return &ss->cb[cb_id]; } -int nfnetlink_has_listeners(unsigned int group) +int nfnetlink_has_listeners(struct net *net, unsigned int group) { - return netlink_has_listeners(nfnl, group); + return netlink_has_listeners(net->nfnl, group); } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -int nfnetlink_send(struct sk_buff *skb, u32 pid, +int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo, gfp_t flags) { - return nlmsg_notify(nfnl, skb, pid, group, echo, flags); + return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags); } EXPORT_SYMBOL_GPL(nfnetlink_send); -void nfnetlink_set_err(u32 pid, u32 group, int error) +int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) { - netlink_set_err(nfnl, pid, group, error); + return netlink_set_err(net->nfnl, pid, group, error); } EXPORT_SYMBOL_GPL(nfnetlink_set_err); -int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags) +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags) { - return netlink_unicast(nfnl, skb, pid, flags); + return netlink_unicast(net->nfnl, skb, pid, flags); } EXPORT_SYMBOL_GPL(nfnetlink_unicast); /* Process one complete nfnetlink message. */ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { + struct net *net = sock_net(skb->sk); const struct nfnl_callback *nc; const struct nfnetlink_subsystem *ss; int type, err; @@ -170,7 +170,7 @@ replay: if (err < 0) return err; - err = nc->call(nfnl, skb, nlh, (const struct nlattr **)cda); + err = nc->call(net->nfnl, skb, nlh, (const struct nlattr **)cda); if (err == -EAGAIN) goto replay; return err; @@ -184,26 +184,45 @@ static void nfnetlink_rcv(struct sk_buff *skb) nfnl_unlock(); } -static void __exit nfnetlink_exit(void) +static int __net_init nfnetlink_net_init(struct net *net) { - printk("Removing netfilter NETLINK layer.\n"); - netlink_kernel_release(nfnl); - return; + struct sock *nfnl; + + nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, NFNLGRP_MAX, + nfnetlink_rcv, NULL, THIS_MODULE); + if (!nfnl) + return -ENOMEM; + net->nfnl_stash = nfnl; + rcu_assign_pointer(net->nfnl, nfnl); + return 0; } -static int __init nfnetlink_init(void) +static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list) { - printk("Netfilter messages via NETLINK v%s.\n", nfversion); + struct net *net; - nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX, - nfnetlink_rcv, NULL, THIS_MODULE); - if (!nfnl) { - printk(KERN_ERR "cannot initialize nfnetlink!\n"); - return -ENOMEM; - } + list_for_each_entry(net, net_exit_list, exit_list) + rcu_assign_pointer(net->nfnl, NULL); + synchronize_net(); + list_for_each_entry(net, net_exit_list, exit_list) + netlink_kernel_release(net->nfnl_stash); +} - return 0; +static struct pernet_operations nfnetlink_net_ops = { + .init = nfnetlink_net_init, + .exit_batch = nfnetlink_net_exit_batch, +}; + +static int __init nfnetlink_init(void) +{ + printk("Netfilter messages via NETLINK v%s.\n", nfversion); + return register_pernet_subsys(&nfnetlink_net_ops); } +static void __exit nfnetlink_exit(void) +{ + printk("Removing netfilter NETLINK layer.\n"); + unregister_pernet_subsys(&nfnetlink_net_ops); +} module_init(nfnetlink_init); module_exit(nfnetlink_exit); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 9de0470d557e..203643fb2c52 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -28,6 +28,7 @@ #include <linux/list.h> #include <linux/jhash.h> #include <linux/random.h> +#include <linux/slab.h> #include <net/sock.h> #include <net/netfilter/nf_log.h> #include <net/netfilter/nfnetlink_log.h> @@ -323,7 +324,8 @@ __nfulnl_send(struct nfulnl_instance *inst) NLMSG_DONE, sizeof(struct nfgenmsg)); - status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT); + status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid, + MSG_DONTWAIT); inst->qlen = 0; inst->skb = NULL; @@ -767,7 +769,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } instance_destroy(inst); - goto out; + goto out_put; default: ret = -ENOTSUPP; break; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7e3fa410641e..e70a6ef1f4f2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -18,6 +18,7 @@ #include <linux/skbuff.h> #include <linux/init.h> #include <linux/spinlock.h> +#include <linux/slab.h> #include <linux/notifier.h> #include <linux/netdevice.h> #include <linux/netfilter.h> @@ -112,7 +113,6 @@ instance_create(u_int16_t queue_num, int pid) inst->copy_mode = NFQNL_COPY_NONE; spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); - INIT_RCU_HEAD(&inst->rcu); if (!try_module_get(THIS_MODULE)) { err = -EAGAIN; @@ -414,13 +414,13 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) queue->queue_dropped++; if (net_ratelimit()) printk(KERN_WARNING "nf_queue: full at %d entries, " - "dropping packets(s). Dropped: %d\n", - queue->queue_total, queue->queue_dropped); + "dropping packets(s).\n", + queue->queue_total); goto err_out_free_nskb; } /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT); + err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); if (err < 0) { queue->queue_user_dropped++; goto err_out_unlock; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index f01955cce314..665f5beef6ad 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -22,11 +22,14 @@ #include <linux/vmalloc.h> #include <linux/mutex.h> #include <linux/mm.h> +#include <linux/slab.h> #include <net/net_namespace.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_arp.h> - +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter_arp/arp_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); @@ -37,7 +40,7 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); struct compat_delta { struct compat_delta *next; unsigned int offset; - short delta; + int delta; }; struct xt_af { @@ -364,8 +367,10 @@ int xt_check_match(struct xt_mtchk_param *par, * ebt_among is exempt from centralized matchsize checking * because it uses a dynamic-size data set. */ - pr_err("%s_tables: %s match: invalid size %Zu != %u\n", + pr_err("%s_tables: %s.%u match: invalid size " + "%u (kernel) != (user) %u\n", xt_prefix[par->family], par->match->name, + par->match->revision, XT_ALIGN(par->match->matchsize), size); return -EINVAL; } @@ -435,10 +440,10 @@ void xt_compat_flush_offsets(u_int8_t af) } EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); -short xt_compat_calc_jump(u_int8_t af, unsigned int offset) +int xt_compat_calc_jump(u_int8_t af, unsigned int offset) { struct compat_delta *tmp; - short delta; + int delta; for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next) if (tmp->offset < offset) @@ -481,8 +486,8 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, } EXPORT_SYMBOL_GPL(xt_compat_match_from_user); -int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr, - unsigned int *size) +int xt_compat_match_to_user(const struct xt_entry_match *m, + void __user **dstptr, unsigned int *size) { const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match __user *cm = *dstptr; @@ -514,8 +519,10 @@ int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { if (XT_ALIGN(par->target->targetsize) != size) { - pr_err("%s_tables: %s target: invalid size %Zu != %u\n", + pr_err("%s_tables: %s.%u target: invalid size " + "%u (kernel) != (user) %u\n", xt_prefix[par->family], par->target->name, + par->target->revision, XT_ALIGN(par->target->targetsize), size); return -EINVAL; } @@ -582,8 +589,8 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, } EXPORT_SYMBOL_GPL(xt_compat_target_from_user); -int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr, - unsigned int *size) +int xt_compat_target_to_user(const struct xt_entry_target *t, + void __user **dstptr, unsigned int *size) { const struct xt_target *target = t->u.kernel.target; struct compat_xt_entry_target __user *ct = *dstptr; @@ -1091,6 +1098,60 @@ static const struct file_operations xt_target_ops = { #endif /* CONFIG_PROC_FS */ +/** + * xt_hook_link - set up hooks for a new table + * @table: table with metadata needed to set up hooks + * @fn: Hook function + * + * This function will take care of creating and registering the necessary + * Netfilter hooks for XT tables. + */ +struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn) +{ + unsigned int hook_mask = table->valid_hooks; + uint8_t i, num_hooks = hweight32(hook_mask); + uint8_t hooknum; + struct nf_hook_ops *ops; + int ret; + + ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); + if (ops == NULL) + return ERR_PTR(-ENOMEM); + + for (i = 0, hooknum = 0; i < num_hooks && hook_mask != 0; + hook_mask >>= 1, ++hooknum) { + if (!(hook_mask & 1)) + continue; + ops[i].hook = fn; + ops[i].owner = table->me; + ops[i].pf = table->af; + ops[i].hooknum = hooknum; + ops[i].priority = table->priority; + ++i; + } + + ret = nf_register_hooks(ops, num_hooks); + if (ret < 0) { + kfree(ops); + return ERR_PTR(ret); + } + + return ops; +} +EXPORT_SYMBOL_GPL(xt_hook_link); + +/** + * xt_hook_unlink - remove hooks for a table + * @ops: nf_hook_ops array as returned by nf_hook_link + * @hook_mask: the very same mask that was passed to nf_hook_link + */ +void xt_hook_unlink(const struct xt_table *table, struct nf_hook_ops *ops) +{ + nf_unregister_hooks(ops, hweight32(table->valid_hooks)); + kfree(ops); +} +EXPORT_SYMBOL_GPL(xt_hook_unlink); + int xt_proto_init(struct net *net, u_int8_t af) { #ifdef CONFIG_PROC_FS diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c new file mode 100644 index 000000000000..ee18b231b950 --- /dev/null +++ b/net/netfilter/xt_CT.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2010 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/gfp.h> +#include <linux/skbuff.h> +#include <linux/selinux.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_CT.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_ecache.h> +#include <net/netfilter/nf_conntrack_zones.h> + +static unsigned int xt_ct_target(struct sk_buff *skb, + const struct xt_target_param *par) +{ + const struct xt_ct_target_info *info = par->targinfo; + struct nf_conn *ct = info->ct; + + /* Previously seen (loopback)? Ignore. */ + if (skb->nfct != NULL) + return XT_CONTINUE; + + atomic_inc(&ct->ct_general.use); + skb->nfct = &ct->ct_general; + skb->nfctinfo = IP_CT_NEW; + + return XT_CONTINUE; +} + +static u8 xt_ct_find_proto(const struct xt_tgchk_param *par) +{ + if (par->family == AF_INET) { + const struct ipt_entry *e = par->entryinfo; + + if (e->ip.invflags & IPT_INV_PROTO) + return 0; + return e->ip.proto; + } else if (par->family == AF_INET6) { + const struct ip6t_entry *e = par->entryinfo; + + if (e->ipv6.invflags & IP6T_INV_PROTO) + return 0; + return e->ipv6.proto; + } else + return 0; +} + +static bool xt_ct_tg_check(const struct xt_tgchk_param *par) +{ + struct xt_ct_target_info *info = par->targinfo; + struct nf_conntrack_tuple t; + struct nf_conn_help *help; + struct nf_conn *ct; + u8 proto; + + if (info->flags & ~XT_CT_NOTRACK) + return false; + + if (info->flags & XT_CT_NOTRACK) { + ct = &nf_conntrack_untracked; + atomic_inc(&ct->ct_general.use); + goto out; + } + +#ifndef CONFIG_NF_CONNTRACK_ZONES + if (info->zone) + goto err1; +#endif + + if (nf_ct_l3proto_try_module_get(par->family) < 0) + goto err1; + + memset(&t, 0, sizeof(t)); + ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL); + if (IS_ERR(ct)) + goto err2; + + if ((info->ct_events || info->exp_events) && + !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events, + GFP_KERNEL)) + goto err3; + + if (info->helper[0]) { + proto = xt_ct_find_proto(par); + if (!proto) + goto err3; + + help = nf_ct_helper_ext_add(ct, GFP_KERNEL); + if (help == NULL) + goto err3; + + help->helper = nf_conntrack_helper_try_module_get(info->helper, + par->family, + proto); + if (help->helper == NULL) + goto err3; + } + + __set_bit(IPS_TEMPLATE_BIT, &ct->status); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); +out: + info->ct = ct; + return true; + +err3: + nf_conntrack_free(ct); +err2: + nf_ct_l3proto_module_put(par->family); +err1: + return false; +} + +static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par) +{ + struct xt_ct_target_info *info = par->targinfo; + struct nf_conn *ct = info->ct; + struct nf_conn_help *help; + + if (ct != &nf_conntrack_untracked) { + help = nfct_help(ct); + if (help) + module_put(help->helper->me); + + nf_ct_l3proto_module_put(par->family); + } + nf_ct_put(info->ct); +} + +static struct xt_target xt_ct_tg __read_mostly = { + .name = "CT", + .family = NFPROTO_UNSPEC, + .targetsize = XT_ALIGN(sizeof(struct xt_ct_target_info)), + .checkentry = xt_ct_tg_check, + .destroy = xt_ct_tg_destroy, + .target = xt_ct_target, + .table = "raw", + .me = THIS_MODULE, +}; + +static int __init xt_ct_tg_init(void) +{ + return xt_register_target(&xt_ct_tg); +} + +static void __exit xt_ct_tg_exit(void) +{ + xt_unregister_target(&xt_ct_tg); +} + +module_init(xt_ct_tg_init); +module_exit(xt_ct_tg_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: connection tracking target"); +MODULE_ALIAS("ipt_CT"); +MODULE_ALIAS("ip6t_CT"); diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 8ff7843bb921..3271c8e52153 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -22,6 +22,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter/x_tables.h> +#include <linux/slab.h> #include <linux/leds.h> #include <linux/mutex.h> diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index f28f6a5fc02d..12dcd7007c3e 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -28,6 +28,7 @@ MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); static u32 jhash_initval __read_mostly; +static bool rnd_inited __read_mostly; static unsigned int nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) @@ -90,6 +91,10 @@ static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par) const struct xt_NFQ_info_v1 *info = par->targinfo; u32 maxid; + if (unlikely(!rnd_inited)) { + get_random_bytes(&jhash_initval, sizeof(jhash_initval)); + rnd_inited = true; + } if (info->queues_total == 0) { pr_err("NFQUEUE: number of total queues is 0\n"); return false; @@ -135,7 +140,6 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = { static int __init nfqueue_tg_init(void) { - get_random_bytes(&jhash_initval, sizeof(jhash_initval)); return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg)); } diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index d80b8192e0d4..d16d55df4f61 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -11,6 +11,7 @@ #include <linux/jhash.h> #include <linux/rtnetlink.h> #include <linux/random.h> +#include <linux/slab.h> #include <net/gen_stats.h> #include <net/netlink.h> @@ -23,6 +24,7 @@ static DEFINE_MUTEX(xt_rateest_mutex); #define RATEEST_HSIZE 16 static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly; static unsigned int jhash_rnd __read_mostly; +static bool rnd_inited __read_mostly; static unsigned int xt_rateest_hash(const char *name) { @@ -93,6 +95,11 @@ static bool xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) struct gnet_estimator est; } cfg; + if (unlikely(!rnd_inited)) { + get_random_bytes(&jhash_rnd, sizeof(jhash_rnd)); + rnd_inited = true; + } + est = xt_rateest_lookup(info->name); if (est) { /* @@ -164,7 +171,6 @@ static int __init xt_rateest_tg_init(void) for (i = 0; i < ARRAY_SIZE(rateest_hash); i++) INIT_HLIST_HEAD(&rateest_hash[i]); - get_random_bytes(&jhash_rnd, sizeof(jhash_rnd)); return xt_register_target(&xt_rateest_tg_reg); } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index eda64c1cb1e5..c5f4b9919e9a 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> +#include <linux/gfp.h> #include <linux/ipv6.h> #include <linux/tcp.h> #include <net/dst.h> @@ -60,17 +61,9 @@ tcpmss_mangle_packet(struct sk_buff *skb, tcplen = skb->len - tcphoff; tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); - /* Since it passed flags test in tcp match, we know it is is - not a fragment, and has data >= tcp header length. SYN - packets should not contain data: if they did, then we risk - running over MTU, sending Frag Needed and breaking things - badly. --RR */ - if (tcplen != tcph->doff*4) { - if (net_ratelimit()) - printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n", - skb->len); + /* Header cannot be larger than the packet */ + if (tcplen < tcph->doff*4) return -1; - } if (info->mss == XT_TCPMSS_CLAMP_PMTU) { if (dst_mtu(skb_dst(skb)) <= minlen) { @@ -115,6 +108,12 @@ tcpmss_mangle_packet(struct sk_buff *skb, } } + /* There is data after the header so the option can't be added + without moving it, and doing so may make the SYN packet + itself too large. Accept the packet unmodified instead. */ + if (tcplen > tcph->doff*4) + return 0; + /* * MSS Option not found ?! add it.. */ @@ -241,6 +240,7 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par) { const struct xt_tcpmss_info *info = par->targinfo; const struct ipt_entry *e = par->entryinfo; + const struct xt_entry_match *ematch; if (info->mss == XT_TCPMSS_CLAMP_PMTU && (par->hook_mask & ~((1 << NF_INET_FORWARD) | @@ -250,8 +250,9 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par) "FORWARD, OUTPUT and POSTROUTING hooks\n"); return false; } - if (IPT_MATCH_ITERATE(e, find_syn_match)) - return true; + xt_ematch_foreach(ematch, e) + if (find_syn_match(ematch)) + return true; printk("xt_TCPMSS: Only works on TCP SYN packets\n"); return false; } @@ -261,6 +262,7 @@ static bool tcpmss_tg6_check(const struct xt_tgchk_param *par) { const struct xt_tcpmss_info *info = par->targinfo; const struct ip6t_entry *e = par->entryinfo; + const struct xt_entry_match *ematch; if (info->mss == XT_TCPMSS_CLAMP_PMTU && (par->hook_mask & ~((1 << NF_INET_FORWARD) | @@ -270,8 +272,9 @@ static bool tcpmss_tg6_check(const struct xt_tgchk_param *par) "FORWARD, OUTPUT and POSTROUTING hooks\n"); return false; } - if (IP6T_MATCH_ITERATE(e, find_syn_match)) - return true; + xt_ematch_foreach(ematch, e) + if (find_syn_match(ematch)) + return true; printk("xt_TCPMSS: Only works on TCP SYN packets\n"); return false; } diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 38f03f75a636..388ca4596098 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -17,6 +17,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/jhash.h> +#include <linux/slab.h> #include <linux/list.h> #include <linux/module.h> #include <linux/random.h> @@ -28,6 +29,7 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_tuple.h> +#include <net/netfilter/nf_conntrack_zones.h> /* we will save the tuples of all connections we care about */ struct xt_connlimit_conn { @@ -40,15 +42,11 @@ struct xt_connlimit_data { spinlock_t lock; }; -static u_int32_t connlimit_rnd; -static bool connlimit_rnd_inited; +static u_int32_t connlimit_rnd __read_mostly; +static bool connlimit_rnd_inited __read_mostly; static inline unsigned int connlimit_iphash(__be32 addr) { - if (unlikely(!connlimit_rnd_inited)) { - get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); - connlimit_rnd_inited = true; - } return jhash_1word((__force __u32)addr, connlimit_rnd) & 0xFF; } @@ -59,11 +57,6 @@ connlimit_iphash6(const union nf_inet_addr *addr, union nf_inet_addr res; unsigned int i; - if (unlikely(!connlimit_rnd_inited)) { - get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); - connlimit_rnd_inited = true; - } - for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) res.ip6[i] = addr->ip6[i] & mask->ip6[i]; @@ -99,7 +92,8 @@ same_source_net(const union nf_inet_addr *addr, } } -static int count_them(struct xt_connlimit_data *data, +static int count_them(struct net *net, + struct xt_connlimit_data *data, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, @@ -122,7 +116,8 @@ static int count_them(struct xt_connlimit_data *data, /* check the saved connections */ list_for_each_entry_safe(conn, tmp, hash, list) { - found = nf_conntrack_find_get(&init_net, &conn->tuple); + found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, + &conn->tuple); found_ct = NULL; if (found != NULL) @@ -180,6 +175,7 @@ static int count_them(struct xt_connlimit_data *data, static bool connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) { + struct net *net = dev_net(par->in ? par->in : par->out); const struct xt_connlimit_info *info = par->matchinfo; union nf_inet_addr addr; struct nf_conntrack_tuple tuple; @@ -204,7 +200,7 @@ connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) } spin_lock_bh(&info->data->lock); - connections = count_them(info->data, tuple_ptr, &addr, + connections = count_them(net, info->data, tuple_ptr, &addr, &info->mask, par->family); spin_unlock_bh(&info->data->lock); @@ -226,6 +222,10 @@ static bool connlimit_mt_check(const struct xt_mtchk_param *par) struct xt_connlimit_info *info = par->matchinfo; unsigned int i; + if (unlikely(!connlimit_rnd_inited)) { + get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); + connlimit_rnd_inited = true; + } if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "cannot load conntrack support for " "address family %u\n", par->family); diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 0989f29ade2e..395af5943ffd 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/skbuff.h> +#include <linux/slab.h> #include <linux/spinlock.h> #include <net/ip.h> #include <linux/dccp.h> diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index dd16e404424f..215a64835de8 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -1,6 +1,6 @@ /* * xt_hashlimit - Netfilter module to limit the number of packets per time - * seperately for each hashbucket (sourceip/sourceport/dstip/dstport) + * separately for each hashbucket (sourceip/sourceport/dstip/dstport) * * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> * Copyright © CC Computer Consultants GmbH, 2007 - 2008 @@ -26,6 +26,7 @@ #endif #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> @@ -40,9 +41,19 @@ MODULE_DESCRIPTION("Xtables: per hash-bucket rate-limit match"); MODULE_ALIAS("ipt_hashlimit"); MODULE_ALIAS("ip6t_hashlimit"); +struct hashlimit_net { + struct hlist_head htables; + struct proc_dir_entry *ipt_hashlimit; + struct proc_dir_entry *ip6t_hashlimit; +}; + +static int hashlimit_net_id; +static inline struct hashlimit_net *hashlimit_pernet(struct net *net) +{ + return net_generic(net, hashlimit_net_id); +} + /* need to declare this at the top */ -static struct proc_dir_entry *hashlimit_procdir4; -static struct proc_dir_entry *hashlimit_procdir6; static const struct file_operations dl_file_ops; /* hash table crap */ @@ -79,27 +90,26 @@ struct dsthash_ent { struct xt_hashlimit_htable { struct hlist_node node; /* global list of all htables */ - atomic_t use; + int use; u_int8_t family; + bool rnd_initialized; struct hashlimit_cfg1 cfg; /* config */ /* used internally */ spinlock_t lock; /* lock for list_head */ u_int32_t rnd; /* random seed for hash */ - int rnd_initialized; unsigned int count; /* number entries in table */ struct timer_list timer; /* timer for gc */ /* seq_file stuff */ struct proc_dir_entry *pde; + struct net *net; struct hlist_head hash[0]; /* hashtable itself */ }; -static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */ -static DEFINE_MUTEX(hlimit_mutex); /* additional checkentry protection */ -static HLIST_HEAD(hashlimit_htables); +static DEFINE_MUTEX(hashlimit_mutex); /* protects htables list */ static struct kmem_cache *hashlimit_cachep __read_mostly; static inline bool dst_cmp(const struct dsthash_ent *ent, @@ -150,7 +160,7 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht, * the first hashtable entry */ if (!ht->rnd_initialized) { get_random_bytes(&ht->rnd, sizeof(ht->rnd)); - ht->rnd_initialized = 1; + ht->rnd_initialized = true; } if (ht->cfg.max && ht->count >= ht->cfg.max) { @@ -185,8 +195,9 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) } static void htable_gc(unsigned long htlong); -static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family) +static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_int8_t family) { + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; unsigned int size; unsigned int i; @@ -232,33 +243,34 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family) for (i = 0; i < hinfo->cfg.size; i++) INIT_HLIST_HEAD(&hinfo->hash[i]); - atomic_set(&hinfo->use, 1); + hinfo->use = 1; hinfo->count = 0; hinfo->family = family; - hinfo->rnd_initialized = 0; + hinfo->rnd_initialized = false; spin_lock_init(&hinfo->lock); hinfo->pde = proc_create_data(minfo->name, 0, (family == NFPROTO_IPV4) ? - hashlimit_procdir4 : hashlimit_procdir6, + hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, &dl_file_ops, hinfo); if (!hinfo->pde) { vfree(hinfo); return -1; } + hinfo->net = net; setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo); hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); add_timer(&hinfo->timer); - spin_lock_bh(&hashlimit_lock); - hlist_add_head(&hinfo->node, &hashlimit_htables); - spin_unlock_bh(&hashlimit_lock); + hlist_add_head(&hinfo->node, &hashlimit_net->htables); return 0; } -static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family) +static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, + u_int8_t family) { + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; unsigned int size; unsigned int i; @@ -293,28 +305,27 @@ static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family) for (i = 0; i < hinfo->cfg.size; i++) INIT_HLIST_HEAD(&hinfo->hash[i]); - atomic_set(&hinfo->use, 1); + hinfo->use = 1; hinfo->count = 0; hinfo->family = family; - hinfo->rnd_initialized = 0; + hinfo->rnd_initialized = false; spin_lock_init(&hinfo->lock); hinfo->pde = proc_create_data(minfo->name, 0, (family == NFPROTO_IPV4) ? - hashlimit_procdir4 : hashlimit_procdir6, + hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, &dl_file_ops, hinfo); if (hinfo->pde == NULL) { vfree(hinfo); return -1; } + hinfo->net = net; setup_timer(&hinfo->timer, htable_gc, (unsigned long)hinfo); hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); add_timer(&hinfo->timer); - spin_lock_bh(&hashlimit_lock); - hlist_add_head(&hinfo->node, &hashlimit_htables); - spin_unlock_bh(&hashlimit_lock); + hlist_add_head(&hinfo->node, &hashlimit_net->htables); return 0; } @@ -364,43 +375,46 @@ static void htable_gc(unsigned long htlong) static void htable_destroy(struct xt_hashlimit_htable *hinfo) { + struct hashlimit_net *hashlimit_net = hashlimit_pernet(hinfo->net); + struct proc_dir_entry *parent; + del_timer_sync(&hinfo->timer); - /* remove proc entry */ - remove_proc_entry(hinfo->pde->name, - hinfo->family == NFPROTO_IPV4 ? hashlimit_procdir4 : - hashlimit_procdir6); + if (hinfo->family == NFPROTO_IPV4) + parent = hashlimit_net->ipt_hashlimit; + else + parent = hashlimit_net->ip6t_hashlimit; + remove_proc_entry(hinfo->pde->name, parent); htable_selective_cleanup(hinfo, select_all); vfree(hinfo); } -static struct xt_hashlimit_htable *htable_find_get(const char *name, +static struct xt_hashlimit_htable *htable_find_get(struct net *net, + const char *name, u_int8_t family) { + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; struct hlist_node *pos; - spin_lock_bh(&hashlimit_lock); - hlist_for_each_entry(hinfo, pos, &hashlimit_htables, node) { + hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) { if (!strcmp(name, hinfo->pde->name) && hinfo->family == family) { - atomic_inc(&hinfo->use); - spin_unlock_bh(&hashlimit_lock); + hinfo->use++; return hinfo; } } - spin_unlock_bh(&hashlimit_lock); return NULL; } static void htable_put(struct xt_hashlimit_htable *hinfo) { - if (atomic_dec_and_test(&hinfo->use)) { - spin_lock_bh(&hashlimit_lock); + mutex_lock(&hashlimit_mutex); + if (--hinfo->use == 0) { hlist_del(&hinfo->node); - spin_unlock_bh(&hashlimit_lock); htable_destroy(hinfo); } + mutex_unlock(&hashlimit_mutex); } /* The algorithm used is the Simple Token Bucket Filter (TBF) @@ -479,6 +493,7 @@ static void hashlimit_ipv6_mask(__be32 *i, unsigned int p) case 64 ... 95: i[2] = maskl(i[2], p - 64); i[3] = 0; + break; case 96 ... 127: i[3] = maskl(i[3], p - 96); break; @@ -665,6 +680,7 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par) { + struct net *net = par->net; struct xt_hashlimit_info *r = par->matchinfo; /* Check for overflow. */ @@ -687,25 +703,20 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par) if (r->name[sizeof(r->name) - 1] != '\0') return false; - /* This is the best we've got: We cannot release and re-grab lock, - * since checkentry() is called before x_tables.c grabs xt_mutex. - * We also cannot grab the hashtable spinlock, since htable_create will - * call vmalloc, and that can sleep. And we cannot just re-search - * the list of htable's in htable_create(), since then we would - * create duplicate proc files. -HW */ - mutex_lock(&hlimit_mutex); - r->hinfo = htable_find_get(r->name, par->match->family); - if (!r->hinfo && htable_create_v0(r, par->match->family) != 0) { - mutex_unlock(&hlimit_mutex); + mutex_lock(&hashlimit_mutex); + r->hinfo = htable_find_get(net, r->name, par->match->family); + if (!r->hinfo && htable_create_v0(net, r, par->match->family) != 0) { + mutex_unlock(&hashlimit_mutex); return false; } - mutex_unlock(&hlimit_mutex); + mutex_unlock(&hashlimit_mutex); return true; } static bool hashlimit_mt_check(const struct xt_mtchk_param *par) { + struct net *net = par->net; struct xt_hashlimit_mtinfo1 *info = par->matchinfo; /* Check for overflow. */ @@ -728,19 +739,13 @@ static bool hashlimit_mt_check(const struct xt_mtchk_param *par) return false; } - /* This is the best we've got: We cannot release and re-grab lock, - * since checkentry() is called before x_tables.c grabs xt_mutex. - * We also cannot grab the hashtable spinlock, since htable_create will - * call vmalloc, and that can sleep. And we cannot just re-search - * the list of htable's in htable_create(), since then we would - * create duplicate proc files. -HW */ - mutex_lock(&hlimit_mutex); - info->hinfo = htable_find_get(info->name, par->match->family); - if (!info->hinfo && htable_create(info, par->match->family) != 0) { - mutex_unlock(&hlimit_mutex); + mutex_lock(&hashlimit_mutex); + info->hinfo = htable_find_get(net, info->name, par->match->family); + if (!info->hinfo && htable_create(net, info, par->match->family) != 0) { + mutex_unlock(&hashlimit_mutex); return false; } - mutex_unlock(&hlimit_mutex); + mutex_unlock(&hashlimit_mutex); return true; } @@ -767,7 +772,7 @@ struct compat_xt_hashlimit_info { compat_uptr_t master; }; -static void hashlimit_mt_compat_from_user(void *dst, void *src) +static void hashlimit_mt_compat_from_user(void *dst, const void *src) { int off = offsetof(struct compat_xt_hashlimit_info, hinfo); @@ -775,7 +780,7 @@ static void hashlimit_mt_compat_from_user(void *dst, void *src) memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off); } -static int hashlimit_mt_compat_to_user(void __user *dst, void *src) +static int hashlimit_mt_compat_to_user(void __user *dst, const void *src) { int off = offsetof(struct compat_xt_hashlimit_info, hinfo); @@ -841,8 +846,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { static void *dl_seq_start(struct seq_file *s, loff_t *pos) __acquires(htable->lock) { - struct proc_dir_entry *pde = s->private; - struct xt_hashlimit_htable *htable = pde->data; + struct xt_hashlimit_htable *htable = s->private; unsigned int *bucket; spin_lock_bh(&htable->lock); @@ -859,8 +863,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos) static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos) { - struct proc_dir_entry *pde = s->private; - struct xt_hashlimit_htable *htable = pde->data; + struct xt_hashlimit_htable *htable = s->private; unsigned int *bucket = (unsigned int *)v; *pos = ++(*bucket); @@ -874,11 +877,11 @@ static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos) static void dl_seq_stop(struct seq_file *s, void *v) __releases(htable->lock) { - struct proc_dir_entry *pde = s->private; - struct xt_hashlimit_htable *htable = pde->data; + struct xt_hashlimit_htable *htable = s->private; unsigned int *bucket = (unsigned int *)v; - kfree(bucket); + if (!IS_ERR(bucket)) + kfree(bucket); spin_unlock_bh(&htable->lock); } @@ -917,8 +920,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, static int dl_seq_show(struct seq_file *s, void *v) { - struct proc_dir_entry *pde = s->private; - struct xt_hashlimit_htable *htable = pde->data; + struct xt_hashlimit_htable *htable = s->private; unsigned int *bucket = (unsigned int *)v; struct dsthash_ent *ent; struct hlist_node *pos; @@ -944,7 +946,7 @@ static int dl_proc_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *sf = file->private_data; - sf->private = PDE(inode); + sf->private = PDE(inode)->data; } return ret; } @@ -957,10 +959,61 @@ static const struct file_operations dl_file_ops = { .release = seq_release }; +static int __net_init hashlimit_proc_net_init(struct net *net) +{ + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); + + hashlimit_net->ipt_hashlimit = proc_mkdir("ipt_hashlimit", net->proc_net); + if (!hashlimit_net->ipt_hashlimit) + return -ENOMEM; +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + hashlimit_net->ip6t_hashlimit = proc_mkdir("ip6t_hashlimit", net->proc_net); + if (!hashlimit_net->ip6t_hashlimit) { + proc_net_remove(net, "ipt_hashlimit"); + return -ENOMEM; + } +#endif + return 0; +} + +static void __net_exit hashlimit_proc_net_exit(struct net *net) +{ + proc_net_remove(net, "ipt_hashlimit"); +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) + proc_net_remove(net, "ip6t_hashlimit"); +#endif +} + +static int __net_init hashlimit_net_init(struct net *net) +{ + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); + + INIT_HLIST_HEAD(&hashlimit_net->htables); + return hashlimit_proc_net_init(net); +} + +static void __net_exit hashlimit_net_exit(struct net *net) +{ + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); + + BUG_ON(!hlist_empty(&hashlimit_net->htables)); + hashlimit_proc_net_exit(net); +} + +static struct pernet_operations hashlimit_net_ops = { + .init = hashlimit_net_init, + .exit = hashlimit_net_exit, + .id = &hashlimit_net_id, + .size = sizeof(struct hashlimit_net), +}; + static int __init hashlimit_mt_init(void) { int err; + err = register_pernet_subsys(&hashlimit_net_ops); + if (err < 0) + return err; err = xt_register_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg)); if (err < 0) @@ -974,41 +1027,21 @@ static int __init hashlimit_mt_init(void) printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n"); goto err2; } - hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", init_net.proc_net); - if (!hashlimit_procdir4) { - printk(KERN_ERR "xt_hashlimit: unable to create proc dir " - "entry\n"); - goto err3; - } - err = 0; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) - hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net); - if (!hashlimit_procdir6) { - printk(KERN_ERR "xt_hashlimit: unable to create proc dir " - "entry\n"); - err = -ENOMEM; - } -#endif - if (!err) - return 0; - remove_proc_entry("ipt_hashlimit", init_net.proc_net); -err3: - kmem_cache_destroy(hashlimit_cachep); + return 0; + err2: xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg)); err1: + unregister_pernet_subsys(&hashlimit_net_ops); return err; } static void __exit hashlimit_mt_exit(void) { - remove_proc_entry("ipt_hashlimit", init_net.proc_net); -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) - remove_proc_entry("ip6t_hashlimit", init_net.proc_net); -#endif kmem_cache_destroy(hashlimit_cachep); xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg)); + unregister_pernet_subsys(&hashlimit_net_ops); } module_init(hashlimit_mt_init); diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 2773be6a71dd..e5d7e1ffb1a4 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. */ +#include <linux/slab.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/spinlock.h> @@ -148,7 +149,7 @@ struct compat_xt_rateinfo { /* To keep the full "prev" timestamp, the upper 32 bits are stored in the * master pointer, which does not need to be preserved. */ -static void limit_mt_compat_from_user(void *dst, void *src) +static void limit_mt_compat_from_user(void *dst, const void *src) { const struct compat_xt_rateinfo *cm = src; struct xt_rateinfo m = { @@ -162,7 +163,7 @@ static void limit_mt_compat_from_user(void *dst, void *src) memcpy(dst, &m, sizeof(m)); } -static int limit_mt_compat_to_user(void __user *dst, void *src) +static int limit_mt_compat_to_user(void __user *dst, const void *src) { const struct xt_rateinfo *m = src; struct compat_xt_rateinfo cm = { diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 4d1a41bbd5d7..4169e200588d 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -334,7 +334,7 @@ static bool xt_osf_match_packet(const struct sk_buff *skb, if (info->flags & XT_OSF_LOG) nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL, - "%s [%s:%s] : %pi4:%d -> %pi4:%d hops=%d\n", + "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", f->genre, f->version, f->subtype, &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest), @@ -349,7 +349,7 @@ static bool xt_osf_match_packet(const struct sk_buff *skb, if (!fcount && (info->flags & XT_OSF_LOG)) nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL, - "Remote OS is not known: %pi4:%u -> %pi4:%u\n", + "Remote OS is not known: %pI4:%u -> %pI4:%u\n", &ip->saddr, ntohs(tcp->source), &ip->daddr, ntohs(tcp->dest)); diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 390b7d09fe51..2d5562498c43 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -4,6 +4,7 @@ * Sam Johnston <samj@samj.net> */ #include <linux/skbuff.h> +#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/netfilter/x_tables.h> diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index fc70a49c0afd..834b736857cb 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -27,7 +27,9 @@ #include <linux/bitops.h> #include <linux/skbuff.h> #include <linux/inet.h> +#include <linux/slab.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_recent.h> @@ -52,7 +54,7 @@ module_param(ip_list_perms, uint, 0400); module_param(ip_list_uid, uint, 0400); module_param(ip_list_gid, uint, 0400); MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); -MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)"); +MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)"); MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files"); MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/xt_recent/* files"); @@ -78,37 +80,40 @@ struct recent_table { struct list_head iphash[0]; }; -static LIST_HEAD(tables); +struct recent_net { + struct list_head tables; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *xt_recent; +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT + struct proc_dir_entry *ipt_recent; +#endif +#endif +}; + +static int recent_net_id; +static inline struct recent_net *recent_pernet(struct net *net) +{ + return net_generic(net, recent_net_id); +} + static DEFINE_SPINLOCK(recent_lock); static DEFINE_MUTEX(recent_mutex); #ifdef CONFIG_PROC_FS -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT -static struct proc_dir_entry *proc_old_dir; -#endif -static struct proc_dir_entry *recent_proc_dir; static const struct file_operations recent_old_fops, recent_mt_fops; #endif -static u_int32_t hash_rnd; -static bool hash_rnd_initted; +static u_int32_t hash_rnd __read_mostly; +static bool hash_rnd_inited __read_mostly; -static unsigned int recent_entry_hash4(const union nf_inet_addr *addr) +static inline unsigned int recent_entry_hash4(const union nf_inet_addr *addr) { - if (!hash_rnd_initted) { - get_random_bytes(&hash_rnd, sizeof(hash_rnd)); - hash_rnd_initted = true; - } return jhash_1word((__force u32)addr->ip, hash_rnd) & (ip_list_hash_size - 1); } -static unsigned int recent_entry_hash6(const union nf_inet_addr *addr) +static inline unsigned int recent_entry_hash6(const union nf_inet_addr *addr) { - if (!hash_rnd_initted) { - get_random_bytes(&hash_rnd, sizeof(hash_rnd)); - hash_rnd_initted = true; - } return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), hash_rnd) & (ip_list_hash_size - 1); } @@ -173,18 +178,19 @@ recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr, static void recent_entry_update(struct recent_table *t, struct recent_entry *e) { + e->index %= ip_pkt_list_tot; e->stamps[e->index++] = jiffies; if (e->index > e->nstamps) e->nstamps = e->index; - e->index %= ip_pkt_list_tot; list_move_tail(&e->lru_list, &t->lru_list); } -static struct recent_table *recent_table_lookup(const char *name) +static struct recent_table *recent_table_lookup(struct recent_net *recent_net, + const char *name) { struct recent_table *t; - list_for_each_entry(t, &tables, list) + list_for_each_entry(t, &recent_net->tables, list) if (!strcmp(t->name, name)) return t; return NULL; @@ -203,6 +209,8 @@ static void recent_table_flush(struct recent_table *t) static bool recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) { + struct net *net = dev_net(par->in ? par->in : par->out); + struct recent_net *recent_net = recent_pernet(net); const struct xt_recent_mtinfo *info = par->matchinfo; struct recent_table *t; struct recent_entry *e; @@ -235,7 +243,7 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) ttl++; spin_lock_bh(&recent_lock); - t = recent_table_lookup(info->name); + t = recent_table_lookup(recent_net, info->name); e = recent_entry_lookup(t, &addr, par->match->family, (info->check_set & XT_RECENT_TTL) ? ttl : 0); if (e == NULL) { @@ -260,7 +268,7 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) for (i = 0; i < e->nstamps; i++) { if (info->seconds && time_after(time, e->stamps[i])) continue; - if (++hits >= info->hit_count) { + if (!info->hit_count || ++hits >= info->hit_count) { ret = !ret; break; } @@ -279,6 +287,7 @@ out: static bool recent_mt_check(const struct xt_mtchk_param *par) { + struct recent_net *recent_net = recent_pernet(par->net); const struct xt_recent_mtinfo *info = par->matchinfo; struct recent_table *t; #ifdef CONFIG_PROC_FS @@ -287,6 +296,10 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) unsigned i; bool ret = false; + if (unlikely(!hash_rnd_inited)) { + get_random_bytes(&hash_rnd, sizeof(hash_rnd)); + hash_rnd_inited = true; + } if (hweight8(info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE | XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1) @@ -294,14 +307,18 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) && (info->seconds || info->hit_count)) return false; - if (info->hit_count > ip_pkt_list_tot) + if (info->hit_count > ip_pkt_list_tot) { + pr_info(KBUILD_MODNAME ": hitcount (%u) is larger than " + "packets to be remembered (%u)\n", + info->hit_count, ip_pkt_list_tot); return false; + } if (info->name[0] == '\0' || strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN) return false; mutex_lock(&recent_mutex); - t = recent_table_lookup(info->name); + t = recent_table_lookup(recent_net, info->name); if (t != NULL) { t->refcnt++; ret = true; @@ -318,7 +335,7 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) for (i = 0; i < ip_list_hash_size; i++) INIT_LIST_HEAD(&t->iphash[i]); #ifdef CONFIG_PROC_FS - pde = proc_create_data(t->name, ip_list_perms, recent_proc_dir, + pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent, &recent_mt_fops, t); if (pde == NULL) { kfree(t); @@ -327,10 +344,10 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) pde->uid = ip_list_uid; pde->gid = ip_list_gid; #ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - pde = proc_create_data(t->name, ip_list_perms, proc_old_dir, + pde = proc_create_data(t->name, ip_list_perms, recent_net->ipt_recent, &recent_old_fops, t); if (pde == NULL) { - remove_proc_entry(t->name, proc_old_dir); + remove_proc_entry(t->name, recent_net->xt_recent); kfree(t); goto out; } @@ -339,7 +356,7 @@ static bool recent_mt_check(const struct xt_mtchk_param *par) #endif #endif spin_lock_bh(&recent_lock); - list_add_tail(&t->list, &tables); + list_add_tail(&t->list, &recent_net->tables); spin_unlock_bh(&recent_lock); ret = true; out: @@ -349,20 +366,21 @@ out: static void recent_mt_destroy(const struct xt_mtdtor_param *par) { + struct recent_net *recent_net = recent_pernet(par->net); const struct xt_recent_mtinfo *info = par->matchinfo; struct recent_table *t; mutex_lock(&recent_mutex); - t = recent_table_lookup(info->name); + t = recent_table_lookup(recent_net, info->name); if (--t->refcnt == 0) { spin_lock_bh(&recent_lock); list_del(&t->list); spin_unlock_bh(&recent_lock); #ifdef CONFIG_PROC_FS #ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - remove_proc_entry(t->name, proc_old_dir); + remove_proc_entry(t->name, recent_net->ipt_recent); #endif - remove_proc_entry(t->name, recent_proc_dir); + remove_proc_entry(t->name, recent_net->xt_recent); #endif recent_table_flush(t); kfree(t); @@ -611,8 +629,65 @@ static const struct file_operations recent_mt_fops = { .release = seq_release_private, .owner = THIS_MODULE, }; + +static int __net_init recent_proc_net_init(struct net *net) +{ + struct recent_net *recent_net = recent_pernet(net); + + recent_net->xt_recent = proc_mkdir("xt_recent", net->proc_net); + if (!recent_net->xt_recent) + return -ENOMEM; +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT + recent_net->ipt_recent = proc_mkdir("ipt_recent", net->proc_net); + if (!recent_net->ipt_recent) { + proc_net_remove(net, "xt_recent"); + return -ENOMEM; + } +#endif + return 0; +} + +static void __net_exit recent_proc_net_exit(struct net *net) +{ +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT + proc_net_remove(net, "ipt_recent"); +#endif + proc_net_remove(net, "xt_recent"); +} +#else +static inline int recent_proc_net_init(struct net *net) +{ + return 0; +} + +static inline void recent_proc_net_exit(struct net *net) +{ +} #endif /* CONFIG_PROC_FS */ +static int __net_init recent_net_init(struct net *net) +{ + struct recent_net *recent_net = recent_pernet(net); + + INIT_LIST_HEAD(&recent_net->tables); + return recent_proc_net_init(net); +} + +static void __net_exit recent_net_exit(struct net *net) +{ + struct recent_net *recent_net = recent_pernet(net); + + BUG_ON(!list_empty(&recent_net->tables)); + recent_proc_net_exit(net); +} + +static struct pernet_operations recent_net_ops = { + .init = recent_net_init, + .exit = recent_net_exit, + .id = &recent_net_id, + .size = sizeof(struct recent_net), +}; + static struct xt_match recent_mt_reg[] __read_mostly = { { .name = "recent", @@ -644,39 +719,19 @@ static int __init recent_mt_init(void) return -EINVAL; ip_list_hash_size = 1 << fls(ip_list_tot); - err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); -#ifdef CONFIG_PROC_FS + err = register_pernet_subsys(&recent_net_ops); if (err) return err; - recent_proc_dir = proc_mkdir("xt_recent", init_net.proc_net); - if (recent_proc_dir == NULL) { - xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); - err = -ENOMEM; - } -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - if (err < 0) - return err; - proc_old_dir = proc_mkdir("ipt_recent", init_net.proc_net); - if (proc_old_dir == NULL) { - remove_proc_entry("xt_recent", init_net.proc_net); - xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); - err = -ENOMEM; - } -#endif -#endif + err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); + if (err) + unregister_pernet_subsys(&recent_net_ops); return err; } static void __exit recent_mt_exit(void) { - BUG_ON(!list_empty(&tables)); xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); -#ifdef CONFIG_PROC_FS -#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT - remove_proc_entry("ipt_recent", init_net.proc_net); -#endif - remove_proc_entry("xt_recent", init_net.proc_net); -#endif + unregister_pernet_subsys(&recent_net_ops); } module_init(recent_mt_init); diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h new file mode 100644 index 000000000000..6efe4e5a81c6 --- /dev/null +++ b/net/netfilter/xt_repldata.h @@ -0,0 +1,35 @@ +/* + * Today's hack: quantum tunneling in structs + * + * 'entries' and 'term' are never anywhere referenced by word in code. In fact, + * they serve as the hanging-off data accessed through repl.data[]. + */ + +#define xt_alloc_initial_table(type, typ2) ({ \ + unsigned int hook_mask = info->valid_hooks; \ + unsigned int nhooks = hweight32(hook_mask); \ + unsigned int bytes = 0, hooknum = 0, i = 0; \ + struct { \ + struct type##_replace repl; \ + struct type##_standard entries[nhooks]; \ + struct type##_error term; \ + } *tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); \ + if (tbl == NULL) \ + return NULL; \ + strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \ + tbl->term = (struct type##_error)typ2##_ERROR_INIT; \ + tbl->repl.valid_hooks = hook_mask; \ + tbl->repl.num_entries = nhooks + 1; \ + tbl->repl.size = nhooks * sizeof(struct type##_standard) + \ + sizeof(struct type##_error); \ + for (; hook_mask != 0; hook_mask >>= 1, ++hooknum) { \ + if (!(hook_mask & 1)) \ + continue; \ + tbl->repl.hook_entry[hooknum] = bytes; \ + tbl->repl.underflow[hooknum] = bytes; \ + tbl->entries[i++] = (struct type##_standard) \ + typ2##_STANDARD_INIT(NF_ACCEPT); \ + bytes += sizeof(struct type##_standard); \ + } \ + tbl; \ +}) diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index d8c0f8f1a78e..937ce0633e99 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -12,6 +12,7 @@ #include <linux/spinlock.h> #include <linux/skbuff.h> #include <linux/net.h> +#include <linux/slab.h> #include <linux/netfilter/xt_statistic.h> #include <linux/netfilter/x_tables.h> diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index b4d774111311..96801ffd8af8 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -7,6 +7,7 @@ * published by the Free Software Foundation. */ +#include <linux/gfp.h> #include <linux/init.h> #include <linux/module.h> #include <linux/kernel.h> |