From 80c72fe415698049a477314ac82790c1af0fa7e3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 3 May 2007 03:11:29 -0700 Subject: [AFS/AF_RXRPC]: Miscellaneous fixes. Make miscellaneous fixes to AFS and AF_RXRPC: (*) Make AF_RXRPC select KEYS rather than RXKAD or AFS_FS in Kconfig. (*) Don't use FS_BINARY_MOUNTDATA. (*) Remove a done 'TODO' item in a comment on afs_get_sb(). (*) Don't pass a void * as the page pointer argument of kmap_atomic() as this breaks on m68k. Patch from Geert Uytterhoeven. (*) Use match_*() functions rather than doing my own parsing. Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 8750f6da6bc7..91b3d52f6f1a 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -5,6 +5,7 @@ config AF_RXRPC tristate "RxRPC session sockets" depends on EXPERIMENTAL + select KEYS help Say Y or M here to include support for RxRPC session sockets (just the transport part, not the presentation part: (un)marshalling is @@ -29,7 +30,7 @@ config AF_RXRPC_DEBUG config RXKAD tristate "RxRPC Kerberos security" - depends on AF_RXRPC && KEYS + depends on AF_RXRPC select CRYPTO select CRYPTO_MANAGER select CRYPTO_BLKCIPHER -- cgit v1.2.3
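The last item in the AFS patch above refers to the match_*() helpers from <linux/parser.h>. As a hedged illustration of what such a conversion looks like, here is a minimal, hypothetical mount-option parser; the token table, option names and function are invented for the example and are not the actual fs/afs code:

enum { Opt_rwpath, Opt_vol, Opt_err };

/* Hypothetical option table; patterns use the %s/%d capture syntax. */
static match_table_t example_opts = {
	{ Opt_rwpath,	"rwpath" },
	{ Opt_vol,	"vol=%s" },
	{ Opt_err,	NULL }
};

static int example_parse_options(char *options)
{
	substring_t args[MAX_OPT_ARGS];
	char *p;

	while ((p = strsep(&options, ",")) != NULL) {
		if (!*p)
			continue;
		switch (match_token(p, example_opts, args)) {
		case Opt_rwpath:
			/* set the corresponding mount flag */
			break;
		case Opt_vol:
			/* args[0].from..args[0].to bounds the value */
			break;
		default:
			return -EINVAL;
		}
	}
	return 0;
}

The gain over open-coded strsep()/strcmp() parsing is that the table declares the whole option grammar in one place and match_token() handles the pattern captures.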
From 825e7d45cfa41bc96dd8ac4978b4d458a9ad5770 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Thu, 3 May 2007 03:13:35 -0700 Subject: [TCP]: Delete unused header file net/ipv4/tcp_yeah.h. Delete the apparently unused header file net/ipv4/tcp_yeah.h. Signed-off-by: Robert P. J. Day Signed-off-by: David S. Miller --- net/ipv4/tcp_yeah.h | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 net/ipv4/tcp_yeah.h (limited to 'net') diff --git a/net/ipv4/tcp_yeah.h b/net/ipv4/tcp_yeah.h deleted file mode 100644 index ed3b7198f23c..000000000000 --- a/net/ipv4/tcp_yeah.h +++ /dev/null @@ -1,7 +0,0 @@ -#include -#include -#include -#include -#include - -#include -- cgit v1.2.3 From 3f660d66dfbc13ea4b61d3865851b348444c24b4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 3 May 2007 03:17:14 -0700 Subject: [NETLINK]: Kill CB only when socket is unused Since we can still receive packets until all references to the socket are gone, we don't need to kill the CB until that happens. This also aligns ourselves with the receive queue purging which happens at that point. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 42d2fb94eff1..7fc6b4da4f02 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -140,6 +140,15 @@ static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) static void netlink_sock_destruct(struct sock *sk) { + struct netlink_sock *nlk = nlk_sk(sk); + + BUG_ON(mutex_is_locked(nlk_sk(sk)->cb_mutex)); + if (nlk->cb) { + if (nlk->cb->done) + nlk->cb->done(nlk->cb); + netlink_destroy_callback(nlk->cb); + } + skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { @@ -148,7 +157,6 @@ static void netlink_sock_destruct(struct sock *sk) } BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(!nlk_sk(sk)->cb); BUG_TRAP(!nlk_sk(sk)->groups); } @@ -456,17 +464,10 @@ static int netlink_release(struct socket *sock) sock_orphan(sk); nlk = nlk_sk(sk); - mutex_lock(nlk->cb_mutex); - if (nlk->cb) { - if (nlk->cb->done) - nlk->cb->done(nlk->cb); - netlink_destroy_callback(nlk->cb); - nlk->cb = NULL; - } - mutex_unlock(nlk->cb_mutex); - - /* OK. Socket is unlinked, and, therefore, no new packets will arrive */ + /* + * OK. Socket is unlinked, any packets that arrive now + * will be purged. + */ sock->sk = NULL; wake_up_interruptible_all(&nlk->wait); @@ -1426,9 +1427,9 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, return -ECONNREFUSED; } nlk = nlk_sk(sk); - /* A dump or destruction is in progress... */ + /* A dump is in progress... */ mutex_lock(nlk->cb_mutex); - if (nlk->cb || sock_flag(sk, SOCK_DEAD)) { + if (nlk->cb) { mutex_unlock(nlk->cb_mutex); netlink_destroy_callback(cb); sock_put(sk); -- cgit v1.2.3 From 188ccb5583b8f501e1d0f5ba4f056afa141694e7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 3 May 2007 03:27:01 -0700 Subject: [NETLINK]: Fix use after free in netlink_recvmsg When the user passes in MSG_TRUNC the skb is used after getting freed. Signed-off-by: Patrick McHardy Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7fc6b4da4f02..ac1ceadf4ed3 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1246,16 +1246,14 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, siocb->scm = &scm; } siocb->scm->creds = *NETLINK_CREDS(skb); + if (flags & MSG_TRUNC) + copied = skb->len; skb_free_datagram(sk, skb); if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) netlink_dump(sk); scm_recv(sock, msg, siocb->scm, flags); - - if (flags & MSG_TRUNC) - copied = skb->len; - out: netlink_rcv_wake(sk); return err ? : copied; -- cgit v1.2.3
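The use-after-free fix above encodes a rule worth a standalone sketch: anything still needed from an skb (here its untruncated length for MSG_TRUNC) must be read before skb_free_datagram() releases it. The function below is a hypothetical datagram receive path written only to illustrate that ordering; it is not the netlink code itself:

/* Hypothetical recvmsg path: capture skb->len before freeing. */
static int example_recvmsg(struct sock *sk, struct msghdr *msg,
			   size_t len, int flags)
{
	struct sk_buff *skb;
	int err, copied;

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
	if (!skb)
		return err;

	copied = min_t(unsigned int, skb->len, len);
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	/* Read the full length while the skb is still alive ... */
	if (!err && (flags & MSG_TRUNC))
		copied = skb->len;

	/* ... because after this call the skb may already be gone. */
	skb_free_datagram(sk, skb);
	return err ? err : copied;
}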
From 4e9cac2ba437fcb093c7417b1cd91a77ebd1756a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 3 May 2007 03:28:13 -0700 Subject: [NET]: Add __dev_getfirstbyhwtype Add __dev_getfirstbyhwtype for callers that don't want a reference but only some data from the device, and thus need to take the rtnl anyway. Signed-off-by: Patrick McHardy Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/core/dev.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index eb999003bbb7..c305819b7266 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -576,17 +576,28 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) EXPORT_SYMBOL(dev_getbyhwaddr); -struct net_device *dev_getfirstbyhwtype(unsigned short type) +struct net_device *__dev_getfirstbyhwtype(unsigned short type) { struct net_device *dev; - rtnl_lock(); + ASSERT_RTNL(); for (dev = dev_base; dev; dev = dev->next) { - if (dev->type == type) { - dev_hold(dev); + if (dev->type == type) break; - } } + return dev; +} + +EXPORT_SYMBOL(__dev_getfirstbyhwtype); + +struct net_device *dev_getfirstbyhwtype(unsigned short type) +{ + struct net_device *dev; + + rtnl_lock(); + dev = __dev_getfirstbyhwtype(type); + if (dev) + dev_hold(dev); rtnl_unlock(); return dev; } -- cgit v1.2.3
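To make the intended calling convention of the new helper concrete, here is a hedged sketch of a caller that already runs under the rtnl and only wants data out of the device, not a reference; the function and the chosen ARPHRD type are illustrative, not from the patch:

/* Hypothetical user of __dev_getfirstbyhwtype(): copy what is
 * needed while holding the rtnl instead of taking a reference. */
static int example_copy_hwaddr(unsigned char *addr, int *alen)
{
	struct net_device *dev;
	int err = -ENODEV;

	rtnl_lock();
	dev = __dev_getfirstbyhwtype(ARPHRD_ETHER);
	if (dev) {
		memcpy(addr, dev->dev_addr, dev->addr_len);
		*alen = dev->addr_len;
		err = 0;
	}
	rtnl_unlock();
	return err;
}

The locked variant dev_getfirstbyhwtype() remains for callers that do want the device held across rtnl_unlock().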
From 327850070b019a96853c533c152688546201c286 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 3 May 2007 03:34:03 -0700 Subject: [NETFILTER]: ipt_DNAT: accept port randomization option Also accept the --random option for DNAT to allow randomly selecting a destination port from the given range. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_nat_rule.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 2a283397a8b6..2534f718ab92 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -226,10 +226,6 @@ static int ipt_dnat_checkentry(const char *tablename, printk("DNAT: multiple ranges no longer supported\n"); return 0; } - if (mr->range[0].flags & IP_NAT_RANGE_PROTO_RANDOM) { - printk("DNAT: port randomization not supported\n"); - return 0; - } return 1; } -- cgit v1.2.3 From c2a1910b06fed96db77bb358c18c52a1fcf2b7fe Mon Sep 17 00:00:00 2001 From: Jorge Boncompte Date: Thu, 3 May 2007 03:34:42 -0700 Subject: [NETFILTER]: nf_nat_proto_gre: do not modify/corrupt GREv0 packets through NAT While porting some changes of the 2.6.21-rc7 pptp/proto_gre conntrack and nat modules to a 2.4.32 kernel, I noticed that the gre_key function returns a wrong pointer to the GRE key of a version 0 packet, thus corrupting the packet payload. The intended behaviour for GREv0 packets is to act like nf_conntrack_proto_generic/nf_nat_proto_unknown, so I have ripped out the offending functions (no longer used) and modified the nf_nat_proto_gre module not to touch version 0 (non-PPTP) packets. Signed-off-by: Jorge Boncompte Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_nat_proto_gre.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index e5a34c17d927..c3908bc5a709 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -72,6 +72,11 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, __be16 *keyptr; unsigned int min, i, range_size; + /* If there is no master conntrack we are not PPTP, do not change tuples */ + if (!conntrack->master) + return 0; + if (maniptype == IP_NAT_MANIP_SRC) keyptr = &tuple->src.u.gre.key; else @@ -122,18 +127,9 @@ gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, if (maniptype != IP_NAT_MANIP_DST) return 1; switch (greh->version) { - case 0: - if (!greh->key) { - DEBUGP("can't nat GRE w/o key\n"); - break; - } - if (greh->csum) { - /* FIXME: Never tested this code... */ - nf_proto_csum_replace4(gre_csum(greh), *pskb, - *(gre_key(greh)), - tuple->dst.u.gre.key, 0); - } - *(gre_key(greh)) = tuple->dst.u.gre.key; + case GRE_VERSION_1701: + /* We do not currently NAT any GREv0 packets. + * Try to behave like "nf_nat_proto_unknown" */ break; case GRE_VERSION_PPTP: DEBUGP("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); -- cgit v1.2.3 From cfd6c38096d75c8b86782683c5f45c415a505b78 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 3 May 2007 03:35:31 -0700 Subject: [NETFILTER]: sip: Fix RTP address NAT I needed to use this recently to talk to a Cisco server. In my case I only did SNAT, while the Cisco server used a different address for RTP traffic than the one for SIP. I discovered that nf_nat_sip NATed the RTP address to the SIP one, which was unnecessary but OK. However, in doing so it did not DNAT the destination address of the RTP traffic sent to the Cisco back to the original RTP address. This patch corrects this by noting down the RTP address and using it when the expectation fires. Signed-off-by: Herbert Xu Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_nat_sip.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index bfd88e4e0685..fac97cf51ae5 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -222,6 +222,29 @@ static unsigned int mangle_sdp(struct sk_buff **pskb, return mangle_content_len(pskb, ctinfo, ct, dptr); } +static void ip_nat_sdp_expect(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct nf_nat_range range; + + /* This must be a fresh one. */ + BUG_ON(ct->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = IP_NAT_RANGE_MAP_IPS; + range.min_ip = range.max_ip = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; + /* hook doesn't matter, but it has to do source manip */ + nf_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.min = range.max = exp->saved_proto; + range.min_ip = range.max_ip = exp->saved_ip; + /* hook doesn't matter, but it has to do destination manip */ + nf_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); +} + /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version.
*/ static unsigned int ip_nat_sdp(struct sk_buff **pskb, @@ -239,13 +262,14 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb, /* Connection will come from reply */ newip = ct->tuplehash[!dir].tuple.dst.u3.ip; + exp->saved_ip = exp->tuple.dst.u3.ip; exp->tuple.dst.u3.ip = newip; exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port; exp->dir = !dir; /* When you see the packet, we need to NAT it the same as the this one. */ - exp->expectfn = nf_nat_follow_master; + exp->expectfn = ip_nat_sdp_expect; /* Try to get same port: if not, try to change it. */ for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) { -- cgit v1.2.3 From fc38582db98533066f4ba64f948720483fbfe7b2 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 3 May 2007 03:36:16 -0700 Subject: [NETFILTER]: bridge netfilter: consolidate header pushing/pulling code Consolidate the common push/pull sequences into a few helper functions. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 138 ++++++++++++++-------------------------------- 1 file changed, 40 insertions(+), 98 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 9b2986b182ba..fa779874b9dd 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -142,14 +142,33 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) return skb->nf_bridge; } -static inline void nf_bridge_save_header(struct sk_buff *skb) +static inline void nf_bridge_push_encap_header(struct sk_buff *skb) +{ + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_push(skb, len); + skb->network_header -= len; +} + +static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) { - int header_size = ETH_HLEN; + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_pull(skb, len); + skb->network_header += len; +} - if (skb->protocol == htons(ETH_P_8021Q)) - header_size += VLAN_HLEN; - else if (skb->protocol == htons(ETH_P_PPP_SES)) - header_size += PPPOE_SES_HLEN; +static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) +{ + unsigned int len = nf_bridge_encap_header_len(skb); + + skb_pull_rcsum(skb, len); + skb->network_header += len; +} + +static inline void nf_bridge_save_header(struct sk_buff *skb) +{ + int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); skb_copy_from_linear_data_offset(skb, -header_size, skb->nf_bridge->data, header_size); @@ -162,12 +181,7 @@ static inline void nf_bridge_save_header(struct sk_buff *skb) int nf_bridge_copy_header(struct sk_buff *skb) { int err; - int header_size = ETH_HLEN; - - if (skb->protocol == htons(ETH_P_8021Q)) - header_size += VLAN_HLEN; - else if (skb->protocol == htons(ETH_P_PPP_SES)) - header_size += PPPOE_SES_HLEN; + int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); err = skb_cow(skb, header_size); if (err) @@ -175,11 +189,7 @@ int nf_bridge_copy_header(struct sk_buff *skb) skb_copy_to_linear_data_offset(skb, -header_size, skb->nf_bridge->data, header_size); - - if (skb->protocol == htons(ETH_P_8021Q)) - __skb_push(skb, VLAN_HLEN); - else if (skb->protocol == htons(ETH_P_PPP_SES)) - __skb_push(skb, PPPOE_SES_HLEN); + __skb_push(skb, nf_bridge_encap_header_len(skb)); return 0; } @@ -200,13 +210,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) dst_hold(skb->dst); skb->dev = nf_bridge->physindev; - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->network_header -= VLAN_HLEN; - } else if (skb->protocol == 
htons(ETH_P_PPP_SES)) { - skb_push(skb, PPPOE_SES_HLEN); - skb->network_header -= PPPOE_SES_HLEN; - } + nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish, 1); @@ -284,13 +288,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) if (!skb->dev) kfree_skb(skb); else { - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_pull(skb, VLAN_HLEN); - skb->network_header += VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_pull(skb, PPPOE_SES_HLEN); - skb->network_header += PPPOE_SES_HLEN; - } + nf_bridge_pull_encap_header(skb); skb->dst->output(skb); } return 0; @@ -356,15 +354,7 @@ bridged_dnat: * bridged frame */ nf_bridge->mask |= BRNF_BRIDGED_DNAT; skb->dev = nf_bridge->physindev; - if (skb->protocol == - htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->network_header -= VLAN_HLEN; - } else if(skb->protocol == - htons(ETH_P_PPP_SES)) { - skb_push(skb, PPPOE_SES_HLEN); - skb->network_header -= PPPOE_SES_HLEN; - } + nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_nf_pre_routing_finish_bridge, @@ -380,13 +370,7 @@ bridged_dnat: } skb->dev = nf_bridge->physindev; - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->network_header -= VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_push(skb, PPPOE_SES_HLEN); - skb->network_header -= PPPOE_SES_HLEN; - } + nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, br_handle_frame_finish, 1); @@ -536,14 +520,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, #endif if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL) goto out; - - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_pull_rcsum(skb, VLAN_HLEN); - skb->network_header += VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_pull_rcsum(skb, PPPOE_SES_HLEN); - skb->network_header += PPPOE_SES_HLEN; - } + nf_bridge_pull_encap_header_rcsum(skb); return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); } #ifdef CONFIG_SYSCTL @@ -557,14 +534,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL) goto out; - - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_pull_rcsum(skb, VLAN_HLEN); - skb->network_header += VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_pull_rcsum(skb, PPPOE_SES_HLEN); - skb->network_header += PPPOE_SES_HLEN; - } + nf_bridge_pull_encap_header_rcsum(skb); if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto inhdr_error; @@ -642,13 +612,7 @@ static int br_nf_forward_finish(struct sk_buff *skb) } else { in = *((struct net_device **)(skb->cb)); } - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->network_header -= VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_push(skb, PPPOE_SES_HLEN); - skb->network_header -= PPPOE_SES_HLEN; - } + nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in, skb->dev, br_forward_finish, 1); return 0; @@ -682,13 +646,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, else pf = PF_INET6; - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_pull(*pskb, VLAN_HLEN); - (*pskb)->network_header += VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_pull(*pskb, PPPOE_SES_HLEN); - (*pskb)->network_header += 
PPPOE_SES_HLEN; - } + nf_bridge_pull_encap_header(*pskb); nf_bridge = skb->nf_bridge; if (skb->pkt_type == PACKET_OTHERHOST) { @@ -722,15 +680,12 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, if (skb->protocol != htons(ETH_P_ARP)) { if (!IS_VLAN_ARP(skb)) return NF_ACCEPT; - skb_pull(*pskb, VLAN_HLEN); - (*pskb)->network_header += VLAN_HLEN; + nf_bridge_pull_encap_header(*pskb); } if (arp_hdr(skb)->ar_pln != 4) { - if (IS_VLAN_ARP(skb)) { - skb_push(*pskb, VLAN_HLEN); - (*pskb)->network_header -= VLAN_HLEN; - } + if (IS_VLAN_ARP(skb)) + nf_bridge_push_encap_header(*pskb); return NF_ACCEPT; } *d = (struct net_device *)in; @@ -777,13 +732,7 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, skb->pkt_type = PACKET_OTHERHOST; nf_bridge->mask ^= BRNF_PKT_TYPE; } - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_push(skb, VLAN_HLEN); - skb->network_header -= VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_push(skb, PPPOE_SES_HLEN); - skb->network_header -= PPPOE_SES_HLEN; - } + nf_bridge_push_encap_header(skb); NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev, br_forward_finish); @@ -848,14 +797,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, nf_bridge->mask |= BRNF_PKT_TYPE; } - if (skb->protocol == htons(ETH_P_8021Q)) { - skb_pull(skb, VLAN_HLEN); - skb->network_header += VLAN_HLEN; - } else if (skb->protocol == htons(ETH_P_PPP_SES)) { - skb_pull(skb, PPPOE_SES_HLEN); - skb->network_header += PPPOE_SES_HLEN; - } - + nf_bridge_pull_encap_header(skb); nf_bridge_save_header(skb); #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) -- cgit v1.2.3 From 03fba0479600114f32d29eee74ca3eaa364606bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Thu, 3 May 2007 13:28:35 -0700 Subject: [TCP] Highspeed: Limited slow-start is nowadays in tcp_slow_start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reuse limited slow-start (RFC3742) included into tcp_cong instead of having another implementation in High Speed TCP. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_highspeed.c | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index a291097fcc0a..43d624e5043c 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -97,10 +97,6 @@ struct hstcp { u32 ai; }; -static int max_ssthresh = 100; -module_param(max_ssthresh, int, 0644); -MODULE_PARM_DESC(max_ssthresh, "limited slow start threshold (RFC3742)"); - static void hstcp_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -122,23 +118,9 @@ static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, if (!tcp_is_cwnd_limited(sk, in_flight)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) { - /* RFC3742: limited slow start - * the window is increased by 1/K MSS for each arriving ACK, - * for K = int(cwnd/(0.5 max_ssthresh)) - */ - if (max_ssthresh > 0 && tp->snd_cwnd > max_ssthresh) { - u32 k = max(tp->snd_cwnd / (max_ssthresh >> 1), 1U); - if (++tp->snd_cwnd_cnt >= k) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } - } else { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } - } else { + if (tp->snd_cwnd <= tp->snd_ssthresh) + tcp_slow_start(tp); + else { /* Update AIMD parameters. 
* * We want to guarantee that: -- cgit v1.2.3 From 7562f876cd93800f2f8c89445f2a563590b24e09 Mon Sep 17 00:00:00 2001 From: Pavel Emelianov Date: Thu, 3 May 2007 15:13:45 -0700 Subject: [NET]: Rework dev_base via list_head (v3) Cleanup of dev_base list use, with the aim to simplify making device list per-namespace. In almost every occasion, use of dev_base variable and dev->next pointer could be easily replaced by for_each_netdev loop. A few most complicated places were converted to using first_netdev()/next_netdev(). Signed-off-by: Pavel Emelianov Acked-by: Kirill Korotaev Signed-off-by: David S. Miller --- net/8021q/vlan.c | 3 +- net/8021q/vlanproc.c | 36 ++++++++++++++------- net/bridge/br_if.c | 4 +-- net/bridge/br_ioctl.c | 4 ++- net/bridge/br_netlink.c | 3 +- net/core/dev.c | 84 ++++++++++++++++++++++-------------------------- net/core/dev_mcast.c | 5 ++- net/core/rtnetlink.c | 7 ++-- net/decnet/af_decnet.c | 11 ++++--- net/decnet/dn_dev.c | 85 ++++++++++++++++++++++++------------------------- net/decnet/dn_fib.c | 2 +- net/decnet/dn_route.c | 14 ++++---- net/ipv4/devinet.c | 17 ++++++---- net/ipv4/igmp.c | 15 ++++----- net/ipv4/ipconfig.c | 2 +- net/ipv6/addrconf.c | 28 +++++++++------- net/ipv6/anycast.c | 17 ++++++---- net/ipv6/mcast.c | 15 ++++----- net/llc/llc_core.c | 10 ++++-- net/netrom/nr_route.c | 5 +-- net/rose/rose_route.c | 8 +++-- net/sched/sch_api.c | 7 ++-- net/sctp/protocol.c | 2 +- net/tipc/eth_media.c | 12 ++++--- 24 files changed, 214 insertions(+), 182 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index c0c7bb8e9f07..bd93c45778d4 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -117,8 +117,7 @@ static void __exit vlan_cleanup_devices(void) struct net_device *dev, *nxt; rtnl_lock(); - for (dev = dev_base; dev; dev = nxt) { - nxt = dev->next; + for_each_netdev_safe(dev, nxt) { if (dev->priv_flags & IFF_802_1Q_VLAN) { unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev, VLAN_DEV_INFO(dev)->vlan_id); diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 5e24f72602a1..d216a64421cd 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -237,13 +237,9 @@ int vlan_proc_rem_dev(struct net_device *vlandev) * The following few functions build the content of /proc/net/vlan/config */ -/* starting at dev, find a VLAN device */ -static struct net_device *vlan_skip(struct net_device *dev) +static inline int is_vlan_dev(struct net_device *dev) { - while (dev && !(dev->priv_flags & IFF_802_1Q_VLAN)) - dev = dev->next; - - return dev; + return dev->priv_flags & IFF_802_1Q_VLAN; } /* start read of /proc/net/vlan/config */ @@ -257,19 +253,35 @@ static void *vlan_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) return SEQ_START_TOKEN; - for (dev = vlan_skip(dev_base); dev && i < *pos; - dev = vlan_skip(dev->next), ++i); + for_each_netdev(dev) { + if (!is_vlan_dev(dev)) + continue; + + if (i++ == *pos) + return dev; + } - return (i == *pos) ? dev : NULL; + return NULL; } static void *vlan_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net_device *dev; + ++*pos; - return vlan_skip((v == SEQ_START_TOKEN) - ? 
dev_base - : ((struct net_device *)v)->next); + dev = (struct net_device *)v; + if (v == SEQ_START_TOKEN) + dev = net_device_entry(&dev_base_head); + + for_each_netdev_continue(dev) { + if (!is_vlan_dev(dev)) + continue; + + return dev; + } + + return NULL; } static void vlan_seq_stop(struct seq_file *seq, void *v) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 690573bbf012..849deaf14108 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -475,11 +475,9 @@ void __exit br_cleanup_bridges(void) struct net_device *dev, *nxt; rtnl_lock(); - for (dev = dev_base; dev; dev = nxt) { - nxt = dev->next; + for_each_netdev_safe(dev, nxt) if (dev->priv_flags & IFF_EBRIDGE) del_br(dev->priv); - } rtnl_unlock(); } diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index eda0fbfc923a..bb15e9e259b1 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -27,7 +27,9 @@ static int get_bridge_ifindices(int *indices, int num) struct net_device *dev; int i = 0; - for (dev = dev_base; dev && i < num; dev = dev->next) { + for_each_netdev(dev) { + if (i >= num) + break; if (dev->priv_flags & IFF_EBRIDGE) indices[i++] = dev->ifindex; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 35facc0c11c2..0fcf6f073064 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -109,7 +109,8 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; int idx; - for (dev = dev_base, idx = 0; dev; dev = dev->next) { + idx = 0; + for_each_netdev(dev) { /* not a bridge port */ if (dev->br_port == NULL || idx < cb->args[0]) goto skip; diff --git a/net/core/dev.c b/net/core/dev.c index c305819b7266..f27d4ab181e6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -156,13 +156,13 @@ static spinlock_t net_dma_event_lock; #endif /* - * The @dev_base list is protected by @dev_base_lock and the rtnl + * The @dev_base_head list is protected by @dev_base_lock and the rtnl * semaphore. * * Pure readers hold dev_base_lock for reading. * * Writers must hold the rtnl semaphore while they loop through the - * dev_base list, and hold dev_base_lock for writing when they do the + * dev_base_head list, and hold dev_base_lock for writing when they do the * actual updates. This allows pure readers to access the list even * while a writer is preparing to update it. * @@ -174,11 +174,10 @@ static spinlock_t net_dma_event_lock; * unregister_netdevice(), which must be called with the rtnl * semaphore held. 
*/ -struct net_device *dev_base; -static struct net_device **dev_tail = &dev_base; +LIST_HEAD(dev_base_head); DEFINE_RWLOCK(dev_base_lock); -EXPORT_SYMBOL(dev_base); +EXPORT_SYMBOL(dev_base_head); EXPORT_SYMBOL(dev_base_lock); #define NETDEV_HASHBITS 8 @@ -567,11 +566,12 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) ASSERT_RTNL(); - for (dev = dev_base; dev; dev = dev->next) + for_each_netdev(dev) if (dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len)) - break; - return dev; + return dev; + + return NULL; } EXPORT_SYMBOL(dev_getbyhwaddr); @@ -581,11 +581,11 @@ struct net_device *__dev_getfirstbyhwtype(unsigned short type) struct net_device *dev; ASSERT_RTNL(); - for (dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) if (dev->type == type) - break; - } - return dev; + return dev; + + return NULL; } EXPORT_SYMBOL(__dev_getfirstbyhwtype); @@ -617,17 +617,19 @@ EXPORT_SYMBOL(dev_getfirstbyhwtype); struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask) { - struct net_device *dev; + struct net_device *dev, *ret; + ret = NULL; read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + for_each_netdev(dev) { if (((dev->flags ^ if_flags) & mask) == 0) { dev_hold(dev); + ret = dev; break; } } read_unlock(&dev_base_lock); - return dev; + return ret; } /** @@ -693,7 +695,7 @@ int dev_alloc_name(struct net_device *dev, const char *name) if (!inuse) return -ENOMEM; - for (d = dev_base; d; d = d->next) { + for_each_netdev(d) { if (!sscanf(d->name, name, &i)) continue; if (i < 0 || i >= max_netdevices) @@ -975,7 +977,7 @@ int register_netdevice_notifier(struct notifier_block *nb) rtnl_lock(); err = raw_notifier_chain_register(&netdev_chain, nb); if (!err) { - for (dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { nb->notifier_call(nb, NETDEV_REGISTER, dev); if (dev->flags & IFF_UP) @@ -2049,7 +2051,7 @@ static int dev_ifconf(char __user *arg) */ total = 0; - for (dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { for (i = 0; i < NPROTO; i++) { if (gifconf_list[i]) { int done; @@ -2081,26 +2083,28 @@ static int dev_ifconf(char __user *arg) * This is invoked by the /proc filesystem handler to display a device * in detail. */ -static struct net_device *dev_get_idx(loff_t pos) +void *dev_seq_start(struct seq_file *seq, loff_t *pos) { + loff_t off; struct net_device *dev; - loff_t i; - for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next); + read_lock(&dev_base_lock); + if (!*pos) + return SEQ_START_TOKEN; - return i == pos ? dev : NULL; -} + off = 1; + for_each_netdev(dev) + if (off++ == *pos) + return dev; -void *dev_seq_start(struct seq_file *seq, loff_t *pos) -{ - read_lock(&dev_base_lock); - return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN; + return NULL; } void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; - return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next; + return v == SEQ_START_TOKEN ? 
+ first_net_device() : next_net_device((struct net_device *)v); } void dev_seq_stop(struct seq_file *seq, void *v) @@ -3082,11 +3086,9 @@ int register_netdevice(struct net_device *dev) set_bit(__LINK_STATE_PRESENT, &dev->state); - dev->next = NULL; dev_init_scheduler(dev); write_lock_bh(&dev_base_lock); - *dev_tail = dev; - dev_tail = &dev->next; + list_add_tail(&dev->dev_list, &dev_base_head); hlist_add_head(&dev->name_hlist, head); hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); dev_hold(dev); @@ -3360,8 +3362,6 @@ void synchronize_net(void) void unregister_netdevice(struct net_device *dev) { - struct net_device *d, **dp; - BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -3381,19 +3381,11 @@ void unregister_netdevice(struct net_device *dev) dev_close(dev); /* And unlink it from device chain. */ - for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) { - if (d == dev) { - write_lock_bh(&dev_base_lock); - hlist_del(&dev->name_hlist); - hlist_del(&dev->index_hlist); - if (dev_tail == &dev->next) - dev_tail = dp; - *dp = d->next; - write_unlock_bh(&dev_base_lock); - break; - } - } - BUG_ON(!d); + write_lock_bh(&dev_base_lock); + list_del(&dev->dev_list); + hlist_del(&dev->name_hlist); + hlist_del(&dev->index_hlist); + write_unlock_bh(&dev_base_lock); dev->reg_state = NETREG_UNREGISTERING; diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 7d57bf77f3a3..5a54053386c8 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -223,7 +223,7 @@ static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) loff_t off = 0; read_lock(&dev_base_lock); - for (dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { if (off++ == *pos) return dev; } @@ -232,9 +232,8 @@ static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net_device *dev = v; ++*pos; - return dev->next; + return next_net_device((struct net_device *)v); } static void dev_mc_seq_stop(struct seq_file *seq, void *v) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cec111109155..8c971a2efe2a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -539,13 +539,16 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) int s_idx = cb->args[0]; struct net_device *dev; - for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { + idx = 0; + for_each_netdev(dev) { if (idx < s_idx) - continue; + goto cont; if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) break; +cont: + idx++; } cb->args[0] = idx; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index a205eaa87f52..9fbe87c93802 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -721,7 +721,7 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); struct sockaddr_dn *saddr = (struct sockaddr_dn *)uaddr; - struct net_device *dev; + struct net_device *dev, *ldev; int rv; if (addr_len != sizeof(struct sockaddr_dn)) @@ -746,14 +746,17 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (!(saddr->sdn_flags & SDF_WILD)) { if (dn_ntohs(saddr->sdn_nodeaddrl)) { read_lock(&dev_base_lock); - for(dev = dev_base; dev; dev = dev->next) { + ldev = NULL; + for_each_netdev(dev) { if (!dev->dn_ptr) continue; - if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) + if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) { + ldev = dev; break; + 
} } read_unlock(&dev_base_lock); - if (dev == NULL) + if (ldev == NULL) return -EADDRNOTAVAIL; } } diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 5c2a9951b638..764a56a13e38 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -799,9 +799,10 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) skip_ndevs = cb->args[0]; skip_naddr = cb->args[1]; - for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) { + idx = 0; + for_each_netdev(dev) { if (idx < skip_ndevs) - continue; + goto cont; else if (idx > skip_ndevs) { /* Only skip over addresses for first dev dumped * in this iteration (idx == skip_ndevs) */ @@ -809,18 +810,20 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) } if ((dn_db = dev->dn_ptr) == NULL) - continue; + goto cont; for (ifa = dn_db->ifa_list, dn_idx = 0; ifa; ifa = ifa->ifa_next, dn_idx++) { if (dn_idx < skip_naddr) - continue; + goto cont; if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) < 0) goto done; } +cont: + idx++; } done: cb->args[0] = idx; @@ -1296,7 +1299,7 @@ void dn_dev_devices_off(void) struct net_device *dev; rtnl_lock(); - for(dev = dev_base; dev; dev = dev->next) + for_each_netdev(dev) dn_dev_down(dev); rtnl_unlock(); @@ -1307,7 +1310,7 @@ void dn_dev_devices_on(void) struct net_device *dev; rtnl_lock(); - for(dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { if (dev->flags & IFF_UP) dn_dev_up(dev); } @@ -1325,62 +1328,56 @@ int unregister_dnaddr_notifier(struct notifier_block *nb) } #ifdef CONFIG_PROC_FS -static inline struct net_device *dn_dev_get_next(struct seq_file *seq, struct net_device *dev) +static inline int is_dn_dev(struct net_device *dev) { - do { - dev = dev->next; - } while(dev && !dev->dn_ptr); - - return dev; + return dev->dn_ptr != NULL; } -static struct net_device *dn_dev_get_idx(struct seq_file *seq, loff_t pos) +static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) { + int i; struct net_device *dev; - dev = dev_base; - if (dev && !dev->dn_ptr) - dev = dn_dev_get_next(seq, dev); - if (pos) { - while(dev && (dev = dn_dev_get_next(seq, dev))) - --pos; - } - return dev; -} + read_lock(&dev_base_lock); -static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) -{ - if (*pos) { - struct net_device *dev; - read_lock(&dev_base_lock); - dev = dn_dev_get_idx(seq, *pos - 1); - if (dev == NULL) - read_unlock(&dev_base_lock); - return dev; + if (*pos == 0) + return SEQ_START_TOKEN; + + i = 1; + for_each_netdev(dev) { + if (!is_dn_dev(dev)) + continue; + + if (i++ == *pos) + return dev; } - return SEQ_START_TOKEN; + + return NULL; } static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net_device *dev = v; - loff_t one = 1; + struct net_device *dev; - if (v == SEQ_START_TOKEN) { - dev = dn_dev_seq_start(seq, &one); - } else { - dev = dn_dev_get_next(seq, dev); - if (dev == NULL) - read_unlock(&dev_base_lock); - } ++*pos; - return dev; + + dev = (struct net_device *)v; + if (v == SEQ_START_TOKEN) + dev = net_device_entry(&dev_base_head); + + for_each_netdev_continue(dev) { + if (!is_dn_dev(dev)) + continue; + + return dev; + } + + return NULL; } static void dn_dev_seq_stop(struct seq_file *seq, void *v) { - if (v && v != SEQ_START_TOKEN) - read_unlock(&dev_base_lock); + read_unlock(&dev_base_lock); } static char *dn_type2asc(char type) diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 310a86268d2b..d2bc19d47950 100644 --- 
a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -602,7 +602,7 @@ static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa) /* Scan device list */ read_lock(&dev_base_lock); - for(dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { dn_db = dev->dn_ptr; if (dn_db == NULL) continue; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5d7337bcf0fe..a8bf106b7a61 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -886,7 +886,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old .iif = loopback_dev.ifindex, .oif = oldflp->oif }; struct dn_route *rt = NULL; - struct net_device *dev_out = NULL; + struct net_device *dev_out = NULL, *dev; struct neighbour *neigh = NULL; unsigned hash; unsigned flags = 0; @@ -925,15 +925,17 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old goto out; } read_lock(&dev_base_lock); - for(dev_out = dev_base; dev_out; dev_out = dev_out->next) { - if (!dev_out->dn_ptr) + for_each_netdev(dev) { + if (!dev->dn_ptr) continue; - if (!dn_dev_islocal(dev_out, oldflp->fld_src)) + if (!dn_dev_islocal(dev, oldflp->fld_src)) continue; - if ((dev_out->flags & IFF_LOOPBACK) && + if ((dev->flags & IFF_LOOPBACK) && oldflp->fld_dst && - !dn_dev_islocal(dev_out, oldflp->fld_dst)) + !dn_dev_islocal(dev, oldflp->fld_dst)) continue; + + dev_out = dev; break; } read_unlock(&dev_base_lock); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 088888db8b3d..7f95e6e9beeb 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -910,7 +910,7 @@ no_in_dev: */ read_lock(&dev_base_lock); rcu_read_lock(); - for (dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { if ((in_dev = __in_dev_get_rcu(dev)) == NULL) continue; @@ -989,7 +989,7 @@ __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, read_lock(&dev_base_lock); rcu_read_lock(); - for (dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { if ((in_dev = __in_dev_get_rcu(dev))) { addr = confirm_addr_indev(in_dev, dst, local, scope); if (addr) @@ -1182,23 +1182,26 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) int s_ip_idx, s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; - for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) { + idx = 0; + for_each_netdev(dev) { if (idx < s_idx) - continue; + goto cont; if (idx > s_idx) s_ip_idx = 0; if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) - continue; + goto cont; for (ifa = in_dev->ifa_list, ip_idx = 0; ifa; ifa = ifa->ifa_next, ip_idx++) { if (ip_idx < s_ip_idx) - continue; + goto cont; if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) <= 0) goto done; } +cont: + idx++; } done: @@ -1243,7 +1246,7 @@ void inet_forward_change(void) ipv4_devconf_dflt.forwarding = on; read_lock(&dev_base_lock); - for (dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { struct in_device *in_dev; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 2506021c2935..f4dd47453108 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2288,9 +2288,8 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) struct ip_mc_list *im = NULL; struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); - for (state->dev = dev_base, state->in_dev = NULL; - state->dev; - state->dev = state->dev->next) { + state->in_dev = NULL; + for_each_netdev(state->dev) { struct in_device *in_dev; in_dev = 
in_dev_get(state->dev); if (!in_dev) @@ -2316,7 +2315,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li read_unlock(&state->in_dev->mc_list_lock); in_dev_put(state->in_dev); } - state->dev = state->dev->next; + state->dev = next_net_device(state->dev); if (!state->dev) { state->in_dev = NULL; break; @@ -2450,9 +2449,9 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) struct ip_mc_list *im = NULL; struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq); - for (state->dev = dev_base, state->idev = NULL, state->im = NULL; - state->dev; - state->dev = state->dev->next) { + state->idev = NULL; + state->im = NULL; + for_each_netdev(state->dev) { struct in_device *idev; idev = in_dev_get(state->dev); if (unlikely(idev == NULL)) @@ -2488,7 +2487,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l read_unlock(&state->idev->mc_list_lock); in_dev_put(state->idev); } - state->dev = state->dev->next; + state->dev = next_net_device(state->dev); if (!state->dev) { state->idev = NULL; goto out; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 597c800b2fdc..342ca8d89458 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -192,7 +192,7 @@ static int __init ic_open_devs(void) if (dev_change_flags(&loopback_dev, loopback_dev.flags | IFF_UP) < 0) printk(KERN_ERR "IP-Config: Failed to open %s\n", loopback_dev.name); - for (dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { if (dev == &loopback_dev) continue; if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3452433cbc96..d02685c6bc69 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -449,7 +449,7 @@ static void addrconf_forward_change(void) struct inet6_dev *idev; read_lock(&dev_base_lock); - for (dev=dev_base; dev; dev=dev->next) { + for_each_netdev(dev) { rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { @@ -911,7 +911,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, read_lock(&dev_base_lock); rcu_read_lock(); - for (dev = dev_base; dev; dev=dev->next) { + for_each_netdev(dev) { struct inet6_dev *idev; struct inet6_ifaddr *ifa; @@ -2064,7 +2064,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) return; } - for (dev = dev_base; dev != NULL; dev = dev->next) { + for_each_netdev(dev) { struct in_device * in_dev = __in_dev_get_rtnl(dev); if (in_dev && (dev->flags & IFF_UP)) { struct in_ifaddr * ifa; @@ -2225,7 +2225,7 @@ static void ip6_tnl_add_linklocal(struct inet6_dev *idev) return; } /* then try to inherit it from any device */ - for (link_dev = dev_base; link_dev; link_dev = link_dev->next) { + for_each_netdev(link_dev) { if (!ipv6_inherit_linklocal(idev, link_dev)) return; } @@ -3257,14 +3257,15 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; - for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) { + idx = 0; + for_each_netdev(dev) { if (idx < s_idx) - continue; + goto cont; if (idx > s_idx) s_ip_idx = 0; ip_idx = 0; if ((idev = in6_dev_get(dev)) == NULL) - continue; + goto cont; read_lock_bh(&idev->lock); switch (type) { case UNICAST_ADDR: @@ -3311,6 +3312,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, } read_unlock_bh(&idev->lock); in6_dev_put(idev); +cont: + idx++; } done: if (err <= 0) { @@ -3575,16 +3578,19 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) 
struct inet6_dev *idev; read_lock(&dev_base_lock); - for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { + idx = 0; + for_each_netdev(dev) { if (idx < s_idx) - continue; + goto cont; if ((idev = in6_dev_get(dev)) == NULL) - continue; + goto cont; err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI); in6_dev_put(idev); if (err <= 0) break; +cont: + idx++; } read_unlock(&dev_base_lock); cb->args[0] = idx; @@ -4247,7 +4253,7 @@ void __exit addrconf_cleanup(void) * clean dev list. */ - for (dev=dev_base; dev; dev=dev->next) { + for_each_netdev(dev) { if ((idev = __in6_dev_get(dev)) == NULL) continue; addrconf_ifdown(dev, 1); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 09117d63256f..9b81264eb78f 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -423,14 +423,18 @@ static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr) */ int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr) { + int found = 0; + if (dev) return ipv6_chk_acast_dev(dev, addr); read_lock(&dev_base_lock); - for (dev=dev_base; dev; dev=dev->next) - if (ipv6_chk_acast_dev(dev, addr)) + for_each_netdev(dev) + if (ipv6_chk_acast_dev(dev, addr)) { + found = 1; break; + } read_unlock(&dev_base_lock); - return dev != 0; + return found; } @@ -447,9 +451,8 @@ static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) struct ifacaddr6 *im = NULL; struct ac6_iter_state *state = ac6_seq_private(seq); - for (state->dev = dev_base, state->idev = NULL; - state->dev; - state->dev = state->dev->next) { + state->idev = NULL; + for_each_netdev(state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (!idev) @@ -476,7 +479,7 @@ static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im read_unlock_bh(&state->idev->lock); in6_dev_put(state->idev); } - state->dev = state->dev->next; + state->dev = next_net_device(state->dev); if (!state->dev) { state->idev = NULL; break; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 6c2758951d60..3e308fb41b49 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2331,9 +2331,8 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) struct ifmcaddr6 *im = NULL; struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); - for (state->dev = dev_base, state->idev = NULL; - state->dev; - state->dev = state->dev->next) { + state->idev = NULL; + for_each_netdev(state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (!idev) @@ -2360,7 +2359,7 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr read_unlock_bh(&state->idev->lock); in6_dev_put(state->idev); } - state->dev = state->dev->next; + state->dev = next_net_device(state->dev); if (!state->dev) { state->idev = NULL; break; @@ -2475,9 +2474,9 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) struct ifmcaddr6 *im = NULL; struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); - for (state->dev = dev_base, state->idev = NULL, state->im = NULL; - state->dev; - state->dev = state->dev->next) { + state->idev = NULL; + state->im = NULL; + for_each_netdev(state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (unlikely(idev == NULL)) @@ -2513,7 +2512,7 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s read_unlock_bh(&state->idev->lock); in6_dev_put(state->idev); } - state->dev = state->dev->next; + state->dev = next_net_device(state->dev); if 
(!state->dev) { state->idev = NULL; goto out; diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index d12413cff5bd..d4b13a031fd5 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -160,8 +160,14 @@ static struct packet_type llc_tr_packet_type = { static int __init llc_init(void) { - if (dev_base->next) - memcpy(llc_station_mac_sa, dev_base->next->dev_addr, ETH_ALEN); + struct net_device *dev; + + dev = first_net_device(); + if (dev != NULL) + dev = next_net_device(dev); + + if (dev != NULL) + memcpy(llc_station_mac_sa, dev->dev_addr, ETH_ALEN); else memset(llc_station_mac_sa, 0, ETH_ALEN); dev_add_pack(&llc_packet_type); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 8e6bd4e9d82c..2f76e062609d 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -598,7 +598,7 @@ struct net_device *nr_dev_first(void) struct net_device *dev, *first = NULL; read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + for_each_netdev(dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; @@ -618,12 +618,13 @@ struct net_device *nr_dev_get(ax25_address *addr) struct net_device *dev; read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + for_each_netdev(dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } } + dev = NULL; out: read_unlock(&dev_base_lock); return dev; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 1f9aefd95a99..929a784a86d7 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -596,7 +596,7 @@ struct net_device *rose_dev_first(void) struct net_device *dev, *first = NULL; read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + for_each_netdev(dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; @@ -614,12 +614,13 @@ struct net_device *rose_dev_get(rose_address *addr) struct net_device *dev; read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + for_each_netdev(dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } } + dev = NULL; out: read_unlock(&dev_base_lock); return dev; @@ -630,10 +631,11 @@ static int rose_dev_exists(rose_address *addr) struct net_device *dev; read_lock(&dev_base_lock); - for (dev = dev_base; dev != NULL; dev = dev->next) { + for_each_netdev(dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) goto out; } + dev = NULL; out: read_unlock(&dev_base_lock); return dev != NULL; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 8699e7006d80..bec600af03ca 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -894,9 +894,10 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; read_lock(&dev_base_lock); - for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { + idx = 0; + for_each_netdev(dev) { if (idx < s_idx) - continue; + goto cont; if (idx > s_idx) s_q_idx = 0; q_idx = 0; @@ -910,6 +911,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) goto done; q_idx++; } +cont: + idx++; } done: diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index c361deb6cea9..d4afafc39138 100644 
--- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -170,7 +170,7 @@ static void sctp_get_local_addr_list(void) struct sctp_af *af; read_lock(&dev_base_lock); - for (dev = dev_base; dev; dev = dev->next) { + for_each_netdev(dev) { __list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); af->copy_addrlist(&sctp_local_addr_list, dev); diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 67bb29b44d1b..0ee6ded18f3a 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -120,16 +120,18 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev, static int enable_bearer(struct tipc_bearer *tb_ptr) { - struct net_device *dev = dev_base; + struct net_device *dev, *pdev; struct eth_bearer *eb_ptr = &eth_bearers[0]; struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; /* Find device with specified name */ - - while (dev && dev->name && strncmp(dev->name, driver_name, IFNAMSIZ)) { - dev = dev->next; - } + dev = NULL; + for_each_netdev(pdev) + if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { + dev = pdev; + break; + } if (!dev) return -ENODEV; -- cgit v1.2.3
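Since the conversion above touches two dozen files, one worked example of the reader-side idiom may help out-of-tree code following along. This is a hedged sketch, not taken from the patch itself; the counting function is invented:

/* Hypothetical read-side walk using the new helper; pure readers
 * still take dev_base_lock, exactly as before the conversion. */
static int example_count_running_devices(void)
{
	struct net_device *dev;
	int up = 0;

	read_lock(&dev_base_lock);
	for_each_netdev(dev)
		if (dev->flags & IFF_UP)
			up++;
	read_unlock(&dev_base_lock);
	return up;
}

Writers still hold the rtnl semaphore plus dev_base_lock for the actual list update, as the comment block in net/core/dev.c above spells out.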
From b40b4f79ce789e9e28d382c85006f62be2725282 Mon Sep 17 00:00:00 2001 From: Srinivas Aji Date: Thu, 3 May 2007 17:32:28 -0700 Subject: [TCP]: zero out rx_opt in tcp_disconnect() When the server drops its connection, the NFS client reconnects using the same socket after disconnecting. If the new connection's SYN,ACK doesn't contain the TCP timestamp option and the old connection's did, tp->tcp_header_len is recomputed assuming no timestamp header, but tp->rx_opt.tstamp_ok remains set. Then tcp_build_and_update_options() adds in a timestamp option past the end of the allocated TCP header, overwriting TCP data, or, when the data is in skb_shinfo(skb)->frags[], overwriting skb_shinfo(skb), causing a crash soon after. (The issue was debugged from such a crash.) Similarly, wscale_ok and sack_ok also get set based on the SYN,ACK packet but are not reset on disconnect, since they are only zeroed out at initialization. The patch zeroes out the entire tp->rx_opt struct in tcp_disconnect() to avoid this sort of problem. Signed-off-by: Srinivas Aji Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d6e488668171..8b124eafbb90 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1760,8 +1760,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_retrans(tp); inet_csk_delack_init(sk); tcp_init_send_head(sk); - tp->rx_opt.saw_tstamp = 0; - tcp_sack_reset(&tp->rx_opt); + memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); BUG_TRAP(!inet->num || icsk->icsk_bind_hash); -- cgit v1.2.3 From 9e71efcd6d659afb9d390eea69b558a7432ba23e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 4 May 2007 12:15:11 -0700 Subject: [NETLINK]: Remove bogus BUG_ON Remove the bogus BUG_ON(mutex_is_locked(nlk_sk(sk)->cb_mutex)): when the netlink_kernel_create caller specifies an external mutex, it might validly be locked. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index ac1ceadf4ed3..507828d7d4ae 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -142,7 +142,6 @@ static void netlink_sock_destruct(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); - BUG_ON(mutex_is_locked(nlk_sk(sk)->cb_mutex)); if (nlk->cb) { if (nlk->cb->done) nlk->cb->done(nlk->cb); -- cgit v1.2.3
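For context on the external mutex mentioned above: since the cb_mutex rework in this release cycle, netlink_kernel_create() accepts a caller-supplied mutex that is then held across dump callbacks. A hedged sketch of such a caller, with the protocol choice, handler and mutex invented for illustration (and the signature as I understand it to stand in this series):

/* Hypothetical kernel netlink socket with an external cb_mutex. */
static DEFINE_MUTEX(example_cb_mutex);

static void example_input(struct sock *sk, int len)
{
	/* drain and process sk->sk_receive_queue here */
}

static struct sock *example_create(void)
{
	return netlink_kernel_create(NETLINK_USERSOCK, 0, example_input,
				     &example_cb_mutex, THIS_MODULE);
}

Because that mutex may legitimately be held when the last socket reference drops, asserting it unlocked in the destructor was wrong, hence the removal.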
-EFAULT; @@ -647,10 +663,16 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; + struct iucv_sock *iucv = iucv_sk(sk); int target, copied = 0; - struct sk_buff *skb; + struct sk_buff *skb, *rskb, *cskb; int err = 0; + if ((sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) && + skb_queue_empty(&iucv->backlog_skb_q) && + skb_queue_empty(&sk->sk_receive_queue)) + return 0; + if (flags & (MSG_OOB)) return -EOPNOTSUPP; @@ -665,10 +687,12 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, copied = min_t(unsigned int, skb->len, len); - if (memcpy_toiovec(msg->msg_iov, skb->data, copied)) { + cskb = skb; + if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) { skb_queue_head(&sk->sk_receive_queue, skb); if (copied == 0) return -EFAULT; + goto done; } len -= copied; @@ -683,6 +707,18 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, } kfree_skb(skb); + + /* Queue backlog skbs */ + rskb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q); + while(rskb) { + if (sock_queue_rcv_skb(sk, rskb)) { + skb_queue_head(&iucv_sk(sk)->backlog_skb_q, + rskb); + break; + } else { + rskb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q); + } + } } else skb_queue_head(&sk->sk_receive_queue, skb); @@ -732,6 +768,9 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, if (sk->sk_state == IUCV_CLOSED) mask |= POLLHUP; + if (sk->sk_state == IUCV_DISCONN || sk->sk_state == IUCV_SEVERED) + mask |= POLLIN; + if (sock_writeable(sk)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else @@ -817,13 +856,6 @@ static int iucv_sock_release(struct socket *sock) iucv_sk(sk)->path = NULL; } - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime){ - lock_sock(sk); - err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, - sk->sk_lingertime); - release_sock(sk); - } - sock_orphan(sk); iucv_sock_kill(sk); return err; @@ -927,18 +959,52 @@ static void iucv_callback_connack(struct iucv_path *path, u8 ipuser[16]) sk->sk_state_change(sk); } +static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len, + struct sk_buff_head *fragmented_skb_q) +{ + int dataleft, size, copied = 0; + struct sk_buff *nskb; + + dataleft = len; + while(dataleft) { + if (dataleft >= sk->sk_rcvbuf / 4) + size = sk->sk_rcvbuf / 4; + else + size = dataleft; + + nskb = alloc_skb(size, GFP_ATOMIC | GFP_DMA); + if (!nskb) + return -ENOMEM; + + memcpy(nskb->data, skb->data + copied, size); + copied += size; + dataleft -= size; + + nskb->h.raw = nskb->data; + nskb->nh.raw = nskb->data; + nskb->len = size; + + skb_queue_tail(fragmented_skb_q, nskb); + } + + return 0; +} static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) { struct sock *sk = path->private; - struct sk_buff *skb; + struct iucv_sock *iucv = iucv_sk(sk); + struct sk_buff *skb, *fskb; + struct sk_buff_head fragmented_skb_q; int rc; + skb_queue_head_init(&fragmented_skb_q); + if (sk->sk_shutdown & RCV_SHUTDOWN) return; skb = alloc_skb(msg->length, GFP_ATOMIC | GFP_DMA); if (!skb) { - iucv_message_reject(path, msg); + iucv_path_sever(path, NULL); return; } @@ -952,14 +1018,39 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) kfree_skb(skb); return; } + if (skb->truesize >= sk->sk_rcvbuf / 4) { + rc = iucv_fragment_skb(sk, skb, msg->length, + &fragmented_skb_q); + kfree_skb(skb); + skb = NULL; + if (rc) { + iucv_path_sever(path, NULL); + return; + } + } else { + skb_reset_transport_header(skb); +
skb_reset_network_header(skb); + skb->len = msg->length; + } + } + /* Queue the fragmented skb */ + fskb = skb_dequeue(&fragmented_skb_q); + while(fskb) { + if (!skb_queue_empty(&iucv->backlog_skb_q)) + skb_queue_tail(&iucv->backlog_skb_q, fskb); + else if (sock_queue_rcv_skb(sk, fskb)) + skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, fskb); + fskb = skb_dequeue(&fragmented_skb_q); + } - skb_reset_transport_header(skb); - skb_reset_network_header(skb); - skb->len = msg->length; + /* Queue the original skb if it exists (was not fragmented) */ + if (skb) { + if (!skb_queue_empty(&iucv->backlog_skb_q)) + skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb); + else if (sock_queue_rcv_skb(sk, skb)) + skb_queue_tail(&iucv_sk(sk)->backlog_skb_q, skb); } - if (sock_queue_rcv_skb(sk, skb)) - kfree_skb(skb); } static void iucv_callback_txdone(struct iucv_path *path, @@ -971,17 +1062,27 @@ static void iucv_callback_txdone(struct iucv_path *path, struct sk_buff *list_skb = list->next; unsigned long flags; - spin_lock_irqsave(&list->lock, flags); + if (list_skb) { + spin_lock_irqsave(&list->lock, flags); + + do { + this = list_skb; + list_skb = list_skb->next; + } while (memcmp(&msg->tag, this->cb, 4) && list_skb); + + spin_unlock_irqrestore(&list->lock, flags); - do { - this = list_skb; - list_skb = list_skb->next; - } while (memcmp(&msg->tag, this->cb, 4)); + skb_unlink(this, &iucv_sk(sk)->send_skb_q); + kfree_skb(this); + } - spin_unlock_irqrestore(&list->lock, flags); + if (sk->sk_state == IUCV_CLOSING){ + if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) { + sk->sk_state = IUCV_CLOSED; + sk->sk_state_change(sk); + } + } - skb_unlink(this, &iucv_sk(sk)->send_skb_q); - kfree_skb(this); } static void iucv_callback_connrej(struct iucv_path *path, u8 ipuser[16]) -- cgit v1.2.3 From da99f0565477899f08b76ffcb32afbf6fa95d64a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 4 May 2007 12:23:27 -0700 Subject: [AF_IUCV/IUCV]: Add missing section annotations Add missing section annotations and fix some coding style issues.
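For readers unfamiliar with the section annotations being added, a minimal sketch (an invented example module, not from this patch) of what they mean to the build: __init code lives in a section the kernel frees once boot is complete; __exit code is discarded entirely when the code is built in, since built-in code never unloads; likewise, the __cpuinitdata marker on iucv_cpu_notifier lets that object be discarded when CPU hotplug support is not configured.

	#include <linux/init.h>
	#include <linux/module.h>

	static int __init example_init(void)
	{
		return 0;	/* init code freed after boot */
	}

	static void __exit example_exit(void)
	{
		/* exit code dropped entirely for built-in objects */
	}

	module_init(example_init);
	module_exit(example_exit);
	MODULE_LICENSE("GPL");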
Signed-off-by: Heiko Carstens Signed-off-by: Frank Pavlic --- net/iucv/af_iucv.c | 44 +++++++++++++++++++++++--------------------- net/iucv/iucv.c | 49 ++++++++++++++++++++++--------------------------- 2 files changed, 45 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 026704a47296..2f1373855a8b 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -45,7 +45,8 @@ static struct proto iucv_proto = { static void iucv_callback_rx(struct iucv_path *, struct iucv_message *); static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *); static void iucv_callback_connack(struct iucv_path *, u8 ipuser[16]); -static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8], u8 ipuser[16]); +static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8], + u8 ipuser[16]); static void iucv_callback_connrej(struct iucv_path *, u8 ipuser[16]); static struct iucv_sock_list iucv_sk_list = { @@ -152,7 +153,7 @@ static void iucv_sock_close(struct sock *sk) iucv_sock_clear_timer(sk); lock_sock(sk); - switch(sk->sk_state) { + switch (sk->sk_state) { case IUCV_LISTEN: iucv_sock_cleanup_listen(sk); break; @@ -164,7 +165,7 @@ static void iucv_sock_close(struct sock *sk) sk->sk_state = IUCV_CLOSING; sk->sk_state_change(sk); - if(!skb_queue_empty(&iucv->send_skb_q)) { + if (!skb_queue_empty(&iucv->send_skb_q)) { if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime) timeo = sk->sk_lingertime; else @@ -292,7 +293,7 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) struct iucv_sock *isk, *n; struct sock *sk; - list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q){ + list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q) { sk = (struct sock *) isk; lock_sock(sk); @@ -537,7 +538,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, /* Wait for an incoming connection */ add_wait_queue_exclusive(sk->sk_sleep, &wait); - while (!(nsk = iucv_accept_dequeue(sk, newsock))){ + while (!(nsk = iucv_accept_dequeue(sk, newsock))) { set_current_state(TASK_INTERRUPTIBLE); if (!timeo) { err = -EAGAIN; @@ -618,13 +619,13 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto out; } - if (sk->sk_state == IUCV_CONNECTED){ - if(!(skb = sock_alloc_send_skb(sk, len, - msg->msg_flags & MSG_DONTWAIT, - &err))) + if (sk->sk_state == IUCV_CONNECTED) { + if (!(skb = sock_alloc_send_skb(sk, len, + msg->msg_flags & MSG_DONTWAIT, + &err))) goto out; - if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)){ + if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { err = -EFAULT; goto fail; } @@ -710,7 +711,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, /* Queue backlog skbs */ rskb = skb_dequeue(&iucv_sk(sk)->backlog_skb_q); - while(rskb) { + while (rskb) { if (sock_queue_rcv_skb(sk, rskb)) { skb_queue_head(&iucv_sk(sk)->backlog_skb_q, rskb); @@ -731,7 +732,7 @@ static inline unsigned int iucv_accept_poll(struct sock *parent) struct iucv_sock *isk, *n; struct sock *sk; - list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q){ + list_for_each_entry_safe(isk, n, &iucv_sk(parent)->accept_q, accept_q) { sk = (struct sock *) isk; if (sk->sk_state == IUCV_CONNECTED) @@ -762,7 +763,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, mask |= POLLHUP; if (!skb_queue_empty(&sk->sk_receive_queue) || - (sk->sk_shutdown & RCV_SHUTDOWN)) + (sk->sk_shutdown & RCV_SHUTDOWN)) mask |= POLLIN | 
POLLRDNORM; if (sk->sk_state == IUCV_CLOSED) @@ -793,7 +794,7 @@ static int iucv_sock_shutdown(struct socket *sock, int how) return -EINVAL; lock_sock(sk); - switch(sk->sk_state) { + switch (sk->sk_state) { case IUCV_CLOSED: err = -ENOTCONN; goto fail; @@ -809,7 +810,7 @@ static int iucv_sock_shutdown(struct socket *sock, int how) err = iucv_message_send(iucv->path, &txmsg, IUCV_IPRMDATA, 0, (void *) prmmsg, 8); if (err) { - switch(err) { + switch (err) { case 1: err = -ENOTCONN; break; @@ -912,7 +913,7 @@ static int iucv_callback_connreq(struct iucv_path *path, /* Create the new socket */ nsk = iucv_sock_alloc(NULL, SOCK_STREAM, GFP_ATOMIC); - if (!nsk){ + if (!nsk) { err = iucv_path_sever(path, user_data); goto fail; } @@ -935,7 +936,7 @@ static int iucv_callback_connreq(struct iucv_path *path, path->msglim = IUCV_QUEUELEN_DEFAULT; err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk); - if (err){ + if (err) { err = iucv_path_sever(path, user_data); goto fail; } @@ -966,7 +967,7 @@ static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len, struct sk_buff *nskb; dataleft = len; - while(dataleft) { + while (dataleft) { if (dataleft >= sk->sk_rcvbuf / 4) size = sk->sk_rcvbuf / 4; else @@ -989,6 +990,7 @@ static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len, return 0; } + static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) { struct sock *sk = path->private; @@ -1035,7 +1037,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) } /* Queue the fragmented skb */ fskb = skb_dequeue(&fragmented_skb_q); - while(fskb) { + while (fskb) { if (!skb_queue_empty(&iucv->backlog_skb_q)) skb_queue_tail(&iucv->backlog_skb_q, fskb); else if (sock_queue_rcv_skb(sk, fskb)) @@ -1076,7 +1078,7 @@ static void iucv_callback_txdone(struct iucv_path *path, kfree_skb(this); } - if (sk->sk_state == IUCV_CLOSING){ + if (sk->sk_state == IUCV_CLOSING) { if (skb_queue_empty(&iucv_sk(sk)->send_skb_q)) { sk->sk_state = IUCV_CLOSED; sk->sk_state_change(sk); @@ -1123,7 +1125,7 @@ static struct net_proto_family iucv_sock_family_ops = { .create = iucv_sock_create, }; -static int afiucv_init(void) +static int __init afiucv_init(void) { int err; diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 903bdb6eaaa1..fb3faf72e850 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -32,7 +32,6 @@ #include #include - #include #include #include @@ -69,7 +68,7 @@ #define IUCV_IPNORPY 0x10 #define IUCV_IPALL 0x80 -static int iucv_bus_match (struct device *dev, struct device_driver *drv) +static int iucv_bus_match(struct device *dev, struct device_driver *drv) { return 0; } @@ -78,8 +77,11 @@ struct bus_type iucv_bus = { .name = "iucv", .match = iucv_bus_match, }; +EXPORT_SYMBOL(iucv_bus); struct device *iucv_root; +EXPORT_SYMBOL(iucv_root); + static int iucv_available; /* General IUCV interrupt structure */ @@ -405,7 +407,7 @@ static void iucv_declare_cpu(void *data) rc = iucv_call_b2f0(IUCV_DECLARE_BUFFER, parm); if (rc) { char *err = "Unknown"; - switch(rc) { + switch (rc) { case 0x03: err = "Directory error"; break; @@ -588,7 +590,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block iucv_cpu_notifier = { +static struct notifier_block __cpuinitdata iucv_cpu_notifier = { .notifier_call = iucv_cpu_notify, }; @@ -691,6 +693,7 @@ out_mutex: mutex_unlock(&iucv_register_mutex); return rc; } +EXPORT_SYMBOL(iucv_register); /** * iucv_unregister @@ -723,6 +726,7 @@ void 
iucv_unregister(struct iucv_handler *handler, int smp) iucv_setmask_mp(); mutex_unlock(&iucv_register_mutex); } +EXPORT_SYMBOL(iucv_unregister); /** * iucv_path_accept @@ -761,6 +765,7 @@ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler, local_bh_enable(); return rc; } +EXPORT_SYMBOL(iucv_path_accept); /** * iucv_path_connect @@ -824,6 +829,7 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler, spin_unlock_bh(&iucv_table_lock); return rc; } +EXPORT_SYMBOL(iucv_path_connect); /** * iucv_path_quiesce: @@ -850,6 +856,7 @@ int iucv_path_quiesce(struct iucv_path *path, u8 userdata[16]) local_bh_enable(); return rc; } +EXPORT_SYMBOL(iucv_path_quiesce); /** * iucv_path_resume: @@ -890,7 +897,6 @@ int iucv_path_sever(struct iucv_path *path, u8 userdata[16]) { int rc; - preempt_disable(); if (iucv_active_cpu != smp_processor_id()) spin_lock_bh(&iucv_table_lock); @@ -904,6 +910,7 @@ int iucv_path_sever(struct iucv_path *path, u8 userdata[16]) preempt_enable(); return rc; } +EXPORT_SYMBOL(iucv_path_sever); /** * iucv_message_purge @@ -936,6 +943,7 @@ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg, local_bh_enable(); return rc; } +EXPORT_SYMBOL(iucv_message_purge); /** * iucv_message_receive @@ -1006,6 +1014,7 @@ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, local_bh_enable(); return rc; } +EXPORT_SYMBOL(iucv_message_receive); /** * iucv_message_reject @@ -1034,6 +1043,7 @@ int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg) local_bh_enable(); return rc; } +EXPORT_SYMBOL(iucv_message_reject); /** * iucv_message_reply @@ -1077,6 +1087,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg, local_bh_enable(); return rc; } +EXPORT_SYMBOL(iucv_message_reply); /** * iucv_message_send @@ -1125,6 +1136,7 @@ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg, local_bh_enable(); return rc; } +EXPORT_SYMBOL(iucv_message_send); /** * iucv_message_send2way @@ -1181,6 +1193,7 @@ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg, local_bh_enable(); return rc; } +EXPORT_SYMBOL(iucv_message_send2way); /** * iucv_path_pending @@ -1572,7 +1585,7 @@ static void iucv_external_interrupt(u16 code) * * Allocates and initializes various data structures. */ -static int iucv_init(void) +static int __init iucv_init(void) { int rc; @@ -1583,7 +1596,7 @@ static int iucv_init(void) rc = iucv_query_maxconn(); if (rc) goto out; - rc = register_external_interrupt (0x4000, iucv_external_interrupt); + rc = register_external_interrupt(0x4000, iucv_external_interrupt); if (rc) goto out; rc = bus_register(&iucv_bus); @@ -1594,7 +1607,7 @@ static int iucv_init(void) rc = PTR_ERR(iucv_root); goto out_bus; } - /* Note: GFP_DMA used used to get memory below 2G */ + /* Note: GFP_DMA used to get memory below 2G */ iucv_irq_data = percpu_alloc(sizeof(struct iucv_irq_data), GFP_KERNEL|GFP_DMA); if (!iucv_irq_data) { @@ -1632,7 +1645,7 @@ out: * * Frees everything allocated from iucv_init. 
*/ -static void iucv_exit(void) +static void __exit iucv_exit(void) { struct iucv_irq_list *p, *n; @@ -1653,24 +1666,6 @@ static void iucv_exit(void) subsys_initcall(iucv_init); module_exit(iucv_exit); -/** - * Export all public stuff - */ -EXPORT_SYMBOL (iucv_bus); -EXPORT_SYMBOL (iucv_root); -EXPORT_SYMBOL (iucv_register); -EXPORT_SYMBOL (iucv_unregister); -EXPORT_SYMBOL (iucv_path_accept); -EXPORT_SYMBOL (iucv_path_connect); -EXPORT_SYMBOL (iucv_path_quiesce); -EXPORT_SYMBOL (iucv_path_sever); -EXPORT_SYMBOL (iucv_message_purge); -EXPORT_SYMBOL (iucv_message_receive); -EXPORT_SYMBOL (iucv_message_reject); -EXPORT_SYMBOL (iucv_message_reply); -EXPORT_SYMBOL (iucv_message_send); -EXPORT_SYMBOL (iucv_message_send2way); - MODULE_AUTHOR("(C) 2001 IBM Corp. by Fritz Elfert (felfert@millenux.com)"); MODULE_DESCRIPTION("Linux for S/390 IUCV lowlevel driver"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 224711df5c00f7540b89f32a8225866031977f17 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 4 May 2007 12:41:11 -0700 Subject: [AF_RXRPC]: Sort out MTU handling. Sort out the MTU determination and handling in AF_RXRPC: (1) If it's present, parse the additional information supplied by the peer at the end of the ACK packet (struct ackinfo) to determine the MTU sizes that peer is willing to support. (2) Initialise the MTU size to that peer from the kernel's routing records. (3) Send ACKs rather than ACKALLs as the former carry the additional info, and the latter do not. (4) Declare the interface MTU size in outgoing ACKs as a maximum amount of data that can be stuffed into an RxRPC packet without it having to be fragmented to come in this computer's NIC. (5) If sendmsg() is given MSG_MORE then it should allocate an skb of the maximum size rather than one just big enough for the data it's got left to process on the theory that there is more data to come that it can append to that packet. This means, for example, that if AFS does a large StoreData op, all the packets barring the last will be filled to the maximum unfragmented size. Signed-off-by: David Howells Signed-off-by: David S. 
Miller --- net/rxrpc/ar-ack.c | 80 +++++++++++++++++++++++++++++++++++++++++++-------- net/rxrpc/ar-error.c | 2 ++ net/rxrpc/ar-output.c | 2 +- net/rxrpc/ar-peer.c | 45 ++++++++++++++++++++++++++++- 4 files changed, 115 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index fc07a926df56..657ee69f2133 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -542,6 +542,38 @@ static void rxrpc_zap_tx_window(struct rxrpc_call *call) kfree(acks_window); } +/* + * process the extra information that may be appended to an ACK packet + */ +static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, + unsigned latest, int nAcks) +{ + struct rxrpc_ackinfo ackinfo; + struct rxrpc_peer *peer; + unsigned mtu; + + if (skb_copy_bits(skb, nAcks + 3, &ackinfo, sizeof(ackinfo)) < 0) { + _leave(" [no ackinfo]"); + return; + } + + _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", + latest, + ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU), + ntohl(ackinfo.rwind), ntohl(ackinfo.jumbo_max)); + + mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU)); + + peer = call->conn->trans->peer; + if (mtu < peer->maxdata) { + spin_lock_bh(&peer->lock); + peer->maxdata = mtu; + peer->mtu = mtu + peer->hdrsize; + spin_unlock_bh(&peer->lock); + _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); + } +} + /* * process packets in the reception queue */ @@ -606,6 +638,8 @@ process_further: rxrpc_acks[ack.reason], ack.nAcks); + rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks); + if (ack.reason == RXRPC_ACK_PING) { _proto("Rx ACK %%%u PING Request", latest); rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, @@ -801,9 +835,9 @@ void rxrpc_process_call(struct work_struct *work) struct msghdr msg; struct kvec iov[5]; unsigned long bits; - __be32 data; + __be32 data, pad; size_t len; - int genbit, loop, nbit, ioc, ret; + int genbit, loop, nbit, ioc, ret, mtu; u32 abort_code = RX_PROTOCOL_ERROR; u8 *acks = NULL; @@ -899,9 +933,30 @@ void rxrpc_process_call(struct work_struct *work) } if (test_bit(RXRPC_CALL_ACK_FINAL, &call->events)) { - hdr.type = RXRPC_PACKET_TYPE_ACKALL; genbit = RXRPC_CALL_ACK_FINAL; - goto send_message; + + ack.bufferSpace = htons(8); + ack.maxSkew = 0; + ack.serial = 0; + ack.reason = RXRPC_ACK_IDLE; + ack.nAcks = 0; + call->ackr_reason = 0; + + spin_lock_bh(&call->lock); + ack.serial = call->ackr_serial; + ack.previousPacket = call->ackr_prev_seq; + ack.firstPacket = htonl(call->rx_data_eaten + 1); + spin_unlock_bh(&call->lock); + + pad = 0; + + iov[1].iov_base = &ack; + iov[1].iov_len = sizeof(ack); + iov[2].iov_base = &pad; + iov[2].iov_len = 3; + iov[3].iov_base = &ackinfo; + iov[3].iov_len = sizeof(ackinfo); + goto send_ACK; } if (call->events & ((1 << RXRPC_CALL_RCVD_BUSY) | @@ -971,8 +1026,6 @@ void rxrpc_process_call(struct work_struct *work) /* consider sending an ordinary ACK */ if (test_bit(RXRPC_CALL_ACK, &call->events)) { - __be32 pad; - _debug("send ACK: window: %d - %d { %lx }", call->rx_data_eaten, call->ackr_win_top, call->ackr_window[0]); @@ -997,12 +1050,6 @@ void rxrpc_process_call(struct work_struct *work) ack.serial = 0; ack.reason = 0; - ackinfo.rxMTU = htonl(5692); -// ackinfo.rxMTU = htonl(call->conn->trans->peer->maxdata); - ackinfo.maxMTU = htonl(call->conn->trans->peer->maxdata); - ackinfo.rwind = htonl(32); - ackinfo.jumbo_max = htonl(4); - spin_lock_bh(&call->lock); ack.reason = call->ackr_reason; ack.serial = call->ackr_serial; @@ -1116,6 +1163,15 @@ send_ACK_with_skew: ack.maxSkew = 
htons(atomic_read(&call->conn->hi_serial) - ntohl(ack.serial)); send_ACK: + mtu = call->conn->trans->peer->if_mtu; + mtu -= call->conn->trans->peer->hdrsize; + ackinfo.maxMTU = htonl(mtu); + ackinfo.rwind = htonl(32); + + /* permit the peer to send us jumbo packets if it wants to */ + ackinfo.rxMTU = htonl(5692); + ackinfo.jumbo_max = htonl(4); + hdr.serial = htonl(atomic_inc_return(&call->conn->serial)); _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", ntohl(hdr.serial), diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c index 2c27df1ffa17..6cb3e8890e7e 100644 --- a/net/rxrpc/ar-error.c +++ b/net/rxrpc/ar-error.c @@ -100,8 +100,10 @@ void rxrpc_UDP_error_report(struct sock *sk) } if (mtu < peer->mtu) { + spin_lock_bh(&peer->lock); peer->mtu = mtu; peer->maxdata = peer->mtu - peer->hdrsize; + spin_unlock_bh(&peer->lock); _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); } diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index 5cdde4a48ed1..591c4422205e 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -582,7 +582,7 @@ static int rxrpc_send_data(struct kiocb *iocb, max &= ~(call->conn->size_align - 1UL); chunk = max; - if (chunk > len) + if (chunk > len && !more) chunk = len; space = chunk + call->conn->size_align; diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index d399de4a7fe2..ce08b78647ce 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -19,6 +19,7 @@ #include #include #include +#include <net/route.h> #include "ar-internal.h" static LIST_HEAD(rxrpc_peers); @@ -27,6 +28,47 @@ static DECLARE_WAIT_QUEUE_HEAD(rxrpc_peer_wq); static void rxrpc_destroy_peer(struct work_struct *work); +/* + * assess the MTU size for the network interface through which this peer is + * reached + */ +static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) +{ + struct rtable *rt; + struct flowi fl; + int ret; + + peer->if_mtu = 1500; + + memset(&fl, 0, sizeof(fl)); + + switch (peer->srx.transport.family) { + case AF_INET: + fl.oif = 0; + fl.proto = IPPROTO_UDP; + fl.nl_u.ip4_u.saddr = 0; + fl.nl_u.ip4_u.daddr = peer->srx.transport.sin.sin_addr.s_addr; + fl.nl_u.ip4_u.tos = 0; + /* assume AFS.CM talking to AFS.FS */ + fl.uli_u.ports.sport = htons(7001); + fl.uli_u.ports.dport = htons(7000); + break; + default: + BUG(); + } + + ret = ip_route_output_key(&rt, &fl); + if (ret < 0) { + kleave(" [route err %d]", ret); + return; + } + + peer->if_mtu = dst_mtu(&rt->u.dst); + dst_release(&rt->u.dst); + + kleave(" [if_mtu %u]", peer->if_mtu); +} + /* * allocate a new peer */ @@ -47,7 +89,8 @@ static struct rxrpc_peer *rxrpc_alloc_peer(struct sockaddr_rxrpc *srx, peer->debug_id = atomic_inc_return(&rxrpc_debug_id); memcpy(&peer->srx, srx, sizeof(*srx)); - peer->mtu = peer->if_mtu = 65535; + rxrpc_assess_MTU_size(peer); + peer->mtu = peer->if_mtu; if (srx->transport.family == AF_INET) { peer->hdrsize = sizeof(struct iphdr); -- cgit v1.2.3 From af11e31609d93765c1b22611592543e028f7aa54 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Fri, 4 May 2007 12:55:13 -0700 Subject: [XFRM] SAD info TLV aggregation Aggregate the SAD info TLVs. Signed-off-by: Jamal Hadi Salim Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- net/xfrm/xfrm_state.c | 2 +- net/xfrm/xfrm_user.c | 22 +++++++++------------- 2 files changed, 10 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f3a61ebd8d65..9955ff4da0a2 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -421,7 +421,7 @@ restart: } EXPORT_SYMBOL(xfrm_state_flush); -void xfrm_sad_getinfo(struct xfrm_sadinfo *si) +void xfrm_sad_getinfo(struct xfrmk_sadinfo *si) { spin_lock_bh(&xfrm_state_lock); si->sadcnt = xfrm_state_num; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4210d91624cd..c35b9ea3b62b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -749,7 +749,8 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) { - struct xfrm_sadinfo si; + struct xfrmk_sadinfo si; + struct xfrmu_sadhinfo sh; struct nlmsghdr *nlh; u32 *f; @@ -761,12 +762,11 @@ static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) *f = flags; xfrm_sad_getinfo(&si); - if (flags & XFRM_SAD_HMASK) - NLA_PUT_U32(skb, XFRMA_SADHMASK, si.sadhcnt); - if (flags & XFRM_SAD_HMAX) - NLA_PUT_U32(skb, XFRMA_SADHMAX, si.sadhmcnt); - if (flags & XFRM_SAD_CNT) - NLA_PUT_U32(skb, XFRMA_SADCNT, si.sadcnt); + sh.sadhmcnt = si.sadhmcnt; + sh.sadhcnt = si.sadhcnt; + + NLA_PUT_U32(skb, XFRMA_SAD_CNT, si.sadcnt); + NLA_PUT(skb, XFRMA_SAD_HINFO, sizeof(sh), &sh); return nlmsg_end(skb, nlh); @@ -784,12 +784,8 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, u32 seq = nlh->nlmsg_seq; int len = NLMSG_LENGTH(sizeof(u32)); - if (*flags & XFRM_SAD_HMASK) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SAD_HMAX) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SAD_CNT) - len += RTA_SPACE(sizeof(u32)); + len += RTA_SPACE(sizeof(struct xfrmu_sadhinfo)); + len += RTA_SPACE(sizeof(u32)); r_skb = alloc_skb(len, GFP_ATOMIC); -- cgit v1.2.3 From 5a6d34162f5c6f522f857df274f1c8240f161e11 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Fri, 4 May 2007 12:55:39 -0700 Subject: [XFRM] SPD info TLV aggregation Aggregate the SPD info TLVs. Signed-off-by: Jamal Hadi Salim Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_policy.c | 2 +- net/xfrm/xfrm_user.c | 51 ++++++++++++++++---------------------------------- 2 files changed, 17 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 263e34e45265..95271e8426a1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -579,7 +579,7 @@ static inline int xfrm_byidx_should_resize(int total) return 0; } -void xfrm_spd_getinfo(struct xfrm_spdinfo *si) +void xfrm_spd_getinfo(struct xfrmk_spdinfo *si) { read_lock_bh(&xfrm_policy_lock); si->incnt = xfrm_policy_count[XFRM_POLICY_IN]; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c35b9ea3b62b..b14c7e590c31 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -674,7 +674,9 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) { - struct xfrm_spdinfo si; + struct xfrmk_spdinfo si; + struct xfrmu_spdinfo spc; + struct xfrmu_spdhinfo sph; struct nlmsghdr *nlh; u32 *f; @@ -685,23 +687,17 @@ static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) f = nlmsg_data(nlh); *f = flags; xfrm_spd_getinfo(&si); - - if (flags & XFRM_SPD_HMASK) - NLA_PUT_U32(skb, XFRMA_SPDHMASK, si.spdhcnt); - if (flags & XFRM_SPD_HMAX) - NLA_PUT_U32(skb, XFRMA_SPDHMAX, si.spdhmcnt); - if (flags & XFRM_SPD_ICNT) - NLA_PUT_U32(skb, XFRMA_SPDICNT, si.incnt); - if (flags & XFRM_SPD_OCNT) - NLA_PUT_U32(skb, XFRMA_SPDOCNT, si.outcnt); - if (flags & XFRM_SPD_FCNT) - NLA_PUT_U32(skb, XFRMA_SPDFCNT, si.fwdcnt); - if (flags & XFRM_SPD_ISCNT) - NLA_PUT_U32(skb, XFRMA_SPDISCNT, si.inscnt); - if (flags & XFRM_SPD_OSCNT) - NLA_PUT_U32(skb, XFRMA_SPDOSCNT, si.inscnt); - if (flags & XFRM_SPD_FSCNT) - NLA_PUT_U32(skb, XFRMA_SPDFSCNT, si.inscnt); + spc.incnt = si.incnt; + spc.outcnt = si.outcnt; + spc.fwdcnt = si.fwdcnt; + spc.inscnt = si.inscnt; + spc.outscnt = si.outscnt; + spc.fwdscnt = si.fwdscnt; + sph.spdhcnt = si.spdhcnt; + sph.spdhmcnt = si.spdhmcnt; + + NLA_PUT(skb, XFRMA_SPD_INFO, sizeof(spc), &spc); + NLA_PUT(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph); return nlmsg_end(skb, nlh); @@ -719,23 +715,8 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, u32 seq = nlh->nlmsg_seq; int len = NLMSG_LENGTH(sizeof(u32)); - - if (*flags & XFRM_SPD_HMASK) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SPD_HMAX) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SPD_ICNT) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SPD_OCNT) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SPD_FCNT) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SPD_ISCNT) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SPD_OSCNT) - len += RTA_SPACE(sizeof(u32)); - if (*flags & XFRM_SPD_FSCNT) - len += RTA_SPACE(sizeof(u32)); + len += RTA_SPACE(sizeof(struct xfrmu_spdinfo)); + len += RTA_SPACE(sizeof(struct xfrmu_spdhinfo)); r_skb = alloc_skb(len, GFP_ATOMIC); if (r_skb == NULL) -- cgit v1.2.3 From 16d00fb7765a43a1b05989062e985d283b3a1f2d Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 May 2007 13:34:09 -0700 Subject: [SCTP]: Verify all destination ports in sctp_connectx. We need to make sure that all destination ports are the same, since the association really must not connect to multiple different ports at once. This was reported on the sctp-impl list. Signed-off-by: Vlad Yasevich Signed-off-by: David S. 
Miller --- net/sctp/socket.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2fc0a92caa78..b2ffab62a229 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -972,6 +972,7 @@ static int __sctp_connect(struct sock* sk, int walk_size = 0; union sctp_addr *sa_addr; void *addr_buf; + unsigned short port; sp = sctp_sk(sk); ep = sp->ep; @@ -992,6 +993,7 @@ static int __sctp_connect(struct sock* sk, while (walk_size < addrs_size) { sa_addr = (union sctp_addr *)addr_buf; af = sctp_get_af_specific(sa_addr->sa.sa_family); + port = ntohs(sa_addr->v4.sin_port); /* If the address family is not supported or if this address * causes the address buffer to overflow return EINVAL. @@ -1005,6 +1007,12 @@ static int __sctp_connect(struct sock* sk, if (err) goto out_free; + /* Make sure the destination port is correctly set + * in all addresses. + */ + if (asoc && asoc->peer.port && asoc->peer.port != port) + goto out_free; + memcpy(&to, sa_addr, af->sockaddr_len); /* Check if there already is a matching association on the -- cgit v1.2.3 From ce5325c1338acf965f4300f4976eac2129aeb439 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 May 2007 13:34:49 -0700 Subject: [SCTP]: Fix the SO_REUSEADDR handling to be similar to TCP. Update the SO_REUSEADDR handling to also check for listen state. This way multiple listening server sockets can't be created and they will not steal packets from each other. Reported by Paolo Galtieri Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b2ffab62a229..9f1a908776de 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5020,7 +5020,8 @@ pp_found: struct hlist_node *node; SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n"); - if (pp->fastreuse && sk->sk_reuse) + if (pp->fastreuse && sk->sk_reuse && + sk->sk_state != SCTP_SS_LISTENING) goto success; /* Run through the list of sockets bound to the port @@ -5037,7 +5038,8 @@ pp_found: struct sctp_endpoint *ep2; ep2 = sctp_sk(sk2)->ep; - if (reuse && sk2->sk_reuse) + if (reuse && sk2->sk_reuse && + sk2->sk_state != SCTP_SS_LISTENING) continue; if (sctp_bind_addr_match(&ep2->base.bind_addr, addr, @@ -5058,9 +5060,13 @@ pp_not_found: * if sk->sk_reuse is too (that is, if the caller requested * SO_REUSEADDR on this socket -sk-). */ - if (hlist_empty(&pp->owner)) - pp->fastreuse = sk->sk_reuse ? 1 : 0; - else if (pp->fastreuse && !sk->sk_reuse) + if (hlist_empty(&pp->owner)) { + if (sk->sk_reuse && sk->sk_state != SCTP_SS_LISTENING) + pp->fastreuse = 1; + else + pp->fastreuse = 0; + } else if (pp->fastreuse && + (!sk->sk_reuse || sk->sk_state == SCTP_SS_LISTENING)) pp->fastreuse = 0; /* We are set, so fill up all the data in the hash table + * sockets FIXME: Blurry, NPI (ipg). */ success: - inet_sk(sk)->num = snum; if (!sctp_sk(sk)->bind_hash) { + inet_sk(sk)->num = snum; sk_add_bind_node(sk, &pp->owner); sctp_sk(sk)->bind_hash = pp; } @@ -5142,12 +5148,16 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) * This is not currently spelled out in the SCTP sockets * extensions draft, but follows the practice as seen in TCP * sockets. + * + * Additionally, turn off the fastreuse flag since we are now listening */ + sk->sk_state = SCTP_SS_LISTENING; if (!ep->base.bind_addr.port) { if (sctp_autobind(sk)) return -EAGAIN; - } - sk->sk_state = SCTP_SS_LISTENING; + } else + sctp_sk(sk)->bind_hash->fastreuse = 0; + sctp_hash_endpoint(ep); return 0; } @@ -5185,11 +5195,13 @@ SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog) * extensions draft, but follows the practice as seen in TCP * sockets. */ + sk->sk_state = SCTP_SS_LISTENING; if (!ep->base.bind_addr.port) { if (sctp_autobind(sk)) return -EAGAIN; - } - sk->sk_state = SCTP_SS_LISTENING; + } else + sctp_sk(sk)->bind_hash->fastreuse = 0; + sk->sk_max_ack_backlog = backlog; sctp_hash_endpoint(ep); return 0; -- cgit v1.2.3
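The rule being ported here is easiest to see from userspace. A small sketch (TCP shown for brevity, since the patch makes SCTP behave the same way; the port number is arbitrary):

	#include <arpa/inet.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/socket.h>

	/* Create a TCP socket with SO_REUSEADDR and bind it to *:7777. */
	static int bound_socket(void)
	{
		struct sockaddr_in a;
		int one = 1;
		int fd = socket(AF_INET, SOCK_STREAM, 0);

		setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
		memset(&a, 0, sizeof(a));
		a.sin_family = AF_INET;
		a.sin_port = htons(7777);
		return bind(fd, (struct sockaddr *)&a, sizeof(a)) < 0 ? -1 : fd;
	}

	int main(void)
	{
		int a = bound_socket();		/* succeeds */
		int b;

		listen(a, 8);
		b = bound_socket();		/* fails with EADDRINUSE: reuse
						 * no longer applies once the
						 * first socket is listening */
		printf("second bind: %s\n", b < 0 ? "refused" : "accepted");
		return 0;
	}

Had the second bind happened before listen(), the two SO_REUSEADDR sockets could have shared the port; that pre-listen/post-listen distinction is exactly what the sk_state checks added to sctp_get_port() encode in the fastreuse logic.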
From 827bf12236fbafc02bc899aec1b37c342c8cf4e5 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Fri, 4 May 2007 13:36:30 -0700 Subject: [SCTP]: Re-order SCTP initializations to avoid race with sctp_rcv() Signed-off-by: Sridhar Samudrala Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 49 +++++++++++++++++++-------------- net/sctp/protocol.c | 79 +++++++++++++++++++++++++++++++---------------------- 2 files changed, 75 insertions(+), 53 deletions(-) (limited to 'net') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index ca527a27dd05..84cd53635fe8 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -992,45 +992,52 @@ static struct sctp_pf sctp_pf_inet6_specific = { .af = &sctp_ipv6_specific, }; -/* Initialize IPv6 support and register with inet6 stack. */ +/* Initialize IPv6 support and register with socket layer. */ int sctp_v6_init(void) { - int rc = proto_register(&sctpv6_prot, 1); + int rc; + /* Register the SCTP specific PF_INET6 functions. */ + sctp_register_pf(&sctp_pf_inet6_specific, PF_INET6); + + /* Register the SCTP specific AF_INET6 functions. */ + sctp_register_af(&sctp_ipv6_specific); + + rc = proto_register(&sctpv6_prot, 1); if (rc) - goto out; - /* Register inet6 protocol. */ - rc = -EAGAIN; - if (inet6_add_protocol(&sctpv6_protocol, IPPROTO_SCTP) < 0) - goto out_unregister_sctp_proto; + return rc; /* Add SCTPv6(UDP and TCP style) to inetsw6 linked list. */ inet6_register_protosw(&sctpv6_seqpacket_protosw); inet6_register_protosw(&sctpv6_stream_protosw); - /* Register the SCTP specific PF_INET6 functions. */ - sctp_register_pf(&sctp_pf_inet6_specific, PF_INET6); - - /* Register the SCTP specific AF_INET6 functions. */ - sctp_register_af(&sctp_ipv6_specific); + return 0; +} +/* Register with inet6 layer. */ +int sctp_v6_add_protocol(void) +{ /* Register notifier for inet6 address additions/deletions. */ register_inet6addr_notifier(&sctp_inet6addr_notifier); - rc = 0; -out: - return rc; -out_unregister_sctp_proto: - proto_unregister(&sctpv6_prot); - goto out; + + if (inet6_add_protocol(&sctpv6_protocol, IPPROTO_SCTP) < 0) + return -EAGAIN; + + return 0; } /* IPv6 specific exit support. */ void sctp_v6_exit(void) { - list_del(&sctp_ipv6_specific.list); - inet6_del_protocol(&sctpv6_protocol, IPPROTO_SCTP); inet6_unregister_protosw(&sctpv6_seqpacket_protosw); inet6_unregister_protosw(&sctpv6_stream_protosw); - unregister_inet6addr_notifier(&sctp_inet6addr_notifier); proto_unregister(&sctpv6_prot); + list_del(&sctp_ipv6_specific.list); +} + +/* Unregister with inet6 layer. 
*/ +void sctp_v6_del_protocol(void) +{ + inet6_del_protocol(&sctpv6_protocol, IPPROTO_SCTP); + unregister_inet6addr_notifier(&sctp_inet6addr_notifier); } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index d4afafc39138..34bab36637ac 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -975,28 +975,14 @@ SCTP_STATIC __init int sctp_init(void) if (!sctp_sanity_check()) goto out; - status = proto_register(&sctp_prot, 1); - if (status) - goto out; - - /* Add SCTP to inet_protos hash table. */ - status = -EAGAIN; - if (inet_add_protocol(&sctp_protocol, IPPROTO_SCTP) < 0) - goto err_add_protocol; - - /* Add SCTP(TCP and UDP style) to inetsw linked list. */ - inet_register_protosw(&sctp_seqpacket_protosw); - inet_register_protosw(&sctp_stream_protosw); - - /* Allocate a cache pools. */ + /* Allocate bind_bucket and chunk caches. */ status = -ENOBUFS; sctp_bucket_cachep = kmem_cache_create("sctp_bind_bucket", sizeof(struct sctp_bind_bucket), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!sctp_bucket_cachep) - goto err_bucket_cachep; + goto out; sctp_chunk_cachep = kmem_cache_create("sctp_chunk", sizeof(struct sctp_chunk), @@ -1153,6 +1139,14 @@ SCTP_STATIC __init int sctp_init(void) INIT_LIST_HEAD(&sctp_address_families); sctp_register_af(&sctp_ipv4_specific); + status = proto_register(&sctp_prot, 1); + if (status) + goto err_proto_register; + + /* Register SCTP(UDP and TCP style) with socket layer. */ + inet_register_protosw(&sctp_seqpacket_protosw); + inet_register_protosw(&sctp_stream_protosw); + status = sctp_v6_init(); if (status) goto err_v6_init; @@ -1166,19 +1160,39 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize the local address list. */ INIT_LIST_HEAD(&sctp_local_addr_list); - sctp_get_local_addr_list(); /* Register notifier for inet address additions/deletions. */ register_inetaddr_notifier(&sctp_inetaddr_notifier); + /* Register SCTP with inet layer. */ + if (inet_add_protocol(&sctp_protocol, IPPROTO_SCTP) < 0) { + status = -EAGAIN; + goto err_add_protocol; + } + + /* Register SCTP with inet6 layer. */ + status = sctp_v6_add_protocol(); + if (status) + goto err_v6_add_protocol; + __unsafe(THIS_MODULE); status = 0; out: return status; +err_v6_add_protocol: + inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); + unregister_inetaddr_notifier(&sctp_inetaddr_notifier); +err_add_protocol: + sctp_free_local_addr_list(); + sock_release(sctp_ctl_socket); err_ctl_sock_init: sctp_v6_exit(); err_v6_init: + inet_unregister_protosw(&sctp_stream_protosw); + inet_unregister_protosw(&sctp_seqpacket_protosw); + proto_unregister(&sctp_prot); +err_proto_register: sctp_sysctl_unregister(); list_del(&sctp_ipv4_specific.list); free_pages((unsigned long)sctp_port_hashtable, @@ -1192,19 +1206,13 @@ err_ehash_alloc: sizeof(struct sctp_hashbucket))); err_ahash_alloc: sctp_dbg_objcnt_exit(); -err_init_proc: sctp_proc_exit(); +err_init_proc: cleanup_sctp_mibs(); err_init_mibs: kmem_cache_destroy(sctp_chunk_cachep); err_chunk_cachep: kmem_cache_destroy(sctp_bucket_cachep); -err_bucket_cachep: - inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); - inet_unregister_protosw(&sctp_seqpacket_protosw); - inet_unregister_protosw(&sctp_stream_protosw); -err_add_protocol: - proto_unregister(&sctp_prot); goto out; } @@ -1215,8 +1223,9 @@ SCTP_STATIC __exit void sctp_exit(void) * up all the remaining associations and all that memory. */ - /* Unregister notifier for inet address additions/deletions. */ - unregister_inetaddr_notifier(&sctp_inetaddr_notifier); + /* Unregister with inet6/inet layers. 
*/ + sctp_v6_del_protocol(); + inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); /* Free the local address list. */ sctp_free_local_addr_list(); @@ -1224,7 +1233,16 @@ SCTP_STATIC __exit void sctp_exit(void) /* Free the control endpoint. */ sock_release(sctp_ctl_socket); + /* Cleanup v6 initializations. */ sctp_v6_exit(); + + /* Unregister with socket layer. */ + inet_unregister_protosw(&sctp_stream_protosw); + inet_unregister_protosw(&sctp_seqpacket_protosw); + + /* Unregister notifier for inet address additions/deletions. */ + unregister_inetaddr_notifier(&sctp_inetaddr_notifier); + sctp_sysctl_unregister(); list_del(&sctp_ipv4_specific.list); @@ -1236,16 +1254,13 @@ SCTP_STATIC __exit void sctp_exit(void) get_order(sctp_port_hashsize * sizeof(struct sctp_bind_hashbucket))); - kmem_cache_destroy(sctp_chunk_cachep); - kmem_cache_destroy(sctp_bucket_cachep); - sctp_dbg_objcnt_exit(); sctp_proc_exit(); cleanup_sctp_mibs(); - inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); - inet_unregister_protosw(&sctp_seqpacket_protosw); - inet_unregister_protosw(&sctp_stream_protosw); + kmem_cache_destroy(sctp_chunk_cachep); + kmem_cache_destroy(sctp_bucket_cachep); + proto_unregister(&sctp_prot); } -- cgit v1.2.3 From 07d939677166cc4f000c767196872a9becc2697b Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 4 May 2007 13:55:27 -0700 Subject: [SCTP]: Set assoc_id correctly during INIT collision. During the INIT/COOKIE-ACK collision cases, it's possible to get into a situation where the association id is not yet set at the time of the user event generation. As a result, user events have an association id set to 0 which will confuse applications. This happens if we hit case B of duplicate cookie processing. In the particular example found and provided by Oscar Isaula , flow looks like this: A B ---- INIT-------> (lost) <---------INIT------ ---- INIT-ACK---> <------ Cookie ECHO When the Cookie Echo is received, we end up trying to update the association that was created on A as a result of the (lost) INIT, but that association doesn't have the ID set yet. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 29 +++++++++++++++++++++++++++++ net/sctp/sm_make_chunk.c | 15 ++------------- net/sctp/sm_sideeffect.c | 35 +++++++++++++++++++++++++++++++++++ net/sctp/sm_statefuns.c | 29 +++++++++++------------------ 4 files changed, 77 insertions(+), 31 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index db73ef97485a..df94e3cdfba3 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1103,6 +1103,13 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->ssnmap = new->ssnmap; new->ssnmap = NULL; } + + if (!asoc->assoc_id) { + /* get a new association id since we don't have one + * yet. 
+ */ + sctp_assoc_set_id(asoc, GFP_ATOMIC); + } } } @@ -1375,3 +1382,25 @@ out: sctp_read_unlock(&asoc->base.addr_lock); return found; } + +/* Set an association id for a given association */ +int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) +{ + int assoc_id; + int error = 0; +retry: + if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp))) + return -ENOMEM; + + spin_lock_bh(&sctp_assocs_id_lock); + error = idr_get_new_above(&sctp_assocs_id, (void *)asoc, + 1, &assoc_id); + spin_unlock_bh(&sctp_assocs_id_lock); + if (error == -EAGAIN) + goto retry; + else if (error) + return error; + + asoc->assoc_id = (sctp_assoc_t) assoc_id; + return error; +} diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index be783a3761c4..8d18f570c2e6 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1939,7 +1939,6 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, * association. */ if (!asoc->temp) { - int assoc_id; int error; asoc->ssnmap = sctp_ssnmap_new(asoc->c.sinit_max_instreams, @@ -1947,19 +1946,9 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, if (!asoc->ssnmap) goto clean_up; - retry: - if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp))) + error = sctp_assoc_set_id(asoc, gfp); + if (error) goto clean_up; - spin_lock_bh(&sctp_assocs_id_lock); - error = idr_get_new_above(&sctp_assocs_id, (void *)asoc, 1, - &assoc_id); - spin_unlock_bh(&sctp_assocs_id_lock); - if (error == -EAGAIN) - goto retry; - else if (error) - goto clean_up; - - asoc->assoc_id = (sctp_assoc_t) assoc_id; } /* ADDIP Section 4.1 ASCONF Chunk Procedures diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index b37a7adeb150..d9fad4f6ffc3 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -862,6 +862,33 @@ static void sctp_cmd_set_sk_err(struct sctp_association *asoc, int error) sk->sk_err = error; } +/* Helper function to generate an association change event */ +static void sctp_cmd_assoc_change(sctp_cmd_seq_t *commands, + struct sctp_association *asoc, + u8 state) +{ + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_assoc_change(asoc, 0, state, 0, + asoc->c.sinit_num_ostreams, + asoc->c.sinit_max_instreams, + NULL, GFP_ATOMIC); + if (ev) + sctp_ulpq_tail_event(&asoc->ulpq, ev); +} + +/* Helper function to generate an adaptation indication event */ +static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands, + struct sctp_association *asoc) +{ + struct sctp_ulpevent *ev; + + ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC); + + if (ev) + sctp_ulpq_tail_event(&asoc->ulpq, ev); +} + /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real * functionality there. 
@@ -1485,6 +1512,14 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_SET_SK_ERR: sctp_cmd_set_sk_err(asoc, cmd->obj.error); break; + case SCTP_CMD_ASSOC_CHANGE: + sctp_cmd_assoc_change(commands, asoc, + cmd->obj.u8); + break; + case SCTP_CMD_ADAPTATION_IND: + sctp_cmd_adaptation_ind(commands, asoc); + break; + default: printk(KERN_WARNING "Impossible command: %u, %p\n", cmd->verb, cmd->obj.ptr); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 9e28a5d51200..f02ce3dddb7b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1656,7 +1656,6 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep, struct sctp_association *new_asoc) { sctp_init_chunk_t *peer_init; - struct sctp_ulpevent *ev; struct sctp_chunk *repl; /* new_asoc is a brand-new association, so these are not yet @@ -1687,34 +1686,28 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep, * D) IMPLEMENTATION NOTE: An implementation may choose to * send the Communication Up notification to the SCTP user * upon reception of a valid COOKIE ECHO chunk. + * + * Sadly, this needs to be implemented as a side-effect, because + * we are not guaranteed to have set the association id of the real + * association and so these notifications need to be delayed until + * the association id is allocated. */ - ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP, 0, - new_asoc->c.sinit_num_ostreams, - new_asoc->c.sinit_max_instreams, - NULL, GFP_ATOMIC); - if (!ev) - goto nomem_ev; - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_CHANGE, SCTP_U8(SCTP_COMM_UP)); /* Sockets API Draft Section 5.3.1.6 * When a peer sends a Adaptation Layer Indication parameter , SCTP * delivers this notification to inform the application that of the * peers requested adaptation layer. + * + * This also needs to be done as a side effect for the same reason as + * above. */ - if (asoc->peer.adaptation_ind) { - ev = sctp_ulpevent_make_adaptation_indication(asoc, GFP_ATOMIC); - if (!ev) - goto nomem_ev; - - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, - SCTP_ULPEVENT(ev)); - } + if (asoc->peer.adaptation_ind) + sctp_add_cmd_sf(commands, SCTP_CMD_ADAPTATION_IND, SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; -nomem_ev: - sctp_chunk_free(repl); nomem: return SCTP_DISPOSITION_NOMEM; } -- cgit v1.2.3
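As background on the helper factored out above: sctp_assoc_set_id() wraps the idr allocation idiom of this kernel generation, in which the caller preloads a free node and retries on -EAGAIN because another CPU may consume the preload between the two calls. A generic sketch of the pattern (names invented; the lock and gfp policy belong to the caller):

	#include <linux/idr.h>
	#include <linux/spinlock.h>

	static int example_alloc_id(struct idr *idr, spinlock_t *lock,
				    void *ptr, gfp_t gfp, int *id)
	{
		int error;

	retry:
		/* Preload a free layer; may sleep if gfp allows it. */
		if (!idr_pre_get(idr, gfp))
			return -ENOMEM;

		spin_lock_bh(lock);
		error = idr_get_new_above(idr, ptr, 1, id);
		spin_unlock_bh(lock);

		/* Another CPU consumed our preloaded node; try again. */
		if (error == -EAGAIN)
			goto retry;

		return error;
	}

This mirrors the retry loop visible in sctp_assoc_set_id() itself; later kernels replaced the idr_pre_get()/idr_get_new_above() pair with idr_alloc(), but the two-step structure shown here is what the 2.6-era API required.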