From b595076a180a56d1bb170e6eceda6eb9d76f4cd3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Mon, 1 Nov 2010 15:38:34 -0400
Subject: tree-wide: fix comment/printk typos
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

"gadget", "through", "command", "maintain", "maintain", "controller", "address",
"between", "initiali[zs]e", "instead", "function", "select", "already",
"equal", "access", "management", "hierarchy", "registration", "interest",
"relative", "memory", "offset", "already",

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 35dfb8318483..89204e8c0e14 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6096,7 +6096,7 @@ static void __net_exit default_device_exit(struct net *net)
 static void __net_exit default_device_exit_batch(struct list_head *net_list)
 {
 	/* At exit all network devices most be removed from a network
-	 * namespace.  Do this in the reverse order of registeration.
+	 * namespace.  Do this in the reverse order of registration.
 	 * Do this across as many network namespaces as possible to
 	 * improve batching efficiency.
 	 */
-- 
cgit v1.2.3


From df32cc193ad88f7b1326b90af799c927b27f7654 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 1 Nov 2010 12:55:52 -0700
Subject: net: check queue_index from sock is valid for device

In dev_pick_tx recompute the queue index if the value stored in the
socket is greater than or equal to the number of real queues for the
device.  The saved index in the sock structure is not guaranteed to
be appropriate for the egress device (this could happen on a route
change or in presence of tunnelling).  The result of the queue index
being bad would be to return a bogus queue (crash could prersumably
follow).

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 35dfb8318483..0dd54a69dace 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2131,7 +2131,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 	} else {
 		struct sock *sk = skb->sk;
 		queue_index = sk_tx_queue_get(sk);
-		if (queue_index < 0) {
+		if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) {
 
 			queue_index = 0;
 			if (dev->real_num_tx_queues > 1)
-- 
cgit v1.2.3


From b194a3674fba6d9f9e470084d192c7cb99194a62 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sat, 30 Oct 2010 11:08:52 +0000
Subject: net/core/dev.c: Update WARN uses

Coalesce long formats.
Add missing newlines.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 0dd54a69dace..5968c822c999 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1817,8 +1817,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
 			dev->ethtool_ops->get_drvinfo(dev, &info);
 
-		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
-			"ip_summed=%d",
+		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n",
 		     info.driver, dev ? dev->features : 0L,
 		     skb->sk ? skb->sk->sk_route_caps : 0L,
 		     skb->len, skb->data_len, skb->ip_summed);
-- 
cgit v1.2.3


From e1e78db628b33c657944865e3bca01ee59cc5b80 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 29 Oct 2010 12:14:53 +0000
Subject: offloading: Make scatter/gather more tolerant of vlans.

When checking if it is necessary to linearize a packet, we currently
use vlan_features if the packet contains either an in-band or out-
of-band vlan tag.  However, in-band tags aren't special in any way
for scatter/gather since they are part of the packet buffer and are
simply more data to DMA.  Therefore, only use vlan_features for out-
of-band tags, which could potentially have some interaction with
scatter/gather.

Signed-off-by: Jesse Gross <jesse@nicira.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
Reviewed-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 5968c822c999..0b403d503311 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1976,15 +1976,20 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 static inline int skb_needs_linearize(struct sk_buff *skb,
 				      struct net_device *dev)
 {
-	int features = dev->features;
+	if (skb_is_nonlinear(skb)) {
+		int features = dev->features;
 
-	if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb))
-		features &= dev->vlan_features;
+		if (vlan_tx_tag_present(skb))
+			features &= dev->vlan_features;
 
-	return skb_is_nonlinear(skb) &&
-	       ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
-		(skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
-					      illegal_highdma(dev, skb))));
+		return (skb_has_frag_list(skb) &&
+			!(features & NETIF_F_FRAGLIST)) ||
+			(skb_shinfo(skb)->nr_frags &&
+			(!(features & NETIF_F_SG) ||
+			illegal_highdma(dev, skb)));
+	}
+
+	return 0;
 }
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
-- 
cgit v1.2.3


From c8d5bcd1aff89199cde4bd82c5c40fb704c8bba4 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 29 Oct 2010 12:14:54 +0000
Subject: offloading: Support multiple vlan tags in GSO.

We assume that hardware TSO can't support multiple levels of vlan tags
but we allow it to be done.  Therefore, enable GSO to parse these tags
so we can fallback to software.

Signed-off-by: Jesse Gross <jesse@nicira.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 0b403d503311..368930a988e3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1794,16 +1794,18 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_type *ptype;
 	__be16 type = skb->protocol;
+	int vlan_depth = ETH_HLEN;
 	int err;
 
-	if (type == htons(ETH_P_8021Q)) {
-		struct vlan_ethhdr *veh;
+	while (type == htons(ETH_P_8021Q)) {
+		struct vlan_hdr *vh;
 
-		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
 			return ERR_PTR(-EINVAL);
 
-		veh = (struct vlan_ethhdr *)skb->data;
-		type = veh->h_vlan_encapsulated_proto;
+		vh = (struct vlan_hdr *)(skb->data + vlan_depth);
+		type = vh->h_vlan_encapsulated_proto;
+		vlan_depth += VLAN_HLEN;
 	}
 
 	skb_reset_mac_header(skb);
-- 
cgit v1.2.3


From 58e998c6d23988490162cef0784b19ea274d90bb Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Fri, 29 Oct 2010 12:14:55 +0000
Subject: offloading: Force software GSO for multiple vlan tags.

We currently use vlan_features to check for TSO support if there is
a vlan tag.  However, it's quite likely that the NIC is not able to
do TSO when there is an arbitrary number of tags.  Therefore if there
is more than one tag (in-band or out-of-band), fall back to software
emulation.

Signed-off-by: Jesse Gross <jesse@nicira.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 368930a988e3..8b500c3e0297 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1968,6 +1968,22 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 	}
 }
 
+int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev)
+{
+	__be16 protocol = skb->protocol;
+
+	if (protocol == htons(ETH_P_8021Q)) {
+		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+		protocol = veh->h_vlan_encapsulated_proto;
+	} else if (!skb->vlan_tci)
+		return dev->features;
+
+	if (protocol != htons(ETH_P_8021Q))
+		return dev->features & dev->vlan_features;
+	else
+		return 0;
+}
+
 /*
  * Returns true if either:
  *	1. skb has frag_list and the device doesn't support FRAGLIST, or
-- 
cgit v1.2.3


From ed9af2e839c06c18f721da2c768fbb444c4a10e5 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 9 Nov 2010 10:47:30 +0000
Subject: net: Move TX queue allocation to alloc_netdev_mq

TX queues are now allocated in alloc_netdev_mq and freed in
free_netdev.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 8b500c3e0297..75490670e0a9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5136,10 +5136,6 @@ int register_netdevice(struct net_device *dev)
 	if (ret)
 		goto out;
 
-	ret = netif_alloc_netdev_queues(dev);
-	if (ret)
-		goto out;
-
 	netdev_init_queues(dev);
 
 	/* Init, if this function is available */
@@ -5599,6 +5595,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	dev->num_tx_queues = queue_count;
 	dev->real_num_tx_queues = queue_count;
+	if (netif_alloc_netdev_queues(dev))
+		goto free_pcpu;
 
 #ifdef CONFIG_RPS
 	dev->num_rx_queues = queue_count;
@@ -5619,6 +5617,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 free_pcpu:
 	free_percpu(dev->pcpu_refcnt);
+	kfree(dev->_tx);
 free_p:
 	kfree(p);
 	return NULL;
-- 
cgit v1.2.3


From fe8222406c8277a21172479d3a8283d31c209028 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 9 Nov 2010 10:47:38 +0000
Subject: net: Simplify RX queue allocation

This patch move RX queue allocation to alloc_netdev_mq and freeing of
the queues to free_netdev (symmetric to TX queue allocation).  Each
kobject RX queue takes a reference to the queue's device so that the
device can't be freed before all the kobjects have been released-- this
obviates the need for reference counts specific to RX queues.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 75490670e0a9..8725d168d1f5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5051,12 +5051,8 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 	}
 	dev->_rx = rx;
 
-	/*
-	 * Set a pointer to first element in the array which holds the
-	 * reference count.
-	 */
 	for (i = 0; i < count; i++)
-		rx[i].first = rx;
+		rx[i].dev = dev;
 #endif
 	return 0;
 }
@@ -5132,10 +5128,6 @@ int register_netdevice(struct net_device *dev)
 
 	dev->iflink = -1;
 
-	ret = netif_alloc_rx_queues(dev);
-	if (ret)
-		goto out;
-
 	netdev_init_queues(dev);
 
 	/* Init, if this function is available */
@@ -5601,6 +5593,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 #ifdef CONFIG_RPS
 	dev->num_rx_queues = queue_count;
 	dev->real_num_rx_queues = queue_count;
+	if (netif_alloc_rx_queues(dev))
+		goto free_pcpu;
 #endif
 
 	dev->gso_max_size = GSO_MAX_SIZE;
@@ -5618,6 +5612,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 free_pcpu:
 	free_percpu(dev->pcpu_refcnt);
 	kfree(dev->_tx);
+#ifdef CONFIG_RPS
+	kfree(dev->_rx);
+#endif
+
 free_p:
 	kfree(p);
 	return NULL;
@@ -5639,6 +5637,9 @@ void free_netdev(struct net_device *dev)
 	release_net(dev_net(dev));
 
 	kfree(dev->_tx);
+#ifdef CONFIG_RPS
+	kfree(dev->_rx);
+#endif
 
 	kfree(rcu_dereference_raw(dev->ingress_queue));
 
-- 
cgit v1.2.3


From 6b35308850e1679741e8b646cfb7bb3ab5369888 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 15 Nov 2010 20:15:03 -0800
Subject: net: Export netif_get_vlan_features().

ERROR: "netif_get_vlan_features" [drivers/net/xen-netfront.ko] undefined!

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 8725d168d1f5..381b8e280162 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1983,6 +1983,7 @@ int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev)
 	else
 		return 0;
 }
+EXPORT_SYMBOL(netif_get_vlan_features);
 
 /*
  * Returns true if either:
-- 
cgit v1.2.3


From 3853b5841c01a3f492fe137afaad9c209e5162c6 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sun, 21 Nov 2010 13:17:29 +0000
Subject: xps: Improvements in TX queue selection

In dev_pick_tx, don't do work in calculating queue
index or setting
the index in the sock unless the device has more than one queue.  This
allows the sock to be set only with a queue index of a multi-queue
device which is desirable if device are stacked like in a tunnel.

We also allow the mapping of a socket to queue to be changed.  To
maintain in order packet transmission a flag (ooo_okay) has been
added to the sk_buff structure.  If a transport layer sets this flag
on a packet, the transmit queue can be changed for the socket.
Presumably, the transport would set this if there was no possbility
of creating OOO packets (for instance, there are no packets in flight
for the socket).  This patch includes the modification in TCP output
for setting this flag.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 381b8e280162..7b17674a29ec 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2148,20 +2148,24 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 	int queue_index;
 	const struct net_device_ops *ops = dev->netdev_ops;
 
-	if (ops->ndo_select_queue) {
+	if (dev->real_num_tx_queues == 1)
+		queue_index = 0;
+	else if (ops->ndo_select_queue) {
 		queue_index = ops->ndo_select_queue(dev, skb);
 		queue_index = dev_cap_txqueue(dev, queue_index);
 	} else {
 		struct sock *sk = skb->sk;
 		queue_index = sk_tx_queue_get(sk);
-		if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) {
 
-			queue_index = 0;
-			if (dev->real_num_tx_queues > 1)
-				queue_index = skb_tx_hash(dev, skb);
+		if (queue_index < 0 || skb->ooo_okay ||
+		    queue_index >= dev->real_num_tx_queues) {
+			int old_index = queue_index;
 
-			if (sk) {
-				struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
+			queue_index = skb_tx_hash(dev, skb);
+
+			if (queue_index != old_index && sk) {
+				struct dst_entry *dst =
+				    rcu_dereference_check(sk->sk_dst_cache, 1);
 
 				if (dst && skb_dst(skb) == dst)
 					sk_tx_queue_set(sk, queue_index);
-- 
cgit v1.2.3


From 1d24eb4815d1e0e8b451ecc546645f8ef1176d4f Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sun, 21 Nov 2010 13:17:27 +0000
Subject: xps: Transmit Packet Steering

This patch implements transmit packet steering (XPS) for multiqueue
devices.  XPS selects a transmit queue during packet transmission based
on configuration.  This is done by mapping the CPU transmitting the
packet to a queue.  This is the transmit side analogue to RPS-- where
RPS is selecting a CPU based on receive queue, XPS selects a queue
based on the CPU (previously there was an XPS patch from Eric
Dumazet, but that might more appropriately be called transmit completion
steering).

Each transmit queue can be associated with a number of CPUs which will
use the queue to send packets.  This is configured as a CPU mask on a
per queue basis in:

/sys/class/net/eth<n>/queues/tx-<n>/xps_cpus

The mappings are stored per device in an inverted data structure that
maps CPUs to queues.  In the netdevice structure this is an array of
num_possible_cpu structures where each structure holds and array of
queue_indexes for queues which that CPU can use.

The benefits of XPS are improved locality in the per queue data
structures.  Also, transmit completions are more likely to be done
nearer to the sending thread, so this should promote locality back
to the socket on free (e.g. UDP).  The benefits of XPS are dependent on
cache hierarchy, application load, and other factors.  XPS would
nominally be configured so that a queue would only be shared by CPUs
which are sharing a cache, the degenerative configuration woud be that
each CPU has it's own queue.

Below are some benchmark results which show the potential benfit of
this patch.  The netperf test has 500 instances of netperf TCP_RR test
with 1 byte req. and resp.

bnx2x on 16 core AMD
   XPS (16 queues, 1 TX queue per CPU)  1234K at 100% CPU
   No XPS (16 queues)                   996K at 100% CPU

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 3 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 7b17674a29ec..c852f0038a08 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1557,12 +1557,16 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
  */
 int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 {
+	int rc;
+
 	if (txq < 1 || txq > dev->num_tx_queues)
 		return -EINVAL;
 
 	if (dev->reg_state == NETREG_REGISTERED) {
 		ASSERT_RTNL();
 
+		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
+						  txq);
 		if (txq < dev->real_num_tx_queues)
 			qdisc_reset_all_tx_gt(dev, txq);
 	}
@@ -2142,6 +2146,44 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 	return queue_index;
 }
 
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_RPS
+	struct xps_dev_maps *dev_maps;
+	struct xps_map *map;
+	int queue_index = -1;
+
+	rcu_read_lock();
+	dev_maps = rcu_dereference(dev->xps_maps);
+	if (dev_maps) {
+		map = rcu_dereference(
+		    dev_maps->cpu_map[raw_smp_processor_id()]);
+		if (map) {
+			if (map->len == 1)
+				queue_index = map->queues[0];
+			else {
+				u32 hash;
+				if (skb->sk && skb->sk->sk_hash)
+					hash = skb->sk->sk_hash;
+				else
+					hash = (__force u16) skb->protocol ^
+					    skb->rxhash;
+				hash = jhash_1word(hash, hashrnd);
+				queue_index = map->queues[
+				    ((u64)hash * map->len) >> 32];
+			}
+			if (unlikely(queue_index >= dev->real_num_tx_queues))
+				queue_index = -1;
+		}
+	}
+	rcu_read_unlock();
+
+	return queue_index;
+#else
+	return -1;
+#endif
+}
+
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 					struct sk_buff *skb)
 {
@@ -2161,7 +2203,9 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 		    queue_index >= dev->real_num_tx_queues) {
 			int old_index = queue_index;
 
-			queue_index = skb_tx_hash(dev, skb);
+			queue_index = get_xps_queue(dev, skb);
+			if (queue_index < 0)
+				queue_index = skb_tx_hash(dev, skb);
 
 			if (queue_index != old_index && sk) {
 				struct dst_entry *dst =
@@ -5066,6 +5110,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 {
 	unsigned int count = dev->num_tx_queues;
 	struct netdev_queue *tx;
+	int i;
 
 	BUG_ON(count < 1);
 
@@ -5076,6 +5121,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 		return -ENOMEM;
 	}
 	dev->_tx = tx;
+
+	for (i = 0; i < count; i++)
+		tx[i].dev = dev;
+
 	return 0;
 }
 
@@ -5083,8 +5132,6 @@ static void netdev_init_one_queue(struct net_device *dev,
 				  struct netdev_queue *queue,
 				  void *_unused)
 {
-	queue->dev = dev;
-
 	/* Initialize queue lock */
 	spin_lock_init(&queue->_xmit_lock);
 	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
-- 
cgit v1.2.3


From bf26414510103448ad3dc069c7422462f03ea3d7 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 26 Nov 2010 08:36:09 +0000
Subject: xps: Add CONFIG_XPS

This patch adds XPS_CONFIG option to enable and disable XPS.  This is
done in the same manner as RPS_CONFIG.  This is also fixes build
failure in XPS code when SMP is not enabled.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index c852f0038a08..3259d2c323a6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1567,6 +1567,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 
 		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
 						  txq);
+		if (rc)
+			return rc;
+
 		if (txq < dev->real_num_tx_queues)
 			qdisc_reset_all_tx_gt(dev, txq);
 	}
@@ -2148,7 +2151,7 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 
 static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 {
-#ifdef CONFIG_RPS
+#ifdef CONFIG_XPS
 	struct xps_dev_maps *dev_maps;
 	struct xps_map *map;
 	int queue_index = -1;
@@ -5085,9 +5088,9 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 }
 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
 
+#ifdef CONFIG_RPS
 static int netif_alloc_rx_queues(struct net_device *dev)
 {
-#ifdef CONFIG_RPS
 	unsigned int i, count = dev->num_rx_queues;
 	struct netdev_rx_queue *rx;
 
@@ -5102,9 +5105,9 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 
 	for (i = 0; i < count; i++)
 		rx[i].dev = dev;
-#endif
 	return 0;
 }
+#endif
 
 static int netif_alloc_netdev_queues(struct net_device *dev)
 {
-- 
cgit v1.2.3


From f2cd2d3e9b3ef960612e362f0ad129d735452df2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 29 Nov 2010 08:14:37 +0000
Subject: net sched: use xps information for qdisc NUMA affinity

Allocate qdisc memory according to NUMA properties of cpus included in
xps map.

To be effective, qdisc should be (re)setup after changes
of /sys/class/net/eth<n>/queues/tx-<n>/xps_cpus

I added a numa_node field in struct netdev_queue, containing NUMA node
if all cpus included in xps_cpus share same node, else -1.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 3259d2c323a6..cd2437495428 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5125,9 +5125,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 	}
 	dev->_tx = tx;
 
-	for (i = 0; i < count; i++)
+	for (i = 0; i < count; i++) {
+		netdev_queue_numa_node_write(&tx[i], -1);
 		tx[i].dev = dev;
-
+	}
 	return 0;
 }
 
-- 
cgit v1.2.3


From 7903264402546f45f9bac8ad2bfdb00d00eb124a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux@rere.qmqm.pl>
Date: Tue, 30 Nov 2010 06:38:00 +0000
Subject: net: Fix too optimistic NETIF_F_HW_CSUM features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

NETIF_F_HW_CSUM is a superset of NETIF_F_IP_CSUM+NETIF_F_IPV6_CSUM, but
some drivers miss the difference. Fix this and also fix UFO dependency
on checksumming offload as it makes the same mistake in assumptions.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Acked-by: Jon Mason <jon.mason@exar.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index cd2437495428..55ff66fabce4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5041,10 +5041,13 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 	}
 
 	if (features & NETIF_F_UFO) {
-		if (!(features & NETIF_F_GEN_CSUM)) {
+		/* maybe split UFO into V4 and V6? */
+		if (!((features & NETIF_F_GEN_CSUM) ||
+		    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
+			    == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
 			if (name)
 				printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
-				       "since no NETIF_F_HW_CSUM feature.\n",
+				       "since no checksum offload features.\n",
 				       name);
 			features &= ~NETIF_F_UFO;
 		}
-- 
cgit v1.2.3


From aa9421041128abb4d269ee1dc502ff65fb3b7d69 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Sat, 4 Dec 2010 02:31:41 +0000
Subject: net: init ingress queue

The dev field of ingress queue is forgot to initialized, then NULL
pointer dereference happens in qdisc_alloc().

Move inits of tx queues to netif_alloc_netdev_queues().

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 35 +++++++++++++----------------------
 1 file changed, 13 insertions(+), 22 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 55ff66fabce4..ee605c0867e7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5112,11 +5112,21 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 }
 #endif
 
+static void netdev_init_one_queue(struct net_device *dev,
+				  struct netdev_queue *queue, void *_unused)
+{
+	/* Initialize queue lock */
+	spin_lock_init(&queue->_xmit_lock);
+	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+	queue->xmit_lock_owner = -1;
+	netdev_queue_numa_node_write(queue, -1);
+	queue->dev = dev;
+}
+
 static int netif_alloc_netdev_queues(struct net_device *dev)
 {
 	unsigned int count = dev->num_tx_queues;
 	struct netdev_queue *tx;
-	int i;
 
 	BUG_ON(count < 1);
 
@@ -5128,27 +5138,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
 	}
 	dev->_tx = tx;
 
-	for (i = 0; i < count; i++) {
-		netdev_queue_numa_node_write(&tx[i], -1);
-		tx[i].dev = dev;
-	}
-	return 0;
-}
-
-static void netdev_init_one_queue(struct net_device *dev,
-				  struct netdev_queue *queue,
-				  void *_unused)
-{
-	/* Initialize queue lock */
-	spin_lock_init(&queue->_xmit_lock);
-	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
-	queue->xmit_lock_owner = -1;
-}
-
-static void netdev_init_queues(struct net_device *dev)
-{
 	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
 	spin_lock_init(&dev->tx_global_lock);
+
+	return 0;
 }
 
 /**
@@ -5187,8 +5180,6 @@ int register_netdevice(struct net_device *dev)
 
 	dev->iflink = -1;
 
-	netdev_init_queues(dev);
-
 	/* Init, if this function is available */
 	if (dev->netdev_ops->ndo_init) {
 		ret = dev->netdev_ops->ndo_init(dev);
-- 
cgit v1.2.3


From 941666c2e3e0f9f6a1cb5808d02352d445bd702c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 5 Dec 2010 01:23:53 +0000
Subject: net: RCU conversion of dev_getbyhwaddr() and arp_ioctl()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Le dimanche 05 décembre 2010 à 09:19 +0100, Eric Dumazet a écrit :

> Hmm..
>
> If somebody can explain why RTNL is held in arp_ioctl() (and therefore
> in arp_req_delete()), we might first remove RTNL use in arp_ioctl() so
> that your patch can be applied.
>
> Right now it is not good, because RTNL wont be necessarly held when you
> are going to call arp_invalidate() ?

While doing this analysis, I found a refcount bug in llc, I'll send a
patch for net-2.6

Meanwhile, here is the patch for net-next-2.6

Your patch then can be applied after mine.

Thanks

[PATCH] net: RCU conversion of dev_getbyhwaddr() and arp_ioctl()

dev_getbyhwaddr() was called under RTNL.

Rename it to dev_getbyhwaddr_rcu() and change all its caller to now use
RCU locking instead of RTNL.

Change arp_ioctl() to use RCU instead of RTNL locking.

Note: this fix a dev refcount bug in llc

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index ee605c0867e7..822b15b8d11c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -743,34 +743,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 EXPORT_SYMBOL(dev_get_by_index);
 
 /**
- *	dev_getbyhwaddr - find a device by its hardware address
+ *	dev_getbyhwaddr_rcu - find a device by its hardware address
  *	@net: the applicable net namespace
  *	@type: media type of device
  *	@ha: hardware address
  *
  *	Search for an interface by MAC address. Returns NULL if the device
- *	is not found or a pointer to the device. The caller must hold the
- *	rtnl semaphore. The returned device has not had its ref count increased
+ *	is not found or a pointer to the device. The caller must hold RCU
+ *	The returned device has not had its ref count increased
  *	and the caller must therefore be careful about locking
  *
- *	BUGS:
- *	If the API was consistent this would be __dev_get_by_hwaddr
  */
 
-struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
+struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
+				       const char *ha)
 {
 	struct net_device *dev;
 
-	ASSERT_RTNL();
-
-	for_each_netdev(net, dev)
+	for_each_netdev_rcu(net, dev)
 		if (dev->type == type &&
 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
 			return dev;
 
 	return NULL;
 }
-EXPORT_SYMBOL(dev_getbyhwaddr);
+EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
 
 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 {
-- 
cgit v1.2.3


From 15c2d75f49189e1769c5e8f5f099d03d055c4910 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 7 Dec 2010 00:30:37 +0000
Subject: net: call dev_queue_xmit_nit() after skb_dst_drop()

Avoid some atomic ops on dst refcount, calling dev_queue_xmit_nit()
after skb_dst_drop() in dev_hard_start_xmit().

When queueing a packet into af_packet socket, we drop dst anyway.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 822b15b8d11c..d28b3a023bb2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2022,9 +2022,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 	int rc = NETDEV_TX_OK;
 
 	if (likely(!skb->next)) {
-		if (!list_empty(&ptype_all))
-			dev_queue_xmit_nit(skb, dev);
-
 		/*
 		 * If device doesnt need skb->dst, release it right now while
 		 * its hot in this cpu cache
@@ -2032,6 +2029,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
 			skb_dst_drop(skb);
 
+		if (!list_empty(&ptype_all))
+			dev_queue_xmit_nit(skb, dev);
+
 		skb_orphan_try(skb);
 
 		if (vlan_tx_tag_present(skb) &&
-- 
cgit v1.2.3


From a3d22a68d752ccc1a01bb0a64dd70b7a98bf9e23 Mon Sep 17 00:00:00 2001
From: Vladislav Zolotarov <vladz@broadcom.com>
Date: Mon, 13 Dec 2010 06:27:10 +0000
Subject: bnx2x: Take the distribution range definition out of skb_tx_hash()

Move the calcualation of the Tx hash for a given hash range into a separate
function and define the skb_tx_hash(), which calculates a Tx hash for a
[0; dev->real_num_tx_queues - 1] hash values range, using this
function (__skb_tx_hash()).

Signed-off-by: Vladislav Zolotarov <vladz@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index d28b3a023bb2..b25dd087f06a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2112,14 +2112,19 @@ out:
 
 static u32 hashrnd __read_mostly;
 
-u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
+/*
+ * Returns a Tx hash based on the given packet descriptor a Tx queues' number
+ * to be used as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+		  unsigned int num_tx_queues)
 {
 	u32 hash;
 
 	if (skb_rx_queue_recorded(skb)) {
 		hash = skb_get_rx_queue(skb);
-		while (unlikely(hash >= dev->real_num_tx_queues))
-			hash -= dev->real_num_tx_queues;
+		while (unlikely(hash >= num_tx_queues))
+			hash -= num_tx_queues;
 		return hash;
 	}
 
@@ -2129,9 +2134,9 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 		hash = (__force u16) skb->protocol ^ skb->rxhash;
 	hash = jhash_1word(hash, hashrnd);
 
-	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
+	return (u16) (((u64) hash * num_tx_queues) >> 32);
 }
-EXPORT_SYMBOL(skb_tx_hash);
+EXPORT_SYMBOL(__skb_tx_hash);
 
 static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 {
-- 
cgit v1.2.3


From b236da6931e2482bfe44a7865dd4e7bb036f3496 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 14 Dec 2010 03:09:15 +0000
Subject: net: use NUMA_NO_NODE instead of the magic number -1

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index b25dd087f06a..7ac26d2b9722 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5121,7 +5121,7 @@ static void netdev_init_one_queue(struct net_device *dev,
 	spin_lock_init(&queue->_xmit_lock);
 	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
 	queue->xmit_lock_owner = -1;
-	netdev_queue_numa_node_write(queue, -1);
+	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
 	queue->dev = dev;
 }
 
-- 
cgit v1.2.3


From 443457242beb6716b43db4d62fe148eab5515505 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Mon, 13 Dec 2010 12:44:07 +0000
Subject: net: factorize sync-rcu call in unregister_netdevice_many

Add dev_close_many and dev_deactivate_many to factorize another
sync-rcu operation on the netdevice unregister path.

$ modprobe dummy numdummies=10000
$ ip link set dev dummy* up
$ time rmmod dummy

Without the patch           With the patch

real    0m 24.63s           real    0m 5.15s
user    0m 0.00s            user    0m 0.00s
sys     0m 6.05s            sys     0m 5.14s

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 118 +++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 76 insertions(+), 42 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 7ac26d2b9722..794b20de5d44 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1222,52 +1222,90 @@ int dev_open(struct net_device *dev)
 }
 EXPORT_SYMBOL(dev_open);
 
-static int __dev_close(struct net_device *dev)
+static int __dev_close_many(struct list_head *head)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
+	struct net_device *dev;
 
 	ASSERT_RTNL();
 	might_sleep();
 
-	/*
-	 *	Tell people we are going down, so that they can
-	 *	prepare to death, when device is still operating.
-	 */
-	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
+	list_for_each_entry(dev, head, unreg_list) {
+		/*
+		 *	Tell people we are going down, so that they can
+		 *	prepare to death, when device is still operating.
+		 */
+		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
 
-	clear_bit(__LINK_STATE_START, &dev->state);
+		clear_bit(__LINK_STATE_START, &dev->state);
 
-	/* Synchronize to scheduled poll. We cannot touch poll list,
-	 * it can be even on different cpu. So just clear netif_running().
-	 *
-	 * dev->stop() will invoke napi_disable() on all of it's
-	 * napi_struct instances on this device.
-	 */
-	smp_mb__after_clear_bit(); /* Commit netif_running(). */
+		/* Synchronize to scheduled poll. We cannot touch poll list, it
+		 * can be even on different cpu. So just clear netif_running().
+		 *
+		 * dev->stop() will invoke napi_disable() on all of it's
+		 * napi_struct instances on this device.
+		 */
+		smp_mb__after_clear_bit(); /* Commit netif_running(). */
+	}
 
-	dev_deactivate(dev);
+	dev_deactivate_many(head);
 
-	/*
-	 *	Call the device specific close. This cannot fail.
-	 *	Only if device is UP
-	 *
-	 *	We allow it to be called even after a DETACH hot-plug
-	 *	event.
-	 */
-	if (ops->ndo_stop)
-		ops->ndo_stop(dev);
+	list_for_each_entry(dev, head, unreg_list) {
+		const struct net_device_ops *ops = dev->netdev_ops;
 
-	/*
-	 *	Device is now down.
-	 */
+		/*
+		 *	Call the device specific close. This cannot fail.
+		 *	Only if device is UP
+		 *
+		 *	We allow it to be called even after a DETACH hot-plug
+		 *	event.
+		 */
+		if (ops->ndo_stop)
+			ops->ndo_stop(dev);
+
+		/*
+		 *	Device is now down.
+		 */
+
+		dev->flags &= ~IFF_UP;
+
+		/*
+		 *	Shutdown NET_DMA
+		 */
+		net_dmaengine_put();
+	}
 
-	dev->flags &= ~IFF_UP;
+	return 0;
+}
+
+static int __dev_close(struct net_device *dev)
+{
+	LIST_HEAD(single);
+
+	list_add(&dev->unreg_list, &single);
+	return __dev_close_many(&single);
+}
+
+int dev_close_many(struct list_head *head)
+{
+	struct net_device *dev, *tmp;
+	LIST_HEAD(tmp_list);
+
+	list_for_each_entry_safe(dev, tmp, head, unreg_list)
+		if (!(dev->flags & IFF_UP))
+			list_move(&dev->unreg_list, &tmp_list);
+
+	__dev_close_many(head);
 
 	/*
-	 *	Shutdown NET_DMA
+	 * Tell people we are down
 	 */
-	net_dmaengine_put();
+	list_for_each_entry(dev, head, unreg_list) {
+		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+		call_netdevice_notifiers(NETDEV_DOWN, dev);
+	}
 
+	/* rollback_registered_many needs the complete original list */
+	list_splice(&tmp_list, head);
 	return 0;
 }
 
@@ -1282,16 +1320,10 @@ static int __dev_close(struct net_device *dev)
  */
 int dev_close(struct net_device *dev)
 {
-	if (!(dev->flags & IFF_UP))
-		return 0;
-
-	__dev_close(dev);
+	LIST_HEAD(single);
 
-	/*
-	 * Tell people we are down
-	 */
-	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
-	call_netdevice_notifiers(NETDEV_DOWN, dev);
+	list_add(&dev->unreg_list, &single);
+	dev_close_many(&single);
 
 	return 0;
 }
@@ -4963,10 +4995,12 @@ static void rollback_registered_many(struct list_head *head)
 		}
 
 		BUG_ON(dev->reg_state != NETREG_REGISTERED);
+	}
 
-		/* If device is running, close it first. */
-		dev_close(dev);
+	/* If device is running, close it first. */
+	dev_close_many(head);
 
+	list_for_each_entry(dev, head, unreg_list) {
 		/* And unlink it from device chain. */
 		unlist_netdevice(dev);
 
-- 
cgit v1.2.3


From 55508d601dab7df5cbcc7a63f4be8620eface204 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux@rere.qmqm.pl>
Date: Tue, 14 Dec 2010 15:24:08 +0000
Subject: net: Use skb_checksum_start_offset()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace skb->csum_start - skb_headroom(skb) with skb_checksum_start_offset().

Note for usb/smsc95xx: skb->data - skb->head == skb_headroom(skb).

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 794b20de5d44..92d414ac0e30 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1793,7 +1793,7 @@ int skb_checksum_help(struct sk_buff *skb)
 		goto out_set_summed;
 	}
 
-	offset = skb->csum_start - skb_headroom(skb);
+	offset = skb_checksum_start_offset(skb);
 	BUG_ON(offset >= skb_headlen(skb));
 	csum = skb_checksum(skb, offset, skb->len - offset, 0);
 
@@ -2090,8 +2090,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			 * checksumming here.
 			 */
 			if (skb->ip_summed == CHECKSUM_PARTIAL) {
-				skb_set_transport_header(skb, skb->csum_start -
-					      skb_headroom(skb));
+				skb_set_transport_header(skb,
+					skb_checksum_start_offset(skb));
 				if (!dev_can_checksum(dev, skb) &&
 				     skb_checksum_help(skb))
 					goto out_kfree_skb;
-- 
cgit v1.2.3


From 71d9dec24dce548bf699815c976cf063ad9257e2 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Wed, 15 Dec 2010 19:57:25 +0000
Subject: net: increase skb->users instead of skb_clone()

In dev_queue_xmit_nit(), we have to clone skbs as we need to mangle skbs,
however, we don't need to clone skbs for all the packet_types.

Except for the first packet_type, we increase skb->users instead of
skb_clone().

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 92d414ac0e30..59877290bca7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1528,6 +1528,14 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(dev_forward_skb);
 
+static inline int deliver_skb(struct sk_buff *skb,
+			      struct packet_type *pt_prev,
+			      struct net_device *orig_dev)
+{
+	atomic_inc(&skb->users);
+	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+}
+
 /*
  *	Support routine. Sends outgoing frames to any network
  *	taps currently in use.
@@ -1536,6 +1544,8 @@ EXPORT_SYMBOL_GPL(dev_forward_skb);
 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct packet_type *ptype;
+	struct sk_buff *skb2 = NULL;
+	struct packet_type *pt_prev = NULL;
 
 #ifdef CONFIG_NET_CLS_ACT
 	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
@@ -1552,7 +1562,13 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 		if ((ptype->dev == dev || !ptype->dev) &&
 		    (ptype->af_packet_priv == NULL ||
 		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
-			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+			if (pt_prev) {
+				deliver_skb(skb2, pt_prev, skb->dev);
+				pt_prev = ptype;
+				continue;
+			}
+
+			skb2 = skb_clone(skb, GFP_ATOMIC);
 			if (!skb2)
 				break;
 
@@ -1574,9 +1590,11 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 
 			skb2->transport_header = skb2->network_header;
 			skb2->pkt_type = PACKET_OUTGOING;
-			ptype->func(skb2, skb->dev, ptype, skb->dev);
+			pt_prev = ptype;
 		}
 	}
+	if (pt_prev)
+		pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
 	rcu_read_unlock();
 }
 
@@ -2820,14 +2838,6 @@ static void net_tx_action(struct softirq_action *h)
 	}
 }
 
-static inline int deliver_skb(struct sk_buff *skb,
-			      struct packet_type *pt_prev,
-			      struct net_device *orig_dev)
-{
-	atomic_inc(&skb->users);
-	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
-}
-
 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
     (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
 /* This hook is defined here for ATM LANE */
-- 
cgit v1.2.3


From 70978182d431e0348e6ef711d0f962d12c03bc46 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 20 Dec 2010 21:22:51 +0000
Subject: net: timestamp cloned packet in dev_queue_xmit_nit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Le vendredi 17 décembre 2010 à 10:26 +0100, Eric Dumazet a écrit :

>
> I think we can add this after latest Changli patch :
>
> He does one skb_clone() before calling the sniffers.
> We could set timestamp on this clone, instead of original skb.
>
> Problem solved.
>

[PATCH net-next-2.6] net: timestamp cloned packet in dev_queue_xmit_nit

Now we do one clone of skb if at least one sniffer might take packet,
we also can do the skb timestamping on the clone and let original packet
unchanged.

This is a generalization of commit 8caf153974f2 (net: sch_netem: Fix an
inconsistency in ingress netem timestamps.)

This way, we can have a good idea when packets are delivered to our
stack (tcpdump -i ifb0), while a tcpdump on original device gives
timestamps right before ingressing.

This also speedup our stack, avoiding taking timestamps if not needed.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Changli Gao <xiaosuo@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Jarek Poplawski <jarkao2@gmail.com>
Acked-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 59877290bca7..a215269d2e35 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1547,13 +1547,6 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	struct sk_buff *skb2 = NULL;
 	struct packet_type *pt_prev = NULL;
 
-#ifdef CONFIG_NET_CLS_ACT
-	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
-		net_timestamp_set(skb);
-#else
-	net_timestamp_set(skb);
-#endif
-
 	rcu_read_lock();
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
 		/* Never send packets back to the socket
@@ -1572,6 +1565,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 			if (!skb2)
 				break;
 
+			net_timestamp_set(skb2);
+
 			/* skb->nh should be correctly
 			   set by sender, so that the second statement is
 			   just protection against buggy protocols.
-- 
cgit v1.2.3


From 9497a0518e8183959e45da05f317f1cb36f2cd7c Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 9 Jan 2011 06:23:30 +0000
Subject: net offloading: Accept NETIF_F_HW_CSUM for all protocols.

We currently only have software fallback for one type of checksum: the
TCP/UDP one's complement.  This means that a protocol that uses hardware
offloading for a different type of checksum (FCoE, SCTP) must directly
check the device's features and do the right thing ahead of time.  By
the time we get to dev_can_checksum(), we're only deciding whether to
apply the one algorithm in software or hardware.  NETIF_F_HW_CSUM has the
same capabilities as the software version, so we should always use it if
present.  The primary advantage of this is multiply tagged vlans can use
hardware checksumming.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index a215269d2e35..d8befd06da04 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1734,7 +1734,7 @@ EXPORT_SYMBOL(netif_device_attach);
 
 static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 {
-	return ((features & NETIF_F_NO_CSUM) ||
+	return ((features & NETIF_F_GEN_CSUM) ||
 		((features & NETIF_F_V4_CSUM) &&
 		 protocol == htons(ETH_P_IP)) ||
 		((features & NETIF_F_V6_CSUM) &&
-- 
cgit v1.2.3


From f01a5236bd4b140198fbcc550f085e8361fd73fa Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 9 Jan 2011 06:23:31 +0000
Subject: net offloading: Generalize netif_get_vlan_features().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

netif_get_vlan_features() is currently only used by netif_needs_gso(),
so it only concerns itself with GSO features.  However, several other
places also should take into account the contents of the packet when
deciding whether to offload to hardware.  This generalizes the function
to return features about all of the various forms of offloading.  Since
offloads tend to be linked together, this avoids duplicating the logic
in each location (i.e. the scatter/gather code also needs the checksum
logic).

Suggested-by: Michał Mirosław <mirqus@gmail.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index d8befd06da04..a51dfd7b56fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2017,22 +2017,41 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 	}
 }
 
-int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev)
+static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features)
+{
+	if (!can_checksum_protocol(protocol, features)) {
+		features &= ~NETIF_F_ALL_CSUM;
+		features &= ~NETIF_F_SG;
+	} else if (illegal_highdma(skb->dev, skb)) {
+		features &= ~NETIF_F_SG;
+	}
+
+	return features;
+}
+
+int netif_skb_features(struct sk_buff *skb)
 {
 	__be16 protocol = skb->protocol;
+	int features = skb->dev->features;
 
 	if (protocol == htons(ETH_P_8021Q)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
 		protocol = veh->h_vlan_encapsulated_proto;
-	} else if (!skb->vlan_tci)
-		return dev->features;
+	} else if (!vlan_tx_tag_present(skb)) {
+		return harmonize_features(skb, protocol, features);
+	}
 
-	if (protocol != htons(ETH_P_8021Q))
-		return dev->features & dev->vlan_features;
-	else
-		return 0;
+	features &= skb->dev->vlan_features;
+
+	if (protocol != htons(ETH_P_8021Q)) {
+		return harmonize_features(skb, protocol, features);
+	} else {
+		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
+				NETIF_F_GEN_CSUM;
+		return harmonize_features(skb, protocol, features);
+	}
 }
-EXPORT_SYMBOL(netif_get_vlan_features);
+EXPORT_SYMBOL(netif_skb_features);
 
 /*
  * Returns true if either:
-- 
cgit v1.2.3


From fc741216db156994c554ac31c1151fe0e00d8f0e Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 9 Jan 2011 06:23:32 +0000
Subject: net offloading: Pass features into netif_needs_gso().

Now that there is a single function that can compute the device
features relevant to a packet, we don't want to run it for each
offload.  This converts netif_needs_gso() to take the features
of the device, rather than computing them itself.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index a51dfd7b56fb..1444ed3861a0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2086,6 +2086,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 	int rc = NETDEV_TX_OK;
 
 	if (likely(!skb->next)) {
+		int features;
+
 		/*
 		 * If device doesnt need skb->dst, release it right now while
 		 * its hot in this cpu cache
@@ -2098,8 +2100,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 
 		skb_orphan_try(skb);
 
+		features = netif_skb_features(skb);
+
 		if (vlan_tx_tag_present(skb) &&
-		    !(dev->features & NETIF_F_HW_VLAN_TX)) {
+		    !(features & NETIF_F_HW_VLAN_TX)) {
 			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
 			if (unlikely(!skb))
 				goto out;
@@ -2107,7 +2111,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			skb->vlan_tci = 0;
 		}
 
-		if (netif_needs_gso(dev, skb)) {
+		if (netif_needs_gso(skb, features)) {
 			if (unlikely(dev_gso_segment(skb)))
 				goto out_kfree_skb;
 			if (skb->next)
-- 
cgit v1.2.3


From 91ecb63c074d802f8cf103f1dafb4aed24d0f24c Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 9 Jan 2011 06:23:33 +0000
Subject: net offloading: Convert dev_gso_segment() to use precomputed
 features.

This switches dev_gso_segment() to use the device features computed
by the centralized routine.  In doing so, it fixes a problem where
it would always use dev->features, instead of those appropriate
to the number of vlan tags if any are present.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 1444ed3861a0..4cd3e84e1294 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1971,16 +1971,14 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
 /**
  *	dev_gso_segment - Perform emulated hardware segmentation on skb.
  *	@skb: buffer to segment
+ *	@features: device features as applicable to this skb
  *
  *	This function segments the given skb and stores the list of segments
  *	in skb->next.
  */
-static int dev_gso_segment(struct sk_buff *skb)
+static int dev_gso_segment(struct sk_buff *skb, int features)
 {
-	struct net_device *dev = skb->dev;
 	struct sk_buff *segs;
-	int features = dev->features & ~(illegal_highdma(dev, skb) ?
-					 NETIF_F_SG : 0);
 
 	segs = skb_gso_segment(skb, features);
 
@@ -2112,7 +2110,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		}
 
 		if (netif_needs_gso(skb, features)) {
-			if (unlikely(dev_gso_segment(skb)))
+			if (unlikely(dev_gso_segment(skb, features)))
 				goto out_kfree_skb;
 			if (skb->next)
 				goto gso;
-- 
cgit v1.2.3


From 02932ce9e2c136e6fab2571c8e0dd69ae8ec9853 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 9 Jan 2011 06:23:34 +0000
Subject: net offloading: Convert skb_need_linearize() to use precomputed
 features.

This switches skb_need_linearize() to use the features that have
been centrally computed.  In doing so, this fixes a problem where
scatter/gather should not be used because the card does not support
checksum offloading on that type of packet.  On device registration
we only check that some form of checksum offloading is available if
scatter/gatther is enabled but we must also check at transmission
time.  Examples of this include IPv6 or vlan packets on a NIC that
only supports IPv4 offloading.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 4cd3e84e1294..2f838f1d222c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2059,22 +2059,13 @@ EXPORT_SYMBOL(netif_skb_features);
  *	   support DMA from it.
  */
 static inline int skb_needs_linearize(struct sk_buff *skb,
-				      struct net_device *dev)
+				      int features)
 {
-	if (skb_is_nonlinear(skb)) {
-		int features = dev->features;
-
-		if (vlan_tx_tag_present(skb))
-			features &= dev->vlan_features;
-
-		return (skb_has_frag_list(skb) &&
-			!(features & NETIF_F_FRAGLIST)) ||
+	return skb_is_nonlinear(skb) &&
+			((skb_has_frag_list(skb) &&
+				!(features & NETIF_F_FRAGLIST)) ||
 			(skb_shinfo(skb)->nr_frags &&
-			(!(features & NETIF_F_SG) ||
-			illegal_highdma(dev, skb)));
-	}
-
-	return 0;
+				!(features & NETIF_F_SG)));
 }
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -2115,7 +2106,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			if (skb->next)
 				goto gso;
 		} else {
-			if (skb_needs_linearize(skb, dev) &&
+			if (skb_needs_linearize(skb, features) &&
 			    __skb_linearize(skb))
 				goto out_kfree_skb;
 
-- 
cgit v1.2.3


From 0363466866d901fbc658f4e63dd61e7cc93dd0af Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sun, 9 Jan 2011 06:23:35 +0000
Subject: net offloading: Convert checksums to use centrally computed features.

In order to compute the features for other offloads (primarily
scatter/gather), we need to first check the ability of the NIC to
offload the checksum for the packet.  Since we have already computed
this, we can directly use the result instead of figuring it out
again.

Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 40 ++++++++++++----------------------------
 1 file changed, 12 insertions(+), 28 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 2f838f1d222c..3fe443be4b15 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1732,33 +1732,6 @@ void netif_device_attach(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_device_attach);
 
-static bool can_checksum_protocol(unsigned long features, __be16 protocol)
-{
-	return ((features & NETIF_F_GEN_CSUM) ||
-		((features & NETIF_F_V4_CSUM) &&
-		 protocol == htons(ETH_P_IP)) ||
-		((features & NETIF_F_V6_CSUM) &&
-		 protocol == htons(ETH_P_IPV6)) ||
-		((features & NETIF_F_FCOE_CRC) &&
-		 protocol == htons(ETH_P_FCOE)));
-}
-
-static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
-{
-	__be16 protocol = skb->protocol;
-	int features = dev->features;
-
-	if (vlan_tx_tag_present(skb)) {
-		features &= dev->vlan_features;
-	} else if (protocol == htons(ETH_P_8021Q)) {
-		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
-		protocol = veh->h_vlan_encapsulated_proto;
-		features &= dev->vlan_features;
-	}
-
-	return can_checksum_protocol(features, protocol);
-}
-
 /**
  * skb_dev_set -- assign a new device to a buffer
  * @skb: buffer for the new device
@@ -2015,6 +1988,17 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 	}
 }
 
+static bool can_checksum_protocol(unsigned long features, __be16 protocol)
+{
+	return ((features & NETIF_F_GEN_CSUM) ||
+		((features & NETIF_F_V4_CSUM) &&
+		 protocol == htons(ETH_P_IP)) ||
+		((features & NETIF_F_V6_CSUM) &&
+		 protocol == htons(ETH_P_IPV6)) ||
+		((features & NETIF_F_FCOE_CRC) &&
+		 protocol == htons(ETH_P_FCOE)));
+}
+
 static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features)
 {
 	if (!can_checksum_protocol(protocol, features)) {
@@ -2117,7 +2101,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			if (skb->ip_summed == CHECKSUM_PARTIAL) {
 				skb_set_transport_header(skb,
 					skb_checksum_start_offset(skb));
-				if (!dev_can_checksum(dev, skb) &&
+				if (!(features & NETIF_F_ALL_CSUM) &&
 				     skb_checksum_help(skb))
 					goto out_kfree_skb;
 			}
-- 
cgit v1.2.3


From 36909ea43814cba34f7c921e99cba33d770a54e1 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sun, 9 Jan 2011 19:36:31 +0000
Subject: net: Add alloc_netdev_mqs function

Added alloc_netdev_mqs function which allows the number of transmit and
receive queues to be specified independenty.  alloc_netdev_mq was
changed to a macro to call the new function.  Also added
alloc_etherdev_mqs with same purpose.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 3fe443be4b15..3295b94884ab 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5617,18 +5617,20 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
 }
 
 /**
- *	alloc_netdev_mq - allocate network device
+ *	alloc_netdev_mqs - allocate network device
  *	@sizeof_priv:	size of private data to allocate space for
  *	@name:		device name format string
  *	@setup:		callback to initialize device
- *	@queue_count:	the number of subqueues to allocate
+ *	@txqs:		the number of TX subqueues to allocate
+ *	@rxqs:		the number of RX subqueues to allocate
  *
  *	Allocates a struct net_device with private data area for driver use
  *	and performs basic initialization.  Also allocates subquue structs
- *	for each queue on the device at the end of the netdevice.
+ *	for each queue on the device.
  */
-struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
-		void (*setup)(struct net_device *), unsigned int queue_count)
+struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+		void (*setup)(struct net_device *),
+		unsigned int txqs, unsigned int rxqs)
 {
 	struct net_device *dev;
 	size_t alloc_size;
@@ -5636,12 +5638,20 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
-	if (queue_count < 1) {
+	if (txqs < 1) {
 		pr_err("alloc_netdev: Unable to allocate device "
 		       "with zero queues.\n");
 		return NULL;
 	}
 
+#ifdef CONFIG_RPS
+	if (rxqs < 1) {
+		pr_err("alloc_netdev: Unable to allocate device "
+		       "with zero RX queues.\n");
+		return NULL;
+	}
+#endif
+
 	alloc_size = sizeof(struct net_device);
 	if (sizeof_priv) {
 		/* ensure 32-byte alignment of private area */
@@ -5672,14 +5682,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
 	dev_net_set(dev, &init_net);
 
-	dev->num_tx_queues = queue_count;
-	dev->real_num_tx_queues = queue_count;
+	dev->num_tx_queues = txqs;
+	dev->real_num_tx_queues = txqs;
 	if (netif_alloc_netdev_queues(dev))
 		goto free_pcpu;
 
 #ifdef CONFIG_RPS
-	dev->num_rx_queues = queue_count;
-	dev->real_num_rx_queues = queue_count;
+	dev->num_rx_queues = rxqs;
+	dev->real_num_rx_queues = rxqs;
 	if (netif_alloc_rx_queues(dev))
 		goto free_pcpu;
 #endif
@@ -5707,7 +5717,7 @@ free_p:
 	kfree(p);
 	return NULL;
 }
-EXPORT_SYMBOL(alloc_netdev_mq);
+EXPORT_SYMBOL(alloc_netdev_mqs);
 
 /**
  *	free_netdev - free network device
-- 
cgit v1.2.3


From bfe0d0298f2a67d94d58c39ea904a999aeeb7c3c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 9 Jan 2011 08:30:54 +0000
Subject: net_sched: factorize qdisc stats handling

HTB takes into account skb is segmented in stats updates.
Generalize this to all schedulers.

They should use qdisc_bstats_update() helper instead of manipulating
bstats.bytes and bstats.packets

Add bstats_update() helper too for classes that use
gnet_stats_basic_packed fields.

Note : Right now, TCQ_F_CAN_BYPASS shortcurt can be taken only if no
stab is setup on qdisc.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 3295b94884ab..a3ef808b5e36 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2297,7 +2297,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		 */
 		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
 			skb_dst_force(skb);
-		__qdisc_update_bstats(q, skb->len);
+
+		qdisc_skb_cb(skb)->pkt_len = skb->len;
+		qdisc_bstats_update(q, skb);
+
 		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
 			if (unlikely(contended)) {
 				spin_unlock(&q->busylock);
-- 
cgit v1.2.3


From 1ac9ad1394fa542ac7ae0dc943ee3cda678799fa Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 12 Jan 2011 12:13:14 +0000
Subject: net: remove dev_txq_stats_fold()

After recent changes, (percpu stats on vlan/tunnels...), we dont need
anymore per struct netdev_queue tx_bytes/tx_packets/tx_dropped counters.

Only remaining users are ixgbe, sch_teql, gianfar & macvlan :

1) ixgbe can be converted to use existing tx_ring counters.

2) macvlan incremented txq->tx_dropped, it can use the
dev->stats.tx_dropped counter.

3) sch_teql : almost revert ab35cd4b8f42 (Use net_device internal stats)
    Now we have ndo_get_stats64(), use it, even for "unsigned long"
fields (No need to bring back a struct net_device_stats)

4) gianfar adds a stats structure per tx queue to hold
tx_bytes/tx_packets

This removes a lockdep warning (and possible lockup) in rndis gadget,
calling dev_get_stats() from hard IRQ context.

Ref: http://www.spinics.net/lists/netdev/msg149202.html

Reported-by: Neil Jones <neiljay@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Jarek Poplawski <jarkao2@gmail.com>
CC: Alexander Duyck <alexander.h.duyck@intel.com>
CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
CC: Sandeep Gopalpet <sandeep.kumar@freescale.com>
CC: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 29 -----------------------------
 1 file changed, 29 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index a3ef808b5e36..83507c265e48 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5523,34 +5523,6 @@ void netdev_run_todo(void)
 	}
 }
 
-/**
- *	dev_txq_stats_fold - fold tx_queues stats
- *	@dev: device to get statistics from
- *	@stats: struct rtnl_link_stats64 to hold results
- */
-void dev_txq_stats_fold(const struct net_device *dev,
-			struct rtnl_link_stats64 *stats)
-{
-	u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
-	unsigned int i;
-	struct netdev_queue *txq;
-
-	for (i = 0; i < dev->num_tx_queues; i++) {
-		txq = netdev_get_tx_queue(dev, i);
-		spin_lock_bh(&txq->_xmit_lock);
-		tx_bytes   += txq->tx_bytes;
-		tx_packets += txq->tx_packets;
-		tx_dropped += txq->tx_dropped;
-		spin_unlock_bh(&txq->_xmit_lock);
-	}
-	if (tx_bytes || tx_packets || tx_dropped) {
-		stats->tx_bytes   = tx_bytes;
-		stats->tx_packets = tx_packets;
-		stats->tx_dropped = tx_dropped;
-	}
-}
-EXPORT_SYMBOL(dev_txq_stats_fold);
-
 /* Convert net_device_stats to rtnl_link_stats64.  They have the same
  * fields in the same order, with only the type differing.
  */
@@ -5594,7 +5566,6 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
 	} else {
 		netdev_stats_to_stats64(storage, &dev->stats);
-		dev_txq_stats_fold(dev, storage);
 	}
 	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
 	return storage;
-- 
cgit v1.2.3


From 6ee400aafb60289b78fcde5ebccd8c4973fc53f4 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Mon, 17 Jan 2011 20:46:00 +0000
Subject: net offloading: Do not mask out NETIF_F_HW_VLAN_TX for vlan.

In netif_skb_features() we return only the features that are valid for vlans
if we have a vlan packet.  However, we should not mask out NETIF_F_HW_VLAN_TX
since it enables transmission of vlan tags and is obviously valid.

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 83507c265e48..4c58d11d3b68 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2023,13 +2023,13 @@ int netif_skb_features(struct sk_buff *skb)
 		return harmonize_features(skb, protocol, features);
 	}
 
-	features &= skb->dev->vlan_features;
+	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);
 
 	if (protocol != htons(ETH_P_8021Q)) {
 		return harmonize_features(skb, protocol, features);
 	} else {
 		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
-				NETIF_F_GEN_CSUM;
+				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
 		return harmonize_features(skb, protocol, features);
 	}
 }
-- 
cgit v1.2.3


From d402786ea4f8433774a812d6b8635e737425cddd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 19 Jan 2011 00:51:36 +0000
Subject: net: fix can_checksum_protocol() arguments swap

commit 0363466866d901fbc (net offloading: Convert checksums to use
centrally computed features.) mistakenly swapped can_checksum_protocol()
arguments.

This broke IPv6 on bnx2 for instance, on NIC without TCPv6 checksum
offloads.

Reported-by: Hans de Bruin <jmdebruin@xmsnet.nl>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 4c58d11d3b68..8393ec408cd4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2001,7 +2001,7 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 
 static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features)
 {
-	if (!can_checksum_protocol(protocol, features)) {
+	if (!can_checksum_protocol(features, protocol)) {
 		features &= ~NETIF_F_ALL_CSUM;
 		features &= ~NETIF_F_SG;
 	} else if (illegal_highdma(skb->dev, skb)) {
-- 
cgit v1.2.3


From c506653d35249bb4738bb139c24362e1ae724bc1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 24 Jan 2011 13:16:16 -0800
Subject: net: arp_ioctl() must hold RTNL

Commit 941666c2e3e0 "net: RCU conversion of dev_getbyhwaddr() and
arp_ioctl()" introduced a regression, reported by Jamie Heilman.
"arp -Ds 192.168.2.41 eth0 pub" triggered the ASSERT_RTNL() assert
in pneigh_lookup()

Removing RTNL requirement from arp_ioctl() was a mistake, just revert
that part.

Reported-by: Jamie Heilman <jamie@audible.transient.net>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/core/dev.c')

diff --git a/net/core/dev.c b/net/core/dev.c
index 8393ec408cd4..1e5077d2cca6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -749,7 +749,8 @@ EXPORT_SYMBOL(dev_get_by_index);
  *	@ha: hardware address
  *
  *	Search for an interface by MAC address. Returns NULL if the device
- *	is not found or a pointer to the device. The caller must hold RCU
+ *	is not found or a pointer to the device.
+ *	The caller must hold RCU or RTNL.
  *	The returned device has not had its ref count increased
  *	and the caller must therefore be careful about locking
  *
-- 
cgit v1.2.3