From b595076a180a56d1bb170e6eceda6eb9d76f4cd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 1 Nov 2010 15:38:34 -0400 Subject: tree-wide: fix comment/printk typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "gadget", "through", "command", "maintain", "maintain", "controller", "address", "between", "initiali[zs]e", "instead", "function", "select", "already", "equal", "access", "management", "hierarchy", "registration", "interest", "relative", "memory", "offset", "already", Signed-off-by: Uwe Kleine-König Signed-off-by: Jiri Kosina --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 35dfb8318483..89204e8c0e14 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6096,7 +6096,7 @@ static void __net_exit default_device_exit(struct net *net) static void __net_exit default_device_exit_batch(struct list_head *net_list) { /* At exit all network devices most be removed from a network - * namespace. Do this in the reverse order of registeration. + * namespace. Do this in the reverse order of registration. * Do this across as many network namespaces as possible to * improve batching efficiency. */ -- cgit v1.2.3 From df32cc193ad88f7b1326b90af799c927b27f7654 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 1 Nov 2010 12:55:52 -0700 Subject: net: check queue_index from sock is valid for device In dev_pick_tx recompute the queue index if the value stored in the socket is greater than or equal to the number of real queues for the device. The saved index in the sock structure is not guaranteed to be appropriate for the egress device (this could happen on a route change or in presence of tunnelling). The result of the queue index being bad would be to return a bogus queue (crash could prersumably follow). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 35dfb8318483..0dd54a69dace 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2131,7 +2131,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, } else { struct sock *sk = skb->sk; queue_index = sk_tx_queue_get(sk); - if (queue_index < 0) { + if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) { queue_index = 0; if (dev->real_num_tx_queues > 1) -- cgit v1.2.3 From b194a3674fba6d9f9e470084d192c7cb99194a62 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 30 Oct 2010 11:08:52 +0000 Subject: net/core/dev.c: Update WARN uses Coalesce long formats. Add missing newlines. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/core/dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 0dd54a69dace..5968c822c999 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1817,8 +1817,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo) dev->ethtool_ops->get_drvinfo(dev, &info); - WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d " - "ip_summed=%d", + WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n", info.driver, dev ? dev->features : 0L, skb->sk ? skb->sk->sk_route_caps : 0L, skb->len, skb->data_len, skb->ip_summed); -- cgit v1.2.3 From e1e78db628b33c657944865e3bca01ee59cc5b80 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 29 Oct 2010 12:14:53 +0000 Subject: offloading: Make scatter/gather more tolerant of vlans. When checking if it is necessary to linearize a packet, we currently use vlan_features if the packet contains either an in-band or out- of-band vlan tag. However, in-band tags aren't special in any way for scatter/gather since they are part of the packet buffer and are simply more data to DMA. Therefore, only use vlan_features for out- of-band tags, which could potentially have some interaction with scatter/gather. Signed-off-by: Jesse Gross CC: Ben Hutchings Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 5968c822c999..0b403d503311 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1976,15 +1976,20 @@ static inline void skb_orphan_try(struct sk_buff *skb) static inline int skb_needs_linearize(struct sk_buff *skb, struct net_device *dev) { - int features = dev->features; + if (skb_is_nonlinear(skb)) { + int features = dev->features; - if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb)) - features &= dev->vlan_features; + if (vlan_tx_tag_present(skb)) + features &= dev->vlan_features; - return skb_is_nonlinear(skb) && - ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) || - (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) || - illegal_highdma(dev, skb)))); + return (skb_has_frag_list(skb) && + !(features & NETIF_F_FRAGLIST)) || + (skb_shinfo(skb)->nr_frags && + (!(features & NETIF_F_SG) || + illegal_highdma(dev, skb))); + } + + return 0; } int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, -- cgit v1.2.3 From c8d5bcd1aff89199cde4bd82c5c40fb704c8bba4 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 29 Oct 2010 12:14:54 +0000 Subject: offloading: Support multiple vlan tags in GSO. We assume that hardware TSO can't support multiple levels of vlan tags but we allow it to be done. Therefore, enable GSO to parse these tags so we can fallback to software. Signed-off-by: Jesse Gross CC: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 0b403d503311..368930a988e3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1794,16 +1794,18 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_type *ptype; __be16 type = skb->protocol; + int vlan_depth = ETH_HLEN; int err; - if (type == htons(ETH_P_8021Q)) { - struct vlan_ethhdr *veh; + while (type == htons(ETH_P_8021Q)) { + struct vlan_hdr *vh; - if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN))) + if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) return ERR_PTR(-EINVAL); - veh = (struct vlan_ethhdr *)skb->data; - type = veh->h_vlan_encapsulated_proto; + vh = (struct vlan_hdr *)(skb->data + vlan_depth); + type = vh->h_vlan_encapsulated_proto; + vlan_depth += VLAN_HLEN; } skb_reset_mac_header(skb); -- cgit v1.2.3 From 58e998c6d23988490162cef0784b19ea274d90bb Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 29 Oct 2010 12:14:55 +0000 Subject: offloading: Force software GSO for multiple vlan tags. We currently use vlan_features to check for TSO support if there is a vlan tag. However, it's quite likely that the NIC is not able to do TSO when there is an arbitrary number of tags. Therefore if there is more than one tag (in-band or out-of-band), fall back to software emulation. Signed-off-by: Jesse Gross CC: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 368930a988e3..8b500c3e0297 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1968,6 +1968,22 @@ static inline void skb_orphan_try(struct sk_buff *skb) } } +int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev) +{ + __be16 protocol = skb->protocol; + + if (protocol == htons(ETH_P_8021Q)) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + protocol = veh->h_vlan_encapsulated_proto; + } else if (!skb->vlan_tci) + return dev->features; + + if (protocol != htons(ETH_P_8021Q)) + return dev->features & dev->vlan_features; + else + return 0; +} + /* * Returns true if either: * 1. skb has frag_list and the device doesn't support FRAGLIST, or -- cgit v1.2.3 From ed9af2e839c06c18f721da2c768fbb444c4a10e5 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 9 Nov 2010 10:47:30 +0000 Subject: net: Move TX queue allocation to alloc_netdev_mq TX queues are now allocated in alloc_netdev_mq and freed in free_netdev. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 8b500c3e0297..75490670e0a9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5136,10 +5136,6 @@ int register_netdevice(struct net_device *dev) if (ret) goto out; - ret = netif_alloc_netdev_queues(dev); - if (ret) - goto out; - netdev_init_queues(dev); /* Init, if this function is available */ @@ -5599,6 +5595,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev->num_tx_queues = queue_count; dev->real_num_tx_queues = queue_count; + if (netif_alloc_netdev_queues(dev)) + goto free_pcpu; #ifdef CONFIG_RPS dev->num_rx_queues = queue_count; @@ -5619,6 +5617,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, free_pcpu: free_percpu(dev->pcpu_refcnt); + kfree(dev->_tx); free_p: kfree(p); return NULL; -- cgit v1.2.3 From fe8222406c8277a21172479d3a8283d31c209028 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 9 Nov 2010 10:47:38 +0000 Subject: net: Simplify RX queue allocation This patch move RX queue allocation to alloc_netdev_mq and freeing of the queues to free_netdev (symmetric to TX queue allocation). Each kobject RX queue takes a reference to the queue's device so that the device can't be freed before all the kobjects have been released-- this obviates the need for reference counts specific to RX queues. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 75490670e0a9..8725d168d1f5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5051,12 +5051,8 @@ static int netif_alloc_rx_queues(struct net_device *dev) } dev->_rx = rx; - /* - * Set a pointer to first element in the array which holds the - * reference count. - */ for (i = 0; i < count; i++) - rx[i].first = rx; + rx[i].dev = dev; #endif return 0; } @@ -5132,10 +5128,6 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; - ret = netif_alloc_rx_queues(dev); - if (ret) - goto out; - netdev_init_queues(dev); /* Init, if this function is available */ @@ -5601,6 +5593,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, #ifdef CONFIG_RPS dev->num_rx_queues = queue_count; dev->real_num_rx_queues = queue_count; + if (netif_alloc_rx_queues(dev)) + goto free_pcpu; #endif dev->gso_max_size = GSO_MAX_SIZE; @@ -5618,6 +5612,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, free_pcpu: free_percpu(dev->pcpu_refcnt); kfree(dev->_tx); +#ifdef CONFIG_RPS + kfree(dev->_rx); +#endif + free_p: kfree(p); return NULL; @@ -5639,6 +5637,9 @@ void free_netdev(struct net_device *dev) release_net(dev_net(dev)); kfree(dev->_tx); +#ifdef CONFIG_RPS + kfree(dev->_rx); +#endif kfree(rcu_dereference_raw(dev->ingress_queue)); -- cgit v1.2.3 From 6b35308850e1679741e8b646cfb7bb3ab5369888 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 15 Nov 2010 20:15:03 -0800 Subject: net: Export netif_get_vlan_features(). ERROR: "netif_get_vlan_features" [drivers/net/xen-netfront.ko] undefined! Reported-by: Stephen Rothwell Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 8725d168d1f5..381b8e280162 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1983,6 +1983,7 @@ int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev) else return 0; } +EXPORT_SYMBOL(netif_get_vlan_features); /* * Returns true if either: -- cgit v1.2.3 From 3853b5841c01a3f492fe137afaad9c209e5162c6 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 21 Nov 2010 13:17:29 +0000 Subject: xps: Improvements in TX queue selection In dev_pick_tx, don't do work in calculating queue index or setting the index in the sock unless the device has more than one queue. This allows the sock to be set only with a queue index of a multi-queue device which is desirable if device are stacked like in a tunnel. We also allow the mapping of a socket to queue to be changed. To maintain in order packet transmission a flag (ooo_okay) has been added to the sk_buff structure. If a transport layer sets this flag on a packet, the transmit queue can be changed for the socket. Presumably, the transport would set this if there was no possbility of creating OOO packets (for instance, there are no packets in flight for the socket). This patch includes the modification in TCP output for setting this flag. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 381b8e280162..7b17674a29ec 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2148,20 +2148,24 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, int queue_index; const struct net_device_ops *ops = dev->netdev_ops; - if (ops->ndo_select_queue) { + if (dev->real_num_tx_queues == 1) + queue_index = 0; + else if (ops->ndo_select_queue) { queue_index = ops->ndo_select_queue(dev, skb); queue_index = dev_cap_txqueue(dev, queue_index); } else { struct sock *sk = skb->sk; queue_index = sk_tx_queue_get(sk); - if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) { - queue_index = 0; - if (dev->real_num_tx_queues > 1) - queue_index = skb_tx_hash(dev, skb); + if (queue_index < 0 || skb->ooo_okay || + queue_index >= dev->real_num_tx_queues) { + int old_index = queue_index; - if (sk) { - struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1); + queue_index = skb_tx_hash(dev, skb); + + if (queue_index != old_index && sk) { + struct dst_entry *dst = + rcu_dereference_check(sk->sk_dst_cache, 1); if (dst && skb_dst(skb) == dst) sk_tx_queue_set(sk, queue_index); -- cgit v1.2.3 From 1d24eb4815d1e0e8b451ecc546645f8ef1176d4f Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 21 Nov 2010 13:17:27 +0000 Subject: xps: Transmit Packet Steering This patch implements transmit packet steering (XPS) for multiqueue devices. XPS selects a transmit queue during packet transmission based on configuration. This is done by mapping the CPU transmitting the packet to a queue. This is the transmit side analogue to RPS-- where RPS is selecting a CPU based on receive queue, XPS selects a queue based on the CPU (previously there was an XPS patch from Eric Dumazet, but that might more appropriately be called transmit completion steering). Each transmit queue can be associated with a number of CPUs which will use the queue to send packets. This is configured as a CPU mask on a per queue basis in: /sys/class/net/eth/queues/tx-/xps_cpus The mappings are stored per device in an inverted data structure that maps CPUs to queues. In the netdevice structure this is an array of num_possible_cpu structures where each structure holds and array of queue_indexes for queues which that CPU can use. The benefits of XPS are improved locality in the per queue data structures. Also, transmit completions are more likely to be done nearer to the sending thread, so this should promote locality back to the socket on free (e.g. UDP). The benefits of XPS are dependent on cache hierarchy, application load, and other factors. XPS would nominally be configured so that a queue would only be shared by CPUs which are sharing a cache, the degenerative configuration woud be that each CPU has it's own queue. Below are some benchmark results which show the potential benfit of this patch. The netperf test has 500 instances of netperf TCP_RR test with 1 byte req. and resp. bnx2x on 16 core AMD XPS (16 queues, 1 TX queue per CPU) 1234K at 100% CPU No XPS (16 queues) 996K at 100% CPU Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 7b17674a29ec..c852f0038a08 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1557,12 +1557,16 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) */ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) { + int rc; + if (txq < 1 || txq > dev->num_tx_queues) return -EINVAL; if (dev->reg_state == NETREG_REGISTERED) { ASSERT_RTNL(); + rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, + txq); if (txq < dev->real_num_tx_queues) qdisc_reset_all_tx_gt(dev, txq); } @@ -2142,6 +2146,44 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) return queue_index; } +static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) +{ +#ifdef CONFIG_RPS + struct xps_dev_maps *dev_maps; + struct xps_map *map; + int queue_index = -1; + + rcu_read_lock(); + dev_maps = rcu_dereference(dev->xps_maps); + if (dev_maps) { + map = rcu_dereference( + dev_maps->cpu_map[raw_smp_processor_id()]); + if (map) { + if (map->len == 1) + queue_index = map->queues[0]; + else { + u32 hash; + if (skb->sk && skb->sk->sk_hash) + hash = skb->sk->sk_hash; + else + hash = (__force u16) skb->protocol ^ + skb->rxhash; + hash = jhash_1word(hash, hashrnd); + queue_index = map->queues[ + ((u64)hash * map->len) >> 32]; + } + if (unlikely(queue_index >= dev->real_num_tx_queues)) + queue_index = -1; + } + } + rcu_read_unlock(); + + return queue_index; +#else + return -1; +#endif +} + static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { @@ -2161,7 +2203,9 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, queue_index >= dev->real_num_tx_queues) { int old_index = queue_index; - queue_index = skb_tx_hash(dev, skb); + queue_index = get_xps_queue(dev, skb); + if (queue_index < 0) + queue_index = skb_tx_hash(dev, skb); if (queue_index != old_index && sk) { struct dst_entry *dst = @@ -5066,6 +5110,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev) { unsigned int count = dev->num_tx_queues; struct netdev_queue *tx; + int i; BUG_ON(count < 1); @@ -5076,6 +5121,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev) return -ENOMEM; } dev->_tx = tx; + + for (i = 0; i < count; i++) + tx[i].dev = dev; + return 0; } @@ -5083,8 +5132,6 @@ static void netdev_init_one_queue(struct net_device *dev, struct netdev_queue *queue, void *_unused) { - queue->dev = dev; - /* Initialize queue lock */ spin_lock_init(&queue->_xmit_lock); netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); -- cgit v1.2.3 From bf26414510103448ad3dc069c7422462f03ea3d7 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 26 Nov 2010 08:36:09 +0000 Subject: xps: Add CONFIG_XPS This patch adds XPS_CONFIG option to enable and disable XPS. This is done in the same manner as RPS_CONFIG. This is also fixes build failure in XPS code when SMP is not enabled. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index c852f0038a08..3259d2c323a6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1567,6 +1567,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues, txq); + if (rc) + return rc; + if (txq < dev->real_num_tx_queues) qdisc_reset_all_tx_gt(dev, txq); } @@ -2148,7 +2151,7 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) { -#ifdef CONFIG_RPS +#ifdef CONFIG_XPS struct xps_dev_maps *dev_maps; struct xps_map *map; int queue_index = -1; @@ -5085,9 +5088,9 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, } EXPORT_SYMBOL(netif_stacked_transfer_operstate); +#ifdef CONFIG_RPS static int netif_alloc_rx_queues(struct net_device *dev) { -#ifdef CONFIG_RPS unsigned int i, count = dev->num_rx_queues; struct netdev_rx_queue *rx; @@ -5102,9 +5105,9 @@ static int netif_alloc_rx_queues(struct net_device *dev) for (i = 0; i < count; i++) rx[i].dev = dev; -#endif return 0; } +#endif static int netif_alloc_netdev_queues(struct net_device *dev) { -- cgit v1.2.3 From f2cd2d3e9b3ef960612e362f0ad129d735452df2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 29 Nov 2010 08:14:37 +0000 Subject: net sched: use xps information for qdisc NUMA affinity Allocate qdisc memory according to NUMA properties of cpus included in xps map. To be effective, qdisc should be (re)setup after changes of /sys/class/net/eth/queues/tx-/xps_cpus I added a numa_node field in struct netdev_queue, containing NUMA node if all cpus included in xps_cpus share same node, else -1. Signed-off-by: Eric Dumazet Cc: Ben Hutchings Cc: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3259d2c323a6..cd2437495428 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5125,9 +5125,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev) } dev->_tx = tx; - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { + netdev_queue_numa_node_write(&tx[i], -1); tx[i].dev = dev; - + } return 0; } -- cgit v1.2.3 From 7903264402546f45f9bac8ad2bfdb00d00eb124a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 30 Nov 2010 06:38:00 +0000 Subject: net: Fix too optimistic NETIF_F_HW_CSUM features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NETIF_F_HW_CSUM is a superset of NETIF_F_IP_CSUM+NETIF_F_IPV6_CSUM, but some drivers miss the difference. Fix this and also fix UFO dependency on checksumming offload as it makes the same mistake in assumptions. Signed-off-by: Michał Mirosław Acked-by: Jon Mason Signed-off-by: David S. Miller --- net/core/dev.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index cd2437495428..55ff66fabce4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5041,10 +5041,13 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) } if (features & NETIF_F_UFO) { - if (!(features & NETIF_F_GEN_CSUM)) { + /* maybe split UFO into V4 and V6? */ + if (!((features & NETIF_F_GEN_CSUM) || + (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)) + == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { if (name) printk(KERN_ERR "%s: Dropping NETIF_F_UFO " - "since no NETIF_F_HW_CSUM feature.\n", + "since no checksum offload features.\n", name); features &= ~NETIF_F_UFO; } -- cgit v1.2.3 From aa9421041128abb4d269ee1dc502ff65fb3b7d69 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 4 Dec 2010 02:31:41 +0000 Subject: net: init ingress queue The dev field of ingress queue is forgot to initialized, then NULL pointer dereference happens in qdisc_alloc(). Move inits of tx queues to netif_alloc_netdev_queues(). Signed-off-by: Changli Gao Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 55ff66fabce4..ee605c0867e7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5112,11 +5112,21 @@ static int netif_alloc_rx_queues(struct net_device *dev) } #endif +static void netdev_init_one_queue(struct net_device *dev, + struct netdev_queue *queue, void *_unused) +{ + /* Initialize queue lock */ + spin_lock_init(&queue->_xmit_lock); + netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); + queue->xmit_lock_owner = -1; + netdev_queue_numa_node_write(queue, -1); + queue->dev = dev; +} + static int netif_alloc_netdev_queues(struct net_device *dev) { unsigned int count = dev->num_tx_queues; struct netdev_queue *tx; - int i; BUG_ON(count < 1); @@ -5128,27 +5138,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev) } dev->_tx = tx; - for (i = 0; i < count; i++) { - netdev_queue_numa_node_write(&tx[i], -1); - tx[i].dev = dev; - } - return 0; -} - -static void netdev_init_one_queue(struct net_device *dev, - struct netdev_queue *queue, - void *_unused) -{ - /* Initialize queue lock */ - spin_lock_init(&queue->_xmit_lock); - netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); - queue->xmit_lock_owner = -1; -} - -static void netdev_init_queues(struct net_device *dev) -{ netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); spin_lock_init(&dev->tx_global_lock); + + return 0; } /** @@ -5187,8 +5180,6 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; - netdev_init_queues(dev); - /* Init, if this function is available */ if (dev->netdev_ops->ndo_init) { ret = dev->netdev_ops->ndo_init(dev); -- cgit v1.2.3 From 941666c2e3e0f9f6a1cb5808d02352d445bd702c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 5 Dec 2010 01:23:53 +0000 Subject: net: RCU conversion of dev_getbyhwaddr() and arp_ioctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Le dimanche 05 décembre 2010 à 09:19 +0100, Eric Dumazet a écrit : > Hmm.. > > If somebody can explain why RTNL is held in arp_ioctl() (and therefore > in arp_req_delete()), we might first remove RTNL use in arp_ioctl() so > that your patch can be applied. > > Right now it is not good, because RTNL wont be necessarly held when you > are going to call arp_invalidate() ? While doing this analysis, I found a refcount bug in llc, I'll send a patch for net-2.6 Meanwhile, here is the patch for net-next-2.6 Your patch then can be applied after mine. Thanks [PATCH] net: RCU conversion of dev_getbyhwaddr() and arp_ioctl() dev_getbyhwaddr() was called under RTNL. Rename it to dev_getbyhwaddr_rcu() and change all its caller to now use RCU locking instead of RTNL. Change arp_ioctl() to use RCU instead of RTNL locking. Note: this fix a dev refcount bug in llc Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index ee605c0867e7..822b15b8d11c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -743,34 +743,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) EXPORT_SYMBOL(dev_get_by_index); /** - * dev_getbyhwaddr - find a device by its hardware address + * dev_getbyhwaddr_rcu - find a device by its hardware address * @net: the applicable net namespace * @type: media type of device * @ha: hardware address * * Search for an interface by MAC address. Returns NULL if the device - * is not found or a pointer to the device. The caller must hold the - * rtnl semaphore. The returned device has not had its ref count increased + * is not found or a pointer to the device. The caller must hold RCU + * The returned device has not had its ref count increased * and the caller must therefore be careful about locking * - * BUGS: - * If the API was consistent this would be __dev_get_by_hwaddr */ -struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) +struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, + const char *ha) { struct net_device *dev; - ASSERT_RTNL(); - - for_each_netdev(net, dev) + for_each_netdev_rcu(net, dev) if (dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len)) return dev; return NULL; } -EXPORT_SYMBOL(dev_getbyhwaddr); +EXPORT_SYMBOL(dev_getbyhwaddr_rcu); struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) { -- cgit v1.2.3 From 15c2d75f49189e1769c5e8f5f099d03d055c4910 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 7 Dec 2010 00:30:37 +0000 Subject: net: call dev_queue_xmit_nit() after skb_dst_drop() Avoid some atomic ops on dst refcount, calling dev_queue_xmit_nit() after skb_dst_drop() in dev_hard_start_xmit(). When queueing a packet into af_packet socket, we drop dst anyway. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 822b15b8d11c..d28b3a023bb2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2022,9 +2022,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int rc = NETDEV_TX_OK; if (likely(!skb->next)) { - if (!list_empty(&ptype_all)) - dev_queue_xmit_nit(skb, dev); - /* * If device doesnt need skb->dst, release it right now while * its hot in this cpu cache @@ -2032,6 +2029,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (dev->priv_flags & IFF_XMIT_DST_RELEASE) skb_dst_drop(skb); + if (!list_empty(&ptype_all)) + dev_queue_xmit_nit(skb, dev); + skb_orphan_try(skb); if (vlan_tx_tag_present(skb) && -- cgit v1.2.3 From a3d22a68d752ccc1a01bb0a64dd70b7a98bf9e23 Mon Sep 17 00:00:00 2001 From: Vladislav Zolotarov Date: Mon, 13 Dec 2010 06:27:10 +0000 Subject: bnx2x: Take the distribution range definition out of skb_tx_hash() Move the calcualation of the Tx hash for a given hash range into a separate function and define the skb_tx_hash(), which calculates a Tx hash for a [0; dev->real_num_tx_queues - 1] hash values range, using this function (__skb_tx_hash()). Signed-off-by: Vladislav Zolotarov Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- net/core/dev.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index d28b3a023bb2..b25dd087f06a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2112,14 +2112,19 @@ out: static u32 hashrnd __read_mostly; -u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) +/* + * Returns a Tx hash based on the given packet descriptor a Tx queues' number + * to be used as a distribution range. + */ +u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, + unsigned int num_tx_queues) { u32 hash; if (skb_rx_queue_recorded(skb)) { hash = skb_get_rx_queue(skb); - while (unlikely(hash >= dev->real_num_tx_queues)) - hash -= dev->real_num_tx_queues; + while (unlikely(hash >= num_tx_queues)) + hash -= num_tx_queues; return hash; } @@ -2129,9 +2134,9 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) hash = (__force u16) skb->protocol ^ skb->rxhash; hash = jhash_1word(hash, hashrnd); - return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); + return (u16) (((u64) hash * num_tx_queues) >> 32); } -EXPORT_SYMBOL(skb_tx_hash); +EXPORT_SYMBOL(__skb_tx_hash); static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) { -- cgit v1.2.3 From b236da6931e2482bfe44a7865dd4e7bb036f3496 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 14 Dec 2010 03:09:15 +0000 Subject: net: use NUMA_NO_NODE instead of the magic number -1 Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index b25dd087f06a..7ac26d2b9722 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5121,7 +5121,7 @@ static void netdev_init_one_queue(struct net_device *dev, spin_lock_init(&queue->_xmit_lock); netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); queue->xmit_lock_owner = -1; - netdev_queue_numa_node_write(queue, -1); + netdev_queue_numa_node_write(queue, NUMA_NO_NODE); queue->dev = dev; } -- cgit v1.2.3 From 443457242beb6716b43db4d62fe148eab5515505 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Mon, 13 Dec 2010 12:44:07 +0000 Subject: net: factorize sync-rcu call in unregister_netdevice_many Add dev_close_many and dev_deactivate_many to factorize another sync-rcu operation on the netdevice unregister path. $ modprobe dummy numdummies=10000 $ ip link set dev dummy* up $ time rmmod dummy Without the patch With the patch real 0m 24.63s real 0m 5.15s user 0m 0.00s user 0m 0.00s sys 0m 6.05s sys 0m 5.14s Signed-off-by: Octavian Purdila Signed-off-by: David S. Miller --- net/core/dev.c | 118 +++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 76 insertions(+), 42 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 7ac26d2b9722..794b20de5d44 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1222,52 +1222,90 @@ int dev_open(struct net_device *dev) } EXPORT_SYMBOL(dev_open); -static int __dev_close(struct net_device *dev) +static int __dev_close_many(struct list_head *head) { - const struct net_device_ops *ops = dev->netdev_ops; + struct net_device *dev; ASSERT_RTNL(); might_sleep(); - /* - * Tell people we are going down, so that they can - * prepare to death, when device is still operating. - */ - call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); + list_for_each_entry(dev, head, unreg_list) { + /* + * Tell people we are going down, so that they can + * prepare to death, when device is still operating. + */ + call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); - clear_bit(__LINK_STATE_START, &dev->state); + clear_bit(__LINK_STATE_START, &dev->state); - /* Synchronize to scheduled poll. We cannot touch poll list, - * it can be even on different cpu. So just clear netif_running(). - * - * dev->stop() will invoke napi_disable() on all of it's - * napi_struct instances on this device. - */ - smp_mb__after_clear_bit(); /* Commit netif_running(). */ + /* Synchronize to scheduled poll. We cannot touch poll list, it + * can be even on different cpu. So just clear netif_running(). + * + * dev->stop() will invoke napi_disable() on all of it's + * napi_struct instances on this device. + */ + smp_mb__after_clear_bit(); /* Commit netif_running(). */ + } - dev_deactivate(dev); + dev_deactivate_many(head); - /* - * Call the device specific close. This cannot fail. - * Only if device is UP - * - * We allow it to be called even after a DETACH hot-plug - * event. - */ - if (ops->ndo_stop) - ops->ndo_stop(dev); + list_for_each_entry(dev, head, unreg_list) { + const struct net_device_ops *ops = dev->netdev_ops; - /* - * Device is now down. - */ + /* + * Call the device specific close. This cannot fail. + * Only if device is UP + * + * We allow it to be called even after a DETACH hot-plug + * event. + */ + if (ops->ndo_stop) + ops->ndo_stop(dev); + + /* + * Device is now down. + */ + + dev->flags &= ~IFF_UP; + + /* + * Shutdown NET_DMA + */ + net_dmaengine_put(); + } - dev->flags &= ~IFF_UP; + return 0; +} + +static int __dev_close(struct net_device *dev) +{ + LIST_HEAD(single); + + list_add(&dev->unreg_list, &single); + return __dev_close_many(&single); +} + +int dev_close_many(struct list_head *head) +{ + struct net_device *dev, *tmp; + LIST_HEAD(tmp_list); + + list_for_each_entry_safe(dev, tmp, head, unreg_list) + if (!(dev->flags & IFF_UP)) + list_move(&dev->unreg_list, &tmp_list); + + __dev_close_many(head); /* - * Shutdown NET_DMA + * Tell people we are down */ - net_dmaengine_put(); + list_for_each_entry(dev, head, unreg_list) { + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); + call_netdevice_notifiers(NETDEV_DOWN, dev); + } + /* rollback_registered_many needs the complete original list */ + list_splice(&tmp_list, head); return 0; } @@ -1282,16 +1320,10 @@ static int __dev_close(struct net_device *dev) */ int dev_close(struct net_device *dev) { - if (!(dev->flags & IFF_UP)) - return 0; - - __dev_close(dev); + LIST_HEAD(single); - /* - * Tell people we are down - */ - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); - call_netdevice_notifiers(NETDEV_DOWN, dev); + list_add(&dev->unreg_list, &single); + dev_close_many(&single); return 0; } @@ -4963,10 +4995,12 @@ static void rollback_registered_many(struct list_head *head) } BUG_ON(dev->reg_state != NETREG_REGISTERED); + } - /* If device is running, close it first. */ - dev_close(dev); + /* If device is running, close it first. */ + dev_close_many(head); + list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ unlist_netdevice(dev); -- cgit v1.2.3 From 55508d601dab7df5cbcc7a63f4be8620eface204 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Tue, 14 Dec 2010 15:24:08 +0000 Subject: net: Use skb_checksum_start_offset() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace skb->csum_start - skb_headroom(skb) with skb_checksum_start_offset(). Note for usb/smsc95xx: skb->data - skb->head == skb_headroom(skb). Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 794b20de5d44..92d414ac0e30 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1793,7 +1793,7 @@ int skb_checksum_help(struct sk_buff *skb) goto out_set_summed; } - offset = skb->csum_start - skb_headroom(skb); + offset = skb_checksum_start_offset(skb); BUG_ON(offset >= skb_headlen(skb)); csum = skb_checksum(skb, offset, skb->len - offset, 0); @@ -2090,8 +2090,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, * checksumming here. */ if (skb->ip_summed == CHECKSUM_PARTIAL) { - skb_set_transport_header(skb, skb->csum_start - - skb_headroom(skb)); + skb_set_transport_header(skb, + skb_checksum_start_offset(skb)); if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb)) goto out_kfree_skb; -- cgit v1.2.3 From 71d9dec24dce548bf699815c976cf063ad9257e2 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 15 Dec 2010 19:57:25 +0000 Subject: net: increase skb->users instead of skb_clone() In dev_queue_xmit_nit(), we have to clone skbs as we need to mangle skbs, however, we don't need to clone skbs for all the packet_types. Except for the first packet_type, we increase skb->users instead of skb_clone(). Signed-off-by: Changli Gao Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 92d414ac0e30..59877290bca7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1528,6 +1528,14 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(dev_forward_skb); +static inline int deliver_skb(struct sk_buff *skb, + struct packet_type *pt_prev, + struct net_device *orig_dev) +{ + atomic_inc(&skb->users); + return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); +} + /* * Support routine. Sends outgoing frames to any network * taps currently in use. @@ -1536,6 +1544,8 @@ EXPORT_SYMBOL_GPL(dev_forward_skb); static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; + struct sk_buff *skb2 = NULL; + struct packet_type *pt_prev = NULL; #ifdef CONFIG_NET_CLS_ACT if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) @@ -1552,7 +1562,13 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) if ((ptype->dev == dev || !ptype->dev) && (ptype->af_packet_priv == NULL || (struct sock *)ptype->af_packet_priv != skb->sk)) { - struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + if (pt_prev) { + deliver_skb(skb2, pt_prev, skb->dev); + pt_prev = ptype; + continue; + } + + skb2 = skb_clone(skb, GFP_ATOMIC); if (!skb2) break; @@ -1574,9 +1590,11 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb2->transport_header = skb2->network_header; skb2->pkt_type = PACKET_OUTGOING; - ptype->func(skb2, skb->dev, ptype, skb->dev); + pt_prev = ptype; } } + if (pt_prev) + pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); rcu_read_unlock(); } @@ -2820,14 +2838,6 @@ static void net_tx_action(struct softirq_action *h) } } -static inline int deliver_skb(struct sk_buff *skb, - struct packet_type *pt_prev, - struct net_device *orig_dev) -{ - atomic_inc(&skb->users); - return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); -} - #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) /* This hook is defined here for ATM LANE */ -- cgit v1.2.3 From 70978182d431e0348e6ef711d0f962d12c03bc46 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Dec 2010 21:22:51 +0000 Subject: net: timestamp cloned packet in dev_queue_xmit_nit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Le vendredi 17 décembre 2010 à 10:26 +0100, Eric Dumazet a écrit : > > I think we can add this after latest Changli patch : > > He does one skb_clone() before calling the sniffers. > We could set timestamp on this clone, instead of original skb. > > Problem solved. > [PATCH net-next-2.6] net: timestamp cloned packet in dev_queue_xmit_nit Now we do one clone of skb if at least one sniffer might take packet, we also can do the skb timestamping on the clone and let original packet unchanged. This is a generalization of commit 8caf153974f2 (net: sch_netem: Fix an inconsistency in ingress netem timestamps.) This way, we can have a good idea when packets are delivered to our stack (tcpdump -i ifb0), while a tcpdump on original device gives timestamps right before ingressing. This also speedup our stack, avoiding taking timestamps if not needed. Signed-off-by: Eric Dumazet Cc: Changli Gao Cc: Patrick McHardy Cc: Jarek Poplawski Acked-by: Changli Gao Signed-off-by: David S. Miller --- net/core/dev.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 59877290bca7..a215269d2e35 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1547,13 +1547,6 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) struct sk_buff *skb2 = NULL; struct packet_type *pt_prev = NULL; -#ifdef CONFIG_NET_CLS_ACT - if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) - net_timestamp_set(skb); -#else - net_timestamp_set(skb); -#endif - rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_all, list) { /* Never send packets back to the socket @@ -1572,6 +1565,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) if (!skb2) break; + net_timestamp_set(skb2); + /* skb->nh should be correctly set by sender, so that the second statement is just protection against buggy protocols. -- cgit v1.2.3 From 9497a0518e8183959e45da05f317f1cb36f2cd7c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:30 +0000 Subject: net offloading: Accept NETIF_F_HW_CSUM for all protocols. We currently only have software fallback for one type of checksum: the TCP/UDP one's complement. This means that a protocol that uses hardware offloading for a different type of checksum (FCoE, SCTP) must directly check the device's features and do the right thing ahead of time. By the time we get to dev_can_checksum(), we're only deciding whether to apply the one algorithm in software or hardware. NETIF_F_HW_CSUM has the same capabilities as the software version, so we should always use it if present. The primary advantage of this is multiply tagged vlans can use hardware checksumming. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a215269d2e35..d8befd06da04 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1734,7 +1734,7 @@ EXPORT_SYMBOL(netif_device_attach); static bool can_checksum_protocol(unsigned long features, __be16 protocol) { - return ((features & NETIF_F_NO_CSUM) || + return ((features & NETIF_F_GEN_CSUM) || ((features & NETIF_F_V4_CSUM) && protocol == htons(ETH_P_IP)) || ((features & NETIF_F_V6_CSUM) && -- cgit v1.2.3 From f01a5236bd4b140198fbcc550f085e8361fd73fa Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:31 +0000 Subject: net offloading: Generalize netif_get_vlan_features(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit netif_get_vlan_features() is currently only used by netif_needs_gso(), so it only concerns itself with GSO features. However, several other places also should take into account the contents of the packet when deciding whether to offload to hardware. This generalizes the function to return features about all of the various forms of offloading. Since offloads tend to be linked together, this avoids duplicating the logic in each location (i.e. the scatter/gather code also needs the checksum logic). Suggested-by: Michał Mirosław Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index d8befd06da04..a51dfd7b56fb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2017,22 +2017,41 @@ static inline void skb_orphan_try(struct sk_buff *skb) } } -int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev) +static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) +{ + if (!can_checksum_protocol(protocol, features)) { + features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_SG; + } else if (illegal_highdma(skb->dev, skb)) { + features &= ~NETIF_F_SG; + } + + return features; +} + +int netif_skb_features(struct sk_buff *skb) { __be16 protocol = skb->protocol; + int features = skb->dev->features; if (protocol == htons(ETH_P_8021Q)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; - } else if (!skb->vlan_tci) - return dev->features; + } else if (!vlan_tx_tag_present(skb)) { + return harmonize_features(skb, protocol, features); + } - if (protocol != htons(ETH_P_8021Q)) - return dev->features & dev->vlan_features; - else - return 0; + features &= skb->dev->vlan_features; + + if (protocol != htons(ETH_P_8021Q)) { + return harmonize_features(skb, protocol, features); + } else { + features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | + NETIF_F_GEN_CSUM; + return harmonize_features(skb, protocol, features); + } } -EXPORT_SYMBOL(netif_get_vlan_features); +EXPORT_SYMBOL(netif_skb_features); /* * Returns true if either: -- cgit v1.2.3 From fc741216db156994c554ac31c1151fe0e00d8f0e Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:32 +0000 Subject: net offloading: Pass features into netif_needs_gso(). Now that there is a single function that can compute the device features relevant to a packet, we don't want to run it for each offload. This converts netif_needs_gso() to take the features of the device, rather than computing them itself. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a51dfd7b56fb..1444ed3861a0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2086,6 +2086,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int rc = NETDEV_TX_OK; if (likely(!skb->next)) { + int features; + /* * If device doesnt need skb->dst, release it right now while * its hot in this cpu cache @@ -2098,8 +2100,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb_orphan_try(skb); + features = netif_skb_features(skb); + if (vlan_tx_tag_present(skb) && - !(dev->features & NETIF_F_HW_VLAN_TX)) { + !(features & NETIF_F_HW_VLAN_TX)) { skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); if (unlikely(!skb)) goto out; @@ -2107,7 +2111,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } - if (netif_needs_gso(dev, skb)) { + if (netif_needs_gso(skb, features)) { if (unlikely(dev_gso_segment(skb))) goto out_kfree_skb; if (skb->next) -- cgit v1.2.3 From 91ecb63c074d802f8cf103f1dafb4aed24d0f24c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:33 +0000 Subject: net offloading: Convert dev_gso_segment() to use precomputed features. This switches dev_gso_segment() to use the device features computed by the centralized routine. In doing so, it fixes a problem where it would always use dev->features, instead of those appropriate to the number of vlan tags if any are present. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 1444ed3861a0..4cd3e84e1294 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1971,16 +1971,14 @@ static void dev_gso_skb_destructor(struct sk_buff *skb) /** * dev_gso_segment - Perform emulated hardware segmentation on skb. * @skb: buffer to segment + * @features: device features as applicable to this skb * * This function segments the given skb and stores the list of segments * in skb->next. */ -static int dev_gso_segment(struct sk_buff *skb) +static int dev_gso_segment(struct sk_buff *skb, int features) { - struct net_device *dev = skb->dev; struct sk_buff *segs; - int features = dev->features & ~(illegal_highdma(dev, skb) ? - NETIF_F_SG : 0); segs = skb_gso_segment(skb, features); @@ -2112,7 +2110,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, } if (netif_needs_gso(skb, features)) { - if (unlikely(dev_gso_segment(skb))) + if (unlikely(dev_gso_segment(skb, features))) goto out_kfree_skb; if (skb->next) goto gso; -- cgit v1.2.3 From 02932ce9e2c136e6fab2571c8e0dd69ae8ec9853 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:34 +0000 Subject: net offloading: Convert skb_need_linearize() to use precomputed features. This switches skb_need_linearize() to use the features that have been centrally computed. In doing so, this fixes a problem where scatter/gather should not be used because the card does not support checksum offloading on that type of packet. On device registration we only check that some form of checksum offloading is available if scatter/gatther is enabled but we must also check at transmission time. Examples of this include IPv6 or vlan packets on a NIC that only supports IPv4 offloading. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 4cd3e84e1294..2f838f1d222c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2059,22 +2059,13 @@ EXPORT_SYMBOL(netif_skb_features); * support DMA from it. */ static inline int skb_needs_linearize(struct sk_buff *skb, - struct net_device *dev) + int features) { - if (skb_is_nonlinear(skb)) { - int features = dev->features; - - if (vlan_tx_tag_present(skb)) - features &= dev->vlan_features; - - return (skb_has_frag_list(skb) && - !(features & NETIF_F_FRAGLIST)) || + return skb_is_nonlinear(skb) && + ((skb_has_frag_list(skb) && + !(features & NETIF_F_FRAGLIST)) || (skb_shinfo(skb)->nr_frags && - (!(features & NETIF_F_SG) || - illegal_highdma(dev, skb))); - } - - return 0; + !(features & NETIF_F_SG))); } int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, @@ -2115,7 +2106,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (skb->next) goto gso; } else { - if (skb_needs_linearize(skb, dev) && + if (skb_needs_linearize(skb, features) && __skb_linearize(skb)) goto out_kfree_skb; -- cgit v1.2.3 From 0363466866d901fbc658f4e63dd61e7cc93dd0af Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:35 +0000 Subject: net offloading: Convert checksums to use centrally computed features. In order to compute the features for other offloads (primarily scatter/gather), we need to first check the ability of the NIC to offload the checksum for the packet. Since we have already computed this, we can directly use the result instead of figuring it out again. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 2f838f1d222c..3fe443be4b15 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1732,33 +1732,6 @@ void netif_device_attach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_attach); -static bool can_checksum_protocol(unsigned long features, __be16 protocol) -{ - return ((features & NETIF_F_GEN_CSUM) || - ((features & NETIF_F_V4_CSUM) && - protocol == htons(ETH_P_IP)) || - ((features & NETIF_F_V6_CSUM) && - protocol == htons(ETH_P_IPV6)) || - ((features & NETIF_F_FCOE_CRC) && - protocol == htons(ETH_P_FCOE))); -} - -static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) -{ - __be16 protocol = skb->protocol; - int features = dev->features; - - if (vlan_tx_tag_present(skb)) { - features &= dev->vlan_features; - } else if (protocol == htons(ETH_P_8021Q)) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - protocol = veh->h_vlan_encapsulated_proto; - features &= dev->vlan_features; - } - - return can_checksum_protocol(features, protocol); -} - /** * skb_dev_set -- assign a new device to a buffer * @skb: buffer for the new device @@ -2015,6 +1988,17 @@ static inline void skb_orphan_try(struct sk_buff *skb) } } +static bool can_checksum_protocol(unsigned long features, __be16 protocol) +{ + return ((features & NETIF_F_GEN_CSUM) || + ((features & NETIF_F_V4_CSUM) && + protocol == htons(ETH_P_IP)) || + ((features & NETIF_F_V6_CSUM) && + protocol == htons(ETH_P_IPV6)) || + ((features & NETIF_F_FCOE_CRC) && + protocol == htons(ETH_P_FCOE))); +} + static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) { if (!can_checksum_protocol(protocol, features)) { @@ -2117,7 +2101,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (skb->ip_summed == CHECKSUM_PARTIAL) { skb_set_transport_header(skb, skb_checksum_start_offset(skb)); - if (!dev_can_checksum(dev, skb) && + if (!(features & NETIF_F_ALL_CSUM) && skb_checksum_help(skb)) goto out_kfree_skb; } -- cgit v1.2.3 From 36909ea43814cba34f7c921e99cba33d770a54e1 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 9 Jan 2011 19:36:31 +0000 Subject: net: Add alloc_netdev_mqs function Added alloc_netdev_mqs function which allows the number of transmit and receive queues to be specified independenty. alloc_netdev_mq was changed to a macro to call the new function. Also added alloc_etherdev_mqs with same purpose. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3fe443be4b15..3295b94884ab 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5617,18 +5617,20 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) } /** - * alloc_netdev_mq - allocate network device + * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for * @name: device name format string * @setup: callback to initialize device - * @queue_count: the number of subqueues to allocate + * @txqs: the number of TX subqueues to allocate + * @rxqs: the number of RX subqueues to allocate * * Allocates a struct net_device with private data area for driver use * and performs basic initialization. Also allocates subquue structs - * for each queue on the device at the end of the netdevice. + * for each queue on the device. */ -struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, - void (*setup)(struct net_device *), unsigned int queue_count) +struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, + void (*setup)(struct net_device *), + unsigned int txqs, unsigned int rxqs) { struct net_device *dev; size_t alloc_size; @@ -5636,12 +5638,20 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, BUG_ON(strlen(name) >= sizeof(dev->name)); - if (queue_count < 1) { + if (txqs < 1) { pr_err("alloc_netdev: Unable to allocate device " "with zero queues.\n"); return NULL; } +#ifdef CONFIG_RPS + if (rxqs < 1) { + pr_err("alloc_netdev: Unable to allocate device " + "with zero RX queues.\n"); + return NULL; + } +#endif + alloc_size = sizeof(struct net_device); if (sizeof_priv) { /* ensure 32-byte alignment of private area */ @@ -5672,14 +5682,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); - dev->num_tx_queues = queue_count; - dev->real_num_tx_queues = queue_count; + dev->num_tx_queues = txqs; + dev->real_num_tx_queues = txqs; if (netif_alloc_netdev_queues(dev)) goto free_pcpu; #ifdef CONFIG_RPS - dev->num_rx_queues = queue_count; - dev->real_num_rx_queues = queue_count; + dev->num_rx_queues = rxqs; + dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) goto free_pcpu; #endif @@ -5707,7 +5717,7 @@ free_p: kfree(p); return NULL; } -EXPORT_SYMBOL(alloc_netdev_mq); +EXPORT_SYMBOL(alloc_netdev_mqs); /** * free_netdev - free network device -- cgit v1.2.3 From bfe0d0298f2a67d94d58c39ea904a999aeeb7c3c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 9 Jan 2011 08:30:54 +0000 Subject: net_sched: factorize qdisc stats handling HTB takes into account skb is segmented in stats updates. Generalize this to all schedulers. They should use qdisc_bstats_update() helper instead of manipulating bstats.bytes and bstats.packets Add bstats_update() helper too for classes that use gnet_stats_basic_packed fields. Note : Right now, TCQ_F_CAN_BYPASS shortcurt can be taken only if no stab is setup on qdisc. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3295b94884ab..a3ef808b5e36 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2297,7 +2297,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, */ if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) skb_dst_force(skb); - __qdisc_update_bstats(q, skb->len); + + qdisc_skb_cb(skb)->pkt_len = skb->len; + qdisc_bstats_update(q, skb); + if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { if (unlikely(contended)) { spin_unlock(&q->busylock); -- cgit v1.2.3 From 1ac9ad1394fa542ac7ae0dc943ee3cda678799fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Jan 2011 12:13:14 +0000 Subject: net: remove dev_txq_stats_fold() After recent changes, (percpu stats on vlan/tunnels...), we dont need anymore per struct netdev_queue tx_bytes/tx_packets/tx_dropped counters. Only remaining users are ixgbe, sch_teql, gianfar & macvlan : 1) ixgbe can be converted to use existing tx_ring counters. 2) macvlan incremented txq->tx_dropped, it can use the dev->stats.tx_dropped counter. 3) sch_teql : almost revert ab35cd4b8f42 (Use net_device internal stats) Now we have ndo_get_stats64(), use it, even for "unsigned long" fields (No need to bring back a struct net_device_stats) 4) gianfar adds a stats structure per tx queue to hold tx_bytes/tx_packets This removes a lockdep warning (and possible lockup) in rndis gadget, calling dev_get_stats() from hard IRQ context. Ref: http://www.spinics.net/lists/netdev/msg149202.html Reported-by: Neil Jones Signed-off-by: Eric Dumazet CC: Jarek Poplawski CC: Alexander Duyck CC: Jeff Kirsher CC: Sandeep Gopalpet CC: Michal Nazarewicz Signed-off-by: David S. Miller --- net/core/dev.c | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a3ef808b5e36..83507c265e48 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5523,34 +5523,6 @@ void netdev_run_todo(void) } } -/** - * dev_txq_stats_fold - fold tx_queues stats - * @dev: device to get statistics from - * @stats: struct rtnl_link_stats64 to hold results - */ -void dev_txq_stats_fold(const struct net_device *dev, - struct rtnl_link_stats64 *stats) -{ - u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0; - unsigned int i; - struct netdev_queue *txq; - - for (i = 0; i < dev->num_tx_queues; i++) { - txq = netdev_get_tx_queue(dev, i); - spin_lock_bh(&txq->_xmit_lock); - tx_bytes += txq->tx_bytes; - tx_packets += txq->tx_packets; - tx_dropped += txq->tx_dropped; - spin_unlock_bh(&txq->_xmit_lock); - } - if (tx_bytes || tx_packets || tx_dropped) { - stats->tx_bytes = tx_bytes; - stats->tx_packets = tx_packets; - stats->tx_dropped = tx_dropped; - } -} -EXPORT_SYMBOL(dev_txq_stats_fold); - /* Convert net_device_stats to rtnl_link_stats64. They have the same * fields in the same order, with only the type differing. */ @@ -5594,7 +5566,6 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); } else { netdev_stats_to_stats64(storage, &dev->stats); - dev_txq_stats_fold(dev, storage); } storage->rx_dropped += atomic_long_read(&dev->rx_dropped); return storage; -- cgit v1.2.3 From 6ee400aafb60289b78fcde5ebccd8c4973fc53f4 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 17 Jan 2011 20:46:00 +0000 Subject: net offloading: Do not mask out NETIF_F_HW_VLAN_TX for vlan. In netif_skb_features() we return only the features that are valid for vlans if we have a vlan packet. However, we should not mask out NETIF_F_HW_VLAN_TX since it enables transmission of vlan tags and is obviously valid. Reported-by: Eric Dumazet Signed-off-by: Jesse Gross Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 83507c265e48..4c58d11d3b68 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2023,13 +2023,13 @@ int netif_skb_features(struct sk_buff *skb) return harmonize_features(skb, protocol, features); } - features &= skb->dev->vlan_features; + features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX); if (protocol != htons(ETH_P_8021Q)) { return harmonize_features(skb, protocol, features); } else { features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM; + NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX; return harmonize_features(skb, protocol, features); } } -- cgit v1.2.3 From d402786ea4f8433774a812d6b8635e737425cddd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jan 2011 00:51:36 +0000 Subject: net: fix can_checksum_protocol() arguments swap commit 0363466866d901fbc (net offloading: Convert checksums to use centrally computed features.) mistakenly swapped can_checksum_protocol() arguments. This broke IPv6 on bnx2 for instance, on NIC without TCPv6 checksum offloads. Reported-by: Hans de Bruin Signed-off-by: Eric Dumazet Acked-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 4c58d11d3b68..8393ec408cd4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2001,7 +2001,7 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) { - if (!can_checksum_protocol(protocol, features)) { + if (!can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, skb)) { -- cgit v1.2.3 From c506653d35249bb4738bb139c24362e1ae724bc1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Jan 2011 13:16:16 -0800 Subject: net: arp_ioctl() must hold RTNL Commit 941666c2e3e0 "net: RCU conversion of dev_getbyhwaddr() and arp_ioctl()" introduced a regression, reported by Jamie Heilman. "arp -Ds 192.168.2.41 eth0 pub" triggered the ASSERT_RTNL() assert in pneigh_lookup() Removing RTNL requirement from arp_ioctl() was a mistake, just revert that part. Reported-by: Jamie Heilman Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 8393ec408cd4..1e5077d2cca6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -749,7 +749,8 @@ EXPORT_SYMBOL(dev_get_by_index); * @ha: hardware address * * Search for an interface by MAC address. Returns NULL if the device - * is not found or a pointer to the device. The caller must hold RCU + * is not found or a pointer to the device. + * The caller must hold RCU or RTNL. * The returned device has not had its ref count increased * and the caller must therefore be careful about locking * -- cgit v1.2.3