From b595076a180a56d1bb170e6eceda6eb9d76f4cd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 1 Nov 2010 15:38:34 -0400 Subject: tree-wide: fix comment/printk typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "gadget", "through", "command", "maintain", "maintain", "controller", "address", "between", "initiali[zs]e", "instead", "function", "select", "already", "equal", "access", "management", "hierarchy", "registration", "interest", "relative", "memory", "offset", "already", Signed-off-by: Uwe Kleine-König Signed-off-by: Jiri Kosina --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 35dfb8318483..89204e8c0e14 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6096,7 +6096,7 @@ static void __net_exit default_device_exit(struct net *net) static void __net_exit default_device_exit_batch(struct list_head *net_list) { /* At exit all network devices most be removed from a network - * namespace. Do this in the reverse order of registeration. + * namespace. Do this in the reverse order of registration. * Do this across as many network namespaces as possible to * improve batching efficiency. */ -- cgit v1.2.3 From 9497a0518e8183959e45da05f317f1cb36f2cd7c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:30 +0000 Subject: net offloading: Accept NETIF_F_HW_CSUM for all protocols. We currently only have software fallback for one type of checksum: the TCP/UDP one's complement. This means that a protocol that uses hardware offloading for a different type of checksum (FCoE, SCTP) must directly check the device's features and do the right thing ahead of time. By the time we get to dev_can_checksum(), we're only deciding whether to apply the one algorithm in software or hardware. NETIF_F_HW_CSUM has the same capabilities as the software version, so we should always use it if present. The primary advantage of this is multiply tagged vlans can use hardware checksumming. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a215269d2e35..d8befd06da04 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1734,7 +1734,7 @@ EXPORT_SYMBOL(netif_device_attach); static bool can_checksum_protocol(unsigned long features, __be16 protocol) { - return ((features & NETIF_F_NO_CSUM) || + return ((features & NETIF_F_GEN_CSUM) || ((features & NETIF_F_V4_CSUM) && protocol == htons(ETH_P_IP)) || ((features & NETIF_F_V6_CSUM) && -- cgit v1.2.3 From f01a5236bd4b140198fbcc550f085e8361fd73fa Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:31 +0000 Subject: net offloading: Generalize netif_get_vlan_features(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit netif_get_vlan_features() is currently only used by netif_needs_gso(), so it only concerns itself with GSO features. However, several other places also should take into account the contents of the packet when deciding whether to offload to hardware. This generalizes the function to return features about all of the various forms of offloading. Since offloads tend to be linked together, this avoids duplicating the logic in each location (i.e. the scatter/gather code also needs the checksum logic). Suggested-by: Michał Mirosław Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index d8befd06da04..a51dfd7b56fb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2017,22 +2017,41 @@ static inline void skb_orphan_try(struct sk_buff *skb) } } -int netif_get_vlan_features(struct sk_buff *skb, struct net_device *dev) +static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) +{ + if (!can_checksum_protocol(protocol, features)) { + features &= ~NETIF_F_ALL_CSUM; + features &= ~NETIF_F_SG; + } else if (illegal_highdma(skb->dev, skb)) { + features &= ~NETIF_F_SG; + } + + return features; +} + +int netif_skb_features(struct sk_buff *skb) { __be16 protocol = skb->protocol; + int features = skb->dev->features; if (protocol == htons(ETH_P_8021Q)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; - } else if (!skb->vlan_tci) - return dev->features; + } else if (!vlan_tx_tag_present(skb)) { + return harmonize_features(skb, protocol, features); + } - if (protocol != htons(ETH_P_8021Q)) - return dev->features & dev->vlan_features; - else - return 0; + features &= skb->dev->vlan_features; + + if (protocol != htons(ETH_P_8021Q)) { + return harmonize_features(skb, protocol, features); + } else { + features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | + NETIF_F_GEN_CSUM; + return harmonize_features(skb, protocol, features); + } } -EXPORT_SYMBOL(netif_get_vlan_features); +EXPORT_SYMBOL(netif_skb_features); /* * Returns true if either: -- cgit v1.2.3 From fc741216db156994c554ac31c1151fe0e00d8f0e Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:32 +0000 Subject: net offloading: Pass features into netif_needs_gso(). Now that there is a single function that can compute the device features relevant to a packet, we don't want to run it for each offload. This converts netif_needs_gso() to take the features of the device, rather than computing them itself. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a51dfd7b56fb..1444ed3861a0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2086,6 +2086,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int rc = NETDEV_TX_OK; if (likely(!skb->next)) { + int features; + /* * If device doesnt need skb->dst, release it right now while * its hot in this cpu cache @@ -2098,8 +2100,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb_orphan_try(skb); + features = netif_skb_features(skb); + if (vlan_tx_tag_present(skb) && - !(dev->features & NETIF_F_HW_VLAN_TX)) { + !(features & NETIF_F_HW_VLAN_TX)) { skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); if (unlikely(!skb)) goto out; @@ -2107,7 +2111,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } - if (netif_needs_gso(dev, skb)) { + if (netif_needs_gso(skb, features)) { if (unlikely(dev_gso_segment(skb))) goto out_kfree_skb; if (skb->next) -- cgit v1.2.3 From 91ecb63c074d802f8cf103f1dafb4aed24d0f24c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:33 +0000 Subject: net offloading: Convert dev_gso_segment() to use precomputed features. This switches dev_gso_segment() to use the device features computed by the centralized routine. In doing so, it fixes a problem where it would always use dev->features, instead of those appropriate to the number of vlan tags if any are present. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 1444ed3861a0..4cd3e84e1294 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1971,16 +1971,14 @@ static void dev_gso_skb_destructor(struct sk_buff *skb) /** * dev_gso_segment - Perform emulated hardware segmentation on skb. * @skb: buffer to segment + * @features: device features as applicable to this skb * * This function segments the given skb and stores the list of segments * in skb->next. */ -static int dev_gso_segment(struct sk_buff *skb) +static int dev_gso_segment(struct sk_buff *skb, int features) { - struct net_device *dev = skb->dev; struct sk_buff *segs; - int features = dev->features & ~(illegal_highdma(dev, skb) ? - NETIF_F_SG : 0); segs = skb_gso_segment(skb, features); @@ -2112,7 +2110,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, } if (netif_needs_gso(skb, features)) { - if (unlikely(dev_gso_segment(skb))) + if (unlikely(dev_gso_segment(skb, features))) goto out_kfree_skb; if (skb->next) goto gso; -- cgit v1.2.3 From 02932ce9e2c136e6fab2571c8e0dd69ae8ec9853 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:34 +0000 Subject: net offloading: Convert skb_need_linearize() to use precomputed features. This switches skb_need_linearize() to use the features that have been centrally computed. In doing so, this fixes a problem where scatter/gather should not be used because the card does not support checksum offloading on that type of packet. On device registration we only check that some form of checksum offloading is available if scatter/gatther is enabled but we must also check at transmission time. Examples of this include IPv6 or vlan packets on a NIC that only supports IPv4 offloading. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 4cd3e84e1294..2f838f1d222c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2059,22 +2059,13 @@ EXPORT_SYMBOL(netif_skb_features); * support DMA from it. */ static inline int skb_needs_linearize(struct sk_buff *skb, - struct net_device *dev) + int features) { - if (skb_is_nonlinear(skb)) { - int features = dev->features; - - if (vlan_tx_tag_present(skb)) - features &= dev->vlan_features; - - return (skb_has_frag_list(skb) && - !(features & NETIF_F_FRAGLIST)) || + return skb_is_nonlinear(skb) && + ((skb_has_frag_list(skb) && + !(features & NETIF_F_FRAGLIST)) || (skb_shinfo(skb)->nr_frags && - (!(features & NETIF_F_SG) || - illegal_highdma(dev, skb))); - } - - return 0; + !(features & NETIF_F_SG))); } int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, @@ -2115,7 +2106,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (skb->next) goto gso; } else { - if (skb_needs_linearize(skb, dev) && + if (skb_needs_linearize(skb, features) && __skb_linearize(skb)) goto out_kfree_skb; -- cgit v1.2.3 From 0363466866d901fbc658f4e63dd61e7cc93dd0af Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Sun, 9 Jan 2011 06:23:35 +0000 Subject: net offloading: Convert checksums to use centrally computed features. In order to compute the features for other offloads (primarily scatter/gather), we need to first check the ability of the NIC to offload the checksum for the packet. Since we have already computed this, we can directly use the result instead of figuring it out again. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 2f838f1d222c..3fe443be4b15 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1732,33 +1732,6 @@ void netif_device_attach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_attach); -static bool can_checksum_protocol(unsigned long features, __be16 protocol) -{ - return ((features & NETIF_F_GEN_CSUM) || - ((features & NETIF_F_V4_CSUM) && - protocol == htons(ETH_P_IP)) || - ((features & NETIF_F_V6_CSUM) && - protocol == htons(ETH_P_IPV6)) || - ((features & NETIF_F_FCOE_CRC) && - protocol == htons(ETH_P_FCOE))); -} - -static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) -{ - __be16 protocol = skb->protocol; - int features = dev->features; - - if (vlan_tx_tag_present(skb)) { - features &= dev->vlan_features; - } else if (protocol == htons(ETH_P_8021Q)) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - protocol = veh->h_vlan_encapsulated_proto; - features &= dev->vlan_features; - } - - return can_checksum_protocol(features, protocol); -} - /** * skb_dev_set -- assign a new device to a buffer * @skb: buffer for the new device @@ -2015,6 +1988,17 @@ static inline void skb_orphan_try(struct sk_buff *skb) } } +static bool can_checksum_protocol(unsigned long features, __be16 protocol) +{ + return ((features & NETIF_F_GEN_CSUM) || + ((features & NETIF_F_V4_CSUM) && + protocol == htons(ETH_P_IP)) || + ((features & NETIF_F_V6_CSUM) && + protocol == htons(ETH_P_IPV6)) || + ((features & NETIF_F_FCOE_CRC) && + protocol == htons(ETH_P_FCOE))); +} + static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) { if (!can_checksum_protocol(protocol, features)) { @@ -2117,7 +2101,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (skb->ip_summed == CHECKSUM_PARTIAL) { skb_set_transport_header(skb, skb_checksum_start_offset(skb)); - if (!dev_can_checksum(dev, skb) && + if (!(features & NETIF_F_ALL_CSUM) && skb_checksum_help(skb)) goto out_kfree_skb; } -- cgit v1.2.3 From 36909ea43814cba34f7c921e99cba33d770a54e1 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 9 Jan 2011 19:36:31 +0000 Subject: net: Add alloc_netdev_mqs function Added alloc_netdev_mqs function which allows the number of transmit and receive queues to be specified independenty. alloc_netdev_mq was changed to a macro to call the new function. Also added alloc_etherdev_mqs with same purpose. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3fe443be4b15..3295b94884ab 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5617,18 +5617,20 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev) } /** - * alloc_netdev_mq - allocate network device + * alloc_netdev_mqs - allocate network device * @sizeof_priv: size of private data to allocate space for * @name: device name format string * @setup: callback to initialize device - * @queue_count: the number of subqueues to allocate + * @txqs: the number of TX subqueues to allocate + * @rxqs: the number of RX subqueues to allocate * * Allocates a struct net_device with private data area for driver use * and performs basic initialization. Also allocates subquue structs - * for each queue on the device at the end of the netdevice. + * for each queue on the device. */ -struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, - void (*setup)(struct net_device *), unsigned int queue_count) +struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, + void (*setup)(struct net_device *), + unsigned int txqs, unsigned int rxqs) { struct net_device *dev; size_t alloc_size; @@ -5636,12 +5638,20 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, BUG_ON(strlen(name) >= sizeof(dev->name)); - if (queue_count < 1) { + if (txqs < 1) { pr_err("alloc_netdev: Unable to allocate device " "with zero queues.\n"); return NULL; } +#ifdef CONFIG_RPS + if (rxqs < 1) { + pr_err("alloc_netdev: Unable to allocate device " + "with zero RX queues.\n"); + return NULL; + } +#endif + alloc_size = sizeof(struct net_device); if (sizeof_priv) { /* ensure 32-byte alignment of private area */ @@ -5672,14 +5682,14 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); - dev->num_tx_queues = queue_count; - dev->real_num_tx_queues = queue_count; + dev->num_tx_queues = txqs; + dev->real_num_tx_queues = txqs; if (netif_alloc_netdev_queues(dev)) goto free_pcpu; #ifdef CONFIG_RPS - dev->num_rx_queues = queue_count; - dev->real_num_rx_queues = queue_count; + dev->num_rx_queues = rxqs; + dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) goto free_pcpu; #endif @@ -5707,7 +5717,7 @@ free_p: kfree(p); return NULL; } -EXPORT_SYMBOL(alloc_netdev_mq); +EXPORT_SYMBOL(alloc_netdev_mqs); /** * free_netdev - free network device -- cgit v1.2.3 From bfe0d0298f2a67d94d58c39ea904a999aeeb7c3c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 9 Jan 2011 08:30:54 +0000 Subject: net_sched: factorize qdisc stats handling HTB takes into account skb is segmented in stats updates. Generalize this to all schedulers. They should use qdisc_bstats_update() helper instead of manipulating bstats.bytes and bstats.packets Add bstats_update() helper too for classes that use gnet_stats_basic_packed fields. Note : Right now, TCQ_F_CAN_BYPASS shortcurt can be taken only if no stab is setup on qdisc. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 3295b94884ab..a3ef808b5e36 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2297,7 +2297,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, */ if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE)) skb_dst_force(skb); - __qdisc_update_bstats(q, skb->len); + + qdisc_skb_cb(skb)->pkt_len = skb->len; + qdisc_bstats_update(q, skb); + if (sch_direct_xmit(skb, q, dev, txq, root_lock)) { if (unlikely(contended)) { spin_unlock(&q->busylock); -- cgit v1.2.3 From 1ac9ad1394fa542ac7ae0dc943ee3cda678799fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Jan 2011 12:13:14 +0000 Subject: net: remove dev_txq_stats_fold() After recent changes, (percpu stats on vlan/tunnels...), we dont need anymore per struct netdev_queue tx_bytes/tx_packets/tx_dropped counters. Only remaining users are ixgbe, sch_teql, gianfar & macvlan : 1) ixgbe can be converted to use existing tx_ring counters. 2) macvlan incremented txq->tx_dropped, it can use the dev->stats.tx_dropped counter. 3) sch_teql : almost revert ab35cd4b8f42 (Use net_device internal stats) Now we have ndo_get_stats64(), use it, even for "unsigned long" fields (No need to bring back a struct net_device_stats) 4) gianfar adds a stats structure per tx queue to hold tx_bytes/tx_packets This removes a lockdep warning (and possible lockup) in rndis gadget, calling dev_get_stats() from hard IRQ context. Ref: http://www.spinics.net/lists/netdev/msg149202.html Reported-by: Neil Jones Signed-off-by: Eric Dumazet CC: Jarek Poplawski CC: Alexander Duyck CC: Jeff Kirsher CC: Sandeep Gopalpet CC: Michal Nazarewicz Signed-off-by: David S. Miller --- net/core/dev.c | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a3ef808b5e36..83507c265e48 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5523,34 +5523,6 @@ void netdev_run_todo(void) } } -/** - * dev_txq_stats_fold - fold tx_queues stats - * @dev: device to get statistics from - * @stats: struct rtnl_link_stats64 to hold results - */ -void dev_txq_stats_fold(const struct net_device *dev, - struct rtnl_link_stats64 *stats) -{ - u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0; - unsigned int i; - struct netdev_queue *txq; - - for (i = 0; i < dev->num_tx_queues; i++) { - txq = netdev_get_tx_queue(dev, i); - spin_lock_bh(&txq->_xmit_lock); - tx_bytes += txq->tx_bytes; - tx_packets += txq->tx_packets; - tx_dropped += txq->tx_dropped; - spin_unlock_bh(&txq->_xmit_lock); - } - if (tx_bytes || tx_packets || tx_dropped) { - stats->tx_bytes = tx_bytes; - stats->tx_packets = tx_packets; - stats->tx_dropped = tx_dropped; - } -} -EXPORT_SYMBOL(dev_txq_stats_fold); - /* Convert net_device_stats to rtnl_link_stats64. They have the same * fields in the same order, with only the type differing. */ @@ -5594,7 +5566,6 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev)); } else { netdev_stats_to_stats64(storage, &dev->stats); - dev_txq_stats_fold(dev, storage); } storage->rx_dropped += atomic_long_read(&dev->rx_dropped); return storage; -- cgit v1.2.3 From 6ee400aafb60289b78fcde5ebccd8c4973fc53f4 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 17 Jan 2011 20:46:00 +0000 Subject: net offloading: Do not mask out NETIF_F_HW_VLAN_TX for vlan. In netif_skb_features() we return only the features that are valid for vlans if we have a vlan packet. However, we should not mask out NETIF_F_HW_VLAN_TX since it enables transmission of vlan tags and is obviously valid. Reported-by: Eric Dumazet Signed-off-by: Jesse Gross Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 83507c265e48..4c58d11d3b68 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2023,13 +2023,13 @@ int netif_skb_features(struct sk_buff *skb) return harmonize_features(skb, protocol, features); } - features &= skb->dev->vlan_features; + features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX); if (protocol != htons(ETH_P_8021Q)) { return harmonize_features(skb, protocol, features); } else { features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | - NETIF_F_GEN_CSUM; + NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX; return harmonize_features(skb, protocol, features); } } -- cgit v1.2.3 From d402786ea4f8433774a812d6b8635e737425cddd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jan 2011 00:51:36 +0000 Subject: net: fix can_checksum_protocol() arguments swap commit 0363466866d901fbc (net offloading: Convert checksums to use centrally computed features.) mistakenly swapped can_checksum_protocol() arguments. This broke IPv6 on bnx2 for instance, on NIC without TCPv6 checksum offloads. Reported-by: Hans de Bruin Signed-off-by: Eric Dumazet Acked-by: Jesse Gross Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 4c58d11d3b68..8393ec408cd4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2001,7 +2001,7 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features) { - if (!can_checksum_protocol(protocol, features)) { + if (!can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, skb)) { -- cgit v1.2.3 From c506653d35249bb4738bb139c24362e1ae724bc1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Jan 2011 13:16:16 -0800 Subject: net: arp_ioctl() must hold RTNL Commit 941666c2e3e0 "net: RCU conversion of dev_getbyhwaddr() and arp_ioctl()" introduced a regression, reported by Jamie Heilman. "arp -Ds 192.168.2.41 eth0 pub" triggered the ASSERT_RTNL() assert in pneigh_lookup() Removing RTNL requirement from arp_ioctl() was a mistake, just revert that part. Reported-by: Jamie Heilman Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 8393ec408cd4..1e5077d2cca6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -749,7 +749,8 @@ EXPORT_SYMBOL(dev_get_by_index); * @ha: hardware address * * Search for an interface by MAC address. Returns NULL if the device - * is not found or a pointer to the device. The caller must hold RCU + * is not found or a pointer to the device. + * The caller must hold RCU or RTNL. * The returned device has not had its ref count increased * and the caller must therefore be careful about locking * -- cgit v1.2.3 From 66c46d741e2e60f0e8b625b80edb0ab820c46d7a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 29 Jan 2011 20:44:54 -0800 Subject: gro: Reset dev pointer on reuse On older kernels the VLAN code may zero skb->dev before dropping it and causing it to be reused by GRO. Unfortunately we didn't reset skb->dev in that case which causes the next GRO user to get a bogus skb->dev pointer. This particular problem no longer happens with the current upstream kernel due to changes in VLAN processing. However, for correctness we should still reset the skb->dev pointer in the GRO reuse function in case a future user does the same thing. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 24ea2d71e7ea..93e44dbf6793 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3424,6 +3424,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) __skb_pull(skb, skb_headlen(skb)); skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); skb->vlan_tci = 0; + skb->dev = napi->dev; napi->skb = skb; } -- cgit v1.2.3 From 8587523640441a9ff2564ebc6efeb39497ad6709 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 31 Jan 2011 16:23:42 -0800 Subject: net: Check rps_flow_table when RPS map length is 1 In get_rps_cpu, add check that the rps_flow_table for the device is NULL when trying to take fast path when RPS map length is one. Without this, RFS is effectively disabled if map length is one which is not correct. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 93e44dbf6793..4c907895876b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2563,7 +2563,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, map = rcu_dereference(rxqueue->rps_map); if (map) { - if (map->len == 1) { + if (map->len == 1 && + !rcu_dereference_raw(rxqueue->rps_flow_table)) { tcpu = map->cpus[0]; if (cpu_online(tcpu)) cpu = tcpu; -- cgit v1.2.3 From 6d152e23ad1a7a5b40fef1f42e017d66e6115159 Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Wed, 2 Feb 2011 14:53:25 -0800 Subject: gro: reset skb_iif on reuse Like Herbert's change from a few days ago: 66c46d741e2e60f0e8b625b80edb0ab820c46d7a gro: Reset dev pointer on reuse this may not be necessary at this point, but we should still clean up the skb->skb_iif. If not we may end up with an invalid valid for skb->skb_iif when the skb is reused and the check is done in __netif_receive_skb. Signed-off-by: Andy Gospodarek Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 4c907895876b..b6d0bf875a8e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3426,6 +3426,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); skb->vlan_tci = 0; skb->dev = napi->dev; + skb->skb_iif = 0; napi->skb = skb; } -- cgit v1.2.3 From 8d3bdbd55a7e2a3f2c148a4830aa26dd682b21c4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Feb 2011 15:02:50 -0800 Subject: net: Fix lockdep regression caused by initializing netdev queues too early. In commit aa9421041128abb4d269ee1dc502ff65fb3b7d69 ("net: init ingress queue") we moved the allocation and lock initialization of the queues into alloc_netdev_mq() since register_netdevice() is way too late. The problem is that dev->type is not setup until the setup() callback is invoked by alloc_netdev_mq(), and the dev->type is what determines the lockdep class to use for the locks in the queues. Fix this by doing the queue allocation after the setup() callback runs. This is safe because the setup() callback is not allowed to make any state changes that need to be undone on error (memory allocations, etc.). It may, however, make state changes that are undone by free_netdev() (such as netif_napi_add(), which is done by the ipoib driver's setup routine). The previous code also leaked a reference to the &init_net namespace object on RX/TX queue allocation failures. Signed-off-by: David S. Miller --- net/core/dev.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index b6d0bf875a8e..8e726cb47ed7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5660,30 +5660,35 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev_net_set(dev, &init_net); + dev->gso_max_size = GSO_MAX_SIZE; + + INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); + dev->ethtool_ntuple_list.count = 0; + INIT_LIST_HEAD(&dev->napi_list); + INIT_LIST_HEAD(&dev->unreg_list); + INIT_LIST_HEAD(&dev->link_watch_list); + dev->priv_flags = IFF_XMIT_DST_RELEASE; + setup(dev); + dev->num_tx_queues = txqs; dev->real_num_tx_queues = txqs; if (netif_alloc_netdev_queues(dev)) - goto free_pcpu; + goto free_all; #ifdef CONFIG_RPS dev->num_rx_queues = rxqs; dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) - goto free_pcpu; + goto free_all; #endif - dev->gso_max_size = GSO_MAX_SIZE; - - INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list); - dev->ethtool_ntuple_list.count = 0; - INIT_LIST_HEAD(&dev->napi_list); - INIT_LIST_HEAD(&dev->unreg_list); - INIT_LIST_HEAD(&dev->link_watch_list); - dev->priv_flags = IFF_XMIT_DST_RELEASE; - setup(dev); strcpy(dev->name, name); return dev; +free_all: + free_netdev(dev); + return NULL; + free_pcpu: free_percpu(dev->pcpu_refcnt); kfree(dev->_tx); -- cgit v1.2.3 From f87e6f47933e3ebeced9bb12615e830a72cedce4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 17 Feb 2011 22:54:38 +0000 Subject: net: dont leave active on stack LIST_HEAD Eric W. Biderman and Michal Hocko reported various memory corruptions that we suspected to be related to a LIST head located on stack, that was manipulated after thread left function frame (and eventually exited, so its stack was freed and reused). Eric Dumazet suggested the problem was probably coming from commit 443457242beb (net: factorize sync-rcu call in unregister_netdevice_many) This patch fixes __dev_close() and dev_close() to properly deinit their respective LIST_HEAD(single) before exiting. References: https://lkml.org/lkml/2011/2/16/304 References: https://lkml.org/lkml/2011/2/14/223 Reported-by: Michal Hocko Tested-by: Michal Hocko Reported-by: Eric W. Biderman Tested-by: Eric W. Biderman Signed-off-by: Linus Torvalds Signed-off-by: Eric Dumazet CC: Ingo Molnar CC: Octavian Purdila Signed-off-by: David S. Miller --- net/core/dev.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index 8e726cb47ed7..a18c1643ea9f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1280,10 +1280,13 @@ static int __dev_close_many(struct list_head *head) static int __dev_close(struct net_device *dev) { + int retval; LIST_HEAD(single); list_add(&dev->unreg_list, &single); - return __dev_close_many(&single); + retval = __dev_close_many(&single); + list_del(&single); + return retval; } int dev_close_many(struct list_head *head) @@ -1325,7 +1328,7 @@ int dev_close(struct net_device *dev) list_add(&dev->unreg_list, &single); dev_close_many(&single); - + list_del(&single); return 0; } EXPORT_SYMBOL(dev_close); -- cgit v1.2.3 From ceaaec98ad99859ac90ac6863ad0a6cd075d8e0e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 17 Feb 2011 22:59:19 +0000 Subject: net: deinit automatic LIST_HEAD commit 9b5e383c11b08784 (net: Introduce unregister_netdevice_many()) left an active LIST_HEAD() in rollback_registered(), with possible memory corruption. Even if device is freed without touching its unreg_list (and therefore touching the previous memory location holding LISTE_HEAD(single), better close the bug for good, since its really subtle. (Same fix for default_device_exit_batch() for completeness) Reported-by: Michal Hocko Tested-by: Michal Hocko Reported-by: Eric W. Biderman Tested-by: Eric W. Biderman Signed-off-by: Linus Torvalds Signed-off-by: Eric Dumazet CC: Ingo Molnar CC: Octavian Purdila CC: stable [.33+] Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/core/dev.c') diff --git a/net/core/dev.c b/net/core/dev.c index a18c1643ea9f..8ae6631abcc2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5066,6 +5066,7 @@ static void rollback_registered(struct net_device *dev) list_add(&dev->unreg_list, &single); rollback_registered_many(&single); + list_del(&single); } unsigned long netdev_fix_features(unsigned long features, const char *name) @@ -6219,6 +6220,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) } } unregister_netdevice_many(&dev_kill_list); + list_del(&dev_kill_list); rtnl_unlock(); } -- cgit v1.2.3