From be7aa663fc1d9156798f5af3c60e6df45e1fe5de Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Sat, 21 Feb 2015 19:27:00 -0500 Subject: IB/ipoib: change init sequence ordering In preparation for using per device work queues, we need to move the start of the neighbor thread task to after ipoib_ib_dev_init and move the destruction of the neighbor task to before ipoib_ib_dev_cleanup. Otherwise we will end up freeing our workqueue with work possibly still on it. Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/ulp/ipoib/ipoib_main.c') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 657b89b1d291..98c738d827d1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1269,15 +1269,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) { struct ipoib_dev_priv *priv = netdev_priv(dev); - if (ipoib_neigh_hash_init(priv) < 0) - goto out; /* Allocate RX/TX "rings" to hold queued skbs */ priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, GFP_KERNEL); if (!priv->rx_ring) { printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", ca->name, ipoib_recvq_size); - goto out_neigh_hash_cleanup; + goto out; } priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring); @@ -1292,16 +1290,24 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) if (ipoib_ib_dev_init(dev, ca, port)) goto out_tx_ring_cleanup; + /* + * Must be after ipoib_ib_dev_init so we can allocate a per + * device wq there and use it here + */ + if (ipoib_neigh_hash_init(priv) < 0) + goto out_dev_uninit; + return 0; +out_dev_uninit: + ipoib_ib_dev_cleanup(dev); + out_tx_ring_cleanup: vfree(priv->tx_ring); out_rx_ring_cleanup: kfree(priv->rx_ring); -out_neigh_hash_cleanup: - ipoib_neigh_hash_uninit(dev); out: return -ENOMEM; } @@ -1324,6 +1330,12 @@ void ipoib_dev_cleanup(struct net_device *dev) } unregister_netdevice_many(&head); + /* + * Must be before ipoib_ib_dev_cleanup or we delete an in use + * work queue + */ + ipoib_neigh_hash_uninit(dev); + ipoib_ib_dev_cleanup(dev); kfree(priv->rx_ring); @@ -1331,8 +1343,6 @@ void ipoib_dev_cleanup(struct net_device *dev) priv->rx_ring = NULL; priv->tx_ring = NULL; - - ipoib_neigh_hash_uninit(dev); } static const struct header_ops ipoib_header_ops = { -- cgit v1.2.3 From 0b39578bcde4298a392fb2df16235c316d932127 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Sat, 21 Feb 2015 19:27:03 -0500 Subject: IB/ipoib: Use dedicated workqueues per interface During my recent work on the rtnl lock deadlock in the IPoIB driver, I saw that even once I fixed the apparent races for a single device, as soon as that device had any children, new races popped up. It turns out that this is because no matter how well we protect against races on a single device, the fact that all devices use the same workqueue, and flush_workqueue() flushes *everything* from that workqueue means that we would also have to prevent all races between different devices (for instance, ipoib_mcast_restart_task on interface ib0 can race with ipoib_mcast_flush_dev on interface ib0.8002, resulting in a deadlock on the rtnl_lock). There are several possible solutions to this problem: Make carrier_on_task and mcast_restart_task try to take the rtnl for some set period of time and if they fail, then bail. This runs the real risk of dropping work on the floor, which can end up being its own separate kind of deadlock. Set some global flag in the driver that says some device is in the middle of going down, letting all tasks know to bail. Again, this can drop work on the floor. Or the method this patch attempts to use, which is when we bring an interface up, create a workqueue specifically for that interface, so that when we take it back down, we are flushing only those tasks associated with our interface. In addition, keep the global workqueue, but now limit it to only flush tasks. In this way, the flush tasks can always flush the device specific work queues without having deadlock issues. Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband/ulp/ipoib/ipoib_main.c') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 98c738d827d1..4be80bec93ca 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -839,7 +839,7 @@ static void ipoib_set_mcast_list(struct net_device *dev) return; } - queue_work(ipoib_workqueue, &priv->restart_task); + queue_work(priv->wq, &priv->restart_task); } static int ipoib_get_iflink(const struct net_device *dev) @@ -961,7 +961,7 @@ static void ipoib_reap_neigh(struct work_struct *work) __ipoib_reap_neigh(priv); if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) - queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task, + queue_delayed_work(priv->wq, &priv->neigh_reap_task, arp_tbl.gc_interval); } @@ -1140,7 +1140,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) /* start garbage collection */ clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); - queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task, + queue_delayed_work(priv->wq, &priv->neigh_reap_task, arp_tbl.gc_interval); return 0; @@ -1651,10 +1651,11 @@ sysfs_failed: register_failed: ib_unregister_event_handler(&priv->event_handler); + flush_workqueue(ipoib_workqueue); /* Stop GC if started before flush */ set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); cancel_delayed_work(&priv->neigh_reap_task); - flush_workqueue(ipoib_workqueue); + flush_workqueue(priv->wq); event_failed: ipoib_dev_cleanup(priv->dev); @@ -1717,6 +1718,7 @@ static void ipoib_remove_one(struct ib_device *device) list_for_each_entry_safe(priv, tmp, dev_list, list) { ib_unregister_event_handler(&priv->event_handler); + flush_workqueue(ipoib_workqueue); rtnl_lock(); dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); @@ -1725,7 +1727,7 @@ static void ipoib_remove_one(struct ib_device *device) /* Stop GC */ set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); cancel_delayed_work(&priv->neigh_reap_task); - flush_workqueue(ipoib_workqueue); + flush_workqueue(priv->wq); unregister_netdev(priv->dev); free_netdev(priv->dev); @@ -1760,14 +1762,16 @@ static int __init ipoib_init_module(void) return ret; /* - * We create our own workqueue mainly because we want to be - * able to flush it when devices are being removed. We can't - * use schedule_work()/flush_scheduled_work() because both - * unregister_netdev() and linkwatch_event take the rtnl lock, - * so flush_scheduled_work() can deadlock during device - * removal. + * We create a global workqueue here that is used for all flush + * operations. However, if you attempt to flush a workqueue + * from a task on that same workqueue, it deadlocks the system. + * We want to be able to flush the tasks associated with a + * specific net device, so we also create a workqueue for each + * netdevice. We queue up the tasks for that device only on + * its private workqueue, and we only queue up flush events + * on our global flush workqueue. This avoids the deadlocks. */ - ipoib_workqueue = create_singlethread_workqueue("ipoib"); + ipoib_workqueue = create_singlethread_workqueue("ipoib_flush"); if (!ipoib_workqueue) { ret = -ENOMEM; goto err_fs; -- cgit v1.2.3 From efc82eeeae4ece716091d8540079b7f276ca1ad5 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Sat, 21 Feb 2015 19:27:04 -0500 Subject: IB/ipoib: No longer use flush as a parameter Various places in the IPoIB code had a deadlock related to flushing the ipoib workqueue. Now that we have per device workqueues and a specific flush workqueue, there is no longer a deadlock issue with flushing the device specific workqueues and we can do so unilaterally. Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/ulp/ipoib/ipoib_main.c') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 4be80bec93ca..176b3dfb49c3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -108,7 +108,7 @@ int ipoib_open(struct net_device *dev) set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); - if (ipoib_ib_dev_open(dev, 1)) { + if (ipoib_ib_dev_open(dev)) { if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) return 0; goto err_disable; @@ -139,7 +139,7 @@ int ipoib_open(struct net_device *dev) return 0; err_stop: - ipoib_ib_dev_stop(dev, 1); + ipoib_ib_dev_stop(dev); err_disable: clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); @@ -157,8 +157,8 @@ static int ipoib_stop(struct net_device *dev) netif_stop_queue(dev); - ipoib_ib_dev_down(dev, 1); - ipoib_ib_dev_stop(dev, 0); + ipoib_ib_dev_down(dev); + ipoib_ib_dev_stop(dev); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; -- cgit v1.2.3 From 1e85b806f9b988ce2e465fb0f86c8cca228d83a9 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 2 Apr 2015 13:39:03 +0300 Subject: IB/ipoib: Save only IPOIB_MAX_PATH_REC_QUEUE skb's Whenever there is no path->ah to the destination, keep only defined number of skb's. Otherwise there are cases that the driver can keep infinite list of skb's. For example, when one device want to send unicast arp to the destination, and from some reason the SM doesn't respond, the driver currently keeps all the skb's. If that unicast arp traffic stopped, all these skb's are kept by the path object till the interface is down. Signed-off-by: Erez Shitrit Signed-off-by: Or Gerlitz Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/ulp/ipoib/ipoib_main.c') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 176b3dfb49c3..7cad4dd87469 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -640,8 +640,10 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, if (!path->query && path_rec_start(dev, path)) goto err_path; - - __skb_queue_tail(&neigh->queue, skb); + if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) + __skb_queue_tail(&neigh->queue, skb); + else + goto err_drop; } spin_unlock_irqrestore(&priv->lock, flags); @@ -676,7 +678,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, new_path = 1; } if (path) { - __skb_queue_tail(&path->queue, skb); + if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { + __skb_queue_tail(&path->queue, skb); + } else { + ++dev->stats.tx_dropped; + dev_kfree_skb_any(skb); + } if (!path->query && path_rec_start(dev, path)) { spin_unlock_irqrestore(&priv->lock, flags); -- cgit v1.2.3