summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrancesco Dolcini <francesco.dolcini@toradex.com>2022-05-06 13:49:17 +0200
committerFrancesco Dolcini <francesco.dolcini@toradex.com>2022-05-13 08:08:51 +0000
commit585dc27c53b6c1a32a7e11b724e91d7297388a13 (patch)
tree7a531c3a9185a01ca21a19fe990b34b310004590
parent226073276f6be24718d364b0cfd12019c07cab14 (diff)
net: phy: Fix race condition on link status change
commit 91a7cda1f4b8bdf770000a3b60640576dafe0cec upstream. This fixes the following error caused by a race condition between phydev->adjust_link() and a MDIO transaction in the phy interrupt handler. The issue was reproduced with the ethernet FEC driver and a micrel KSZ9031 phy. [ 146.195696] fec 2188000.ethernet eth0: MDIO read timeout [ 146.201779] ------------[ cut here ]------------ [ 146.206671] WARNING: CPU: 0 PID: 571 at drivers/net/phy/phy.c:942 phy_error+0x24/0x6c [ 146.214744] Modules linked in: bnep imx_vdoa imx_sdma evbug [ 146.220640] CPU: 0 PID: 571 Comm: irq/128-2188000 Not tainted 5.18.0-rc3-00080-gd569e86915b7 #9 [ 146.229563] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) [ 146.236257] unwind_backtrace from show_stack+0x10/0x14 [ 146.241640] show_stack from dump_stack_lvl+0x58/0x70 [ 146.246841] dump_stack_lvl from __warn+0xb4/0x24c [ 146.251772] __warn from warn_slowpath_fmt+0x5c/0xd4 [ 146.256873] warn_slowpath_fmt from phy_error+0x24/0x6c [ 146.262249] phy_error from kszphy_handle_interrupt+0x40/0x48 [ 146.268159] kszphy_handle_interrupt from irq_thread_fn+0x1c/0x78 [ 146.274417] irq_thread_fn from irq_thread+0xf0/0x1dc [ 146.279605] irq_thread from kthread+0xe4/0x104 [ 146.284267] kthread from ret_from_fork+0x14/0x28 [ 146.289164] Exception stack(0xe6fa1fb0 to 0xe6fa1ff8) [ 146.294448] 1fa0: 00000000 00000000 00000000 00000000 [ 146.302842] 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 146.311281] 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 [ 146.318262] irq event stamp: 12325 [ 146.321780] hardirqs last enabled at (12333): [<c01984c4>] __up_console_sem+0x50/0x60 [ 146.330013] hardirqs last disabled at (12342): [<c01984b0>] __up_console_sem+0x3c/0x60 [ 146.338259] softirqs last enabled at (12324): [<c01017f0>] __do_softirq+0x2c0/0x624 [ 146.346311] softirqs last disabled at (12319): [<c01300ac>] __irq_exit_rcu+0x138/0x178 [ 146.354447] ---[ end trace 0000000000000000 ]--- With the FEC driver phydev->adjust_link() calls fec_enet_adjust_link() calls fec_stop()/fec_restart() and both these function reset and temporary disable the FEC disrupting any MII transaction that could be happening at the same time. fec_enet_adjust_link() and phy_read() can be running at the same time when we have one additional interrupt before the phy_state_machine() is able to terminate. Thread 1 (phylib WQ) | Thread 2 (phy interrupt) | | phy_interrupt() <-- PHY IRQ | handle_interrupt() | phy_read() | phy_trigger_machine() | --> schedule phylib WQ | | phy_state_machine() | phy_check_link_status() | phy_link_change() | phydev->adjust_link() | fec_enet_adjust_link() | --> FEC reset | phy_interrupt() <-- PHY IRQ | phy_read() | Fix this by acquiring the phydev lock in phy_interrupt(). Link: https://lore.kernel.org/all/20220422152612.GA510015@francesco-nb.int.toradex.com/ Fixes: c974bdbc3e77 ("net: phy: Use threaded IRQ, to allow IRQ from sleeping devices") cc: <stable@vger.kernel.org> Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com> Reviewed-by: Andrew Lunn <andrew@lunn.ch> Link: https://lore.kernel.org/r/20220506060815.327382-1-francesco.dolcini@toradex.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> [fd: backport: adapt locking before did_interrupt()/ack_interrupt() callbacks removal ] Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
-rw-r--r--drivers/net/phy/phy.c45
1 files changed, 40 insertions, 5 deletions
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 6c52ff8c0d2e..9e8cf64f0442 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -116,10 +116,15 @@ EXPORT_SYMBOL(phy_print_status);
*/
static int phy_clear_interrupt(struct phy_device *phydev)
{
- if (phydev->drv->ack_interrupt)
- return phydev->drv->ack_interrupt(phydev);
+ int ret = 0;
- return 0;
+ if (phydev->drv->ack_interrupt) {
+ mutex_lock(&phydev->lock);
+ ret = phydev->drv->ack_interrupt(phydev);
+ mutex_unlock(&phydev->lock);
+ }
+
+ return ret;
}
/**
@@ -761,6 +766,36 @@ static int phy_disable_interrupts(struct phy_device *phydev)
}
/**
+ * phy_did_interrupt - Checks if the PHY generated an interrupt
+ * @phydev: target phy_device struct
+ */
+static int phy_did_interrupt(struct phy_device *phydev)
+{
+ int ret;
+
+ mutex_lock(&phydev->lock);
+ ret = phydev->drv->did_interrupt(phydev);
+ mutex_unlock(&phydev->lock);
+
+ return ret;
+}
+
+/**
+ * phy_handle_interrupt - PHY specific interrupt handler
+ * @phydev: target phy_device struct
+ */
+static int phy_handle_interrupt(struct phy_device *phydev)
+{
+ int ret;
+
+ mutex_lock(&phydev->lock);
+ ret = phydev->drv->handle_interrupt(phydev);
+ mutex_unlock(&phydev->lock);
+
+ return ret;
+}
+
+/**
* phy_interrupt - PHY interrupt handler
* @irq: interrupt line
* @phy_dat: phy_device pointer
@@ -771,11 +806,11 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
{
struct phy_device *phydev = phy_dat;
- if (phydev->drv->did_interrupt && !phydev->drv->did_interrupt(phydev))
+ if (phydev->drv->did_interrupt && !phy_did_interrupt(phydev))
return IRQ_NONE;
if (phydev->drv->handle_interrupt) {
- if (phydev->drv->handle_interrupt(phydev))
+ if (phy_handle_interrupt(phydev))
goto phy_err;
} else {
/* reschedule state queue work to run as soon as possible */