From 41b5d3eee4af6a4ea488a1735ed82e4e593eec0d Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 25 Sep 2018 10:56:54 +0800 Subject: scsi: libsas: fix a race condition when smp task timeout commit b90cd6f2b905905fb42671009dc0e27c310a16ae upstream. When the lldd is processing the complete sas task in interrupt and set the task stat as SAS_TASK_STATE_DONE, the smp timeout timer is able to be triggered at the same time. And smp_task_timedout() will complete the task wheter the SAS_TASK_STATE_DONE is set or not. Then the sas task may freed before lldd end the interrupt process. Thus a use-after-free will happen. Fix this by calling the complete() only when SAS_TASK_STATE_DONE is not set. And remove the check of the return value of the del_timer(). Once the LLDD sets DONE, it must call task->done(), which will call smp_task_done()->complete() and the task will be completed and freed correctly. Reported-by: chenxiang Signed-off-by: Jason Yan CC: John Garry CC: Johannes Thumshirn CC: Ewan Milne CC: Christoph Hellwig CC: Tomas Henzl CC: Dan Williams CC: Hannes Reinecke Reviewed-by: Hannes Reinecke Reviewed-by: John Garry Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Cc: Guenter Roeck --- drivers/scsi/libsas/sas_expander.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/scsi/libsas/sas_expander.c') diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 7be581f7c35d..1a6f65db615e 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -47,17 +47,16 @@ static void smp_task_timedout(unsigned long _task) unsigned long flags; spin_lock_irqsave(&task->task_state_lock, flags); - if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) + if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { task->task_state_flags |= SAS_TASK_STATE_ABORTED; + complete(&task->slow_task->completion); + } spin_unlock_irqrestore(&task->task_state_lock, flags); - - complete(&task->slow_task->completion); } static void smp_task_done(struct sas_task *task) { - if (!del_timer(&task->slow_task->timer)) - return; + del_timer(&task->slow_task->timer); complete(&task->slow_task->completion); } -- cgit v1.2.3 From 17d9e39b0211dc8a56d2e03ffa581ff6899da0a9 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 12 Apr 2019 16:57:56 +0800 Subject: scsi: libsas: Do discovery on empty PHY to update PHY info [ Upstream commit d8649fc1c5e40e691d589ed825998c36a947491c ] When we discover the PHY is empty in sas_rediscover_dev(), the PHY information (like negotiated linkrate) is not updated. As such, for a user examining sysfs for that PHY, they would see incorrect values: root@(none)$ cd /sys/class/sas_phy/phy-0:0:20 root@(none)$ more negotiated_linkrate 3.0 Gbit root@(none)$ echo 0 > enable root@(none)$ more negotiated_linkrate 3.0 Gbit So fix this, simply discover the PHY again, even though we know it's empty; in the above example, this gives us: root@(none)$ more negotiated_linkrate Phy disabled We must do this after unregistering the device associated with the PHY (in sas_unregister_devs_sas_addr()). Signed-off-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libsas/sas_expander.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/scsi/libsas/sas_expander.c') diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 1a6f65db615e..ee1f9ee995e5 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -2027,6 +2027,11 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, bool last) if ((SAS_ADDR(sas_addr) == 0) || (res == -ECOMM)) { phy->phy_state = PHY_EMPTY; sas_unregister_devs_sas_addr(dev, phy_id, last); + /* + * Even though the PHY is empty, for convenience we discover + * the PHY to update the PHY info, like negotiated linkrate. + */ + sas_ex_phy_discover(dev, phy_id); return res; } else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) && dev_type_flutter(type, phy->attached_dev_type)) { -- cgit v1.2.3 From 1d28cf14a89c400fa55f6f9a9a4ca3bc34094b34 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 14 May 2019 10:42:39 +0800 Subject: scsi: libsas: delete sas port if expander discover failed [ Upstream commit 3b0541791453fbe7f42867e310e0c9eb6295364d ] The sas_port(phy->port) allocated in sas_ex_discover_expander() will not be deleted when the expander failed to discover. This will cause resource leak and a further issue of kernel BUG like below: [159785.843156] port-2:17:29: trying to add phy phy-2:17:29 fails: it's already part of another port [159785.852144] ------------[ cut here ]------------ [159785.856833] kernel BUG at drivers/scsi/scsi_transport_sas.c:1086! [159785.863000] Internal error: Oops - BUG: 0 [#1] SMP [159785.867866] CPU: 39 PID: 16993 Comm: kworker/u96:2 Tainted: G W OE 4.19.25-vhulk1901.1.0.h111.aarch64 #1 [159785.878458] Hardware name: Huawei Technologies Co., Ltd. Hi1620EVBCS/Hi1620EVBCS, BIOS Hi1620 CS B070 1P TA 03/21/2019 [159785.889231] Workqueue: 0000:74:02.0_disco_q sas_discover_domain [159785.895224] pstate: 40c00009 (nZcv daif +PAN +UAO) [159785.900094] pc : sas_port_add_phy+0x188/0x1b8 [159785.904524] lr : sas_port_add_phy+0x188/0x1b8 [159785.908952] sp : ffff0001120e3b80 [159785.912341] x29: ffff0001120e3b80 x28: 0000000000000000 [159785.917727] x27: ffff802ade8f5400 x26: ffff0000681b7560 [159785.923111] x25: ffff802adf11a800 x24: ffff0000680e8000 [159785.928496] x23: ffff802ade8f5728 x22: ffff802ade8f5708 [159785.933880] x21: ffff802adea2db40 x20: ffff802ade8f5400 [159785.939264] x19: ffff802adea2d800 x18: 0000000000000010 [159785.944649] x17: 00000000821bf734 x16: ffff00006714faa0 [159785.950033] x15: ffff0000e8ab4ecf x14: 7261702079646165 [159785.955417] x13: 726c612073277469 x12: ffff00006887b830 [159785.960802] x11: ffff00006773eaa0 x10: 7968702079687020 [159785.966186] x9 : 0000000000002453 x8 : 726f702072656874 [159785.971570] x7 : 6f6e6120666f2074 x6 : ffff802bcfb21290 [159785.976955] x5 : ffff802bcfb21290 x4 : 0000000000000000 [159785.982339] x3 : ffff802bcfb298c8 x2 : 337752b234c2ab00 [159785.987723] x1 : 337752b234c2ab00 x0 : 0000000000000000 [159785.993108] Process kworker/u96:2 (pid: 16993, stack limit = 0x0000000072dae094) [159786.000576] Call trace: [159786.003097] sas_port_add_phy+0x188/0x1b8 [159786.007179] sas_ex_get_linkrate.isra.5+0x134/0x140 [159786.012130] sas_ex_discover_expander+0x128/0x408 [159786.016906] sas_ex_discover_dev+0x218/0x4c8 [159786.021249] sas_ex_discover_devices+0x9c/0x1a8 [159786.025852] sas_discover_root_expander+0x134/0x160 [159786.030802] sas_discover_domain+0x1b8/0x1e8 [159786.035148] process_one_work+0x1b4/0x3f8 [159786.039230] worker_thread+0x54/0x470 [159786.042967] kthread+0x134/0x138 [159786.046269] ret_from_fork+0x10/0x18 [159786.049918] Code: 91322300 f0004402 91178042 97fe4c9b (d4210000) [159786.056083] Modules linked in: hns3_enet_ut(OE) hclge(OE) hnae3(OE) hisi_sas_test_hw(OE) hisi_sas_test_main(OE) serdes(OE) [159786.067202] ---[ end trace 03622b9e2d99e196 ]--- [159786.071893] Kernel panic - not syncing: Fatal exception [159786.077190] SMP: stopping secondary CPUs [159786.081192] Kernel Offset: disabled [159786.084753] CPU features: 0x2,a2a00a38 Fixes: 2908d778ab3e ("[SCSI] aic94xx: new driver") Reported-by: Jian Luo Signed-off-by: Jason Yan CC: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libsas/sas_expander.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/scsi/libsas/sas_expander.c') diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index ee1f9ee995e5..400eee9d7783 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -978,6 +978,8 @@ static struct domain_device *sas_ex_discover_expander( list_del(&child->dev_list_node); spin_unlock_irq(&parent->port->dev_list_lock); sas_put_device(child); + sas_port_delete(phy->port); + phy->port = NULL; return NULL; } list_add_tail(&child->siblings, &parent->ex_dev.children); -- cgit v1.2.3 From 83e597884338f11cfe6986b5aeff732a2f22b665 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Tue, 25 Sep 2018 10:56:52 +0800 Subject: scsi: libsas: always unregister the old device if going to discover new [ Upstream commit 32c850bf587f993b2620b91e5af8a64a7813f504 ] If we went into sas_rediscover_dev() the attached_sas_addr was already insured not to be zero. So it's unnecessary to check if the attached_sas_addr is zero. And although if the sas address is not changed, we always have to unregister the old device when we are going to register a new one. We cannot just leave the device there and bring up the new. Signed-off-by: Jason Yan CC: chenxiang CC: John Garry CC: Johannes Thumshirn CC: Ewan Milne CC: Christoph Hellwig CC: Tomas Henzl CC: Dan Williams CC: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libsas/sas_expander.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'drivers/scsi/libsas/sas_expander.c') diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 400eee9d7783..d44f18f773c0 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -2049,14 +2049,11 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, bool last) return res; } - /* delete the old link */ - if (SAS_ADDR(phy->attached_sas_addr) && - SAS_ADDR(sas_addr) != SAS_ADDR(phy->attached_sas_addr)) { - SAS_DPRINTK("ex %016llx phy 0x%x replace %016llx\n", - SAS_ADDR(dev->sas_addr), phy_id, - SAS_ADDR(phy->attached_sas_addr)); - sas_unregister_devs_sas_addr(dev, phy_id, last); - } + /* we always have to delete the old device when we went here */ + SAS_DPRINTK("ex %016llx phy 0x%x replace %016llx\n", + SAS_ADDR(dev->sas_addr), phy_id, + SAS_ADDR(phy->attached_sas_addr)); + sas_unregister_devs_sas_addr(dev, phy_id, last); return sas_discover_new(dev, phy_id); } -- cgit v1.2.3 From ecf34d014e4594a9e73e69f4fba122de8edf7806 Mon Sep 17 00:00:00 2001 From: John Garry Date: Sat, 5 Jan 2019 00:01:28 +0800 Subject: scsi: libsas: Support SATA PHY connection rate unmatch fixing during discovery [ Upstream commit cec9771d2e954650095aa37a6a97722c8194e7d2 ] +----------+ +----------+ | | | | | |--- 3.0 G ---| |--- 6.0 G --- SAS disk | | | | | |--- 3.0 G ---| |--- 6.0 G --- SAS disk |initiator | | | | device |--- 3.0 G ---| Expander |--- 6.0 G --- SAS disk | | | | | |--- 3.0 G ---| |--- 6.0 G --- SATA disk -->failed to connect | | | | | | | |--- 6.0 G --- SATA disk -->failed to connect | | | | +----------+ +----------+ According to Serial Attached SCSI - 1.1 (SAS-1.1): If an expander PHY attached to a SATA PHY is using a physical link rate greater than the maximum connection rate supported by the pathway from an STP initiator port, a management application client should use the SMP PHY CONTROL function (see 10.4.3.10) to set the PROGRAMMED MAXIMUM PHYSICAL LINK RATE field of the expander PHY to the maximum connection rate supported by the pathway from that STP initiator port. Currently libsas does not support checking if this condition occurs, nor rectifying when it does. Such a condition is not at all common, however it has been seen on some pre-silicon environments where the initiator PHY only supports a 1.5 Gbit maximum linkrate, mated with 12G expander PHYs and 3/6G SATA phy. This patch adds support for checking and rectifying this condition during initial device discovery only. We do support checking min pathway connection rate during revalidation phase, when new devices can be detected in the topology. However we do not support in the case of the the user reprogramming PHY linkrates, such that min pathway condition is not met/maintained. A note on root port PHY rates: The libsas root port PHY rates calculation is broken. Libsas sets the rates (min, max, and current linkrate) of a root port to the same linkrate of the first PHY member of that same port. In doing so, it assumes that all other PHYs which subsequently join the port to have the same negotiated linkrate, when they could actually be different. In practice this doesn't happen, as initiator and expander PHYs are normally initialised with consistent min/max linkrates. This has not caused an issue so far, so leave alone for now. Tested-by: Jian Luo Signed-off-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libsas/sas_expander.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'drivers/scsi/libsas/sas_expander.c') diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index d44f18f773c0..b7e4493d3dc1 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -806,6 +806,26 @@ static struct domain_device *sas_ex_discover_end_dev( #ifdef CONFIG_SCSI_SAS_ATA if ((phy->attached_tproto & SAS_PROTOCOL_STP) || phy->attached_sata_dev) { + if (child->linkrate > parent->min_linkrate) { + struct sas_phy_linkrates rates = { + .maximum_linkrate = parent->min_linkrate, + .minimum_linkrate = parent->min_linkrate, + }; + int ret; + + pr_notice("ex %016llx phy%02d SATA device linkrate > min pathway connection rate, attempting to lower device linkrate\n", + SAS_ADDR(child->sas_addr), phy_id); + ret = sas_smp_phy_control(parent, phy_id, + PHY_FUNC_LINK_RESET, &rates); + if (ret) { + pr_err("ex %016llx phy%02d SATA device could not set linkrate (%d)\n", + SAS_ADDR(child->sas_addr), phy_id, ret); + goto out_free; + } + pr_notice("ex %016llx phy%02d SATA device set linkrate successfully\n", + SAS_ADDR(child->sas_addr), phy_id); + child->linkrate = child->min_linkrate; + } res = sas_get_ata_info(child, phy); if (res) goto out_free; -- cgit v1.2.3 From fe862a86ce6fffc444fa6987d5bd5dffec76a6b1 Mon Sep 17 00:00:00 2001 From: John Garry Date: Sat, 5 Jan 2019 00:01:27 +0800 Subject: scsi: libsas: Check SMP PHY control function result [ Upstream commit 01929a65dfa13e18d89264ab1378854a91857e59 ] Currently the SMP PHY control execution result is checked, however the function result for the command is not. As such, we may be missing all potential errors, like SMP FUNCTION FAILED, INVALID REQUEST FRAME LENGTH, etc., meaning the PHY control request has failed. In some scenarios we need to ensure the function result is accepted, so add a check for this. Tested-by: Jian Luo Signed-off-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/libsas/sas_expander.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/scsi/libsas/sas_expander.c') diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index b7e4493d3dc1..7e8274938a3e 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -603,7 +603,14 @@ int sas_smp_phy_control(struct domain_device *dev, int phy_id, } res = smp_execute_task(dev, pc_req, PC_REQ_SIZE, pc_resp,PC_RESP_SIZE); - + if (res) { + pr_err("ex %016llx phy%02d PHY control failed: %d\n", + SAS_ADDR(dev->sas_addr), phy_id, res); + } else if (pc_resp[2] != SMP_RESP_FUNC_ACC) { + pr_err("ex %016llx phy%02d PHY control failed: function result 0x%x\n", + SAS_ADDR(dev->sas_addr), phy_id, pc_resp[2]); + res = pc_resp[2]; + } kfree(pc_resp); kfree(pc_req); return res; -- cgit v1.2.3