From 932648b007de76badc61c1b13d7282288dbe887e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 19 May 2008 01:15:06 +0900 Subject: libata: reorganize ata_eh_reset() no reset method path Reorganize ata_eh_reset() such that @prereset() is called even when no reset method is available and if block is used instead of goto to skip actual reset. This makes no reset case behave better (readiness wait) and future changes easier. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/libata-eh.c | 102 ++++++++++++++++++++++++------------------------ 1 file changed, 52 insertions(+), 50 deletions(-) (limited to 'drivers/ata/libata-eh.c') diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 62e033146bed..a34adc2c85df 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2098,7 +2098,9 @@ int ata_eh_reset(struct ata_link *link, int classify, u32 sstatus; int rc; - /* about to reset */ + /* + * Prepare to reset + */ spin_lock_irqsave(ap->lock, flags); ap->pflags |= ATA_PFLAG_RESETTING; spin_unlock_irqrestore(ap->lock, flags); @@ -2124,16 +2126,8 @@ int ata_eh_reset(struct ata_link *link, int classify, ap->ops->set_piomode(ap, dev); } - if (!softreset && !hardreset) { - if (verbose) - ata_link_printk(link, KERN_INFO, "no reset method " - "available, skipping reset\n"); - if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) - lflags |= ATA_LFLAG_ASSUME_ATA; - goto done; - } - /* prefer hardreset */ + reset = NULL; ehc->i.action &= ~ATA_EH_RESET; if (hardreset) { reset = hardreset; @@ -2141,11 +2135,6 @@ int ata_eh_reset(struct ata_link *link, int classify, } else if (softreset) { reset = softreset; ehc->i.action = ATA_EH_SOFTRESET; - } else { - ata_link_printk(link, KERN_ERR, "BUG: no reset method, " - "please report to linux-ide@vger.kernel.org\n"); - dump_stack(); - return -EINVAL; } if (prereset) { @@ -2165,55 +2154,68 @@ int ata_eh_reset(struct ata_link *link, int classify, "prereset failed (errno=%d)\n", rc); goto out; } - } - /* prereset() might have cleared ATA_EH_RESET */ - if (!(ehc->i.action & ATA_EH_RESET)) { - /* prereset told us not to reset, bang classes and return */ - ata_link_for_each_dev(dev, link) - classes[dev->devno] = ATA_DEV_NONE; - rc = 0; - goto out; + /* prereset() might have cleared ATA_EH_RESET. If so, + * bang classes and return. + */ + if (reset && !(ehc->i.action & ATA_EH_RESET)) { + ata_link_for_each_dev(dev, link) + classes[dev->devno] = ATA_DEV_NONE; + rc = 0; + goto out; + } } retry: + /* + * Perform reset + */ deadline = jiffies + ata_eh_reset_timeouts[try++]; - /* shut up during boot probing */ - if (verbose) - ata_link_printk(link, KERN_INFO, "%s resetting link\n", - reset == softreset ? "soft" : "hard"); + if (reset) { + if (verbose) + ata_link_printk(link, KERN_INFO, "%s resetting link\n", + reset == softreset ? "soft" : "hard"); - /* mark that this EH session started with reset */ - if (reset == hardreset) - ehc->i.flags |= ATA_EHI_DID_HARDRESET; - else - ehc->i.flags |= ATA_EHI_DID_SOFTRESET; + /* mark that this EH session started with reset */ + if (reset == hardreset) + ehc->i.flags |= ATA_EHI_DID_HARDRESET; + else + ehc->i.flags |= ATA_EHI_DID_SOFTRESET; - rc = ata_do_reset(link, reset, classes, deadline); + rc = ata_do_reset(link, reset, classes, deadline); - if (reset == hardreset && - ata_eh_followup_srst_needed(link, rc, classify, classes)) { - /* okay, let's do follow-up softreset */ - reset = softreset; + if (reset == hardreset && + ata_eh_followup_srst_needed(link, rc, classify, classes)) { + /* okay, let's do follow-up softreset */ + reset = softreset; - if (!reset) { - ata_link_printk(link, KERN_ERR, - "follow-up softreset required " - "but no softreset avaliable\n"); - rc = -EINVAL; - goto fail; + if (!reset) { + ata_link_printk(link, KERN_ERR, + "follow-up softreset required " + "but no softreset avaliable\n"); + rc = -EINVAL; + goto fail; + } + + ata_eh_about_to_do(link, NULL, ATA_EH_RESET); + rc = ata_do_reset(link, reset, classes, deadline); } - ata_eh_about_to_do(link, NULL, ATA_EH_RESET); - rc = ata_do_reset(link, reset, classes, deadline); + /* -EAGAIN can happen if we skipped followup SRST */ + if (rc && rc != -EAGAIN) + goto fail; + } else { + if (verbose) + ata_link_printk(link, KERN_INFO, "no reset method " + "available, skipping reset\n"); + if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) + lflags |= ATA_LFLAG_ASSUME_ATA; } - /* -EAGAIN can happen if we skipped followup SRST */ - if (rc && rc != -EAGAIN) - goto fail; - - done: + /* + * Post-reset processing + */ ata_link_for_each_dev(dev, link) { /* After the reset, the device state is PIO 0 and the * controller state is undefined. Reset also wakes up -- cgit v1.2.3 From dc98c32cbe80750ae2d9d9fbdae305d38f005de7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 19 May 2008 01:15:07 +0900 Subject: libata: move reset freeze/thaw handling into ata_eh_reset() Previously reset freeze/thaw handling lived outside of ata_eh_reset() mainly because the original PMP reset code needed the port frozen while resetting all the fan-out ports, which is no longer the case. This patch moves freeze/thaw handling into ata_eh_reset(). @prereset() and @postreset() are now called w/o freezing the port although @prereset() an be called frozen if the port is frozen prior to entering ata_eh_reset(). This makes code simpler and will help removing hotplug event related races. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/libata-eh.c | 46 ++++++++++++++++++---------------------------- 1 file changed, 18 insertions(+), 28 deletions(-) (limited to 'drivers/ata/libata-eh.c') diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index a34adc2c85df..06a92c58a49d 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2170,6 +2170,9 @@ int ata_eh_reset(struct ata_link *link, int classify, /* * Perform reset */ + if (ata_is_host_link(link)) + ata_eh_freeze_port(ap); + deadline = jiffies + ata_eh_reset_timeouts[try++]; if (reset) { @@ -2238,6 +2241,10 @@ int ata_eh_reset(struct ata_link *link, int classify, if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) link->sata_spd = (sstatus >> 4) & 0xf; + /* thaw the port */ + if (ata_is_host_link(link)) + ata_eh_thaw_port(ap); + if (postreset) postreset(link, classes); @@ -2589,7 +2596,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, struct ata_link *link; struct ata_device *dev; int nr_failed_devs, nr_disabled_devs; - int reset, rc; + int rc; unsigned long flags; DPRINTK("ENTER\n"); @@ -2632,7 +2639,6 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, rc = 0; nr_failed_devs = 0; nr_disabled_devs = 0; - reset = 0; /* if UNLOADING, finish immediately */ if (ap->pflags & ATA_PFLAG_UNLOADING) @@ -2646,40 +2652,24 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, if (ata_eh_skip_recovery(link)) ehc->i.action = 0; - /* do we need to reset? */ - if (ehc->i.action & ATA_EH_RESET) - reset = 1; - ata_link_for_each_dev(dev, link) ehc->classes[dev->devno] = ATA_DEV_UNKNOWN; } /* reset */ - if (reset) { - /* if PMP is attached, this function only deals with - * downstream links, port should stay thawed. - */ - if (!sata_pmp_attached(ap)) - ata_eh_freeze_port(ap); - - ata_port_for_each_link(link, ap) { - struct ata_eh_context *ehc = &link->eh_context; + ata_port_for_each_link(link, ap) { + struct ata_eh_context *ehc = &link->eh_context; - if (!(ehc->i.action & ATA_EH_RESET)) - continue; + if (!(ehc->i.action & ATA_EH_RESET)) + continue; - rc = ata_eh_reset(link, ata_link_nr_vacant(link), - prereset, softreset, hardreset, - postreset); - if (rc) { - ata_link_printk(link, KERN_ERR, - "reset failed, giving up\n"); - goto out; - } + rc = ata_eh_reset(link, ata_link_nr_vacant(link), + prereset, softreset, hardreset, postreset); + if (rc) { + ata_link_printk(link, KERN_ERR, + "reset failed, giving up\n"); + goto out; } - - if (!sata_pmp_attached(ap)) - ata_eh_thaw_port(ap); } /* the rest */ -- cgit v1.2.3 From f046519fc85a8fdf6a058b4ac9d897cdee6f3e52 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 19 May 2008 01:15:08 +0900 Subject: libata: kill hotplug related race condition Originally, whole reset processing was done while the port is frozen and SError was cleared during @postreset(). This had two race conditions. 1: hotplug could occur after reset but before SError is cleared and libata won't know about it. 2: hotplug could occur after all the reset is complete but before the port is thawed. As all events are cleared on thaw, the hotplug event would be lost. Commit ac371987a81c61c2efbd6931245cdcaf43baad89 kills the first race by clearing SError during link resume but before link onlineness test. However, this doesn't fix race #2 and in some cases clearing SError after SRST is a good idea. This patch solves this problem by cross checking link onlineness with classification result after SError is cleared and port is thawed. Reset is retried if link is online but all devices attached to the link are unknown. As all devices will be revalidated, this one-way check is enough to ensure that all devices are detected and revalidated reliably. This, luckily, also fixes the cases where host controller returns bogus status while harddrive is spinning up after hotplug making classification run before the device sends the first FIS and thus causes misdetection. Low level drivers can bypass the logic by setting class explicitly to ATA_DEV_NONE if ever necessary (currently none requires this). Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/libata-eh.c | 52 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 10 deletions(-) (limited to 'drivers/ata/libata-eh.c') diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 06a92c58a49d..751dad0138ae 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2047,19 +2047,11 @@ static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, unsigned int *classes, unsigned long deadline) { struct ata_device *dev; - int rc; ata_link_for_each_dev(dev, link) classes[dev->devno] = ATA_DEV_UNKNOWN; - rc = reset(link, classes, deadline); - - /* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */ - ata_link_for_each_dev(dev, link) - if (classes[dev->devno] == ATA_DEV_UNKNOWN) - classes[dev->devno] = ATA_DEV_NONE; - - return rc; + return reset(link, classes, deadline); } static int ata_eh_followup_srst_needed(struct ata_link *link, @@ -2096,7 +2088,7 @@ int ata_eh_reset(struct ata_link *link, int classify, ata_reset_fn_t reset; unsigned long flags; u32 sstatus; - int rc; + int nr_known, rc; /* * Prepare to reset @@ -2245,9 +2237,49 @@ int ata_eh_reset(struct ata_link *link, int classify, if (ata_is_host_link(link)) ata_eh_thaw_port(ap); + /* postreset() should clear hardware SError. Although SError + * is cleared during link resume, clearing SError here is + * necessary as some PHYs raise hotplug events after SRST. + * This introduces race condition where hotplug occurs between + * reset and here. This race is mediated by cross checking + * link onlineness and classification result later. + */ if (postreset) postreset(link, classes); + /* clear cached SError */ + spin_lock_irqsave(link->ap->lock, flags); + link->eh_info.serror = 0; + spin_unlock_irqrestore(link->ap->lock, flags); + + /* Make sure onlineness and classification result correspond. + * Hotplug could have happened during reset and some + * controllers fail to wait while a drive is spinning up after + * being hotplugged causing misdetection. By cross checking + * link onlineness and classification result, those conditions + * can be reliably detected and retried. + */ + nr_known = 0; + ata_link_for_each_dev(dev, link) { + /* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */ + if (classes[dev->devno] == ATA_DEV_UNKNOWN) + classes[dev->devno] = ATA_DEV_NONE; + else + nr_known++; + } + + if (classify && !nr_known && ata_link_online(link)) { + if (try < max_tries) { + ata_link_printk(link, KERN_WARNING, "link online but " + "device misclassified, retrying\n"); + rc = -EAGAIN; + goto fail; + } + ata_link_printk(link, KERN_WARNING, + "link online but device misclassified, " + "device detection might fail\n"); + } + /* reset successful, schedule revalidation */ ata_eh_done(link, NULL, ATA_EH_RESET); ehc->i.action |= ATA_EH_REVALIDATE; -- cgit v1.2.3 From e0614db2a398d4d0dc5fb47fe2c2783141262a3e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 19 May 2008 01:15:09 +0900 Subject: libata: ignore recovered PHY errors No reason to get overzealous about recovered comm and data errors. Some PHYs habitually sets them w/o no good reason and being draconian about these soft error conditions doesn't seem to help anybody. If need ever rises, we might need to add soft PHY error condition, say AC_ERR_MAYBE_ATA_BUS and use it only to determine whether speed down is necessary but I don't think that's very likely to happen. It's far more likely we'll get timeouts or fatal transmission errors if recovered errors are so prominent that they hamper operation. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/libata-eh.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/ata/libata-eh.c') diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 751dad0138ae..7894d83ea1eb 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1308,12 +1308,7 @@ static void ata_eh_analyze_serror(struct ata_link *link) unsigned int err_mask = 0, action = 0; u32 hotplug_mask; - if (serror & SERR_PERSISTENT) { - err_mask |= AC_ERR_ATA_BUS; - action |= ATA_EH_RESET; - } - if (serror & - (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) { + if (serror & (SERR_PERSISTENT | SERR_DATA)) { err_mask |= AC_ERR_ATA_BUS; action |= ATA_EH_RESET; } -- cgit v1.2.3