summaryrefslogtreecommitdiff
path: root/drivers/s390/block/dasd.c
diff options
context:
space:
mode:
authorStefan Weinhuber <wein@de.ibm.com>2009-12-07 12:51:51 +0100
committerMartin Schwidefsky <sky@mschwide.boeblingen.de.ibm.com>2009-12-07 12:51:34 +0100
commiteb6e199bef288611157b8198c25d12b32bf058d0 (patch)
tree80737a2703a9f4d09cee2410342aeccb281413ae /drivers/s390/block/dasd.c
parent626350b63ef2cd447023d3dc2a34eaa7ca01bfff (diff)
[S390] dasd: improve error recovery for internal I/O
Most of the error conditions reported by a FICON storage server indicate situations which can be recovered. Sometimes the host just needs to retry an I/O request, but sometimes the recovery is more complex and requires the device driver to wait, choose a different path, etc. The DASD device driver has a fully featured error recovery for normal block layer I/O, but not for internal I/O request which are for example used during the device bring up. This can lead to situations where the IPL of a system fails because DASD devices are not properly recognized. This patch will extend the internal I/O handling to use the existing error recovery procedures. Signed-off-by: Stefan Weinhuber <wein@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'drivers/s390/block/dasd.c')
-rw-r--r--drivers/s390/block/dasd.c207
1 files changed, 152 insertions, 55 deletions
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 329115a4d4b3..4f211c175b55 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -63,6 +63,7 @@ static void do_restore_device(struct work_struct *);
static void dasd_return_cqr_cb(struct dasd_ccw_req *, void *);
static void dasd_device_timeout(unsigned long);
static void dasd_block_timeout(unsigned long);
+static void __dasd_process_erp(struct dasd_device *, struct dasd_ccw_req *);
/*
* SECTION: Operations on the device structure.
@@ -959,7 +960,7 @@ static void dasd_device_timeout(unsigned long ptr)
device = (struct dasd_device *) ptr;
spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
/* re-activate request queue */
- device->stopped &= ~DASD_STOPPED_PENDING;
+ dasd_device_remove_stop_bits(device, DASD_STOPPED_PENDING);
spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
dasd_schedule_device_bh(device);
}
@@ -1022,7 +1023,7 @@ void dasd_generic_handle_state_change(struct dasd_device *device)
/* First of all start sense subsystem status request. */
dasd_eer_snss(device);
- device->stopped &= ~DASD_STOPPED_PENDING;
+ dasd_device_remove_stop_bits(device, DASD_STOPPED_PENDING);
dasd_schedule_device_bh(device);
if (device->block)
dasd_schedule_block_bh(device->block);
@@ -1404,6 +1405,20 @@ void dasd_schedule_device_bh(struct dasd_device *device)
tasklet_hi_schedule(&device->tasklet);
}
+void dasd_device_set_stop_bits(struct dasd_device *device, int bits)
+{
+ device->stopped |= bits;
+}
+EXPORT_SYMBOL_GPL(dasd_device_set_stop_bits);
+
+void dasd_device_remove_stop_bits(struct dasd_device *device, int bits)
+{
+ device->stopped &= ~bits;
+ if (!device->stopped)
+ wake_up(&generic_waitq);
+}
+EXPORT_SYMBOL_GPL(dasd_device_remove_stop_bits);
+
/*
* Queue a request to the head of the device ccw_queue.
* Start the I/O if possible.
@@ -1464,58 +1479,135 @@ static inline int _wait_for_wakeup(struct dasd_ccw_req *cqr)
}
/*
- * Queue a request to the tail of the device ccw_queue and wait for
- * it's completion.
+ * checks if error recovery is necessary, returns 1 if yes, 0 otherwise.
*/
-int dasd_sleep_on(struct dasd_ccw_req *cqr)
+static int __dasd_sleep_on_erp(struct dasd_ccw_req *cqr)
{
struct dasd_device *device;
- int rc;
+ dasd_erp_fn_t erp_fn;
+ if (cqr->status == DASD_CQR_FILLED)
+ return 0;
device = cqr->startdev;
+ if (test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags)) {
+ if (cqr->status == DASD_CQR_TERMINATED) {
+ device->discipline->handle_terminated_request(cqr);
+ return 1;
+ }
+ if (cqr->status == DASD_CQR_NEED_ERP) {
+ erp_fn = device->discipline->erp_action(cqr);
+ erp_fn(cqr);
+ return 1;
+ }
+ if (cqr->status == DASD_CQR_FAILED)
+ dasd_log_sense(cqr, &cqr->irb);
+ if (cqr->refers) {
+ __dasd_process_erp(device, cqr);
+ return 1;
+ }
+ }
+ return 0;
+}
- cqr->callback = dasd_wakeup_cb;
- cqr->callback_data = (void *) &generic_waitq;
- dasd_add_request_tail(cqr);
- wait_event(generic_waitq, _wait_for_wakeup(cqr));
+static int __dasd_sleep_on_loop_condition(struct dasd_ccw_req *cqr)
+{
+ if (test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags)) {
+ if (cqr->refers) /* erp is not done yet */
+ return 1;
+ return ((cqr->status != DASD_CQR_DONE) &&
+ (cqr->status != DASD_CQR_FAILED));
+ } else
+ return (cqr->status == DASD_CQR_FILLED);
+}
- if (cqr->status == DASD_CQR_DONE)
+static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible)
+{
+ struct dasd_device *device;
+ int rc;
+ struct list_head ccw_queue;
+ struct dasd_ccw_req *cqr;
+
+ INIT_LIST_HEAD(&ccw_queue);
+ maincqr->status = DASD_CQR_FILLED;
+ device = maincqr->startdev;
+ list_add(&maincqr->blocklist, &ccw_queue);
+ for (cqr = maincqr; __dasd_sleep_on_loop_condition(cqr);
+ cqr = list_first_entry(&ccw_queue,
+ struct dasd_ccw_req, blocklist)) {
+
+ if (__dasd_sleep_on_erp(cqr))
+ continue;
+ if (cqr->status != DASD_CQR_FILLED) /* could be failed */
+ continue;
+
+ /* Non-temporary stop condition will trigger fail fast */
+ if (device->stopped & ~DASD_STOPPED_PENDING &&
+ test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) &&
+ (!dasd_eer_enabled(device))) {
+ cqr->status = DASD_CQR_FAILED;
+ continue;
+ }
+
+ /* Don't try to start requests if device is stopped */
+ if (interruptible) {
+ rc = wait_event_interruptible(
+ generic_waitq, !(device->stopped));
+ if (rc == -ERESTARTSYS) {
+ cqr->status = DASD_CQR_FAILED;
+ maincqr->intrc = rc;
+ continue;
+ }
+ } else
+ wait_event(generic_waitq, !(device->stopped));
+
+ cqr->callback = dasd_wakeup_cb;
+ cqr->callback_data = (void *) &generic_waitq;
+ dasd_add_request_tail(cqr);
+ if (interruptible) {
+ rc = wait_event_interruptible(
+ generic_waitq, _wait_for_wakeup(cqr));
+ if (rc == -ERESTARTSYS) {
+ dasd_cancel_req(cqr);
+ /* wait (non-interruptible) for final status */
+ wait_event(generic_waitq,
+ _wait_for_wakeup(cqr));
+ cqr->status = DASD_CQR_FAILED;
+ maincqr->intrc = rc;
+ continue;
+ }
+ } else
+ wait_event(generic_waitq, _wait_for_wakeup(cqr));
+ }
+
+ maincqr->endclk = get_clock();
+ if ((maincqr->status != DASD_CQR_DONE) &&
+ (maincqr->intrc != -ERESTARTSYS))
+ dasd_log_sense(maincqr, &maincqr->irb);
+ if (maincqr->status == DASD_CQR_DONE)
rc = 0;
- else if (cqr->intrc)
- rc = cqr->intrc;
+ else if (maincqr->intrc)
+ rc = maincqr->intrc;
else
rc = -EIO;
return rc;
}
/*
+ * Queue a request to the tail of the device ccw_queue and wait for
+ * it's completion.
+ */
+int dasd_sleep_on(struct dasd_ccw_req *cqr)
+{
+ return _dasd_sleep_on(cqr, 0);
+}
+
+/*
* Queue a request to the tail of the device ccw_queue and wait
* interruptible for it's completion.
*/
int dasd_sleep_on_interruptible(struct dasd_ccw_req *cqr)
{
- struct dasd_device *device;
- int rc;
-
- device = cqr->startdev;
- cqr->callback = dasd_wakeup_cb;
- cqr->callback_data = (void *) &generic_waitq;
- dasd_add_request_tail(cqr);
- rc = wait_event_interruptible(generic_waitq, _wait_for_wakeup(cqr));
- if (rc == -ERESTARTSYS) {
- dasd_cancel_req(cqr);
- /* wait (non-interruptible) for final status */
- wait_event(generic_waitq, _wait_for_wakeup(cqr));
- cqr->intrc = rc;
- }
-
- if (cqr->status == DASD_CQR_DONE)
- rc = 0;
- else if (cqr->intrc)
- rc = cqr->intrc;
- else
- rc = -EIO;
- return rc;
+ return _dasd_sleep_on(cqr, 1);
}
/*
@@ -1629,7 +1721,7 @@ static void dasd_block_timeout(unsigned long ptr)
block = (struct dasd_block *) ptr;
spin_lock_irqsave(get_ccwdev_lock(block->base->cdev), flags);
/* re-activate request queue */
- block->base->stopped &= ~DASD_STOPPED_PENDING;
+ dasd_device_remove_stop_bits(block->base, DASD_STOPPED_PENDING);
spin_unlock_irqrestore(get_ccwdev_lock(block->base->cdev), flags);
dasd_schedule_block_bh(block);
}
@@ -1656,11 +1748,10 @@ void dasd_block_clear_timer(struct dasd_block *block)
/*
* Process finished error recovery ccw.
*/
-static inline void __dasd_block_process_erp(struct dasd_block *block,
- struct dasd_ccw_req *cqr)
+static void __dasd_process_erp(struct dasd_device *device,
+ struct dasd_ccw_req *cqr)
{
dasd_erp_fn_t erp_fn;
- struct dasd_device *device = block->base;
if (cqr->status == DASD_CQR_DONE)
DBF_DEV_EVENT(DBF_NOTICE, device, "%s", "ERP successful");
@@ -1724,9 +1815,12 @@ static void __dasd_process_request_queue(struct dasd_block *block)
*/
if (!list_empty(&block->ccw_queue))
break;
- spin_lock_irqsave(get_ccwdev_lock(basedev->cdev), flags);
- basedev->stopped |= DASD_STOPPED_PENDING;
- spin_unlock_irqrestore(get_ccwdev_lock(basedev->cdev), flags);
+ spin_lock_irqsave(
+ get_ccwdev_lock(basedev->cdev), flags);
+ dasd_device_set_stop_bits(basedev,
+ DASD_STOPPED_PENDING);
+ spin_unlock_irqrestore(
+ get_ccwdev_lock(basedev->cdev), flags);
dasd_block_set_timer(block, HZ/2);
break;
}
@@ -1812,7 +1906,7 @@ restart:
cqr->status = DASD_CQR_FILLED;
cqr->retries = 255;
spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags);
- base->stopped |= DASD_STOPPED_QUIESCE;
+ dasd_device_set_stop_bits(base, DASD_STOPPED_QUIESCE);
spin_unlock_irqrestore(get_ccwdev_lock(base->cdev),
flags);
goto restart;
@@ -1820,7 +1914,7 @@ restart:
/* Process finished ERP request. */
if (cqr->refers) {
- __dasd_block_process_erp(block, cqr);
+ __dasd_process_erp(base, cqr);
goto restart;
}
@@ -1951,7 +2045,7 @@ restart_cb:
/* Process finished ERP request. */
if (cqr->refers) {
spin_lock_bh(&block->queue_lock);
- __dasd_block_process_erp(block, cqr);
+ __dasd_process_erp(block->base, cqr);
spin_unlock_bh(&block->queue_lock);
/* restart list_for_xx loop since dasd_process_erp
* might remove multiple elements */
@@ -2410,16 +2504,16 @@ int dasd_generic_notify(struct ccw_device *cdev, int event)
cqr->status = DASD_CQR_QUEUED;
cqr->retries++;
}
- device->stopped |= DASD_STOPPED_DC_WAIT;
+ dasd_device_set_stop_bits(device, DASD_STOPPED_DC_WAIT);
dasd_device_clear_timer(device);
dasd_schedule_device_bh(device);
ret = 1;
break;
case CIO_OPER:
/* FIXME: add a sanity check. */
- device->stopped &= ~DASD_STOPPED_DC_WAIT;
+ dasd_device_remove_stop_bits(device, DASD_STOPPED_DC_WAIT);
if (device->stopped & DASD_UNRESUMED_PM) {
- device->stopped &= ~DASD_UNRESUMED_PM;
+ dasd_device_remove_stop_bits(device, DASD_UNRESUMED_PM);
dasd_restore_device(device);
ret = 1;
break;
@@ -2444,7 +2538,7 @@ int dasd_generic_pm_freeze(struct ccw_device *cdev)
if (IS_ERR(device))
return PTR_ERR(device);
/* disallow new I/O */
- device->stopped |= DASD_STOPPED_PM;
+ dasd_device_set_stop_bits(device, DASD_STOPPED_PM);
/* clear active requests */
INIT_LIST_HEAD(&freeze_queue);
spin_lock_irq(get_ccwdev_lock(cdev));
@@ -2496,14 +2590,18 @@ int dasd_generic_restore_device(struct ccw_device *cdev)
return PTR_ERR(device);
/* allow new IO again */
- device->stopped &= ~DASD_STOPPED_PM;
- device->stopped &= ~DASD_UNRESUMED_PM;
+ dasd_device_remove_stop_bits(device,
+ (DASD_STOPPED_PM | DASD_UNRESUMED_PM));
dasd_schedule_device_bh(device);
- if (device->discipline->restore)
+ /*
+ * call discipline restore function
+ * if device is stopped do nothing e.g. for disconnected devices
+ */
+ if (device->discipline->restore && !(device->stopped))
rc = device->discipline->restore(device);
- if (rc)
+ if (rc || device->stopped)
/*
* if the resume failed for the DASD we put it in
* an UNRESUMED stop state
@@ -2553,8 +2651,7 @@ static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device,
cqr->startdev = device;
cqr->memdev = device;
cqr->expires = 10*HZ;
- clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
- cqr->retries = 2;
+ cqr->retries = 256;
cqr->buildclk = get_clock();
cqr->status = DASD_CQR_FILLED;
return cqr;