summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDmitry Fomichev <dmitry.fomichev@wdc.com>2019-08-05 16:56:03 -0700
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2019-08-29 08:26:44 +0200
commit8e64ca30aefc1fa8810529e5f654a80ae868ed07 (patch)
treea6e794364c64c5d97512b43c9161d0478b600c6e
parent20622d5544267114b087f73f11a06a2c033408cc (diff)
dm kcopyd: always complete failed jobs
commit d1fef41465f0e8cae0693fb184caa6bfafb6cd16 upstream. This patch fixes a problem in dm-kcopyd that may leave jobs in complete queue indefinitely in the event of backing storage failure. This behavior has been observed while running 100% write file fio workload against an XFS volume created on top of a dm-zoned target device. If the underlying storage of dm-zoned goes to offline state under I/O, kcopyd sometimes never issues the end copy callback and dm-zoned reclaim work hangs indefinitely waiting for that completion. This behavior was traced down to the error handling code in process_jobs() function that places the failed job to complete_jobs queue, but doesn't wake up the job handler. In case of backing device failure, all outstanding jobs may end up going to complete_jobs queue via this code path and then stay there forever because there are no more successful I/O jobs to wake up the job handler. This patch adds a wake() call to always wake up kcopyd job wait queue for all I/O jobs that fail before dm_io() gets called for that job. The patch also sets the write error status in all sub jobs that are failed because their master job has failed. Fixes: b73c67c2cbb00 ("dm kcopyd: add sequential write feature") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Fomichev <dmitry.fomichev@wdc.com> Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--drivers/md/dm-kcopyd.c5
1 files changed, 4 insertions, 1 deletions
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index b9d1897bcf5b..bd9a45b94b55 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -545,8 +545,10 @@ static int run_io_job(struct kcopyd_job *job)
* no point in continuing.
*/
if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) &&
- job->master_job->write_err)
+ job->master_job->write_err) {
+ job->write_err = job->master_job->write_err;
return -EIO;
+ }
io_job_start(job->kc->throttle);
@@ -598,6 +600,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
else
job->read_err = 1;
push(&kc->complete_jobs, job);
+ wake(kc);
break;
}