From c8894419acf5e56851de9741c5047bebd78acd1f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 12 May 2008 14:02:12 -0700 Subject: md: fix raid5 'repair' operations commit bd2ab67030e9116f1e4aae1289220255412b37fd "md: close a livelock window in handle_parity_checks5" introduced a bug in handling 'repair' operations. After a repair operation completes we clear the state bits tracking this operation. However, they are cleared too early and this results in the code deciding to re-run the parity check operation. Since we have done the repair in memory the second check does not find a mismatch and thus does not do a writeback. Test results: $ echo repair > /sys/block/md0/md/sync_action $ cat /sys/block/md0/md/mismatch_cnt 51072 $ echo repair > /sys/block/md0/md/sync_action $ cat /sys/block/md0/md/mismatch_cnt 0 (also fix incorrect indentation) Cc: Tested-by: George Spelvin Acked-by: NeilBrown Signed-off-by: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid5.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 087eee0cb809..ee0ea9183080 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2369,8 +2369,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, /* complete a check operation */ if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { - clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); - clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); + clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); + clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); if (s->failed == 0) { if (sh->ops.zero_sum_result == 0) /* parity is correct (on disc, @@ -2400,16 +2400,6 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, canceled_check = 1; /* STRIPE_INSYNC is not set */ } - /* check if we can clear a parity disk reconstruct */ - if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && - test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { - - clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); - clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); - clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); - clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); - } - /* start a new check operation if there are no failures, the stripe is * not insync, and a repair is not in flight */ @@ -2424,6 +2414,17 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, } } + /* check if we can clear a parity disk reconstruct */ + if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && + test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { + + clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); + } + + /* Wait for check parity and compute block operations to complete * before write-back. If a failure occurred while the check operation * was in flight we need to cycle this stripe through handle_stripe -- cgit v1.2.3 From e7e72bf641b1fc7b9df6f40bd2c36dfccd8d647c Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Wed, 14 May 2008 16:05:54 -0700 Subject: Remove blkdev warning triggered by using md As setting and clearing queue flags now requires that we hold a spinlock on the queue, and as blk_queue_stack_limits is called without that lock, get the lock inside blk_queue_stack_limits. For blk_queue_stack_limits to be able to find the right lock, each md personality needs to set q->queue_lock to point to the appropriate lock. Those personalities which didn't previously use a spin_lock, us q->__queue_lock. So always initialise that lock when allocated. With this in place, setting/clearing of the QUEUE_FLAG_PLUGGED bit will no longer cause warnings as it will be clear that the proper lock is held. Thanks to Dan Williams for review and fixing the silly bugs. Signed-off-by: NeilBrown Cc: Dan Williams Cc: Jens Axboe Cc: Alistair John Strachan Cc: Nick Piggin Cc: "Rafael J. Wysocki" Cc: Jacek Luczak Cc: Prakash Punnoor Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid5.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/md/raid5.c') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ee0ea9183080..93fde48c0f42 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4257,6 +4257,7 @@ static int run(mddev_t *mddev) goto abort; } spin_lock_init(&conf->device_lock); + mddev->queue->queue_lock = &conf->device_lock; init_waitqueue_head(&conf->wait_for_stripe); init_waitqueue_head(&conf->wait_for_overlap); INIT_LIST_HEAD(&conf->handle_list); -- cgit v1.2.3