From f043d45591b6a6fd4ae2888a5dbbb53f8e50153b Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 21 Oct 2015 18:36:49 +0100 Subject: dm btree remove: fix a bug when rebalancing nodes after removal commit 2871c69e025e8bc507651d5a9cf81a8a7da9d24b upstream. Commit 4c7e309340ff ("dm btree remove: fix bug in redistribute3") wasn't a complete fix for redistribute3(). The redistribute3 function takes 3 btree nodes and shares out the entries evenly between them. If the three nodes in total contained (MAX_ENTRIES * 3) - 1 entries between them then this was erroneously getting rebalanced as (MAX_ENTRIES - 1) on the left and right, and (MAX_ENTRIES + 1) in the center. Fix this issue by being more careful about calculating the target number of entries for the left and right nodes. Unit tested in userspace using this program: https://github.com/jthornber/redistribute3-test/blob/master/redistribute3_t.c Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-btree-remove.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index 7c0d75547ccf..92cd09f3c69b 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -301,11 +301,16 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, { int s; uint32_t max_entries = le32_to_cpu(left->header.max_entries); - unsigned target = (nr_left + nr_center + nr_right) / 3; - BUG_ON(target > max_entries); + unsigned total = nr_left + nr_center + nr_right; + unsigned target_right = total / 3; + unsigned remainder = (target_right * 3) != total; + unsigned target_left = target_right + remainder; + + BUG_ON(target_left > max_entries); + BUG_ON(target_right > max_entries); if (nr_left < nr_right) { - s = nr_left - target; + s = nr_left - target_left; if (s < 0 && nr_center < -s) { /* not enough in central node */ @@ -316,10 +321,10 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, } else shift(left, center, s); - shift(center, right, target - nr_right); + shift(center, right, target_right - nr_right); } else { - s = target - nr_right; + s = target_right - nr_right; if (s > 0 && nr_center < s) { /* not enough in central node */ shift(center, right, nr_center); @@ -329,7 +334,7 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, } else shift(center, right, s); - shift(left, center, nr_left - target); + shift(left, center, nr_left - target_left); } *key_ptr(parent, c->index) = center->keys[0]; -- cgit v1.2.3 From 8104ee7b8f8bcfad0114cdad62dd3b4aea5fcf74 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 22 Oct 2015 10:56:40 -0400 Subject: dm btree: fix leak of bufio-backed block in btree_split_beneath error path commit 4dcb8b57df3593dcb20481d9d6cf79d1dc1534be upstream. btree_split_beneath()'s error path had an outstanding FIXME that speaks directly to the potential for _not_ cleaning up a previously allocated bufio-backed block. Fix this by releasing the previously allocated bufio block using unlock_block(). Reported-by: Mikulas Patocka Signed-off-by: Mike Snitzer Acked-by: Joe Thornber Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index c7726cebc495..d6e47033b5e0 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -523,7 +523,7 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) r = new_block(s->info, &right); if (r < 0) { - /* FIXME: put left */ + unlock_block(s->info, left); return r; } -- cgit v1.2.3 From 26b70a8981b22f03f677dd569b5c8191ade40848 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 20 Oct 2015 12:09:12 -0400 Subject: md/raid1: submit_bio_wait() returns 0 on success commit 203d27b0226a05202438ddb39ef0ef1acb14a759 upstream. This was introduced with 9e882242c6193ae6f416f2d8d8db0d9126bd996b which changed the return value of submit_bio_wait() to return != 0 on error, but didn't update the caller accordingly. Fixes: 9e882242c6 ("block: Add submit_bio_wait(), remove from md") Reported-by: Bill Kuzeja Signed-off-by: Jes Sorensen Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 5ce3cd5c4e1d..bff6c1c7fecb 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2248,7 +2248,7 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) bio_trim(wbio, sector - r1_bio->sector, sectors); wbio->bi_iter.bi_sector += rdev->data_offset; wbio->bi_bdev = rdev->bdev; - if (submit_bio_wait(WRITE, wbio) == 0) + if (submit_bio_wait(WRITE, wbio) < 0) /* failure! */ ok = rdev_set_badblocks(rdev, sector, sectors, 0) -- cgit v1.2.3 From e73ccce0672d430409dd021735d658bdcd78656e Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 20 Oct 2015 12:09:13 -0400 Subject: md/raid10: submit_bio_wait() returns 0 on success commit 681ab4696062f5aa939c9e04d058732306a97176 upstream. This was introduced with 9e882242c6193ae6f416f2d8d8db0d9126bd996b which changed the return value of submit_bio_wait() to return != 0 on error, but didn't update the caller accordingly. Fixes: 9e882242c6 ("block: Add submit_bio_wait(), remove from md") Reported-by: Bill Kuzeja Signed-off-by: Jes Sorensen Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index fe0122771642..adfc83a0f023 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2590,7 +2590,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) choose_data_offset(r10_bio, rdev) + (sector - r10_bio->sector)); wbio->bi_bdev = rdev->bdev; - if (submit_bio_wait(WRITE, wbio) == 0) + if (submit_bio_wait(WRITE, wbio) < 0) /* Failure! */ ok = rdev_set_badblocks(rdev, sector, sectors, 0) -- cgit v1.2.3 From a89e9e234a84bf9a67586f57f4e027c880439383 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sat, 31 Oct 2015 10:53:50 +1100 Subject: md/raid5: fix locking in handle_stripe_clean_event() commit b8a9d66d043ffac116100775a469f05f5158c16f upstream. After commit 566c09c53455 ("raid5: relieve lock contention in get_active_stripe()") __find_stripe() is called under conf->hash_locks + hash. But handle_stripe_clean_event() calls remove_hash() under conf->device_lock. Under some cirscumstances the hash chain can be circuited, and we get an infinite loop with disabled interrupts and locked hash lock in __find_stripe(). This leads to hard lockup on multiple CPUs and following system crash. I was able to reproduce this behavior on raid6 over 6 ssd disks. The devices_handle_discard_safely option should be set to enable trim support. The following script was used: for i in `seq 1 32`; do dd if=/dev/zero of=large$i bs=10M count=100 & done neilb: original was against a 3.x kernel. I forward-ported to 4.3-rc. This verison is suitable for any kernel since Commit: 59fc630b8b5f ("RAID5: batch adjacent full stripe write") (v4.1+). I'll post a version for earlier kernels to stable. Signed-off-by: Roman Gushchin Fixes: 566c09c53455 ("raid5: relieve lock contention in get_active_stripe()") Signed-off-by: NeilBrown Cc: Shaohua Li Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid5.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 23af6772f146..0d767e31f455 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3494,6 +3494,7 @@ returnbi: } if (!discard_pending && test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { + int hash; clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); if (sh->qd_idx >= 0) { @@ -3507,16 +3508,17 @@ returnbi: * no updated data, so remove it from hash list and the stripe * will be reinitialized */ - spin_lock_irq(&conf->device_lock); unhash: + hash = sh->hash_lock_index; + spin_lock_irq(conf->hash_locks + hash); remove_hash(sh); + spin_unlock_irq(conf->hash_locks + hash); if (head_sh->batch_head) { sh = list_first_entry(&sh->batch_list, struct stripe_head, batch_list); if (sh != head_sh) goto unhash; } - spin_unlock_irq(&conf->device_lock); sh = head_sh; if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) -- cgit v1.2.3 From bd4009424ef161c0d07ca48ce00bf2d81658ca21 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 31 Oct 2015 11:00:56 +1100 Subject: Revert "md: allow a partially recovered device to be hot-added to an array." commit d01552a76d71f9879af448e9142389ee9be6e95b upstream. This reverts commit 7eb418851f3278de67126ea0c427641ab4792c57. This commit is poorly justified, I can find not discusison in email, and it clearly causes a problem. If a device which is being recovered fails and is subsequently re-added to an array, there could easily have been changes to the array *before* the point where the recovery was up to. So the recovery must start again from the beginning. If a spare is being recovered and fails, then when it is re-added we really should do a bitmap-based recovery up to the recovery-offset, and then a full recovery from there. Before this reversion, we only did the "full recovery from there" which is not corect. After this reversion with will do a full recovery from the start, which is safer but not ideal. It will be left to a future patch to arrange the two different styles of recovery. Reported-and-tested-by: Nate Dailey Signed-off-by: NeilBrown Fixes: 7eb418851f32 ("md: allow a partially recovered device to be hot-added to an array.") Signed-off-by: Greg Kroah-Hartman --- drivers/md/md.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index e8c44fcb1ad1..78c1f77e7903 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8013,8 +8013,7 @@ static int remove_and_add_spares(struct mddev *mddev, !test_bit(Bitmap_sync, &rdev->flags))) continue; - if (rdev->saved_raid_disk < 0) - rdev->recovery_offset = 0; + rdev->recovery_offset = 0; if (mddev->pers-> hot_add_disk(mddev, rdev) == 0) { if (sysfs_link_rdev(mddev, rdev)) -- cgit v1.2.3