summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bcache/writeback.h3
-rw-r--r--drivers/md/dm-kcopyd.c19
-rw-r--r--drivers/md/dm-snap.c22
-rw-r--r--drivers/md/dm-thin-metadata.c4
-rw-r--r--drivers/md/dm-thin-metadata.h2
-rw-r--r--drivers/md/dm-thin.c65
-rw-r--r--drivers/md/raid10.c3
-rw-r--r--drivers/md/raid5.c2
8 files changed, 101 insertions, 19 deletions
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index cdf8d253209e..6fb6dd600970 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -68,6 +68,9 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
in_use > CUTOFF_WRITEBACK_SYNC)
return false;
+ if (bio_op(bio) == REQ_OP_DISCARD)
+ return false;
+
if (dc->partial_stripes_expensive &&
bcache_dev_stripe_dirty(dc, bio->bi_iter.bi_sector,
bio_sectors(bio)))
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index 56fcccc30554..e0cfde3501e0 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -55,15 +55,17 @@ struct dm_kcopyd_client {
struct dm_kcopyd_throttle *throttle;
/*
- * We maintain three lists of jobs:
+ * We maintain four lists of jobs:
*
* i) jobs waiting for pages
* ii) jobs that have pages, and are waiting for the io to be issued.
- * iii) jobs that have completed.
+ * iii) jobs that don't need to do any IO and just run a callback
+ * iv) jobs that have completed.
*
- * All three of these are protected by job_lock.
+ * All four of these are protected by job_lock.
*/
spinlock_t job_lock;
+ struct list_head callback_jobs;
struct list_head complete_jobs;
struct list_head io_jobs;
struct list_head pages_jobs;
@@ -584,6 +586,7 @@ static void do_work(struct work_struct *work)
struct dm_kcopyd_client *kc = container_of(work,
struct dm_kcopyd_client, kcopyd_work);
struct blk_plug plug;
+ unsigned long flags;
/*
* The order that these are called is *very* important.
@@ -592,6 +595,10 @@ static void do_work(struct work_struct *work)
* list. io jobs call wake when they complete and it all
* starts again.
*/
+ spin_lock_irqsave(&kc->job_lock, flags);
+ list_splice_tail_init(&kc->callback_jobs, &kc->complete_jobs);
+ spin_unlock_irqrestore(&kc->job_lock, flags);
+
blk_start_plug(&plug);
process_jobs(&kc->complete_jobs, kc, run_complete_job);
process_jobs(&kc->pages_jobs, kc, run_pages_job);
@@ -609,7 +616,7 @@ static void dispatch_job(struct kcopyd_job *job)
struct dm_kcopyd_client *kc = job->kc;
atomic_inc(&kc->nr_jobs);
if (unlikely(!job->source.count))
- push(&kc->complete_jobs, job);
+ push(&kc->callback_jobs, job);
else if (job->pages == &zero_page_list)
push(&kc->io_jobs, job);
else
@@ -796,7 +803,7 @@ void dm_kcopyd_do_callback(void *j, int read_err, unsigned long write_err)
job->read_err = read_err;
job->write_err = write_err;
- push(&kc->complete_jobs, job);
+ push(&kc->callback_jobs, job);
wake(kc);
}
EXPORT_SYMBOL(dm_kcopyd_do_callback);
@@ -826,6 +833,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro
return ERR_PTR(-ENOMEM);
spin_lock_init(&kc->job_lock);
+ INIT_LIST_HEAD(&kc->callback_jobs);
INIT_LIST_HEAD(&kc->complete_jobs);
INIT_LIST_HEAD(&kc->io_jobs);
INIT_LIST_HEAD(&kc->pages_jobs);
@@ -875,6 +883,7 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc)
/* Wait for completion of all jobs submitted by this client. */
wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs));
+ BUG_ON(!list_empty(&kc->callback_jobs));
BUG_ON(!list_empty(&kc->complete_jobs));
BUG_ON(!list_empty(&kc->io_jobs));
BUG_ON(!list_empty(&kc->pages_jobs));
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index c65feeada864..2da0b9b213c7 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -19,6 +19,7 @@
#include <linux/vmalloc.h>
#include <linux/log2.h>
#include <linux/dm-kcopyd.h>
+#include <linux/semaphore.h>
#include "dm.h"
@@ -105,6 +106,9 @@ struct dm_snapshot {
/* The on disk metadata handler */
struct dm_exception_store *store;
+ /* Maximum number of in-flight COW jobs. */
+ struct semaphore cow_count;
+
struct dm_kcopyd_client *kcopyd_client;
/* Wait for events based on state_bits */
@@ -145,6 +149,19 @@ struct dm_snapshot {
#define RUNNING_MERGE 0
#define SHUTDOWN_MERGE 1
+/*
+ * Maximum number of chunks being copied on write.
+ *
+ * The value was decided experimentally as a trade-off between memory
+ * consumption, stalling the kernel's workqueues and maintaining a high enough
+ * throughput.
+ */
+#define DEFAULT_COW_THRESHOLD 2048
+
+static int cow_threshold = DEFAULT_COW_THRESHOLD;
+module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644);
+MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
+
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
"A percentage of time allocated for copy on write");
@@ -1189,6 +1206,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad_hash_tables;
}
+ sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX);
+
s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
if (IS_ERR(s->kcopyd_client)) {
r = PTR_ERR(s->kcopyd_client);
@@ -1560,6 +1579,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
}
list_add(&pe->out_of_order_entry, lh);
}
+ up(&s->cow_count);
}
/*
@@ -1583,6 +1603,7 @@ static void start_copy(struct dm_snap_pending_exception *pe)
dest.count = src.count;
/* Hand over to kcopyd */
+ down(&s->cow_count);
dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
}
@@ -1602,6 +1623,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
pe->full_bio = bio;
pe->full_bio_end_io = bio->bi_end_io;
+ down(&s->cow_count);
callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
copy_callback, pe);
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 149fbac97cb6..d20f4023f6c1 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1689,7 +1689,7 @@ int dm_thin_remove_range(struct dm_thin_device *td,
return r;
}
-int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result)
+int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result)
{
int r;
uint32_t ref_count;
@@ -1697,7 +1697,7 @@ int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *resu
down_read(&pmd->root_lock);
r = dm_sm_get_count(pmd->data_sm, b, &ref_count);
if (!r)
- *result = (ref_count != 0);
+ *result = (ref_count > 1);
up_read(&pmd->root_lock);
return r;
diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h
index 35e954ea20a9..f6be0d733c20 100644
--- a/drivers/md/dm-thin-metadata.h
+++ b/drivers/md/dm-thin-metadata.h
@@ -195,7 +195,7 @@ int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd,
int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result);
-int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result);
+int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result);
int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e);
int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e);
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 81309d7836c5..345f4d81ba07 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -257,6 +257,7 @@ struct pool {
spinlock_t lock;
struct bio_list deferred_flush_bios;
+ struct bio_list deferred_flush_completions;
struct list_head prepared_mappings;
struct list_head prepared_discards;
struct list_head prepared_discards_pt2;
@@ -925,6 +926,39 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
mempool_free(m, m->tc->pool->mapping_pool);
}
+static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio)
+{
+ struct pool *pool = tc->pool;
+ unsigned long flags;
+
+ /*
+ * If the bio has the REQ_FUA flag set we must commit the metadata
+ * before signaling its completion.
+ */
+ if (!bio_triggers_commit(tc, bio)) {
+ bio_endio(bio);
+ return;
+ }
+
+ /*
+ * Complete bio with an error if earlier I/O caused changes to the
+ * metadata that can't be committed, e.g, due to I/O errors on the
+ * metadata device.
+ */
+ if (dm_thin_aborted_changes(tc->td)) {
+ bio_io_error(bio);
+ return;
+ }
+
+ /*
+ * Batch together any bios that trigger commits and then issue a
+ * single commit for them in process_deferred_bios().
+ */
+ spin_lock_irqsave(&pool->lock, flags);
+ bio_list_add(&pool->deferred_flush_completions, bio);
+ spin_unlock_irqrestore(&pool->lock, flags);
+}
+
static void process_prepared_mapping(struct dm_thin_new_mapping *m)
{
struct thin_c *tc = m->tc;
@@ -957,7 +991,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
*/
if (bio) {
inc_remap_and_issue_cell(tc, m->cell, m->data_block);
- bio_endio(bio);
+ complete_overwrite_bio(tc, bio);
} else {
inc_all_io_entry(tc->pool, m->cell->holder);
remap_and_issue(tc, m->cell->holder, m->data_block);
@@ -1017,7 +1051,7 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m
* passdown we have to check that these blocks are now unused.
*/
int r = 0;
- bool used = true;
+ bool shared = true;
struct thin_c *tc = m->tc;
struct pool *pool = tc->pool;
dm_block_t b = m->data_block, e, end = m->data_block + m->virt_end - m->virt_begin;
@@ -1027,11 +1061,11 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m
while (b != end) {
/* find start of unmapped run */
for (; b < end; b++) {
- r = dm_pool_block_is_used(pool->pmd, b, &used);
+ r = dm_pool_block_is_shared(pool->pmd, b, &shared);
if (r)
goto out;
- if (!used)
+ if (!shared)
break;
}
@@ -1040,11 +1074,11 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m
/* find end of run */
for (e = b + 1; e != end; e++) {
- r = dm_pool_block_is_used(pool->pmd, e, &used);
+ r = dm_pool_block_is_shared(pool->pmd, e, &shared);
if (r)
goto out;
- if (used)
+ if (shared)
break;
}
@@ -2303,7 +2337,7 @@ static void process_deferred_bios(struct pool *pool)
{
unsigned long flags;
struct bio *bio;
- struct bio_list bios;
+ struct bio_list bios, bio_completions;
struct thin_c *tc;
tc = get_first_thin(pool);
@@ -2314,26 +2348,36 @@ static void process_deferred_bios(struct pool *pool)
}
/*
- * If there are any deferred flush bios, we must commit
- * the metadata before issuing them.
+ * If there are any deferred flush bios, we must commit the metadata
+ * before issuing them or signaling their completion.
*/
bio_list_init(&bios);
+ bio_list_init(&bio_completions);
+
spin_lock_irqsave(&pool->lock, flags);
bio_list_merge(&bios, &pool->deferred_flush_bios);
bio_list_init(&pool->deferred_flush_bios);
+
+ bio_list_merge(&bio_completions, &pool->deferred_flush_completions);
+ bio_list_init(&pool->deferred_flush_completions);
spin_unlock_irqrestore(&pool->lock, flags);
- if (bio_list_empty(&bios) &&
+ if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
!(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool)))
return;
if (commit(pool)) {
+ bio_list_merge(&bios, &bio_completions);
+
while ((bio = bio_list_pop(&bios)))
bio_io_error(bio);
return;
}
pool->last_commit_jiffies = jiffies;
+ while ((bio = bio_list_pop(&bio_completions)))
+ bio_endio(bio);
+
while ((bio = bio_list_pop(&bios)))
generic_make_request(bio);
}
@@ -2968,6 +3012,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
spin_lock_init(&pool->lock);
bio_list_init(&pool->deferred_flush_bios);
+ bio_list_init(&pool->deferred_flush_completions);
INIT_LIST_HEAD(&pool->prepared_mappings);
INIT_LIST_HEAD(&pool->prepared_discards);
INIT_LIST_HEAD(&pool->prepared_discards_pt2);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index b62e6ab66b31..717787d09e0f 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3798,6 +3798,8 @@ static int raid10_run(struct mddev *mddev)
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
mddev->sync_thread = md_register_thread(md_do_sync, mddev,
"reshape");
+ if (!mddev->sync_thread)
+ goto out_free_conf;
}
return 0;
@@ -4489,7 +4491,6 @@ bio_full:
atomic_inc(&r10_bio->remaining);
read_bio->bi_next = NULL;
generic_make_request(read_bio);
- sector_nr += nr_sectors;
sectors_done += nr_sectors;
if (sector_nr <= last)
goto read_more;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 4afc419da60f..9ec74dfe94f4 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6977,6 +6977,8 @@ static int raid5_run(struct mddev *mddev)
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
mddev->sync_thread = md_register_thread(md_do_sync, mddev,
"reshape");
+ if (!mddev->sync_thread)
+ goto abort;
}
/* Ok, everything is just fine now */