From ae61fd4496f9d9290b9e84fc373818b2ee780137 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 26 Jun 2013 17:25:38 -0700 Subject: bcache: FUA fixes commit e49c7c374e7aacd1f04ecbc21d9dbbeeea4a77d6 upstream. Journal writes need to be marked FUA, not just REQ_FLUSH. And btree node writes have... weird ordering requirements. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 23 +++++++++++++++++++++-- drivers/md/bcache/journal.c | 2 +- drivers/md/bcache/request.c | 13 ++++++++++++- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 7b687a6f3dec..833c590806ba 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -326,10 +326,25 @@ static void do_btree_write(struct btree *b) i->csum = btree_csum_set(b, i); btree_bio_init(b); - b->bio->bi_rw = REQ_META|WRITE_SYNC; + b->bio->bi_rw = REQ_META|WRITE_SYNC|REQ_FUA; b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c); bch_bio_map(b->bio, i); + /* + * If we're appending to a leaf node, we don't technically need FUA - + * this write just needs to be persisted before the next journal write, + * which will be marked FLUSH|FUA. + * + * Similarly if we're writing a new btree root - the pointer is going to + * be in the next journal entry. + * + * But if we're writing a new btree node (that isn't a root) or + * appending to a non leaf btree node, we need either FUA or a flush + * when we write the parent with the new pointer. FUA is cheaper than a + * flush, and writes appending to leaf nodes aren't blocking anything so + * just make all btree node writes FUA to keep things sane. + */ + bkey_copy(&k.key, &b->key); SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i)); @@ -2142,6 +2157,9 @@ int bch_btree_insert(struct btree_op *op, struct cache_set *c) void bch_btree_set_root(struct btree *b) { unsigned i; + struct closure cl; + + closure_init_stack(&cl); BUG_ON(!b->written); @@ -2155,8 +2173,9 @@ void bch_btree_set_root(struct btree *b) b->c->root = b; __bkey_put(b->c, &b->key); - bch_journal_meta(b->c, NULL); + bch_journal_meta(b->c, &cl); pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0)); + closure_sync(&cl); } /* Cache lookup */ diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 8a54d3b4f517..b49abb246bb6 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -622,7 +622,7 @@ static void journal_write_unlocked(struct closure *cl) bio_reset(bio); bio->bi_sector = PTR_OFFSET(k, i); bio->bi_bdev = ca->bdev; - bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH; + bio->bi_rw = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA; bio->bi_size = sectors << 9; bio->bi_end_io = journal_write_endio; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 2f36743ce708..afb9a998a737 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1053,9 +1053,20 @@ static void request_write(struct cached_dev *dc, struct search *s) trace_bcache_writethrough(s->orig_bio); closure_bio_submit(bio, cl, s->d); } else { - s->op.cache_bio = bio; trace_bcache_writeback(s->orig_bio); bch_writeback_add(dc, bio_sectors(bio)); + + if (s->op.flush_journal) { + /* Also need to send a flush to the backing device */ + s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO, + dc->disk.bio_split); + + bio->bi_size = 0; + bio->bi_vcnt = 0; + closure_bio_submit(bio, cl, s->d); + } else { + s->op.cache_bio = bio; + } } out: closure_call(&s->op.cl, bch_insert_data, NULL, cl); -- cgit v1.2.3