summaryrefslogtreecommitdiff
path: root/fs/jbd2
diff options
context:
space:
mode:
authorTheodore Ts'o <tytso@mit.edu>2010-05-30 22:49:17 -0400
committerGreg Kroah-Hartman <gregkh@suse.de>2010-08-02 10:21:10 -0700
commit7589529d0a233f28bd20e1b7775324dcf9cfb499 (patch)
treed55bab3fdd972189146fe6941d4365d04289cee0 /fs/jbd2
parentb235a77c0fb44fd428525591a968da14844b6f4d (diff)
ext4, jbd2: Add barriers for file systems with external journals
commit cc3e1bea5d87635c519da657303690f5538bb4eb upstream (as of v2.6.33-rc3). This is a bit complicated because we are trying to optimize when we send barriers to the fs data disk. We could just throw in an extra barrier to the data disk whenever we send a barrier to the journal disk, but that's not always strictly necessary. We only need to send a barrier during a commit when there are data blocks which must be written out due to an inode written in ordered mode, or if fsync() depends on the commit to force data blocks to disk. Finally, before we drop transactions from the beginning of the journal during a checkpoint operation, we need to guarantee that any blocks that were flushed out to the data disk are firmly on the rust platter before we drop the transaction from the journal. Thanks to Oleg Drokin for pointing out this flaw in ext3/ext4. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'fs/jbd2')
-rw-r--r--fs/jbd2/checkpoint.c15
-rw-r--r--fs/jbd2/commit.c19
2 files changed, 26 insertions, 8 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index ca0f5eb62b20..886849370950 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -22,6 +22,7 @@
#include <linux/jbd2.h>
#include <linux/errno.h>
#include <linux/slab.h>
+#include <linux/blkdev.h>
#include <trace/events/jbd2.h>
/*
@@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
journal->j_tail_sequence = first_tid;
journal->j_tail = blocknr;
spin_unlock(&journal->j_state_lock);
+
+ /*
+ * If there is an external journal, we need to make sure that
+ * any data blocks that were recently written out --- perhaps
+ * by jbd2_log_do_checkpoint() --- are flushed out before we
+ * drop the transactions from the external journal. It's
+ * unlikely this will be necessary, especially with an
+ * appropriately sized journal, but we need this to guarantee
+ * correctness. Fortunately jbd2_cleanup_journal_tail()
+ * doesn't get called all that often.
+ */
+ if ((journal->j_fs_dev != journal->j_dev) &&
+ (journal->j_flags & JBD2_BARRIER))
+ blkdev_issue_flush(journal->j_fs_dev, NULL);
if (!(journal->j_flags & JBD2_ABORT))
jbd2_journal_update_superblock(journal, 1);
return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 8896c1d4febe..09ab6ac6a075 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal,
ret = err;
spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction);
+ commit_transaction->t_flushed_data_blocks = 1;
jinode->i_flags &= ~JI_COMMIT_RUNNING;
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
@@ -708,8 +709,17 @@ start_journal_io:
}
}
- /* Done it all: now write the commit record asynchronously. */
+ /*
+ * If the journal is not located on the file system device,
+ * then we must flush the file system device before we issue
+ * the commit record
+ */
+ if (commit_transaction->t_flushed_data_blocks &&
+ (journal->j_fs_dev != journal->j_dev) &&
+ (journal->j_flags & JBD2_BARRIER))
+ blkdev_issue_flush(journal->j_fs_dev, NULL);
+ /* Done it all: now write the commit record asynchronously. */
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
err = journal_submit_commit_record(journal, commit_transaction,
@@ -720,13 +730,6 @@ start_journal_io:
blkdev_issue_flush(journal->j_dev, NULL);
}
- /*
- * This is the right place to wait for data buffers both for ASYNC
- * and !ASYNC commit. If commit is ASYNC, we need to wait only after
- * the commit block went to disk (which happens above). If commit is
- * SYNC, we need to wait for data buffers before we start writing
- * commit block, which happens below in such setting.
- */
err = journal_finish_inode_data_buffers(journal, commit_transaction);
if (err) {
printk(KERN_WARNING