From 25985edcedea6396277003854657b5f3cb31a628 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Date: Wed, 30 Mar 2011 22:57:33 -0300
Subject: Fix common misspellings

Fixes generated by 'codespell' and manually reviewed.

Signed-off-by: Lucas De Marchi <lucas.demarchi@profusion.mobi>
---
 fs/btrfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 93c28a1d6bdc..80920bce01ab 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2324,7 +2324,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
 
 		/*
 		 * if ret == 0 means we found what we were searching for, which
-		 * is weird, but possible, so only screw with path if we didnt
+		 * is weird, but possible, so only screw with path if we didn't
 		 * find the key and see if we have stuff that matches
 		 */
 		if (ret > 0) {
-- 
cgit v1.2.3


From fe3f566cd19bb6d787c92b2e202c85f929abf3ac Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Mon, 28 Mar 2011 08:30:38 +0000
Subject: Btrfs: Fix oops for defrag with compression turned on

When we defrag a file, whose size can be fit into an inline extent,
with compression enabled, the compress type is set to be
fs_info->compress_type, which is 0 if the btrfs filesystem is mounted
without compress option. This leads to oops.

Reported-by: Daniel Blueman <daniel.blueman@gmail.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 06274186b290..62ae9d5da806 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -111,6 +111,7 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
 static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root, struct inode *inode,
 				u64 start, size_t size, size_t compressed_size,
+				int compress_type,
 				struct page **compressed_pages)
 {
 	struct btrfs_key key;
@@ -125,12 +126,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
 	size_t cur_size = size;
 	size_t datasize;
 	unsigned long offset;
-	int compress_type = BTRFS_COMPRESS_NONE;
 
-	if (compressed_size && compressed_pages) {
-		compress_type = root->fs_info->compress_type;
+	if (compressed_size && compressed_pages)
 		cur_size = compressed_size;
-	}
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -220,7 +218,7 @@ fail:
 static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
 				 struct inode *inode, u64 start, u64 end,
-				 size_t compressed_size,
+				 size_t compressed_size, int compress_type,
 				 struct page **compressed_pages)
 {
 	u64 isize = i_size_read(inode);
@@ -253,7 +251,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
 		inline_len = min_t(u64, isize, actual_end);
 	ret = insert_inline_extent(trans, root, inode, start,
 				   inline_len, compressed_size,
-				   compressed_pages);
+				   compress_type, compressed_pages);
 	BUG_ON(ret);
 	btrfs_delalloc_release_metadata(inode, end + 1 - start);
 	btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
@@ -432,12 +430,13 @@ again:
 			 * to make an uncompressed inline extent.
 			 */
 			ret = cow_file_range_inline(trans, root, inode,
-						    start, end, 0, NULL);
+						    start, end, 0, 0, NULL);
 		} else {
 			/* try making a compressed inline extent */
 			ret = cow_file_range_inline(trans, root, inode,
 						    start, end,
-						    total_compressed, pages);
+						    total_compressed,
+						    compress_type, pages);
 		}
 		if (ret == 0) {
 			/*
@@ -791,7 +790,7 @@ static noinline int cow_file_range(struct inode *inode,
 	if (start == 0) {
 		/* lets try to make an inline extent */
 		ret = cow_file_range_inline(trans, root, inode,
-					    start, end, 0, NULL);
+					    start, end, 0, 0, NULL);
 		if (ret == 0) {
 			extent_clear_unlock_delalloc(inode,
 				     &BTRFS_I(inode)->io_tree,
-- 
cgit v1.2.3


From b44c59a80ded004e1a82712e5f9e17b131c03221 Mon Sep 17 00:00:00 2001
From: Johann Lombardi <johann@whamcloud.com>
Date: Thu, 31 Mar 2011 13:23:47 +0000
Subject: Btrfs: fix subvol_sem leak in btrfs_rename()

btrfs_rename() does not release the subvol_sem if the transaction failed to start.

Signed-off-by: Johann Lombardi <johann@whamcloud.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 62ae9d5da806..1ca3e68586cf 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6961,8 +6961,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 * should cover the worst case number of items we'll modify.
 	 */
 	trans = btrfs_start_transaction(root, 20);
-	if (IS_ERR(trans))
-		return PTR_ERR(trans);
+	if (IS_ERR(trans)) {
+                ret = PTR_ERR(trans);
+                goto out_notrans;
+        }
 
 	btrfs_set_trans_block_group(trans, new_dir);
 
@@ -7062,7 +7064,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	}
 out_fail:
 	btrfs_end_transaction_throttle(trans, root);
-
+out_notrans:
 	if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
 		up_read(&root->fs_info->subvol_sem);
 
-- 
cgit v1.2.3


From adae52b94e18afa1f84fab67df2a8a872c2f5533 Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Thu, 31 Mar 2011 09:43:23 +0000
Subject: btrfs: clear __GFP_FS flag in the space cache inode

the object id of the space cache inode's key is allocated from the relative
root, just like the regular file. So we can't identify space cache inode by
checking the object id of the inode's key, and we have to clear __GFP_FS flag
at the time we look up the space cache inode.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Liu Bo <liubo2009@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1ca3e68586cf..57a03f6eb224 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2535,8 +2535,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
 	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
 
 	alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
-	if (location.objectid == BTRFS_FREE_SPACE_OBJECTID)
-		inode->i_mapping->flags &= ~__GFP_FS;
 
 	/*
 	 * try to precache a NULL acl entry for files that don't have
-- 
cgit v1.2.3


From c9ddec74aa950a220cc4caa5215cfc5d886050b7 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Mon, 28 Mar 2011 13:43:25 +0000
Subject: Btrfs: don't warn in btrfs_add_orphan

When I moved the orphan adding to btrfs_truncate I missed the fact that during
orphan cleanup we just add the orphan items to the orphan list without going
through btrfs_orphan_add, which results in lots of warnings on mount if you have
any orphan items that need to be truncated.  Just remove this warning since it's
ok, this will allow all of the normal space accounting take place.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 57a03f6eb224..cc6022842e0c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2220,8 +2220,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 			insert = 1;
 #endif
 		insert = 1;
-	} else {
-		WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved);
 	}
 
 	if (!BTRFS_I(inode)->orphan_meta_reserved) {
-- 
cgit v1.2.3


From 12ddb96cb6752218d8a1aeb696ec9b0ca7adb42f Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Tue, 5 Apr 2011 13:02:27 -0400
Subject: Btrfs: map the inode item when doing fill_inode_item

Instead of calling kmap_atomic for every thing we set in the inode item, map the
entire inode item at the start and unmap it at the end.  This makes a sequential
dd of 400mb O_DIRECT something like 1% faster.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/inode.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index cc6022842e0c..da2680263d9f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2589,6 +2589,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 			    struct btrfs_inode_item *item,
 			    struct inode *inode)
 {
+	if (!leaf->map_token)
+		map_private_extent_buffer(leaf, (unsigned long)item,
+					  sizeof(struct btrfs_inode_item),
+					  &leaf->map_token, &leaf->kaddr,
+					  &leaf->map_start, &leaf->map_len,
+					  KM_USER1);
+
 	btrfs_set_inode_uid(leaf, item, inode->i_uid);
 	btrfs_set_inode_gid(leaf, item, inode->i_gid);
 	btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2617,6 +2624,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 	btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
 	btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
 	btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
+
+	if (leaf->map_token) {
+		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+		leaf->map_token = NULL;
+	}
 }
 
 /*
-- 
cgit v1.2.3


From 1ef30be142d2cc60e2687ef267de864cf31be995 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Tue, 5 Apr 2011 19:25:36 -0400
Subject: Btrfs: do not call btrfs_update_inode in endio if nothing changed

In the DIO code we often don't update the i_disk_size because the i_size isn't
updated until after the DIO is completed, so basically we are allocating a path,
doing a search, and updating the inode item for no reason since nothing changed.
btrfs_ordered_update_i_size will return 1 if it didn't update i_disk_size, so
only run btrfs_update_inode if btrfs_ordered_update_i_size returns 0.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/inode.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index da2680263d9f..4a238d676e5b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1769,9 +1769,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	add_pending_csums(trans, inode, ordered_extent->file_offset,
 			  &ordered_extent->list);
 
-	btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-	ret = btrfs_update_inode(trans, root, inode);
-	BUG_ON(ret);
+	ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+	if (!ret) {
+		ret = btrfs_update_inode(trans, root, inode);
+		BUG_ON(ret);
+	}
+	ret = 0;
 out:
 	if (nolock) {
 		if (trans)
@@ -5865,8 +5868,10 @@ again:
 	}
 
 	add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
-	btrfs_ordered_update_i_size(inode, 0, ordered);
-	btrfs_update_inode(trans, root, inode);
+	ret = btrfs_ordered_update_i_size(inode, 0, ordered);
+	if (!ret)
+		btrfs_update_inode(trans, root, inode);
+	ret = 0;
 out_unlock:
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
 			     ordered->file_offset + ordered->len - 1,
-- 
cgit v1.2.3


From 02f57c7aedef1a537f4b16db7061cdd8efa3bb4e Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Wed, 6 Apr 2011 14:25:44 -0400
Subject: Btrfs: don't split dio bios if we don't have to

We have been unconditionally allocating a new bio and re-adding all pages from
our original bio to the new bio.  This is needed if our original bio is larger
than our stripe size, but if it is smaller than the stripe size then there is no
need to do this.  So check the map length and if we are under that then go ahead
and submit the original bio.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/inode.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4a238d676e5b..149c77fd1eb5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6006,13 +6006,6 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 	int ret = 0;
 	int write = rw & REQ_WRITE;
 
-	bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
-	if (!bio)
-		return -ENOMEM;
-	bio->bi_private = dip;
-	bio->bi_end_io = btrfs_end_dio_bio;
-	atomic_inc(&dip->pending_bios);
-
 	map_length = orig_bio->bi_size;
 	ret = btrfs_map_block(map_tree, READ, start_sector << 9,
 			      &map_length, NULL, 0);
@@ -6021,6 +6014,18 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 		return -EIO;
 	}
 
+	if (map_length >= orig_bio->bi_size) {
+		bio = orig_bio;
+		goto submit;
+	}
+
+	bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
+	if (!bio)
+		return -ENOMEM;
+	bio->bi_private = dip;
+	bio->bi_end_io = btrfs_end_dio_bio;
+	atomic_inc(&dip->pending_bios);
+
 	while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
 		if (unlikely(map_length < submit_len + bvec->bv_len ||
 		    bio_add_page(bio, bvec->bv_page, bvec->bv_len,
@@ -6071,6 +6076,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 		}
 	}
 
+submit:
 	ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
 				     csums);
 	if (!ret)
-- 
cgit v1.2.3


From 1ae399382512b3e4d6c923e53da9e45935577040 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Wed, 6 Apr 2011 14:41:34 -0400
Subject: Btrfs: do not use async submit for small DIO io's

When looking at our DIO performance Chris said that for small IO's doing the
async submit stuff tends to be more overhead than it's worth.  With this on top
of my other fixes I get about a 17-20% speedup doing a sequential dd with 4k
IO's.  Basically if we don't have to split the bio for the map length it's small
enough to be directly submitted, otherwise go back to the async submit.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/inode.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 149c77fd1eb5..2bb76c6157a4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5957,7 +5957,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
 
 static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 					 int rw, u64 file_offset, int skip_sum,
-					 u32 *csums)
+					 u32 *csums, int async_submit)
 {
 	int write = rw & REQ_WRITE;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5968,13 +5968,24 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 	if (ret)
 		goto err;
 
-	if (write && !skip_sum) {
+	if (skip_sum)
+		goto map;
+
+	if (write && async_submit) {
 		ret = btrfs_wq_submit_bio(root->fs_info,
 				   inode, rw, bio, 0, 0,
 				   file_offset,
 				   __btrfs_submit_bio_start_direct_io,
 				   __btrfs_submit_bio_done);
 		goto err;
+	} else if (write) {
+		/*
+		 * If we aren't doing async submit, calculate the csum of the
+		 * bio now.
+		 */
+		ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
+		if (ret)
+			goto err;
 	} else if (!skip_sum) {
 		ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
 					  file_offset, csums);
@@ -5982,7 +5993,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
 			goto err;
 	}
 
-	ret = btrfs_map_bio(root, rw, bio, 0, 1);
+map:
+	ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
 err:
 	bio_put(bio);
 	return ret;
@@ -6004,6 +6016,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 	int nr_pages = 0;
 	u32 *csums = dip->csums;
 	int ret = 0;
+	int async_submit = 0;
 	int write = rw & REQ_WRITE;
 
 	map_length = orig_bio->bi_size;
@@ -6019,6 +6032,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 		goto submit;
 	}
 
+	async_submit = 1;
 	bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
 	if (!bio)
 		return -ENOMEM;
@@ -6039,7 +6053,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 			atomic_inc(&dip->pending_bios);
 			ret = __btrfs_submit_dio_bio(bio, inode, rw,
 						     file_offset, skip_sum,
-						     csums);
+						     csums, async_submit);
 			if (ret) {
 				bio_put(bio);
 				atomic_dec(&dip->pending_bios);
@@ -6078,7 +6092,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 
 submit:
 	ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
-				     csums);
+				     csums, async_submit);
 	if (!ret)
 		return 0;
 
-- 
cgit v1.2.3


From 16d299ac7446b5a75c5683a9ae11d7907d444c86 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Wed, 6 Apr 2011 14:53:07 -0400
Subject: Btrfs: reuse the extent_map we found when calling btrfs_get_extent

In btrfs_get_block_direct we call btrfs_get_extent to lookup the extent for the
range that we are looking for.  If we don't find an extent, btrfs_get_extent
will insert a extent_map for that area and mark it as a hole.  So it does the
job of allocating a new extent map and inserting it into the io tree.  But if
we're creating a new extent we free it up and redo all of that work.  So instead
pass the em to btrfs_new_extent_direct(), and if it will work just allocate the
disk space and set it up properly and bypass the freeing/allocating of a new
extent map and the expensive operation of inserting the thing into the io_tree.
Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/inode.c | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2bb76c6157a4..24310c9cb14f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5445,17 +5445,30 @@ out:
 }
 
 static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
+						  struct extent_map *em,
 						  u64 start, u64 len)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
-	struct extent_map *em;
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	struct btrfs_key ins;
 	u64 alloc_hint;
 	int ret;
+	bool insert = false;
 
-	btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
+	/*
+	 * Ok if the extent map we looked up is a hole and is for the exact
+	 * range we want, there is no reason to allocate a new one, however if
+	 * it is not right then we need to free this one and drop the cache for
+	 * our range.
+	 */
+	if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
+	    em->len != len) {
+		free_extent_map(em);
+		em = NULL;
+		insert = true;
+		btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
+	}
 
 	trans = btrfs_join_transaction(root, 0);
 	if (IS_ERR(trans))
@@ -5471,10 +5484,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
 		goto out;
 	}
 
-	em = alloc_extent_map(GFP_NOFS);
 	if (!em) {
-		em = ERR_PTR(-ENOMEM);
-		goto out;
+		em = alloc_extent_map(GFP_NOFS);
+		if (!em) {
+			em = ERR_PTR(-ENOMEM);
+			goto out;
+		}
 	}
 
 	em->start = start;
@@ -5484,9 +5499,15 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
 	em->block_start = ins.objectid;
 	em->block_len = ins.offset;
 	em->bdev = root->fs_info->fs_devices->latest_bdev;
+
+	/*
+	 * We need to do this because if we're using the original em we searched
+	 * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
+	 */
+	em->flags = 0;
 	set_bit(EXTENT_FLAG_PINNED, &em->flags);
 
-	while (1) {
+	while (insert) {
 		write_lock(&em_tree->lock);
 		ret = add_extent_mapping(em_tree, em);
 		write_unlock(&em_tree->lock);
@@ -5704,8 +5725,7 @@ must_cow:
 	 * it above
 	 */
 	len = bh_result->b_size;
-	free_extent_map(em);
-	em = btrfs_new_extent_direct(inode, start, len);
+	em = btrfs_new_extent_direct(inode, em, start, len);
 	if (IS_ERR(em))
 		return PTR_ERR(em);
 	len = min(len, em->len - (start - em->start));
-- 
cgit v1.2.3


From 93a54bc4c28a125978cddbe2db9e347391e3522d Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Wed, 6 Apr 2011 15:11:44 -0400
Subject: Btrfs: check for duplicate iov_base's when doing dio reads

Apparently it is ok to submit a read to an IDE device with the same target page
for different offsets.  This is what Windows does under qemu.  The problem is
under DIO we expect them to be different buffers for checksumming reasons, and
so this sort of thing will result in checksum errors, when in reality the file
is fine.  So when reading, check to make sure that all iov bases are different,
and if they aren't fall back to buffered mode, since that will work out right.
Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
---
 fs/btrfs/inode.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 24310c9cb14f..00d59c6a9769 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6207,6 +6207,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
 			unsigned long nr_segs)
 {
 	int seg;
+	int i;
 	size_t size;
 	unsigned long addr;
 	unsigned blocksize_mask = root->sectorsize - 1;
@@ -6221,8 +6222,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
 		addr = (unsigned long)iov[seg].iov_base;
 		size = iov[seg].iov_len;
 		end += size;
-		if ((addr & blocksize_mask) || (size & blocksize_mask)) 
+		if ((addr & blocksize_mask) || (size & blocksize_mask))
 			goto out;
+
+		/* If this is a write we don't need to check anymore */
+		if (rw & WRITE)
+			continue;
+
+		/*
+		 * Check to make sure we don't have duplicate iov_base's in this
+		 * iovec, if so return EINVAL, otherwise we'll get csum errors
+		 * when reading back.
+		 */
+		for (i = seg + 1; i < nr_segs; i++) {
+			if (iov[seg].iov_base == iov[i].iov_base)
+				goto out;
+		}
 	}
 	retval = 0;
 out:
-- 
cgit v1.2.3


From 8fb27640d0e2b43c5584bf0087431b7b8d3c319a Mon Sep 17 00:00:00 2001
From: Yoshinori Sano <yoshinori.sano@gmail.com>
Date: Sat, 9 Apr 2011 02:30:07 +0000
Subject: Btrfs: fix memory leaks in btrfs_new_inode()

This patch fixes memory leaks in btrfs_new_inode().

Signed-off-by: Yoshinori Sano <yoshinori.sano@gmail.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index cc6022842e0c..2d1208f964eb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4526,14 +4526,17 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	BUG_ON(!path);
 
 	inode = new_inode(root->fs_info->sb);
-	if (!inode)
+	if (!inode) {
+		btrfs_free_path(path);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	if (dir) {
 		trace_btrfs_inode_request(dir);
 
 		ret = btrfs_set_inode_index(dir, index);
 		if (ret) {
+			btrfs_free_path(path);
 			iput(inode);
 			return ERR_PTR(ret);
 		}
-- 
cgit v1.2.3


From a1b75f7d961955e697ec377f90115e3517df98f9 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Fri, 8 Apr 2011 15:51:18 +0000
Subject: Btrfs: check for duplicate iov_base's when doing dio reads

Apparently it is ok to submit a read to an IDE device with the same target page
for different offsets.  This is what Windows does under qemu.  The problem is
under DIO we expect them to be different buffers for checksumming reasons, and
so this sort of thing will result in checksum errors, when in reality the file
is fine.  So when reading, check to make sure that all iov bases are different,
and if they aren't fall back to buffered mode, since that will work out right.
Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2d1208f964eb..edafc28883af 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6153,6 +6153,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
 			unsigned long nr_segs)
 {
 	int seg;
+	int i;
 	size_t size;
 	unsigned long addr;
 	unsigned blocksize_mask = root->sectorsize - 1;
@@ -6167,8 +6168,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
 		addr = (unsigned long)iov[seg].iov_base;
 		size = iov[seg].iov_len;
 		end += size;
-		if ((addr & blocksize_mask) || (size & blocksize_mask)) 
+		if ((addr & blocksize_mask) || (size & blocksize_mask))
 			goto out;
+
+		/* If this is a write we don't need to check anymore */
+		if (rw & WRITE)
+			continue;
+
+		/*
+		 * Check to make sure we don't have duplicate iov_base's in this
+		 * iovec, if so return EINVAL, otherwise we'll get csum errors
+		 * when reading back.
+		 */
+		for (i = seg + 1; i < nr_segs; i++) {
+			if (iov[seg].iov_base == iov[i].iov_base)
+				goto out;
+		}
 	}
 	retval = 0;
 out:
-- 
cgit v1.2.3


From 507903b81840a70cc6a179d4eb03584ad50e8c5b Mon Sep 17 00:00:00 2001
From: Arne Jansen <sensille@gmx.net>
Date: Wed, 6 Apr 2011 10:02:20 +0000
Subject: btrfs: using cached extent_state in set/unlock combinations

In several places the sequence (set_extent_uptodate, unlock_extent) is used.
This leads to a duplicate lookup of the extent state. This patch lets
set_extent_uptodate return a cached extent_state which can be passed to
unlock_extent_cached.
The occurences of the above sequences are updated to use the cache. Only
end_bio_extent_readpage is updated that it first gets a cached state to
pass it to the readpage_end_io_hook as the prototype requested and is later
on being used for set/unlock.

Signed-off-by: Arne Jansen <sensille@gmx.net>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index edafc28883af..5a993e0ec865 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5226,7 +5226,7 @@ again:
 			btrfs_mark_buffer_dirty(leaf);
 		}
 		set_extent_uptodate(io_tree, em->start,
-				    extent_map_end(em) - 1, GFP_NOFS);
+				    extent_map_end(em) - 1, NULL, GFP_NOFS);
 		goto insert;
 	} else {
 		printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
-- 
cgit v1.2.3


From b9e03af0bcc11310f6be4a3951c9ee2c26465011 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 23 Mar 2011 10:43:58 +0800
Subject: Btrfs: Check if btrfs_next_leaf() returns error in
 btrfs_real_readdir()

btrfs_next_leaf() can return -errno, and we should propagate
it to userspace.

This also simplifies how we walk the btree path.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
---
 fs/btrfs/inode.c | 28 ++++++++++------------------
 1 file changed, 10 insertions(+), 18 deletions(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 55a6a0b416d7..b9f7f5258343 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4221,10 +4221,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 	struct btrfs_key found_key;
 	struct btrfs_path *path;
 	int ret;
-	u32 nritems;
 	struct extent_buffer *leaf;
 	int slot;
-	int advance;
 	unsigned char d_type;
 	int over = 0;
 	u32 di_cur;
@@ -4267,27 +4265,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	if (ret < 0)
 		goto err;
-	advance = 0;
 
 	while (1) {
 		leaf = path->nodes[0];
-		nritems = btrfs_header_nritems(leaf);
 		slot = path->slots[0];
-		if (advance || slot >= nritems) {
-			if (slot >= nritems - 1) {
-				ret = btrfs_next_leaf(root, path);
-				if (ret)
-					break;
-				leaf = path->nodes[0];
-				nritems = btrfs_header_nritems(leaf);
-				slot = path->slots[0];
-			} else {
-				slot++;
-				path->slots[0]++;
-			}
+		if (slot >= btrfs_header_nritems(leaf)) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				goto err;
+			else if (ret > 0)
+				break;
+			continue;
 		}
 
-		advance = 1;
 		item = btrfs_item_nr(leaf, slot);
 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
@@ -4296,7 +4286,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 		if (btrfs_key_type(&found_key) != key_type)
 			break;
 		if (found_key.offset < filp->f_pos)
-			continue;
+			goto next;
 
 		filp->f_pos = found_key.offset;
 
@@ -4349,6 +4339,8 @@ skip:
 			di_cur += di_len;
 			di = (struct btrfs_dir_item *)((char *)di + di_len);
 		}
+next:
+		path->slots[0]++;
 	}
 
 	/* Reached end of directory/root. Bump pos past the last item. */
-- 
cgit v1.2.3


From 3153495d8ed6a9bb9f00aea42c18dc488a885dd6 Mon Sep 17 00:00:00 2001
From: Miao Xie <miaox@cn.fujitsu.com>
Date: Wed, 13 Apr 2011 13:19:21 +0800
Subject: Btrfs: Fix incorrect inode nlink in btrfs_link()

Link count of the inode is not decreased if btrfs_set_inode_index()
fails.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Singed-off-by: Li Zefan <lizf@cn.fujitsu.com>
---
 fs/btrfs/inode.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b9f7f5258343..a4157cfdd533 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4846,9 +4846,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 	if (inode->i_nlink == ~0U)
 		return -EMLINK;
 
-	btrfs_inc_nlink(inode);
-	inode->i_ctime = CURRENT_TIME;
-
 	err = btrfs_set_inode_index(dir, &index);
 	if (err)
 		goto fail;
@@ -4864,6 +4861,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 		goto fail;
 	}
 
+	btrfs_inc_nlink(inode);
+	inode->i_ctime = CURRENT_TIME;
+
 	btrfs_set_trans_block_group(trans, dir);
 	ihold(inode);
 
-- 
cgit v1.2.3


From 8d413713ca744fa00cf4e05d4054d80727b84789 Mon Sep 17 00:00:00 2001
From: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Date: Mon, 25 Apr 2011 19:43:52 -0400
Subject: Btrfs: check return value of kmalloc()

The check on the return value of kmalloc() is added to some places.

Signed-off-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a4157cfdd533..c718d274a352 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -953,6 +953,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
 			 1, 0, NULL, GFP_NOFS);
 	while (start < end) {
 		async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
+		BUG_ON(!async_cow);
 		async_cow->inode = inode;
 		async_cow->root = root;
 		async_cow->locked_page = locked_page;
@@ -5001,6 +5002,8 @@ static noinline int uncompress_inline(struct btrfs_path *path,
 	inline_size = btrfs_file_extent_inline_item_len(leaf,
 					btrfs_item_nr(leaf, path->slots[0]));
 	tmp = kmalloc(inline_size, GFP_NOFS);
+	if (!tmp)
+		return -ENOMEM;
 	ptr = btrfs_file_extent_inline_start(item);
 
 	read_extent_buffer(leaf, tmp, ptr, inline_size);
-- 
cgit v1.2.3


From 64728bbbf892ea7a4aba502c436afbe362217fb9 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Mon, 25 Apr 2011 19:43:52 -0400
Subject: Btrfs: put the right bio if we have an error

In btrfs_submit_direct_hook if the first btrfs_map_block fails we need to put
the orig_bio, not bio.

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c718d274a352..ad6b515173ac 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6041,7 +6041,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 	ret = btrfs_map_block(map_tree, READ, start_sector << 9,
 			      &map_length, NULL, 0);
 	if (ret) {
-		bio_put(bio);
+		bio_put(orig_bio);
 		return -EIO;
 	}
 
-- 
cgit v1.2.3


From 7cf96da3ec7ca225acf4f284b0e904a1f5f98821 Mon Sep 17 00:00:00 2001
From: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Date: Mon, 25 Apr 2011 19:43:53 -0400
Subject: Btrfs: cleanup error handling in inode.c

The error processing of several places is changed like setting the
error number only at the error.

Signed-off-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/inode.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'fs/btrfs/inode.c')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ad6b515173ac..870869aab0b8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4731,9 +4731,10 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
 				dentry->d_name.len, dir->i_ino, objectid,
 				BTRFS_I(dir)->block_group, mode, &index);
-	err = PTR_ERR(inode);
-	if (IS_ERR(inode))
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
 		goto out_unlock;
+	}
 
 	err = btrfs_init_inode_security(trans, inode, dir);
 	if (err) {
@@ -4792,9 +4793,10 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
 				dentry->d_name.len, dir->i_ino, objectid,
 				BTRFS_I(dir)->block_group, mode, &index);
-	err = PTR_ERR(inode);
-	if (IS_ERR(inode))
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
 		goto out_unlock;
+	}
 
 	err = btrfs_init_inode_security(trans, inode, dir);
 	if (err) {
@@ -7278,9 +7280,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 				dentry->d_name.len, dir->i_ino, objectid,
 				BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
 				&index);
-	err = PTR_ERR(inode);
-	if (IS_ERR(inode))
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
 		goto out_unlock;
+	}
 
 	err = btrfs_init_inode_security(trans, inode, dir);
 	if (err) {
-- 
cgit v1.2.3