summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/Kconfig 2
-rw-r--r-- fs/exofs/Kconfig 11
-rw-r--r-- fs/exofs/Kconfig.ore 12
-rw-r--r-- fs/exofs/ore.c 8
-rw-r--r-- fs/exofs/ore_raid.c 78
-rw-r--r-- fs/ext3/inode.c 24
-rw-r--r-- fs/reiserfs/super.c 27
-rw-r--r-- fs/udf/file.c 6
-rw-r--r-- fs/udf/inode.c 21
-rw-r--r-- fs/xfs/xfs_acl.c 2
10 files changed, 142 insertions, 49 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 5f4c45d4aa10..6ad58a59cf5b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -218,6 +218,8 @@ source "fs/exofs/Kconfig"
endif # MISC_FILESYSTEMS
+source "fs/exofs/Kconfig.ore"
+
menuconfig NETWORK_FILESYSTEMS
bool "Network File Systems"
default y
diff --git a/fs/exofs/Kconfig b/fs/exofs/Kconfig
index da42f32c49be..86194b2f799d 100644
--- a/fs/exofs/Kconfig
+++ b/fs/exofs/Kconfig
@@ -1,14 +1,3 @@
-# Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects
-# for every ORE user we do it like this. Any user should add itself here
-# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are
-# selected here, and we default to "ON". So in effect it is like been
-# selected by any of the users.
-config ORE
- tristate
- depends on EXOFS_FS || PNFS_OBJLAYOUT
- select ASYNC_XOR
- default SCSI_OSD_ULD
-
config EXOFS_FS
tristate "exofs: OSD based file system support"
depends on SCSI_OSD_ULD
diff --git a/fs/exofs/Kconfig.ore b/fs/exofs/Kconfig.ore
new file mode 100644
index 000000000000..1ca7fb7b6ba8
--- /dev/null
+++ b/fs/exofs/Kconfig.ore
@@ -0,0 +1,12 @@
+# ORE - Objects Raid Engine (libore.ko)
+#
+# Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects
+# for every ORE user we do it like this. Any user should add itself here
+# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are
+# selected here, and we default to "ON". So in effect it is like been
+# selected by any of the users.
+config ORE
+ tristate
+ depends on EXOFS_FS || PNFS_OBJLAYOUT
+ select ASYNC_XOR
+ default SCSI_OSD_ULD
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index d271ad837202..49cf230554a2 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -266,7 +266,7 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
/* first/last seg is split */
num_raid_units += layout->group_width;
- sgs_per_dev = div_u64(num_raid_units, data_devs);
+ sgs_per_dev = div_u64(num_raid_units, data_devs) + 2;
} else {
/* For Writes add parity pages array. */
max_par_pages = num_raid_units * pages_in_unit *
@@ -445,10 +445,10 @@ int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error)
u64 residual = ios->reading ?
or->in.residual : or->out.residual;
u64 offset = (ios->offset + ios->length) - residual;
- struct ore_dev *od = ios->oc->ods[
- per_dev->dev - ios->oc->first_dev];
+ unsigned dev = per_dev->dev - ios->oc->first_dev;
+ struct ore_dev *od = ios->oc->ods[dev];
- on_dev_error(ios, od, per_dev->dev, osi.osd_err_pri,
+ on_dev_error(ios, od, dev, osi.osd_err_pri,
offset, residual);
}
if (osi.osd_err_pri >= acumulated_osd_err) {
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 29c47e5c4a86..d222c77cfa1b 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -328,8 +328,8 @@ static int _alloc_read_4_write(struct ore_io_state *ios)
/* @si contains info of the to-be-inserted page. Update of @si should be
* maintained by caller. Specificaly si->dev, si->obj_offset, ...
*/
-static int _add_to_read_4_write(struct ore_io_state *ios,
- struct ore_striping_info *si, struct page *page)
+static int _add_to_r4w(struct ore_io_state *ios, struct ore_striping_info *si,
+ struct page *page, unsigned pg_len)
{
struct request_queue *q;
struct ore_per_dev_state *per_dev;
@@ -366,17 +366,60 @@ static int _add_to_read_4_write(struct ore_io_state *ios,
_ore_add_sg_seg(per_dev, gap, true);
}
q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev));
- added_len = bio_add_pc_page(q, per_dev->bio, page, PAGE_SIZE, 0);
- if (unlikely(added_len != PAGE_SIZE)) {
+ added_len = bio_add_pc_page(q, per_dev->bio, page, pg_len,
+ si->obj_offset % PAGE_SIZE);
+ if (unlikely(added_len != pg_len)) {
ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n",
per_dev->bio->bi_vcnt);
return -ENOMEM;
}
- per_dev->length += PAGE_SIZE;
+ per_dev->length += pg_len;
return 0;
}
+/* read the beginning of an unaligned first page */
+static int _add_to_r4w_first_page(struct ore_io_state *ios, struct page *page)
+{
+ struct ore_striping_info si;
+ unsigned pg_len;
+
+ ore_calc_stripe_info(ios->layout, ios->offset, 0, &si);
+
+ pg_len = si.obj_offset % PAGE_SIZE;
+ si.obj_offset -= pg_len;
+
+ ORE_DBGMSG("offset=0x%llx len=0x%x index=0x%lx dev=%x\n",
+ _LLU(si.obj_offset), pg_len, page->index, si.dev);
+
+ return _add_to_r4w(ios, &si, page, pg_len);
+}
+
+/* read the end of an incomplete last page */
+static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset)
+{
+ struct ore_striping_info si;
+ struct page *page;
+ unsigned pg_len, p, c;
+
+ ore_calc_stripe_info(ios->layout, *offset, 0, &si);
+
+ p = si.unit_off / PAGE_SIZE;
+ c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
+ ios->layout->mirrors_p1, si.par_dev, si.dev);
+ page = ios->sp2d->_1p_stripes[p].pages[c];
+
+ pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE);
+ *offset += pg_len;
+
+ ORE_DBGMSG("p=%d, c=%d next-offset=0x%llx len=0x%x dev=%x par_dev=%d\n",
+ p, c, _LLU(*offset), pg_len, si.dev, si.par_dev);
+
+ BUG_ON(!page);
+
+ return _add_to_r4w(ios, &si, page, pg_len);
+}
+
static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
{
struct bio_vec *bv;
@@ -444,9 +487,13 @@ static int _read_4_write(struct ore_io_state *ios)
struct page **pp = &_1ps->pages[c];
bool uptodate;
- if (*pp)
+ if (*pp) {
+ if (ios->offset % PAGE_SIZE)
+ /* Read the remainder of the page */
+ _add_to_r4w_first_page(ios, *pp);
/* to-be-written pages start here */
goto read_last_stripe;
+ }
*pp = ios->r4w->get_page(ios->private, offset,
&uptodate);
@@ -454,7 +501,7 @@ static int _read_4_write(struct ore_io_state *ios)
return -ENOMEM;
if (!uptodate)
- _add_to_read_4_write(ios, &read_si, *pp);
+ _add_to_r4w(ios, &read_si, *pp, PAGE_SIZE);
/* Mark read-pages to be cache_released */
_1ps->page_is_read[c] = true;
@@ -465,8 +512,11 @@ static int _read_4_write(struct ore_io_state *ios)
}
read_last_stripe:
- offset = ios->offset + (ios->length + PAGE_SIZE - 1) /
- PAGE_SIZE * PAGE_SIZE;
+ offset = ios->offset + ios->length;
+ if (offset % PAGE_SIZE)
+ _add_to_r4w_last_page(ios, &offset);
+ /* offset will be aligned to next page */
+
last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe)
* bytes_in_stripe;
if (offset == last_stripe_end) /* Optimize for the aligned case */
@@ -503,7 +553,7 @@ read_last_stripe:
/* Mark read-pages to be cache_released */
_1ps->page_is_read[c] = true;
if (!uptodate)
- _add_to_read_4_write(ios, &read_si, page);
+ _add_to_r4w(ios, &read_si, page, PAGE_SIZE);
}
offset += PAGE_SIZE;
@@ -551,7 +601,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
unsigned cur_len)
{
if (ios->reading) {
- BUG_ON(per_dev->cur_sg >= ios->sgs_per_dev);
+ if (per_dev->cur_sg >= ios->sgs_per_dev) {
+ ORE_DBGMSG("cur_sg(%d) >= sgs_per_dev(%d)\n" ,
+ per_dev->cur_sg, ios->sgs_per_dev);
+ return -ENOMEM;
+ }
_ore_add_sg_seg(per_dev, cur_len, true);
} else {
struct __stripe_pages_2d *sp2d = ios->sp2d;
@@ -612,8 +666,6 @@ int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
return -ENOMEM;
}
- BUG_ON(ios->offset % PAGE_SIZE);
-
/* Round io down to last full strip */
first_stripe = div_u64(ios->offset, stripe_size);
last_stripe = div_u64(ios->offset + ios->length, stripe_size);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 85fe655fe3e0..5b3f907e0b91 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1617,7 +1617,13 @@ static int ext3_ordered_writepage(struct page *page,
int err;
J_ASSERT(PageLocked(page));
- WARN_ON_ONCE(IS_RDONLY(inode));
+ /*
+ * We don't want to warn for emergency remount. The condition is
+ * ordered to avoid dereferencing inode->i_sb in non-error case to
+ * avoid slow-downs.
+ */
+ WARN_ON_ONCE(IS_RDONLY(inode) &&
+ !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
/*
* We give up here if we're reentered, because it might be for a
@@ -1692,7 +1698,13 @@ static int ext3_writeback_writepage(struct page *page,
int err;
J_ASSERT(PageLocked(page));
- WARN_ON_ONCE(IS_RDONLY(inode));
+ /*
+ * We don't want to warn for emergency remount. The condition is
+ * ordered to avoid dereferencing inode->i_sb in non-error case to
+ * avoid slow-downs.
+ */
+ WARN_ON_ONCE(IS_RDONLY(inode) &&
+ !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
if (ext3_journal_current_handle())
goto out_fail;
@@ -1735,7 +1747,13 @@ static int ext3_journalled_writepage(struct page *page,
int err;
J_ASSERT(PageLocked(page));
- WARN_ON_ONCE(IS_RDONLY(inode));
+ /*
+ * We don't want to warn for emergency remount. The condition is
+ * ordered to avoid dereferencing inode->i_sb in non-error case to
+ * avoid slow-downs.
+ */
+ WARN_ON_ONCE(IS_RDONLY(inode) &&
+ !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
if (ext3_journal_current_handle())
goto no_write;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 14363b96b6af..5e3527be1146 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -453,16 +453,20 @@ int remove_save_link(struct inode *inode, int truncate)
static void reiserfs_kill_sb(struct super_block *s)
{
if (REISERFS_SB(s)) {
- if (REISERFS_SB(s)->xattr_root) {
- d_invalidate(REISERFS_SB(s)->xattr_root);
- dput(REISERFS_SB(s)->xattr_root);
- REISERFS_SB(s)->xattr_root = NULL;
- }
- if (REISERFS_SB(s)->priv_root) {
- d_invalidate(REISERFS_SB(s)->priv_root);
- dput(REISERFS_SB(s)->priv_root);
- REISERFS_SB(s)->priv_root = NULL;
- }
+ /*
+ * Force any pending inode evictions to occur now. Any
+ * inodes to be removed that have extended attributes
+ * associated with them need to clean them up before
+ * we can release the extended attribute root dentries.
+ * shrink_dcache_for_umount will BUG if we don't release
+ * those before it's called so ->put_super is too late.
+ */
+ shrink_dcache_sb(s);
+
+ dput(REISERFS_SB(s)->xattr_root);
+ REISERFS_SB(s)->xattr_root = NULL;
+ dput(REISERFS_SB(s)->priv_root);
+ REISERFS_SB(s)->priv_root = NULL;
}
kill_block_super(s);
@@ -1164,7 +1168,8 @@ static void handle_quota_files(struct super_block *s, char **qf_names,
kfree(REISERFS_SB(s)->s_qf_names[i]);
REISERFS_SB(s)->s_qf_names[i] = qf_names[i];
}
- REISERFS_SB(s)->s_jquota_fmt = *qfmt;
+ if (*qfmt)
+ REISERFS_SB(s)->s_jquota_fmt = *qfmt;
}
#endif
diff --git a/fs/udf/file.c b/fs/udf/file.c
index d8ffa7cc661d..dca0c3881e82 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -125,7 +125,6 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
err = udf_expand_file_adinicb(inode);
if (err) {
udf_debug("udf_expand_adinicb: err=%d\n", err);
- up_write(&iinfo->i_data_sem);
return err;
}
} else {
@@ -133,9 +132,10 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
iinfo->i_lenAlloc = pos + count;
else
iinfo->i_lenAlloc = inode->i_size;
+ up_write(&iinfo->i_data_sem);
}
- }
- up_write(&iinfo->i_data_sem);
+ } else
+ up_write(&iinfo->i_data_sem);
retval = generic_file_aio_write(iocb, iov, nr_segs, ppos);
if (retval > 0)
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 4fd1d809738c..e2787d05fd9a 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -151,6 +151,12 @@ const struct address_space_operations udf_aops = {
.bmap = udf_bmap,
};
+/*
+ * Expand file stored in ICB to a normal one-block-file
+ *
+ * This function requires i_data_sem for writing and releases it.
+ * This function requires i_mutex held
+ */
int udf_expand_file_adinicb(struct inode *inode)
{
struct page *page;
@@ -169,9 +175,15 @@ int udf_expand_file_adinicb(struct inode *inode)
iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
/* from now on we have normal address_space methods */
inode->i_data.a_ops = &udf_aops;
+ up_write(&iinfo->i_data_sem);
mark_inode_dirty(inode);
return 0;
}
+ /*
+ * Release i_data_sem so that we can lock a page - page lock ranks
+ * above i_data_sem. i_mutex still protects us against file changes.
+ */
+ up_write(&iinfo->i_data_sem);
page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
if (!page)
@@ -187,6 +199,7 @@ int udf_expand_file_adinicb(struct inode *inode)
SetPageUptodate(page);
kunmap(page);
}
+ down_write(&iinfo->i_data_sem);
memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0x00,
iinfo->i_lenAlloc);
iinfo->i_lenAlloc = 0;
@@ -196,17 +209,20 @@ int udf_expand_file_adinicb(struct inode *inode)
iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
/* from now on we have normal address_space methods */
inode->i_data.a_ops = &udf_aops;
+ up_write(&iinfo->i_data_sem);
err = inode->i_data.a_ops->writepage(page, &udf_wbc);
if (err) {
/* Restore everything back so that we don't lose data... */
lock_page(page);
kaddr = kmap(page);
+ down_write(&iinfo->i_data_sem);
memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr,
inode->i_size);
kunmap(page);
unlock_page(page);
iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
inode->i_data.a_ops = &udf_adinicb_aops;
+ up_write(&iinfo->i_data_sem);
}
page_cache_release(page);
mark_inode_dirty(inode);
@@ -1111,10 +1127,9 @@ int udf_setsize(struct inode *inode, loff_t newsize)
if (bsize <
(udf_file_entry_alloc_offset(inode) + newsize)) {
err = udf_expand_file_adinicb(inode);
- if (err) {
- up_write(&iinfo->i_data_sem);
+ if (err)
return err;
- }
+ down_write(&iinfo->i_data_sem);
} else
iinfo->i_lenAlloc = newsize;
}
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 76e4266d2e7e..ac702a6eab9b 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -39,7 +39,7 @@ xfs_acl_from_disk(struct xfs_acl *aclp)
struct posix_acl_entry *acl_e;
struct posix_acl *acl;
struct xfs_acl_entry *ace;
- int count, i;
+ unsigned int count, i;
count = be32_to_cpu(aclp->acl_cnt);
if (count > XFS_ACL_MAX_ENTRIES)