summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorSachin Nikam <snikam@nvidia.com>2013-03-06 19:12:15 +0530
committerSachin Nikam <snikam@nvidia.com>2013-03-06 19:31:07 +0530
commitd3dd6b4227fe3e59df0a55f679ec196e4ccb1e3a (patch)
tree0137a07ecab729a19cea68b671c88def5bf83a2d /fs
parentbb36790494c382ec26250f351c8cea76613f0f02 (diff)
parent2713e2797a78a0fdda765bc5ad7fed41e94818ba (diff)
Merge branch 'linux-3.4.35' into rel-17
Bug 1243631 Change-Id: I915826047b2e20f0ad0a7d75df295c6cbf6e5b0a
Diffstat (limited to 'fs')
-rw-r--r--fs/autofs4/root.c6
-rw-r--r--fs/binfmt_elf.c19
-rw-r--r--fs/binfmt_misc.c5
-rw-r--r--fs/binfmt_script.c4
-rw-r--r--fs/block_dev.c1
-rw-r--r--fs/buffer.c66
-rw-r--r--fs/ceph/addr.c49
-rw-r--r--fs/ceph/caps.c16
-rw-r--r--fs/ceph/debugfs.c1
-rw-r--r--fs/ceph/export.c18
-rw-r--r--fs/ceph/file.c4
-rw-r--r--fs/ceph/inode.c30
-rw-r--r--fs/ceph/mds_client.c77
-rw-r--r--fs/ceph/mds_client.h5
-rw-r--r--fs/ceph/super.c2
-rw-r--r--fs/cifs/cifs_dfs_ref.c2
-rw-r--r--fs/cifs/cifs_unicode.c24
-rw-r--r--fs/cifs/cifsacl.c49
-rw-r--r--fs/cifs/connect.c9
-rw-r--r--fs/compat.c10
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/dcache.c10
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h2
-rw-r--r--fs/ecryptfs/file.c100
-rw-r--r--fs/ecryptfs/inode.c70
-rw-r--r--fs/ecryptfs/main.c24
-rw-r--r--fs/ecryptfs/mmap.c39
-rw-r--r--fs/eventpoll.c22
-rw-r--r--fs/exec.c18
-rw-r--r--fs/ext3/inode.c17
-rw-r--r--fs/ext4/acl.c6
-rw-r--r--fs/ext4/balloc.c13
-rw-r--r--fs/ext4/extents.c79
-rw-r--r--fs/ext4/ialloc.c21
-rw-r--r--fs/ext4/inode.c27
-rw-r--r--fs/ext4/mballoc.c11
-rw-r--r--fs/ext4/move_extent.c174
-rw-r--r--fs/ext4/namei.c2
-rw-r--r--fs/ext4/resize.c26
-rw-r--r--fs/ext4/super.c28
-rw-r--r--fs/ext4/xattr.c7
-rw-r--r--fs/fs-writeback.c1
-rw-r--r--fs/fuse/dev.c1
-rw-r--r--fs/fuse/dir.c9
-rw-r--r--fs/gfs2/export.c4
-rw-r--r--fs/gfs2/lops.c18
-rw-r--r--fs/gfs2/trans.c8
-rw-r--r--fs/isofs/export.c2
-rw-r--r--fs/jbd/commit.c45
-rw-r--r--fs/jbd/transaction.c66
-rw-r--r--fs/jbd2/journal.c5
-rw-r--r--fs/jbd2/transaction.c3
-rw-r--r--fs/jffs2/file.c39
-rw-r--r--fs/jffs2/nodemgmt.c6
-rw-r--r--fs/jffs2/wbuf.c8
-rw-r--r--fs/lockd/clntproc.c3
-rw-r--r--fs/lockd/clntxdr.c2
-rw-r--r--fs/lockd/mon.c4
-rw-r--r--fs/lockd/svc.c12
-rw-r--r--fs/lockd/svcproc.c3
-rw-r--r--fs/nfs/blocklayout/blocklayout.c177
-rw-r--r--fs/nfs/blocklayout/blocklayout.h1
-rw-r--r--fs/nfs/client.c3
-rw-r--r--fs/nfs/dir.c15
-rw-r--r--fs/nfs/dns_resolve.c5
-rw-r--r--fs/nfs/idmap.c6
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/internal.h5
-rw-r--r--fs/nfs/mount_clnt.c2
-rw-r--r--fs/nfs/namespace.c39
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs4namespace.c3
-rw-r--r--fs/nfs/nfs4proc.c130
-rw-r--r--fs/nfs/nfs4xdr.c3
-rw-r--r--fs/nfs/objlayout/objio_osd.c1
-rw-r--r--fs/nfs/pnfs.c39
-rw-r--r--fs/nfs/pnfs.h2
-rw-r--r--fs/nfs/super.c8
-rw-r--r--fs/nfsd/export.c6
-rw-r--r--fs/nfsd/nfs4idmap.c2
-rw-r--r--fs/nfsd/nfs4proc.c8
-rw-r--r--fs/nfsd/nfs4state.c5
-rw-r--r--fs/nfsd/nfs4xdr.c11
-rw-r--r--fs/nfsd/nfsctl.c8
-rw-r--r--fs/nfsd/nfsd.h11
-rw-r--r--fs/nfsd/nfssvc.c26
-rw-r--r--fs/nfsd/vfs.c10
-rw-r--r--fs/nilfs2/ioctl.c5
-rw-r--r--fs/notify/fanotify/fanotify.c1
-rw-r--r--fs/notify/inotify/inotify_user.c4
-rw-r--r--fs/ocfs2/aops.c2
-rw-r--r--fs/ocfs2/dlmglue.c5
-rw-r--r--fs/ocfs2/suballoc.c7
-rw-r--r--fs/ocfs2/suballoc.h2
-rw-r--r--fs/ocfs2/xattr.c2
-rw-r--r--fs/open.c7
-rw-r--r--fs/proc/page.c8
-rw-r--r--fs/proc/proc_sysctl.c5
-rw-r--r--fs/proc/stat.c14
-rw-r--r--fs/pstore/platform.c35
-rw-r--r--fs/reiserfs/inode.c16
-rw-r--r--fs/reiserfs/stree.c4
-rw-r--r--fs/reiserfs/super.c60
-rw-r--r--fs/splice.c4
-rw-r--r--fs/stat.c2
-rw-r--r--fs/sysfs/dir.c16
-rw-r--r--fs/ubifs/debug.h2
-rw-r--r--fs/ubifs/dir.c2
-rw-r--r--fs/ubifs/find.c12
-rw-r--r--fs/ubifs/lprops.c6
-rw-r--r--fs/ubifs/ubifs.h3
-rw-r--r--fs/udf/file.c35
-rw-r--r--fs/udf/inode.c11
-rw-r--r--fs/udf/super.c5
-rw-r--r--fs/xfs/xfs_aops.c2
-rw-r--r--fs/xfs/xfs_buf.c14
-rw-r--r--fs/xfs/xfs_export.c3
-rw-r--r--fs/xfs/xfs_log_recover.c2
119 files changed, 1382 insertions, 782 deletions
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 75e5f1c8e028..8c4292f89eef 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -392,10 +392,12 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
ino->flags |= AUTOFS_INF_PENDING;
spin_unlock(&sbi->fs_lock);
status = autofs4_mount_wait(dentry);
- if (status)
- return ERR_PTR(status);
spin_lock(&sbi->fs_lock);
ino->flags &= ~AUTOFS_INF_PENDING;
+ if (status) {
+ spin_unlock(&sbi->fs_lock);
+ return ERR_PTR(status);
+ }
}
done:
if (!(ino->flags & AUTOFS_INF_EXPIRING)) {
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 16f735417072..a009b9e322ac 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1698,30 +1698,19 @@ static int elf_note_info_init(struct elf_note_info *info)
return 0;
info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
if (!info->psinfo)
- goto notes_free;
+ return 0;
info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
if (!info->prstatus)
- goto psinfo_free;
+ return 0;
info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
if (!info->fpu)
- goto prstatus_free;
+ return 0;
#ifdef ELF_CORE_COPY_XFPREGS
info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
if (!info->xfpu)
- goto fpu_free;
+ return 0;
#endif
return 1;
-#ifdef ELF_CORE_COPY_XFPREGS
- fpu_free:
- kfree(info->fpu);
-#endif
- prstatus_free:
- kfree(info->prstatus);
- psinfo_free:
- kfree(info->psinfo);
- notes_free:
- kfree(info->notes);
- return 0;
}
static int fill_note_info(struct elfhdr *elf, int phdrs,
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 613aa0618235..e1724392d050 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -176,7 +176,10 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
goto _error;
bprm->argc ++;
- bprm->interp = iname; /* for binfmt_script */
+ /* Update interp in case binfmt_script needs it. */
+ retval = bprm_change_interp(iname, bprm);
+ if (retval < 0)
+ goto _error;
interp_file = open_exec (iname);
retval = PTR_ERR (interp_file);
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index d3b8c1f63155..df49d486b0c0 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -82,7 +82,9 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
retval = copy_strings_kernel(1, &i_name, bprm);
if (retval) return retval;
bprm->argc++;
- bprm->interp = interp;
+ retval = bprm_change_interp(interp, bprm);
+ if (retval < 0)
+ return retval;
/*
* OK, now restart the process with the interpreter's dentry.
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ba11c30f302d..b3be92c980ee 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1047,6 +1047,7 @@ int revalidate_disk(struct gendisk *disk)
mutex_lock(&bdev->bd_mutex);
check_disk_size_change(disk, bdev);
+ bdev->bd_invalidated = 0;
mutex_unlock(&bdev->bd_mutex);
bdput(bdev);
return ret;
diff --git a/fs/buffer.c b/fs/buffer.c
index 0bc1bed6c4e0..18669e92b676 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -914,7 +914,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
/*
* Initialise the state of a blockdev page's buffers.
*/
-static void
+static sector_t
init_page_buffers(struct page *page, struct block_device *bdev,
sector_t block, int size)
{
@@ -936,33 +936,41 @@ init_page_buffers(struct page *page, struct block_device *bdev,
block++;
bh = bh->b_this_page;
} while (bh != head);
+
+ /*
+ * Caller needs to validate requested block against end of device.
+ */
+ return end_block;
}
/*
* Create the page-cache page that contains the requested block.
*
- * This is user purely for blockdev mappings.
+ * This is used purely for blockdev mappings.
*/
-static struct page *
+static int
grow_dev_page(struct block_device *bdev, sector_t block,
- pgoff_t index, int size)
+ pgoff_t index, int size, int sizebits)
{
struct inode *inode = bdev->bd_inode;
struct page *page;
struct buffer_head *bh;
+ sector_t end_block;
+ int ret = 0; /* Will call free_more_memory() */
page = find_or_create_page(inode->i_mapping, index,
(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
if (!page)
- return NULL;
+ return ret;
BUG_ON(!PageLocked(page));
if (page_has_buffers(page)) {
bh = page_buffers(page);
if (bh->b_size == size) {
- init_page_buffers(page, bdev, block, size);
- return page;
+ end_block = init_page_buffers(page, bdev,
+ index << sizebits, size);
+ goto done;
}
if (!try_to_free_buffers(page))
goto failed;
@@ -982,14 +990,14 @@ grow_dev_page(struct block_device *bdev, sector_t block,
*/
spin_lock(&inode->i_mapping->private_lock);
link_dev_buffers(page, bh);
- init_page_buffers(page, bdev, block, size);
+ end_block = init_page_buffers(page, bdev, index << sizebits, size);
spin_unlock(&inode->i_mapping->private_lock);
- return page;
-
+done:
+ ret = (block < end_block) ? 1 : -ENXIO;
failed:
unlock_page(page);
page_cache_release(page);
- return NULL;
+ return ret;
}
/*
@@ -999,7 +1007,6 @@ failed:
static int
grow_buffers(struct block_device *bdev, sector_t block, int size)
{
- struct page *page;
pgoff_t index;
int sizebits;
@@ -1023,22 +1030,14 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
bdevname(bdev, b));
return -EIO;
}
- block = index << sizebits;
+
/* Create a page with the proper size buffers.. */
- page = grow_dev_page(bdev, block, index, size);
- if (!page)
- return 0;
- unlock_page(page);
- page_cache_release(page);
- return 1;
+ return grow_dev_page(bdev, block, index, size, sizebits);
}
static struct buffer_head *
__getblk_slow(struct block_device *bdev, sector_t block, int size)
{
- int ret;
- struct buffer_head *bh;
-
/* Size must be multiple of hard sectorsize */
if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
(size < 512 || size > PAGE_SIZE))) {
@@ -1051,21 +1050,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
return NULL;
}
-retry:
- bh = __find_get_block(bdev, block, size);
- if (bh)
- return bh;
+ for (;;) {
+ struct buffer_head *bh;
+ int ret;
- ret = grow_buffers(bdev, block, size);
- if (ret == 0) {
- free_more_memory();
- goto retry;
- } else if (ret > 0) {
bh = __find_get_block(bdev, block, size);
if (bh)
return bh;
+
+ ret = grow_buffers(bdev, block, size);
+ if (ret < 0)
+ return NULL;
+ if (ret == 0)
+ free_more_memory();
}
- return NULL;
}
/*
@@ -1321,10 +1319,6 @@ EXPORT_SYMBOL(__find_get_block);
* which corresponds to the passed block_device, block and size. The
* returned buffer has its reference count incremented.
*
- * __getblk() cannot fail - it just keeps trying. If you pass it an
- * illegal block number, __getblk() will happily return a buffer_head
- * which represents the non-existent block. Very weird.
- *
* __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
* attempt is failing. FIXME, perhaps?
*/
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 173b1d22e59b..bb01881cb1f2 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -54,7 +54,12 @@
(CONGESTION_ON_THRESH(congestion_kb) - \
(CONGESTION_ON_THRESH(congestion_kb) >> 2))
-
+static inline struct ceph_snap_context *page_snap_context(struct page *page)
+{
+ if (PagePrivate(page))
+ return (void *)page->private;
+ return NULL;
+}
/*
* Dirty a page. Optimistically adjust accounting, on the assumption
@@ -142,10 +147,9 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset)
{
struct inode *inode;
struct ceph_inode_info *ci;
- struct ceph_snap_context *snapc = (void *)page->private;
+ struct ceph_snap_context *snapc = page_snap_context(page);
BUG_ON(!PageLocked(page));
- BUG_ON(!page->private);
BUG_ON(!PagePrivate(page));
BUG_ON(!page->mapping);
@@ -182,7 +186,6 @@ static int ceph_releasepage(struct page *page, gfp_t g)
struct inode *inode = page->mapping ? page->mapping->host : NULL;
dout("%p releasepage %p idx %lu\n", inode, page, page->index);
WARN_ON(PageDirty(page));
- WARN_ON(page->private);
WARN_ON(PagePrivate(page));
return 0;
}
@@ -202,7 +205,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
dout("readpage inode %p file %p page %p index %lu\n",
inode, filp, page, page->index);
err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
- page->index << PAGE_CACHE_SHIFT, &len,
+ (u64) page_offset(page), &len,
ci->i_truncate_seq, ci->i_truncate_size,
&page, 1, 0);
if (err == -ENOENT)
@@ -264,6 +267,14 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
kfree(req->r_pages);
}
+static void ceph_unlock_page_vector(struct page **pages, int num_pages)
+{
+ int i;
+
+ for (i = 0; i < num_pages; i++)
+ unlock_page(pages[i]);
+}
+
/*
* start an async read(ahead) operation. return nr_pages we submitted
* a read for on success, or negative error code.
@@ -283,7 +294,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
int nr_pages = 0;
int ret;
- off = page->index << PAGE_CACHE_SHIFT;
+ off = (u64) page_offset(page);
/* count pages */
next_index = page->index;
@@ -305,8 +316,8 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
NULL, 0,
ci->i_truncate_seq, ci->i_truncate_size,
NULL, false, 1, 0);
- if (!req)
- return -ENOMEM;
+ if (IS_ERR(req))
+ return PTR_ERR(req);
/* build page vector */
nr_pages = len >> PAGE_CACHE_SHIFT;
@@ -344,6 +355,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
return nr_pages;
out_pages:
+ ceph_unlock_page_vector(pages, nr_pages);
ceph_release_page_vector(pages, nr_pages);
out:
ceph_osdc_put_request(req);
@@ -423,7 +435,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
struct ceph_inode_info *ci;
struct ceph_fs_client *fsc;
struct ceph_osd_client *osdc;
- loff_t page_off = page->index << PAGE_CACHE_SHIFT;
+ loff_t page_off = page_offset(page);
int len = PAGE_CACHE_SIZE;
loff_t i_size;
int err = 0;
@@ -443,7 +455,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
osdc = &fsc->client->osdc;
/* verify this is a writeable snap context */
- snapc = (void *)page->private;
+ snapc = page_snap_context(page);
if (snapc == NULL) {
dout("writepage %p page %p not dirty?\n", inode, page);
goto out;
@@ -451,7 +463,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
oldest = get_oldest_context(inode, &snap_size);
if (snapc->seq > oldest->seq) {
dout("writepage %p page %p snapc %p not writeable - noop\n",
- inode, page, (void *)page->private);
+ inode, page, snapc);
/* we should only noop if called by kswapd */
WARN_ON((current->flags & PF_MEMALLOC) == 0);
ceph_put_snap_context(oldest);
@@ -591,7 +603,7 @@ static void writepages_finish(struct ceph_osd_request *req,
clear_bdi_congested(&fsc->backing_dev_info,
BLK_RW_ASYNC);
- ceph_put_snap_context((void *)page->private);
+ ceph_put_snap_context(page_snap_context(page));
page->private = 0;
ClearPagePrivate(page);
dout("unlocking %d %p\n", i, page);
@@ -795,7 +807,7 @@ get_more_pages:
}
/* only if matching snap context */
- pgsnapc = (void *)page->private;
+ pgsnapc = page_snap_context(page);
if (pgsnapc->seq > snapc->seq) {
dout("page snapc %p %lld > oldest %p %lld\n",
pgsnapc, pgsnapc->seq, snapc, snapc->seq);
@@ -814,8 +826,7 @@ get_more_pages:
/* ok */
if (locked_pages == 0) {
/* prepare async write request */
- offset = (unsigned long long)page->index
- << PAGE_CACHE_SHIFT;
+ offset = (u64) page_offset(page);
len = wsize;
req = ceph_osdc_new_request(&fsc->client->osdc,
&ci->i_layout,
@@ -829,8 +840,8 @@ get_more_pages:
ci->i_truncate_size,
&inode->i_mtime, true, 1, 0);
- if (!req) {
- rc = -ENOMEM;
+ if (IS_ERR(req)) {
+ rc = PTR_ERR(req);
unlock_page(page);
break;
}
@@ -984,7 +995,7 @@ retry_locked:
BUG_ON(!ci->i_snap_realm);
down_read(&mdsc->snap_rwsem);
BUG_ON(!ci->i_snap_realm->cached_context);
- snapc = (void *)page->private;
+ snapc = page_snap_context(page);
if (snapc && snapc != ci->i_head_snapc) {
/*
* this page is already dirty in another (older) snap
@@ -1177,7 +1188,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
struct inode *inode = vma->vm_file->f_dentry->d_inode;
struct page *page = vmf->page;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
- loff_t off = page->index << PAGE_CACHE_SHIFT;
+ loff_t off = page_offset(page);
loff_t size, len;
int ret;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 620daad201db..e7d40776f58f 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1349,11 +1349,15 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
if (!ci->i_head_snapc)
ci->i_head_snapc = ceph_get_snap_context(
ci->i_snap_realm->cached_context);
- dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode,
- ci->i_head_snapc);
+ dout(" inode %p now dirty snapc %p auth cap %p\n",
+ &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
BUG_ON(!list_empty(&ci->i_dirty_item));
spin_lock(&mdsc->cap_dirty_lock);
- list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
+ if (ci->i_auth_cap)
+ list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
+ else
+ list_add(&ci->i_dirty_item,
+ &mdsc->cap_dirty_migrating);
spin_unlock(&mdsc->cap_dirty_lock);
if (ci->i_flushing_caps == 0) {
ihold(inode);
@@ -2388,7 +2392,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
&atime);
/* max size increase? */
- if (max_size != ci->i_max_size) {
+ if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
dout("max_size %lld -> %llu\n", ci->i_max_size, max_size);
ci->i_max_size = max_size;
if (max_size >= ci->i_wanted_max_size) {
@@ -2745,6 +2749,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
/* make sure we re-request max_size, if necessary */
spin_lock(&ci->i_ceph_lock);
+ ci->i_wanted_max_size = 0; /* reset */
ci->i_requested_max_size = 0;
spin_unlock(&ci->i_ceph_lock);
}
@@ -2840,8 +2845,6 @@ void ceph_handle_caps(struct ceph_mds_session *session,
case CEPH_CAP_OP_IMPORT:
handle_cap_import(mdsc, inode, h, session,
snaptrace, snaptrace_len);
- ceph_check_caps(ceph_inode(inode), 0, session);
- goto done_unlocked;
}
/* the rest require a cap */
@@ -2858,6 +2861,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
switch (op) {
case CEPH_CAP_OP_REVOKE:
case CEPH_CAP_OP_GRANT:
+ case CEPH_CAP_OP_IMPORT:
handle_cap_grant(inode, h, session, cap, msg->middle);
goto done_unlocked;
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index fb962efdacee..6d59006bfa27 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -201,6 +201,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
int err = -ENOMEM;
dout("ceph_fs_debugfs_init\n");
+ BUG_ON(!fsc->client->debugfs_dir);
fsc->debugfs_congestion_kb =
debugfs_create_file("writeback_congestion_kb",
0600,
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index fbb2a643ef10..4098ccf8cb98 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -89,7 +89,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
* FIXME: we should try harder by querying the mds for the ino.
*/
static struct dentry *__fh_to_dentry(struct super_block *sb,
- struct ceph_nfs_fh *fh)
+ struct ceph_nfs_fh *fh, int fh_len)
{
struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
struct inode *inode;
@@ -97,6 +97,9 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
struct ceph_vino vino;
int err;
+ if (fh_len < sizeof(*fh) / 4)
+ return ERR_PTR(-ESTALE);
+
dout("__fh_to_dentry %llx\n", fh->ino);
vino.ino = fh->ino;
vino.snap = CEPH_NOSNAP;
@@ -140,7 +143,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
* convert connectable fh to dentry
*/
static struct dentry *__cfh_to_dentry(struct super_block *sb,
- struct ceph_nfs_confh *cfh)
+ struct ceph_nfs_confh *cfh, int fh_len)
{
struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
struct inode *inode;
@@ -148,6 +151,9 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
struct ceph_vino vino;
int err;
+ if (fh_len < sizeof(*cfh) / 4)
+ return ERR_PTR(-ESTALE);
+
dout("__cfh_to_dentry %llx (%llx/%x)\n",
cfh->ino, cfh->parent_ino, cfh->parent_name_hash);
@@ -197,9 +203,11 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
if (fh_type == 1)
- return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw);
+ return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw,
+ fh_len);
else
- return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw);
+ return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw,
+ fh_len);
}
/*
@@ -220,6 +228,8 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
if (fh_type == 1)
return ERR_PTR(-ESTALE);
+ if (fh_len < sizeof(*cfh) / 4)
+ return ERR_PTR(-ESTALE);
pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino,
cfh->parent_name_hash);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ed72428d9c75..9ce3a4bd4d1c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -529,8 +529,8 @@ more:
do_sync,
ci->i_truncate_seq, ci->i_truncate_size,
&mtime, false, 2, page_align);
- if (!req)
- return -ENOMEM;
+ if (IS_ERR(req))
+ return PTR_ERR(req);
if (file->f_flags & O_DIRECT) {
pages = ceph_get_direct_page_vector(data, num_pages, false);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 9fff9f3b17e4..81613bced19f 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -992,11 +992,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
if (rinfo->head->is_dentry) {
struct inode *dir = req->r_locked_dir;
- err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag,
- session, req->r_request_started, -1,
- &req->r_caps_reservation);
- if (err < 0)
- return err;
+ if (dir) {
+ err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag,
+ session, req->r_request_started, -1,
+ &req->r_caps_reservation);
+ if (err < 0)
+ return err;
+ } else {
+ WARN_ON_ONCE(1);
+ }
}
/*
@@ -1004,6 +1008,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
* will have trouble splicing in the virtual snapdir later
*/
if (rinfo->head->is_dentry && !req->r_aborted &&
+ req->r_locked_dir &&
(rinfo->head->is_target || strncmp(req->r_dentry->d_name.name,
fsc->mount_options->snapdir_name,
req->r_dentry->d_name.len))) {
@@ -1461,7 +1466,7 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
u64 to;
- int wrbuffer_refs, wake = 0;
+ int wrbuffer_refs, finish = 0;
retry:
spin_lock(&ci->i_ceph_lock);
@@ -1493,15 +1498,18 @@ retry:
truncate_inode_pages(inode->i_mapping, to);
spin_lock(&ci->i_ceph_lock);
- ci->i_truncate_pending--;
- if (ci->i_truncate_pending == 0)
- wake = 1;
+ if (to == ci->i_truncate_size) {
+ ci->i_truncate_pending = 0;
+ finish = 1;
+ }
spin_unlock(&ci->i_ceph_lock);
+ if (!finish)
+ goto retry;
if (wrbuffer_refs == 0)
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
- if (wake)
- wake_up_all(&ci->i_cap_wq);
+
+ wake_up_all(&ci->i_cap_wq);
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 89971e137aab..3fd08ad5c478 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -334,10 +334,10 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
dout("mdsc put_session %p %d -> %d\n", s,
atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
if (atomic_dec_and_test(&s->s_ref)) {
- if (s->s_authorizer)
+ if (s->s_auth.authorizer)
s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer(
s->s_mdsc->fsc->client->monc.auth,
- s->s_authorizer);
+ s->s_auth.authorizer);
kfree(s);
}
}
@@ -394,11 +394,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
s->s_seq = 0;
mutex_init(&s->s_mutex);
- ceph_con_init(mdsc->fsc->client->msgr, &s->s_con);
- s->s_con.private = s;
- s->s_con.ops = &mds_con_ops;
- s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
- s->s_con.peer_name.num = cpu_to_le64(mds);
+ ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr);
spin_lock_init(&s->s_gen_ttl_lock);
s->s_cap_gen = 0;
@@ -440,7 +436,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
mdsc->sessions[mds] = s;
atomic_inc(&s->s_ref); /* one ref to sessions[], one to caller */
- ceph_con_open(&s->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
+ ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds,
+ ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
return s;
@@ -1889,9 +1886,14 @@ finish:
static void __wake_requests(struct ceph_mds_client *mdsc,
struct list_head *head)
{
- struct ceph_mds_request *req, *nreq;
+ struct ceph_mds_request *req;
+ LIST_HEAD(tmp_list);
+
+ list_splice_init(head, &tmp_list);
- list_for_each_entry_safe(req, nreq, head, r_wait) {
+ while (!list_empty(&tmp_list)) {
+ req = list_entry(tmp_list.next,
+ struct ceph_mds_request, r_wait);
list_del_init(&req->r_wait);
__do_request(mdsc, req);
}
@@ -2531,7 +2533,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
session->s_state = CEPH_MDS_SESSION_RECONNECTING;
session->s_seq = 0;
+ ceph_con_close(&session->s_con);
ceph_con_open(&session->s_con,
+ CEPH_ENTITY_TYPE_MDS, mds,
ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
/* replay unsafe requests */
@@ -2636,7 +2640,8 @@ static void check_new_map(struct ceph_mds_client *mdsc,
ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
session_state_name(s->s_state));
- if (memcmp(ceph_mdsmap_get_addr(oldmap, i),
+ if (i >= newmap->m_max_mds ||
+ memcmp(ceph_mdsmap_get_addr(oldmap, i),
ceph_mdsmap_get_addr(newmap, i),
sizeof(struct ceph_entity_addr))) {
if (s->s_state == CEPH_MDS_SESSION_OPENING) {
@@ -3395,39 +3400,33 @@ out:
/*
* authentication
*/
-static int get_authorizer(struct ceph_connection *con,
- void **buf, int *len, int *proto,
- void **reply_buf, int *reply_len, int force_new)
+
+/*
+ * Note: returned pointer is the address of a structure that's
+ * managed separately. Caller must *not* attempt to free it.
+ */
+static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
+ int *proto, int force_new)
{
struct ceph_mds_session *s = con->private;
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
- int ret = 0;
-
- if (force_new && s->s_authorizer) {
- ac->ops->destroy_authorizer(ac, s->s_authorizer);
- s->s_authorizer = NULL;
- }
- if (s->s_authorizer == NULL) {
- if (ac->ops->create_authorizer) {
- ret = ac->ops->create_authorizer(
- ac, CEPH_ENTITY_TYPE_MDS,
- &s->s_authorizer,
- &s->s_authorizer_buf,
- &s->s_authorizer_buf_len,
- &s->s_authorizer_reply_buf,
- &s->s_authorizer_reply_buf_len);
- if (ret)
- return ret;
- }
- }
+ struct ceph_auth_handshake *auth = &s->s_auth;
+ if (force_new && auth->authorizer) {
+ if (ac->ops && ac->ops->destroy_authorizer)
+ ac->ops->destroy_authorizer(ac, auth->authorizer);
+ auth->authorizer = NULL;
+ }
+ if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) {
+ int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
+ auth);
+ if (ret)
+ return ERR_PTR(ret);
+ }
*proto = ac->protocol;
- *buf = s->s_authorizer_buf;
- *len = s->s_authorizer_buf_len;
- *reply_buf = s->s_authorizer_reply_buf;
- *reply_len = s->s_authorizer_reply_buf_len;
- return 0;
+
+ return auth;
}
@@ -3437,7 +3436,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
- return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len);
+ return ac->ops->verify_authorizer_reply(ac, s->s_auth.authorizer, len);
}
static int invalidate_authorizer(struct ceph_connection *con)
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 8c7c04ebb595..dd26846dd71d 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -11,6 +11,7 @@
#include <linux/ceph/types.h>
#include <linux/ceph/messenger.h>
#include <linux/ceph/mdsmap.h>
+#include <linux/ceph/auth.h>
/*
* Some lock dependencies:
@@ -113,9 +114,7 @@ struct ceph_mds_session {
struct ceph_connection s_con;
- struct ceph_authorizer *s_authorizer;
- void *s_authorizer_buf, *s_authorizer_reply_buf;
- size_t s_authorizer_buf_len, s_authorizer_reply_buf_len;
+ struct ceph_auth_handshake s_auth;
/* protected by s_gen_ttl_lock */
spinlock_t s_gen_ttl_lock;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 1e67dd7305a4..f36391815461 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -387,8 +387,6 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
- if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT)
- seq_printf(m, ",osdtimeout=%d", opt->osd_timeout);
if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
seq_printf(m, ",osdkeepalivetimeout=%d",
opt->osd_keepalive_timeout);
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 6873bb634a97..22631445a4ac 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -226,6 +226,8 @@ compose_mount_options_out:
compose_mount_options_err:
kfree(mountdata);
mountdata = ERR_PTR(rc);
+ kfree(*devname);
+ *devname = NULL;
goto compose_mount_options_out;
}
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index fbb9da951843..6a8568c6466f 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -203,6 +203,27 @@ cifs_strtoUTF16(__le16 *to, const char *from, int len,
int i;
wchar_t wchar_to; /* needed to quiet sparse */
+ /* special case for utf8 to handle no plane0 chars */
+ if (!strcmp(codepage->charset, "utf8")) {
+ /*
+ * convert utf8 -> utf16, we assume we have enough space
+ * as caller should have assumed conversion does not overflow
+ * in destination len is length in wchar_t units (16bits)
+ */
+ i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
+ (wchar_t *) to, len);
+
+ /* if success terminate and exit */
+ if (i >= 0)
+ goto success;
+ /*
+ * if fails fall back to UCS encoding as this
+ * function should not return negative values
+ * currently can fail only if source contains
+ * invalid encoded characters
+ */
+ }
+
for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
charlen = codepage->char2uni(from, len, &wchar_to);
if (charlen < 1) {
@@ -215,6 +236,7 @@ cifs_strtoUTF16(__le16 *to, const char *from, int len,
put_unaligned_le16(wchar_to, &to[i]);
}
+success:
put_unaligned_le16(0, &to[i]);
return i;
}
@@ -328,6 +350,6 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
}
ctoUTF16_out:
- return i;
+ return j;
}
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 3cc1b251ca08..6ccf176427f0 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -225,6 +225,13 @@ sid_to_str(struct cifs_sid *sidptr, char *sidstr)
}
static void
+cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
+{
+ memcpy(dst, src, sizeof(*dst));
+ dst->num_subauth = min_t(u8, src->num_subauth, NUM_SUBAUTHS);
+}
+
+static void
id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
struct cifs_sid_id **psidid, char *typestr)
{
@@ -248,7 +255,7 @@ id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
}
}
- memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
+ cifs_copy_sid(&(*psidid)->sid, sidptr);
(*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
(*psidid)->refcount = 0;
@@ -354,7 +361,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
* any fields of the node after a reference is put .
*/
if (test_bit(SID_ID_MAPPED, &psidid->state)) {
- memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
+ cifs_copy_sid(ssid, &psidid->sid);
psidid->time = jiffies; /* update ts for accessing */
goto id_sid_out;
}
@@ -370,14 +377,14 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
if (IS_ERR(sidkey)) {
rc = -EINVAL;
cFYI(1, "%s: Can't map and id to a SID", __func__);
+ } else if (sidkey->datalen < sizeof(struct cifs_sid)) {
+ rc = -EIO;
+ cFYI(1, "%s: Downcall contained malformed key "
+ "(datalen=%hu)", __func__, sidkey->datalen);
} else {
lsid = (struct cifs_sid *)sidkey->payload.data;
- memcpy(&psidid->sid, lsid,
- sidkey->datalen < sizeof(struct cifs_sid) ?
- sidkey->datalen : sizeof(struct cifs_sid));
- memcpy(ssid, &psidid->sid,
- sidkey->datalen < sizeof(struct cifs_sid) ?
- sidkey->datalen : sizeof(struct cifs_sid));
+ cifs_copy_sid(&psidid->sid, lsid);
+ cifs_copy_sid(ssid, &psidid->sid);
set_bit(SID_ID_MAPPED, &psidid->state);
key_put(sidkey);
kfree(psidid->sidstr);
@@ -396,7 +403,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
return rc;
}
if (test_bit(SID_ID_MAPPED, &psidid->state))
- memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
+ cifs_copy_sid(ssid, &psidid->sid);
else
rc = -EINVAL;
}
@@ -675,8 +682,6 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
static void copy_sec_desc(const struct cifs_ntsd *pntsd,
struct cifs_ntsd *pnntsd, __u32 sidsoffset)
{
- int i;
-
struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;
@@ -692,26 +697,14 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
le32_to_cpu(pntsd->osidoffset));
nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset);
-
- nowner_sid_ptr->revision = owner_sid_ptr->revision;
- nowner_sid_ptr->num_subauth = owner_sid_ptr->num_subauth;
- for (i = 0; i < 6; i++)
- nowner_sid_ptr->authority[i] = owner_sid_ptr->authority[i];
- for (i = 0; i < 5; i++)
- nowner_sid_ptr->sub_auth[i] = owner_sid_ptr->sub_auth[i];
+ cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr);
/* copy group sid */
group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
le32_to_cpu(pntsd->gsidoffset));
ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset +
sizeof(struct cifs_sid));
-
- ngroup_sid_ptr->revision = group_sid_ptr->revision;
- ngroup_sid_ptr->num_subauth = group_sid_ptr->num_subauth;
- for (i = 0; i < 6; i++)
- ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i];
- for (i = 0; i < 5; i++)
- ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i];
+ cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr);
return;
}
@@ -1120,8 +1113,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
kfree(nowner_sid_ptr);
return rc;
}
- memcpy(owner_sid_ptr, nowner_sid_ptr,
- sizeof(struct cifs_sid));
+ cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr);
kfree(nowner_sid_ptr);
*aclflag = CIFS_ACL_OWNER;
}
@@ -1139,8 +1131,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
kfree(ngroup_sid_ptr);
return rc;
}
- memcpy(group_sid_ptr, ngroup_sid_ptr,
- sizeof(struct cifs_sid));
+ cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr);
kfree(ngroup_sid_ptr);
*aclflag = CIFS_ACL_GROUP;
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 65a78e9a5d40..f771e9f98f9f 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -70,6 +70,7 @@ enum {
/* Mount options that take no arguments */
Opt_user_xattr, Opt_nouser_xattr,
Opt_forceuid, Opt_noforceuid,
+ Opt_forcegid, Opt_noforcegid,
Opt_noblocksend, Opt_noautotune,
Opt_hard, Opt_soft, Opt_perm, Opt_noperm,
Opt_mapchars, Opt_nomapchars, Opt_sfu,
@@ -121,6 +122,8 @@ static const match_table_t cifs_mount_option_tokens = {
{ Opt_nouser_xattr, "nouser_xattr" },
{ Opt_forceuid, "forceuid" },
{ Opt_noforceuid, "noforceuid" },
+ { Opt_forcegid, "forcegid" },
+ { Opt_noforcegid, "noforcegid" },
{ Opt_noblocksend, "noblocksend" },
{ Opt_noautotune, "noautotune" },
{ Opt_hard, "hard" },
@@ -1287,6 +1290,12 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
case Opt_noforceuid:
override_uid = 0;
break;
+ case Opt_forcegid:
+ override_gid = 1;
+ break;
+ case Opt_noforcegid:
+ override_gid = 0;
+ break;
case Opt_noblocksend:
vol->noblocksnd = 1;
break;
diff --git a/fs/compat.c b/fs/compat.c
index f2944ace7a7b..2b371b38911d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1160,11 +1160,14 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
struct file *file;
int fput_needed;
ssize_t ret;
+ loff_t pos;
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_readv(file, vec, vlen, &file->f_pos);
+ pos = file->f_pos;
+ ret = compat_readv(file, vec, vlen, &pos);
+ file->f_pos = pos;
fput_light(file, fput_needed);
return ret;
}
@@ -1226,11 +1229,14 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
struct file *file;
int fput_needed;
ssize_t ret;
+ loff_t pos;
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_writev(file, vec, vlen, &file->f_pos);
+ pos = file->f_pos;
+ ret = compat_writev(file, vec, vlen, &pos);
+ file->f_pos = pos;
fput_light(file, fput_needed);
return ret;
}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index debdfe0fc809..5d2069ff635c 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -210,6 +210,8 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd,
err = get_user(palp, &up->palette);
err |= get_user(length, &up->length);
+ if (err)
+ return -EFAULT;
up_native = compat_alloc_user_space(sizeof(struct video_spu_palette));
err = put_user(compat_ptr(palp), &up_native->palette);
diff --git a/fs/dcache.c b/fs/dcache.c
index b80531c91779..f104945dcc7d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -373,7 +373,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
* Inform try_to_ascend() that we are no longer attached to the
* dentry tree
*/
- dentry->d_flags |= DCACHE_DISCONNECTED;
+ dentry->d_flags |= DCACHE_DENTRY_KILLED;
if (parent)
spin_unlock(&parent->d_lock);
dentry_iput(dentry);
@@ -1030,7 +1030,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq
* or deletion
*/
if (new != old->d_parent ||
- (old->d_flags & DCACHE_DISCONNECTED) ||
+ (old->d_flags & DCACHE_DENTRY_KILLED) ||
(!locked && read_seqretry(&rename_lock, seq))) {
spin_unlock(&new->d_lock);
new = NULL;
@@ -1116,6 +1116,8 @@ positive:
return 1;
rename_retry:
+ if (locked)
+ goto again;
locked = 1;
write_seqlock(&rename_lock);
goto again;
@@ -1218,6 +1220,8 @@ out:
rename_retry:
if (found)
return found;
+ if (locked)
+ goto again;
locked = 1;
write_seqlock(&rename_lock);
goto again;
@@ -2963,6 +2967,8 @@ resume:
return;
rename_retry:
+ if (locked)
+ goto again;
locked = 1;
write_seqlock(&rename_lock);
goto again;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index f4aadd15b613..29c4fdab29db 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -305,9 +305,9 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
dio->end_io(dio->iocb, offset, transferred,
dio->private, ret, is_async);
} else {
+ inode_dio_done(dio->inode);
if (is_async)
aio_complete(dio->iocb, ret, 0);
- inode_dio_done(dio->inode);
}
return ret;
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 867b64c5d84f..56e3aa57e188 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -568,6 +568,8 @@ struct ecryptfs_open_req {
struct inode *ecryptfs_get_inode(struct inode *lower_inode,
struct super_block *sb);
void ecryptfs_i_size_init(const char *page_virt, struct inode *inode);
+int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry,
+ struct inode *ecryptfs_inode);
int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
size_t *decrypted_name_size,
struct dentry *ecryptfs_dentry,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 2b17f2f9b121..d45ba4568128 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -138,29 +138,50 @@ out:
return rc;
}
-static void ecryptfs_vma_close(struct vm_area_struct *vma)
-{
- filemap_write_and_wait(vma->vm_file->f_mapping);
-}
-
-static const struct vm_operations_struct ecryptfs_file_vm_ops = {
- .close = ecryptfs_vma_close,
- .fault = filemap_fault,
-};
+struct kmem_cache *ecryptfs_file_info_cache;
-static int ecryptfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+static int read_or_initialize_metadata(struct dentry *dentry)
{
+ struct inode *inode = dentry->d_inode;
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
+ struct ecryptfs_crypt_stat *crypt_stat;
int rc;
- rc = generic_file_mmap(file, vma);
+ crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
+ mount_crypt_stat = &ecryptfs_superblock_to_private(
+ inode->i_sb)->mount_crypt_stat;
+ mutex_lock(&crypt_stat->cs_mutex);
+
+ if (crypt_stat->flags & ECRYPTFS_POLICY_APPLIED &&
+ crypt_stat->flags & ECRYPTFS_KEY_VALID) {
+ rc = 0;
+ goto out;
+ }
+
+ rc = ecryptfs_read_metadata(dentry);
if (!rc)
- vma->vm_ops = &ecryptfs_file_vm_ops;
+ goto out;
+
+ if (mount_crypt_stat->flags & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED) {
+ crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED
+ | ECRYPTFS_ENCRYPTED);
+ rc = 0;
+ goto out;
+ }
+
+ if (!(mount_crypt_stat->flags & ECRYPTFS_XATTR_METADATA_ENABLED) &&
+ !i_size_read(ecryptfs_inode_to_lower(inode))) {
+ rc = ecryptfs_initialize_file(dentry, inode);
+ if (!rc)
+ goto out;
+ }
+ rc = -EIO;
+out:
+ mutex_unlock(&crypt_stat->cs_mutex);
return rc;
}
-struct kmem_cache *ecryptfs_file_info_cache;
-
/**
* ecryptfs_open
* @inode: inode speciying file to open
@@ -236,32 +257,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
rc = 0;
goto out;
}
- mutex_lock(&crypt_stat->cs_mutex);
- if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)
- || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
- rc = ecryptfs_read_metadata(ecryptfs_dentry);
- if (rc) {
- ecryptfs_printk(KERN_DEBUG,
- "Valid headers not found\n");
- if (!(mount_crypt_stat->flags
- & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
- rc = -EIO;
- printk(KERN_WARNING "Either the lower file "
- "is not in a valid eCryptfs format, "
- "or the key could not be retrieved. "
- "Plaintext passthrough mode is not "
- "enabled; returning -EIO\n");
- mutex_unlock(&crypt_stat->cs_mutex);
- goto out_put;
- }
- rc = 0;
- crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED
- | ECRYPTFS_ENCRYPTED);
- mutex_unlock(&crypt_stat->cs_mutex);
- goto out;
- }
- }
- mutex_unlock(&crypt_stat->cs_mutex);
+ rc = read_or_initialize_metadata(ecryptfs_dentry);
+ if (rc)
+ goto out_put;
ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = "
"[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino,
(unsigned long long)i_size_read(inode));
@@ -277,8 +275,14 @@ out:
static int ecryptfs_flush(struct file *file, fl_owner_t td)
{
- return file->f_mode & FMODE_WRITE
- ? filemap_write_and_wait(file->f_mapping) : 0;
+ struct file *lower_file = ecryptfs_file_to_lower(file);
+
+ if (lower_file->f_op && lower_file->f_op->flush) {
+ filemap_write_and_wait(file->f_mapping);
+ return lower_file->f_op->flush(lower_file, td);
+ }
+
+ return 0;
}
static int ecryptfs_release(struct inode *inode, struct file *file)
@@ -292,15 +296,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
static int
ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
- int rc = 0;
-
- rc = generic_file_fsync(file, start, end, datasync);
- if (rc)
- goto out;
- rc = vfs_fsync_range(ecryptfs_file_to_lower(file), start, end,
- datasync);
-out:
- return rc;
+ return vfs_fsync(ecryptfs_file_to_lower(file), datasync);
}
static int ecryptfs_fasync(int fd, struct file *file, int flag)
@@ -369,7 +365,7 @@ const struct file_operations ecryptfs_main_fops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ecryptfs_compat_ioctl,
#endif
- .mmap = ecryptfs_file_mmap,
+ .mmap = generic_file_mmap,
.open = ecryptfs_open,
.flush = ecryptfs_flush,
.release = ecryptfs_release,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index ab35b113003b..11030b2fd3b4 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -143,6 +143,31 @@ static int ecryptfs_interpose(struct dentry *lower_dentry,
return 0;
}
+static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
+ struct inode *inode)
+{
+ struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir);
+ struct dentry *lower_dir_dentry;
+ int rc;
+
+ dget(lower_dentry);
+ lower_dir_dentry = lock_parent(lower_dentry);
+ rc = vfs_unlink(lower_dir_inode, lower_dentry);
+ if (rc) {
+ printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
+ goto out_unlock;
+ }
+ fsstack_copy_attr_times(dir, lower_dir_inode);
+ set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink);
+ inode->i_ctime = dir->i_ctime;
+ d_drop(dentry);
+out_unlock:
+ unlock_dir(lower_dir_dentry);
+ dput(lower_dentry);
+ return rc;
+}
+
/**
* ecryptfs_do_create
* @directory_inode: inode of the new file's dentry's parent in ecryptfs
@@ -182,8 +207,10 @@ ecryptfs_do_create(struct inode *directory_inode,
}
inode = __ecryptfs_get_inode(lower_dentry->d_inode,
directory_inode->i_sb);
- if (IS_ERR(inode))
+ if (IS_ERR(inode)) {
+ vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
goto out_lock;
+ }
fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode);
fsstack_copy_inode_size(directory_inode, lower_dir_dentry->d_inode);
out_lock:
@@ -200,8 +227,8 @@ out:
*
* Returns zero on success
*/
-static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry,
- struct inode *ecryptfs_inode)
+int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry,
+ struct inode *ecryptfs_inode)
{
struct ecryptfs_crypt_stat *crypt_stat =
&ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
@@ -265,7 +292,9 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
* that this on disk file is prepared to be an ecryptfs file */
rc = ecryptfs_initialize_file(ecryptfs_dentry, ecryptfs_inode);
if (rc) {
- drop_nlink(ecryptfs_inode);
+ ecryptfs_do_unlink(directory_inode, ecryptfs_dentry,
+ ecryptfs_inode);
+ make_bad_inode(ecryptfs_inode);
unlock_new_inode(ecryptfs_inode);
iput(ecryptfs_inode);
goto out;
@@ -477,27 +506,7 @@ out_lock:
static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
{
- int rc = 0;
- struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
- struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir);
- struct dentry *lower_dir_dentry;
-
- dget(lower_dentry);
- lower_dir_dentry = lock_parent(lower_dentry);
- rc = vfs_unlink(lower_dir_inode, lower_dentry);
- if (rc) {
- printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
- goto out_unlock;
- }
- fsstack_copy_attr_times(dir, lower_dir_inode);
- set_nlink(dentry->d_inode,
- ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink);
- dentry->d_inode->i_ctime = dir->i_ctime;
- d_drop(dentry);
-out_unlock:
- unlock_dir(lower_dir_dentry);
- dput(lower_dentry);
- return rc;
+ return ecryptfs_do_unlink(dir, dentry, dentry->d_inode);
}
static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
@@ -621,6 +630,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct dentry *lower_old_dir_dentry;
struct dentry *lower_new_dir_dentry;
struct dentry *trap = NULL;
+ struct inode *target_inode;
lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
@@ -628,6 +638,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
dget(lower_new_dentry);
lower_old_dir_dentry = dget_parent(lower_old_dentry);
lower_new_dir_dentry = dget_parent(lower_new_dentry);
+ target_inode = new_dentry->d_inode;
trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
/* source should not be ancestor of target */
if (trap == lower_old_dentry) {
@@ -643,6 +654,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
lower_new_dir_dentry->d_inode, lower_new_dentry);
if (rc)
goto out_lock;
+ if (target_inode)
+ fsstack_copy_attr_all(target_inode,
+ ecryptfs_inode_to_lower(target_inode));
fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
if (new_dir != old_dir)
fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
@@ -1002,12 +1016,6 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
goto out;
}
- if (S_ISREG(inode->i_mode)) {
- rc = filemap_write_and_wait(inode->i_mapping);
- if (rc)
- goto out;
- fsstack_copy_attr_all(inode, lower_inode);
- }
memcpy(&lower_ia, ia, sizeof(lower_ia));
if (ia->ia_valid & ATTR_FILE)
lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 68954937a071..240832e8b289 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -162,6 +162,7 @@ void ecryptfs_put_lower_file(struct inode *inode)
inode_info = ecryptfs_inode_to_private(inode);
if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count,
&inode_info->lower_file_mutex)) {
+ filemap_write_and_wait(inode->i_mapping);
fput(inode_info->lower_file);
inode_info->lower_file = NULL;
mutex_unlock(&inode_info->lower_file_mutex);
@@ -279,6 +280,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
char *fnek_src;
char *cipher_key_bytes_src;
char *fn_cipher_key_bytes_src;
+ u8 cipher_code;
*check_ruid = 0;
@@ -420,6 +422,18 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
&& !fn_cipher_key_bytes_set)
mount_crypt_stat->global_default_fn_cipher_key_bytes =
mount_crypt_stat->global_default_cipher_key_size;
+
+ cipher_code = ecryptfs_code_for_cipher_string(
+ mount_crypt_stat->global_default_cipher_name,
+ mount_crypt_stat->global_default_cipher_key_size);
+ if (!cipher_code) {
+ ecryptfs_printk(KERN_ERR,
+ "eCryptfs doesn't support cipher: %s",
+ mount_crypt_stat->global_default_cipher_name);
+ rc = -EINVAL;
+ goto out;
+ }
+
mutex_lock(&key_tfm_list_mutex);
if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name,
NULL)) {
@@ -505,7 +519,6 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
goto out;
}
- s->s_flags = flags;
rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
if (rc)
goto out1;
@@ -541,6 +554,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
}
ecryptfs_set_superblock_lower(s, path.dentry->d_sb);
+
+ /**
+ * Set the POSIX ACL flag based on whether they're enabled in the lower
+ * mount. Force a read-only eCryptfs mount if the lower mount is ro.
+ * Allow a ro eCryptfs mount even when the lower mount is rw.
+ */
+ s->s_flags = flags & ~MS_POSIXACL;
+ s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL);
+
s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
s->s_blocksize = path.dentry->d_sb->s_blocksize;
s->s_magic = ECRYPTFS_SUPER_MAGIC;
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index a46b3a8fee1e..bd1d57f98f74 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -66,18 +66,6 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc)
{
int rc;
- /*
- * Refuse to write the page out if we are called from reclaim context
- * since our writepage() path may potentially allocate memory when
- * calling into the lower fs vfs_write() which may in turn invoke
- * us again.
- */
- if (current->flags & PF_MEMALLOC) {
- redirty_page_for_writepage(wbc, page);
- rc = 0;
- goto out;
- }
-
rc = ecryptfs_encrypt_page(page);
if (rc) {
ecryptfs_printk(KERN_WARNING, "Error encrypting "
@@ -498,7 +486,6 @@ static int ecryptfs_write_end(struct file *file,
struct ecryptfs_crypt_stat *crypt_stat =
&ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
int rc;
- int need_unlock_page = 1;
ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
"(page w/ index = [0x%.16lx], to = [%d])\n", index, to);
@@ -519,26 +506,26 @@ static int ecryptfs_write_end(struct file *file,
"zeros in page with index = [0x%.16lx]\n", index);
goto out;
}
- set_page_dirty(page);
- unlock_page(page);
- need_unlock_page = 0;
+ rc = ecryptfs_encrypt_page(page);
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper "
+ "index [0x%.16lx])\n", index);
+ goto out;
+ }
if (pos + copied > i_size_read(ecryptfs_inode)) {
i_size_write(ecryptfs_inode, pos + copied);
ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
"[0x%.16llx]\n",
(unsigned long long)i_size_read(ecryptfs_inode));
- balance_dirty_pages_ratelimited(mapping);
- rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
- if (rc) {
- printk(KERN_ERR "Error writing inode size to metadata; "
- "rc = [%d]\n", rc);
- goto out;
- }
}
- rc = copied;
+ rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
+ if (rc)
+ printk(KERN_ERR "Error writing inode size to metadata; "
+ "rc = [%d]\n", rc);
+ else
+ rc = copied;
out:
- if (need_unlock_page)
- unlock_page(page);
+ unlock_page(page);
page_cache_release(page);
return rc;
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 079d1be65ba9..0863bab42c4d 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1285,7 +1285,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
* otherwise we might miss an event that happens between the
* f_op->poll() call and the new event set registering.
*/
- epi->event.events = event->events;
+ epi->event.events = event->events; /* need barrier below */
pt._key = event->events;
epi->event.data = event->data; /* protected by mtx */
if (epi->event.events & EPOLLWAKEUP) {
@@ -1296,6 +1296,26 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
}
/*
+ * The following barrier has two effects:
+ *
+ * 1) Flush epi changes above to other CPUs. This ensures
+ * we do not miss events from ep_poll_callback if an
+ * event occurs immediately after we call f_op->poll().
+ * We need this because we did not take ep->lock while
+ * changing epi above (but ep_poll_callback does take
+ * ep->lock).
+ *
+ * 2) We also need to ensure we do not miss _past_ events
+ * when calling f_op->poll(). This barrier also
+ * pairs with the barrier in wq_has_sleeper (see
+ * comments for wq_has_sleeper).
+ *
+ * This barrier will now guarantee ep_poll_callback or f_op->poll
+ * (or both) will notice the readiness of an item.
+ */
+ smp_mb();
+
+ /*
* Get current event bits. We can safely use the file* here because
* its usage count has been increased by the caller of this function.
*/
diff --git a/fs/exec.c b/fs/exec.c
index 9ba382d7e726..c136a5e750fc 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1114,7 +1114,8 @@ int flush_old_exec(struct linux_binprm * bprm)
bprm->mm = NULL; /* We're using it now */
set_fs(USER_DS);
- current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD);
+ current->flags &=
+ ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | PF_NOFREEZE);
flush_thread();
current->personality &= ~bprm->per_clear;
@@ -1205,9 +1206,24 @@ void free_bprm(struct linux_binprm *bprm)
mutex_unlock(&current->signal->cred_guard_mutex);
abort_creds(bprm->cred);
}
+ /* If a binfmt changed the interp, free it. */
+ if (bprm->interp != bprm->filename)
+ kfree(bprm->interp);
kfree(bprm);
}
+int bprm_change_interp(char *interp, struct linux_binprm *bprm)
+{
+ /* If a binfmt changed the interp, free it first. */
+ if (bprm->interp != bprm->filename)
+ kfree(bprm->interp);
+ bprm->interp = kstrdup(interp, GFP_KERNEL);
+ if (!bprm->interp)
+ return -ENOMEM;
+ return 0;
+}
+EXPORT_SYMBOL(bprm_change_interp);
+
/*
* install the new credentials for this executable
*/
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 10d7812f6021..075281734fcb 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3068,6 +3068,8 @@ static int ext3_do_update_inode(handle_t *handle,
struct ext3_inode_info *ei = EXT3_I(inode);
struct buffer_head *bh = iloc->bh;
int err = 0, rc, block;
+ int need_datasync = 0;
+ __le32 disksize;
again:
/* we can't allow multiple procs in here at once, its a bit racey */
@@ -3105,7 +3107,11 @@ again:
raw_inode->i_gid_high = 0;
}
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
- raw_inode->i_size = cpu_to_le32(ei->i_disksize);
+ disksize = cpu_to_le32(ei->i_disksize);
+ if (disksize != raw_inode->i_size) {
+ need_datasync = 1;
+ raw_inode->i_size = disksize;
+ }
raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
@@ -3121,8 +3127,11 @@ again:
if (!S_ISREG(inode->i_mode)) {
raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
} else {
- raw_inode->i_size_high =
- cpu_to_le32(ei->i_disksize >> 32);
+ disksize = cpu_to_le32(ei->i_disksize >> 32);
+ if (disksize != raw_inode->i_size_high) {
+ raw_inode->i_size_high = disksize;
+ need_datasync = 1;
+ }
if (ei->i_disksize > 0x7fffffffULL) {
struct super_block *sb = inode->i_sb;
if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
@@ -3175,6 +3184,8 @@ again:
ext3_clear_inode_state(inode, EXT3_STATE_NEW);
atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
+ if (need_datasync)
+ atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
out_brelse:
brelse (bh);
ext3_std_error(inode->i_sb, err);
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index a5c29bb3b835..8535c45dfceb 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -410,8 +410,10 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
retry:
handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
- if (IS_ERR(handle))
- return PTR_ERR(handle);
+ if (IS_ERR(handle)) {
+ error = PTR_ERR(handle);
+ goto release_and_out;
+ }
error = ext4_set_acl(handle, inode, type, acl);
ext4_journal_stop(handle);
if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index df76291d692b..3e1018ace702 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -326,7 +326,7 @@ err_out:
return 0;
}
/**
- * ext4_read_block_bitmap()
+ * ext4_read_block_bitmap_nowait()
* @sb: super block
* @block_group: given block group
*
@@ -422,6 +422,8 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
struct buffer_head *bh;
bh = ext4_read_block_bitmap_nowait(sb, block_group);
+ if (!bh)
+ return NULL;
if (ext4_wait_block_bitmap(sb, block_group, bh)) {
put_bh(bh);
return NULL;
@@ -447,11 +449,16 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
free_clusters = percpu_counter_read_positive(fcc);
dirty_clusters = percpu_counter_read_positive(dcc);
- root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es));
+
+ /*
+ * r_blocks_count should always be multiple of the cluster ratio so
+ * we are safe to do a plane bit shift only.
+ */
+ root_clusters = ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
if (free_clusters - (nclusters + root_clusters + dirty_clusters) <
EXT4_FREECLUSTERS_WATERMARK) {
- free_clusters = EXT4_C2B(sbi, percpu_counter_sum_positive(fcc));
+ free_clusters = percpu_counter_sum_positive(fcc);
dirty_clusters = percpu_counter_sum_positive(dcc);
}
/* Check whether we have space after accounting for current
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 8b384cc3b75a..852d4c257ace 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -52,6 +52,9 @@
#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
+#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */
+#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */
+
static int ext4_split_extent(handle_t *handle,
struct inode *inode,
struct ext4_ext_path *path,
@@ -2107,13 +2110,14 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
* removes index from the index block.
*/
static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path)
+ struct ext4_ext_path *path, int depth)
{
int err;
ext4_fsblk_t leaf;
/* free index block */
- path--;
+ depth--;
+ path = path + depth;
leaf = ext4_idx_pblock(path->p_idx);
if (unlikely(path->p_hdr->eh_entries == 0)) {
EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
@@ -2138,6 +2142,19 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
ext4_free_blocks(handle, inode, NULL, leaf, 1,
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+
+ while (--depth >= 0) {
+ if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
+ break;
+ path--;
+ err = ext4_ext_get_access(handle, inode, path);
+ if (err)
+ break;
+ path->p_idx->ei_block = (path+1)->p_idx->ei_block;
+ err = ext4_ext_dirty(handle, inode, path);
+ if (err)
+ break;
+ }
return err;
}
@@ -2471,7 +2488,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
/* if this leaf is free, then we should
* remove it from index block above */
if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
- err = ext4_ext_rm_idx(handle, inode, path + depth);
+ err = ext4_ext_rm_idx(handle, inode, path, depth);
out:
return err;
@@ -2672,7 +2689,7 @@ cont:
/* index is empty, remove it;
* handle must be already prepared by the
* truncatei_leaf() */
- err = ext4_ext_rm_idx(handle, inode, path + i);
+ err = ext4_ext_rm_idx(handle, inode, path, i);
}
/* root level has p_bh == NULL, brelse() eats this */
brelse(path[i].p_bh);
@@ -2829,6 +2846,9 @@ static int ext4_split_extent_at(handle_t *handle,
unsigned int ee_len, depth;
int err = 0;
+ BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
+ (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
+
ext_debug("ext4_split_extents_at: inode %lu, logical"
"block %llu\n", inode->i_ino, (unsigned long long)split);
@@ -2887,7 +2907,14 @@ static int ext4_split_extent_at(handle_t *handle,
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
- err = ext4_ext_zeroout(inode, &orig_ex);
+ if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
+ if (split_flag & EXT4_EXT_DATA_VALID1)
+ err = ext4_ext_zeroout(inode, ex2);
+ else
+ err = ext4_ext_zeroout(inode, ex);
+ } else
+ err = ext4_ext_zeroout(inode, &orig_ex);
+
if (err)
goto fix_extent_len;
/* update the extent length and mark as initialized */
@@ -2940,12 +2967,13 @@ static int ext4_split_extent(handle_t *handle,
uninitialized = ext4_ext_is_uninitialized(ex);
if (map->m_lblk + map->m_len < ee_block + ee_len) {
- split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
- EXT4_EXT_MAY_ZEROOUT : 0;
+ split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
if (uninitialized)
split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
EXT4_EXT_MARK_UNINIT2;
+ if (split_flag & EXT4_EXT_DATA_VALID2)
+ split_flag1 |= EXT4_EXT_DATA_VALID1;
err = ext4_split_extent_at(handle, inode, path,
map->m_lblk + map->m_len, split_flag1, flags1);
if (err)
@@ -2958,8 +2986,8 @@ static int ext4_split_extent(handle_t *handle,
return PTR_ERR(path);
if (map->m_lblk >= ee_block) {
- split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
- EXT4_EXT_MAY_ZEROOUT : 0;
+ split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
+ EXT4_EXT_DATA_VALID2);
if (uninitialized)
split_flag1 |= EXT4_EXT_MARK_UNINIT1;
if (split_flag & EXT4_EXT_MARK_UNINIT2)
@@ -3237,26 +3265,47 @@ static int ext4_split_unwritten_extents(handle_t *handle,
split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
split_flag |= EXT4_EXT_MARK_UNINIT2;
-
+ if (flags & EXT4_GET_BLOCKS_CONVERT)
+ split_flag |= EXT4_EXT_DATA_VALID2;
flags |= EXT4_GET_BLOCKS_PRE_IO;
return ext4_split_extent(handle, inode, path, map, split_flag, flags);
}
static int ext4_convert_unwritten_extents_endio(handle_t *handle,
- struct inode *inode,
- struct ext4_ext_path *path)
+ struct inode *inode,
+ struct ext4_map_blocks *map,
+ struct ext4_ext_path *path)
{
struct ext4_extent *ex;
+ ext4_lblk_t ee_block;
+ unsigned int ee_len;
int depth;
int err = 0;
depth = ext_depth(inode);
ex = path[depth].p_ext;
+ ee_block = le32_to_cpu(ex->ee_block);
+ ee_len = ext4_ext_get_actual_len(ex);
ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
"block %llu, max_blocks %u\n", inode->i_ino,
- (unsigned long long)le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_len(ex));
+ (unsigned long long)ee_block, ee_len);
+
+ /* If extent is larger than requested then split is required */
+ if (ee_block != map->m_lblk || ee_len > map->m_len) {
+ err = ext4_split_unwritten_extents(handle, inode, map, path,
+ EXT4_GET_BLOCKS_CONVERT);
+ if (err < 0)
+ goto out;
+ ext4_ext_drop_refs(path);
+ path = ext4_ext_find_extent(inode, map->m_lblk, path);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ goto out;
+ }
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ }
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
@@ -3564,7 +3613,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
}
/* IO end_io complete, convert the filled extent to written */
if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
- ret = ext4_convert_unwritten_extents_endio(handle, inode,
+ ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
path);
if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 0ee374d27fdb..902544e7ee5c 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -697,6 +697,10 @@ repeat_in_this_group:
"inode=%lu", ino + 1);
continue;
}
+ BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
+ if (err)
+ goto fail;
ext4_lock_group(sb, group);
ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
ext4_unlock_group(sb, group);
@@ -710,6 +714,11 @@ repeat_in_this_group:
goto out;
got:
+ BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
+ if (err)
+ goto fail;
+
/* We may have to initialize the block bitmap if it isn't already */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
@@ -725,7 +734,6 @@ got:
BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh);
- brelse(block_bitmap_bh);
/* recheck and clear flag under lock if we still need to */
ext4_lock_group(sb, group);
@@ -737,16 +745,12 @@ got:
gdp);
}
ext4_unlock_group(sb, group);
+ brelse(block_bitmap_bh);
if (err)
goto fail;
}
- BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
- if (err)
- goto fail;
-
BUFFER_TRACE(group_desc_bh, "get_write_access");
err = ext4_journal_get_write_access(handle, group_desc_bh);
if (err)
@@ -789,11 +793,6 @@ got:
ext4_unlock_group(sb, group);
}
- BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
- if (err)
- goto fail;
-
BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
if (err)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 55a654d86182..a0d7e2617fda 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1424,6 +1424,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
index = mpd->first_page;
end = mpd->next_page - 1;
+
+ pagevec_init(&pvec, 0);
while (index <= end) {
nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
if (nr_pages == 0)
@@ -2386,6 +2388,16 @@ static int ext4_nonda_switch(struct super_block *sb)
free_blocks = EXT4_C2B(sbi,
percpu_counter_read_positive(&sbi->s_freeclusters_counter));
dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
+ /*
+ * Start pushing delalloc when 1/2 of free blocks are dirty.
+ */
+ if (dirty_blocks && (free_blocks < 2 * dirty_blocks) &&
+ !writeback_in_progress(sb->s_bdi) &&
+ down_read_trylock(&sb->s_umount)) {
+ writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
+ up_read(&sb->s_umount);
+ }
+
if (2 * free_blocks < 3 * dirty_blocks ||
free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
/*
@@ -2394,13 +2406,6 @@ static int ext4_nonda_switch(struct super_block *sb)
*/
return 1;
}
- /*
- * Even if we don't switch but are nearing capacity,
- * start pushing delalloc when 1/2 of free blocks are dirty.
- */
- if (free_blocks < 2 * dirty_blocks)
- writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE);
-
return 0;
}
@@ -3889,6 +3894,7 @@ static int ext4_do_update_inode(handle_t *handle,
struct ext4_inode_info *ei = EXT4_I(inode);
struct buffer_head *bh = iloc->bh;
int err = 0, rc, block;
+ int need_datasync = 0;
/* For fields not not tracking in the in-memory inode,
* initialise them to zero for new inodes. */
@@ -3937,7 +3943,10 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_file_acl_high =
cpu_to_le16(ei->i_file_acl >> 32);
raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
- ext4_isize_set(raw_inode, ei->i_disksize);
+ if (ei->i_disksize != ext4_isize(raw_inode)) {
+ ext4_isize_set(raw_inode, ei->i_disksize);
+ need_datasync = 1;
+ }
if (ei->i_disksize > 0x7fffffffULL) {
struct super_block *sb = inode->i_sb;
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
@@ -3988,7 +3997,7 @@ static int ext4_do_update_inode(handle_t *handle,
err = rc;
ext4_clear_inode_state(inode, EXT4_STATE_NEW);
- ext4_update_inode_fsync_trans(handle, inode, 0);
+ ext4_update_inode_fsync_trans(handle, inode, need_datasync);
out_brelse:
brelse(bh);
ext4_std_error(inode->i_sb, err);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 6b0a57eafb53..3122ece15147 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4126,7 +4126,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
/* The max size of hash table is PREALLOC_TB_SIZE */
order = PREALLOC_TB_SIZE - 1;
/* Add the prealloc space to lg */
- rcu_read_lock();
+ spin_lock(&lg->lg_prealloc_lock);
list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
pa_inode_list) {
spin_lock(&tmp_pa->pa_lock);
@@ -4150,12 +4150,12 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
if (!added)
list_add_tail_rcu(&pa->pa_inode_list,
&lg->lg_prealloc_list[order]);
- rcu_read_unlock();
+ spin_unlock(&lg->lg_prealloc_lock);
/* Now trim the list to be not more than 8 elements */
if (lg_prealloc_count > 8) {
ext4_mb_discard_lg_preallocations(sb, lg,
- order, lg_prealloc_count);
+ order, lg_prealloc_count);
return;
}
return ;
@@ -4984,8 +4984,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
end = start + (range->len >> sb->s_blocksize_bits) - 1;
minlen = range->minlen >> sb->s_blocksize_bits;
- if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) ||
- unlikely(start >= max_blks))
+ if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) ||
+ start >= max_blks ||
+ range->len < sb->s_blocksize)
return -EINVAL;
if (end >= max_blks)
end = max_blks - 1;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index c5826c623e7a..e2016f34b58f 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -141,55 +141,21 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
}
/**
- * mext_check_null_inode - NULL check for two inodes
- *
- * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
- */
-static int
-mext_check_null_inode(struct inode *inode1, struct inode *inode2,
- const char *function, unsigned int line)
-{
- int ret = 0;
-
- if (inode1 == NULL) {
- __ext4_error(inode2->i_sb, function, line,
- "Both inodes should not be NULL: "
- "inode1 NULL inode2 %lu", inode2->i_ino);
- ret = -EIO;
- } else if (inode2 == NULL) {
- __ext4_error(inode1->i_sb, function, line,
- "Both inodes should not be NULL: "
- "inode1 %lu inode2 NULL", inode1->i_ino);
- ret = -EIO;
- }
- return ret;
-}
-
-/**
* double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
*
- * @orig_inode: original inode structure
- * @donor_inode: donor inode structure
- * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
- * i_ino order.
+ * Acquire write lock of i_data_sem of the two inodes
*/
static void
-double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
+double_down_write_data_sem(struct inode *first, struct inode *second)
{
- struct inode *first = orig_inode, *second = donor_inode;
+ if (first < second) {
+ down_write(&EXT4_I(first)->i_data_sem);
+ down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
+ } else {
+ down_write(&EXT4_I(second)->i_data_sem);
+ down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING);
- /*
- * Use the inode number to provide the stable locking order instead
- * of its address, because the C language doesn't guarantee you can
- * compare pointers that don't come from the same array.
- */
- if (donor_inode->i_ino < orig_inode->i_ino) {
- first = donor_inode;
- second = orig_inode;
}
-
- down_write(&EXT4_I(first)->i_data_sem);
- down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
}
/**
@@ -969,14 +935,6 @@ mext_check_arguments(struct inode *orig_inode,
return -EINVAL;
}
- /* Files should be in the same ext4 FS */
- if (orig_inode->i_sb != donor_inode->i_sb) {
- ext4_debug("ext4 move extent: The argument files "
- "should be in same FS [ino:orig %lu, donor %lu]\n",
- orig_inode->i_ino, donor_inode->i_ino);
- return -EINVAL;
- }
-
/* Ext4 move extent supports only extent based file */
if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
ext4_debug("ext4 move extent: orig file is not extents "
@@ -1072,35 +1030,19 @@ mext_check_arguments(struct inode *orig_inode,
* @inode1: the inode structure
* @inode2: the inode structure
*
- * Lock two inodes' i_mutex by i_ino order.
- * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
+ * Lock two inodes' i_mutex
*/
-static int
+static void
mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
{
- int ret = 0;
-
- BUG_ON(inode1 == NULL && inode2 == NULL);
-
- ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
- if (ret < 0)
- goto out;
-
- if (inode1 == inode2) {
- mutex_lock(&inode1->i_mutex);
- goto out;
- }
-
- if (inode1->i_ino < inode2->i_ino) {
+ BUG_ON(inode1 == inode2);
+ if (inode1 < inode2) {
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
} else {
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
}
-
-out:
- return ret;
}
/**
@@ -1109,28 +1051,13 @@ out:
* @inode1: the inode that is released first
* @inode2: the inode that is released second
*
- * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
*/
-static int
+static void
mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
{
- int ret = 0;
-
- BUG_ON(inode1 == NULL && inode2 == NULL);
-
- ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
- if (ret < 0)
- goto out;
-
- if (inode1)
- mutex_unlock(&inode1->i_mutex);
-
- if (inode2 && inode2 != inode1)
- mutex_unlock(&inode2->i_mutex);
-
-out:
- return ret;
+ mutex_unlock(&inode1->i_mutex);
+ mutex_unlock(&inode2->i_mutex);
}
/**
@@ -1187,16 +1114,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
ext4_lblk_t rest_blocks;
pgoff_t orig_page_offset = 0, seq_end_page;
- int ret1, ret2, depth, last_extent = 0;
+ int ret, depth, last_extent = 0;
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
int data_offset_in_page;
int block_len_in_page;
int uninit;
- /* orig and donor should be different file */
- if (orig_inode->i_ino == donor_inode->i_ino) {
+ if (orig_inode->i_sb != donor_inode->i_sb) {
+ ext4_debug("ext4 move extent: The argument files "
+ "should be in same FS [ino:orig %lu, donor %lu]\n",
+ orig_inode->i_ino, donor_inode->i_ino);
+ return -EINVAL;
+ }
+
+ /* orig and donor should be different inodes */
+ if (orig_inode == donor_inode) {
ext4_debug("ext4 move extent: The argument files should not "
- "be same file [ino:orig %lu, donor %lu]\n",
+ "be same inode [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
@@ -1208,18 +1142,21 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
-
+ /* TODO: This is non obvious task to swap blocks for inodes with full
+ jornaling enabled */
+ if (ext4_should_journal_data(orig_inode) ||
+ ext4_should_journal_data(donor_inode)) {
+ return -EINVAL;
+ }
/* Protect orig and donor inodes against a truncate */
- ret1 = mext_inode_double_lock(orig_inode, donor_inode);
- if (ret1 < 0)
- return ret1;
+ mext_inode_double_lock(orig_inode, donor_inode);
/* Protect extent tree against block allocations via delalloc */
double_down_write_data_sem(orig_inode, donor_inode);
/* Check the filesystem environment whether move_extent can be done */
- ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
+ ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
donor_start, &len);
- if (ret1)
+ if (ret)
goto out;
file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
@@ -1227,13 +1164,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
if (file_end < block_end)
len -= block_end - file_end;
- ret1 = get_ext_path(orig_inode, block_start, &orig_path);
- if (ret1)
+ ret = get_ext_path(orig_inode, block_start, &orig_path);
+ if (ret)
goto out;
/* Get path structure to check the hole */
- ret1 = get_ext_path(orig_inode, block_start, &holecheck_path);
- if (ret1)
+ ret = get_ext_path(orig_inode, block_start, &holecheck_path);
+ if (ret)
goto out;
depth = ext_depth(orig_inode);
@@ -1252,13 +1189,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
last_extent = mext_next_extent(orig_inode,
holecheck_path, &ext_cur);
if (last_extent < 0) {
- ret1 = last_extent;
+ ret = last_extent;
goto out;
}
last_extent = mext_next_extent(orig_inode, orig_path,
&ext_dummy);
if (last_extent < 0) {
- ret1 = last_extent;
+ ret = last_extent;
goto out;
}
seq_start = le32_to_cpu(ext_cur->ee_block);
@@ -1272,7 +1209,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
if (le32_to_cpu(ext_cur->ee_block) > block_end) {
ext4_debug("ext4 move extent: The specified range of file "
"may be the hole\n");
- ret1 = -EINVAL;
+ ret = -EINVAL;
goto out;
}
@@ -1292,7 +1229,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
last_extent = mext_next_extent(orig_inode, holecheck_path,
&ext_cur);
if (last_extent < 0) {
- ret1 = last_extent;
+ ret = last_extent;
break;
}
add_blocks = ext4_ext_get_actual_len(ext_cur);
@@ -1349,18 +1286,18 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
orig_page_offset,
data_offset_in_page,
block_len_in_page, uninit,
- &ret1);
+ &ret);
/* Count how many blocks we have exchanged */
*moved_len += block_len_in_page;
- if (ret1 < 0)
+ if (ret < 0)
break;
if (*moved_len > len) {
EXT4_ERROR_INODE(orig_inode,
"We replaced blocks too much! "
"sum of replaced: %llu requested: %llu",
*moved_len, len);
- ret1 = -EIO;
+ ret = -EIO;
break;
}
@@ -1374,22 +1311,22 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
}
double_down_write_data_sem(orig_inode, donor_inode);
- if (ret1 < 0)
+ if (ret < 0)
break;
/* Decrease buffer counter */
if (holecheck_path)
ext4_ext_drop_refs(holecheck_path);
- ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path);
- if (ret1)
+ ret = get_ext_path(orig_inode, seq_start, &holecheck_path);
+ if (ret)
break;
depth = holecheck_path->p_depth;
/* Decrease buffer counter */
if (orig_path)
ext4_ext_drop_refs(orig_path);
- ret1 = get_ext_path(orig_inode, seq_start, &orig_path);
- if (ret1)
+ ret = get_ext_path(orig_inode, seq_start, &orig_path);
+ if (ret)
break;
ext_cur = holecheck_path[depth].p_ext;
@@ -1412,12 +1349,7 @@ out:
kfree(holecheck_path);
}
double_up_write_data_sem(orig_inode, donor_inode);
- ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
-
- if (ret1)
- return ret1;
- else if (ret2)
- return ret2;
+ mext_inode_double_unlock(orig_inode, donor_inode);
- return 0;
+ return ret;
}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 0a94cbbe8828..ac769399b141 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1801,9 +1801,7 @@ retry:
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
-#ifdef CONFIG_EXT4_FS_XATTR
inode->i_op = &ext4_special_inode_operations;
-#endif
err = ext4_add_nondir(handle, dentry, inode);
}
ext4_journal_stop(handle);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3407a62590d6..231cacb8f640 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -200,8 +200,11 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
* be a partial of a flex group.
*
* @sb: super block of fs to which the groups belongs
+ *
+ * Returns 0 on a successful allocation of the metadata blocks in the
+ * block group.
*/
-static void ext4_alloc_group_tables(struct super_block *sb,
+static int ext4_alloc_group_tables(struct super_block *sb,
struct ext4_new_flex_group_data *flex_gd,
int flexbg_size)
{
@@ -226,6 +229,8 @@ static void ext4_alloc_group_tables(struct super_block *sb,
(last_group & ~(flexbg_size - 1))));
next_group:
group = group_data[0].group;
+ if (src_group >= group_data[0].group + flex_gd->count)
+ return -ENOSPC;
start_blk = ext4_group_first_block_no(sb, src_group);
last_blk = start_blk + group_data[src_group - group].blocks_count;
@@ -235,7 +240,6 @@ next_group:
start_blk += overhead;
- BUG_ON(src_group >= group_data[0].group + flex_gd->count);
/* We collect contiguous blocks as much as possible. */
src_group++;
for (; src_group <= last_group; src_group++)
@@ -300,6 +304,7 @@ next_group:
group_data[i].free_blocks_count);
}
}
+ return 0;
}
static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
@@ -451,6 +456,9 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
gdblocks = ext4_bg_num_gdb(sb, group);
start = ext4_group_first_block_no(sb, group);
+ if (!ext4_bg_has_super(sb, group))
+ goto handle_itb;
+
/* Copy all of the GDT blocks into the backup in this group */
for (j = 0, block = start + 1; j < gdblocks; j++, block++) {
struct buffer_head *gdb;
@@ -493,6 +501,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
goto out;
}
+handle_itb:
/* Initialize group tables of the grop @group */
if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED))
goto handle_bb;
@@ -1293,13 +1302,15 @@ exit_journal:
err = err2;
if (!err) {
- int i;
+ int gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
+ int gdb_num_end = ((group + flex_gd->count - 1) /
+ EXT4_DESC_PER_BLOCK(sb));
+
update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
sizeof(struct ext4_super_block));
- for (i = 0; i < flex_gd->count; i++, group++) {
+ for (; gdb_num <= gdb_num_end; gdb_num++) {
struct buffer_head *gdb_bh;
- int gdb_num;
- gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb);
+
gdb_bh = sbi->s_group_desc[gdb_num];
update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
gdb_bh->b_size);
@@ -1676,7 +1687,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
*/
while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
flexbg_size)) {
- ext4_alloc_group_tables(sb, flex_gd, flexbg_size);
+ if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0)
+ break;
err = ext4_flex_group_add(sb, resize_inode, flex_gd);
if (unlikely(err))
break;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8f3459e2ffee..94dc725a1bb8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1599,9 +1599,7 @@ static int parse_options(char *options, struct super_block *sb,
unsigned int *journal_ioprio,
int is_remount)
{
-#ifdef CONFIG_QUOTA
struct ext4_sb_info *sbi = EXT4_SB(sb);
-#endif
char *p;
substring_t args[MAX_OPT_ARGS];
int token;
@@ -1650,6 +1648,16 @@ static int parse_options(char *options, struct super_block *sb,
}
}
#endif
+ if (test_opt(sb, DIOREAD_NOLOCK)) {
+ int blocksize =
+ BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
+
+ if (blocksize < PAGE_CACHE_SIZE) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "dioread_nolock if block size != PAGE_SIZE");
+ return 0;
+ }
+ }
return 1;
}
@@ -1692,7 +1700,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
static const char *token2str(int token)
{
- static const struct match_token *t;
+ const struct match_token *t;
for (t = tokens; t->token != Opt_err; t++)
if (t->token == token && !strchr(t->pattern, '='))
@@ -2112,7 +2120,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
__func__, inode->i_ino, inode->i_size);
jbd_debug(2, "truncating inode %lu to %lld bytes\n",
inode->i_ino, inode->i_size);
+ mutex_lock(&inode->i_mutex);
ext4_truncate(inode);
+ mutex_unlock(&inode->i_mutex);
nr_truncates++;
} else {
ext4_msg(sb, KERN_DEBUG,
@@ -3226,15 +3236,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
clear_opt(sb, DELALLOC);
}
- blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
- if (test_opt(sb, DIOREAD_NOLOCK)) {
- if (blocksize < PAGE_SIZE) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "dioread_nolock if block size != PAGE_SIZE");
- goto failed_mount;
- }
- }
-
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
@@ -3276,6 +3277,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
goto failed_mount;
+ blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
if (blocksize < EXT4_MIN_BLOCK_SIZE ||
blocksize > EXT4_MAX_BLOCK_SIZE) {
ext4_msg(sb, KERN_ERR,
@@ -4506,7 +4508,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
ext4_setup_system_zone(sb);
- if (sbi->s_journal == NULL)
+ if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
ext4_commit_super(sb, 1);
#ifdef CONFIG_QUOTA
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e88748e55c0f..e712a8cb1653 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -495,7 +495,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
error = ext4_handle_dirty_metadata(handle, inode, bh);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
- dquot_free_block(inode, 1);
+ dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
ea_bdebug(bh, "refcount now=%d; releasing",
le32_to_cpu(BHDR(bh)->h_refcount));
}
@@ -784,7 +784,8 @@ inserted:
else {
/* The old block is released after updating
the inode. */
- error = dquot_alloc_block(inode, 1);
+ error = dquot_alloc_block(inode,
+ EXT4_C2B(EXT4_SB(sb), 1));
if (error)
goto cleanup;
error = ext4_journal_get_write_access(handle,
@@ -880,7 +881,7 @@ cleanup:
return error;
cleanup_dquot:
- dquot_free_block(inode, 1);
+ dquot_free_block(inode, EXT4_C2B(EXT4_SB(sb), 1));
goto cleanup;
bad_block:
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 42a853eec632..1b21e0a061d5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -68,6 +68,7 @@ int writeback_in_progress(struct backing_dev_info *bdi)
{
return test_bit(BDI_writeback_running, &bdi->state);
}
+EXPORT_SYMBOL(writeback_in_progress);
static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
{
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ceb3b29c8cdf..1f81f70dda62 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1580,6 +1580,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
req->pages[req->num_pages] = page;
req->num_pages++;
+ offset = 0;
num -= this_num;
total_len += this_num;
index++;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index bc438320cac5..d48478a864b0 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -645,7 +645,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
spin_lock(&fc->lock);
fi->attr_version = ++fc->attr_version;
- drop_nlink(inode);
+ /*
+ * If i_nlink == 0 then unlink doesn't make sense, yet this can
+ * happen if userspace filesystem is careless. It would be
+ * difficult to enforce correct nlink usage so just ignore this
+ * condition here
+ */
+ if (inode->i_nlink > 0)
+ drop_nlink(inode);
spin_unlock(&fc->lock);
fuse_invalidate_attr(inode);
fuse_invalidate_attr(dir);
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 70ba891654f8..fdef7f0e28ab 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -168,6 +168,8 @@ static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid,
case GFS2_SMALL_FH_SIZE:
case GFS2_LARGE_FH_SIZE:
case GFS2_OLD_FH_SIZE:
+ if (fh_len < GFS2_SMALL_FH_SIZE)
+ return NULL;
this.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
this.no_formal_ino |= be32_to_cpu(fh[1]);
this.no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
@@ -187,6 +189,8 @@ static struct dentry *gfs2_fh_to_parent(struct super_block *sb, struct fid *fid,
switch (fh_type) {
case GFS2_LARGE_FH_SIZE:
case GFS2_OLD_FH_SIZE:
+ if (fh_len < GFS2_LARGE_FH_SIZE)
+ return NULL;
parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32;
parent.no_formal_ino |= be32_to_cpu(fh[5]);
parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 6b1efb594d90..a392e32af480 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -258,16 +258,14 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
struct gfs2_meta_header *mh;
struct gfs2_trans *tr;
- lock_buffer(bd->bd_bh);
- gfs2_log_lock(sdp);
if (!list_empty(&bd->bd_list_tr))
- goto out;
+ return;
tr = current->journal_info;
tr->tr_touched = 1;
tr->tr_num_buf++;
list_add(&bd->bd_list_tr, &tr->tr_list_buf);
if (!list_empty(&le->le_list))
- goto out;
+ return;
set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
gfs2_meta_check(sdp, bd->bd_bh);
@@ -278,9 +276,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
sdp->sd_log_num_buf++;
list_add(&le->le_list, &sdp->sd_log_le_buf);
tr->tr_num_buf_new++;
-out:
- gfs2_log_unlock(sdp);
- unlock_buffer(bd->bd_bh);
}
static void buf_lo_before_commit(struct gfs2_sbd *sdp)
@@ -611,11 +606,9 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
struct address_space *mapping = bd->bd_bh->b_page->mapping;
struct gfs2_inode *ip = GFS2_I(mapping->host);
- lock_buffer(bd->bd_bh);
- gfs2_log_lock(sdp);
if (tr) {
if (!list_empty(&bd->bd_list_tr))
- goto out;
+ return;
tr->tr_touched = 1;
if (gfs2_is_jdata(ip)) {
tr->tr_num_buf++;
@@ -623,7 +616,7 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
}
}
if (!list_empty(&le->le_list))
- goto out;
+ return;
set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
@@ -635,9 +628,6 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
} else {
list_add_tail(&le->le_list, &sdp->sd_log_le_ordered);
}
-out:
- gfs2_log_unlock(sdp);
- unlock_buffer(bd->bd_bh);
}
static void gfs2_check_magic(struct buffer_head *bh)
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 86ac75d99d31..6ab2a77e7726 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -145,14 +145,22 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
struct gfs2_sbd *sdp = gl->gl_sbd;
struct gfs2_bufdata *bd;
+ lock_buffer(bh);
+ gfs2_log_lock(sdp);
bd = bh->b_private;
if (bd)
gfs2_assert(sdp, bd->bd_gl == gl);
else {
+ gfs2_log_unlock(sdp);
+ unlock_buffer(bh);
gfs2_attach_bufdata(gl, bh, meta);
bd = bh->b_private;
+ lock_buffer(bh);
+ gfs2_log_lock(sdp);
}
lops_add(sdp, &bd->bd_le);
+ gfs2_log_unlock(sdp);
+ unlock_buffer(bh);
}
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index dd4687ff30d0..516eb21895c6 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -179,7 +179,7 @@ static struct dentry *isofs_fh_to_parent(struct super_block *sb,
{
struct isofs_fid *ifid = (struct isofs_fid *)fid;
- if (fh_type != 2)
+ if (fh_len < 2 || fh_type != 2)
return NULL;
return isofs_export_iget(sb,
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index f2b9a571f4cf..9626bc8cbbd4 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -86,7 +86,12 @@ nope:
static void release_data_buffer(struct buffer_head *bh)
{
if (buffer_freed(bh)) {
+ WARN_ON_ONCE(buffer_dirty(bh));
clear_buffer_freed(bh);
+ clear_buffer_mapped(bh);
+ clear_buffer_new(bh);
+ clear_buffer_req(bh);
+ bh->b_bdev = NULL;
release_buffer_page(bh);
} else
put_bh(bh);
@@ -853,17 +858,35 @@ restart_loop:
* there's no point in keeping a checkpoint record for
* it. */
- /* A buffer which has been freed while still being
- * journaled by a previous transaction may end up still
- * being dirty here, but we want to avoid writing back
- * that buffer in the future after the "add to orphan"
- * operation been committed, That's not only a performance
- * gain, it also stops aliasing problems if the buffer is
- * left behind for writeback and gets reallocated for another
- * use in a different page. */
- if (buffer_freed(bh) && !jh->b_next_transaction) {
- clear_buffer_freed(bh);
- clear_buffer_jbddirty(bh);
+ /*
+ * A buffer which has been freed while still being journaled by
+ * a previous transaction.
+ */
+ if (buffer_freed(bh)) {
+ /*
+ * If the running transaction is the one containing
+ * "add to orphan" operation (b_next_transaction !=
+ * NULL), we have to wait for that transaction to
+ * commit before we can really get rid of the buffer.
+ * So just clear b_modified to not confuse transaction
+ * credit accounting and refile the buffer to
+ * BJ_Forget of the running transaction. If the just
+ * committed transaction contains "add to orphan"
+ * operation, we can completely invalidate the buffer
+ * now. We are rather throughout in that since the
+ * buffer may be still accessible when blocksize <
+ * pagesize and it is attached to the last partial
+ * page.
+ */
+ jh->b_modified = 0;
+ if (!jh->b_next_transaction) {
+ clear_buffer_freed(bh);
+ clear_buffer_jbddirty(bh);
+ clear_buffer_mapped(bh);
+ clear_buffer_new(bh);
+ clear_buffer_req(bh);
+ bh->b_bdev = NULL;
+ }
}
if (buffer_jbddirty(bh)) {
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index b2a7e5244e39..ee959a9fed62 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1845,15 +1845,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
* We're outside-transaction here. Either or both of j_running_transaction
* and j_committing_transaction may be NULL.
*/
-static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
+static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
+ int partial_page)
{
transaction_t *transaction;
struct journal_head *jh;
int may_free = 1;
- int ret;
BUFFER_TRACE(bh, "entry");
+retry:
/*
* It is safe to proceed here without the j_list_lock because the
* buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1881,10 +1882,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
* clear the buffer dirty bit at latest at the moment when the
* transaction marking the buffer as freed in the filesystem
* structures is committed because from that moment on the
- * buffer can be reallocated and used by a different page.
+ * block can be reallocated and used by a different page.
* Since the block hasn't been freed yet but the inode has
* already been added to orphan list, it is safe for us to add
* the buffer to BJ_Forget list of the newest transaction.
+ *
+ * Also we have to clear buffer_mapped flag of a truncated buffer
+ * because the buffer_head may be attached to the page straddling
+ * i_size (can happen only when blocksize < pagesize) and thus the
+ * buffer_head can be reused when the file is extended again. So we end
+ * up keeping around invalidated buffers attached to transactions'
+ * BJ_Forget list just to stop checkpointing code from cleaning up
+ * the transaction this buffer was modified in.
*/
transaction = jh->b_transaction;
if (transaction == NULL) {
@@ -1911,13 +1920,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
* committed, the buffer won't be needed any
* longer. */
JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
- ret = __dispose_buffer(jh,
+ may_free = __dispose_buffer(jh,
journal->j_running_transaction);
- journal_put_journal_head(jh);
- spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
- spin_unlock(&journal->j_state_lock);
- return ret;
+ goto zap_buffer;
} else {
/* There is no currently-running transaction. So the
* orphan record which we wrote for this file must have
@@ -1925,13 +1930,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
* the committing transaction, if it exists. */
if (journal->j_committing_transaction) {
JBUFFER_TRACE(jh, "give to committing trans");
- ret = __dispose_buffer(jh,
+ may_free = __dispose_buffer(jh,
journal->j_committing_transaction);
- journal_put_journal_head(jh);
- spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
- spin_unlock(&journal->j_state_lock);
- return ret;
+ goto zap_buffer;
} else {
/* The orphan record's transaction has
* committed. We can cleanse this buffer */
@@ -1952,10 +1953,26 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
}
/*
* The buffer is committing, we simply cannot touch
- * it. So we just set j_next_transaction to the
- * running transaction (if there is one) and mark
- * buffer as freed so that commit code knows it should
- * clear dirty bits when it is done with the buffer.
+ * it. If the page is straddling i_size we have to wait
+ * for commit and try again.
+ */
+ if (partial_page) {
+ tid_t tid = journal->j_committing_transaction->t_tid;
+
+ journal_put_journal_head(jh);
+ spin_unlock(&journal->j_list_lock);
+ jbd_unlock_bh_state(bh);
+ spin_unlock(&journal->j_state_lock);
+ unlock_buffer(bh);
+ log_wait_commit(journal, tid);
+ lock_buffer(bh);
+ goto retry;
+ }
+ /*
+ * OK, buffer won't be reachable after truncate. We just set
+ * j_next_transaction to the running transaction (if there is
+ * one) and mark buffer as freed so that commit code knows it
+ * should clear dirty bits when it is done with the buffer.
*/
set_buffer_freed(bh);
if (journal->j_running_transaction && buffer_jbddirty(bh))
@@ -1978,6 +1995,14 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
}
zap_buffer:
+ /*
+ * This is tricky. Although the buffer is truncated, it may be reused
+ * if blocksize < pagesize and it is attached to the page straddling
+ * EOF. Since the buffer might have been added to BJ_Forget list of the
+ * running transaction, journal_get_write_access() won't clear
+ * b_modified and credit accounting gets confused. So clear b_modified
+ * here. */
+ jh->b_modified = 0;
journal_put_journal_head(jh);
zap_buffer_no_jh:
spin_unlock(&journal->j_list_lock);
@@ -2026,7 +2051,8 @@ void journal_invalidatepage(journal_t *journal,
if (offset <= curr_off) {
/* This block is wholly outside the truncation point */
lock_buffer(bh);
- may_free &= journal_unmap_buffer(journal, bh);
+ may_free &= journal_unmap_buffer(journal, bh,
+ offset > 0);
unlock_buffer(bh);
}
curr_off = next_off;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9956ac68b72b..e5bfb1150cf5 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1317,6 +1317,11 @@ static void jbd2_mark_journal_empty(journal_t *journal)
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
read_lock(&journal->j_state_lock);
+ /* Is it already empty? */
+ if (sb->s_start == 0) {
+ read_unlock(&journal->j_state_lock);
+ return;
+ }
jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
journal->j_tail_sequence);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index ddcd3549c6c2..de8b4cb7c31e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -209,7 +209,8 @@ repeat:
if (!new_transaction)
goto alloc_transaction;
write_lock(&journal->j_state_lock);
- if (!journal->j_running_transaction) {
+ if (!journal->j_running_transaction &&
+ !journal->j_barrier_count) {
jbd2_get_transaction(journal, new_transaction);
new_transaction = NULL;
}
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index db3889ba8818..8608f8779143 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -138,33 +138,39 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
struct page *pg;
struct inode *inode = mapping->host;
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
+ struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
+ struct jffs2_raw_inode ri;
+ uint32_t alloc_len = 0;
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
uint32_t pageofs = index << PAGE_CACHE_SHIFT;
int ret = 0;
+ jffs2_dbg(1, "%s()\n", __func__);
+
+ if (pageofs > inode->i_size) {
+ ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
+ ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
+ if (ret)
+ return ret;
+ }
+
+ mutex_lock(&f->sem);
pg = grab_cache_page_write_begin(mapping, index, flags);
- if (!pg)
+ if (!pg) {
+ if (alloc_len)
+ jffs2_complete_reservation(c);
+ mutex_unlock(&f->sem);
return -ENOMEM;
+ }
*pagep = pg;
- jffs2_dbg(1, "%s()\n", __func__);
-
- if (pageofs > inode->i_size) {
+ if (alloc_len) {
/* Make new hole frag from old EOF to new page */
- struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
- struct jffs2_raw_inode ri;
struct jffs2_full_dnode *fn;
- uint32_t alloc_len;
jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
(unsigned int)inode->i_size, pageofs);
- ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
- ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
- if (ret)
- goto out_page;
-
- mutex_lock(&f->sem);
memset(&ri, 0, sizeof(ri));
ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -191,7 +197,6 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
if (IS_ERR(fn)) {
ret = PTR_ERR(fn);
jffs2_complete_reservation(c);
- mutex_unlock(&f->sem);
goto out_page;
}
ret = jffs2_add_full_dnode_to_inode(c, f, fn);
@@ -206,12 +211,10 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
jffs2_mark_node_obsolete(c, fn->raw);
jffs2_free_full_dnode(fn);
jffs2_complete_reservation(c);
- mutex_unlock(&f->sem);
goto out_page;
}
jffs2_complete_reservation(c);
inode->i_size = pageofs;
- mutex_unlock(&f->sem);
}
/*
@@ -220,18 +223,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
* case of a short-copy.
*/
if (!PageUptodate(pg)) {
- mutex_lock(&f->sem);
ret = jffs2_do_readpage_nolock(inode, pg);
- mutex_unlock(&f->sem);
if (ret)
goto out_page;
}
+ mutex_unlock(&f->sem);
jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
return ret;
out_page:
unlock_page(pg);
page_cache_release(pg);
+ mutex_unlock(&f->sem);
return ret;
}
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 6784d1e7a7eb..ffa8f12c7a54 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -375,14 +375,16 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
spin_unlock(&c->erase_completion_lock);
ret = jffs2_prealloc_raw_node_refs(c, jeb, 1);
- if (ret)
- return ret;
+
/* Just lock it again and continue. Nothing much can change because
we hold c->alloc_sem anyway. In fact, it's not entirely clear why
we hold c->erase_completion_lock in the majority of this function...
but that's a question for another (more caffeine-rich) day. */
spin_lock(&c->erase_completion_lock);
+ if (ret)
+ return ret;
+
waste = jeb->free_size;
jffs2_link_node_ref(c, jeb,
(jeb->offset + c->sector_size - waste) | REF_OBSOLETE,
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 74d9be19df3f..6bec5c0bc268 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1043,10 +1043,10 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c,
ops.datbuf = NULL;
ret = mtd_read_oob(c->mtd, jeb->offset, &ops);
- if (ret || ops.oobretlen != ops.ooblen) {
+ if ((ret && !mtd_is_bitflip(ret)) || ops.oobretlen != ops.ooblen) {
pr_err("cannot read OOB for EB at %08x, requested %zd bytes, read %zd bytes, error %d\n",
jeb->offset, ops.ooblen, ops.oobretlen, ret);
- if (!ret)
+ if (!ret || mtd_is_bitflip(ret))
ret = -EIO;
return ret;
}
@@ -1085,10 +1085,10 @@ int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c,
ops.datbuf = NULL;
ret = mtd_read_oob(c->mtd, jeb->offset, &ops);
- if (ret || ops.oobretlen != ops.ooblen) {
+ if ((ret && !mtd_is_bitflip(ret)) || ops.oobretlen != ops.ooblen) {
pr_err("cannot read OOB for EB at %08x, requested %zd bytes, read %zd bytes, error %d\n",
jeb->offset, ops.ooblen, ops.oobretlen, ret);
- if (!ret)
+ if (!ret || mtd_is_bitflip(ret))
ret = -EIO;
return ret;
}
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 8392cb85bd54..a3a09877ea62 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -551,6 +551,9 @@ again:
status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
if (status < 0)
break;
+ /* Resend the blocking lock request after a server reboot */
+ if (resp->status == nlm_lck_denied_grace_period)
+ continue;
if (resp->status != nlm_lck_blocked)
break;
}
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index d269ada7670e..982d2676e1f8 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -223,7 +223,7 @@ static void encode_nlm_stat(struct xdr_stream *xdr,
{
__be32 *p;
- BUG_ON(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD);
+ WARN_ON_ONCE(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD);
p = xdr_reserve_space(xdr, 4);
*p = stat;
}
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 7ef14b3c5bee..606a8dd8818c 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -40,6 +40,7 @@ struct nsm_args {
u32 proc;
char *mon_name;
+ char *nodename;
};
struct nsm_res {
@@ -94,6 +95,7 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
.vers = 3,
.proc = NLMPROC_NSM_NOTIFY,
.mon_name = nsm->sm_mon_name,
+ .nodename = utsname()->nodename,
};
struct rpc_message msg = {
.rpc_argp = &args,
@@ -430,7 +432,7 @@ static void encode_my_id(struct xdr_stream *xdr, const struct nsm_args *argp)
{
__be32 *p;
- encode_nsm_string(xdr, utsname()->nodename);
+ encode_nsm_string(xdr, argp->nodename);
p = xdr_reserve_space(xdr, 4 + 4 + 4);
*p++ = cpu_to_be32(argp->prog);
*p++ = cpu_to_be32(argp->vers);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 3250f280a171..58ddc38cfccd 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -251,10 +251,9 @@ out_err:
return err;
}
-static int lockd_up_net(struct net *net)
+static int lockd_up_net(struct svc_serv *serv, struct net *net)
{
struct lockd_net *ln = net_generic(net, lockd_net_id);
- struct svc_serv *serv = nlmsvc_rqst->rq_server;
int error;
if (ln->nlmsvc_users++)
@@ -276,10 +275,9 @@ err_rpcb:
return error;
}
-static void lockd_down_net(struct net *net)
+static void lockd_down_net(struct svc_serv *serv, struct net *net)
{
struct lockd_net *ln = net_generic(net, lockd_net_id);
- struct svc_serv *serv = nlmsvc_rqst->rq_server;
if (ln->nlmsvc_users) {
if (--ln->nlmsvc_users == 0) {
@@ -307,7 +305,7 @@ int lockd_up(struct net *net)
* Check whether we're already up and running.
*/
if (nlmsvc_rqst) {
- error = lockd_up_net(net);
+ error = lockd_up_net(nlmsvc_rqst->rq_server, net);
goto out;
}
@@ -378,7 +376,7 @@ out:
return error;
err_start:
- lockd_down_net(net);
+ lockd_down_net(serv, net);
goto destroy_and_out;
}
EXPORT_SYMBOL_GPL(lockd_up);
@@ -390,7 +388,7 @@ void
lockd_down(struct net *net)
{
mutex_lock(&nlmsvc_mutex);
- lockd_down_net(net);
+ lockd_down_net(nlmsvc_rqst->rq_server, net);
if (nlmsvc_users) {
if (--nlmsvc_users)
goto out;
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index d27aab11f324..d413af338da1 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -67,7 +67,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Obtain file pointer. Not used by FREE_ALL call. */
if (filp != NULL) {
- if ((error = nlm_lookup_file(rqstp, &file, &lock->fh)) != 0)
+ error = cast_status(nlm_lookup_file(rqstp, &file, &lock->fh));
+ if (error != 0)
goto no_locks;
*filp = file;
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 7f6a23f0244e..3a9c2470e4f9 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -162,25 +162,39 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
return bio;
}
-static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
+static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw,
sector_t isect, struct page *page,
struct pnfs_block_extent *be,
void (*end_io)(struct bio *, int err),
- struct parallel_io *par)
+ struct parallel_io *par,
+ unsigned int offset, int len)
{
+ isect = isect + (offset >> SECTOR_SHIFT);
+ dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
+ npg, rw, (unsigned long long)isect, offset, len);
retry:
if (!bio) {
bio = bl_alloc_init_bio(npg, isect, be, end_io, par);
if (!bio)
return ERR_PTR(-ENOMEM);
}
- if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
+ if (bio_add_page(bio, page, len, offset) < len) {
bio = bl_submit_bio(rw, bio);
goto retry;
}
return bio;
}
+static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
+ sector_t isect, struct page *page,
+ struct pnfs_block_extent *be,
+ void (*end_io)(struct bio *, int err),
+ struct parallel_io *par)
+{
+ return do_add_page_to_bio(bio, npg, rw, isect, page, be,
+ end_io, par, 0, PAGE_CACHE_SIZE);
+}
+
/* This is basically copied from mpage_end_io_read */
static void bl_end_io_read(struct bio *bio, int err)
{
@@ -443,6 +457,107 @@ map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be)
return;
}
+static void
+bl_read_single_end_io(struct bio *bio, int error)
+{
+ struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+ struct page *page = bvec->bv_page;
+
+ /* Only one page in bvec */
+ unlock_page(page);
+}
+
+static int
+bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be,
+ unsigned int offset, unsigned int len)
+{
+ struct bio *bio;
+ struct page *shadow_page;
+ sector_t isect;
+ char *kaddr, *kshadow_addr;
+ int ret = 0;
+
+ dprintk("%s: offset %u len %u\n", __func__, offset, len);
+
+ shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (shadow_page == NULL)
+ return -ENOMEM;
+
+ bio = bio_alloc(GFP_NOIO, 1);
+ if (bio == NULL)
+ return -ENOMEM;
+
+ isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) +
+ (offset / SECTOR_SIZE);
+
+ bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
+ bio->bi_bdev = be->be_mdev;
+ bio->bi_end_io = bl_read_single_end_io;
+
+ lock_page(shadow_page);
+ if (bio_add_page(bio, shadow_page,
+ SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) {
+ unlock_page(shadow_page);
+ bio_put(bio);
+ return -EIO;
+ }
+
+ submit_bio(READ, bio);
+ wait_on_page_locked(shadow_page);
+ if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) {
+ ret = -EIO;
+ } else {
+ kaddr = kmap_atomic(page);
+ kshadow_addr = kmap_atomic(shadow_page);
+ memcpy(kaddr + offset, kshadow_addr + offset, len);
+ kunmap_atomic(kshadow_addr);
+ kunmap_atomic(kaddr);
+ }
+ __free_page(shadow_page);
+ bio_put(bio);
+
+ return ret;
+}
+
+static int
+bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be,
+ unsigned int dirty_offset, unsigned int dirty_len,
+ bool full_page)
+{
+ int ret = 0;
+ unsigned int start, end;
+
+ if (full_page) {
+ start = 0;
+ end = PAGE_CACHE_SIZE;
+ } else {
+ start = round_down(dirty_offset, SECTOR_SIZE);
+ end = round_up(dirty_offset + dirty_len, SECTOR_SIZE);
+ }
+
+ dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len);
+ if (!be) {
+ zero_user_segments(page, start, dirty_offset,
+ dirty_offset + dirty_len, end);
+ if (start == 0 && end == PAGE_CACHE_SIZE &&
+ trylock_page(page)) {
+ SetPageUptodate(page);
+ unlock_page(page);
+ }
+ return ret;
+ }
+
+ if (start != dirty_offset)
+ ret = bl_do_readpage_sync(page, be, start,
+ dirty_offset - start);
+
+ if (!ret && (dirty_offset + dirty_len < end))
+ ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len,
+ end - dirty_offset - dirty_len);
+
+ return ret;
+}
+
/* Given an unmapped page, zero it or read in page for COW, page is locked
* by caller.
*/
@@ -476,7 +591,6 @@ init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read)
SetPageUptodate(page);
cleanup:
- bl_put_extent(cow_read);
if (bh)
free_buffer_head(bh);
if (ret) {
@@ -547,6 +661,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
struct parallel_io *par;
loff_t offset = wdata->args.offset;
size_t count = wdata->args.count;
+ unsigned int pg_offset, pg_len, saved_len;
struct page **pages = wdata->args.pages;
struct page *page;
pgoff_t index;
@@ -651,10 +766,11 @@ next_page:
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
+ bl_put_extent(cow_read);
bio = bl_submit_bio(WRITE, bio);
/* Get the next one */
be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg),
- isect, NULL);
+ isect, &cow_read);
if (!be || !is_writable(be, isect)) {
wdata->pnfs_error = -EINVAL;
goto out;
@@ -671,7 +787,26 @@ next_page:
extent_length = be->be_length -
(isect - be->be_f_offset);
}
- if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+
+ dprintk("%s offset %lld count %Zu\n", __func__, offset, count);
+ pg_offset = offset & ~PAGE_CACHE_MASK;
+ if (pg_offset + count > PAGE_CACHE_SIZE)
+ pg_len = PAGE_CACHE_SIZE - pg_offset;
+ else
+ pg_len = count;
+
+ saved_len = pg_len;
+ if (be->be_state == PNFS_BLOCK_INVALID_DATA &&
+ !bl_is_sector_init(be->be_inval, isect)) {
+ ret = bl_read_partial_page_sync(pages[i], cow_read,
+ pg_offset, pg_len, true);
+ if (ret) {
+ dprintk("%s bl_read_partial_page_sync fail %d\n",
+ __func__, ret);
+ wdata->pnfs_error = ret;
+ goto out;
+ }
+
ret = bl_mark_sectors_init(be->be_inval, isect,
PAGE_CACHE_SECTORS);
if (unlikely(ret)) {
@@ -680,15 +815,33 @@ next_page:
wdata->pnfs_error = ret;
goto out;
}
+
+ /* Expand to full page write */
+ pg_offset = 0;
+ pg_len = PAGE_CACHE_SIZE;
+ } else if ((pg_offset & (SECTOR_SIZE - 1)) ||
+ (pg_len & (SECTOR_SIZE - 1))) {
+ /* ahh, nasty case. We have to do sync full sector
+ * read-modify-write cycles.
+ */
+ unsigned int saved_offset = pg_offset;
+ ret = bl_read_partial_page_sync(pages[i], be, pg_offset,
+ pg_len, false);
+ pg_offset = round_down(pg_offset, SECTOR_SIZE);
+ pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE)
+ - pg_offset;
}
- bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE,
+ bio = do_add_page_to_bio(bio, wdata->npages - i, WRITE,
isect, pages[i], be,
- bl_end_io_write, par);
+ bl_end_io_write, par,
+ pg_offset, pg_len);
if (IS_ERR(bio)) {
wdata->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
+ offset += saved_len;
+ count -= saved_len;
isect += PAGE_CACHE_SECTORS;
last_isect = isect;
extent_length -= PAGE_CACHE_SECTORS;
@@ -706,17 +859,16 @@ next_page:
}
write_done:
- wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset);
- if (count < wdata->res.count) {
- wdata->res.count = count;
- }
+ wdata->res.count = wdata->args.count;
out:
bl_put_extent(be);
+ bl_put_extent(cow_read);
bl_submit_bio(WRITE, bio);
put_parallel(par);
return PNFS_ATTEMPTED;
out_mds:
bl_put_extent(be);
+ bl_put_extent(cow_read);
kfree(par);
return PNFS_NOT_ATTEMPTED;
}
@@ -1003,6 +1155,7 @@ static const struct nfs_pageio_ops bl_pg_write_ops = {
static struct pnfs_layoutdriver_type blocklayout_type = {
.id = LAYOUT_BLOCK_VOLUME,
.name = "LAYOUT_BLOCK_VOLUME",
+ .owner = THIS_MODULE,
.read_pagelist = bl_read_pagelist,
.write_pagelist = bl_write_pagelist,
.alloc_layout_hdr = bl_alloc_layout_hdr,
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 03350690118e..39bb51a8dd18 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -41,6 +41,7 @@
#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
struct block_mount_id {
spinlock_t bm_lock; /* protects list */
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 60f7e4ec842c..37f6de36973c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -694,8 +694,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
*/
static void nfs_destroy_server(struct nfs_server *server)
{
- if (!(server->flags & NFS_MOUNT_LOCAL_FLOCK) ||
- !(server->flags & NFS_MOUNT_LOCAL_FCNTL))
+ if (server->nlm_host)
nlmclnt_done(server->nlm_host);
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8789210c6905..a0daac7ad508 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1103,7 +1103,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
struct nfs_fattr *fattr = NULL;
int error;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && (nd->flags & LOOKUP_RCU))
return -ECHILD;
parent = dget_parent(dentry);
@@ -1219,11 +1219,14 @@ static int nfs_dentry_delete(const struct dentry *dentry)
}
+/* Ensure that we revalidate inode->i_nlink */
static void nfs_drop_nlink(struct inode *inode)
{
spin_lock(&inode->i_lock);
- if (inode->i_nlink > 0)
- drop_nlink(inode);
+ /* drop the inode if we're reasonably sure this is the last link */
+ if (inode->i_nlink == 1)
+ clear_nlink(inode);
+ NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR;
spin_unlock(&inode->i_lock);
}
@@ -1238,8 +1241,8 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
- drop_nlink(inode);
nfs_complete_unlink(dentry, inode);
+ nfs_drop_nlink(inode);
}
iput(inode);
}
@@ -1502,7 +1505,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
struct iattr attr;
int openflags, ret = 0;
- if (nd->flags & LOOKUP_RCU)
+ if (nd && (nd->flags & LOOKUP_RCU))
return -ECHILD;
inode = dentry->d_inode;
@@ -1800,10 +1803,8 @@ static int nfs_safe_remove(struct dentry *dentry)
if (inode != NULL) {
nfs_inode_return_delegation(inode);
error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
- /* The VFS may want to delete this inode */
if (error == 0)
nfs_drop_nlink(inode);
- nfs_mark_for_revalidate(inode);
} else
error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
if (error == -ENOENT)
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index b3924b8a6000..786cd6533713 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -214,7 +214,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
{
char buf1[NFS_DNS_HOSTNAME_MAXLEN+1];
struct nfs_dns_ent key, *item;
- unsigned long ttl;
+ unsigned int ttl;
ssize_t len;
int ret = -EINVAL;
@@ -237,7 +237,8 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
key.namelen = len;
memset(&key.h, 0, sizeof(key.h));
- ttl = get_expiry(&buf);
+ if (get_uint(&buf, &ttl) < 0)
+ goto out;
if (ttl == 0)
goto out;
key.h.expiry_time = ttl + seconds_since_boot();
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 98d664349f7e..9131b17f3a72 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -747,9 +747,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
}
if (!(im.im_status & IDMAP_STATUS_SUCCESS)) {
- ret = mlen;
- complete_request_key(cons, -ENOKEY);
- goto out_incomplete;
+ ret = -ENOKEY;
+ goto out;
}
namelen_in = strnlen(im.im_name, IDMAP_NAMESZ);
@@ -766,7 +765,6 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
out:
complete_request_key(cons, ret);
-out_incomplete:
return ret;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e8bbfa5b3500..edf411988bf3 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -152,7 +152,7 @@ static void nfs_zap_caches_locked(struct inode *inode)
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = jiffies;
- memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
+ memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE;
else
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b777bdaba4c5..33aa76fec3aa 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -277,8 +277,9 @@ extern void nfs_sb_active(struct super_block *sb);
extern void nfs_sb_deactive(struct super_block *sb);
/* namespace.c */
+#define NFS_PATH_CANONICAL 1
extern char *nfs_path(char **p, struct dentry *dentry,
- char *buffer, ssize_t buflen);
+ char *buffer, ssize_t buflen, unsigned flags);
extern struct vfsmount *nfs_d_automount(struct path *path);
#ifdef CONFIG_NFS_V4
rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
@@ -371,7 +372,7 @@ static inline char *nfs_devname(struct dentry *dentry,
char *buffer, ssize_t buflen)
{
char *dummy;
- return nfs_path(&dummy, dentry, buffer, buflen);
+ return nfs_path(&dummy, dentry, buffer, buflen, NFS_PATH_CANONICAL);
}
/*
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 8e65c7f1f87c..015f71f8f62c 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -181,7 +181,7 @@ int nfs_mount(struct nfs_mount_request *info)
else
msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT];
- status = rpc_call_sync(mnt_clnt, &msg, 0);
+ status = rpc_call_sync(mnt_clnt, &msg, RPC_TASK_SOFT|RPC_TASK_TIMEOUT);
rpc_shutdown_client(mnt_clnt);
if (status < 0)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index d51868e5683c..43275167649a 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -37,6 +37,7 @@ static struct vfsmount *nfs_do_submount(struct dentry *dentry,
* @dentry - pointer to dentry
* @buffer - result buffer
* @buflen - length of buffer
+ * @flags - options (see below)
*
* Helper function for constructing the server pathname
* by arbitrary hashed dentry.
@@ -44,8 +45,14 @@ static struct vfsmount *nfs_do_submount(struct dentry *dentry,
* This is mainly for use in figuring out the path on the
* server side when automounting on top of an existing partition
* and in generating /proc/mounts and friends.
+ *
+ * Supported flags:
+ * NFS_PATH_CANONICAL: ensure there is exactly one slash after
+ * the original device (export) name
+ * (if unset, the original name is returned verbatim)
*/
-char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen)
+char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen,
+ unsigned flags)
{
char *end;
int namelen;
@@ -78,7 +85,7 @@ rename_retry:
rcu_read_unlock();
goto rename_retry;
}
- if (*end != '/') {
+ if ((flags & NFS_PATH_CANONICAL) && *end != '/') {
if (--buflen < 0) {
spin_unlock(&dentry->d_lock);
rcu_read_unlock();
@@ -95,9 +102,11 @@ rename_retry:
return end;
}
namelen = strlen(base);
- /* Strip off excess slashes in base string */
- while (namelen > 0 && base[namelen - 1] == '/')
- namelen--;
+ if (flags & NFS_PATH_CANONICAL) {
+ /* Strip off excess slashes in base string */
+ while (namelen > 0 && base[namelen - 1] == '/')
+ namelen--;
+ }
buflen -= namelen;
if (buflen < 0) {
spin_unlock(&dentry->d_lock);
@@ -244,11 +253,31 @@ out_nofree:
return mnt;
}
+static int
+nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+ if (NFS_FH(dentry->d_inode)->size != 0)
+ return nfs_getattr(mnt, dentry, stat);
+ generic_fillattr(dentry->d_inode, stat);
+ return 0;
+}
+
+static int
+nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr)
+{
+ if (NFS_FH(dentry->d_inode)->size != 0)
+ return nfs_setattr(dentry, attr);
+ return -EACCES;
+}
+
const struct inode_operations nfs_mountpoint_inode_operations = {
.getattr = nfs_getattr,
+ .setattr = nfs_setattr,
};
const struct inode_operations nfs_referral_inode_operations = {
+ .getattr = nfs_namespace_getattr,
+ .setattr = nfs_namespace_setattr,
};
static void nfs_expire_automounts(struct work_struct *work)
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 5242eae6711a..a7a043d272da 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -69,7 +69,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
nfs_fattr_init(info->fattr);
status = rpc_call_sync(client, &msg, 0);
dprintk("%s: reply fsinfo: %d\n", __func__, status);
- if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
+ if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) {
msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
msg.rpc_resp = info->fattr;
status = rpc_call_sync(client, &msg, 0);
@@ -644,7 +644,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
u64 cookie, struct page **pages, unsigned int count, int plus)
{
struct inode *dir = dentry->d_inode;
- __be32 *verf = NFS_COOKIEVERF(dir);
+ __be32 *verf = NFS_I(dir)->cookieverf;
struct nfs3_readdirargs arg = {
.fh = NFS_FH(dir),
.cookie = cookie,
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index a7f3dedc4ec7..b604be2dae6a 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -81,7 +81,8 @@ static char *nfs_path_component(const char *nfspath, const char *end)
static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
{
char *limit;
- char *path = nfs_path(&limit, dentry, buffer, buflen);
+ char *path = nfs_path(&limit, dentry, buffer, buflen,
+ NFS_PATH_CANONICAL);
if (!IS_ERR(path)) {
char *path_component = nfs_path_component(path, limit);
if (path_component)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d60c2d870ab0..30351877aee2 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -307,8 +307,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
dprintk("%s ERROR: %d Reset session\n", __func__,
errorcode);
nfs4_schedule_session_recovery(clp->cl_session);
- exception->retry = 1;
- break;
+ goto wait_on_recovery;
#endif /* defined(CONFIG_NFS_V4_1) */
case -NFS4ERR_FILE_OPEN:
if (exception->timeout > HZ) {
@@ -1478,9 +1477,11 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
data->timestamp = jiffies;
if (nfs4_setup_sequence(data->o_arg.server,
&data->o_arg.seq_args,
- &data->o_res.seq_res, task))
- return;
- rpc_call_start(task);
+ &data->o_res.seq_res,
+ task) != 0)
+ nfs_release_seqid(data->o_arg.seqid);
+ else
+ rpc_call_start(task);
return;
unlock_no_action:
rcu_read_unlock();
@@ -2097,9 +2098,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
if (nfs4_setup_sequence(NFS_SERVER(calldata->inode),
&calldata->arg.seq_args,
&calldata->res.seq_res,
- task))
- goto out;
- rpc_call_start(task);
+ task) != 0)
+ nfs_release_seqid(calldata->arg.seqid);
+ else
+ rpc_call_start(task);
out:
dprintk("%s: done!\n", __func__);
}
@@ -3150,11 +3152,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
dentry->d_parent->d_name.name,
dentry->d_name.name,
(unsigned long long)cookie);
- nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
+ nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
res.pgbase = args.pgbase;
status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
if (status >= 0) {
- memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
+ memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE);
status += args.pgbase;
}
@@ -4306,6 +4308,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN)
rpc_restart_call_prepare(task);
}
+ nfs_release_seqid(calldata->arg.seqid);
}
static void nfs4_locku_prepare(struct rpc_task *task, void *data)
@@ -4322,9 +4325,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
calldata->timestamp = jiffies;
if (nfs4_setup_sequence(calldata->server,
&calldata->arg.seq_args,
- &calldata->res.seq_res, task))
- return;
- rpc_call_start(task);
+ &calldata->res.seq_res,
+ task) != 0)
+ nfs_release_seqid(calldata->arg.seqid);
+ else
+ rpc_call_start(task);
}
static const struct rpc_call_ops nfs4_locku_ops = {
@@ -4469,7 +4474,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
/* Do we need to do an open_to_lock_owner? */
if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0)
- return;
+ goto out_release_lock_seqid;
data->arg.open_stateid = &state->stateid;
data->arg.new_lock_owner = 1;
data->res.open_seqid = data->arg.open_seqid;
@@ -4478,10 +4483,15 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
data->timestamp = jiffies;
if (nfs4_setup_sequence(data->server,
&data->arg.seq_args,
- &data->res.seq_res, task))
+ &data->res.seq_res,
+ task) == 0) {
+ rpc_call_start(task);
return;
- rpc_call_start(task);
- dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
+ }
+ nfs_release_seqid(data->arg.open_seqid);
+out_release_lock_seqid:
+ nfs_release_seqid(data->arg.lock_seqid);
+ dprintk("%s: done!, ret = %d\n", __func__, task->tk_status);
}
static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata)
@@ -5729,13 +5739,26 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data)
rpc_call_start(task);
}
+static void nfs41_sequence_prepare_privileged(struct rpc_task *task, void *data)
+{
+ rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+ nfs41_sequence_prepare(task, data);
+}
+
static const struct rpc_call_ops nfs41_sequence_ops = {
.rpc_call_done = nfs41_sequence_call_done,
.rpc_call_prepare = nfs41_sequence_prepare,
.rpc_release = nfs41_sequence_release,
};
-static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
+static const struct rpc_call_ops nfs41_sequence_privileged_ops = {
+ .rpc_call_done = nfs41_sequence_call_done,
+ .rpc_call_prepare = nfs41_sequence_prepare_privileged,
+ .rpc_release = nfs41_sequence_release,
+};
+
+static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred,
+ const struct rpc_call_ops *seq_ops)
{
struct nfs4_sequence_data *calldata;
struct rpc_message msg = {
@@ -5745,7 +5768,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
struct rpc_task_setup task_setup_data = {
.rpc_client = clp->cl_rpcclient,
.rpc_message = &msg,
- .callback_ops = &nfs41_sequence_ops,
+ .callback_ops = seq_ops,
.flags = RPC_TASK_ASYNC | RPC_TASK_SOFT,
};
@@ -5772,7 +5795,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
return 0;
- task = _nfs41_proc_sequence(clp, cred);
+ task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_ops);
if (IS_ERR(task))
ret = PTR_ERR(task);
else
@@ -5786,7 +5809,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
struct rpc_task *task;
int ret;
- task = _nfs41_proc_sequence(clp, cred);
+ task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_privileged_ops);
if (IS_ERR(task)) {
ret = PTR_ERR(task);
goto out;
@@ -5966,11 +5989,58 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
dprintk("<-- %s\n", __func__);
}
+static size_t max_response_pages(struct nfs_server *server)
+{
+ u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
+ return nfs_page_array_len(0, max_resp_sz);
+}
+
+static void nfs4_free_pages(struct page **pages, size_t size)
+{
+ int i;
+
+ if (!pages)
+ return;
+
+ for (i = 0; i < size; i++) {
+ if (!pages[i])
+ break;
+ __free_page(pages[i]);
+ }
+ kfree(pages);
+}
+
+static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
+{
+ struct page **pages;
+ int i;
+
+ pages = kcalloc(size, sizeof(struct page *), gfp_flags);
+ if (!pages) {
+ dprintk("%s: can't alloc array of %zu pages\n", __func__, size);
+ return NULL;
+ }
+
+ for (i = 0; i < size; i++) {
+ pages[i] = alloc_page(gfp_flags);
+ if (!pages[i]) {
+ dprintk("%s: failed to allocate page\n", __func__);
+ nfs4_free_pages(pages, size);
+ return NULL;
+ }
+ }
+
+ return pages;
+}
+
static void nfs4_layoutget_release(void *calldata)
{
struct nfs4_layoutget *lgp = calldata;
+ struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+ size_t max_pages = max_response_pages(server);
dprintk("--> %s\n", __func__);
+ nfs4_free_pages(lgp->args.layout.pages, max_pages);
put_nfs_open_context(lgp->args.ctx);
kfree(calldata);
dprintk("<-- %s\n", __func__);
@@ -5982,9 +6052,10 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = {
.rpc_release = nfs4_layoutget_release,
};
-int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
+int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
{
struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+ size_t max_pages = max_response_pages(server);
struct rpc_task *task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
@@ -6002,6 +6073,13 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
dprintk("--> %s\n", __func__);
+ lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
+ if (!lgp->args.layout.pages) {
+ nfs4_layoutget_release(lgp);
+ return -ENOMEM;
+ }
+ lgp->args.layout.pglen = max_pages * PAGE_SIZE;
+
lgp->res.layoutp = &lgp->args.layout;
lgp->res.seq_res.sr_slot = NULL;
nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
@@ -6047,12 +6125,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
return;
}
spin_lock(&lo->plh_inode->i_lock);
- if (task->tk_status == 0) {
- if (lrp->res.lrs_present) {
- pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
- } else
- BUG_ON(!list_empty(&lo->plh_segs));
- }
+ if (task->tk_status == 0 && lrp->res.lrs_present)
+ pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
lo->plh_block_lgets--;
spin_unlock(&lo->plh_inode->i_lock);
dprintk("<-- %s\n", __func__);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c54aae364bee..c8ac9a1461c2 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6081,7 +6081,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_open(xdr, res);
if (status)
goto out;
- if (decode_getfh(xdr, &res->fh) != 0)
+ status = decode_getfh(xdr, &res->fh);
+ if (status)
goto out;
if (decode_getfattr(xdr, res->f_attr, res->server) != 0)
goto out;
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 1afe74c42c8a..65538f569f42 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -589,6 +589,7 @@ static struct pnfs_layoutdriver_type objlayout_type = {
.flags = PNFS_LAYOUTRET_ON_SETATTR |
PNFS_LAYOUTRET_ON_ERROR,
+ .owner = THIS_MODULE,
.alloc_layout_hdr = objlayout_alloc_layout_hdr,
.free_layout_hdr = objlayout_free_layout_hdr,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 38512bcd2e98..059e2c350ad7 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -574,9 +574,6 @@ send_layoutget(struct pnfs_layout_hdr *lo,
struct nfs_server *server = NFS_SERVER(ino);
struct nfs4_layoutget *lgp;
struct pnfs_layout_segment *lseg = NULL;
- struct page **pages = NULL;
- int i;
- u32 max_resp_sz, max_pages;
dprintk("--> %s\n", __func__);
@@ -585,20 +582,6 @@ send_layoutget(struct pnfs_layout_hdr *lo,
if (lgp == NULL)
return NULL;
- /* allocate pages for xdr post processing */
- max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
- max_pages = nfs_page_array_len(0, max_resp_sz);
-
- pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
- if (!pages)
- goto out_err_free;
-
- for (i = 0; i < max_pages; i++) {
- pages[i] = alloc_page(gfp_flags);
- if (!pages[i])
- goto out_err_free;
- }
-
lgp->args.minlength = PAGE_CACHE_SIZE;
if (lgp->args.minlength > range->length)
lgp->args.minlength = range->length;
@@ -607,39 +590,19 @@ send_layoutget(struct pnfs_layout_hdr *lo,
lgp->args.type = server->pnfs_curr_ld->id;
lgp->args.inode = ino;
lgp->args.ctx = get_nfs_open_context(ctx);
- lgp->args.layout.pages = pages;
- lgp->args.layout.pglen = max_pages * PAGE_SIZE;
lgp->lsegpp = &lseg;
lgp->gfp_flags = gfp_flags;
/* Synchronously retrieve layout information from server and
* store in lseg.
*/
- nfs4_proc_layoutget(lgp);
+ nfs4_proc_layoutget(lgp, gfp_flags);
if (!lseg) {
/* remember that LAYOUTGET failed and suspend trying */
set_bit(lo_fail_bit(range->iomode), &lo->plh_flags);
}
- /* free xdr pages */
- for (i = 0; i < max_pages; i++)
- __free_page(pages[i]);
- kfree(pages);
-
return lseg;
-
-out_err_free:
- /* free any allocated xdr pages, lgp as it's not used */
- if (pages) {
- for (i = 0; i < max_pages; i++) {
- if (!pages[i])
- break;
- __free_page(pages[i]);
- }
- kfree(pages);
- }
- kfree(lgp);
- return NULL;
}
/* Initiates a LAYOUTRETURN(FILE) */
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 442ebf68eeec..8d95818330cc 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -161,7 +161,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server,
struct pnfs_devicelist *devlist);
extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
struct pnfs_device *dev);
-extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
+extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags);
extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
/* pnfs.c */
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 4ac7fca7e4bf..c252161e7b1d 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -812,7 +812,7 @@ static int nfs_show_devname(struct seq_file *m, struct dentry *root)
int err = 0;
if (!page)
return -ENOMEM;
- devname = nfs_path(&dummy, root, page, PAGE_SIZE);
+ devname = nfs_path(&dummy, root, page, PAGE_SIZE, 0);
if (IS_ERR(devname))
err = PTR_ERR(devname);
else
@@ -1138,7 +1138,7 @@ static int nfs_get_option_str(substring_t args[], char **option)
{
kfree(*option);
*option = match_strdup(args);
- return !option;
+ return !*option;
}
static int nfs_get_option_ul(substring_t args[], unsigned long *option)
@@ -1886,6 +1886,7 @@ static int nfs_validate_mount_data(void *options,
memcpy(sap, &data->addr, sizeof(data->addr));
args->nfs_server.addrlen = sizeof(data->addr);
+ args->nfs_server.port = ntohs(data->addr.sin_port);
if (!nfs_verify_server_address(sap))
goto out_no_address;
@@ -2598,6 +2599,7 @@ static int nfs4_validate_mount_data(void *options,
return -EFAULT;
if (!nfs_verify_server_address(sap))
goto out_no_address;
+ args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
if (data->auth_flavourlen) {
if (data->auth_flavourlen > 1)
@@ -3146,4 +3148,6 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
return res;
}
+MODULE_ALIAS("nfs4");
+
#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 8e9689abbc0c..e9a020f283f7 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -401,7 +401,7 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc)
int migrated, i, err;
/* listsize */
- err = get_int(mesg, &fsloc->locations_count);
+ err = get_uint(mesg, &fsloc->locations_count);
if (err)
return err;
if (fsloc->locations_count > MAX_FS_LOCATIONS)
@@ -459,7 +459,7 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp)
return -EINVAL;
for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) {
- err = get_int(mesg, &f->pseudoflavor);
+ err = get_uint(mesg, &f->pseudoflavor);
if (err)
return err;
/*
@@ -468,7 +468,7 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp)
* problem at export time instead of when a client fails
* to authenticate.
*/
- err = get_int(mesg, &f->flags);
+ err = get_uint(mesg, &f->flags);
if (err)
return err;
/* Only some flags are allowed to differ between flavors: */
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 322d11ce06a4..01b090d135b7 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -581,7 +581,7 @@ numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namel
/* Just to make sure it's null-terminated: */
memcpy(buf, name, namelen);
buf[namelen] = '\0';
- ret = kstrtouint(name, 10, id);
+ ret = kstrtouint(buf, 10, id);
return ret == 0;
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 987e719fbae8..dd0308d65f66 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -194,6 +194,7 @@ static __be32
do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
{
struct svc_fh *resfh;
+ int accmode;
__be32 status;
resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
@@ -253,9 +254,10 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
/* set reply cache */
fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh,
&resfh->fh_handle);
- if (!open->op_created)
- status = do_open_permission(rqstp, resfh, open,
- NFSD_MAY_NOP);
+ accmode = NFSD_MAY_NOP;
+ if (open->op_created)
+ accmode |= NFSD_MAY_OWNER_OVERRIDE;
+ status = do_open_permission(rqstp, resfh, open, accmode);
set_change_info(&open->op_cinfo, current_fh);
fh_dup2(current_fh, resfh);
out:
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e79c24e232f4..abd785e5fd2d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1053,6 +1053,8 @@ free_client(struct nfs4_client *clp)
put_group_info(clp->cl_cred.cr_group_info);
kfree(clp->cl_principal);
kfree(clp->cl_name.data);
+ idr_remove_all(&clp->cl_stateids);
+ idr_destroy(&clp->cl_stateids);
kfree(clp);
}
@@ -2356,7 +2358,7 @@ nfsd4_init_slabs(void)
if (openowner_slab == NULL)
goto out_nomem;
lockowner_slab = kmem_cache_create("nfsd4_lockowners",
- sizeof(struct nfs4_openowner), 0, 0, NULL);
+ sizeof(struct nfs4_lockowner), 0, 0, NULL);
if (lockowner_slab == NULL)
goto out_nomem;
file_slab = kmem_cache_create("nfsd4_files",
@@ -3783,6 +3785,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
nfsd4_close_open_stateid(stp);
+ release_last_closed_stateid(oo);
oo->oo_last_closed_stid = stp;
/* place unused nfs4_stateowners on so_close_lru list to be
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 283d15e9f46d..967d68eef282 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2920,11 +2920,16 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
len = maxcount;
v = 0;
while (len > 0) {
- pn = resp->rqstp->rq_resused++;
+ pn = resp->rqstp->rq_resused;
+ if (!resp->rqstp->rq_respages[pn]) { /* ran out of pages */
+ maxcount -= len;
+ break;
+ }
resp->rqstp->rq_vec[v].iov_base =
page_address(resp->rqstp->rq_respages[pn]);
resp->rqstp->rq_vec[v].iov_len =
len < PAGE_SIZE ? len : PAGE_SIZE;
+ resp->rqstp->rq_resused++;
v++;
len -= PAGE_SIZE;
}
@@ -2970,6 +2975,8 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
return nfserr;
if (resp->xbuf->page_len)
return nfserr_resource;
+ if (!resp->rqstp->rq_respages[resp->rqstp->rq_resused])
+ return nfserr_resource;
page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]);
@@ -3019,6 +3026,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
return nfserr;
if (resp->xbuf->page_len)
return nfserr_resource;
+ if (!resp->rqstp->rq_respages[resp->rqstp->rq_resused])
+ return nfserr_resource;
RESERVE_SPACE(NFS4_VERIFIER_SIZE);
savep = p;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3ab12eb2967f..d014727fc8f2 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -663,9 +663,7 @@ static ssize_t __write_ports_addfd(char *buf)
err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT);
if (err < 0) {
- if (nfsd_serv->sv_nrthreads == 1)
- svc_shutdown_net(nfsd_serv, net);
- svc_destroy(nfsd_serv);
+ nfsd_destroy(net);
return err;
}
@@ -734,9 +732,7 @@ out_close:
svc_xprt_put(xprt);
}
out_err:
- if (nfsd_serv->sv_nrthreads == 1)
- svc_shutdown_net(nfsd_serv, net);
- svc_destroy(nfsd_serv);
+ nfsd_destroy(net);
return err;
}
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 1671429ffa66..1336a6512cdc 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -73,6 +73,17 @@ int nfsd_nrpools(void);
int nfsd_get_nrthreads(int n, int *);
int nfsd_set_nrthreads(int n, int *);
+static inline void nfsd_destroy(struct net *net)
+{
+ int destroy = (nfsd_serv->sv_nrthreads == 1);
+
+ if (destroy)
+ svc_shutdown_net(nfsd_serv, net);
+ svc_destroy(nfsd_serv);
+ if (destroy)
+ nfsd_serv = NULL;
+}
+
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
#ifdef CONFIG_NFSD_V2_ACL
extern struct svc_version nfsd_acl_version2;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index bcda12a3f082..53459b055cfc 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -254,8 +254,6 @@ static void nfsd_shutdown(void)
static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
{
- /* When last nfsd thread exits we need to do some clean-up */
- nfsd_serv = NULL;
nfsd_shutdown();
svc_rpcb_cleanup(serv, net);
@@ -332,6 +330,7 @@ static int nfsd_get_default_max_blksize(void)
int nfsd_create_serv(void)
{
int error;
+ struct net *net = current->nsproxy->net_ns;
WARN_ON(!mutex_is_locked(&nfsd_mutex));
if (nfsd_serv) {
@@ -346,7 +345,7 @@ int nfsd_create_serv(void)
if (nfsd_serv == NULL)
return -ENOMEM;
- error = svc_bind(nfsd_serv, current->nsproxy->net_ns);
+ error = svc_bind(nfsd_serv, net);
if (error < 0) {
svc_destroy(nfsd_serv);
return error;
@@ -427,11 +426,7 @@ int nfsd_set_nrthreads(int n, int *nthreads)
if (err)
break;
}
-
- if (nfsd_serv->sv_nrthreads == 1)
- svc_shutdown_net(nfsd_serv, net);
- svc_destroy(nfsd_serv);
-
+ nfsd_destroy(net);
return err;
}
@@ -478,9 +473,7 @@ out_shutdown:
if (error < 0 && !nfsd_up_before)
nfsd_shutdown();
out_destroy:
- if (nfsd_serv->sv_nrthreads == 1)
- svc_shutdown_net(nfsd_serv, net);
- svc_destroy(nfsd_serv); /* Release server */
+ nfsd_destroy(net); /* Release server */
out:
mutex_unlock(&nfsd_mutex);
return error;
@@ -563,12 +556,13 @@ nfsd(void *vrqstp)
nfsdstats.th_cnt --;
out:
- if (rqstp->rq_server->sv_nrthreads == 1)
- svc_shutdown_net(rqstp->rq_server, &init_net);
+ rqstp->rq_server = NULL;
/* Release the thread */
svc_exit_thread(rqstp);
+ nfsd_destroy(&init_net);
+
/* Release module */
mutex_unlock(&nfsd_mutex);
module_put_and_exit(0);
@@ -656,7 +650,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
}
/* Store reply in cache. */
- nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
+ nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1);
return 1;
}
@@ -682,9 +676,7 @@ int nfsd_pool_stats_release(struct inode *inode, struct file *file)
mutex_lock(&nfsd_mutex);
/* this function really, really should have been called svc_put() */
- if (nfsd_serv->sv_nrthreads == 1)
- svc_shutdown_net(nfsd_serv, net);
- svc_destroy(nfsd_serv);
+ nfsd_destroy(net);
mutex_unlock(&nfsd_mutex);
return ret;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 568666156ea4..f03160106b95 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1477,13 +1477,19 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
case NFS3_CREATE_EXCLUSIVE:
if ( dchild->d_inode->i_mtime.tv_sec == v_mtime
&& dchild->d_inode->i_atime.tv_sec == v_atime
- && dchild->d_inode->i_size == 0 )
+ && dchild->d_inode->i_size == 0 ) {
+ if (created)
+ *created = 1;
break;
+ }
case NFS4_CREATE_EXCLUSIVE4_1:
if ( dchild->d_inode->i_mtime.tv_sec == v_mtime
&& dchild->d_inode->i_atime.tv_sec == v_atime
- && dchild->d_inode->i_size == 0 )
+ && dchild->d_inode->i_size == 0 ) {
+ if (created)
+ *created = 1;
goto set_attr;
+ }
/* fallthru */
case NFS3_CREATE_GUARDED:
err = nfserr_exist;
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 6fe98ed58545..372949338915 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -666,8 +666,11 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
if (ret < 0)
printk(KERN_ERR "NILFS: GC failed during preparation: "
"cannot read source blocks: err=%d\n", ret);
- else
+ else {
+ if (nilfs_sb_need_update(nilfs))
+ set_nilfs_discontinued(nilfs);
ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
+ }
nilfs_remove_all_gcinodes(nilfs);
clear_nilfs_gc_running(nilfs);
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f35794b97e8e..a50636025364 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -21,6 +21,7 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
if ((old->path.mnt == new->path.mnt) &&
(old->path.dentry == new->path.dentry))
return true;
+ break;
case (FSNOTIFY_EVENT_NONE):
return true;
default:
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 8445fbc8985c..6f292dd53c6d 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -579,8 +579,6 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
/* don't allow invalid bits: we don't want flags set */
mask = inotify_arg_to_mask(arg);
- if (unlikely(!(mask & IN_ALL_EVENTS)))
- return -EINVAL;
fsn_mark = fsnotify_find_inode_mark(group, inode);
if (!fsn_mark)
@@ -632,8 +630,6 @@ static int inotify_new_watch(struct fsnotify_group *group,
/* don't allow invalid bits: we don't want flags set */
mask = inotify_arg_to_mask(arg);
- if (unlikely(!(mask & IN_ALL_EVENTS)))
- return -EINVAL;
tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
if (unlikely(!tmp_i_mark))
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 657743254eb9..340bd02839be 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -593,9 +593,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
level = ocfs2_iocb_rw_locked_level(iocb);
ocfs2_rw_unlock(inode, level);
+ inode_dio_done(inode);
if (is_async)
aio_complete(iocb, ret, 0);
- inode_dio_done(inode);
}
/*
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 81a4cd22f80b..231eab2b2d07 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2545,6 +2545,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
* everything is up to the caller :) */
status = ocfs2_should_refresh_lock_res(lockres);
if (status < 0) {
+ ocfs2_cluster_unlock(osb, lockres, level);
mlog_errno(status);
goto bail;
}
@@ -2553,8 +2554,10 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
ocfs2_complete_lock_res_refresh(lockres, status);
- if (status < 0)
+ if (status < 0) {
+ ocfs2_cluster_unlock(osb, lockres, level);
mlog_errno(status);
+ }
ocfs2_track_lock_refresh(lockres);
}
bail:
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index f169da4624fd..b7e74b580c0f 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -642,7 +642,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle,
* cluster groups will be staying in cache for the duration of
* this operation.
*/
- ac->ac_allow_chain_relink = 0;
+ ac->ac_disable_chain_relink = 1;
/* Claim the first region */
status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits,
@@ -1823,7 +1823,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
* Do this *after* figuring out how many bits we're taking out
* of our target group.
*/
- if (ac->ac_allow_chain_relink &&
+ if (!ac->ac_disable_chain_relink &&
(prev_group_bh) &&
(ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) {
status = ocfs2_relink_block_group(handle, alloc_inode,
@@ -1928,7 +1928,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
victim = ocfs2_find_victim_chain(cl);
ac->ac_chain = victim;
- ac->ac_allow_chain_relink = 1;
status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
res, &bits_left);
@@ -1947,7 +1946,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
* searching each chain in order. Don't allow chain relinking
* because we only calculate enough journal credits for one
* relink per alloc. */
- ac->ac_allow_chain_relink = 0;
+ ac->ac_disable_chain_relink = 1;
for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) {
if (i == victim)
continue;
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index b8afabfeede4..a36d0aa50911 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -49,7 +49,7 @@ struct ocfs2_alloc_context {
/* these are used by the chain search */
u16 ac_chain;
- int ac_allow_chain_relink;
+ int ac_disable_chain_relink;
group_search_t *ac_group_search;
u64 ac_last_group;
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 0ba9ea1e7961..2e3ea308c144 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7189,7 +7189,7 @@ int ocfs2_init_security_and_acl(struct inode *dir,
struct buffer_head *dir_bh = NULL;
ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
- if (!ret) {
+ if (ret) {
mlog_errno(ret);
goto leave;
}
diff --git a/fs/open.c b/fs/open.c
index 3f1108b19143..cf1d34fc5e69 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -882,9 +882,10 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
int lookup_flags = 0;
int acc_mode;
- if (!(flags & O_CREAT))
- mode = 0;
- op->mode = mode;
+ if (flags & O_CREAT)
+ op->mode = (mode & S_IALLUGO) | S_IFREG;
+ else
+ op->mode = 0;
/* Must never be set by userspace */
flags &= ~FMODE_NONOTIFY;
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 7fcd0d60a968..b8730d9ebaee 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -115,7 +115,13 @@ u64 stable_page_flags(struct page *page)
u |= 1 << KPF_COMPOUND_TAIL;
if (PageHuge(page))
u |= 1 << KPF_HUGE;
- else if (PageTransCompound(page))
+ /*
+ * PageTransCompound can be true for non-huge compound pages (slab
+ * pages or pages allocated by drivers with __GFP_COMP) because it
+ * just checks PG_head/PG_tail, so we need to check PageLRU to make
+ * sure a given page is a thp, not a non-huge compound page.
+ */
+ else if (PageTransCompound(page) && PageLRU(compound_trans_head(page)))
u |= 1 << KPF_THP;
/*
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 21d836f40292..ab5352101db5 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -462,9 +462,6 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
err = ERR_PTR(-ENOMEM);
inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
- if (h)
- sysctl_head_finish(h);
-
if (!inode)
goto out;
@@ -473,6 +470,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
d_add(dentry, inode);
out:
+ if (h)
+ sysctl_head_finish(h);
sysctl_head_finish(head);
return err;
}
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 6c91b17cb5fd..98bf0c2738d6 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -45,10 +45,13 @@ static cputime64_t get_iowait_time(int cpu)
static u64 get_idle_time(int cpu)
{
- u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL);
+ u64 idle, idle_time = -1ULL;
+
+ if (cpu_online(cpu))
+ idle_time = get_cpu_idle_time_us(cpu, NULL);
if (idle_time == -1ULL)
- /* !NO_HZ so we can rely on cpustat.idle */
+ /* !NO_HZ or cpu offline so we can rely on cpustat.idle */
idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
else
idle = usecs_to_cputime64(idle_time);
@@ -58,10 +61,13 @@ static u64 get_idle_time(int cpu)
static u64 get_iowait_time(int cpu)
{
- u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL);
+ u64 iowait, iowait_time = -1ULL;
+
+ if (cpu_online(cpu))
+ iowait_time = get_cpu_iowait_time_us(cpu, NULL);
if (iowait_time == -1ULL)
- /* !NO_HZ so we can rely on cpustat.iowait */
+ /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
else
iowait = usecs_to_cputime64(iowait_time);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 82c585f715e3..4a66a5c4a63f 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -88,6 +88,27 @@ static const char *get_reason_str(enum kmsg_dump_reason reason)
}
}
+bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
+{
+ /*
+ * In case of NMI path, pstore shouldn't be blocked
+ * regardless of reason.
+ */
+ if (in_nmi())
+ return true;
+
+ switch (reason) {
+ /* In panic case, other cpus are stopped by smp_send_stop(). */
+ case KMSG_DUMP_PANIC:
+ /* Emergency restart shouldn't be blocked by spin lock. */
+ case KMSG_DUMP_EMERG:
+ return true;
+ default:
+ return false;
+ }
+}
+EXPORT_SYMBOL_GPL(pstore_cannot_block_path);
+
/*
* callback from kmsg_dump. (s2,l2) has the most recently
* written bytes, older bytes are in (s1,l1). Save as much
@@ -111,10 +132,12 @@ static void pstore_dump(struct kmsg_dumper *dumper,
why = get_reason_str(reason);
- if (in_nmi()) {
- is_locked = spin_trylock(&psinfo->buf_lock);
- if (!is_locked)
- pr_err("pstore dump routine blocked in NMI, may corrupt error record\n");
+ if (pstore_cannot_block_path(reason)) {
+ is_locked = spin_trylock_irqsave(&psinfo->buf_lock, flags);
+ if (!is_locked) {
+ pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
+ , in_nmi() ? "NMI" : why);
+ }
} else
spin_lock_irqsave(&psinfo->buf_lock, flags);
oopscount++;
@@ -145,9 +168,9 @@ static void pstore_dump(struct kmsg_dumper *dumper,
total += l1_cpy + l2_cpy;
part++;
}
- if (in_nmi()) {
+ if (pstore_cannot_block_path(reason)) {
if (is_locked)
- spin_unlock(&psinfo->buf_lock);
+ spin_unlock_irqrestore(&psinfo->buf_lock, flags);
} else
spin_unlock_irqrestore(&psinfo->buf_lock, flags);
}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 494c315c7417..c11db5134b72 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1573,8 +1573,10 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
reiserfs_warning(sb, "reiserfs-13077",
"nfsd/reiserfs, fhtype=%d, len=%d - odd",
fh_type, fh_len);
- fh_type = 5;
+ fh_type = fh_len;
}
+ if (fh_len < 2)
+ return NULL;
return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1],
(fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0);
@@ -1583,6 +1585,8 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
+ if (fh_type > fh_len)
+ fh_type = fh_len;
if (fh_type < 4)
return NULL;
@@ -1784,8 +1788,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
BUG_ON(!th->t_trans_id);
- dquot_initialize(inode);
+ reiserfs_write_unlock(inode->i_sb);
err = dquot_alloc_inode(inode);
+ reiserfs_write_lock(inode->i_sb);
if (err)
goto out_end_trans;
if (!dir->i_nlink) {
@@ -1981,8 +1986,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
out_end_trans:
journal_end(th, th->t_super, th->t_blocks_allocated);
+ reiserfs_write_unlock(inode->i_sb);
/* Drop can be outside and it needs more credits so it's better to have it outside */
dquot_drop(inode);
+ reiserfs_write_lock(inode->i_sb);
inode->i_flags |= S_NOQUOTA;
make_bad_inode(inode);
@@ -3105,10 +3112,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
/* must be turned off for recursive notify_change calls */
ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
- depth = reiserfs_write_lock_once(inode->i_sb);
if (is_quota_modification(inode, attr))
dquot_initialize(inode);
-
+ depth = reiserfs_write_lock_once(inode->i_sb);
if (attr->ia_valid & ATTR_SIZE) {
/* version 2 items will be caught by the s_maxbytes check
** done for us in vmtruncate
@@ -3172,7 +3178,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
error = journal_begin(&th, inode->i_sb, jbegin_count);
if (error)
goto out;
+ reiserfs_write_unlock_once(inode->i_sb, depth);
error = dquot_transfer(inode, attr);
+ depth = reiserfs_write_lock_once(inode->i_sb);
if (error) {
journal_end(&th, inode->i_sb, jbegin_count);
goto out;
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index f8afa4b162b8..2f40a4c70a4d 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1968,7 +1968,9 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
key2type(&(key->on_disk_key)));
#endif
+ reiserfs_write_unlock(inode->i_sb);
retval = dquot_alloc_space_nodirty(inode, pasted_size);
+ reiserfs_write_lock(inode->i_sb);
if (retval) {
pathrelse(search_path);
return retval;
@@ -2061,9 +2063,11 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
"reiserquota insert_item(): allocating %u id=%u type=%c",
quota_bytes, inode->i_uid, head2type(ih));
#endif
+ reiserfs_write_unlock(inode->i_sb);
/* We can't dirty inode here. It would be immediately written but
* appropriate stat item isn't inserted yet... */
retval = dquot_alloc_space_nodirty(inode, quota_bytes);
+ reiserfs_write_lock(inode->i_sb);
if (retval) {
pathrelse(path);
return retval;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 8b7616ef06d8..8169be93ac0f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -256,7 +256,9 @@ static int finish_unfinished(struct super_block *s)
retval = remove_save_link_only(s, &save_link_key, 0);
continue;
}
+ reiserfs_write_unlock(s);
dquot_initialize(inode);
+ reiserfs_write_lock(s);
if (truncate && S_ISDIR(inode->i_mode)) {
/* We got a truncate request for a dir which is impossible.
@@ -1292,7 +1294,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
kfree(qf_names[i]);
#endif
err = -EINVAL;
- goto out_err;
+ goto out_unlock;
}
#ifdef CONFIG_QUOTA
handle_quota_files(s, qf_names, &qfmt);
@@ -1336,7 +1338,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
if (blocks) {
err = reiserfs_resize(s, blocks);
if (err != 0)
- goto out_err;
+ goto out_unlock;
}
if (*mount_flags & MS_RDONLY) {
@@ -1346,9 +1348,15 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
/* it is read-only already */
goto out_ok;
+ /*
+ * Drop write lock. Quota will retake it when needed and lock
+ * ordering requires calling dquot_suspend() without it.
+ */
+ reiserfs_write_unlock(s);
err = dquot_suspend(s, -1);
if (err < 0)
goto out_err;
+ reiserfs_write_lock(s);
/* try to remount file system with read-only permissions */
if (sb_umount_state(rs) == REISERFS_VALID_FS
@@ -1358,7 +1366,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
err = journal_begin(&th, s, 10);
if (err)
- goto out_err;
+ goto out_unlock;
/* Mounting a rw partition read-only. */
reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
@@ -1373,7 +1381,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
if (reiserfs_is_journal_aborted(journal)) {
err = journal->j_errno;
- goto out_err;
+ goto out_unlock;
}
handle_data_mode(s, mount_options);
@@ -1382,7 +1390,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */
err = journal_begin(&th, s, 10);
if (err)
- goto out_err;
+ goto out_unlock;
/* Mount a partition which is read-only, read-write */
reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
@@ -1399,11 +1407,17 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
SB_JOURNAL(s)->j_must_wait = 1;
err = journal_end(&th, s, 10);
if (err)
- goto out_err;
+ goto out_unlock;
s->s_dirt = 0;
if (!(*mount_flags & MS_RDONLY)) {
+ /*
+ * Drop write lock. Quota will retake it when needed and lock
+ * ordering requires calling dquot_resume() without it.
+ */
+ reiserfs_write_unlock(s);
dquot_resume(s, -1);
+ reiserfs_write_lock(s);
finish_unfinished(s);
reiserfs_xattr_init(s, *mount_flags);
}
@@ -1413,9 +1427,10 @@ out_ok:
reiserfs_write_unlock(s);
return 0;
+out_unlock:
+ reiserfs_write_unlock(s);
out_err:
kfree(new_opts);
- reiserfs_write_unlock(s);
return err;
}
@@ -2049,13 +2064,15 @@ static int reiserfs_write_dquot(struct dquot *dquot)
REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
if (ret)
goto out;
+ reiserfs_write_unlock(dquot->dq_sb);
ret = dquot_commit(dquot);
+ reiserfs_write_lock(dquot->dq_sb);
err =
journal_end(&th, dquot->dq_sb,
REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
if (!ret && err)
ret = err;
- out:
+out:
reiserfs_write_unlock(dquot->dq_sb);
return ret;
}
@@ -2071,13 +2088,15 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
if (ret)
goto out;
+ reiserfs_write_unlock(dquot->dq_sb);
ret = dquot_acquire(dquot);
+ reiserfs_write_lock(dquot->dq_sb);
err =
journal_end(&th, dquot->dq_sb,
REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
if (!ret && err)
ret = err;
- out:
+out:
reiserfs_write_unlock(dquot->dq_sb);
return ret;
}
@@ -2091,19 +2110,21 @@ static int reiserfs_release_dquot(struct dquot *dquot)
ret =
journal_begin(&th, dquot->dq_sb,
REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+ reiserfs_write_unlock(dquot->dq_sb);
if (ret) {
/* Release dquot anyway to avoid endless cycle in dqput() */
dquot_release(dquot);
goto out;
}
ret = dquot_release(dquot);
+ reiserfs_write_lock(dquot->dq_sb);
err =
journal_end(&th, dquot->dq_sb,
REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
if (!ret && err)
ret = err;
- out:
reiserfs_write_unlock(dquot->dq_sb);
+out:
return ret;
}
@@ -2128,11 +2149,13 @@ static int reiserfs_write_info(struct super_block *sb, int type)
ret = journal_begin(&th, sb, 2);
if (ret)
goto out;
+ reiserfs_write_unlock(sb);
ret = dquot_commit_info(sb, type);
+ reiserfs_write_lock(sb);
err = journal_end(&th, sb, 2);
if (!ret && err)
ret = err;
- out:
+out:
reiserfs_write_unlock(sb);
return ret;
}
@@ -2157,8 +2180,11 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
struct reiserfs_transaction_handle th;
int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA;
- if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt)))
- return -EINVAL;
+ reiserfs_write_lock(sb);
+ if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) {
+ err = -EINVAL;
+ goto out;
+ }
/* Quotafile not on the same filesystem? */
if (path->dentry->d_sb != sb) {
@@ -2200,8 +2226,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
if (err)
goto out;
}
- err = dquot_quota_on(sb, type, format_id, path);
+ reiserfs_write_unlock(sb);
+ return dquot_quota_on(sb, type, format_id, path);
out:
+ reiserfs_write_unlock(sb);
return err;
}
@@ -2275,7 +2303,9 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
tocopy = sb->s_blocksize - offset < towrite ?
sb->s_blocksize - offset : towrite;
tmp_bh.b_state = 0;
+ reiserfs_write_lock(sb);
err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE);
+ reiserfs_write_unlock(sb);
if (err)
goto out;
if (offset || tocopy != sb->s_blocksize)
@@ -2291,10 +2321,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
flush_dcache_page(bh->b_page);
set_buffer_uptodate(bh);
unlock_buffer(bh);
+ reiserfs_write_lock(sb);
reiserfs_prepare_for_journal(sb, bh, 1);
journal_mark_dirty(current->journal_info, sb, bh);
if (!journal_quota)
reiserfs_add_ordered_list(inode, bh);
+ reiserfs_write_unlock(sb);
brelse(bh);
offset = 0;
towrite -= tocopy;
diff --git a/fs/splice.c b/fs/splice.c
index 5cac690f8103..bed6a3c29355 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -696,8 +696,10 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
return -EINVAL;
more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
- if (sd->len < sd->total_len)
+
+ if (sd->len < sd->total_len && pipe->nrbufs > 1)
more |= MSG_SENDPAGE_NOTLAST;
+
return file->f_op->sendpage(file, buf->page, buf->offset,
sd->len, &pos, more);
}
diff --git a/fs/stat.c b/fs/stat.c
index c733dc5753ae..dc6d0be300ca 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -57,7 +57,7 @@ EXPORT_SYMBOL(vfs_getattr);
int vfs_fstat(unsigned int fd, struct kstat *stat)
{
- struct file *f = fget(fd);
+ struct file *f = fget_raw(fd);
int error = -EBADF;
if (f) {
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 35a36d39fa2c..a545d819b495 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -457,20 +457,18 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
/**
* sysfs_pathname - return full path to sysfs dirent
* @sd: sysfs_dirent whose path we want
- * @path: caller allocated buffer
+ * @path: caller allocated buffer of size PATH_MAX
*
* Gives the name "/" to the sysfs_root entry; any path returned
* is relative to wherever sysfs is mounted.
- *
- * XXX: does no error checking on @path size
*/
static char *sysfs_pathname(struct sysfs_dirent *sd, char *path)
{
if (sd->s_parent) {
sysfs_pathname(sd->s_parent, path);
- strcat(path, "/");
+ strlcat(path, "/", PATH_MAX);
}
- strcat(path, sd->s_name);
+ strlcat(path, sd->s_name, PATH_MAX);
return path;
}
@@ -503,9 +501,11 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
char *path = kzalloc(PATH_MAX, GFP_KERNEL);
WARN(1, KERN_WARNING
"sysfs: cannot create duplicate filename '%s'\n",
- (path == NULL) ? sd->s_name :
- strcat(strcat(sysfs_pathname(acxt->parent_sd, path), "/"),
- sd->s_name));
+ (path == NULL) ? sd->s_name
+ : (sysfs_pathname(acxt->parent_sd, path),
+ strlcat(path, "/", PATH_MAX),
+ strlcat(path, sd->s_name, PATH_MAX),
+ path));
kfree(path);
}
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 9f717655df18..28e3de1892b9 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -170,7 +170,7 @@ struct ubifs_global_debug_info {
#define ubifs_dbg_msg(type, fmt, ...) \
pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__)
-#define DBG_KEY_BUF_LEN 32
+#define DBG_KEY_BUF_LEN 48
#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \
char __tmp_key_buf[DBG_KEY_BUF_LEN]; \
pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index ec9f1870ab7f..8640a12f0650 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -977,7 +977,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
.dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
struct timespec time;
- unsigned int saved_nlink;
+ unsigned int uninitialized_var(saved_nlink);
/*
* Budget request settings: deletion direntry, new direntry, removing
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 2559d174e004..5dc48cacb5a0 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -681,8 +681,16 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
if (!lprops) {
lprops = ubifs_fast_find_freeable(c);
if (!lprops) {
- ubifs_assert(c->freeable_cnt == 0);
- if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
+ /*
+ * The first condition means the following: go scan the
+ * LPT if there are uncategorized lprops, which means
+ * there may be freeable LEBs there (UBIFS does not
+ * store the information about freeable LEBs in the
+ * master node).
+ */
+ if (c->in_a_category_cnt != c->main_lebs ||
+ c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
+ ubifs_assert(c->freeable_cnt == 0);
lprops = scan_for_leb_for_idx(c);
if (IS_ERR(lprops)) {
err = PTR_ERR(lprops);
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index f8a181e647cc..ea9d49164ffa 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -300,8 +300,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
default:
ubifs_assert(0);
}
+
lprops->flags &= ~LPROPS_CAT_MASK;
lprops->flags |= cat;
+ c->in_a_category_cnt += 1;
+ ubifs_assert(c->in_a_category_cnt <= c->main_lebs);
}
/**
@@ -334,6 +337,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c,
default:
ubifs_assert(0);
}
+
+ c->in_a_category_cnt -= 1;
+ ubifs_assert(c->in_a_category_cnt >= 0);
}
/**
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 93d59aceaaef..4971cb23b6c8 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1184,6 +1184,8 @@ struct ubifs_debug_info;
* @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
* @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
* @freeable_cnt: number of freeable LEBs in @freeable_list
+ * @in_a_category_cnt: count of lprops which are in a certain category, which
+ * basically meants that they were loaded from the flash
*
* @ltab_lnum: LEB number of LPT's own lprops table
* @ltab_offs: offset of LPT's own lprops table
@@ -1413,6 +1415,7 @@ struct ubifs_info {
struct list_head freeable_list;
struct list_head frdi_idx_list;
int freeable_cnt;
+ int in_a_category_cnt;
int ltab_lnum;
int ltab_offs;
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 7f3f7ba3df6e..d1c6093fd3d3 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -39,20 +39,24 @@
#include "udf_i.h"
#include "udf_sb.h"
-static int udf_adinicb_readpage(struct file *file, struct page *page)
+static void __udf_adinicb_readpage(struct page *page)
{
struct inode *inode = page->mapping->host;
char *kaddr;
struct udf_inode_info *iinfo = UDF_I(inode);
- BUG_ON(!PageLocked(page));
-
kaddr = kmap(page);
- memset(kaddr, 0, PAGE_CACHE_SIZE);
memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size);
+ memset(kaddr + inode->i_size, 0, PAGE_CACHE_SIZE - inode->i_size);
flush_dcache_page(page);
SetPageUptodate(page);
kunmap(page);
+}
+
+static int udf_adinicb_readpage(struct file *file, struct page *page)
+{
+ BUG_ON(!PageLocked(page));
+ __udf_adinicb_readpage(page);
unlock_page(page);
return 0;
@@ -77,6 +81,25 @@ static int udf_adinicb_writepage(struct page *page,
return 0;
}
+static int udf_adinicb_write_begin(struct file *file,
+ struct address_space *mapping, loff_t pos,
+ unsigned len, unsigned flags, struct page **pagep,
+ void **fsdata)
+{
+ struct page *page;
+
+ if (WARN_ON_ONCE(pos >= PAGE_CACHE_SIZE))
+ return -EIO;
+ page = grab_cache_page_write_begin(mapping, 0, flags);
+ if (!page)
+ return -ENOMEM;
+ *pagep = page;
+
+ if (!PageUptodate(page) && len != PAGE_CACHE_SIZE)
+ __udf_adinicb_readpage(page);
+ return 0;
+}
+
static int udf_adinicb_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
@@ -98,8 +121,8 @@ static int udf_adinicb_write_end(struct file *file,
const struct address_space_operations udf_adinicb_aops = {
.readpage = udf_adinicb_readpage,
.writepage = udf_adinicb_writepage,
- .write_begin = simple_write_begin,
- .write_end = udf_adinicb_write_end,
+ .write_begin = udf_adinicb_write_begin,
+ .write_end = udf_adinicb_write_end,
};
static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 7d7528008359..aa70035fb402 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -574,6 +574,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
struct udf_inode_info *iinfo = UDF_I(inode);
int goal = 0, pgoal = iinfo->i_location.logicalBlockNum;
int lastblock = 0;
+ bool isBeyondEOF;
*err = 0;
*new = 0;
@@ -653,7 +654,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
/* Are we beyond EOF? */
if (etype == -1) {
int ret;
-
+ isBeyondEOF = 1;
if (count) {
if (c)
laarr[0] = laarr[1];
@@ -696,6 +697,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
endnum = c + 1;
lastblock = 1;
} else {
+ isBeyondEOF = 0;
endnum = startnum = ((count > 2) ? 2 : count);
/* if the current extent is in position 0,
@@ -738,10 +740,13 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
goal, err);
if (!newblocknum) {
brelse(prev_epos.bh);
+ brelse(cur_epos.bh);
+ brelse(next_epos.bh);
*err = -ENOSPC;
return 0;
}
- iinfo->i_lenExtents += inode->i_sb->s_blocksize;
+ if (isBeyondEOF)
+ iinfo->i_lenExtents += inode->i_sb->s_blocksize;
}
/* if the extent the requsted block is located in contains multiple
@@ -768,6 +773,8 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
udf_update_extents(inode, laarr, startnum, endnum, &prev_epos);
brelse(prev_epos.bh);
+ brelse(cur_epos.bh);
+ brelse(next_epos.bh);
newblock = udf_get_pblock(inode->i_sb, newblocknum,
iinfo->i_location.partitionReferenceNum, 0);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index e660ffdd91b6..4988a8afcc8f 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1287,6 +1287,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
udf_err(sb, "error loading logical volume descriptor: "
"Partition table too long (%u > %lu)\n", table_len,
sb->s_blocksize - sizeof(*lvd));
+ ret = 1;
goto out_bh;
}
@@ -1331,8 +1332,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
UDF_ID_SPARABLE,
strlen(UDF_ID_SPARABLE))) {
if (udf_load_sparable_map(sb, map,
- (struct sparablePartitionMap *)gpm) < 0)
+ (struct sparablePartitionMap *)gpm) < 0) {
+ ret = 1;
goto out_bh;
+ }
} else if (!strncmp(upm2->partIdent.ident,
UDF_ID_METADATA,
strlen(UDF_ID_METADATA))) {
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0dbb9e70fe21..7a978c7be842 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -89,11 +89,11 @@ xfs_destroy_ioend(
}
if (ioend->io_iocb) {
+ inode_dio_done(ioend->io_inode);
if (ioend->io_isasync) {
aio_complete(ioend->io_iocb, ioend->io_error ?
ioend->io_error : ioend->io_result, 0);
}
- inode_dio_done(ioend->io_inode);
}
mempool_free(ioend, xfs_ioend_pool);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 6819b5163e33..bb76128b5f47 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1165,9 +1165,14 @@ xfs_buf_bio_end_io(
{
xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
- xfs_buf_ioerror(bp, -error);
+ /*
+ * don't overwrite existing errors - otherwise we can lose errors on
+ * buffers that require multiple bios to complete.
+ */
+ if (!bp->b_error)
+ xfs_buf_ioerror(bp, -error);
- if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
+ if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
_xfs_buf_ioend(bp, 1);
@@ -1243,6 +1248,11 @@ next_chunk:
if (size)
goto next_chunk;
} else {
+ /*
+ * This is guaranteed not to be the last io reference count
+ * because the caller (xfs_buf_iorequest) holds a count itself.
+ */
+ atomic_dec(&bp->b_io_remaining);
xfs_buf_ioerror(bp, EIO);
bio_put(bio);
}
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 558910f5e3c0..5703fb85f6bb 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -195,6 +195,9 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid;
struct inode *inode = NULL;
+ if (fh_len < xfs_fileid_length(fileid_type))
+ return NULL;
+
switch (fileid_type) {
case FILEID_INO32_GEN_PARENT:
inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8ecad5bad66c..0abb1623a7cb 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3514,7 +3514,7 @@ xlog_do_recovery_pass(
* - order is important.
*/
error = xlog_bread_offset(log, 0,
- bblks - split_bblks, hbp,
+ bblks - split_bblks, dbp,
offset + BBTOB(split_bblks));
if (error)
goto bread_err2;