diff options
author | Sachin Nikam <snikam@nvidia.com> | 2013-03-06 19:12:15 +0530 |
---|---|---|
committer | Sachin Nikam <snikam@nvidia.com> | 2013-03-06 19:31:07 +0530 |
commit | d3dd6b4227fe3e59df0a55f679ec196e4ccb1e3a (patch) | |
tree | 0137a07ecab729a19cea68b671c88def5bf83a2d /fs | |
parent | bb36790494c382ec26250f351c8cea76613f0f02 (diff) | |
parent | 2713e2797a78a0fdda765bc5ad7fed41e94818ba (diff) |
Merge branch 'linux-3.4.35' into rel-17
Bug 1243631
Change-Id: I915826047b2e20f0ad0a7d75df295c6cbf6e5b0a
Diffstat (limited to 'fs')
119 files changed, 1382 insertions, 782 deletions
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 75e5f1c8e028..8c4292f89eef 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -392,10 +392,12 @@ static struct vfsmount *autofs4_d_automount(struct path *path) ino->flags |= AUTOFS_INF_PENDING; spin_unlock(&sbi->fs_lock); status = autofs4_mount_wait(dentry); - if (status) - return ERR_PTR(status); spin_lock(&sbi->fs_lock); ino->flags &= ~AUTOFS_INF_PENDING; + if (status) { + spin_unlock(&sbi->fs_lock); + return ERR_PTR(status); + } } done: if (!(ino->flags & AUTOFS_INF_EXPIRING)) { diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 16f735417072..a009b9e322ac 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1698,30 +1698,19 @@ static int elf_note_info_init(struct elf_note_info *info) return 0; info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL); if (!info->psinfo) - goto notes_free; + return 0; info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL); if (!info->prstatus) - goto psinfo_free; + return 0; info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL); if (!info->fpu) - goto prstatus_free; + return 0; #ifdef ELF_CORE_COPY_XFPREGS info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL); if (!info->xfpu) - goto fpu_free; + return 0; #endif return 1; -#ifdef ELF_CORE_COPY_XFPREGS - fpu_free: - kfree(info->fpu); -#endif - prstatus_free: - kfree(info->prstatus); - psinfo_free: - kfree(info->psinfo); - notes_free: - kfree(info->notes); - return 0; } static int fill_note_info(struct elfhdr *elf, int phdrs, diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 613aa0618235..e1724392d050 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -176,7 +176,10 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) goto _error; bprm->argc ++; - bprm->interp = iname; /* for binfmt_script */ + /* Update interp in case binfmt_script needs it. */ + retval = bprm_change_interp(iname, bprm); + if (retval < 0) + goto _error; interp_file = open_exec (iname); retval = PTR_ERR (interp_file); diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index d3b8c1f63155..df49d486b0c0 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -82,7 +82,9 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) retval = copy_strings_kernel(1, &i_name, bprm); if (retval) return retval; bprm->argc++; - bprm->interp = interp; + retval = bprm_change_interp(interp, bprm); + if (retval < 0) + return retval; /* * OK, now restart the process with the interpreter's dentry. diff --git a/fs/block_dev.c b/fs/block_dev.c index ba11c30f302d..b3be92c980ee 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1047,6 +1047,7 @@ int revalidate_disk(struct gendisk *disk) mutex_lock(&bdev->bd_mutex); check_disk_size_change(disk, bdev); + bdev->bd_invalidated = 0; mutex_unlock(&bdev->bd_mutex); bdput(bdev); return ret; diff --git a/fs/buffer.c b/fs/buffer.c index 0bc1bed6c4e0..18669e92b676 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -914,7 +914,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head) /* * Initialise the state of a blockdev page's buffers. */ -static void +static sector_t init_page_buffers(struct page *page, struct block_device *bdev, sector_t block, int size) { @@ -936,33 +936,41 @@ init_page_buffers(struct page *page, struct block_device *bdev, block++; bh = bh->b_this_page; } while (bh != head); + + /* + * Caller needs to validate requested block against end of device. + */ + return end_block; } /* * Create the page-cache page that contains the requested block. * - * This is user purely for blockdev mappings. + * This is used purely for blockdev mappings. */ -static struct page * +static int grow_dev_page(struct block_device *bdev, sector_t block, - pgoff_t index, int size) + pgoff_t index, int size, int sizebits) { struct inode *inode = bdev->bd_inode; struct page *page; struct buffer_head *bh; + sector_t end_block; + int ret = 0; /* Will call free_more_memory() */ page = find_or_create_page(inode->i_mapping, index, (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); if (!page) - return NULL; + return ret; BUG_ON(!PageLocked(page)); if (page_has_buffers(page)) { bh = page_buffers(page); if (bh->b_size == size) { - init_page_buffers(page, bdev, block, size); - return page; + end_block = init_page_buffers(page, bdev, + index << sizebits, size); + goto done; } if (!try_to_free_buffers(page)) goto failed; @@ -982,14 +990,14 @@ grow_dev_page(struct block_device *bdev, sector_t block, */ spin_lock(&inode->i_mapping->private_lock); link_dev_buffers(page, bh); - init_page_buffers(page, bdev, block, size); + end_block = init_page_buffers(page, bdev, index << sizebits, size); spin_unlock(&inode->i_mapping->private_lock); - return page; - +done: + ret = (block < end_block) ? 1 : -ENXIO; failed: unlock_page(page); page_cache_release(page); - return NULL; + return ret; } /* @@ -999,7 +1007,6 @@ failed: static int grow_buffers(struct block_device *bdev, sector_t block, int size) { - struct page *page; pgoff_t index; int sizebits; @@ -1023,22 +1030,14 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) bdevname(bdev, b)); return -EIO; } - block = index << sizebits; + /* Create a page with the proper size buffers.. */ - page = grow_dev_page(bdev, block, index, size); - if (!page) - return 0; - unlock_page(page); - page_cache_release(page); - return 1; + return grow_dev_page(bdev, block, index, size, sizebits); } static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, int size) { - int ret; - struct buffer_head *bh; - /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || (size < 512 || size > PAGE_SIZE))) { @@ -1051,21 +1050,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) return NULL; } -retry: - bh = __find_get_block(bdev, block, size); - if (bh) - return bh; + for (;;) { + struct buffer_head *bh; + int ret; - ret = grow_buffers(bdev, block, size); - if (ret == 0) { - free_more_memory(); - goto retry; - } else if (ret > 0) { bh = __find_get_block(bdev, block, size); if (bh) return bh; + + ret = grow_buffers(bdev, block, size); + if (ret < 0) + return NULL; + if (ret == 0) + free_more_memory(); } - return NULL; } /* @@ -1321,10 +1319,6 @@ EXPORT_SYMBOL(__find_get_block); * which corresponds to the passed block_device, block and size. The * returned buffer has its reference count incremented. * - * __getblk() cannot fail - it just keeps trying. If you pass it an - * illegal block number, __getblk() will happily return a buffer_head - * which represents the non-existent block. Very weird. - * * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() * attempt is failing. FIXME, perhaps? */ diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 173b1d22e59b..bb01881cb1f2 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -54,7 +54,12 @@ (CONGESTION_ON_THRESH(congestion_kb) - \ (CONGESTION_ON_THRESH(congestion_kb) >> 2)) - +static inline struct ceph_snap_context *page_snap_context(struct page *page) +{ + if (PagePrivate(page)) + return (void *)page->private; + return NULL; +} /* * Dirty a page. Optimistically adjust accounting, on the assumption @@ -142,10 +147,9 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset) { struct inode *inode; struct ceph_inode_info *ci; - struct ceph_snap_context *snapc = (void *)page->private; + struct ceph_snap_context *snapc = page_snap_context(page); BUG_ON(!PageLocked(page)); - BUG_ON(!page->private); BUG_ON(!PagePrivate(page)); BUG_ON(!page->mapping); @@ -182,7 +186,6 @@ static int ceph_releasepage(struct page *page, gfp_t g) struct inode *inode = page->mapping ? page->mapping->host : NULL; dout("%p releasepage %p idx %lu\n", inode, page, page->index); WARN_ON(PageDirty(page)); - WARN_ON(page->private); WARN_ON(PagePrivate(page)); return 0; } @@ -202,7 +205,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) dout("readpage inode %p file %p page %p index %lu\n", inode, filp, page, page->index); err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, - page->index << PAGE_CACHE_SHIFT, &len, + (u64) page_offset(page), &len, ci->i_truncate_seq, ci->i_truncate_size, &page, 1, 0); if (err == -ENOENT) @@ -264,6 +267,14 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) kfree(req->r_pages); } +static void ceph_unlock_page_vector(struct page **pages, int num_pages) +{ + int i; + + for (i = 0; i < num_pages; i++) + unlock_page(pages[i]); +} + /* * start an async read(ahead) operation. return nr_pages we submitted * a read for on success, or negative error code. @@ -283,7 +294,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) int nr_pages = 0; int ret; - off = page->index << PAGE_CACHE_SHIFT; + off = (u64) page_offset(page); /* count pages */ next_index = page->index; @@ -305,8 +316,8 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) NULL, 0, ci->i_truncate_seq, ci->i_truncate_size, NULL, false, 1, 0); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); /* build page vector */ nr_pages = len >> PAGE_CACHE_SHIFT; @@ -344,6 +355,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) return nr_pages; out_pages: + ceph_unlock_page_vector(pages, nr_pages); ceph_release_page_vector(pages, nr_pages); out: ceph_osdc_put_request(req); @@ -423,7 +435,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) struct ceph_inode_info *ci; struct ceph_fs_client *fsc; struct ceph_osd_client *osdc; - loff_t page_off = page->index << PAGE_CACHE_SHIFT; + loff_t page_off = page_offset(page); int len = PAGE_CACHE_SIZE; loff_t i_size; int err = 0; @@ -443,7 +455,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) osdc = &fsc->client->osdc; /* verify this is a writeable snap context */ - snapc = (void *)page->private; + snapc = page_snap_context(page); if (snapc == NULL) { dout("writepage %p page %p not dirty?\n", inode, page); goto out; @@ -451,7 +463,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) oldest = get_oldest_context(inode, &snap_size); if (snapc->seq > oldest->seq) { dout("writepage %p page %p snapc %p not writeable - noop\n", - inode, page, (void *)page->private); + inode, page, snapc); /* we should only noop if called by kswapd */ WARN_ON((current->flags & PF_MEMALLOC) == 0); ceph_put_snap_context(oldest); @@ -591,7 +603,7 @@ static void writepages_finish(struct ceph_osd_request *req, clear_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC); - ceph_put_snap_context((void *)page->private); + ceph_put_snap_context(page_snap_context(page)); page->private = 0; ClearPagePrivate(page); dout("unlocking %d %p\n", i, page); @@ -795,7 +807,7 @@ get_more_pages: } /* only if matching snap context */ - pgsnapc = (void *)page->private; + pgsnapc = page_snap_context(page); if (pgsnapc->seq > snapc->seq) { dout("page snapc %p %lld > oldest %p %lld\n", pgsnapc, pgsnapc->seq, snapc, snapc->seq); @@ -814,8 +826,7 @@ get_more_pages: /* ok */ if (locked_pages == 0) { /* prepare async write request */ - offset = (unsigned long long)page->index - << PAGE_CACHE_SHIFT; + offset = (u64) page_offset(page); len = wsize; req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, @@ -829,8 +840,8 @@ get_more_pages: ci->i_truncate_size, &inode->i_mtime, true, 1, 0); - if (!req) { - rc = -ENOMEM; + if (IS_ERR(req)) { + rc = PTR_ERR(req); unlock_page(page); break; } @@ -984,7 +995,7 @@ retry_locked: BUG_ON(!ci->i_snap_realm); down_read(&mdsc->snap_rwsem); BUG_ON(!ci->i_snap_realm->cached_context); - snapc = (void *)page->private; + snapc = page_snap_context(page); if (snapc && snapc != ci->i_head_snapc) { /* * this page is already dirty in another (older) snap @@ -1177,7 +1188,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = vma->vm_file->f_dentry->d_inode; struct page *page = vmf->page; struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; - loff_t off = page->index << PAGE_CACHE_SHIFT; + loff_t off = page_offset(page); loff_t size, len; int ret; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 620daad201db..e7d40776f58f 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1349,11 +1349,15 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) if (!ci->i_head_snapc) ci->i_head_snapc = ceph_get_snap_context( ci->i_snap_realm->cached_context); - dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode, - ci->i_head_snapc); + dout(" inode %p now dirty snapc %p auth cap %p\n", + &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap); BUG_ON(!list_empty(&ci->i_dirty_item)); spin_lock(&mdsc->cap_dirty_lock); - list_add(&ci->i_dirty_item, &mdsc->cap_dirty); + if (ci->i_auth_cap) + list_add(&ci->i_dirty_item, &mdsc->cap_dirty); + else + list_add(&ci->i_dirty_item, + &mdsc->cap_dirty_migrating); spin_unlock(&mdsc->cap_dirty_lock); if (ci->i_flushing_caps == 0) { ihold(inode); @@ -2388,7 +2392,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, &atime); /* max size increase? */ - if (max_size != ci->i_max_size) { + if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { dout("max_size %lld -> %llu\n", ci->i_max_size, max_size); ci->i_max_size = max_size; if (max_size >= ci->i_wanted_max_size) { @@ -2745,6 +2749,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, /* make sure we re-request max_size, if necessary */ spin_lock(&ci->i_ceph_lock); + ci->i_wanted_max_size = 0; /* reset */ ci->i_requested_max_size = 0; spin_unlock(&ci->i_ceph_lock); } @@ -2840,8 +2845,6 @@ void ceph_handle_caps(struct ceph_mds_session *session, case CEPH_CAP_OP_IMPORT: handle_cap_import(mdsc, inode, h, session, snaptrace, snaptrace_len); - ceph_check_caps(ceph_inode(inode), 0, session); - goto done_unlocked; } /* the rest require a cap */ @@ -2858,6 +2861,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, switch (op) { case CEPH_CAP_OP_REVOKE: case CEPH_CAP_OP_GRANT: + case CEPH_CAP_OP_IMPORT: handle_cap_grant(inode, h, session, cap, msg->middle); goto done_unlocked; diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index fb962efdacee..6d59006bfa27 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -201,6 +201,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) int err = -ENOMEM; dout("ceph_fs_debugfs_init\n"); + BUG_ON(!fsc->client->debugfs_dir); fsc->debugfs_congestion_kb = debugfs_create_file("writeback_congestion_kb", 0600, diff --git a/fs/ceph/export.c b/fs/ceph/export.c index fbb2a643ef10..4098ccf8cb98 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -89,7 +89,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, * FIXME: we should try harder by querying the mds for the ino. */ static struct dentry *__fh_to_dentry(struct super_block *sb, - struct ceph_nfs_fh *fh) + struct ceph_nfs_fh *fh, int fh_len) { struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; struct inode *inode; @@ -97,6 +97,9 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, struct ceph_vino vino; int err; + if (fh_len < sizeof(*fh) / 4) + return ERR_PTR(-ESTALE); + dout("__fh_to_dentry %llx\n", fh->ino); vino.ino = fh->ino; vino.snap = CEPH_NOSNAP; @@ -140,7 +143,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, * convert connectable fh to dentry */ static struct dentry *__cfh_to_dentry(struct super_block *sb, - struct ceph_nfs_confh *cfh) + struct ceph_nfs_confh *cfh, int fh_len) { struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; struct inode *inode; @@ -148,6 +151,9 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb, struct ceph_vino vino; int err; + if (fh_len < sizeof(*cfh) / 4) + return ERR_PTR(-ESTALE); + dout("__cfh_to_dentry %llx (%llx/%x)\n", cfh->ino, cfh->parent_ino, cfh->parent_name_hash); @@ -197,9 +203,11 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { if (fh_type == 1) - return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw); + return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw, + fh_len); else - return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw); + return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw, + fh_len); } /* @@ -220,6 +228,8 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, if (fh_type == 1) return ERR_PTR(-ESTALE); + if (fh_len < sizeof(*cfh) / 4) + return ERR_PTR(-ESTALE); pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino, cfh->parent_name_hash); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ed72428d9c75..9ce3a4bd4d1c 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -529,8 +529,8 @@ more: do_sync, ci->i_truncate_seq, ci->i_truncate_size, &mtime, false, 2, page_align); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); if (file->f_flags & O_DIRECT) { pages = ceph_get_direct_page_vector(data, num_pages, false); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 9fff9f3b17e4..81613bced19f 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -992,11 +992,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, if (rinfo->head->is_dentry) { struct inode *dir = req->r_locked_dir; - err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, - session, req->r_request_started, -1, - &req->r_caps_reservation); - if (err < 0) - return err; + if (dir) { + err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag, + session, req->r_request_started, -1, + &req->r_caps_reservation); + if (err < 0) + return err; + } else { + WARN_ON_ONCE(1); + } } /* @@ -1004,6 +1008,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, * will have trouble splicing in the virtual snapdir later */ if (rinfo->head->is_dentry && !req->r_aborted && + req->r_locked_dir && (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, fsc->mount_options->snapdir_name, req->r_dentry->d_name.len))) { @@ -1461,7 +1466,7 @@ void __ceph_do_pending_vmtruncate(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode); u64 to; - int wrbuffer_refs, wake = 0; + int wrbuffer_refs, finish = 0; retry: spin_lock(&ci->i_ceph_lock); @@ -1493,15 +1498,18 @@ retry: truncate_inode_pages(inode->i_mapping, to); spin_lock(&ci->i_ceph_lock); - ci->i_truncate_pending--; - if (ci->i_truncate_pending == 0) - wake = 1; + if (to == ci->i_truncate_size) { + ci->i_truncate_pending = 0; + finish = 1; + } spin_unlock(&ci->i_ceph_lock); + if (!finish) + goto retry; if (wrbuffer_refs == 0) ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); - if (wake) - wake_up_all(&ci->i_cap_wq); + + wake_up_all(&ci->i_cap_wq); } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 89971e137aab..3fd08ad5c478 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -334,10 +334,10 @@ void ceph_put_mds_session(struct ceph_mds_session *s) dout("mdsc put_session %p %d -> %d\n", s, atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); if (atomic_dec_and_test(&s->s_ref)) { - if (s->s_authorizer) + if (s->s_auth.authorizer) s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer( s->s_mdsc->fsc->client->monc.auth, - s->s_authorizer); + s->s_auth.authorizer); kfree(s); } } @@ -394,11 +394,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_seq = 0; mutex_init(&s->s_mutex); - ceph_con_init(mdsc->fsc->client->msgr, &s->s_con); - s->s_con.private = s; - s->s_con.ops = &mds_con_ops; - s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; - s->s_con.peer_name.num = cpu_to_le64(mds); + ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr); spin_lock_init(&s->s_gen_ttl_lock); s->s_cap_gen = 0; @@ -440,7 +436,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, mdsc->sessions[mds] = s; atomic_inc(&s->s_ref); /* one ref to sessions[], one to caller */ - ceph_con_open(&s->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); + ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds, + ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); return s; @@ -1889,9 +1886,14 @@ finish: static void __wake_requests(struct ceph_mds_client *mdsc, struct list_head *head) { - struct ceph_mds_request *req, *nreq; + struct ceph_mds_request *req; + LIST_HEAD(tmp_list); + + list_splice_init(head, &tmp_list); - list_for_each_entry_safe(req, nreq, head, r_wait) { + while (!list_empty(&tmp_list)) { + req = list_entry(tmp_list.next, + struct ceph_mds_request, r_wait); list_del_init(&req->r_wait); __do_request(mdsc, req); } @@ -2531,7 +2533,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, session->s_state = CEPH_MDS_SESSION_RECONNECTING; session->s_seq = 0; + ceph_con_close(&session->s_con); ceph_con_open(&session->s_con, + CEPH_ENTITY_TYPE_MDS, mds, ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); /* replay unsafe requests */ @@ -2636,7 +2640,8 @@ static void check_new_map(struct ceph_mds_client *mdsc, ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", session_state_name(s->s_state)); - if (memcmp(ceph_mdsmap_get_addr(oldmap, i), + if (i >= newmap->m_max_mds || + memcmp(ceph_mdsmap_get_addr(oldmap, i), ceph_mdsmap_get_addr(newmap, i), sizeof(struct ceph_entity_addr))) { if (s->s_state == CEPH_MDS_SESSION_OPENING) { @@ -3395,39 +3400,33 @@ out: /* * authentication */ -static int get_authorizer(struct ceph_connection *con, - void **buf, int *len, int *proto, - void **reply_buf, int *reply_len, int force_new) + +/* + * Note: returned pointer is the address of a structure that's + * managed separately. Caller must *not* attempt to free it. + */ +static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, + int *proto, int force_new) { struct ceph_mds_session *s = con->private; struct ceph_mds_client *mdsc = s->s_mdsc; struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; - int ret = 0; - - if (force_new && s->s_authorizer) { - ac->ops->destroy_authorizer(ac, s->s_authorizer); - s->s_authorizer = NULL; - } - if (s->s_authorizer == NULL) { - if (ac->ops->create_authorizer) { - ret = ac->ops->create_authorizer( - ac, CEPH_ENTITY_TYPE_MDS, - &s->s_authorizer, - &s->s_authorizer_buf, - &s->s_authorizer_buf_len, - &s->s_authorizer_reply_buf, - &s->s_authorizer_reply_buf_len); - if (ret) - return ret; - } - } + struct ceph_auth_handshake *auth = &s->s_auth; + if (force_new && auth->authorizer) { + if (ac->ops && ac->ops->destroy_authorizer) + ac->ops->destroy_authorizer(ac, auth->authorizer); + auth->authorizer = NULL; + } + if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) { + int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_MDS, + auth); + if (ret) + return ERR_PTR(ret); + } *proto = ac->protocol; - *buf = s->s_authorizer_buf; - *len = s->s_authorizer_buf_len; - *reply_buf = s->s_authorizer_reply_buf; - *reply_len = s->s_authorizer_reply_buf_len; - return 0; + + return auth; } @@ -3437,7 +3436,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len) struct ceph_mds_client *mdsc = s->s_mdsc; struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; - return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len); + return ac->ops->verify_authorizer_reply(ac, s->s_auth.authorizer, len); } static int invalidate_authorizer(struct ceph_connection *con) diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 8c7c04ebb595..dd26846dd71d 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -11,6 +11,7 @@ #include <linux/ceph/types.h> #include <linux/ceph/messenger.h> #include <linux/ceph/mdsmap.h> +#include <linux/ceph/auth.h> /* * Some lock dependencies: @@ -113,9 +114,7 @@ struct ceph_mds_session { struct ceph_connection s_con; - struct ceph_authorizer *s_authorizer; - void *s_authorizer_buf, *s_authorizer_reply_buf; - size_t s_authorizer_buf_len, s_authorizer_reply_buf_len; + struct ceph_auth_handshake s_auth; /* protected by s_gen_ttl_lock */ spinlock_t s_gen_ttl_lock; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 1e67dd7305a4..f36391815461 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -387,8 +387,6 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root) seq_printf(m, ",mount_timeout=%d", opt->mount_timeout); if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl); - if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) - seq_printf(m, ",osdtimeout=%d", opt->osd_timeout); if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) seq_printf(m, ",osdkeepalivetimeout=%d", opt->osd_keepalive_timeout); diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 6873bb634a97..22631445a4ac 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -226,6 +226,8 @@ compose_mount_options_out: compose_mount_options_err: kfree(mountdata); mountdata = ERR_PTR(rc); + kfree(*devname); + *devname = NULL; goto compose_mount_options_out; } diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index fbb9da951843..6a8568c6466f 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -203,6 +203,27 @@ cifs_strtoUTF16(__le16 *to, const char *from, int len, int i; wchar_t wchar_to; /* needed to quiet sparse */ + /* special case for utf8 to handle no plane0 chars */ + if (!strcmp(codepage->charset, "utf8")) { + /* + * convert utf8 -> utf16, we assume we have enough space + * as caller should have assumed conversion does not overflow + * in destination len is length in wchar_t units (16bits) + */ + i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN, + (wchar_t *) to, len); + + /* if success terminate and exit */ + if (i >= 0) + goto success; + /* + * if fails fall back to UCS encoding as this + * function should not return negative values + * currently can fail only if source contains + * invalid encoded characters + */ + } + for (i = 0; len && *from; i++, from += charlen, len -= charlen) { charlen = codepage->char2uni(from, len, &wchar_to); if (charlen < 1) { @@ -215,6 +236,7 @@ cifs_strtoUTF16(__le16 *to, const char *from, int len, put_unaligned_le16(wchar_to, &to[i]); } +success: put_unaligned_le16(0, &to[i]); return i; } @@ -328,6 +350,6 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, } ctoUTF16_out: - return i; + return j; } diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 3cc1b251ca08..6ccf176427f0 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -225,6 +225,13 @@ sid_to_str(struct cifs_sid *sidptr, char *sidstr) } static void +cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src) +{ + memcpy(dst, src, sizeof(*dst)); + dst->num_subauth = min_t(u8, src->num_subauth, NUM_SUBAUTHS); +} + +static void id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr, struct cifs_sid_id **psidid, char *typestr) { @@ -248,7 +255,7 @@ id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr, } } - memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid)); + cifs_copy_sid(&(*psidid)->sid, sidptr); (*psidid)->time = jiffies - (SID_MAP_RETRY + 1); (*psidid)->refcount = 0; @@ -354,7 +361,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid) * any fields of the node after a reference is put . */ if (test_bit(SID_ID_MAPPED, &psidid->state)) { - memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid)); + cifs_copy_sid(ssid, &psidid->sid); psidid->time = jiffies; /* update ts for accessing */ goto id_sid_out; } @@ -370,14 +377,14 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid) if (IS_ERR(sidkey)) { rc = -EINVAL; cFYI(1, "%s: Can't map and id to a SID", __func__); + } else if (sidkey->datalen < sizeof(struct cifs_sid)) { + rc = -EIO; + cFYI(1, "%s: Downcall contained malformed key " + "(datalen=%hu)", __func__, sidkey->datalen); } else { lsid = (struct cifs_sid *)sidkey->payload.data; - memcpy(&psidid->sid, lsid, - sidkey->datalen < sizeof(struct cifs_sid) ? - sidkey->datalen : sizeof(struct cifs_sid)); - memcpy(ssid, &psidid->sid, - sidkey->datalen < sizeof(struct cifs_sid) ? - sidkey->datalen : sizeof(struct cifs_sid)); + cifs_copy_sid(&psidid->sid, lsid); + cifs_copy_sid(ssid, &psidid->sid); set_bit(SID_ID_MAPPED, &psidid->state); key_put(sidkey); kfree(psidid->sidstr); @@ -396,7 +403,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid) return rc; } if (test_bit(SID_ID_MAPPED, &psidid->state)) - memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid)); + cifs_copy_sid(ssid, &psidid->sid); else rc = -EINVAL; } @@ -675,8 +682,6 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid) static void copy_sec_desc(const struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, __u32 sidsoffset) { - int i; - struct cifs_sid *owner_sid_ptr, *group_sid_ptr; struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr; @@ -692,26 +697,14 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd, owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + le32_to_cpu(pntsd->osidoffset)); nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset); - - nowner_sid_ptr->revision = owner_sid_ptr->revision; - nowner_sid_ptr->num_subauth = owner_sid_ptr->num_subauth; - for (i = 0; i < 6; i++) - nowner_sid_ptr->authority[i] = owner_sid_ptr->authority[i]; - for (i = 0; i < 5; i++) - nowner_sid_ptr->sub_auth[i] = owner_sid_ptr->sub_auth[i]; + cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr); /* copy group sid */ group_sid_ptr = (struct cifs_sid *)((char *)pntsd + le32_to_cpu(pntsd->gsidoffset)); ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset + sizeof(struct cifs_sid)); - - ngroup_sid_ptr->revision = group_sid_ptr->revision; - ngroup_sid_ptr->num_subauth = group_sid_ptr->num_subauth; - for (i = 0; i < 6; i++) - ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i]; - for (i = 0; i < 5; i++) - ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i]; + cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr); return; } @@ -1120,8 +1113,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, kfree(nowner_sid_ptr); return rc; } - memcpy(owner_sid_ptr, nowner_sid_ptr, - sizeof(struct cifs_sid)); + cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr); kfree(nowner_sid_ptr); *aclflag = CIFS_ACL_OWNER; } @@ -1139,8 +1131,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, kfree(ngroup_sid_ptr); return rc; } - memcpy(group_sid_ptr, ngroup_sid_ptr, - sizeof(struct cifs_sid)); + cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr); kfree(ngroup_sid_ptr); *aclflag = CIFS_ACL_GROUP; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 65a78e9a5d40..f771e9f98f9f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -70,6 +70,7 @@ enum { /* Mount options that take no arguments */ Opt_user_xattr, Opt_nouser_xattr, Opt_forceuid, Opt_noforceuid, + Opt_forcegid, Opt_noforcegid, Opt_noblocksend, Opt_noautotune, Opt_hard, Opt_soft, Opt_perm, Opt_noperm, Opt_mapchars, Opt_nomapchars, Opt_sfu, @@ -121,6 +122,8 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_nouser_xattr, "nouser_xattr" }, { Opt_forceuid, "forceuid" }, { Opt_noforceuid, "noforceuid" }, + { Opt_forcegid, "forcegid" }, + { Opt_noforcegid, "noforcegid" }, { Opt_noblocksend, "noblocksend" }, { Opt_noautotune, "noautotune" }, { Opt_hard, "hard" }, @@ -1287,6 +1290,12 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, case Opt_noforceuid: override_uid = 0; break; + case Opt_forcegid: + override_gid = 1; + break; + case Opt_noforcegid: + override_gid = 0; + break; case Opt_noblocksend: vol->noblocksnd = 1; break; diff --git a/fs/compat.c b/fs/compat.c index f2944ace7a7b..2b371b38911d 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1160,11 +1160,14 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, struct file *file; int fput_needed; ssize_t ret; + loff_t pos; file = fget_light(fd, &fput_needed); if (!file) return -EBADF; - ret = compat_readv(file, vec, vlen, &file->f_pos); + pos = file->f_pos; + ret = compat_readv(file, vec, vlen, &pos); + file->f_pos = pos; fput_light(file, fput_needed); return ret; } @@ -1226,11 +1229,14 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, struct file *file; int fput_needed; ssize_t ret; + loff_t pos; file = fget_light(fd, &fput_needed); if (!file) return -EBADF; - ret = compat_writev(file, vec, vlen, &file->f_pos); + pos = file->f_pos; + ret = compat_writev(file, vec, vlen, &pos); + file->f_pos = pos; fput_light(file, fput_needed); return ret; } diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index debdfe0fc809..5d2069ff635c 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -210,6 +210,8 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd, err = get_user(palp, &up->palette); err |= get_user(length, &up->length); + if (err) + return -EFAULT; up_native = compat_alloc_user_space(sizeof(struct video_spu_palette)); err = put_user(compat_ptr(palp), &up_native->palette); diff --git a/fs/dcache.c b/fs/dcache.c index b80531c91779..f104945dcc7d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -373,7 +373,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) * Inform try_to_ascend() that we are no longer attached to the * dentry tree */ - dentry->d_flags |= DCACHE_DISCONNECTED; + dentry->d_flags |= DCACHE_DENTRY_KILLED; if (parent) spin_unlock(&parent->d_lock); dentry_iput(dentry); @@ -1030,7 +1030,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq * or deletion */ if (new != old->d_parent || - (old->d_flags & DCACHE_DISCONNECTED) || + (old->d_flags & DCACHE_DENTRY_KILLED) || (!locked && read_seqretry(&rename_lock, seq))) { spin_unlock(&new->d_lock); new = NULL; @@ -1116,6 +1116,8 @@ positive: return 1; rename_retry: + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; @@ -1218,6 +1220,8 @@ out: rename_retry: if (found) return found; + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; @@ -2963,6 +2967,8 @@ resume: return; rename_retry: + if (locked) + goto again; locked = 1; write_seqlock(&rename_lock); goto again; diff --git a/fs/direct-io.c b/fs/direct-io.c index f4aadd15b613..29c4fdab29db 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -305,9 +305,9 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is dio->end_io(dio->iocb, offset, transferred, dio->private, ret, is_async); } else { + inode_dio_done(dio->inode); if (is_async) aio_complete(dio->iocb, ret, 0); - inode_dio_done(dio->inode); } return ret; diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 867b64c5d84f..56e3aa57e188 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -568,6 +568,8 @@ struct ecryptfs_open_req { struct inode *ecryptfs_get_inode(struct inode *lower_inode, struct super_block *sb); void ecryptfs_i_size_init(const char *page_virt, struct inode *inode); +int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, + struct inode *ecryptfs_inode); int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, size_t *decrypted_name_size, struct dentry *ecryptfs_dentry, diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 2b17f2f9b121..d45ba4568128 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -138,29 +138,50 @@ out: return rc; } -static void ecryptfs_vma_close(struct vm_area_struct *vma) -{ - filemap_write_and_wait(vma->vm_file->f_mapping); -} - -static const struct vm_operations_struct ecryptfs_file_vm_ops = { - .close = ecryptfs_vma_close, - .fault = filemap_fault, -}; +struct kmem_cache *ecryptfs_file_info_cache; -static int ecryptfs_file_mmap(struct file *file, struct vm_area_struct *vma) +static int read_or_initialize_metadata(struct dentry *dentry) { + struct inode *inode = dentry->d_inode; + struct ecryptfs_mount_crypt_stat *mount_crypt_stat; + struct ecryptfs_crypt_stat *crypt_stat; int rc; - rc = generic_file_mmap(file, vma); + crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; + mount_crypt_stat = &ecryptfs_superblock_to_private( + inode->i_sb)->mount_crypt_stat; + mutex_lock(&crypt_stat->cs_mutex); + + if (crypt_stat->flags & ECRYPTFS_POLICY_APPLIED && + crypt_stat->flags & ECRYPTFS_KEY_VALID) { + rc = 0; + goto out; + } + + rc = ecryptfs_read_metadata(dentry); if (!rc) - vma->vm_ops = &ecryptfs_file_vm_ops; + goto out; + + if (mount_crypt_stat->flags & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED) { + crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED + | ECRYPTFS_ENCRYPTED); + rc = 0; + goto out; + } + + if (!(mount_crypt_stat->flags & ECRYPTFS_XATTR_METADATA_ENABLED) && + !i_size_read(ecryptfs_inode_to_lower(inode))) { + rc = ecryptfs_initialize_file(dentry, inode); + if (!rc) + goto out; + } + rc = -EIO; +out: + mutex_unlock(&crypt_stat->cs_mutex); return rc; } -struct kmem_cache *ecryptfs_file_info_cache; - /** * ecryptfs_open * @inode: inode speciying file to open @@ -236,32 +257,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file) rc = 0; goto out; } - mutex_lock(&crypt_stat->cs_mutex); - if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED) - || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) { - rc = ecryptfs_read_metadata(ecryptfs_dentry); - if (rc) { - ecryptfs_printk(KERN_DEBUG, - "Valid headers not found\n"); - if (!(mount_crypt_stat->flags - & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) { - rc = -EIO; - printk(KERN_WARNING "Either the lower file " - "is not in a valid eCryptfs format, " - "or the key could not be retrieved. " - "Plaintext passthrough mode is not " - "enabled; returning -EIO\n"); - mutex_unlock(&crypt_stat->cs_mutex); - goto out_put; - } - rc = 0; - crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED - | ECRYPTFS_ENCRYPTED); - mutex_unlock(&crypt_stat->cs_mutex); - goto out; - } - } - mutex_unlock(&crypt_stat->cs_mutex); + rc = read_or_initialize_metadata(ecryptfs_dentry); + if (rc) + goto out_put; ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = " "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino, (unsigned long long)i_size_read(inode)); @@ -277,8 +275,14 @@ out: static int ecryptfs_flush(struct file *file, fl_owner_t td) { - return file->f_mode & FMODE_WRITE - ? filemap_write_and_wait(file->f_mapping) : 0; + struct file *lower_file = ecryptfs_file_to_lower(file); + + if (lower_file->f_op && lower_file->f_op->flush) { + filemap_write_and_wait(file->f_mapping); + return lower_file->f_op->flush(lower_file, td); + } + + return 0; } static int ecryptfs_release(struct inode *inode, struct file *file) @@ -292,15 +296,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file) static int ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - int rc = 0; - - rc = generic_file_fsync(file, start, end, datasync); - if (rc) - goto out; - rc = vfs_fsync_range(ecryptfs_file_to_lower(file), start, end, - datasync); -out: - return rc; + return vfs_fsync(ecryptfs_file_to_lower(file), datasync); } static int ecryptfs_fasync(int fd, struct file *file, int flag) @@ -369,7 +365,7 @@ const struct file_operations ecryptfs_main_fops = { #ifdef CONFIG_COMPAT .compat_ioctl = ecryptfs_compat_ioctl, #endif - .mmap = ecryptfs_file_mmap, + .mmap = generic_file_mmap, .open = ecryptfs_open, .flush = ecryptfs_flush, .release = ecryptfs_release, diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index ab35b113003b..11030b2fd3b4 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -143,6 +143,31 @@ static int ecryptfs_interpose(struct dentry *lower_dentry, return 0; } +static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry, + struct inode *inode) +{ + struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); + struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); + struct dentry *lower_dir_dentry; + int rc; + + dget(lower_dentry); + lower_dir_dentry = lock_parent(lower_dentry); + rc = vfs_unlink(lower_dir_inode, lower_dentry); + if (rc) { + printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); + goto out_unlock; + } + fsstack_copy_attr_times(dir, lower_dir_inode); + set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink); + inode->i_ctime = dir->i_ctime; + d_drop(dentry); +out_unlock: + unlock_dir(lower_dir_dentry); + dput(lower_dentry); + return rc; +} + /** * ecryptfs_do_create * @directory_inode: inode of the new file's dentry's parent in ecryptfs @@ -182,8 +207,10 @@ ecryptfs_do_create(struct inode *directory_inode, } inode = __ecryptfs_get_inode(lower_dentry->d_inode, directory_inode->i_sb); - if (IS_ERR(inode)) + if (IS_ERR(inode)) { + vfs_unlink(lower_dir_dentry->d_inode, lower_dentry); goto out_lock; + } fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode); fsstack_copy_inode_size(directory_inode, lower_dir_dentry->d_inode); out_lock: @@ -200,8 +227,8 @@ out: * * Returns zero on success */ -static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, - struct inode *ecryptfs_inode) +int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry, + struct inode *ecryptfs_inode) { struct ecryptfs_crypt_stat *crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; @@ -265,7 +292,9 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, * that this on disk file is prepared to be an ecryptfs file */ rc = ecryptfs_initialize_file(ecryptfs_dentry, ecryptfs_inode); if (rc) { - drop_nlink(ecryptfs_inode); + ecryptfs_do_unlink(directory_inode, ecryptfs_dentry, + ecryptfs_inode); + make_bad_inode(ecryptfs_inode); unlock_new_inode(ecryptfs_inode); iput(ecryptfs_inode); goto out; @@ -477,27 +506,7 @@ out_lock: static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) { - int rc = 0; - struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); - struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); - struct dentry *lower_dir_dentry; - - dget(lower_dentry); - lower_dir_dentry = lock_parent(lower_dentry); - rc = vfs_unlink(lower_dir_inode, lower_dentry); - if (rc) { - printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); - goto out_unlock; - } - fsstack_copy_attr_times(dir, lower_dir_inode); - set_nlink(dentry->d_inode, - ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink); - dentry->d_inode->i_ctime = dir->i_ctime; - d_drop(dentry); -out_unlock: - unlock_dir(lower_dir_dentry); - dput(lower_dentry); - return rc; + return ecryptfs_do_unlink(dir, dentry, dentry->d_inode); } static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, @@ -621,6 +630,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *lower_old_dir_dentry; struct dentry *lower_new_dir_dentry; struct dentry *trap = NULL; + struct inode *target_inode; lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); @@ -628,6 +638,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, dget(lower_new_dentry); lower_old_dir_dentry = dget_parent(lower_old_dentry); lower_new_dir_dentry = dget_parent(lower_new_dentry); + target_inode = new_dentry->d_inode; trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); /* source should not be ancestor of target */ if (trap == lower_old_dentry) { @@ -643,6 +654,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, lower_new_dir_dentry->d_inode, lower_new_dentry); if (rc) goto out_lock; + if (target_inode) + fsstack_copy_attr_all(target_inode, + ecryptfs_inode_to_lower(target_inode)); fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode); if (new_dir != old_dir) fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode); @@ -1002,12 +1016,6 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) goto out; } - if (S_ISREG(inode->i_mode)) { - rc = filemap_write_and_wait(inode->i_mapping); - if (rc) - goto out; - fsstack_copy_attr_all(inode, lower_inode); - } memcpy(&lower_ia, ia, sizeof(lower_ia)); if (ia->ia_valid & ATTR_FILE) lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 68954937a071..240832e8b289 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -162,6 +162,7 @@ void ecryptfs_put_lower_file(struct inode *inode) inode_info = ecryptfs_inode_to_private(inode); if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count, &inode_info->lower_file_mutex)) { + filemap_write_and_wait(inode->i_mapping); fput(inode_info->lower_file); inode_info->lower_file = NULL; mutex_unlock(&inode_info->lower_file_mutex); @@ -279,6 +280,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, char *fnek_src; char *cipher_key_bytes_src; char *fn_cipher_key_bytes_src; + u8 cipher_code; *check_ruid = 0; @@ -420,6 +422,18 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, && !fn_cipher_key_bytes_set) mount_crypt_stat->global_default_fn_cipher_key_bytes = mount_crypt_stat->global_default_cipher_key_size; + + cipher_code = ecryptfs_code_for_cipher_string( + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_key_size); + if (!cipher_code) { + ecryptfs_printk(KERN_ERR, + "eCryptfs doesn't support cipher: %s", + mount_crypt_stat->global_default_cipher_name); + rc = -EINVAL; + goto out; + } + mutex_lock(&key_tfm_list_mutex); if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name, NULL)) { @@ -505,7 +519,6 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags goto out; } - s->s_flags = flags; rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY); if (rc) goto out1; @@ -541,6 +554,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags } ecryptfs_set_superblock_lower(s, path.dentry->d_sb); + + /** + * Set the POSIX ACL flag based on whether they're enabled in the lower + * mount. Force a read-only eCryptfs mount if the lower mount is ro. + * Allow a ro eCryptfs mount even when the lower mount is rw. + */ + s->s_flags = flags & ~MS_POSIXACL; + s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL); + s->s_maxbytes = path.dentry->d_sb->s_maxbytes; s->s_blocksize = path.dentry->d_sb->s_blocksize; s->s_magic = ECRYPTFS_SUPER_MAGIC; diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index a46b3a8fee1e..bd1d57f98f74 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -66,18 +66,6 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc) { int rc; - /* - * Refuse to write the page out if we are called from reclaim context - * since our writepage() path may potentially allocate memory when - * calling into the lower fs vfs_write() which may in turn invoke - * us again. - */ - if (current->flags & PF_MEMALLOC) { - redirty_page_for_writepage(wbc, page); - rc = 0; - goto out; - } - rc = ecryptfs_encrypt_page(page); if (rc) { ecryptfs_printk(KERN_WARNING, "Error encrypting " @@ -498,7 +486,6 @@ static int ecryptfs_write_end(struct file *file, struct ecryptfs_crypt_stat *crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; int rc; - int need_unlock_page = 1; ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" "(page w/ index = [0x%.16lx], to = [%d])\n", index, to); @@ -519,26 +506,26 @@ static int ecryptfs_write_end(struct file *file, "zeros in page with index = [0x%.16lx]\n", index); goto out; } - set_page_dirty(page); - unlock_page(page); - need_unlock_page = 0; + rc = ecryptfs_encrypt_page(page); + if (rc) { + ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " + "index [0x%.16lx])\n", index); + goto out; + } if (pos + copied > i_size_read(ecryptfs_inode)) { i_size_write(ecryptfs_inode, pos + copied); ecryptfs_printk(KERN_DEBUG, "Expanded file size to " "[0x%.16llx]\n", (unsigned long long)i_size_read(ecryptfs_inode)); - balance_dirty_pages_ratelimited(mapping); - rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); - if (rc) { - printk(KERN_ERR "Error writing inode size to metadata; " - "rc = [%d]\n", rc); - goto out; - } } - rc = copied; + rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode); + if (rc) + printk(KERN_ERR "Error writing inode size to metadata; " + "rc = [%d]\n", rc); + else + rc = copied; out: - if (need_unlock_page) - unlock_page(page); + unlock_page(page); page_cache_release(page); return rc; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 079d1be65ba9..0863bab42c4d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1285,7 +1285,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even * otherwise we might miss an event that happens between the * f_op->poll() call and the new event set registering. */ - epi->event.events = event->events; + epi->event.events = event->events; /* need barrier below */ pt._key = event->events; epi->event.data = event->data; /* protected by mtx */ if (epi->event.events & EPOLLWAKEUP) { @@ -1296,6 +1296,26 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even } /* + * The following barrier has two effects: + * + * 1) Flush epi changes above to other CPUs. This ensures + * we do not miss events from ep_poll_callback if an + * event occurs immediately after we call f_op->poll(). + * We need this because we did not take ep->lock while + * changing epi above (but ep_poll_callback does take + * ep->lock). + * + * 2) We also need to ensure we do not miss _past_ events + * when calling f_op->poll(). This barrier also + * pairs with the barrier in wq_has_sleeper (see + * comments for wq_has_sleeper). + * + * This barrier will now guarantee ep_poll_callback or f_op->poll + * (or both) will notice the readiness of an item. + */ + smp_mb(); + + /* * Get current event bits. We can safely use the file* here because * its usage count has been increased by the caller of this function. */ diff --git a/fs/exec.c b/fs/exec.c index 9ba382d7e726..c136a5e750fc 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1114,7 +1114,8 @@ int flush_old_exec(struct linux_binprm * bprm) bprm->mm = NULL; /* We're using it now */ set_fs(USER_DS); - current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD); + current->flags &= + ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD | PF_NOFREEZE); flush_thread(); current->personality &= ~bprm->per_clear; @@ -1205,9 +1206,24 @@ void free_bprm(struct linux_binprm *bprm) mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } + /* If a binfmt changed the interp, free it. */ + if (bprm->interp != bprm->filename) + kfree(bprm->interp); kfree(bprm); } +int bprm_change_interp(char *interp, struct linux_binprm *bprm) +{ + /* If a binfmt changed the interp, free it first. */ + if (bprm->interp != bprm->filename) + kfree(bprm->interp); + bprm->interp = kstrdup(interp, GFP_KERNEL); + if (!bprm->interp) + return -ENOMEM; + return 0; +} +EXPORT_SYMBOL(bprm_change_interp); + /* * install the new credentials for this executable */ diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 10d7812f6021..075281734fcb 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3068,6 +3068,8 @@ static int ext3_do_update_inode(handle_t *handle, struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh = iloc->bh; int err = 0, rc, block; + int need_datasync = 0; + __le32 disksize; again: /* we can't allow multiple procs in here at once, its a bit racey */ @@ -3105,7 +3107,11 @@ again: raw_inode->i_gid_high = 0; } raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); - raw_inode->i_size = cpu_to_le32(ei->i_disksize); + disksize = cpu_to_le32(ei->i_disksize); + if (disksize != raw_inode->i_size) { + need_datasync = 1; + raw_inode->i_size = disksize; + } raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); @@ -3121,8 +3127,11 @@ again: if (!S_ISREG(inode->i_mode)) { raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); } else { - raw_inode->i_size_high = - cpu_to_le32(ei->i_disksize >> 32); + disksize = cpu_to_le32(ei->i_disksize >> 32); + if (disksize != raw_inode->i_size_high) { + raw_inode->i_size_high = disksize; + need_datasync = 1; + } if (ei->i_disksize > 0x7fffffffULL) { struct super_block *sb = inode->i_sb; if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, @@ -3175,6 +3184,8 @@ again: ext3_clear_inode_state(inode, EXT3_STATE_NEW); atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); + if (need_datasync) + atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); out_brelse: brelse (bh); ext3_std_error(inode->i_sb, err); diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index a5c29bb3b835..8535c45dfceb 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -410,8 +410,10 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value, retry: handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + goto release_and_out; + } error = ext4_set_acl(handle, inode, type, acl); ext4_journal_stop(handle); if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index df76291d692b..3e1018ace702 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -326,7 +326,7 @@ err_out: return 0; } /** - * ext4_read_block_bitmap() + * ext4_read_block_bitmap_nowait() * @sb: super block * @block_group: given block group * @@ -422,6 +422,8 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) struct buffer_head *bh; bh = ext4_read_block_bitmap_nowait(sb, block_group); + if (!bh) + return NULL; if (ext4_wait_block_bitmap(sb, block_group, bh)) { put_bh(bh); return NULL; @@ -447,11 +449,16 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi, free_clusters = percpu_counter_read_positive(fcc); dirty_clusters = percpu_counter_read_positive(dcc); - root_clusters = EXT4_B2C(sbi, ext4_r_blocks_count(sbi->s_es)); + + /* + * r_blocks_count should always be multiple of the cluster ratio so + * we are safe to do a plane bit shift only. + */ + root_clusters = ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits; if (free_clusters - (nclusters + root_clusters + dirty_clusters) < EXT4_FREECLUSTERS_WATERMARK) { - free_clusters = EXT4_C2B(sbi, percpu_counter_sum_positive(fcc)); + free_clusters = percpu_counter_sum_positive(fcc); dirty_clusters = percpu_counter_sum_positive(dcc); } /* Check whether we have space after accounting for current diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 8b384cc3b75a..852d4c257ace 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -52,6 +52,9 @@ #define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */ #define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */ +#define EXT4_EXT_DATA_VALID1 0x8 /* first half contains valid data */ +#define EXT4_EXT_DATA_VALID2 0x10 /* second half contains valid data */ + static int ext4_split_extent(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, @@ -2107,13 +2110,14 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, * removes index from the index block. */ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) + struct ext4_ext_path *path, int depth) { int err; ext4_fsblk_t leaf; /* free index block */ - path--; + depth--; + path = path + depth; leaf = ext4_idx_pblock(path->p_idx); if (unlikely(path->p_hdr->eh_entries == 0)) { EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); @@ -2138,6 +2142,19 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, ext4_free_blocks(handle, inode, NULL, leaf, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); + + while (--depth >= 0) { + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) + break; + path--; + err = ext4_ext_get_access(handle, inode, path); + if (err) + break; + path->p_idx->ei_block = (path+1)->p_idx->ei_block; + err = ext4_ext_dirty(handle, inode, path); + if (err) + break; + } return err; } @@ -2471,7 +2488,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, /* if this leaf is free, then we should * remove it from index block above */ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) - err = ext4_ext_rm_idx(handle, inode, path + depth); + err = ext4_ext_rm_idx(handle, inode, path, depth); out: return err; @@ -2672,7 +2689,7 @@ cont: /* index is empty, remove it; * handle must be already prepared by the * truncatei_leaf() */ - err = ext4_ext_rm_idx(handle, inode, path + i); + err = ext4_ext_rm_idx(handle, inode, path, i); } /* root level has p_bh == NULL, brelse() eats this */ brelse(path[i].p_bh); @@ -2829,6 +2846,9 @@ static int ext4_split_extent_at(handle_t *handle, unsigned int ee_len, depth; int err = 0; + BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) == + (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)); + ext_debug("ext4_split_extents_at: inode %lu, logical" "block %llu\n", inode->i_ino, (unsigned long long)split); @@ -2887,7 +2907,14 @@ static int ext4_split_extent_at(handle_t *handle, err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { - err = ext4_ext_zeroout(inode, &orig_ex); + if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { + if (split_flag & EXT4_EXT_DATA_VALID1) + err = ext4_ext_zeroout(inode, ex2); + else + err = ext4_ext_zeroout(inode, ex); + } else + err = ext4_ext_zeroout(inode, &orig_ex); + if (err) goto fix_extent_len; /* update the extent length and mark as initialized */ @@ -2940,12 +2967,13 @@ static int ext4_split_extent(handle_t *handle, uninitialized = ext4_ext_is_uninitialized(ex); if (map->m_lblk + map->m_len < ee_block + ee_len) { - split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? - EXT4_EXT_MAY_ZEROOUT : 0; + split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT; flags1 = flags | EXT4_GET_BLOCKS_PRE_IO; if (uninitialized) split_flag1 |= EXT4_EXT_MARK_UNINIT1 | EXT4_EXT_MARK_UNINIT2; + if (split_flag & EXT4_EXT_DATA_VALID2) + split_flag1 |= EXT4_EXT_DATA_VALID1; err = ext4_split_extent_at(handle, inode, path, map->m_lblk + map->m_len, split_flag1, flags1); if (err) @@ -2958,8 +2986,8 @@ static int ext4_split_extent(handle_t *handle, return PTR_ERR(path); if (map->m_lblk >= ee_block) { - split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ? - EXT4_EXT_MAY_ZEROOUT : 0; + split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | + EXT4_EXT_DATA_VALID2); if (uninitialized) split_flag1 |= EXT4_EXT_MARK_UNINIT1; if (split_flag & EXT4_EXT_MARK_UNINIT2) @@ -3237,26 +3265,47 @@ static int ext4_split_unwritten_extents(handle_t *handle, split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; split_flag |= EXT4_EXT_MARK_UNINIT2; - + if (flags & EXT4_GET_BLOCKS_CONVERT) + split_flag |= EXT4_EXT_DATA_VALID2; flags |= EXT4_GET_BLOCKS_PRE_IO; return ext4_split_extent(handle, inode, path, map, split_flag, flags); } static int ext4_convert_unwritten_extents_endio(handle_t *handle, - struct inode *inode, - struct ext4_ext_path *path) + struct inode *inode, + struct ext4_map_blocks *map, + struct ext4_ext_path *path) { struct ext4_extent *ex; + ext4_lblk_t ee_block; + unsigned int ee_len; int depth; int err = 0; depth = ext_depth(inode); ex = path[depth].p_ext; + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical" "block %llu, max_blocks %u\n", inode->i_ino, - (unsigned long long)le32_to_cpu(ex->ee_block), - ext4_ext_get_actual_len(ex)); + (unsigned long long)ee_block, ee_len); + + /* If extent is larger than requested then split is required */ + if (ee_block != map->m_lblk || ee_len > map->m_len) { + err = ext4_split_unwritten_extents(handle, inode, map, path, + EXT4_GET_BLOCKS_CONVERT); + if (err < 0) + goto out; + ext4_ext_drop_refs(path); + path = ext4_ext_find_extent(inode, map->m_lblk, path); + if (IS_ERR(path)) { + err = PTR_ERR(path); + goto out; + } + depth = ext_depth(inode); + ex = path[depth].p_ext; + } err = ext4_ext_get_access(handle, inode, path + depth); if (err) @@ -3564,7 +3613,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, } /* IO end_io complete, convert the filled extent to written */ if ((flags & EXT4_GET_BLOCKS_CONVERT)) { - ret = ext4_convert_unwritten_extents_endio(handle, inode, + ret = ext4_convert_unwritten_extents_endio(handle, inode, map, path); if (ret >= 0) { ext4_update_inode_fsync_trans(handle, inode, 1); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 0ee374d27fdb..902544e7ee5c 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -697,6 +697,10 @@ repeat_in_this_group: "inode=%lu", ino + 1); continue; } + BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, inode_bitmap_bh); + if (err) + goto fail; ext4_lock_group(sb, group); ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data); ext4_unlock_group(sb, group); @@ -710,6 +714,11 @@ repeat_in_this_group: goto out; got: + BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh); + if (err) + goto fail; + /* We may have to initialize the block bitmap if it isn't already */ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { @@ -725,7 +734,6 @@ got: BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap"); err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh); - brelse(block_bitmap_bh); /* recheck and clear flag under lock if we still need to */ ext4_lock_group(sb, group); @@ -737,16 +745,12 @@ got: gdp); } ext4_unlock_group(sb, group); + brelse(block_bitmap_bh); if (err) goto fail; } - BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, inode_bitmap_bh); - if (err) - goto fail; - BUFFER_TRACE(group_desc_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, group_desc_bh); if (err) @@ -789,11 +793,6 @@ got: ext4_unlock_group(sb, group); } - BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); - err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh); - if (err) - goto fail; - BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh); if (err) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 55a654d86182..a0d7e2617fda 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1424,6 +1424,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) index = mpd->first_page; end = mpd->next_page - 1; + + pagevec_init(&pvec, 0); while (index <= end) { nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); if (nr_pages == 0) @@ -2386,6 +2388,16 @@ static int ext4_nonda_switch(struct super_block *sb) free_blocks = EXT4_C2B(sbi, percpu_counter_read_positive(&sbi->s_freeclusters_counter)); dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); + /* + * Start pushing delalloc when 1/2 of free blocks are dirty. + */ + if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && + !writeback_in_progress(sb->s_bdi) && + down_read_trylock(&sb->s_umount)) { + writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); + up_read(&sb->s_umount); + } + if (2 * free_blocks < 3 * dirty_blocks || free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { /* @@ -2394,13 +2406,6 @@ static int ext4_nonda_switch(struct super_block *sb) */ return 1; } - /* - * Even if we don't switch but are nearing capacity, - * start pushing delalloc when 1/2 of free blocks are dirty. - */ - if (free_blocks < 2 * dirty_blocks) - writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE); - return 0; } @@ -3889,6 +3894,7 @@ static int ext4_do_update_inode(handle_t *handle, struct ext4_inode_info *ei = EXT4_I(inode); struct buffer_head *bh = iloc->bh; int err = 0, rc, block; + int need_datasync = 0; /* For fields not not tracking in the in-memory inode, * initialise them to zero for new inodes. */ @@ -3937,7 +3943,10 @@ static int ext4_do_update_inode(handle_t *handle, raw_inode->i_file_acl_high = cpu_to_le16(ei->i_file_acl >> 32); raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); - ext4_isize_set(raw_inode, ei->i_disksize); + if (ei->i_disksize != ext4_isize(raw_inode)) { + ext4_isize_set(raw_inode, ei->i_disksize); + need_datasync = 1; + } if (ei->i_disksize > 0x7fffffffULL) { struct super_block *sb = inode->i_sb; if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, @@ -3988,7 +3997,7 @@ static int ext4_do_update_inode(handle_t *handle, err = rc; ext4_clear_inode_state(inode, EXT4_STATE_NEW); - ext4_update_inode_fsync_trans(handle, inode, 0); + ext4_update_inode_fsync_trans(handle, inode, need_datasync); out_brelse: brelse(bh); ext4_std_error(inode->i_sb, err); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 6b0a57eafb53..3122ece15147 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4126,7 +4126,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) /* The max size of hash table is PREALLOC_TB_SIZE */ order = PREALLOC_TB_SIZE - 1; /* Add the prealloc space to lg */ - rcu_read_lock(); + spin_lock(&lg->lg_prealloc_lock); list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], pa_inode_list) { spin_lock(&tmp_pa->pa_lock); @@ -4150,12 +4150,12 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) if (!added) list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list[order]); - rcu_read_unlock(); + spin_unlock(&lg->lg_prealloc_lock); /* Now trim the list to be not more than 8 elements */ if (lg_prealloc_count > 8) { ext4_mb_discard_lg_preallocations(sb, lg, - order, lg_prealloc_count); + order, lg_prealloc_count); return; } return ; @@ -4984,8 +4984,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) end = start + (range->len >> sb->s_blocksize_bits) - 1; minlen = range->minlen >> sb->s_blocksize_bits; - if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || - unlikely(start >= max_blks)) + if (minlen > EXT4_CLUSTERS_PER_GROUP(sb) || + start >= max_blks || + range->len < sb->s_blocksize) return -EINVAL; if (end >= max_blks) end = max_blks - 1; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index c5826c623e7a..e2016f34b58f 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -141,55 +141,21 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, } /** - * mext_check_null_inode - NULL check for two inodes - * - * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. - */ -static int -mext_check_null_inode(struct inode *inode1, struct inode *inode2, - const char *function, unsigned int line) -{ - int ret = 0; - - if (inode1 == NULL) { - __ext4_error(inode2->i_sb, function, line, - "Both inodes should not be NULL: " - "inode1 NULL inode2 %lu", inode2->i_ino); - ret = -EIO; - } else if (inode2 == NULL) { - __ext4_error(inode1->i_sb, function, line, - "Both inodes should not be NULL: " - "inode1 %lu inode2 NULL", inode1->i_ino); - ret = -EIO; - } - return ret; -} - -/** * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem * - * @orig_inode: original inode structure - * @donor_inode: donor inode structure - * Acquire write lock of i_data_sem of the two inodes (orig and donor) by - * i_ino order. + * Acquire write lock of i_data_sem of the two inodes */ static void -double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) +double_down_write_data_sem(struct inode *first, struct inode *second) { - struct inode *first = orig_inode, *second = donor_inode; + if (first < second) { + down_write(&EXT4_I(first)->i_data_sem); + down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); + } else { + down_write(&EXT4_I(second)->i_data_sem); + down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING); - /* - * Use the inode number to provide the stable locking order instead - * of its address, because the C language doesn't guarantee you can - * compare pointers that don't come from the same array. - */ - if (donor_inode->i_ino < orig_inode->i_ino) { - first = donor_inode; - second = orig_inode; } - - down_write(&EXT4_I(first)->i_data_sem); - down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); } /** @@ -969,14 +935,6 @@ mext_check_arguments(struct inode *orig_inode, return -EINVAL; } - /* Files should be in the same ext4 FS */ - if (orig_inode->i_sb != donor_inode->i_sb) { - ext4_debug("ext4 move extent: The argument files " - "should be in same FS [ino:orig %lu, donor %lu]\n", - orig_inode->i_ino, donor_inode->i_ino); - return -EINVAL; - } - /* Ext4 move extent supports only extent based file */ if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { ext4_debug("ext4 move extent: orig file is not extents " @@ -1072,35 +1030,19 @@ mext_check_arguments(struct inode *orig_inode, * @inode1: the inode structure * @inode2: the inode structure * - * Lock two inodes' i_mutex by i_ino order. - * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. + * Lock two inodes' i_mutex */ -static int +static void mext_inode_double_lock(struct inode *inode1, struct inode *inode2) { - int ret = 0; - - BUG_ON(inode1 == NULL && inode2 == NULL); - - ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); - if (ret < 0) - goto out; - - if (inode1 == inode2) { - mutex_lock(&inode1->i_mutex); - goto out; - } - - if (inode1->i_ino < inode2->i_ino) { + BUG_ON(inode1 == inode2); + if (inode1 < inode2) { mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); } else { mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); } - -out: - return ret; } /** @@ -1109,28 +1051,13 @@ out: * @inode1: the inode that is released first * @inode2: the inode that is released second * - * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. */ -static int +static void mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) { - int ret = 0; - - BUG_ON(inode1 == NULL && inode2 == NULL); - - ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); - if (ret < 0) - goto out; - - if (inode1) - mutex_unlock(&inode1->i_mutex); - - if (inode2 && inode2 != inode1) - mutex_unlock(&inode2->i_mutex); - -out: - return ret; + mutex_unlock(&inode1->i_mutex); + mutex_unlock(&inode2->i_mutex); } /** @@ -1187,16 +1114,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; ext4_lblk_t rest_blocks; pgoff_t orig_page_offset = 0, seq_end_page; - int ret1, ret2, depth, last_extent = 0; + int ret, depth, last_extent = 0; int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; int data_offset_in_page; int block_len_in_page; int uninit; - /* orig and donor should be different file */ - if (orig_inode->i_ino == donor_inode->i_ino) { + if (orig_inode->i_sb != donor_inode->i_sb) { + ext4_debug("ext4 move extent: The argument files " + "should be in same FS [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } + + /* orig and donor should be different inodes */ + if (orig_inode == donor_inode) { ext4_debug("ext4 move extent: The argument files should not " - "be same file [ino:orig %lu, donor %lu]\n", + "be same inode [ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } @@ -1208,18 +1142,21 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } - + /* TODO: This is non obvious task to swap blocks for inodes with full + jornaling enabled */ + if (ext4_should_journal_data(orig_inode) || + ext4_should_journal_data(donor_inode)) { + return -EINVAL; + } /* Protect orig and donor inodes against a truncate */ - ret1 = mext_inode_double_lock(orig_inode, donor_inode); - if (ret1 < 0) - return ret1; + mext_inode_double_lock(orig_inode, donor_inode); /* Protect extent tree against block allocations via delalloc */ double_down_write_data_sem(orig_inode, donor_inode); /* Check the filesystem environment whether move_extent can be done */ - ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, + ret = mext_check_arguments(orig_inode, donor_inode, orig_start, donor_start, &len); - if (ret1) + if (ret) goto out; file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; @@ -1227,13 +1164,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, if (file_end < block_end) len -= block_end - file_end; - ret1 = get_ext_path(orig_inode, block_start, &orig_path); - if (ret1) + ret = get_ext_path(orig_inode, block_start, &orig_path); + if (ret) goto out; /* Get path structure to check the hole */ - ret1 = get_ext_path(orig_inode, block_start, &holecheck_path); - if (ret1) + ret = get_ext_path(orig_inode, block_start, &holecheck_path); + if (ret) goto out; depth = ext_depth(orig_inode); @@ -1252,13 +1189,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, last_extent = mext_next_extent(orig_inode, holecheck_path, &ext_cur); if (last_extent < 0) { - ret1 = last_extent; + ret = last_extent; goto out; } last_extent = mext_next_extent(orig_inode, orig_path, &ext_dummy); if (last_extent < 0) { - ret1 = last_extent; + ret = last_extent; goto out; } seq_start = le32_to_cpu(ext_cur->ee_block); @@ -1272,7 +1209,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, if (le32_to_cpu(ext_cur->ee_block) > block_end) { ext4_debug("ext4 move extent: The specified range of file " "may be the hole\n"); - ret1 = -EINVAL; + ret = -EINVAL; goto out; } @@ -1292,7 +1229,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, last_extent = mext_next_extent(orig_inode, holecheck_path, &ext_cur); if (last_extent < 0) { - ret1 = last_extent; + ret = last_extent; break; } add_blocks = ext4_ext_get_actual_len(ext_cur); @@ -1349,18 +1286,18 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, orig_page_offset, data_offset_in_page, block_len_in_page, uninit, - &ret1); + &ret); /* Count how many blocks we have exchanged */ *moved_len += block_len_in_page; - if (ret1 < 0) + if (ret < 0) break; if (*moved_len > len) { EXT4_ERROR_INODE(orig_inode, "We replaced blocks too much! " "sum of replaced: %llu requested: %llu", *moved_len, len); - ret1 = -EIO; + ret = -EIO; break; } @@ -1374,22 +1311,22 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, } double_down_write_data_sem(orig_inode, donor_inode); - if (ret1 < 0) + if (ret < 0) break; /* Decrease buffer counter */ if (holecheck_path) ext4_ext_drop_refs(holecheck_path); - ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path); - if (ret1) + ret = get_ext_path(orig_inode, seq_start, &holecheck_path); + if (ret) break; depth = holecheck_path->p_depth; /* Decrease buffer counter */ if (orig_path) ext4_ext_drop_refs(orig_path); - ret1 = get_ext_path(orig_inode, seq_start, &orig_path); - if (ret1) + ret = get_ext_path(orig_inode, seq_start, &orig_path); + if (ret) break; ext_cur = holecheck_path[depth].p_ext; @@ -1412,12 +1349,7 @@ out: kfree(holecheck_path); } double_up_write_data_sem(orig_inode, donor_inode); - ret2 = mext_inode_double_unlock(orig_inode, donor_inode); - - if (ret1) - return ret1; - else if (ret2) - return ret2; + mext_inode_double_unlock(orig_inode, donor_inode); - return 0; + return ret; } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 0a94cbbe8828..ac769399b141 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1801,9 +1801,7 @@ retry: err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); -#ifdef CONFIG_EXT4_FS_XATTR inode->i_op = &ext4_special_inode_operations; -#endif err = ext4_add_nondir(handle, dentry, inode); } ext4_journal_stop(handle); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3407a62590d6..231cacb8f640 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -200,8 +200,11 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd) * be a partial of a flex group. * * @sb: super block of fs to which the groups belongs + * + * Returns 0 on a successful allocation of the metadata blocks in the + * block group. */ -static void ext4_alloc_group_tables(struct super_block *sb, +static int ext4_alloc_group_tables(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd, int flexbg_size) { @@ -226,6 +229,8 @@ static void ext4_alloc_group_tables(struct super_block *sb, (last_group & ~(flexbg_size - 1)))); next_group: group = group_data[0].group; + if (src_group >= group_data[0].group + flex_gd->count) + return -ENOSPC; start_blk = ext4_group_first_block_no(sb, src_group); last_blk = start_blk + group_data[src_group - group].blocks_count; @@ -235,7 +240,6 @@ next_group: start_blk += overhead; - BUG_ON(src_group >= group_data[0].group + flex_gd->count); /* We collect contiguous blocks as much as possible. */ src_group++; for (; src_group <= last_group; src_group++) @@ -300,6 +304,7 @@ next_group: group_data[i].free_blocks_count); } } + return 0; } static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, @@ -451,6 +456,9 @@ static int setup_new_flex_group_blocks(struct super_block *sb, gdblocks = ext4_bg_num_gdb(sb, group); start = ext4_group_first_block_no(sb, group); + if (!ext4_bg_has_super(sb, group)) + goto handle_itb; + /* Copy all of the GDT blocks into the backup in this group */ for (j = 0, block = start + 1; j < gdblocks; j++, block++) { struct buffer_head *gdb; @@ -493,6 +501,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb, goto out; } +handle_itb: /* Initialize group tables of the grop @group */ if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) goto handle_bb; @@ -1293,13 +1302,15 @@ exit_journal: err = err2; if (!err) { - int i; + int gdb_num = group / EXT4_DESC_PER_BLOCK(sb); + int gdb_num_end = ((group + flex_gd->count - 1) / + EXT4_DESC_PER_BLOCK(sb)); + update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, sizeof(struct ext4_super_block)); - for (i = 0; i < flex_gd->count; i++, group++) { + for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; - int gdb_num; - gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb); + gdb_bh = sbi->s_group_desc[gdb_num]; update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, gdb_bh->b_size); @@ -1676,7 +1687,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) */ while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, flexbg_size)) { - ext4_alloc_group_tables(sb, flex_gd, flexbg_size); + if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0) + break; err = ext4_flex_group_add(sb, resize_inode, flex_gd); if (unlikely(err)) break; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8f3459e2ffee..94dc725a1bb8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1599,9 +1599,7 @@ static int parse_options(char *options, struct super_block *sb, unsigned int *journal_ioprio, int is_remount) { -#ifdef CONFIG_QUOTA struct ext4_sb_info *sbi = EXT4_SB(sb); -#endif char *p; substring_t args[MAX_OPT_ARGS]; int token; @@ -1650,6 +1648,16 @@ static int parse_options(char *options, struct super_block *sb, } } #endif + if (test_opt(sb, DIOREAD_NOLOCK)) { + int blocksize = + BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); + + if (blocksize < PAGE_CACHE_SIZE) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "dioread_nolock if block size != PAGE_SIZE"); + return 0; + } + } return 1; } @@ -1692,7 +1700,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq, static const char *token2str(int token) { - static const struct match_token *t; + const struct match_token *t; for (t = tokens; t->token != Opt_err; t++) if (t->token == token && !strchr(t->pattern, '=')) @@ -2112,7 +2120,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, __func__, inode->i_ino, inode->i_size); jbd_debug(2, "truncating inode %lu to %lld bytes\n", inode->i_ino, inode->i_size); + mutex_lock(&inode->i_mutex); ext4_truncate(inode); + mutex_unlock(&inode->i_mutex); nr_truncates++; } else { ext4_msg(sb, KERN_DEBUG, @@ -3226,15 +3236,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) clear_opt(sb, DELALLOC); } - blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); - if (test_opt(sb, DIOREAD_NOLOCK)) { - if (blocksize < PAGE_SIZE) { - ext4_msg(sb, KERN_ERR, "can't mount with " - "dioread_nolock if block size != PAGE_SIZE"); - goto failed_mount; - } - } - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); @@ -3276,6 +3277,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) goto failed_mount; + blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); if (blocksize < EXT4_MIN_BLOCK_SIZE || blocksize > EXT4_MAX_BLOCK_SIZE) { ext4_msg(sb, KERN_ERR, @@ -4506,7 +4508,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } ext4_setup_system_zone(sb); - if (sbi->s_journal == NULL) + if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY)) ext4_commit_super(sb, 1); #ifdef CONFIG_QUOTA diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e88748e55c0f..e712a8cb1653 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -495,7 +495,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, error = ext4_handle_dirty_metadata(handle, inode, bh); if (IS_SYNC(inode)) ext4_handle_sync(handle); - dquot_free_block(inode, 1); + dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1)); ea_bdebug(bh, "refcount now=%d; releasing", le32_to_cpu(BHDR(bh)->h_refcount)); } @@ -784,7 +784,8 @@ inserted: else { /* The old block is released after updating the inode. */ - error = dquot_alloc_block(inode, 1); + error = dquot_alloc_block(inode, + EXT4_C2B(EXT4_SB(sb), 1)); if (error) goto cleanup; error = ext4_journal_get_write_access(handle, @@ -880,7 +881,7 @@ cleanup: return error; cleanup_dquot: - dquot_free_block(inode, 1); + dquot_free_block(inode, EXT4_C2B(EXT4_SB(sb), 1)); goto cleanup; bad_block: diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 42a853eec632..1b21e0a061d5 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -68,6 +68,7 @@ int writeback_in_progress(struct backing_dev_info *bdi) { return test_bit(BDI_writeback_running, &bdi->state); } +EXPORT_SYMBOL(writeback_in_progress); static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) { diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ceb3b29c8cdf..1f81f70dda62 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1580,6 +1580,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->pages[req->num_pages] = page; req->num_pages++; + offset = 0; num -= this_num; total_len += this_num; index++; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index bc438320cac5..d48478a864b0 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -645,7 +645,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) spin_lock(&fc->lock); fi->attr_version = ++fc->attr_version; - drop_nlink(inode); + /* + * If i_nlink == 0 then unlink doesn't make sense, yet this can + * happen if userspace filesystem is careless. It would be + * difficult to enforce correct nlink usage so just ignore this + * condition here + */ + if (inode->i_nlink > 0) + drop_nlink(inode); spin_unlock(&fc->lock); fuse_invalidate_attr(inode); fuse_invalidate_attr(dir); diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 70ba891654f8..fdef7f0e28ab 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -168,6 +168,8 @@ static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid, case GFS2_SMALL_FH_SIZE: case GFS2_LARGE_FH_SIZE: case GFS2_OLD_FH_SIZE: + if (fh_len < GFS2_SMALL_FH_SIZE) + return NULL; this.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32; this.no_formal_ino |= be32_to_cpu(fh[1]); this.no_addr = ((u64)be32_to_cpu(fh[2])) << 32; @@ -187,6 +189,8 @@ static struct dentry *gfs2_fh_to_parent(struct super_block *sb, struct fid *fid, switch (fh_type) { case GFS2_LARGE_FH_SIZE: case GFS2_OLD_FH_SIZE: + if (fh_len < GFS2_LARGE_FH_SIZE) + return NULL; parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32; parent.no_formal_ino |= be32_to_cpu(fh[5]); parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 6b1efb594d90..a392e32af480 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -258,16 +258,14 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) struct gfs2_meta_header *mh; struct gfs2_trans *tr; - lock_buffer(bd->bd_bh); - gfs2_log_lock(sdp); if (!list_empty(&bd->bd_list_tr)) - goto out; + return; tr = current->journal_info; tr->tr_touched = 1; tr->tr_num_buf++; list_add(&bd->bd_list_tr, &tr->tr_list_buf); if (!list_empty(&le->le_list)) - goto out; + return; set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); gfs2_meta_check(sdp, bd->bd_bh); @@ -278,9 +276,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) sdp->sd_log_num_buf++; list_add(&le->le_list, &sdp->sd_log_le_buf); tr->tr_num_buf_new++; -out: - gfs2_log_unlock(sdp); - unlock_buffer(bd->bd_bh); } static void buf_lo_before_commit(struct gfs2_sbd *sdp) @@ -611,11 +606,9 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) struct address_space *mapping = bd->bd_bh->b_page->mapping; struct gfs2_inode *ip = GFS2_I(mapping->host); - lock_buffer(bd->bd_bh); - gfs2_log_lock(sdp); if (tr) { if (!list_empty(&bd->bd_list_tr)) - goto out; + return; tr->tr_touched = 1; if (gfs2_is_jdata(ip)) { tr->tr_num_buf++; @@ -623,7 +616,7 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) } } if (!list_empty(&le->le_list)) - goto out; + return; set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); @@ -635,9 +628,6 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) } else { list_add_tail(&le->le_list, &sdp->sd_log_le_ordered); } -out: - gfs2_log_unlock(sdp); - unlock_buffer(bd->bd_bh); } static void gfs2_check_magic(struct buffer_head *bh) diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 86ac75d99d31..6ab2a77e7726 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -145,14 +145,22 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta) struct gfs2_sbd *sdp = gl->gl_sbd; struct gfs2_bufdata *bd; + lock_buffer(bh); + gfs2_log_lock(sdp); bd = bh->b_private; if (bd) gfs2_assert(sdp, bd->bd_gl == gl); else { + gfs2_log_unlock(sdp); + unlock_buffer(bh); gfs2_attach_bufdata(gl, bh, meta); bd = bh->b_private; + lock_buffer(bh); + gfs2_log_lock(sdp); } lops_add(sdp, &bd->bd_le); + gfs2_log_unlock(sdp); + unlock_buffer(bh); } void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) diff --git a/fs/isofs/export.c b/fs/isofs/export.c index dd4687ff30d0..516eb21895c6 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -179,7 +179,7 @@ static struct dentry *isofs_fh_to_parent(struct super_block *sb, { struct isofs_fid *ifid = (struct isofs_fid *)fid; - if (fh_type != 2) + if (fh_len < 2 || fh_type != 2) return NULL; return isofs_export_iget(sb, diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index f2b9a571f4cf..9626bc8cbbd4 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -86,7 +86,12 @@ nope: static void release_data_buffer(struct buffer_head *bh) { if (buffer_freed(bh)) { + WARN_ON_ONCE(buffer_dirty(bh)); clear_buffer_freed(bh); + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_req(bh); + bh->b_bdev = NULL; release_buffer_page(bh); } else put_bh(bh); @@ -853,17 +858,35 @@ restart_loop: * there's no point in keeping a checkpoint record for * it. */ - /* A buffer which has been freed while still being - * journaled by a previous transaction may end up still - * being dirty here, but we want to avoid writing back - * that buffer in the future after the "add to orphan" - * operation been committed, That's not only a performance - * gain, it also stops aliasing problems if the buffer is - * left behind for writeback and gets reallocated for another - * use in a different page. */ - if (buffer_freed(bh) && !jh->b_next_transaction) { - clear_buffer_freed(bh); - clear_buffer_jbddirty(bh); + /* + * A buffer which has been freed while still being journaled by + * a previous transaction. + */ + if (buffer_freed(bh)) { + /* + * If the running transaction is the one containing + * "add to orphan" operation (b_next_transaction != + * NULL), we have to wait for that transaction to + * commit before we can really get rid of the buffer. + * So just clear b_modified to not confuse transaction + * credit accounting and refile the buffer to + * BJ_Forget of the running transaction. If the just + * committed transaction contains "add to orphan" + * operation, we can completely invalidate the buffer + * now. We are rather throughout in that since the + * buffer may be still accessible when blocksize < + * pagesize and it is attached to the last partial + * page. + */ + jh->b_modified = 0; + if (!jh->b_next_transaction) { + clear_buffer_freed(bh); + clear_buffer_jbddirty(bh); + clear_buffer_mapped(bh); + clear_buffer_new(bh); + clear_buffer_req(bh); + bh->b_bdev = NULL; + } } if (buffer_jbddirty(bh)) { diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index b2a7e5244e39..ee959a9fed62 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -1845,15 +1845,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) * We're outside-transaction here. Either or both of j_running_transaction * and j_committing_transaction may be NULL. */ -static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) +static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, + int partial_page) { transaction_t *transaction; struct journal_head *jh; int may_free = 1; - int ret; BUFFER_TRACE(bh, "entry"); +retry: /* * It is safe to proceed here without the j_list_lock because the * buffers cannot be stolen by try_to_free_buffers as long as we are @@ -1881,10 +1882,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * clear the buffer dirty bit at latest at the moment when the * transaction marking the buffer as freed in the filesystem * structures is committed because from that moment on the - * buffer can be reallocated and used by a different page. + * block can be reallocated and used by a different page. * Since the block hasn't been freed yet but the inode has * already been added to orphan list, it is safe for us to add * the buffer to BJ_Forget list of the newest transaction. + * + * Also we have to clear buffer_mapped flag of a truncated buffer + * because the buffer_head may be attached to the page straddling + * i_size (can happen only when blocksize < pagesize) and thus the + * buffer_head can be reused when the file is extended again. So we end + * up keeping around invalidated buffers attached to transactions' + * BJ_Forget list just to stop checkpointing code from cleaning up + * the transaction this buffer was modified in. */ transaction = jh->b_transaction; if (transaction == NULL) { @@ -1911,13 +1920,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * committed, the buffer won't be needed any * longer. */ JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); - ret = __dispose_buffer(jh, + may_free = __dispose_buffer(jh, journal->j_running_transaction); - journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); - return ret; + goto zap_buffer; } else { /* There is no currently-running transaction. So the * orphan record which we wrote for this file must have @@ -1925,13 +1930,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) * the committing transaction, if it exists. */ if (journal->j_committing_transaction) { JBUFFER_TRACE(jh, "give to committing trans"); - ret = __dispose_buffer(jh, + may_free = __dispose_buffer(jh, journal->j_committing_transaction); - journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); - return ret; + goto zap_buffer; } else { /* The orphan record's transaction has * committed. We can cleanse this buffer */ @@ -1952,10 +1953,26 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } /* * The buffer is committing, we simply cannot touch - * it. So we just set j_next_transaction to the - * running transaction (if there is one) and mark - * buffer as freed so that commit code knows it should - * clear dirty bits when it is done with the buffer. + * it. If the page is straddling i_size we have to wait + * for commit and try again. + */ + if (partial_page) { + tid_t tid = journal->j_committing_transaction->t_tid; + + journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + spin_unlock(&journal->j_state_lock); + unlock_buffer(bh); + log_wait_commit(journal, tid); + lock_buffer(bh); + goto retry; + } + /* + * OK, buffer won't be reachable after truncate. We just set + * j_next_transaction to the running transaction (if there is + * one) and mark buffer as freed so that commit code knows it + * should clear dirty bits when it is done with the buffer. */ set_buffer_freed(bh); if (journal->j_running_transaction && buffer_jbddirty(bh)) @@ -1978,6 +1995,14 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } zap_buffer: + /* + * This is tricky. Although the buffer is truncated, it may be reused + * if blocksize < pagesize and it is attached to the page straddling + * EOF. Since the buffer might have been added to BJ_Forget list of the + * running transaction, journal_get_write_access() won't clear + * b_modified and credit accounting gets confused. So clear b_modified + * here. */ + jh->b_modified = 0; journal_put_journal_head(jh); zap_buffer_no_jh: spin_unlock(&journal->j_list_lock); @@ -2026,7 +2051,8 @@ void journal_invalidatepage(journal_t *journal, if (offset <= curr_off) { /* This block is wholly outside the truncation point */ lock_buffer(bh); - may_free &= journal_unmap_buffer(journal, bh); + may_free &= journal_unmap_buffer(journal, bh, + offset > 0); unlock_buffer(bh); } curr_off = next_off; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 9956ac68b72b..e5bfb1150cf5 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1317,6 +1317,11 @@ static void jbd2_mark_journal_empty(journal_t *journal) BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); read_lock(&journal->j_state_lock); + /* Is it already empty? */ + if (sb->s_start == 0) { + read_unlock(&journal->j_state_lock); + return; + } jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", journal->j_tail_sequence); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index ddcd3549c6c2..de8b4cb7c31e 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -209,7 +209,8 @@ repeat: if (!new_transaction) goto alloc_transaction; write_lock(&journal->j_state_lock); - if (!journal->j_running_transaction) { + if (!journal->j_running_transaction && + !journal->j_barrier_count) { jbd2_get_transaction(journal, new_transaction); new_transaction = NULL; } diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index db3889ba8818..8608f8779143 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -138,33 +138,39 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, struct page *pg; struct inode *inode = mapping->host; struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); + struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); + struct jffs2_raw_inode ri; + uint32_t alloc_len = 0; pgoff_t index = pos >> PAGE_CACHE_SHIFT; uint32_t pageofs = index << PAGE_CACHE_SHIFT; int ret = 0; + jffs2_dbg(1, "%s()\n", __func__); + + if (pageofs > inode->i_size) { + ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, + ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); + if (ret) + return ret; + } + + mutex_lock(&f->sem); pg = grab_cache_page_write_begin(mapping, index, flags); - if (!pg) + if (!pg) { + if (alloc_len) + jffs2_complete_reservation(c); + mutex_unlock(&f->sem); return -ENOMEM; + } *pagep = pg; - jffs2_dbg(1, "%s()\n", __func__); - - if (pageofs > inode->i_size) { + if (alloc_len) { /* Make new hole frag from old EOF to new page */ - struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); - struct jffs2_raw_inode ri; struct jffs2_full_dnode *fn; - uint32_t alloc_len; jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", (unsigned int)inode->i_size, pageofs); - ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, - ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); - if (ret) - goto out_page; - - mutex_lock(&f->sem); memset(&ri, 0, sizeof(ri)); ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); @@ -191,7 +197,6 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, if (IS_ERR(fn)) { ret = PTR_ERR(fn); jffs2_complete_reservation(c); - mutex_unlock(&f->sem); goto out_page; } ret = jffs2_add_full_dnode_to_inode(c, f, fn); @@ -206,12 +211,10 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, jffs2_mark_node_obsolete(c, fn->raw); jffs2_free_full_dnode(fn); jffs2_complete_reservation(c); - mutex_unlock(&f->sem); goto out_page; } jffs2_complete_reservation(c); inode->i_size = pageofs; - mutex_unlock(&f->sem); } /* @@ -220,18 +223,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, * case of a short-copy. */ if (!PageUptodate(pg)) { - mutex_lock(&f->sem); ret = jffs2_do_readpage_nolock(inode, pg); - mutex_unlock(&f->sem); if (ret) goto out_page; } + mutex_unlock(&f->sem); jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); return ret; out_page: unlock_page(pg); page_cache_release(pg); + mutex_unlock(&f->sem); return ret; } diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index 6784d1e7a7eb..ffa8f12c7a54 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -375,14 +375,16 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, spin_unlock(&c->erase_completion_lock); ret = jffs2_prealloc_raw_node_refs(c, jeb, 1); - if (ret) - return ret; + /* Just lock it again and continue. Nothing much can change because we hold c->alloc_sem anyway. In fact, it's not entirely clear why we hold c->erase_completion_lock in the majority of this function... but that's a question for another (more caffeine-rich) day. */ spin_lock(&c->erase_completion_lock); + if (ret) + return ret; + waste = jeb->free_size; jffs2_link_node_ref(c, jeb, (jeb->offset + c->sector_size - waste) | REF_OBSOLETE, diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 74d9be19df3f..6bec5c0bc268 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -1043,10 +1043,10 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c, ops.datbuf = NULL; ret = mtd_read_oob(c->mtd, jeb->offset, &ops); - if (ret || ops.oobretlen != ops.ooblen) { + if ((ret && !mtd_is_bitflip(ret)) || ops.oobretlen != ops.ooblen) { pr_err("cannot read OOB for EB at %08x, requested %zd bytes, read %zd bytes, error %d\n", jeb->offset, ops.ooblen, ops.oobretlen, ret); - if (!ret) + if (!ret || mtd_is_bitflip(ret)) ret = -EIO; return ret; } @@ -1085,10 +1085,10 @@ int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c, ops.datbuf = NULL; ret = mtd_read_oob(c->mtd, jeb->offset, &ops); - if (ret || ops.oobretlen != ops.ooblen) { + if ((ret && !mtd_is_bitflip(ret)) || ops.oobretlen != ops.ooblen) { pr_err("cannot read OOB for EB at %08x, requested %zd bytes, read %zd bytes, error %d\n", jeb->offset, ops.ooblen, ops.oobretlen, ret); - if (!ret) + if (!ret || mtd_is_bitflip(ret)) ret = -EIO; return ret; } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 8392cb85bd54..a3a09877ea62 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -551,6 +551,9 @@ again: status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); if (status < 0) break; + /* Resend the blocking lock request after a server reboot */ + if (resp->status == nlm_lck_denied_grace_period) + continue; if (resp->status != nlm_lck_blocked) break; } diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c index d269ada7670e..982d2676e1f8 100644 --- a/fs/lockd/clntxdr.c +++ b/fs/lockd/clntxdr.c @@ -223,7 +223,7 @@ static void encode_nlm_stat(struct xdr_stream *xdr, { __be32 *p; - BUG_ON(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD); + WARN_ON_ONCE(be32_to_cpu(stat) > NLM_LCK_DENIED_GRACE_PERIOD); p = xdr_reserve_space(xdr, 4); *p = stat; } diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 7ef14b3c5bee..606a8dd8818c 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -40,6 +40,7 @@ struct nsm_args { u32 proc; char *mon_name; + char *nodename; }; struct nsm_res { @@ -94,6 +95,7 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, .vers = 3, .proc = NLMPROC_NSM_NOTIFY, .mon_name = nsm->sm_mon_name, + .nodename = utsname()->nodename, }; struct rpc_message msg = { .rpc_argp = &args, @@ -430,7 +432,7 @@ static void encode_my_id(struct xdr_stream *xdr, const struct nsm_args *argp) { __be32 *p; - encode_nsm_string(xdr, utsname()->nodename); + encode_nsm_string(xdr, argp->nodename); p = xdr_reserve_space(xdr, 4 + 4 + 4); *p++ = cpu_to_be32(argp->prog); *p++ = cpu_to_be32(argp->vers); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 3250f280a171..58ddc38cfccd 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -251,10 +251,9 @@ out_err: return err; } -static int lockd_up_net(struct net *net) +static int lockd_up_net(struct svc_serv *serv, struct net *net) { struct lockd_net *ln = net_generic(net, lockd_net_id); - struct svc_serv *serv = nlmsvc_rqst->rq_server; int error; if (ln->nlmsvc_users++) @@ -276,10 +275,9 @@ err_rpcb: return error; } -static void lockd_down_net(struct net *net) +static void lockd_down_net(struct svc_serv *serv, struct net *net) { struct lockd_net *ln = net_generic(net, lockd_net_id); - struct svc_serv *serv = nlmsvc_rqst->rq_server; if (ln->nlmsvc_users) { if (--ln->nlmsvc_users == 0) { @@ -307,7 +305,7 @@ int lockd_up(struct net *net) * Check whether we're already up and running. */ if (nlmsvc_rqst) { - error = lockd_up_net(net); + error = lockd_up_net(nlmsvc_rqst->rq_server, net); goto out; } @@ -378,7 +376,7 @@ out: return error; err_start: - lockd_down_net(net); + lockd_down_net(serv, net); goto destroy_and_out; } EXPORT_SYMBOL_GPL(lockd_up); @@ -390,7 +388,7 @@ void lockd_down(struct net *net) { mutex_lock(&nlmsvc_mutex); - lockd_down_net(net); + lockd_down_net(nlmsvc_rqst->rq_server, net); if (nlmsvc_users) { if (--nlmsvc_users) goto out; diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index d27aab11f324..d413af338da1 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -67,7 +67,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain file pointer. Not used by FREE_ALL call. */ if (filp != NULL) { - if ((error = nlm_lookup_file(rqstp, &file, &lock->fh)) != 0) + error = cast_status(nlm_lookup_file(rqstp, &file, &lock->fh)); + if (error != 0) goto no_locks; *filp = file; diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 7f6a23f0244e..3a9c2470e4f9 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -162,25 +162,39 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, return bio; } -static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, +static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, struct page *page, struct pnfs_block_extent *be, void (*end_io)(struct bio *, int err), - struct parallel_io *par) + struct parallel_io *par, + unsigned int offset, int len) { + isect = isect + (offset >> SECTOR_SHIFT); + dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, + npg, rw, (unsigned long long)isect, offset, len); retry: if (!bio) { bio = bl_alloc_init_bio(npg, isect, be, end_io, par); if (!bio) return ERR_PTR(-ENOMEM); } - if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { + if (bio_add_page(bio, page, len, offset) < len) { bio = bl_submit_bio(rw, bio); goto retry; } return bio; } +static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, + sector_t isect, struct page *page, + struct pnfs_block_extent *be, + void (*end_io)(struct bio *, int err), + struct parallel_io *par) +{ + return do_add_page_to_bio(bio, npg, rw, isect, page, be, + end_io, par, 0, PAGE_CACHE_SIZE); +} + /* This is basically copied from mpage_end_io_read */ static void bl_end_io_read(struct bio *bio, int err) { @@ -443,6 +457,107 @@ map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be) return; } +static void +bl_read_single_end_io(struct bio *bio, int error) +{ + struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct page *page = bvec->bv_page; + + /* Only one page in bvec */ + unlock_page(page); +} + +static int +bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be, + unsigned int offset, unsigned int len) +{ + struct bio *bio; + struct page *shadow_page; + sector_t isect; + char *kaddr, *kshadow_addr; + int ret = 0; + + dprintk("%s: offset %u len %u\n", __func__, offset, len); + + shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (shadow_page == NULL) + return -ENOMEM; + + bio = bio_alloc(GFP_NOIO, 1); + if (bio == NULL) + return -ENOMEM; + + isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) + + (offset / SECTOR_SIZE); + + bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; + bio->bi_bdev = be->be_mdev; + bio->bi_end_io = bl_read_single_end_io; + + lock_page(shadow_page); + if (bio_add_page(bio, shadow_page, + SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) { + unlock_page(shadow_page); + bio_put(bio); + return -EIO; + } + + submit_bio(READ, bio); + wait_on_page_locked(shadow_page); + if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) { + ret = -EIO; + } else { + kaddr = kmap_atomic(page); + kshadow_addr = kmap_atomic(shadow_page); + memcpy(kaddr + offset, kshadow_addr + offset, len); + kunmap_atomic(kshadow_addr); + kunmap_atomic(kaddr); + } + __free_page(shadow_page); + bio_put(bio); + + return ret; +} + +static int +bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be, + unsigned int dirty_offset, unsigned int dirty_len, + bool full_page) +{ + int ret = 0; + unsigned int start, end; + + if (full_page) { + start = 0; + end = PAGE_CACHE_SIZE; + } else { + start = round_down(dirty_offset, SECTOR_SIZE); + end = round_up(dirty_offset + dirty_len, SECTOR_SIZE); + } + + dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len); + if (!be) { + zero_user_segments(page, start, dirty_offset, + dirty_offset + dirty_len, end); + if (start == 0 && end == PAGE_CACHE_SIZE && + trylock_page(page)) { + SetPageUptodate(page); + unlock_page(page); + } + return ret; + } + + if (start != dirty_offset) + ret = bl_do_readpage_sync(page, be, start, + dirty_offset - start); + + if (!ret && (dirty_offset + dirty_len < end)) + ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len, + end - dirty_offset - dirty_len); + + return ret; +} + /* Given an unmapped page, zero it or read in page for COW, page is locked * by caller. */ @@ -476,7 +591,6 @@ init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read) SetPageUptodate(page); cleanup: - bl_put_extent(cow_read); if (bh) free_buffer_head(bh); if (ret) { @@ -547,6 +661,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) struct parallel_io *par; loff_t offset = wdata->args.offset; size_t count = wdata->args.count; + unsigned int pg_offset, pg_len, saved_len; struct page **pages = wdata->args.pages; struct page *page; pgoff_t index; @@ -651,10 +766,11 @@ next_page: if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); + bl_put_extent(cow_read); bio = bl_submit_bio(WRITE, bio); /* Get the next one */ be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), - isect, NULL); + isect, &cow_read); if (!be || !is_writable(be, isect)) { wdata->pnfs_error = -EINVAL; goto out; @@ -671,7 +787,26 @@ next_page: extent_length = be->be_length - (isect - be->be_f_offset); } - if (be->be_state == PNFS_BLOCK_INVALID_DATA) { + + dprintk("%s offset %lld count %Zu\n", __func__, offset, count); + pg_offset = offset & ~PAGE_CACHE_MASK; + if (pg_offset + count > PAGE_CACHE_SIZE) + pg_len = PAGE_CACHE_SIZE - pg_offset; + else + pg_len = count; + + saved_len = pg_len; + if (be->be_state == PNFS_BLOCK_INVALID_DATA && + !bl_is_sector_init(be->be_inval, isect)) { + ret = bl_read_partial_page_sync(pages[i], cow_read, + pg_offset, pg_len, true); + if (ret) { + dprintk("%s bl_read_partial_page_sync fail %d\n", + __func__, ret); + wdata->pnfs_error = ret; + goto out; + } + ret = bl_mark_sectors_init(be->be_inval, isect, PAGE_CACHE_SECTORS); if (unlikely(ret)) { @@ -680,15 +815,33 @@ next_page: wdata->pnfs_error = ret; goto out; } + + /* Expand to full page write */ + pg_offset = 0; + pg_len = PAGE_CACHE_SIZE; + } else if ((pg_offset & (SECTOR_SIZE - 1)) || + (pg_len & (SECTOR_SIZE - 1))) { + /* ahh, nasty case. We have to do sync full sector + * read-modify-write cycles. + */ + unsigned int saved_offset = pg_offset; + ret = bl_read_partial_page_sync(pages[i], be, pg_offset, + pg_len, false); + pg_offset = round_down(pg_offset, SECTOR_SIZE); + pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE) + - pg_offset; } - bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE, + bio = do_add_page_to_bio(bio, wdata->npages - i, WRITE, isect, pages[i], be, - bl_end_io_write, par); + bl_end_io_write, par, + pg_offset, pg_len); if (IS_ERR(bio)) { wdata->pnfs_error = PTR_ERR(bio); bio = NULL; goto out; } + offset += saved_len; + count -= saved_len; isect += PAGE_CACHE_SECTORS; last_isect = isect; extent_length -= PAGE_CACHE_SECTORS; @@ -706,17 +859,16 @@ next_page: } write_done: - wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset); - if (count < wdata->res.count) { - wdata->res.count = count; - } + wdata->res.count = wdata->args.count; out: bl_put_extent(be); + bl_put_extent(cow_read); bl_submit_bio(WRITE, bio); put_parallel(par); return PNFS_ATTEMPTED; out_mds: bl_put_extent(be); + bl_put_extent(cow_read); kfree(par); return PNFS_NOT_ATTEMPTED; } @@ -1003,6 +1155,7 @@ static const struct nfs_pageio_ops bl_pg_write_ops = { static struct pnfs_layoutdriver_type blocklayout_type = { .id = LAYOUT_BLOCK_VOLUME, .name = "LAYOUT_BLOCK_VOLUME", + .owner = THIS_MODULE, .read_pagelist = bl_read_pagelist, .write_pagelist = bl_write_pagelist, .alloc_layout_hdr = bl_alloc_layout_hdr, diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 03350690118e..39bb51a8dd18 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -41,6 +41,7 @@ #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) +#define SECTOR_SIZE (1 << SECTOR_SHIFT) struct block_mount_id { spinlock_t bm_lock; /* protects list */ diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 60f7e4ec842c..37f6de36973c 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -694,8 +694,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, */ static void nfs_destroy_server(struct nfs_server *server) { - if (!(server->flags & NFS_MOUNT_LOCAL_FLOCK) || - !(server->flags & NFS_MOUNT_LOCAL_FCNTL)) + if (server->nlm_host) nlmclnt_done(server->nlm_host); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8789210c6905..a0daac7ad508 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1103,7 +1103,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) struct nfs_fattr *fattr = NULL; int error; - if (nd->flags & LOOKUP_RCU) + if (nd && (nd->flags & LOOKUP_RCU)) return -ECHILD; parent = dget_parent(dentry); @@ -1219,11 +1219,14 @@ static int nfs_dentry_delete(const struct dentry *dentry) } +/* Ensure that we revalidate inode->i_nlink */ static void nfs_drop_nlink(struct inode *inode) { spin_lock(&inode->i_lock); - if (inode->i_nlink > 0) - drop_nlink(inode); + /* drop the inode if we're reasonably sure this is the last link */ + if (inode->i_nlink == 1) + clear_nlink(inode); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; spin_unlock(&inode->i_lock); } @@ -1238,8 +1241,8 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { - drop_nlink(inode); nfs_complete_unlink(dentry, inode); + nfs_drop_nlink(inode); } iput(inode); } @@ -1502,7 +1505,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) struct iattr attr; int openflags, ret = 0; - if (nd->flags & LOOKUP_RCU) + if (nd && (nd->flags & LOOKUP_RCU)) return -ECHILD; inode = dentry->d_inode; @@ -1800,10 +1803,8 @@ static int nfs_safe_remove(struct dentry *dentry) if (inode != NULL) { nfs_inode_return_delegation(inode); error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); - /* The VFS may want to delete this inode */ if (error == 0) nfs_drop_nlink(inode); - nfs_mark_for_revalidate(inode); } else error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); if (error == -ENOENT) diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index b3924b8a6000..786cd6533713 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -214,7 +214,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) { char buf1[NFS_DNS_HOSTNAME_MAXLEN+1]; struct nfs_dns_ent key, *item; - unsigned long ttl; + unsigned int ttl; ssize_t len; int ret = -EINVAL; @@ -237,7 +237,8 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) key.namelen = len; memset(&key.h, 0, sizeof(key.h)); - ttl = get_expiry(&buf); + if (get_uint(&buf, &ttl) < 0) + goto out; if (ttl == 0) goto out; key.h.expiry_time = ttl + seconds_since_boot(); diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 98d664349f7e..9131b17f3a72 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -747,9 +747,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) } if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { - ret = mlen; - complete_request_key(cons, -ENOKEY); - goto out_incomplete; + ret = -ENOKEY; + goto out; } namelen_in = strnlen(im.im_name, IDMAP_NAMESZ); @@ -766,7 +765,6 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) out: complete_request_key(cons, ret); -out_incomplete: return ret; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e8bbfa5b3500..edf411988bf3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -152,7 +152,7 @@ static void nfs_zap_caches_locked(struct inode *inode) nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); + memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b777bdaba4c5..33aa76fec3aa 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -277,8 +277,9 @@ extern void nfs_sb_active(struct super_block *sb); extern void nfs_sb_deactive(struct super_block *sb); /* namespace.c */ +#define NFS_PATH_CANONICAL 1 extern char *nfs_path(char **p, struct dentry *dentry, - char *buffer, ssize_t buflen); + char *buffer, ssize_t buflen, unsigned flags); extern struct vfsmount *nfs_d_automount(struct path *path); #ifdef CONFIG_NFS_V4 rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); @@ -371,7 +372,7 @@ static inline char *nfs_devname(struct dentry *dentry, char *buffer, ssize_t buflen) { char *dummy; - return nfs_path(&dummy, dentry, buffer, buflen); + return nfs_path(&dummy, dentry, buffer, buflen, NFS_PATH_CANONICAL); } /* diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 8e65c7f1f87c..015f71f8f62c 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -181,7 +181,7 @@ int nfs_mount(struct nfs_mount_request *info) else msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT]; - status = rpc_call_sync(mnt_clnt, &msg, 0); + status = rpc_call_sync(mnt_clnt, &msg, RPC_TASK_SOFT|RPC_TASK_TIMEOUT); rpc_shutdown_client(mnt_clnt); if (status < 0) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index d51868e5683c..43275167649a 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -37,6 +37,7 @@ static struct vfsmount *nfs_do_submount(struct dentry *dentry, * @dentry - pointer to dentry * @buffer - result buffer * @buflen - length of buffer + * @flags - options (see below) * * Helper function for constructing the server pathname * by arbitrary hashed dentry. @@ -44,8 +45,14 @@ static struct vfsmount *nfs_do_submount(struct dentry *dentry, * This is mainly for use in figuring out the path on the * server side when automounting on top of an existing partition * and in generating /proc/mounts and friends. + * + * Supported flags: + * NFS_PATH_CANONICAL: ensure there is exactly one slash after + * the original device (export) name + * (if unset, the original name is returned verbatim) */ -char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen) +char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, + unsigned flags) { char *end; int namelen; @@ -78,7 +85,7 @@ rename_retry: rcu_read_unlock(); goto rename_retry; } - if (*end != '/') { + if ((flags & NFS_PATH_CANONICAL) && *end != '/') { if (--buflen < 0) { spin_unlock(&dentry->d_lock); rcu_read_unlock(); @@ -95,9 +102,11 @@ rename_retry: return end; } namelen = strlen(base); - /* Strip off excess slashes in base string */ - while (namelen > 0 && base[namelen - 1] == '/') - namelen--; + if (flags & NFS_PATH_CANONICAL) { + /* Strip off excess slashes in base string */ + while (namelen > 0 && base[namelen - 1] == '/') + namelen--; + } buflen -= namelen; if (buflen < 0) { spin_unlock(&dentry->d_lock); @@ -244,11 +253,31 @@ out_nofree: return mnt; } +static int +nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + if (NFS_FH(dentry->d_inode)->size != 0) + return nfs_getattr(mnt, dentry, stat); + generic_fillattr(dentry->d_inode, stat); + return 0; +} + +static int +nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr) +{ + if (NFS_FH(dentry->d_inode)->size != 0) + return nfs_setattr(dentry, attr); + return -EACCES; +} + const struct inode_operations nfs_mountpoint_inode_operations = { .getattr = nfs_getattr, + .setattr = nfs_setattr, }; const struct inode_operations nfs_referral_inode_operations = { + .getattr = nfs_namespace_getattr, + .setattr = nfs_namespace_setattr, }; static void nfs_expire_automounts(struct work_struct *work) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 5242eae6711a..a7a043d272da 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -69,7 +69,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle, nfs_fattr_init(info->fattr); status = rpc_call_sync(client, &msg, 0); dprintk("%s: reply fsinfo: %d\n", __func__, status); - if (!(info->fattr->valid & NFS_ATTR_FATTR)) { + if (status == 0 && !(info->fattr->valid & NFS_ATTR_FATTR)) { msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; msg.rpc_resp = info->fattr; status = rpc_call_sync(client, &msg, 0); @@ -644,7 +644,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, u64 cookie, struct page **pages, unsigned int count, int plus) { struct inode *dir = dentry->d_inode; - __be32 *verf = NFS_COOKIEVERF(dir); + __be32 *verf = NFS_I(dir)->cookieverf; struct nfs3_readdirargs arg = { .fh = NFS_FH(dir), .cookie = cookie, diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index a7f3dedc4ec7..b604be2dae6a 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -81,7 +81,8 @@ static char *nfs_path_component(const char *nfspath, const char *end) static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen) { char *limit; - char *path = nfs_path(&limit, dentry, buffer, buflen); + char *path = nfs_path(&limit, dentry, buffer, buflen, + NFS_PATH_CANONICAL); if (!IS_ERR(path)) { char *path_component = nfs_path_component(path, limit); if (path_component) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d60c2d870ab0..30351877aee2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -307,8 +307,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc dprintk("%s ERROR: %d Reset session\n", __func__, errorcode); nfs4_schedule_session_recovery(clp->cl_session); - exception->retry = 1; - break; + goto wait_on_recovery; #endif /* defined(CONFIG_NFS_V4_1) */ case -NFS4ERR_FILE_OPEN: if (exception->timeout > HZ) { @@ -1478,9 +1477,11 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) data->timestamp = jiffies; if (nfs4_setup_sequence(data->o_arg.server, &data->o_arg.seq_args, - &data->o_res.seq_res, task)) - return; - rpc_call_start(task); + &data->o_res.seq_res, + task) != 0) + nfs_release_seqid(data->o_arg.seqid); + else + rpc_call_start(task); return; unlock_no_action: rcu_read_unlock(); @@ -2097,9 +2098,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), &calldata->arg.seq_args, &calldata->res.seq_res, - task)) - goto out; - rpc_call_start(task); + task) != 0) + nfs_release_seqid(calldata->arg.seqid); + else + rpc_call_start(task); out: dprintk("%s: done!\n", __func__); } @@ -3150,11 +3152,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long long)cookie); - nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); + nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); res.pgbase = args.pgbase; status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); if (status >= 0) { - memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); + memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE); status += args.pgbase; } @@ -4306,6 +4308,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) rpc_restart_call_prepare(task); } + nfs_release_seqid(calldata->arg.seqid); } static void nfs4_locku_prepare(struct rpc_task *task, void *data) @@ -4322,9 +4325,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) calldata->timestamp = jiffies; if (nfs4_setup_sequence(calldata->server, &calldata->arg.seq_args, - &calldata->res.seq_res, task)) - return; - rpc_call_start(task); + &calldata->res.seq_res, + task) != 0) + nfs_release_seqid(calldata->arg.seqid); + else + rpc_call_start(task); } static const struct rpc_call_ops nfs4_locku_ops = { @@ -4469,7 +4474,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) /* Do we need to do an open_to_lock_owner? */ if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) - return; + goto out_release_lock_seqid; data->arg.open_stateid = &state->stateid; data->arg.new_lock_owner = 1; data->res.open_seqid = data->arg.open_seqid; @@ -4478,10 +4483,15 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) data->timestamp = jiffies; if (nfs4_setup_sequence(data->server, &data->arg.seq_args, - &data->res.seq_res, task)) + &data->res.seq_res, + task) == 0) { + rpc_call_start(task); return; - rpc_call_start(task); - dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); + } + nfs_release_seqid(data->arg.open_seqid); +out_release_lock_seqid: + nfs_release_seqid(data->arg.lock_seqid); + dprintk("%s: done!, ret = %d\n", __func__, task->tk_status); } static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata) @@ -5729,13 +5739,26 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data) rpc_call_start(task); } +static void nfs41_sequence_prepare_privileged(struct rpc_task *task, void *data) +{ + rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); + nfs41_sequence_prepare(task, data); +} + static const struct rpc_call_ops nfs41_sequence_ops = { .rpc_call_done = nfs41_sequence_call_done, .rpc_call_prepare = nfs41_sequence_prepare, .rpc_release = nfs41_sequence_release, }; -static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) +static const struct rpc_call_ops nfs41_sequence_privileged_ops = { + .rpc_call_done = nfs41_sequence_call_done, + .rpc_call_prepare = nfs41_sequence_prepare_privileged, + .rpc_release = nfs41_sequence_release, +}; + +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred, + const struct rpc_call_ops *seq_ops) { struct nfs4_sequence_data *calldata; struct rpc_message msg = { @@ -5745,7 +5768,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ struct rpc_task_setup task_setup_data = { .rpc_client = clp->cl_rpcclient, .rpc_message = &msg, - .callback_ops = &nfs41_sequence_ops, + .callback_ops = seq_ops, .flags = RPC_TASK_ASYNC | RPC_TASK_SOFT, }; @@ -5772,7 +5795,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) return 0; - task = _nfs41_proc_sequence(clp, cred); + task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_ops); if (IS_ERR(task)) ret = PTR_ERR(task); else @@ -5786,7 +5809,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) struct rpc_task *task; int ret; - task = _nfs41_proc_sequence(clp, cred); + task = _nfs41_proc_sequence(clp, cred, &nfs41_sequence_privileged_ops); if (IS_ERR(task)) { ret = PTR_ERR(task); goto out; @@ -5966,11 +5989,58 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) dprintk("<-- %s\n", __func__); } +static size_t max_response_pages(struct nfs_server *server) +{ + u32 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; + return nfs_page_array_len(0, max_resp_sz); +} + +static void nfs4_free_pages(struct page **pages, size_t size) +{ + int i; + + if (!pages) + return; + + for (i = 0; i < size; i++) { + if (!pages[i]) + break; + __free_page(pages[i]); + } + kfree(pages); +} + +static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) +{ + struct page **pages; + int i; + + pages = kcalloc(size, sizeof(struct page *), gfp_flags); + if (!pages) { + dprintk("%s: can't alloc array of %zu pages\n", __func__, size); + return NULL; + } + + for (i = 0; i < size; i++) { + pages[i] = alloc_page(gfp_flags); + if (!pages[i]) { + dprintk("%s: failed to allocate page\n", __func__); + nfs4_free_pages(pages, size); + return NULL; + } + } + + return pages; +} + static void nfs4_layoutget_release(void *calldata) { struct nfs4_layoutget *lgp = calldata; + struct nfs_server *server = NFS_SERVER(lgp->args.inode); + size_t max_pages = max_response_pages(server); dprintk("--> %s\n", __func__); + nfs4_free_pages(lgp->args.layout.pages, max_pages); put_nfs_open_context(lgp->args.ctx); kfree(calldata); dprintk("<-- %s\n", __func__); @@ -5982,9 +6052,10 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { .rpc_release = nfs4_layoutget_release, }; -int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) +int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) { struct nfs_server *server = NFS_SERVER(lgp->args.inode); + size_t max_pages = max_response_pages(server); struct rpc_task *task; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], @@ -6002,6 +6073,13 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) dprintk("--> %s\n", __func__); + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); + if (!lgp->args.layout.pages) { + nfs4_layoutget_release(lgp); + return -ENOMEM; + } + lgp->args.layout.pglen = max_pages * PAGE_SIZE; + lgp->res.layoutp = &lgp->args.layout; lgp->res.seq_res.sr_slot = NULL; nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); @@ -6047,12 +6125,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) return; } spin_lock(&lo->plh_inode->i_lock); - if (task->tk_status == 0) { - if (lrp->res.lrs_present) { - pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); - } else - BUG_ON(!list_empty(&lo->plh_segs)); - } + if (task->tk_status == 0 && lrp->res.lrs_present) + pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); lo->plh_block_lgets--; spin_unlock(&lo->plh_inode->i_lock); dprintk("<-- %s\n", __func__); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c54aae364bee..c8ac9a1461c2 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6081,7 +6081,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_open(xdr, res); if (status) goto out; - if (decode_getfh(xdr, &res->fh) != 0) + status = decode_getfh(xdr, &res->fh); + if (status) goto out; if (decode_getfattr(xdr, res->f_attr, res->server) != 0) goto out; diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 1afe74c42c8a..65538f569f42 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -589,6 +589,7 @@ static struct pnfs_layoutdriver_type objlayout_type = { .flags = PNFS_LAYOUTRET_ON_SETATTR | PNFS_LAYOUTRET_ON_ERROR, + .owner = THIS_MODULE, .alloc_layout_hdr = objlayout_alloc_layout_hdr, .free_layout_hdr = objlayout_free_layout_hdr, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 38512bcd2e98..059e2c350ad7 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -574,9 +574,6 @@ send_layoutget(struct pnfs_layout_hdr *lo, struct nfs_server *server = NFS_SERVER(ino); struct nfs4_layoutget *lgp; struct pnfs_layout_segment *lseg = NULL; - struct page **pages = NULL; - int i; - u32 max_resp_sz, max_pages; dprintk("--> %s\n", __func__); @@ -585,20 +582,6 @@ send_layoutget(struct pnfs_layout_hdr *lo, if (lgp == NULL) return NULL; - /* allocate pages for xdr post processing */ - max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; - max_pages = nfs_page_array_len(0, max_resp_sz); - - pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); - if (!pages) - goto out_err_free; - - for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(gfp_flags); - if (!pages[i]) - goto out_err_free; - } - lgp->args.minlength = PAGE_CACHE_SIZE; if (lgp->args.minlength > range->length) lgp->args.minlength = range->length; @@ -607,39 +590,19 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.type = server->pnfs_curr_ld->id; lgp->args.inode = ino; lgp->args.ctx = get_nfs_open_context(ctx); - lgp->args.layout.pages = pages; - lgp->args.layout.pglen = max_pages * PAGE_SIZE; lgp->lsegpp = &lseg; lgp->gfp_flags = gfp_flags; /* Synchronously retrieve layout information from server and * store in lseg. */ - nfs4_proc_layoutget(lgp); + nfs4_proc_layoutget(lgp, gfp_flags); if (!lseg) { /* remember that LAYOUTGET failed and suspend trying */ set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); } - /* free xdr pages */ - for (i = 0; i < max_pages; i++) - __free_page(pages[i]); - kfree(pages); - return lseg; - -out_err_free: - /* free any allocated xdr pages, lgp as it's not used */ - if (pages) { - for (i = 0; i < max_pages; i++) { - if (!pages[i]) - break; - __free_page(pages[i]); - } - kfree(pages); - } - kfree(lgp); - return NULL; } /* Initiates a LAYOUTRETURN(FILE) */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 442ebf68eeec..8d95818330cc 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -161,7 +161,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev); -extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); +extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 4ac7fca7e4bf..c252161e7b1d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -812,7 +812,7 @@ static int nfs_show_devname(struct seq_file *m, struct dentry *root) int err = 0; if (!page) return -ENOMEM; - devname = nfs_path(&dummy, root, page, PAGE_SIZE); + devname = nfs_path(&dummy, root, page, PAGE_SIZE, 0); if (IS_ERR(devname)) err = PTR_ERR(devname); else @@ -1138,7 +1138,7 @@ static int nfs_get_option_str(substring_t args[], char **option) { kfree(*option); *option = match_strdup(args); - return !option; + return !*option; } static int nfs_get_option_ul(substring_t args[], unsigned long *option) @@ -1886,6 +1886,7 @@ static int nfs_validate_mount_data(void *options, memcpy(sap, &data->addr, sizeof(data->addr)); args->nfs_server.addrlen = sizeof(data->addr); + args->nfs_server.port = ntohs(data->addr.sin_port); if (!nfs_verify_server_address(sap)) goto out_no_address; @@ -2598,6 +2599,7 @@ static int nfs4_validate_mount_data(void *options, return -EFAULT; if (!nfs_verify_server_address(sap)) goto out_no_address; + args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); if (data->auth_flavourlen) { if (data->auth_flavourlen > 1) @@ -3146,4 +3148,6 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, return res; } +MODULE_ALIAS("nfs4"); + #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8e9689abbc0c..e9a020f283f7 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -401,7 +401,7 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc) int migrated, i, err; /* listsize */ - err = get_int(mesg, &fsloc->locations_count); + err = get_uint(mesg, &fsloc->locations_count); if (err) return err; if (fsloc->locations_count > MAX_FS_LOCATIONS) @@ -459,7 +459,7 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) return -EINVAL; for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) { - err = get_int(mesg, &f->pseudoflavor); + err = get_uint(mesg, &f->pseudoflavor); if (err) return err; /* @@ -468,7 +468,7 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) * problem at export time instead of when a client fails * to authenticate. */ - err = get_int(mesg, &f->flags); + err = get_uint(mesg, &f->flags); if (err) return err; /* Only some flags are allowed to differ between flavors: */ diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 322d11ce06a4..01b090d135b7 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -581,7 +581,7 @@ numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namel /* Just to make sure it's null-terminated: */ memcpy(buf, name, namelen); buf[namelen] = '\0'; - ret = kstrtouint(name, 10, id); + ret = kstrtouint(buf, 10, id); return ret == 0; } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 987e719fbae8..dd0308d65f66 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -194,6 +194,7 @@ static __be32 do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { struct svc_fh *resfh; + int accmode; __be32 status; resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); @@ -253,9 +254,10 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o /* set reply cache */ fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, &resfh->fh_handle); - if (!open->op_created) - status = do_open_permission(rqstp, resfh, open, - NFSD_MAY_NOP); + accmode = NFSD_MAY_NOP; + if (open->op_created) + accmode |= NFSD_MAY_OWNER_OVERRIDE; + status = do_open_permission(rqstp, resfh, open, accmode); set_change_info(&open->op_cinfo, current_fh); fh_dup2(current_fh, resfh); out: diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e79c24e232f4..abd785e5fd2d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1053,6 +1053,8 @@ free_client(struct nfs4_client *clp) put_group_info(clp->cl_cred.cr_group_info); kfree(clp->cl_principal); kfree(clp->cl_name.data); + idr_remove_all(&clp->cl_stateids); + idr_destroy(&clp->cl_stateids); kfree(clp); } @@ -2356,7 +2358,7 @@ nfsd4_init_slabs(void) if (openowner_slab == NULL) goto out_nomem; lockowner_slab = kmem_cache_create("nfsd4_lockowners", - sizeof(struct nfs4_openowner), 0, 0, NULL); + sizeof(struct nfs4_lockowner), 0, 0, NULL); if (lockowner_slab == NULL) goto out_nomem; file_slab = kmem_cache_create("nfsd4_files", @@ -3783,6 +3785,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); nfsd4_close_open_stateid(stp); + release_last_closed_stateid(oo); oo->oo_last_closed_stid = stp; /* place unused nfs4_stateowners on so_close_lru list to be diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 283d15e9f46d..967d68eef282 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2920,11 +2920,16 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, len = maxcount; v = 0; while (len > 0) { - pn = resp->rqstp->rq_resused++; + pn = resp->rqstp->rq_resused; + if (!resp->rqstp->rq_respages[pn]) { /* ran out of pages */ + maxcount -= len; + break; + } resp->rqstp->rq_vec[v].iov_base = page_address(resp->rqstp->rq_respages[pn]); resp->rqstp->rq_vec[v].iov_len = len < PAGE_SIZE ? len : PAGE_SIZE; + resp->rqstp->rq_resused++; v++; len -= PAGE_SIZE; } @@ -2970,6 +2975,8 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd return nfserr; if (resp->xbuf->page_len) return nfserr_resource; + if (!resp->rqstp->rq_respages[resp->rqstp->rq_resused]) + return nfserr_resource; page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused++]); @@ -3019,6 +3026,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 return nfserr; if (resp->xbuf->page_len) return nfserr_resource; + if (!resp->rqstp->rq_respages[resp->rqstp->rq_resused]) + return nfserr_resource; RESERVE_SPACE(NFS4_VERIFIER_SIZE); savep = p; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 3ab12eb2967f..d014727fc8f2 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -663,9 +663,7 @@ static ssize_t __write_ports_addfd(char *buf) err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); if (err < 0) { - if (nfsd_serv->sv_nrthreads == 1) - svc_shutdown_net(nfsd_serv, net); - svc_destroy(nfsd_serv); + nfsd_destroy(net); return err; } @@ -734,9 +732,7 @@ out_close: svc_xprt_put(xprt); } out_err: - if (nfsd_serv->sv_nrthreads == 1) - svc_shutdown_net(nfsd_serv, net); - svc_destroy(nfsd_serv); + nfsd_destroy(net); return err; } diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 1671429ffa66..1336a6512cdc 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -73,6 +73,17 @@ int nfsd_nrpools(void); int nfsd_get_nrthreads(int n, int *); int nfsd_set_nrthreads(int n, int *); +static inline void nfsd_destroy(struct net *net) +{ + int destroy = (nfsd_serv->sv_nrthreads == 1); + + if (destroy) + svc_shutdown_net(nfsd_serv, net); + svc_destroy(nfsd_serv); + if (destroy) + nfsd_serv = NULL; +} + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL extern struct svc_version nfsd_acl_version2; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index bcda12a3f082..53459b055cfc 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -254,8 +254,6 @@ static void nfsd_shutdown(void) static void nfsd_last_thread(struct svc_serv *serv, struct net *net) { - /* When last nfsd thread exits we need to do some clean-up */ - nfsd_serv = NULL; nfsd_shutdown(); svc_rpcb_cleanup(serv, net); @@ -332,6 +330,7 @@ static int nfsd_get_default_max_blksize(void) int nfsd_create_serv(void) { int error; + struct net *net = current->nsproxy->net_ns; WARN_ON(!mutex_is_locked(&nfsd_mutex)); if (nfsd_serv) { @@ -346,7 +345,7 @@ int nfsd_create_serv(void) if (nfsd_serv == NULL) return -ENOMEM; - error = svc_bind(nfsd_serv, current->nsproxy->net_ns); + error = svc_bind(nfsd_serv, net); if (error < 0) { svc_destroy(nfsd_serv); return error; @@ -427,11 +426,7 @@ int nfsd_set_nrthreads(int n, int *nthreads) if (err) break; } - - if (nfsd_serv->sv_nrthreads == 1) - svc_shutdown_net(nfsd_serv, net); - svc_destroy(nfsd_serv); - + nfsd_destroy(net); return err; } @@ -478,9 +473,7 @@ out_shutdown: if (error < 0 && !nfsd_up_before) nfsd_shutdown(); out_destroy: - if (nfsd_serv->sv_nrthreads == 1) - svc_shutdown_net(nfsd_serv, net); - svc_destroy(nfsd_serv); /* Release server */ + nfsd_destroy(net); /* Release server */ out: mutex_unlock(&nfsd_mutex); return error; @@ -563,12 +556,13 @@ nfsd(void *vrqstp) nfsdstats.th_cnt --; out: - if (rqstp->rq_server->sv_nrthreads == 1) - svc_shutdown_net(rqstp->rq_server, &init_net); + rqstp->rq_server = NULL; /* Release the thread */ svc_exit_thread(rqstp); + nfsd_destroy(&init_net); + /* Release module */ mutex_unlock(&nfsd_mutex); module_put_and_exit(0); @@ -656,7 +650,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) } /* Store reply in cache. */ - nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); + nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1); return 1; } @@ -682,9 +676,7 @@ int nfsd_pool_stats_release(struct inode *inode, struct file *file) mutex_lock(&nfsd_mutex); /* this function really, really should have been called svc_put() */ - if (nfsd_serv->sv_nrthreads == 1) - svc_shutdown_net(nfsd_serv, net); - svc_destroy(nfsd_serv); + nfsd_destroy(net); mutex_unlock(&nfsd_mutex); return ret; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 568666156ea4..f03160106b95 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1477,13 +1477,19 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, case NFS3_CREATE_EXCLUSIVE: if ( dchild->d_inode->i_mtime.tv_sec == v_mtime && dchild->d_inode->i_atime.tv_sec == v_atime - && dchild->d_inode->i_size == 0 ) + && dchild->d_inode->i_size == 0 ) { + if (created) + *created = 1; break; + } case NFS4_CREATE_EXCLUSIVE4_1: if ( dchild->d_inode->i_mtime.tv_sec == v_mtime && dchild->d_inode->i_atime.tv_sec == v_atime - && dchild->d_inode->i_size == 0 ) + && dchild->d_inode->i_size == 0 ) { + if (created) + *created = 1; goto set_attr; + } /* fallthru */ case NFS3_CREATE_GUARDED: err = nfserr_exist; diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 6fe98ed58545..372949338915 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -666,8 +666,11 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, if (ret < 0) printk(KERN_ERR "NILFS: GC failed during preparation: " "cannot read source blocks: err=%d\n", ret); - else + else { + if (nilfs_sb_need_update(nilfs)) + set_nilfs_discontinued(nilfs); ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); + } nilfs_remove_all_gcinodes(nilfs); clear_nilfs_gc_running(nilfs); diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index f35794b97e8e..a50636025364 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -21,6 +21,7 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new) if ((old->path.mnt == new->path.mnt) && (old->path.dentry == new->path.dentry)) return true; + break; case (FSNOTIFY_EVENT_NONE): return true; default: diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 8445fbc8985c..6f292dd53c6d 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -579,8 +579,6 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, /* don't allow invalid bits: we don't want flags set */ mask = inotify_arg_to_mask(arg); - if (unlikely(!(mask & IN_ALL_EVENTS))) - return -EINVAL; fsn_mark = fsnotify_find_inode_mark(group, inode); if (!fsn_mark) @@ -632,8 +630,6 @@ static int inotify_new_watch(struct fsnotify_group *group, /* don't allow invalid bits: we don't want flags set */ mask = inotify_arg_to_mask(arg); - if (unlikely(!(mask & IN_ALL_EVENTS))) - return -EINVAL; tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); if (unlikely(!tmp_i_mark)) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 657743254eb9..340bd02839be 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -593,9 +593,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, level = ocfs2_iocb_rw_locked_level(iocb); ocfs2_rw_unlock(inode, level); + inode_dio_done(inode); if (is_async) aio_complete(iocb, ret, 0); - inode_dio_done(inode); } /* diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 81a4cd22f80b..231eab2b2d07 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2545,6 +2545,7 @@ int ocfs2_super_lock(struct ocfs2_super *osb, * everything is up to the caller :) */ status = ocfs2_should_refresh_lock_res(lockres); if (status < 0) { + ocfs2_cluster_unlock(osb, lockres, level); mlog_errno(status); goto bail; } @@ -2553,8 +2554,10 @@ int ocfs2_super_lock(struct ocfs2_super *osb, ocfs2_complete_lock_res_refresh(lockres, status); - if (status < 0) + if (status < 0) { + ocfs2_cluster_unlock(osb, lockres, level); mlog_errno(status); + } ocfs2_track_lock_refresh(lockres); } bail: diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index f169da4624fd..b7e74b580c0f 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -642,7 +642,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle, * cluster groups will be staying in cache for the duration of * this operation. */ - ac->ac_allow_chain_relink = 0; + ac->ac_disable_chain_relink = 1; /* Claim the first region */ status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits, @@ -1823,7 +1823,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, * Do this *after* figuring out how many bits we're taking out * of our target group. */ - if (ac->ac_allow_chain_relink && + if (!ac->ac_disable_chain_relink && (prev_group_bh) && (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) { status = ocfs2_relink_block_group(handle, alloc_inode, @@ -1928,7 +1928,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, victim = ocfs2_find_victim_chain(cl); ac->ac_chain = victim; - ac->ac_allow_chain_relink = 1; status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); @@ -1947,7 +1946,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, * searching each chain in order. Don't allow chain relinking * because we only calculate enough journal credits for one * relink per alloc. */ - ac->ac_allow_chain_relink = 0; + ac->ac_disable_chain_relink = 1; for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { if (i == victim) continue; diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index b8afabfeede4..a36d0aa50911 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -49,7 +49,7 @@ struct ocfs2_alloc_context { /* these are used by the chain search */ u16 ac_chain; - int ac_allow_chain_relink; + int ac_disable_chain_relink; group_search_t *ac_group_search; u64 ac_last_group; diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 0ba9ea1e7961..2e3ea308c144 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -7189,7 +7189,7 @@ int ocfs2_init_security_and_acl(struct inode *dir, struct buffer_head *dir_bh = NULL; ret = ocfs2_init_security_get(inode, dir, qstr, NULL); - if (!ret) { + if (ret) { mlog_errno(ret); goto leave; } diff --git a/fs/open.c b/fs/open.c index 3f1108b19143..cf1d34fc5e69 100644 --- a/fs/open.c +++ b/fs/open.c @@ -882,9 +882,10 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o int lookup_flags = 0; int acc_mode; - if (!(flags & O_CREAT)) - mode = 0; - op->mode = mode; + if (flags & O_CREAT) + op->mode = (mode & S_IALLUGO) | S_IFREG; + else + op->mode = 0; /* Must never be set by userspace */ flags &= ~FMODE_NONOTIFY; diff --git a/fs/proc/page.c b/fs/proc/page.c index 7fcd0d60a968..b8730d9ebaee 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -115,7 +115,13 @@ u64 stable_page_flags(struct page *page) u |= 1 << KPF_COMPOUND_TAIL; if (PageHuge(page)) u |= 1 << KPF_HUGE; - else if (PageTransCompound(page)) + /* + * PageTransCompound can be true for non-huge compound pages (slab + * pages or pages allocated by drivers with __GFP_COMP) because it + * just checks PG_head/PG_tail, so we need to check PageLRU to make + * sure a given page is a thp, not a non-huge compound page. + */ + else if (PageTransCompound(page) && PageLRU(compound_trans_head(page))) u |= 1 << KPF_THP; /* diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 21d836f40292..ab5352101db5 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -462,9 +462,6 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, err = ERR_PTR(-ENOMEM); inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); - if (h) - sysctl_head_finish(h); - if (!inode) goto out; @@ -473,6 +470,8 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, d_add(dentry, inode); out: + if (h) + sysctl_head_finish(h); sysctl_head_finish(head); return err; } diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 6c91b17cb5fd..98bf0c2738d6 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -45,10 +45,13 @@ static cputime64_t get_iowait_time(int cpu) static u64 get_idle_time(int cpu) { - u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL); + u64 idle, idle_time = -1ULL; + + if (cpu_online(cpu)) + idle_time = get_cpu_idle_time_us(cpu, NULL); if (idle_time == -1ULL) - /* !NO_HZ so we can rely on cpustat.idle */ + /* !NO_HZ or cpu offline so we can rely on cpustat.idle */ idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; else idle = usecs_to_cputime64(idle_time); @@ -58,10 +61,13 @@ static u64 get_idle_time(int cpu) static u64 get_iowait_time(int cpu) { - u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL); + u64 iowait, iowait_time = -1ULL; + + if (cpu_online(cpu)) + iowait_time = get_cpu_iowait_time_us(cpu, NULL); if (iowait_time == -1ULL) - /* !NO_HZ so we can rely on cpustat.iowait */ + /* !NO_HZ or cpu offline so we can rely on cpustat.iowait */ iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; else iowait = usecs_to_cputime64(iowait_time); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 82c585f715e3..4a66a5c4a63f 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -88,6 +88,27 @@ static const char *get_reason_str(enum kmsg_dump_reason reason) } } +bool pstore_cannot_block_path(enum kmsg_dump_reason reason) +{ + /* + * In case of NMI path, pstore shouldn't be blocked + * regardless of reason. + */ + if (in_nmi()) + return true; + + switch (reason) { + /* In panic case, other cpus are stopped by smp_send_stop(). */ + case KMSG_DUMP_PANIC: + /* Emergency restart shouldn't be blocked by spin lock. */ + case KMSG_DUMP_EMERG: + return true; + default: + return false; + } +} +EXPORT_SYMBOL_GPL(pstore_cannot_block_path); + /* * callback from kmsg_dump. (s2,l2) has the most recently * written bytes, older bytes are in (s1,l1). Save as much @@ -111,10 +132,12 @@ static void pstore_dump(struct kmsg_dumper *dumper, why = get_reason_str(reason); - if (in_nmi()) { - is_locked = spin_trylock(&psinfo->buf_lock); - if (!is_locked) - pr_err("pstore dump routine blocked in NMI, may corrupt error record\n"); + if (pstore_cannot_block_path(reason)) { + is_locked = spin_trylock_irqsave(&psinfo->buf_lock, flags); + if (!is_locked) { + pr_err("pstore dump routine blocked in %s path, may corrupt error record\n" + , in_nmi() ? "NMI" : why); + } } else spin_lock_irqsave(&psinfo->buf_lock, flags); oopscount++; @@ -145,9 +168,9 @@ static void pstore_dump(struct kmsg_dumper *dumper, total += l1_cpy + l2_cpy; part++; } - if (in_nmi()) { + if (pstore_cannot_block_path(reason)) { if (is_locked) - spin_unlock(&psinfo->buf_lock); + spin_unlock_irqrestore(&psinfo->buf_lock, flags); } else spin_unlock_irqrestore(&psinfo->buf_lock, flags); } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 494c315c7417..c11db5134b72 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1573,8 +1573,10 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, reiserfs_warning(sb, "reiserfs-13077", "nfsd/reiserfs, fhtype=%d, len=%d - odd", fh_type, fh_len); - fh_type = 5; + fh_type = fh_len; } + if (fh_len < 2) + return NULL; return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1], (fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0); @@ -1583,6 +1585,8 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { + if (fh_type > fh_len) + fh_type = fh_len; if (fh_type < 4) return NULL; @@ -1784,8 +1788,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, BUG_ON(!th->t_trans_id); - dquot_initialize(inode); + reiserfs_write_unlock(inode->i_sb); err = dquot_alloc_inode(inode); + reiserfs_write_lock(inode->i_sb); if (err) goto out_end_trans; if (!dir->i_nlink) { @@ -1981,8 +1986,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, out_end_trans: journal_end(th, th->t_super, th->t_blocks_allocated); + reiserfs_write_unlock(inode->i_sb); /* Drop can be outside and it needs more credits so it's better to have it outside */ dquot_drop(inode); + reiserfs_write_lock(inode->i_sb); inode->i_flags |= S_NOQUOTA; make_bad_inode(inode); @@ -3105,10 +3112,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) /* must be turned off for recursive notify_change calls */ ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); - depth = reiserfs_write_lock_once(inode->i_sb); if (is_quota_modification(inode, attr)) dquot_initialize(inode); - + depth = reiserfs_write_lock_once(inode->i_sb); if (attr->ia_valid & ATTR_SIZE) { /* version 2 items will be caught by the s_maxbytes check ** done for us in vmtruncate @@ -3172,7 +3178,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) error = journal_begin(&th, inode->i_sb, jbegin_count); if (error) goto out; + reiserfs_write_unlock_once(inode->i_sb, depth); error = dquot_transfer(inode, attr); + depth = reiserfs_write_lock_once(inode->i_sb); if (error) { journal_end(&th, inode->i_sb, jbegin_count); goto out; diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index f8afa4b162b8..2f40a4c70a4d 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -1968,7 +1968,9 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree key2type(&(key->on_disk_key))); #endif + reiserfs_write_unlock(inode->i_sb); retval = dquot_alloc_space_nodirty(inode, pasted_size); + reiserfs_write_lock(inode->i_sb); if (retval) { pathrelse(search_path); return retval; @@ -2061,9 +2063,11 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, "reiserquota insert_item(): allocating %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(ih)); #endif + reiserfs_write_unlock(inode->i_sb); /* We can't dirty inode here. It would be immediately written but * appropriate stat item isn't inserted yet... */ retval = dquot_alloc_space_nodirty(inode, quota_bytes); + reiserfs_write_lock(inode->i_sb); if (retval) { pathrelse(path); return retval; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 8b7616ef06d8..8169be93ac0f 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -256,7 +256,9 @@ static int finish_unfinished(struct super_block *s) retval = remove_save_link_only(s, &save_link_key, 0); continue; } + reiserfs_write_unlock(s); dquot_initialize(inode); + reiserfs_write_lock(s); if (truncate && S_ISDIR(inode->i_mode)) { /* We got a truncate request for a dir which is impossible. @@ -1292,7 +1294,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) kfree(qf_names[i]); #endif err = -EINVAL; - goto out_err; + goto out_unlock; } #ifdef CONFIG_QUOTA handle_quota_files(s, qf_names, &qfmt); @@ -1336,7 +1338,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) if (blocks) { err = reiserfs_resize(s, blocks); if (err != 0) - goto out_err; + goto out_unlock; } if (*mount_flags & MS_RDONLY) { @@ -1346,9 +1348,15 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) /* it is read-only already */ goto out_ok; + /* + * Drop write lock. Quota will retake it when needed and lock + * ordering requires calling dquot_suspend() without it. + */ + reiserfs_write_unlock(s); err = dquot_suspend(s, -1); if (err < 0) goto out_err; + reiserfs_write_lock(s); /* try to remount file system with read-only permissions */ if (sb_umount_state(rs) == REISERFS_VALID_FS @@ -1358,7 +1366,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) err = journal_begin(&th, s, 10); if (err) - goto out_err; + goto out_unlock; /* Mounting a rw partition read-only. */ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); @@ -1373,7 +1381,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) if (reiserfs_is_journal_aborted(journal)) { err = journal->j_errno; - goto out_err; + goto out_unlock; } handle_data_mode(s, mount_options); @@ -1382,7 +1390,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */ err = journal_begin(&th, s, 10); if (err) - goto out_err; + goto out_unlock; /* Mount a partition which is read-only, read-write */ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); @@ -1399,11 +1407,17 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) SB_JOURNAL(s)->j_must_wait = 1; err = journal_end(&th, s, 10); if (err) - goto out_err; + goto out_unlock; s->s_dirt = 0; if (!(*mount_flags & MS_RDONLY)) { + /* + * Drop write lock. Quota will retake it when needed and lock + * ordering requires calling dquot_resume() without it. + */ + reiserfs_write_unlock(s); dquot_resume(s, -1); + reiserfs_write_lock(s); finish_unfinished(s); reiserfs_xattr_init(s, *mount_flags); } @@ -1413,9 +1427,10 @@ out_ok: reiserfs_write_unlock(s); return 0; +out_unlock: + reiserfs_write_unlock(s); out_err: kfree(new_opts); - reiserfs_write_unlock(s); return err; } @@ -2049,13 +2064,15 @@ static int reiserfs_write_dquot(struct dquot *dquot) REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); if (ret) goto out; + reiserfs_write_unlock(dquot->dq_sb); ret = dquot_commit(dquot); + reiserfs_write_lock(dquot->dq_sb); err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); if (!ret && err) ret = err; - out: +out: reiserfs_write_unlock(dquot->dq_sb); return ret; } @@ -2071,13 +2088,15 @@ static int reiserfs_acquire_dquot(struct dquot *dquot) REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); if (ret) goto out; + reiserfs_write_unlock(dquot->dq_sb); ret = dquot_acquire(dquot); + reiserfs_write_lock(dquot->dq_sb); err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); if (!ret && err) ret = err; - out: +out: reiserfs_write_unlock(dquot->dq_sb); return ret; } @@ -2091,19 +2110,21 @@ static int reiserfs_release_dquot(struct dquot *dquot) ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + reiserfs_write_unlock(dquot->dq_sb); if (ret) { /* Release dquot anyway to avoid endless cycle in dqput() */ dquot_release(dquot); goto out; } ret = dquot_release(dquot); + reiserfs_write_lock(dquot->dq_sb); err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); if (!ret && err) ret = err; - out: reiserfs_write_unlock(dquot->dq_sb); +out: return ret; } @@ -2128,11 +2149,13 @@ static int reiserfs_write_info(struct super_block *sb, int type) ret = journal_begin(&th, sb, 2); if (ret) goto out; + reiserfs_write_unlock(sb); ret = dquot_commit_info(sb, type); + reiserfs_write_lock(sb); err = journal_end(&th, sb, 2); if (!ret && err) ret = err; - out: +out: reiserfs_write_unlock(sb); return ret; } @@ -2157,8 +2180,11 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, struct reiserfs_transaction_handle th; int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA; - if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) - return -EINVAL; + reiserfs_write_lock(sb); + if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) { + err = -EINVAL; + goto out; + } /* Quotafile not on the same filesystem? */ if (path->dentry->d_sb != sb) { @@ -2200,8 +2226,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, if (err) goto out; } - err = dquot_quota_on(sb, type, format_id, path); + reiserfs_write_unlock(sb); + return dquot_quota_on(sb, type, format_id, path); out: + reiserfs_write_unlock(sb); return err; } @@ -2275,7 +2303,9 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; tmp_bh.b_state = 0; + reiserfs_write_lock(sb); err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE); + reiserfs_write_unlock(sb); if (err) goto out; if (offset || tocopy != sb->s_blocksize) @@ -2291,10 +2321,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, flush_dcache_page(bh->b_page); set_buffer_uptodate(bh); unlock_buffer(bh); + reiserfs_write_lock(sb); reiserfs_prepare_for_journal(sb, bh, 1); journal_mark_dirty(current->journal_info, sb, bh); if (!journal_quota) reiserfs_add_ordered_list(inode, bh); + reiserfs_write_unlock(sb); brelse(bh); offset = 0; towrite -= tocopy; diff --git a/fs/splice.c b/fs/splice.c index 5cac690f8103..bed6a3c29355 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -696,8 +696,10 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, return -EINVAL; more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; - if (sd->len < sd->total_len) + + if (sd->len < sd->total_len && pipe->nrbufs > 1) more |= MSG_SENDPAGE_NOTLAST; + return file->f_op->sendpage(file, buf->page, buf->offset, sd->len, &pos, more); } diff --git a/fs/stat.c b/fs/stat.c index c733dc5753ae..dc6d0be300ca 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -57,7 +57,7 @@ EXPORT_SYMBOL(vfs_getattr); int vfs_fstat(unsigned int fd, struct kstat *stat) { - struct file *f = fget(fd); + struct file *f = fget_raw(fd); int error = -EBADF; if (f) { diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 35a36d39fa2c..a545d819b495 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -457,20 +457,18 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) /** * sysfs_pathname - return full path to sysfs dirent * @sd: sysfs_dirent whose path we want - * @path: caller allocated buffer + * @path: caller allocated buffer of size PATH_MAX * * Gives the name "/" to the sysfs_root entry; any path returned * is relative to wherever sysfs is mounted. - * - * XXX: does no error checking on @path size */ static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) { if (sd->s_parent) { sysfs_pathname(sd->s_parent, path); - strcat(path, "/"); + strlcat(path, "/", PATH_MAX); } - strcat(path, sd->s_name); + strlcat(path, sd->s_name, PATH_MAX); return path; } @@ -503,9 +501,11 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) char *path = kzalloc(PATH_MAX, GFP_KERNEL); WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s'\n", - (path == NULL) ? sd->s_name : - strcat(strcat(sysfs_pathname(acxt->parent_sd, path), "/"), - sd->s_name)); + (path == NULL) ? sd->s_name + : (sysfs_pathname(acxt->parent_sd, path), + strlcat(path, "/", PATH_MAX), + strlcat(path, sd->s_name, PATH_MAX), + path)); kfree(path); } diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 9f717655df18..28e3de1892b9 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -170,7 +170,7 @@ struct ubifs_global_debug_info { #define ubifs_dbg_msg(type, fmt, ...) \ pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) -#define DBG_KEY_BUF_LEN 32 +#define DBG_KEY_BUF_LEN 48 #define ubifs_dbg_msg_key(type, key, fmt, ...) do { \ char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \ diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index ec9f1870ab7f..8640a12f0650 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -977,7 +977,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, struct ubifs_budget_req ino_req = { .dirtied_ino = 1, .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; struct timespec time; - unsigned int saved_nlink; + unsigned int uninitialized_var(saved_nlink); /* * Budget request settings: deletion direntry, new direntry, removing diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 2559d174e004..5dc48cacb5a0 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -681,8 +681,16 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c) if (!lprops) { lprops = ubifs_fast_find_freeable(c); if (!lprops) { - ubifs_assert(c->freeable_cnt == 0); - if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { + /* + * The first condition means the following: go scan the + * LPT if there are uncategorized lprops, which means + * there may be freeable LEBs there (UBIFS does not + * store the information about freeable LEBs in the + * master node). + */ + if (c->in_a_category_cnt != c->main_lebs || + c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) { + ubifs_assert(c->freeable_cnt == 0); lprops = scan_for_leb_for_idx(c); if (IS_ERR(lprops)) { err = PTR_ERR(lprops); diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index f8a181e647cc..ea9d49164ffa 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -300,8 +300,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, default: ubifs_assert(0); } + lprops->flags &= ~LPROPS_CAT_MASK; lprops->flags |= cat; + c->in_a_category_cnt += 1; + ubifs_assert(c->in_a_category_cnt <= c->main_lebs); } /** @@ -334,6 +337,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c, default: ubifs_assert(0); } + + c->in_a_category_cnt -= 1; + ubifs_assert(c->in_a_category_cnt >= 0); } /** diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 93d59aceaaef..4971cb23b6c8 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1184,6 +1184,8 @@ struct ubifs_debug_info; * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size) * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size) * @freeable_cnt: number of freeable LEBs in @freeable_list + * @in_a_category_cnt: count of lprops which are in a certain category, which + * basically meants that they were loaded from the flash * * @ltab_lnum: LEB number of LPT's own lprops table * @ltab_offs: offset of LPT's own lprops table @@ -1413,6 +1415,7 @@ struct ubifs_info { struct list_head freeable_list; struct list_head frdi_idx_list; int freeable_cnt; + int in_a_category_cnt; int ltab_lnum; int ltab_offs; diff --git a/fs/udf/file.c b/fs/udf/file.c index 7f3f7ba3df6e..d1c6093fd3d3 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -39,20 +39,24 @@ #include "udf_i.h" #include "udf_sb.h" -static int udf_adinicb_readpage(struct file *file, struct page *page) +static void __udf_adinicb_readpage(struct page *page) { struct inode *inode = page->mapping->host; char *kaddr; struct udf_inode_info *iinfo = UDF_I(inode); - BUG_ON(!PageLocked(page)); - kaddr = kmap(page); - memset(kaddr, 0, PAGE_CACHE_SIZE); memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size); + memset(kaddr + inode->i_size, 0, PAGE_CACHE_SIZE - inode->i_size); flush_dcache_page(page); SetPageUptodate(page); kunmap(page); +} + +static int udf_adinicb_readpage(struct file *file, struct page *page) +{ + BUG_ON(!PageLocked(page)); + __udf_adinicb_readpage(page); unlock_page(page); return 0; @@ -77,6 +81,25 @@ static int udf_adinicb_writepage(struct page *page, return 0; } +static int udf_adinicb_write_begin(struct file *file, + struct address_space *mapping, loff_t pos, + unsigned len, unsigned flags, struct page **pagep, + void **fsdata) +{ + struct page *page; + + if (WARN_ON_ONCE(pos >= PAGE_CACHE_SIZE)) + return -EIO; + page = grab_cache_page_write_begin(mapping, 0, flags); + if (!page) + return -ENOMEM; + *pagep = page; + + if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) + __udf_adinicb_readpage(page); + return 0; +} + static int udf_adinicb_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, @@ -98,8 +121,8 @@ static int udf_adinicb_write_end(struct file *file, const struct address_space_operations udf_adinicb_aops = { .readpage = udf_adinicb_readpage, .writepage = udf_adinicb_writepage, - .write_begin = simple_write_begin, - .write_end = udf_adinicb_write_end, + .write_begin = udf_adinicb_write_begin, + .write_end = udf_adinicb_write_end, }; static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 7d7528008359..aa70035fb402 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -574,6 +574,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, struct udf_inode_info *iinfo = UDF_I(inode); int goal = 0, pgoal = iinfo->i_location.logicalBlockNum; int lastblock = 0; + bool isBeyondEOF; *err = 0; *new = 0; @@ -653,7 +654,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, /* Are we beyond EOF? */ if (etype == -1) { int ret; - + isBeyondEOF = 1; if (count) { if (c) laarr[0] = laarr[1]; @@ -696,6 +697,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, endnum = c + 1; lastblock = 1; } else { + isBeyondEOF = 0; endnum = startnum = ((count > 2) ? 2 : count); /* if the current extent is in position 0, @@ -738,10 +740,13 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, goal, err); if (!newblocknum) { brelse(prev_epos.bh); + brelse(cur_epos.bh); + brelse(next_epos.bh); *err = -ENOSPC; return 0; } - iinfo->i_lenExtents += inode->i_sb->s_blocksize; + if (isBeyondEOF) + iinfo->i_lenExtents += inode->i_sb->s_blocksize; } /* if the extent the requsted block is located in contains multiple @@ -768,6 +773,8 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, udf_update_extents(inode, laarr, startnum, endnum, &prev_epos); brelse(prev_epos.bh); + brelse(cur_epos.bh); + brelse(next_epos.bh); newblock = udf_get_pblock(inode->i_sb, newblocknum, iinfo->i_location.partitionReferenceNum, 0); diff --git a/fs/udf/super.c b/fs/udf/super.c index e660ffdd91b6..4988a8afcc8f 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1287,6 +1287,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, udf_err(sb, "error loading logical volume descriptor: " "Partition table too long (%u > %lu)\n", table_len, sb->s_blocksize - sizeof(*lvd)); + ret = 1; goto out_bh; } @@ -1331,8 +1332,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, UDF_ID_SPARABLE, strlen(UDF_ID_SPARABLE))) { if (udf_load_sparable_map(sb, map, - (struct sparablePartitionMap *)gpm) < 0) + (struct sparablePartitionMap *)gpm) < 0) { + ret = 1; goto out_bh; + } } else if (!strncmp(upm2->partIdent.ident, UDF_ID_METADATA, strlen(UDF_ID_METADATA))) { diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 0dbb9e70fe21..7a978c7be842 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -89,11 +89,11 @@ xfs_destroy_ioend( } if (ioend->io_iocb) { + inode_dio_done(ioend->io_inode); if (ioend->io_isasync) { aio_complete(ioend->io_iocb, ioend->io_error ? ioend->io_error : ioend->io_result, 0); } - inode_dio_done(ioend->io_inode); } mempool_free(ioend, xfs_ioend_pool); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 6819b5163e33..bb76128b5f47 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1165,9 +1165,14 @@ xfs_buf_bio_end_io( { xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; - xfs_buf_ioerror(bp, -error); + /* + * don't overwrite existing errors - otherwise we can lose errors on + * buffers that require multiple bios to complete. + */ + if (!bp->b_error) + xfs_buf_ioerror(bp, -error); - if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) + if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); _xfs_buf_ioend(bp, 1); @@ -1243,6 +1248,11 @@ next_chunk: if (size) goto next_chunk; } else { + /* + * This is guaranteed not to be the last io reference count + * because the caller (xfs_buf_iorequest) holds a count itself. + */ + atomic_dec(&bp->b_io_remaining); xfs_buf_ioerror(bp, EIO); bio_put(bio); } diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 558910f5e3c0..5703fb85f6bb 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -195,6 +195,9 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; struct inode *inode = NULL; + if (fh_len < xfs_fileid_length(fileid_type)) + return NULL; + switch (fileid_type) { case FILEID_INO32_GEN_PARENT: inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 8ecad5bad66c..0abb1623a7cb 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3514,7 +3514,7 @@ xlog_do_recovery_pass( * - order is important. */ error = xlog_bread_offset(log, 0, - bblks - split_bblks, hbp, + bblks - split_bblks, dbp, offset + BBTOB(split_bblks)); if (error) goto bread_err2; |