diff options
author | Clark Williams <williams@redhat.com> | 2012-01-25 21:31:16 -0600 |
---|---|---|
committer | Clark Williams <williams@redhat.com> | 2012-01-25 21:31:16 -0600 |
commit | 974f4b86951972b133b4ec5b36956b979dea08d0 (patch) | |
tree | 090b441b1c96f708e8a033c7f88fa383dd1e7cba /fs | |
parent | 40a54e2e116950a27dbd0a46a7b1ff8e982de477 (diff) | |
parent | 3499d6424f682a58761d827012567c552b053842 (diff) |
Merge commit 'v3.2.2' into rt-3.2.2-rt10
Diffstat (limited to 'fs')
-rw-r--r-- | fs/aio.c | 11 | ||||
-rw-r--r-- | fs/cifs/connect.c | 23 | ||||
-rw-r--r-- | fs/dcache.c | 78 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 28 | ||||
-rw-r--r-- | fs/ext4/super.c | 7 | ||||
-rw-r--r-- | fs/nfs/blocklayout/blocklayout.c | 28 | ||||
-rw-r--r-- | fs/nfs/blocklayout/extents.c | 11 | ||||
-rw-r--r-- | fs/nfs/callback_proc.c | 2 | ||||
-rw-r--r-- | fs/nfs/file.c | 4 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 96 | ||||
-rw-r--r-- | fs/nfs/nfs4xdr.c | 31 | ||||
-rw-r--r-- | fs/nfs/objlayout/objio_osd.c | 3 | ||||
-rw-r--r-- | fs/nfs/objlayout/objlayout.c | 4 | ||||
-rw-r--r-- | fs/nfs/pnfs.c | 12 | ||||
-rw-r--r-- | fs/nfs/pnfs.h | 1 | ||||
-rw-r--r-- | fs/nfs/super.c | 43 | ||||
-rw-r--r-- | fs/nfsd/export.c | 2 | ||||
-rw-r--r-- | fs/nfsd/nfs4state.c | 17 | ||||
-rw-r--r-- | fs/notify/mark.c | 8 | ||||
-rw-r--r-- | fs/proc/base.c | 145 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 3 | ||||
-rw-r--r-- | fs/proc/uptime.c | 9 | ||||
-rw-r--r-- | fs/ubifs/debug.h | 17 | ||||
-rw-r--r-- | fs/xfs/xfs_discard.c | 4 |
24 files changed, 304 insertions, 283 deletions
@@ -476,14 +476,21 @@ static void kiocb_batch_init(struct kiocb_batch *batch, long total) batch->count = total; } -static void kiocb_batch_free(struct kiocb_batch *batch) +static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch) { struct kiocb *req, *n; + if (list_empty(&batch->head)) + return; + + spin_lock_irq(&ctx->ctx_lock); list_for_each_entry_safe(req, n, &batch->head, ki_batch) { list_del(&req->ki_batch); + list_del(&req->ki_list); kmem_cache_free(kiocb_cachep, req); + ctx->reqs_active--; } + spin_unlock_irq(&ctx->ctx_lock); } /* @@ -1742,7 +1749,7 @@ long do_io_submit(aio_context_t ctx_id, long nr, } blk_finish_plug(&plug); - kiocb_batch_free(&batch); + kiocb_batch_free(ctx, &batch); put_ioctx(ctx); return i ? i : ret; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f3670cf72587..63e4be46eec4 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2914,18 +2914,33 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, #define CIFS_DEFAULT_IOSIZE (1024 * 1024) /* - * Windows only supports a max of 60k reads. Default to that when posix - * extensions aren't in force. + * Windows only supports a max of 60kb reads and 65535 byte writes. Default to + * those values when posix extensions aren't in force. In actuality here, we + * use 65536 to allow for a write that is a multiple of 4k. Most servers seem + * to be ok with the extra byte even though Windows doesn't send writes that + * are that large. + * + * Citation: + * + * http://blogs.msdn.com/b/openspecification/archive/2009/04/10/smb-maximum-transmit-buffer-size-and-performance-tuning.aspx */ #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) +#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) static unsigned int cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) { __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); struct TCP_Server_Info *server = tcon->ses->server; - unsigned int wsize = pvolume_info->wsize ? pvolume_info->wsize : - CIFS_DEFAULT_IOSIZE; + unsigned int wsize; + + /* start with specified wsize, or default */ + if (pvolume_info->wsize) + wsize = pvolume_info->wsize; + else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) + wsize = CIFS_DEFAULT_IOSIZE; + else + wsize = CIFS_DEFAULT_NON_POSIX_WSIZE; /* can server support 24-bit write sizes? (via UNIX extensions) */ if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) diff --git a/fs/dcache.c b/fs/dcache.c index 89509b5a090e..f7908aef484a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -242,6 +242,7 @@ static void dentry_lru_add(struct dentry *dentry) static void __dentry_lru_del(struct dentry *dentry) { list_del_init(&dentry->d_lru); + dentry->d_flags &= ~DCACHE_SHRINK_LIST; dentry->d_sb->s_nr_dentry_unused--; dentry_stat.nr_unused--; } @@ -275,15 +276,15 @@ static void dentry_lru_prune(struct dentry *dentry) } } -static void dentry_lru_move_tail(struct dentry *dentry) +static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list) { spin_lock(&dcache_lru_lock); if (list_empty(&dentry->d_lru)) { - list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); + list_add_tail(&dentry->d_lru, list); dentry->d_sb->s_nr_dentry_unused++; dentry_stat.nr_unused++; } else { - list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); + list_move_tail(&dentry->d_lru, list); } spin_unlock(&dcache_lru_lock); } @@ -769,14 +770,18 @@ static void shrink_dentry_list(struct list_head *list) } /** - * __shrink_dcache_sb - shrink the dentry LRU on a given superblock - * @sb: superblock to shrink dentry LRU. - * @count: number of entries to prune - * @flags: flags to control the dentry processing + * prune_dcache_sb - shrink the dcache + * @sb: superblock + * @count: number of entries to try to free + * + * Attempt to shrink the superblock dcache LRU by @count entries. This is + * done when we need more memory an called from the superblock shrinker + * function. * - * If flags contains DCACHE_REFERENCED reference dentries will not be pruned. + * This function may fail to free any resources if all the dentries are in + * use. */ -static void __shrink_dcache_sb(struct super_block *sb, int count, int flags) +void prune_dcache_sb(struct super_block *sb, int count) { struct dentry *dentry; LIST_HEAD(referenced); @@ -795,18 +800,13 @@ relock: goto relock; } - /* - * If we are honouring the DCACHE_REFERENCED flag and the - * dentry has this flag set, don't free it. Clear the flag - * and put it back on the LRU. - */ - if (flags & DCACHE_REFERENCED && - dentry->d_flags & DCACHE_REFERENCED) { + if (dentry->d_flags & DCACHE_REFERENCED) { dentry->d_flags &= ~DCACHE_REFERENCED; list_move(&dentry->d_lru, &referenced); spin_unlock(&dentry->d_lock); } else { list_move_tail(&dentry->d_lru, &tmp); + dentry->d_flags |= DCACHE_SHRINK_LIST; spin_unlock(&dentry->d_lock); if (!--count) break; @@ -821,23 +821,6 @@ relock: } /** - * prune_dcache_sb - shrink the dcache - * @sb: superblock - * @nr_to_scan: number of entries to try to free - * - * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is - * done when we need more memory an called from the superblock shrinker - * function. - * - * This function may fail to free any resources if all the dentries are in - * use. - */ -void prune_dcache_sb(struct super_block *sb, int nr_to_scan) -{ - __shrink_dcache_sb(sb, nr_to_scan, DCACHE_REFERENCED); -} - -/** * shrink_dcache_sb - shrink dcache for a superblock * @sb: superblock * @@ -1091,7 +1074,7 @@ EXPORT_SYMBOL(have_submounts); * drop the lock and return early due to latency * constraints. */ -static int select_parent(struct dentry * parent) +static int select_parent(struct dentry *parent, struct list_head *dispose) { struct dentry *this_parent; struct list_head *next; @@ -1113,17 +1096,21 @@ resume: spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - /* - * move only zero ref count dentries to the end - * of the unused list for prune_dcache + /* + * move only zero ref count dentries to the dispose list. + * + * Those which are presently on the shrink list, being processed + * by shrink_dentry_list(), shouldn't be moved. Otherwise the + * loop in shrink_dcache_parent() might not make any progress + * and loop forever. */ - if (!dentry->d_count) { - dentry_lru_move_tail(dentry); - found++; - } else { + if (dentry->d_count) { dentry_lru_del(dentry); + } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { + dentry_lru_move_list(dentry, dispose); + dentry->d_flags |= DCACHE_SHRINK_LIST; + found++; } - /* * We can return to the caller if we have found some (this * ensures forward progress). We'll be coming back to find @@ -1180,14 +1167,13 @@ rename_retry: * * Prune the dcache to remove unused children of the parent dentry. */ - void shrink_dcache_parent(struct dentry * parent) { - struct super_block *sb = parent->d_sb; + LIST_HEAD(dispose); int found; - while ((found = select_parent(parent)) != 0) - __shrink_dcache_sb(sb, found, 0); + while ((found = select_parent(parent, &dispose)) != 0) + shrink_dentry_list(&dispose); } EXPORT_SYMBOL(shrink_dcache_parent); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index a56796814d6a..ab25f57f63de 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -182,19 +182,22 @@ setversion_out: if (err) return err; - if (get_user(n_blocks_count, (__u32 __user *)arg)) - return -EFAULT; + if (get_user(n_blocks_count, (__u32 __user *)arg)) { + err = -EFAULT; + goto group_extend_out; + } if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { ext4_msg(sb, KERN_ERR, "Online resizing not supported with bigalloc"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto group_extend_out; } err = mnt_want_write(filp->f_path.mnt); if (err) - return err; + goto group_extend_out; err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); if (EXT4_SB(sb)->s_journal) { @@ -204,9 +207,10 @@ setversion_out: } if (err == 0) err = err2; + mnt_drop_write(filp->f_path.mnt); +group_extend_out: ext4_resize_end(sb); - return err; } @@ -267,19 +271,22 @@ mext_out: return err; if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, - sizeof(input))) - return -EFAULT; + sizeof(input))) { + err = -EFAULT; + goto group_add_out; + } if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { ext4_msg(sb, KERN_ERR, "Online resizing not supported with bigalloc"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto group_add_out; } err = mnt_want_write(filp->f_path.mnt); if (err) - return err; + goto group_add_out; err = ext4_group_add(sb, &input); if (EXT4_SB(sb)->s_journal) { @@ -289,9 +296,10 @@ mext_out: } if (err == 0) err = err2; + mnt_drop_write(filp->f_path.mnt); +group_add_out: ext4_resize_end(sb); - return err; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3e1329e2f826..9281dbe6adae 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2006,17 +2006,16 @@ static int ext4_fill_flex_info(struct super_block *sb) struct ext4_group_desc *gdp = NULL; ext4_group_t flex_group_count; ext4_group_t flex_group; - int groups_per_flex = 0; + unsigned int groups_per_flex = 0; size_t size; int i; sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; - groups_per_flex = 1 << sbi->s_log_groups_per_flex; - - if (groups_per_flex < 2) { + if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { sbi->s_log_groups_per_flex = 0; return 1; } + groups_per_flex = 1 << sbi->s_log_groups_per_flex; /* We allocate both existing and potentially added groups */ flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 281ae95932c9..3db6b82e397b 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -146,14 +146,19 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, { struct bio *bio; + npg = min(npg, BIO_MAX_PAGES); bio = bio_alloc(GFP_NOIO, npg); - if (!bio) - return NULL; + if (!bio && (current->flags & PF_MEMALLOC)) { + while (!bio && (npg /= 2)) + bio = bio_alloc(GFP_NOIO, npg); + } - bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; - bio->bi_bdev = be->be_mdev; - bio->bi_end_io = end_io; - bio->bi_private = par; + if (bio) { + bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; + bio->bi_bdev = be->be_mdev; + bio->bi_end_io = end_io; + bio->bi_private = par; + } return bio; } @@ -779,16 +784,13 @@ bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) static void free_blk_mountid(struct block_mount_id *mid) { if (mid) { - struct pnfs_block_dev *dev; - spin_lock(&mid->bm_lock); - while (!list_empty(&mid->bm_devlist)) { - dev = list_first_entry(&mid->bm_devlist, - struct pnfs_block_dev, - bm_node); + struct pnfs_block_dev *dev, *tmp; + + /* No need to take bm_lock as we are last user freeing bm_devlist */ + list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) { list_del(&dev->bm_node); bl_free_block_dev(dev); } - spin_unlock(&mid->bm_lock); kfree(mid); } } diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 19fa7b0b8c00..c69682a4262a 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c @@ -139,11 +139,13 @@ static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length) } /* Ensure that future operations on given range of tree will not malloc */ -static int _preload_range(struct my_tree *tree, u64 offset, u64 length) +static int _preload_range(struct pnfs_inval_markings *marks, + u64 offset, u64 length) { u64 start, end, s; int count, i, used = 0, status = -ENOMEM; struct pnfs_inval_tracking **storage; + struct my_tree *tree = &marks->im_tree; dprintk("%s(%llu, %llu) enter\n", __func__, offset, length); start = normalize(offset, tree->mtt_step_size); @@ -161,12 +163,11 @@ static int _preload_range(struct my_tree *tree, u64 offset, u64 length) goto out_cleanup; } - /* Now need lock - HOW??? */ - + spin_lock(&marks->im_lock); for (s = start; s < end; s += tree->mtt_step_size) used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]); + spin_unlock(&marks->im_lock); - /* Unlock - HOW??? */ status = 0; out_cleanup: @@ -286,7 +287,7 @@ int bl_mark_sectors_init(struct pnfs_inval_markings *marks, start = normalize(offset, marks->im_block_size); end = normalize_up(offset + length, marks->im_block_size); - if (_preload_range(&marks->im_tree, start, end - start)) + if (_preload_range(marks, start, end - start)) goto outerr; spin_lock(&marks->im_lock); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 43926add945b..54cea8ad5a76 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -339,7 +339,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) dprintk("%s enter. slotid %d seqid %d\n", __func__, args->csa_slotid, args->csa_sequenceid); - if (args->csa_slotid > NFS41_BC_MAX_CALLBACKS) + if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS) return htonl(NFS4ERR_BADSLOT); slot = tbl->slots + args->csa_slotid; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 606ef0f20aed..c43a452f7da2 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -272,13 +272,13 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) datasync); ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (ret) - return ret; mutex_lock(&inode->i_mutex); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); status = nfs_commit_inode(inode, FLUSH_SYNC); + if (status >= 0 && ret < 0) + status = ret; have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); if (have_error) ret = xchg(&ctx->error, 0); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d9f4d78c3413..055d7027f7a9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3430,19 +3430,6 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) */ #define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) -static void buf_to_pages(const void *buf, size_t buflen, - struct page **pages, unsigned int *pgbase) -{ - const void *p = buf; - - *pgbase = offset_in_page(buf); - p -= *pgbase; - while (p < buf + buflen) { - *(pages++) = virt_to_page(p); - p += PAGE_CACHE_SIZE; - } -} - static int buf_to_pages_noslab(const void *buf, size_t buflen, struct page **pages, unsigned int *pgbase) { @@ -3539,9 +3526,19 @@ out: nfs4_set_cached_acl(inode, acl); } +/* + * The getxattr API returns the required buffer length when called with a + * NULL buf. The NFSv4 acl tool then calls getxattr again after allocating + * the required buf. On a NULL buf, we send a page of data to the server + * guessing that the ACL request can be serviced by a page. If so, we cache + * up to the page of ACL data, and the 2nd call to getxattr is serviced by + * the cache. If not so, we throw away the page, and cache the required + * length. The next getxattr call will then produce another round trip to + * the server, this time with the input buf of the required size. + */ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) { - struct page *pages[NFS4ACL_MAXPAGES]; + struct page *pages[NFS4ACL_MAXPAGES] = {NULL, }; struct nfs_getaclargs args = { .fh = NFS_FH(inode), .acl_pages = pages, @@ -3556,41 +3553,60 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - struct page *localpage = NULL; - int ret; + int ret = -ENOMEM, npages, i, acl_len = 0; - if (buflen < PAGE_SIZE) { - /* As long as we're doing a round trip to the server anyway, - * let's be prepared for a page of acl data. */ - localpage = alloc_page(GFP_KERNEL); - resp_buf = page_address(localpage); - if (localpage == NULL) - return -ENOMEM; - args.acl_pages[0] = localpage; - args.acl_pgbase = 0; - args.acl_len = PAGE_SIZE; - } else { - resp_buf = buf; - buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); + npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; + /* As long as we're doing a round trip to the server anyway, + * let's be prepared for a page of acl data. */ + if (npages == 0) + npages = 1; + + for (i = 0; i < npages; i++) { + pages[i] = alloc_page(GFP_KERNEL); + if (!pages[i]) + goto out_free; + } + if (npages > 1) { + /* for decoding across pages */ + args.acl_scratch = alloc_page(GFP_KERNEL); + if (!args.acl_scratch) + goto out_free; } - ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0); + args.acl_len = npages * PAGE_SIZE; + args.acl_pgbase = 0; + /* Let decode_getfacl know not to fail if the ACL data is larger than + * the page we send as a guess */ + if (buf == NULL) + res.acl_flags |= NFS4_ACL_LEN_REQUEST; + resp_buf = page_address(pages[0]); + + dprintk("%s buf %p buflen %ld npages %d args.acl_len %ld\n", + __func__, buf, buflen, npages, args.acl_len); + ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), + &msg, &args.seq_args, &res.seq_res, 0); if (ret) goto out_free; - if (res.acl_len > args.acl_len) - nfs4_write_cached_acl(inode, NULL, res.acl_len); + + acl_len = res.acl_len - res.acl_data_offset; + if (acl_len > args.acl_len) + nfs4_write_cached_acl(inode, NULL, acl_len); else - nfs4_write_cached_acl(inode, resp_buf, res.acl_len); + nfs4_write_cached_acl(inode, resp_buf + res.acl_data_offset, + acl_len); if (buf) { ret = -ERANGE; - if (res.acl_len > buflen) + if (acl_len > buflen) goto out_free; - if (localpage) - memcpy(buf, resp_buf, res.acl_len); + _copy_from_pages(buf, pages, res.acl_data_offset, + res.acl_len); } - ret = res.acl_len; + ret = acl_len; out_free: - if (localpage) - __free_page(localpage); + for (i = 0; i < npages; i++) + if (pages[i]) + __free_page(pages[i]); + if (args.acl_scratch) + __free_page(args.acl_scratch); return ret; } @@ -3621,6 +3637,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) nfs_zap_acl_cache(inode); ret = nfs4_read_cached_acl(inode, buf, buflen); if (ret != -ENOENT) + /* -ENOENT is returned if there is no ACL or if there is an ACL + * but no cached acl data, just the acl length */ return ret; return nfs4_get_acl_uncached(inode, buf, buflen); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e6161b213ed1..dcaf69309d8e 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2517,11 +2517,13 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); - replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1; + replen = hdr.replen + op_decode_hdr_maxsz + 1; encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, args->acl_pages, args->acl_pgbase, args->acl_len); + xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE); + encode_nops(&hdr); } @@ -4957,17 +4959,18 @@ decode_restorefh(struct xdr_stream *xdr) } static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, - size_t *acl_len) + struct nfs_getaclres *res) { - __be32 *savep; + __be32 *savep, *bm_p; uint32_t attrlen, bitmap[3] = {0}; struct kvec *iov = req->rq_rcv_buf.head; int status; - *acl_len = 0; + res->acl_len = 0; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) goto out; + bm_p = xdr->p; if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto out; if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) @@ -4979,18 +4982,30 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, size_t hdrlen; u32 recvd; + /* The bitmap (xdr len + bitmaps) and the attr xdr len words + * are stored with the acl data to handle the problem of + * variable length bitmaps.*/ + xdr->p = bm_p; + res->acl_data_offset = be32_to_cpup(bm_p) + 2; + res->acl_data_offset <<= 2; + /* We ignore &savep and don't do consistency checks on * the attr length. Let userspace figure it out.... */ hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; + attrlen += res->acl_data_offset; recvd = req->rq_rcv_buf.len - hdrlen; if (attrlen > recvd) { - dprintk("NFS: server cheating in getattr" - " acl reply: attrlen %u > recvd %u\n", + if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { + /* getxattr interface called with a NULL buf */ + res->acl_len = attrlen; + goto out; + } + dprintk("NFS: acl reply: attrlen %u > recvd %u\n", attrlen, recvd); return -EINVAL; } xdr_read_pages(xdr, attrlen); - *acl_len = attrlen; + res->acl_len = attrlen; } else status = -EOPNOTSUPP; @@ -6028,7 +6043,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; - status = decode_getacl(xdr, rqstp, &res->acl_len); + status = decode_getacl(xdr, rqstp, res); out: return status; diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index c807ab93140e..55d01280a609 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -551,7 +551,8 @@ static const struct nfs_pageio_ops objio_pg_write_ops = { static struct pnfs_layoutdriver_type objlayout_type = { .id = LAYOUT_OSD2_OBJECTS, .name = "LAYOUT_OSD2_OBJECTS", - .flags = PNFS_LAYOUTRET_ON_SETATTR, + .flags = PNFS_LAYOUTRET_ON_SETATTR | + PNFS_LAYOUTRET_ON_ERROR, .alloc_layout_hdr = objlayout_alloc_layout_hdr, .free_layout_hdr = objlayout_free_layout_hdr, diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 72074e3a04f9..b3c29039f5b8 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -254,6 +254,8 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) oir->status = rdata->task.tk_status = status; if (status >= 0) rdata->res.count = status; + else + rdata->pnfs_error = status; objlayout_iodone(oir); /* must not use oir after this point */ @@ -334,6 +336,8 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) if (status >= 0) { wdata->res.count = status; wdata->verf.committed = oir->committed; + } else { + wdata->pnfs_error = status; } objlayout_iodone(oir); /* must not use oir after this point */ diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8e672a2b2d69..f881a6387942 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1178,6 +1178,15 @@ void pnfs_ld_write_done(struct nfs_write_data *data) put_lseg(data->lseg); data->lseg = NULL; dprintk("pnfs write error = %d\n", data->pnfs_error); + if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & + PNFS_LAYOUTRET_ON_ERROR) { + /* Don't lo_commit on error, Server will needs to + * preform a file recovery. + */ + clear_bit(NFS_INO_LAYOUTCOMMIT, + &NFS_I(data->inode)->flags); + pnfs_return_layout(data->inode); + } } data->mds_ops->rpc_release(data); } @@ -1267,6 +1276,9 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) put_lseg(data->lseg); data->lseg = NULL; dprintk("pnfs write error = %d\n", data->pnfs_error); + if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags & + PNFS_LAYOUTRET_ON_ERROR) + pnfs_return_layout(data->inode); nfs_pageio_init_read_mds(&pgio, data->inode); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1509530cb111..53d593a0a4f2 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -68,6 +68,7 @@ enum { enum layoutdriver_policy_flags { /* Should the pNFS client commit and return the layout upon a setattr */ PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, + PNFS_LAYOUTRET_ON_ERROR = 1 << 1, }; struct nfs4_deviceid_node; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 134777406ee3..3ada13c9986a 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -909,10 +909,24 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve data->auth_flavor_len = 1; data->version = version; data->minorversion = 0; + security_init_mnt_opts(&data->lsm_opts); } return data; } +static void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data) +{ + if (data) { + kfree(data->client_address); + kfree(data->mount_server.hostname); + kfree(data->nfs_server.export_path); + kfree(data->nfs_server.hostname); + kfree(data->fscache_uniq); + security_free_mnt_opts(&data->lsm_opts); + kfree(data); + } +} + /* * Sanity-check a server address provided by the mount command. * @@ -2220,9 +2234,7 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); mntfh = nfs_alloc_fhandle(); if (data == NULL || mntfh == NULL) - goto out_free_fh; - - security_init_mnt_opts(&data->lsm_opts); + goto out; /* Validate the mount data */ error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); @@ -2234,8 +2246,6 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, #ifdef CONFIG_NFS_V4 if (data->version == 4) { mntroot = nfs4_try_mount(flags, dev_name, data); - kfree(data->client_address); - kfree(data->nfs_server.export_path); goto out; } #endif /* CONFIG_NFS_V4 */ @@ -2290,13 +2300,8 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type, s->s_flags |= MS_ACTIVE; out: - kfree(data->nfs_server.hostname); - kfree(data->mount_server.hostname); - kfree(data->fscache_uniq); - security_free_mnt_opts(&data->lsm_opts); -out_free_fh: + nfs_free_parsed_mount_data(data); nfs_free_fhandle(mntfh); - kfree(data); return mntroot; out_err_nosb: @@ -2623,9 +2628,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, mntfh = nfs_alloc_fhandle(); if (data == NULL || mntfh == NULL) - goto out_free_fh; - - security_init_mnt_opts(&data->lsm_opts); + goto out; /* Get a volume representation */ server = nfs4_create_server(data, mntfh); @@ -2677,13 +2680,10 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, s->s_flags |= MS_ACTIVE; - security_free_mnt_opts(&data->lsm_opts); nfs_free_fhandle(mntfh); return mntroot; out: - security_free_mnt_opts(&data->lsm_opts); -out_free_fh: nfs_free_fhandle(mntfh); return ERR_PTR(error); @@ -2838,7 +2838,7 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type, data = nfs_alloc_parsed_mount_data(4); if (data == NULL) - goto out_free_data; + goto out; /* Validate the mount data */ error = nfs4_validate_mount_data(raw_data, data, dev_name); @@ -2852,12 +2852,7 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type, error = PTR_ERR(res); out: - kfree(data->client_address); - kfree(data->nfs_server.export_path); - kfree(data->nfs_server.hostname); - kfree(data->fscache_uniq); -out_free_data: - kfree(data); + nfs_free_parsed_mount_data(data); dprintk("<-- nfs4_mount() = %d%s\n", error, error != 0 ? " [error]" : ""); return res; diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 62f3b9074e84..5f312abf1247 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -87,7 +87,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) struct svc_expkey key; struct svc_expkey *ek = NULL; - if (mesg[mlen-1] != '\n') + if (mlen < 1 || mesg[mlen-1] != '\n') return -EINVAL; mesg[mlen-1] = 0; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 47e94e33a975..5abced7a7408 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3809,16 +3809,29 @@ nevermind: deny->ld_type = NFS4_WRITE_LT; } +static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) +{ + struct nfs4_ol_stateid *lst; + + if (!same_owner_str(&lo->lo_owner, owner, clid)) + return false; + lst = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + return lst->st_file->fi_inode == inode; +} + static struct nfs4_lockowner * find_lockowner_str(struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) { unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); + struct nfs4_lockowner *lo; struct nfs4_stateowner *op; list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { - if (same_owner_str(op, owner, clid)) - return lockowner(op); + lo = lockowner(op); + if (same_lockowner_ino(lo, inode, clid, owner)) + return lo; } return NULL; } diff --git a/fs/notify/mark.c b/fs/notify/mark.c index e14587d55689..f104d565b682 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -135,9 +135,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark) mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; - /* 1 from caller and 1 for being on i_list/g_list */ - BUG_ON(atomic_read(&mark->refcnt) < 2); - spin_lock(&group->mark_lock); if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { @@ -182,6 +179,11 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark) iput(inode); /* + * We don't necessarily have a ref on mark from caller so the above iput + * may have already destroyed it. Don't touch from now on. + */ + + /* * it's possible that this group tried to destroy itself, but this * this mark was simultaneously being freed by inode. If that's the * case, we finish freeing the group here. diff --git a/fs/proc/base.c b/fs/proc/base.c index 851ba3dcdc29..1fc1dca608ed 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -194,65 +194,7 @@ static int proc_root_link(struct inode *inode, struct path *path) return result; } -static struct mm_struct *__check_mem_permission(struct task_struct *task) -{ - struct mm_struct *mm; - - mm = get_task_mm(task); - if (!mm) - return ERR_PTR(-EINVAL); - - /* - * A task can always look at itself, in case it chooses - * to use system calls instead of load instructions. - */ - if (task == current) - return mm; - - /* - * If current is actively ptrace'ing, and would also be - * permitted to freshly attach with ptrace now, permit it. - */ - if (task_is_stopped_or_traced(task)) { - int match; - rcu_read_lock(); - match = (ptrace_parent(task) == current); - rcu_read_unlock(); - if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) - return mm; - } - - /* - * No one else is allowed. - */ - mmput(mm); - return ERR_PTR(-EPERM); -} - -/* - * If current may access user memory in @task return a reference to the - * corresponding mm, otherwise ERR_PTR. - */ -static struct mm_struct *check_mem_permission(struct task_struct *task) -{ - struct mm_struct *mm; - int err; - - /* - * Avoid racing if task exec's as we might get a new mm but validate - * against old credentials. - */ - err = mutex_lock_killable(&task->signal->cred_guard_mutex); - if (err) - return ERR_PTR(err); - - mm = __check_mem_permission(task); - mutex_unlock(&task->signal->cred_guard_mutex); - - return mm; -} - -struct mm_struct *mm_for_maps(struct task_struct *task) +static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) { struct mm_struct *mm; int err; @@ -263,7 +205,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task) mm = get_task_mm(task); if (mm && mm != current->mm && - !ptrace_may_access(task, PTRACE_MODE_READ)) { + !ptrace_may_access(task, mode)) { mmput(mm); mm = ERR_PTR(-EACCES); } @@ -272,6 +214,11 @@ struct mm_struct *mm_for_maps(struct task_struct *task) return mm; } +struct mm_struct *mm_for_maps(struct task_struct *task) +{ + return mm_access(task, PTRACE_MODE_READ); +} + static int proc_pid_cmdline(struct task_struct *task, char * buffer) { int res = 0; @@ -816,38 +763,39 @@ static const struct file_operations proc_single_file_operations = { static int mem_open(struct inode* inode, struct file* file) { - file->private_data = (void*)((long)current->self_exec_id); + struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); + struct mm_struct *mm; + + if (!task) + return -ESRCH; + + mm = mm_access(task, PTRACE_MODE_ATTACH); + put_task_struct(task); + + if (IS_ERR(mm)) + return PTR_ERR(mm); + /* OK to pass negative loff_t, we can catch out-of-range */ file->f_mode |= FMODE_UNSIGNED_OFFSET; + file->private_data = mm; + return 0; } static ssize_t mem_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { - struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); + int ret; char *page; unsigned long src = *ppos; - int ret = -ESRCH; - struct mm_struct *mm; + struct mm_struct *mm = file->private_data; - if (!task) - goto out_no_task; + if (!mm) + return 0; - ret = -ENOMEM; page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) - goto out; - - mm = check_mem_permission(task); - ret = PTR_ERR(mm); - if (IS_ERR(mm)) - goto out_free; - - ret = -EIO; - - if (file->private_data != (void*)((long)current->self_exec_id)) - goto out_put; + return -ENOMEM; ret = 0; @@ -874,13 +822,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, } *ppos = src; -out_put: - mmput(mm); -out_free: free_page((unsigned long) page); -out: - put_task_struct(task); -out_no_task: return ret; } @@ -889,27 +831,15 @@ static ssize_t mem_write(struct file * file, const char __user *buf, { int copied; char *page; - struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); unsigned long dst = *ppos; - struct mm_struct *mm; + struct mm_struct *mm = file->private_data; - copied = -ESRCH; - if (!task) - goto out_no_task; + if (!mm) + return 0; - copied = -ENOMEM; page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) - goto out_task; - - mm = check_mem_permission(task); - copied = PTR_ERR(mm); - if (IS_ERR(mm)) - goto out_free; - - copied = -EIO; - if (file->private_data != (void *)((long)current->self_exec_id)) - goto out_mm; + return -ENOMEM; copied = 0; while (count > 0) { @@ -933,13 +863,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf, } *ppos = dst; -out_mm: - mmput(mm); -out_free: free_page((unsigned long) page); -out_task: - put_task_struct(task); -out_no_task: return copied; } @@ -959,11 +883,20 @@ loff_t mem_lseek(struct file *file, loff_t offset, int orig) return file->f_pos; } +static int mem_release(struct inode *inode, struct file *file) +{ + struct mm_struct *mm = file->private_data; + + mmput(mm); + return 0; +} + static const struct file_operations proc_mem_operations = { .llseek = mem_lseek, .read = mem_read, .write = mem_write, .open = mem_open, + .release = mem_release, }; static ssize_t environ_read(struct file *file, char __user *buf, diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e418c5abdb0e..7dcd2a250495 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -518,6 +518,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, if (!page) continue; + if (PageReserved(page)) + continue; + /* Clear accessed and referenced bits. */ ptep_test_and_clear_young(vma, addr, pte); ClearPageReferenced(page); diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 766b1d456050..29166ecd03ae 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -11,15 +11,20 @@ static int uptime_proc_show(struct seq_file *m, void *v) { struct timespec uptime; struct timespec idle; + cputime64_t idletime; + u64 nsec; + u32 rem; int i; - cputime_t idletime = cputime_zero; + idletime = 0; for_each_possible_cpu(i) idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle); do_posix_clock_monotonic_gettime(&uptime); monotonic_to_bootbased(&uptime); - cputime_to_timespec(idletime, &idle); + nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC; + idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); + idle.tv_nsec = rem; seq_printf(m, "%lu.%02lu %lu.%02lu\n", (unsigned long) uptime.tv_sec, (uptime.tv_nsec / (NSEC_PER_SEC / 100)), diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 8d9c46810189..c9d294111a53 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -175,22 +175,23 @@ const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key); /* - * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message - * macros. + * TODO: these macros are now broken because there is no locking around them + * and we use a global buffer for the key string. This means that in case of + * concurrent execution we will end up with incorrect and messy key strings. */ #define DBGKEY(key) dbg_key_str0(c, (key)) #define DBGKEY1(key) dbg_key_str1(c, (key)) extern spinlock_t dbg_lock; -#define ubifs_dbg_msg(type, fmt, ...) do { \ - spin_lock(&dbg_lock); \ - pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \ - spin_unlock(&dbg_lock); \ -} while (0) +#define ubifs_dbg_msg(type, fmt, ...) \ + pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) /* Just a debugging messages not related to any specific UBIFS subsystem */ -#define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__) +#define dbg_msg(fmt, ...) \ + printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ + __func__, ##__VA_ARGS__) + /* General messages */ #define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) /* Additional journal messages */ diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 8a24f0c6c860..286a051f12cf 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -68,7 +68,7 @@ xfs_trim_extents( * Look up the longest btree in the AGF and start with it. */ error = xfs_alloc_lookup_le(cur, 0, - XFS_BUF_TO_AGF(agbp)->agf_longest, &i); + be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i); if (error) goto out_del_cursor; @@ -84,7 +84,7 @@ xfs_trim_extents( if (error) goto out_del_cursor; XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); - ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest); + ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest)); /* * Too small? Give up. |