diff options
Diffstat (limited to 'fs')
38 files changed, 350 insertions, 183 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 80e9c18ea64f..fd6b67c40d9d 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -9,6 +9,8 @@ config BTRFS_FS select RAID6_PQ select XOR_BLOCKS select SRCU + depends on !PPC_256K_PAGES # powerpc + depends on !PAGE_SIZE_256KB # hexagon help Btrfs is a general purpose copy-on-write filesystem with extents, diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index bae05c5c75ba..92601775ec5e 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -290,7 +290,7 @@ static void end_compressed_bio_write(struct bio *bio) cb->start, cb->start + cb->len - 1, NULL, - bio->bi_error ? 0 : 1); + !cb->errors); cb->compressed_pages[0]->mapping = NULL; end_compressed_writeback(inode, cb); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 64e449eb2ecd..f0675b7c95ec 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1264,8 +1264,10 @@ int btrfs_defrag_root(struct btrfs_root *root) while (1) { trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } ret = btrfs_defrag_leaves(trans, root); diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index fbf383048409..26de74684c17 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -72,10 +72,6 @@ static int ceph_set_page_dirty(struct page *page) struct inode *inode; struct ceph_inode_info *ci; struct ceph_snap_context *snapc; - int ret; - - if (unlikely(!mapping)) - return !TestSetPageDirty(page); if (PageDirty(page)) { dout("%p set_page_dirty %p idx %lu -- already dirty\n", @@ -121,11 +117,7 @@ static int ceph_set_page_dirty(struct page *page) page->private = (unsigned long)snapc; SetPagePrivate(page); - ret = __set_page_dirty_nobuffers(page); - WARN_ON(!PageLocked(page)); - WARN_ON(!page->mapping); - - return ret; + return __set_page_dirty_nobuffers(page); } /* diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 9d74cd37b395..154c47282a34 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1545,6 +1545,8 @@ static int __mark_caps_flushing(struct inode *inode, * try to invalidate mapping pages without blocking. */ static int try_nonblocking_invalidate(struct inode *inode) + __releases(ci->i_ceph_lock) + __acquires(ci->i_ceph_lock) { struct ceph_inode_info *ci = ceph_inode(inode); u32 invalidating_gen = ci->i_rdcache_gen; diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 942874257a09..e5e780145728 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -367,14 +367,9 @@ cifs_strndup_from_utf16(const char *src, const int maxlen, if (!dst) return NULL; cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage, - NO_MAP_UNI_RSVD); + NO_MAP_UNI_RSVD); } else { - len = strnlen(src, maxlen); - len++; - dst = kmalloc(len, GFP_KERNEL); - if (!dst) - return NULL; - strlcpy(dst, src, len); + dst = kstrndup(src, maxlen, GFP_KERNEL); } return dst; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 71c9ed70aa68..cda22b312a4c 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2961,9 +2961,10 @@ cifs_match_super(struct super_block *sb, void *data) spin_lock(&cifs_tcp_ses_lock); cifs_sb = CIFS_SB(sb); tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb)); - if (IS_ERR(tlink)) { + if (tlink == NULL) { + /* can not match superblock if tlink were ever null */ spin_unlock(&cifs_tcp_ses_lock); - return rc; + return 0; } tcon = tlink_tcon(tlink); ses = tcon->ses; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 9bc7a29f88d6..2d3918cdcc28 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -602,7 +602,7 @@ sess_alloc_buffer(struct sess_data *sess_data, int wct) return 0; out_free_smb_buf: - kfree(smb_buf); + cifs_small_buf_release(smb_buf); sess_data->iov[0].iov_base = NULL; sess_data->iov[0].iov_len = 0; sess_data->buf0_type = CIFS_NO_BUFFER; diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 9d7a4a714907..99f4cd91910f 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -554,7 +554,7 @@ static void close_connection(struct connection *con, bool and_other, } if (con->othercon && and_other) { /* Will only re-enter once. */ - close_connection(con->othercon, false, true, true); + close_connection(con->othercon, false, tx, rx); } if (con->rx_page) { __free_page(con->rx_page); diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 9f9992b37924..2e4747e0aaf0 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -46,10 +46,9 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, struct ext2_sb_info *sbi = EXT2_SB(sb); if (block_group >= sbi->s_groups_count) { - ext2_error (sb, "ext2_get_group_desc", - "block_group >= groups_count - " - "block_group = %d, groups_count = %lu", - block_group, sbi->s_groups_count); + WARN(1, "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sbi->s_groups_count); return NULL; } @@ -57,10 +56,9 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1); if (!sbi->s_group_desc[group_desc]) { - ext2_error (sb, "ext2_get_group_desc", - "Group descriptor not loaded - " - "block_group = %d, group_desc = %lu, desc = %lu", - block_group, group_desc, offset); + WARN(1, "Group descriptor not loaded - " + "block_group = %d, group_desc = %lu, desc = %lu", + block_group, group_desc, offset); return NULL; } diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 7b626e942987..96dc313d6251 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -521,7 +521,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) struct dir_private_info *info = file->private_data; struct inode *inode = file_inode(file); struct fname *fname; - int ret; + int ret = 0; if (!info) { info = ext4_htree_create_dir_info(file, ctx->pos); @@ -569,7 +569,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) info->curr_minor_hash, &info->next_hash); if (ret < 0) - return ret; + goto finished; if (ret == 0) { ctx->pos = ext4_get_htree_eof(file); break; @@ -600,7 +600,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) } finished: info->last_pos = ctx->pos; - return 0; + return ret < 0 ? ret : 0; } static int ext4_dir_open(struct inode * inode, struct file * filp) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 50f98d6a4416..07ae78ba27a1 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -865,6 +865,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode) eh->eh_entries = 0; eh->eh_magic = EXT4_EXT_MAGIC; eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); + eh->eh_generation = 0; ext4_mark_inode_dirty(handle, inode); return 0; } @@ -1128,6 +1129,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); neh->eh_magic = EXT4_EXT_MAGIC; neh->eh_depth = 0; + neh->eh_generation = 0; /* move remainder of path[depth] to the new leaf */ if (unlikely(path[depth].p_hdr->eh_entries != @@ -1205,6 +1207,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, neh->eh_magic = EXT4_EXT_MAGIC; neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); neh->eh_depth = cpu_to_le16(depth - i); + neh->eh_generation = 0; fidx = EXT_FIRST_INDEX(neh); fidx->ei_block = border; ext4_idx_store_pblock(fidx, oldblock); diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index ac748b3af1c1..665cf30c95e9 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -1080,11 +1080,9 @@ static unsigned long ext4_es_scan(struct shrinker *shrink, ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret); - if (!nr_to_scan) - return ret; - nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL); + ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt); trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret); return nr_shrunk; } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b14f7b3a8db3..685a26e9540f 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -405,7 +405,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, * * We always try to spread first-level directories. * - * If there are blockgroups with both free inodes and free blocks counts + * If there are blockgroups with both free inodes and free clusters counts * not worse than average we return one with smallest directory count. * Otherwise we simply return a random group. * @@ -414,7 +414,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, * It's OK to put directory into a group unless * it has too many directories already (max_dirs) or * it has too few free inodes left (min_inodes) or - * it has too few free blocks left (min_blocks) or + * it has too few free clusters left (min_clusters) or * Parent's group is preferred, if it doesn't satisfy these * conditions we search cyclically through the rest. If none * of the groups look good we just look for a group with more @@ -430,7 +430,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, ext4_group_t real_ngroups = ext4_get_groups_count(sb); int inodes_per_group = EXT4_INODES_PER_GROUP(sb); unsigned int freei, avefreei, grp_free; - ext4_fsblk_t freeb, avefreec; + ext4_fsblk_t freec, avefreec; unsigned int ndirs; int max_dirs, min_inodes; ext4_grpblk_t min_clusters; @@ -449,9 +449,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); avefreei = freei / ngroups; - freeb = EXT4_C2B(sbi, - percpu_counter_read_positive(&sbi->s_freeclusters_counter)); - avefreec = freeb; + freec = percpu_counter_read_positive(&sbi->s_freeclusters_counter); + avefreec = freec; do_div(avefreec, ngroups); ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index df585267d3c2..6f5e292e86f7 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -746,6 +746,12 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, ext4_write_lock_xattr(inode, &no_expand); BUG_ON(!ext4_has_inline_data(inode)); + /* + * ei->i_inline_off may have changed since ext4_write_begin() + * called ext4_try_to_write_inline_data() + */ + (void) ext4_find_inline_data_nolock(inode); + kaddr = kmap_atomic(page); ext4_write_inline_data(inode, &iloc, kaddr, pos, len); kunmap_atomic(kaddr); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 7f068330edb6..0ce7ff7a2ce8 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -512,9 +512,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) /* find and pin the new wb */ rcu_read_lock(); memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys); - if (memcg_css) - isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); + if (memcg_css && !css_tryget(memcg_css)) + memcg_css = NULL; rcu_read_unlock(); + if (!memcg_css) + goto out_free; + + isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC); + css_put(memcg_css); if (!isw->new_wb) goto out_free; @@ -2040,28 +2045,6 @@ int dirtytime_interval_handler(struct ctl_table *table, int write, return ret; } -static noinline void block_dump___mark_inode_dirty(struct inode *inode) -{ - if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { - struct dentry *dentry; - const char *name = "?"; - - dentry = d_find_alias(inode); - if (dentry) { - spin_lock(&dentry->d_lock); - name = (const char *) dentry->d_name.name; - } - printk(KERN_DEBUG - "%s(%d): dirtied inode %lu (%s) on %s\n", - current->comm, task_pid_nr(current), inode->i_ino, - name, inode->i_sb->s_id); - if (dentry) { - spin_unlock(&dentry->d_lock); - dput(dentry); - } - } -} - /** * __mark_inode_dirty - internal function * @inode: inode to mark @@ -2120,9 +2103,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) (dirtytime && (inode->i_state & I_DIRTY_INODE))) return; - if (unlikely(block_dump)) - block_dump___mark_inode_dirty(inode); - spin_lock(&inode->i_lock); if (dirtytime && (inode->i_state & I_DIRTY_INODE)) goto out_unlock_inode; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index fc265f4b839a..38a12b0e395f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1315,6 +1315,15 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, goto restart; } spin_lock(&fpq->lock); + /* + * Must not put request on fpq->io queue after having been shut down by + * fuse_abort_conn() + */ + if (!fpq->connected) { + req->out.h.error = err = -ECONNABORTED; + goto out_end; + + } list_add(&req->list, &fpq->io); spin_unlock(&fpq->lock); cs->req = req; @@ -1922,7 +1931,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, } err = -EINVAL; - if (oh.error <= -1000 || oh.error > 0) + if (oh.error <= -512 || oh.error > 0) goto err_finish; spin_lock(&fpq->lock); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 3cbc9147286d..da9f97911852 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -296,6 +296,11 @@ static void gdlm_put_lock(struct gfs2_glock *gl) gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_update_request_times(gl); + /* don't want to call dlm if we've unmounted the lock protocol */ + if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) { + gfs2_glock_free(gl); + return; + } /* don't want to skip dlm_unlock writing the lvb when lock has one */ if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c index de69d8a24f6d..7f2ef95dcd05 100644 --- a/fs/hfs/bfind.c +++ b/fs/hfs/bfind.c @@ -24,7 +24,19 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) fd->key = ptr + tree->max_key_len + 2; hfs_dbg(BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); - mutex_lock(&tree->tree_lock); + switch (tree->cnid) { + case HFS_CAT_CNID: + mutex_lock_nested(&tree->tree_lock, CATALOG_BTREE_MUTEX); + break; + case HFS_EXT_CNID: + mutex_lock_nested(&tree->tree_lock, EXTENTS_BTREE_MUTEX); + break; + case HFS_ATTR_CNID: + mutex_lock_nested(&tree->tree_lock, ATTR_BTREE_MUTEX); + break; + default: + return -EINVAL; + } return 0; } diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index 221719eac5de..2cda99e61cae 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -14,16 +14,31 @@ #include "btree.h" -void hfs_bnode_read(struct hfs_bnode *node, void *buf, - int off, int len) +void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len) { struct page *page; + int pagenum; + int bytes_read; + int bytes_to_read; + void *vaddr; off += node->page_offset; - page = node->page[0]; + pagenum = off >> PAGE_SHIFT; + off &= ~PAGE_MASK; /* compute page offset for the first page */ - memcpy(buf, kmap(page) + off, len); - kunmap(page); + for (bytes_read = 0; bytes_read < len; bytes_read += bytes_to_read) { + if (pagenum >= node->tree->pages_per_bnode) + break; + page = node->page[pagenum]; + bytes_to_read = min_t(int, len - bytes_read, PAGE_SIZE - off); + + vaddr = kmap_atomic(page); + memcpy(buf + bytes_read, vaddr + off, bytes_to_read); + kunmap_atomic(vaddr); + + pagenum++; + off = 0; /* page offset only applies to the first page */ + } } u16 hfs_bnode_read_u16(struct hfs_bnode *node, int off) diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h index 2715f416b5a8..308b5f1af65b 100644 --- a/fs/hfs/btree.h +++ b/fs/hfs/btree.h @@ -12,6 +12,13 @@ typedef int (*btree_keycmp)(const btree_key *, const btree_key *); #define NODE_HASH_SIZE 256 +/* B-tree mutex nested subclasses */ +enum hfs_btree_mutex_classes { + CATALOG_BTREE_MUTEX, + EXTENTS_BTREE_MUTEX, + ATTR_BTREE_MUTEX, +}; + /* A HFS BTree held in memory */ struct hfs_btree { struct super_block *sb; diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4574fdd3d421..3eb815bb2c78 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -426,14 +426,12 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) if (!res) { if (fd.entrylength > sizeof(rec) || fd.entrylength < 0) { res = -EIO; - goto bail; + goto bail_hfs_find; } hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, fd.entrylength); } - if (res) { - hfs_find_exit(&fd); - goto bail_no_root; - } + if (res) + goto bail_hfs_find; res = -EINVAL; root_inode = hfs_iget(sb, &fd.search_key->cat, &rec); hfs_find_exit(&fd); @@ -449,6 +447,8 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) /* everything's okay */ return 0; +bail_hfs_find: + hfs_find_exit(&fd); bail_no_root: pr_err("get root inode failed\n"); bail: diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 41aa3ca6a6a4..b318732a8562 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -160,7 +160,8 @@ void jfs_evict_inode(struct inode *inode) if (test_cflag(COMMIT_Freewmap, inode)) jfs_free_zero_link(inode); - diFree(inode); + if (JFS_SBI(inode->i_sb)->ipimap) + diFree(inode); /* * Free the inode from the quota allocation. diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index a69bdf2a1085..d19542a88c2c 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1339,6 +1339,7 @@ int lmLogInit(struct jfs_log * log) } else { if (memcmp(logsuper->uuid, log->uuid, 16)) { jfs_warn("wrong uuid on JFS log device"); + rc = -EINVAL; goto errout20; } log->size = le32_to_cpu(logsuper->size); diff --git a/fs/namespace.c b/fs/namespace.c index 5fb1e4a430f8..ddd9d47059ee 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1830,6 +1830,20 @@ void drop_collected_mounts(struct vfsmount *mnt) namespace_unlock(); } +static bool has_locked_children(struct mount *mnt, struct dentry *dentry) +{ + struct mount *child; + + list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { + if (!is_subdir(child->mnt_mountpoint, dentry)) + continue; + + if (child->mnt.mnt_flags & MNT_LOCKED) + return true; + } + return false; +} + /** * clone_private_mount - create a private clone of a path * @@ -1844,16 +1858,27 @@ struct vfsmount *clone_private_mount(struct path *path) struct mount *old_mnt = real_mount(path->mnt); struct mount *new_mnt; + down_read(&namespace_sem); if (IS_MNT_UNBINDABLE(old_mnt)) - return ERR_PTR(-EINVAL); + goto invalid; + + if (!check_mnt(old_mnt)) + goto invalid; + + if (has_locked_children(old_mnt, path->dentry)) + goto invalid; - down_read(&namespace_sem); new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); up_read(&namespace_sem); + if (IS_ERR(new_mnt)) return ERR_CAST(new_mnt); return &new_mnt->mnt; + +invalid: + up_read(&namespace_sem); + return ERR_PTR(-EINVAL); } EXPORT_SYMBOL_GPL(clone_private_mount); @@ -2169,19 +2194,6 @@ static int do_change_type(struct path *path, int flag) return err; } -static bool has_locked_children(struct mount *mnt, struct dentry *dentry) -{ - struct mount *child; - list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { - if (!is_subdir(child->mnt_mountpoint, dentry)) - continue; - - if (child->mnt.mnt_flags & MNT_LOCKED) - return true; - } - return false; -} - /* * do loopback mount. */ diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index cb28cceefebe..9f365b004453 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -363,7 +363,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, break; case NFS3_CREATE_UNCHECKED: - goto out; + goto out_release_acls; } nfs_fattr_init(data->res.dir_attr); nfs_fattr_init(data->res.fattr); @@ -708,7 +708,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, break; default: status = -EINVAL; - goto out; + goto out_release_acls; } status = nfs3_do_create(dir, dentry, data); diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index c3b629eec294..49a148ebbcda 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -73,11 +73,9 @@ static const struct sysfs_ops nilfs_##name##_attr_ops = { \ #define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \ static void nilfs_##name##_attr_release(struct kobject *kobj) \ { \ - struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \ - struct the_nilfs *nilfs = container_of(kobj->parent, \ - struct the_nilfs, \ - ns_##parent_name##_kobj); \ - subgroups = nilfs->ns_##parent_name##_subgroups; \ + struct nilfs_sysfs_##parent_name##_subgroups *subgroups = container_of(kobj, \ + struct nilfs_sysfs_##parent_name##_subgroups, \ + sg_##name##_kobj); \ complete(&subgroups->sg_##name##_kobj_unregister); \ } \ static struct kobj_type nilfs_##name##_ktype = { \ @@ -103,12 +101,12 @@ static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \ err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \ #name); \ if (err) \ - return err; \ - return 0; \ + kobject_put(kobj); \ + return err; \ } \ static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \ { \ - kobject_del(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \ + kobject_put(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \ } /************************************************************************ @@ -219,14 +217,14 @@ int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root) } if (err) - return err; + kobject_put(&root->snapshot_kobj); - return 0; + return err; } void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root) { - kobject_del(&root->snapshot_kobj); + kobject_put(&root->snapshot_kobj); } /************************************************************************ @@ -1008,7 +1006,7 @@ int nilfs_sysfs_create_device_group(struct super_block *sb) err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL, "%s", sb->s_id); if (err) - goto free_dev_subgroups; + goto cleanup_dev_kobject; err = nilfs_sysfs_create_mounted_snapshots_group(nilfs); if (err) @@ -1045,9 +1043,7 @@ delete_mounted_snapshots_group: nilfs_sysfs_delete_mounted_snapshots_group(nilfs); cleanup_dev_kobject: - kobject_del(&nilfs->ns_dev_kobj); - -free_dev_subgroups: + kobject_put(&nilfs->ns_dev_kobj); kfree(nilfs->ns_dev_subgroups); failed_create_device_group: diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 3af6fa324afa..8d4d58b12972 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -502,7 +502,7 @@ err_corrupt_attr: } file_name_attr = (FILE_NAME_ATTR*)((u8*)attr + le16_to_cpu(attr->data.resident.value_offset)); - p2 = (u8*)attr + le32_to_cpu(attr->data.resident.value_length); + p2 = (u8 *)file_name_attr + le32_to_cpu(attr->data.resident.value_length); if (p2 < (u8*)attr || p2 > p) goto err_corrupt_attr; /* This attribute is ok, but is it in the $Extend directory? */ diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 5b0f2c806f03..0de92ad0ba79 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1532,6 +1532,45 @@ static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start, } } +/* + * zero out partial blocks of one cluster. + * + * start: file offset where zero starts, will be made upper block aligned. + * len: it will be trimmed to the end of current cluster if "start + len" + * is bigger than it. + */ +static int ocfs2_zeroout_partial_cluster(struct inode *inode, + u64 start, u64 len) +{ + int ret; + u64 start_block, end_block, nr_blocks; + u64 p_block, offset; + u32 cluster, p_cluster, nr_clusters; + struct super_block *sb = inode->i_sb; + u64 end = ocfs2_align_bytes_to_clusters(sb, start); + + if (start + len < end) + end = start + len; + + start_block = ocfs2_blocks_for_bytes(sb, start); + end_block = ocfs2_blocks_for_bytes(sb, end); + nr_blocks = end_block - start_block; + if (!nr_blocks) + return 0; + + cluster = ocfs2_bytes_to_clusters(sb, start); + ret = ocfs2_get_clusters(inode, cluster, &p_cluster, + &nr_clusters, NULL); + if (ret) + return ret; + if (!p_cluster) + return 0; + + offset = start_block - ocfs2_clusters_to_blocks(sb, cluster); + p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset; + return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS); +} + static int ocfs2_zero_partial_clusters(struct inode *inode, u64 start, u64 len) { @@ -1541,6 +1580,7 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); unsigned int csize = osb->s_clustersize; handle_t *handle; + loff_t isize = i_size_read(inode); /* * The "start" and "end" values are NOT necessarily part of @@ -1561,6 +1601,26 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0) goto out; + /* No page cache for EOF blocks, issue zero out to disk. */ + if (end > isize) { + /* + * zeroout eof blocks in last cluster starting from + * "isize" even "start" > "isize" because it is + * complicated to zeroout just at "start" as "start" + * may be not aligned with block size, buffer write + * would be required to do that, but out of eof buffer + * write is not supported. + */ + ret = ocfs2_zeroout_partial_cluster(inode, isize, + end - isize); + if (ret) { + mlog_errno(ret); + goto out; + } + if (start >= isize) + goto out; + end = isize; + } handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -1860,45 +1920,6 @@ out: } /* - * zero out partial blocks of one cluster. - * - * start: file offset where zero starts, will be made upper block aligned. - * len: it will be trimmed to the end of current cluster if "start + len" - * is bigger than it. - */ -static int ocfs2_zeroout_partial_cluster(struct inode *inode, - u64 start, u64 len) -{ - int ret; - u64 start_block, end_block, nr_blocks; - u64 p_block, offset; - u32 cluster, p_cluster, nr_clusters; - struct super_block *sb = inode->i_sb; - u64 end = ocfs2_align_bytes_to_clusters(sb, start); - - if (start + len < end) - end = start + len; - - start_block = ocfs2_blocks_for_bytes(sb, start); - end_block = ocfs2_blocks_for_bytes(sb, end); - nr_blocks = end_block - start_block; - if (!nr_blocks) - return 0; - - cluster = ocfs2_bytes_to_clusters(sb, start); - ret = ocfs2_get_clusters(inode, cluster, &p_cluster, - &nr_clusters, NULL); - if (ret) - return ret; - if (!p_cluster) - return 0; - - offset = start_block - ocfs2_clusters_to_blocks(sb, cluster); - p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset; - return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS); -} - -/* * Parts of this function taken from xfs_change_file_space() */ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, @@ -1939,7 +1960,6 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, goto out_inode_unlock; } - orig_isize = i_size_read(inode); switch (sr->l_whence) { case 0: /*SEEK_SET*/ break; @@ -1947,7 +1967,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, sr->l_start += f_pos; break; case 2: /*SEEK_END*/ - sr->l_start += orig_isize; + sr->l_start += i_size_read(inode); break; default: ret = -EINVAL; @@ -2002,6 +2022,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, ret = -EINVAL; } + orig_isize = i_size_read(inode); /* zeroout eof blocks in the cluster. */ if (!ret && change_size && orig_isize < size) { ret = ocfs2_zeroout_partial_cluster(inode, orig_isize, diff --git a/fs/pipe.c b/fs/pipe.c index 6534470a6c19..37a003b645ef 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -28,6 +28,21 @@ #include "internal.h" /* + * New pipe buffers will be restricted to this size while the user is exceeding + * their pipe buffer quota. The general pipe use case needs at least two + * buffers: one for data yet to be read, and one for new data. If this is less + * than two, then a write to a non-empty pipe may block even if the pipe is not + * full. This can occur with GNU make jobserver or similar uses of pipes as + * semaphores: multiple processes may be waiting to write tokens back to the + * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/. + * + * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their + * own risk, namely: pipe writes to non-full pipes may block until the pipe is + * emptied. + */ +#define PIPE_MIN_DEF_BUFFERS 2 + +/* * The max size that a non-root user is allowed to grow the pipe. Can * be set by root in /proc/sys/fs/pipe-max-size */ @@ -621,7 +636,7 @@ struct pipe_inode_info *alloc_pipe_info(void) if (!too_many_pipe_buffers_hard(user)) { if (too_many_pipe_buffers_soft(user)) - pipe_bufs = 1; + pipe_bufs = PIPE_MIN_DEF_BUFFERS; pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * pipe_bufs, GFP_KERNEL); } diff --git a/fs/proc/base.c b/fs/proc/base.c index b1ff8eb61802..4d68f5a9e4aa 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -887,7 +887,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, flags |= FOLL_WRITE; while (count > 0) { - int this_len = min_t(int, count, PAGE_SIZE); + size_t this_len = min_t(size_t, count, PAGE_SIZE); if (write && copy_from_user(page, buf, this_len)) { copied = -EFAULT; diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index b218f965817b..613cc38c9efa 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -14,13 +14,48 @@ #include <linux/buffer_head.h> #include "qnx4.h" +/* + * A qnx4 directory entry is an inode entry or link info + * depending on the status field in the last byte. The + * first byte is where the name start either way, and a + * zero means it's empty. + * + * Also, due to a bug in gcc, we don't want to use the + * real (differently sized) name arrays in the inode and + * link entries, but always the 'de_name[]' one in the + * fake struct entry. + * + * See + * + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578#c6 + * + * for details, but basically gcc will take the size of the + * 'name' array from one of the used union entries randomly. + * + * This use of 'de_name[]' (48 bytes) avoids the false positive + * warnings that would happen if gcc decides to use 'inode.di_name' + * (16 bytes) even when the pointer and size were to come from + * 'link.dl_name' (48 bytes). + * + * In all cases the actual name pointer itself is the same, it's + * only the gcc internal 'what is the size of this field' logic + * that can get confused. + */ +union qnx4_directory_entry { + struct { + const char de_name[48]; + u8 de_pad[15]; + u8 de_status; + }; + struct qnx4_inode_entry inode; + struct qnx4_link_info link; +}; + static int qnx4_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); unsigned int offset; struct buffer_head *bh; - struct qnx4_inode_entry *de; - struct qnx4_link_info *le; unsigned long blknum; int ix, ino; int size; @@ -37,27 +72,27 @@ static int qnx4_readdir(struct file *file, struct dir_context *ctx) } ix = (ctx->pos >> QNX4_DIR_ENTRY_SIZE_BITS) % QNX4_INODES_PER_BLOCK; for (; ix < QNX4_INODES_PER_BLOCK; ix++, ctx->pos += QNX4_DIR_ENTRY_SIZE) { + union qnx4_directory_entry *de; + offset = ix * QNX4_DIR_ENTRY_SIZE; - de = (struct qnx4_inode_entry *) (bh->b_data + offset); - if (!de->di_fname[0]) + de = (union qnx4_directory_entry *) (bh->b_data + offset); + + if (!de->de_name[0]) continue; - if (!(de->di_status & (QNX4_FILE_USED|QNX4_FILE_LINK))) + if (!(de->de_status & (QNX4_FILE_USED|QNX4_FILE_LINK))) continue; - if (!(de->di_status & QNX4_FILE_LINK)) - size = QNX4_SHORT_NAME_MAX; - else - size = QNX4_NAME_MAX; - size = strnlen(de->di_fname, size); - QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, de->di_fname)); - if (!(de->di_status & QNX4_FILE_LINK)) + if (!(de->de_status & QNX4_FILE_LINK)) { + size = sizeof(de->inode.di_fname); ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1; - else { - le = (struct qnx4_link_info*)de; - ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) * + } else { + size = sizeof(de->link.dl_fname); + ino = ( le32_to_cpu(de->link.dl_inode_blk) - 1 ) * QNX4_INODES_PER_BLOCK + - le->dl_inode_ndx; + de->link.dl_inode_ndx; } - if (!dir_emit(ctx, de->di_fname, size, ino, DT_UNKNOWN)) { + size = strnlen(de->de_name, size); + QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, name)); + if (!dir_emit(ctx, de->de_name, size, ino, DT_UNKNOWN)) { brelse(bh); return 0; } diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 00985f9db9f7..6a0fa0cdc1ed 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2770,6 +2770,20 @@ int journal_init(struct super_block *sb, const char *j_dev_name, goto free_and_return; } + /* + * Sanity check to see if journal first block is correct. + * If journal first block is invalid it can cause + * zeroing important superblock members. + */ + if (!SB_ONDISK_JOURNAL_DEVICE(sb) && + SB_ONDISK_JOURNAL_1st_BLOCK(sb) < SB_JOURNAL_1st_RESERVED_BLOCK(sb)) { + reiserfs_warning(sb, "journal-1393", + "journal 1st super block is invalid: 1st reserved block %d, but actual 1st block is %d", + SB_JOURNAL_1st_RESERVED_BLOCK(sb), + SB_ONDISK_JOURNAL_1st_BLOCK(sb)); + goto free_and_return; + } + if (journal_init_dev(sb, journal, j_dev_name) != 0) { reiserfs_warning(sb, "sh-462", "unable to initialize journal device"); diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 33b78ee9fb9e..13322c39e6cc 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -386,6 +386,24 @@ void pathrelse(struct treepath *search_path) search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; } +static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih) +{ + struct reiserfs_de_head *deh; + int i; + + deh = B_I_DEH(bh, ih); + for (i = 0; i < ih_entry_count(ih); i++) { + if (deh_location(&deh[i]) > ih_item_len(ih)) { + reiserfs_warning(NULL, "reiserfs-5094", + "directory entry location seems wrong %h", + &deh[i]); + return 0; + } + } + + return 1; +} + static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) { struct block_head *blkh; @@ -453,11 +471,14 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) "(second one): %h", ih); return 0; } - if (is_direntry_le_ih(ih) && (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE))) { - reiserfs_warning(NULL, "reiserfs-5093", - "item entry count seems wrong %h", - ih); - return 0; + if (is_direntry_le_ih(ih)) { + if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) { + reiserfs_warning(NULL, "reiserfs-5093", + "item entry count seems wrong %h", + ih); + return 0; + } + return has_valid_deh_location(bh, ih); } prev_location = ih_location(ih); } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 503d8c06e0d9..2ffcbe451202 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2050,6 +2050,14 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) unlock_new_inode(root_inode); } + if (!S_ISDIR(root_inode->i_mode) || !inode_get_bytes(root_inode) || + !root_inode->i_size) { + SWARN(silent, s, "", "corrupt root inode, run fsck"); + iput(root_inode); + errval = -EUCLEAN; + goto error; + } + s->s_root = d_make_root(root_inode); if (!s->s_root) goto error; diff --git a/fs/seq_file.c b/fs/seq_file.c index 6dc4296eed62..95e730506ad2 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -14,6 +14,7 @@ #include <linux/mm.h> #include <linux/printk.h> #include <linux/string_helpers.h> +#include <linux/pagemap.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -28,6 +29,9 @@ static void *seq_buf_alloc(unsigned long size) void *buf; gfp_t gfp = GFP_KERNEL; + if (unlikely(size > MAX_RW_COUNT)) + return NULL; + /* * For high order allocations, use __GFP_NORETRY to avoid oom-killing - * it's better to fall back to vmalloc() than to kill things. For small diff --git a/fs/udf/misc.c b/fs/udf/misc.c index 71d1c25f360d..8c7f9ea251e5 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c @@ -175,13 +175,22 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type, else offset = le32_to_cpu(eahd->appAttrLocation); - while (offset < iinfo->i_lenEAttr) { + while (offset + sizeof(*gaf) < iinfo->i_lenEAttr) { + uint32_t attrLength; + gaf = (struct genericFormat *)&ea[offset]; + attrLength = le32_to_cpu(gaf->attrLength); + + /* Detect undersized elements and buffer overflows */ + if ((attrLength < sizeof(*gaf)) || + (attrLength > (iinfo->i_lenEAttr - offset))) + break; + if (le32_to_cpu(gaf->attrType) == type && gaf->attrSubtype == subtype) return gaf; else - offset += le32_to_cpu(gaf->attrLength); + offset += attrLength; } } diff --git a/fs/udf/namei.c b/fs/udf/namei.c index f34c545f4e54..074560ad190e 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -945,6 +945,10 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, iinfo->i_location.partitionReferenceNum, 0); epos.bh = udf_tgetblk(sb, block); + if (unlikely(!epos.bh)) { + err = -ENOMEM; + goto out_no_entry; + } lock_buffer(epos.bh); memset(epos.bh->b_data, 0x00, bsize); set_buffer_uptodate(epos.bh); |