diff options
author | Deepak Nibade <dnibade@nvidia.com> | 2013-08-14 16:23:39 +0530 |
---|---|---|
committer | Deepak Nibade <dnibade@nvidia.com> | 2013-08-14 16:23:39 +0530 |
commit | 9282699d7fd7954f11d59254e568e5d4bfbbe71a (patch) | |
tree | 4d945260bc8eb63b0db0423ad55ad7021eb5d4ac /fs | |
parent | 933d6b11dbd7fda89ac094321d0cd9992afb5592 (diff) | |
parent | 67e6589a34ea5360b00869aaaec4a844c29cf713 (diff) |
Merge branch 'linux-3.4.57' into rel-17
Bug 1348440
Change-Id: If25c49f027dc2a69642f7ed4733e965962b2a5a2
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Diffstat (limited to 'fs')
77 files changed, 714 insertions, 397 deletions
@@ -1094,9 +1094,9 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent) spin_unlock(&info->ring_lock); out: - kunmap_atomic(ring); dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret, (unsigned long)ring->head, (unsigned long)ring->tail); + kunmap_atomic(ring); return ret; } diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 1feb68ecef95..b1cdb0ae5ca6 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -61,15 +61,6 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) /* This is an autofs submount, we can't expire it */ if (autofs_type_indirect(sbi->type)) goto done; - - /* - * Otherwise it's an offset mount and we need to check - * if we can umount its mount, if there is one. - */ - if (!d_mountpoint(path.dentry)) { - status = 0; - goto done; - } } /* Update the expiry counter if fs is busy */ diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index 2790c7e1912e..575796ae3d84 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -42,7 +42,6 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) return -ENOEXEC; } - bprm->recursion_depth++; /* Well, the bang-shell is implicit... */ allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index e1724392d050..87fa90d5863b 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -117,10 +117,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!enabled) goto _ret; - retval = -ENOEXEC; - if (bprm->recursion_depth > BINPRM_MAX_RECURSION) - goto _ret; - /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); fmt = check_file(bprm); @@ -200,8 +196,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (retval < 0) goto _error; - bprm->recursion_depth++; - retval = search_binary_handler (bprm, regs); if (retval < 0) goto _error; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index df49d486b0c0..8ae4be199fb4 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -22,15 +22,13 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) char interp[BINPRM_BUF_SIZE]; int retval; - if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') || - (bprm->recursion_depth > BINPRM_MAX_RECURSION)) + if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) return -ENOEXEC; /* * This section does the #! interpretation. * Sorta complicated, but hopefully it will work. -TYT */ - bprm->recursion_depth++; allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; diff --git a/fs/block_dev.c b/fs/block_dev.c index b3be92c980ee..319d9c74c4d3 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -57,17 +57,24 @@ static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { struct backing_dev_info *old = inode->i_data.backing_dev_info; + bool wakeup_bdi = false; if (unlikely(dst == old)) /* deadlock avoidance */ return; bdi_lock_two(&old->wb, &dst->wb); spin_lock(&inode->i_lock); inode->i_data.backing_dev_info = dst; - if (inode->i_state & I_DIRTY) + if (inode->i_state & I_DIRTY) { + if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb)) + wakeup_bdi = true; list_move(&inode->i_wb_list, &dst->wb.b_dirty); + } spin_unlock(&inode->i_lock); spin_unlock(&old->wb.list_lock); spin_unlock(&dst->wb.list_lock); + + if (wakeup_bdi) + bdi_wakeup_thread_delayed(dst); } sector_t blkdev_max_block(struct block_device *bdev) @@ -604,6 +611,7 @@ struct block_device *bdgrab(struct block_device *bdev) ihold(bdev->bd_inode); return bdev; } +EXPORT_SYMBOL(bdgrab); long nr_blockdev_pages(void) { @@ -1086,7 +1094,9 @@ void bd_set_size(struct block_device *bdev, loff_t size) { unsigned bsize = bdev_logical_block_size(bdev); - bdev->bd_inode->i_size = size; + mutex_lock(&bdev->bd_inode->i_mutex); + i_size_write(bdev->bd_inode, size); + mutex_unlock(&bdev->bd_inode->i_mutex); while (bsize < PAGE_CACHE_SIZE) { if (size & bsize) break; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 49fd7b66d57b..c4f0a9936f88 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4217,7 +4217,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) spin_lock(&sinfo->lock); spin_lock(&block_rsv->lock); - block_rsv->size = num_bytes; + block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024); num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + sinfo->bytes_reserved + sinfo->bytes_readonly + @@ -4486,14 +4486,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) * If the inodes csum_bytes is the same as the original * csum_bytes then we know we haven't raced with any free()ers * so we can just reduce our inodes csum bytes and carry on. - * Otherwise we have to do the normal free thing to account for - * the case that the free side didn't free up its reserve - * because of this outstanding reservation. */ - if (BTRFS_I(inode)->csum_bytes == csum_bytes) + if (BTRFS_I(inode)->csum_bytes == csum_bytes) { calc_csum_metadata_size(inode, num_bytes, 0); - else - to_free = calc_csum_metadata_size(inode, num_bytes, 0); + } else { + u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes; + u64 bytes; + + /* + * This is tricky, but first we need to figure out how much we + * free'd from any free-ers that occured during this + * reservation, so we reset ->csum_bytes to the csum_bytes + * before we dropped our lock, and then call the free for the + * number of bytes that were freed while we were trying our + * reservation. + */ + bytes = csum_bytes - BTRFS_I(inode)->csum_bytes; + BTRFS_I(inode)->csum_bytes = csum_bytes; + to_free = calc_csum_metadata_size(inode, bytes, 0); + + + /* + * Now we need to see how much we would have freed had we not + * been making this reservation and our ->csum_bytes were not + * artificially inflated. + */ + BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes; + bytes = csum_bytes - orig_csum_bytes; + bytes = calc_csum_metadata_size(inode, bytes, 0); + + /* + * Now reset ->csum_bytes to what it should be. If bytes is + * more than to_free then we would have free'd more space had we + * not had an artificially high ->csum_bytes, so we need to free + * the remainder. If bytes is the same or less then we don't + * need to do anything, the other free-ers did the correct + * thing. + */ + BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes; + if (bytes > to_free) + to_free = bytes - to_free; + else + to_free = 0; + } spin_unlock(&BTRFS_I(inode)->lock); if (dropped) to_free += btrfs_calc_trans_metadata_size(root, dropped); @@ -6811,6 +6846,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int err = 0; int ret; int level; + bool root_dropped = false; path = btrfs_alloc_path(); if (!path) { @@ -6868,6 +6904,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, while (1) { btrfs_tree_lock(path->nodes[level]); btrfs_set_lock_blocking(path->nodes[level]); + path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; ret = btrfs_lookup_extent_info(trans, root, path->nodes[level]->start, @@ -6884,6 +6921,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, break; btrfs_tree_unlock(path->nodes[level]); + path->locks[level] = 0; WARN_ON(wc->refs[level] != 1); level--; } @@ -6979,12 +7017,22 @@ int btrfs_drop_snapshot(struct btrfs_root *root, free_extent_buffer(root->commit_root); kfree(root); } + root_dropped = true; out_end_trans: btrfs_end_transaction_throttle(trans, tree_root); out_free: kfree(wc); btrfs_free_path(path); out: + /* + * So if we need to stop dropping the snapshot for whatever reason we + * need to make sure to add it back to the dead root list so that we + * keep trying to do the work later. This also cleans up roots if we + * don't have it in the radix (like when we recover after a power fail + * or unmount) so we don't leak memory. + */ + if (root_dropped == false) + btrfs_add_dead_root(root); if (err) btrfs_std_error(root->fs_info, err); return err; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c9018a05036e..d64fda541483 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1238,6 +1238,39 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end) GFP_NOFS); } +int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(inode->i_mapping, index); + BUG_ON(!page); /* Pages should be in the extent_io_tree */ + clear_page_dirty_for_io(page); + page_cache_release(page); + index++; + } + return 0; +} + +int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) +{ + unsigned long index = start >> PAGE_CACHE_SHIFT; + unsigned long end_index = end >> PAGE_CACHE_SHIFT; + struct page *page; + + while (index <= end_index) { + page = find_get_page(inode->i_mapping, index); + BUG_ON(!page); /* Pages should be in the extent_io_tree */ + account_page_redirty(page); + __set_page_dirty_nobuffers(page); + page_cache_release(page); + index++; + } + return 0; +} + /* * helper function to set both pages and extents in the tree writeback */ diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index b516c3b8dec6..2edf9120aae5 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -312,6 +312,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long *map_len); int extent_range_uptodate(struct extent_io_tree *tree, u64 start, u64 end); +int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); +int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); int extent_clear_unlock_delalloc(struct inode *inode, struct extent_io_tree *tree, u64 start, u64 end, struct page *locked_page, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0df0d1fd4fee..9e51325828bc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -349,6 +349,7 @@ static noinline int compress_file_range(struct inode *inode, int i; int will_compress; int compress_type = root->fs_info->compress_type; + int redirty = 0; /* if this is a small write inside eof, kick off a defrag */ if ((end - start + 1) < 16 * 1024 && @@ -411,6 +412,17 @@ again: if (BTRFS_I(inode)->force_compress) compress_type = BTRFS_I(inode)->force_compress; + /* + * we need to call clear_page_dirty_for_io on each + * page in the range. Otherwise applications with the file + * mmap'd can wander in and change the page contents while + * we are compressing them. + * + * If the compression fails for any reason, we set the pages + * dirty again later on. + */ + extent_range_clear_dirty_for_io(inode, start, end); + redirty = 1; ret = btrfs_compress_pages(compress_type, inode->i_mapping, start, total_compressed, pages, @@ -552,6 +564,8 @@ cleanup_and_bail_uncompressed: __set_page_dirty_nobuffers(locked_page); /* unlocked later on in the async handlers */ } + if (redirty) + extent_range_redirty_for_io(inode, start, end); add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0, BTRFS_COMPRESS_NONE); *num_added += 1; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 14f8e1faa46e..3a65f4343416 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1653,7 +1653,11 @@ static noinline int copy_to_sk(struct btrfs_root *root, item_off = btrfs_item_ptr_offset(leaf, i); item_len = btrfs_item_size_nr(leaf, i); - if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE) + btrfs_item_key_to_cpu(leaf, key, i); + if (!key_in_sk(key, sk)) + continue; + + if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE) item_len = 0; if (sizeof(sh) + item_len + *sk_offset > @@ -1662,10 +1666,6 @@ static noinline int copy_to_sk(struct btrfs_root *root, goto overflow; } - btrfs_item_key_to_cpu(leaf, key, i); - if (!key_in_sk(key, sk)) - continue; - sh.objectid = key->objectid; sh.offset = key->offset; sh.type = key->type; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2f3d6f917fb3..682e5da3143a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -383,7 +383,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) eb = path->nodes[0]; ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); item_size = btrfs_item_size_nr(eb, path->slots[0]); - btrfs_release_path(path); if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) { do { @@ -398,7 +397,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) ret < 0 ? -1 : ref_level, ret < 0 ? -1 : ref_root); } while (ret != 1); + btrfs_release_path(path); } else { + btrfs_release_path(path); swarn.path = path; iterate_extent_inodes(fs_info, found_key.objectid, extent_item_pos, 1, diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index dce89da9c863..3ef7f386e265 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -315,6 +315,7 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, unsigned long src_ptr; unsigned long dst_ptr; int overwrite_root = 0; + bool inode_item = key->type == BTRFS_INODE_ITEM_KEY; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) overwrite_root = 1; @@ -324,6 +325,9 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, /* look for the key in the destination tree */ ret = btrfs_search_slot(NULL, root, key, path, 0, 0); + if (ret < 0) + return ret; + if (ret == 0) { char *src_copy; char *dst_copy; @@ -365,6 +369,30 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, return 0; } + /* + * We need to load the old nbytes into the inode so when we + * replay the extents we've logged we get the right nbytes. + */ + if (inode_item) { + struct btrfs_inode_item *item; + u64 nbytes; + + item = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_inode_item); + nbytes = btrfs_inode_nbytes(path->nodes[0], item); + item = btrfs_item_ptr(eb, slot, + struct btrfs_inode_item); + btrfs_set_inode_nbytes(eb, item, nbytes); + } + } else if (inode_item) { + struct btrfs_inode_item *item; + + /* + * New inode, set nbytes to 0 so that the nbytes comes out + * properly when we replay the extents. + */ + item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); + btrfs_set_inode_nbytes(eb, item, 0); } insert: btrfs_release_path(path); @@ -486,7 +514,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, u64 extent_end; u64 alloc_hint; u64 start = key->offset; - u64 saved_nbytes; + u64 nbytes = 0; struct btrfs_file_extent_item *item; struct inode *inode = NULL; unsigned long size; @@ -496,10 +524,19 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, found_type = btrfs_file_extent_type(eb, item); if (found_type == BTRFS_FILE_EXTENT_REG || - found_type == BTRFS_FILE_EXTENT_PREALLOC) - extent_end = start + btrfs_file_extent_num_bytes(eb, item); - else if (found_type == BTRFS_FILE_EXTENT_INLINE) { + found_type == BTRFS_FILE_EXTENT_PREALLOC) { + nbytes = btrfs_file_extent_num_bytes(eb, item); + extent_end = start + nbytes; + + /* + * We don't add to the inodes nbytes if we are prealloc or a + * hole. + */ + if (btrfs_file_extent_disk_bytenr(eb, item) == 0) + nbytes = 0; + } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { size = btrfs_file_extent_inline_len(eb, item); + nbytes = btrfs_file_extent_ram_bytes(eb, item); extent_end = (start + size + mask) & ~mask; } else { ret = 0; @@ -548,7 +585,6 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, } btrfs_release_path(path); - saved_nbytes = inode_get_bytes(inode); /* drop any overlapping extents */ ret = btrfs_drop_extents(trans, inode, start, extent_end, &alloc_hint, 1); @@ -636,7 +672,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, BUG_ON(ret); } - inode_set_bytes(inode, saved_nbytes); + inode_add_bytes(inode, nbytes); btrfs_update_inode(trans, root, inode); out: if (inode) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1411b99555a4..06744f1e91f4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -557,6 +557,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) new_device->writeable = 0; new_device->in_fs_metadata = 0; new_device->can_discard = 0; + spin_lock_init(&new_device->io_lock); list_replace_rcu(&device->dev_list, &new_device->dev_list); call_rcu(&device->rcu, free_device); @@ -590,6 +591,12 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) __btrfs_close_devices(fs_devices); free_fs_devices(fs_devices); } + /* + * Wait for rcu kworkers under __btrfs_close_devices + * to finish all blkdev_puts so device is really + * free when umount is done. + */ + rcu_barrier(); return ret; } diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 80576d05d687..bb5fb3d46e8f 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -191,27 +191,23 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) } /** - * Encode the flock and fcntl locks for the given inode into the pagelist. - * Format is: #fcntl locks, sequential fcntl locks, #flock locks, - * sequential flock locks. - * Must be called with lock_flocks() already held. - * If we encounter more of a specific lock type than expected, - * we return the value 1. + * Encode the flock and fcntl locks for the given inode into the ceph_filelock + * array. Must be called with lock_flocks() already held. + * If we encounter more of a specific lock type than expected, return -ENOSPC. */ -int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, - int num_fcntl_locks, int num_flock_locks) +int ceph_encode_locks_to_buffer(struct inode *inode, + struct ceph_filelock *flocks, + int num_fcntl_locks, int num_flock_locks) { struct file_lock *lock; - struct ceph_filelock cephlock; int err = 0; int seen_fcntl = 0; int seen_flock = 0; + int l = 0; dout("encoding %d flock and %d fcntl locks", num_flock_locks, num_fcntl_locks); - err = ceph_pagelist_append(pagelist, &num_fcntl_locks, sizeof(u32)); - if (err) - goto fail; + for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { if (lock->fl_flags & FL_POSIX) { ++seen_fcntl; @@ -219,19 +215,12 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, err = -ENOSPC; goto fail; } - err = lock_to_ceph_filelock(lock, &cephlock); + err = lock_to_ceph_filelock(lock, &flocks[l]); if (err) goto fail; - err = ceph_pagelist_append(pagelist, &cephlock, - sizeof(struct ceph_filelock)); + ++l; } - if (err) - goto fail; } - - err = ceph_pagelist_append(pagelist, &num_flock_locks, sizeof(u32)); - if (err) - goto fail; for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { if (lock->fl_flags & FL_FLOCK) { ++seen_flock; @@ -239,19 +228,51 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, err = -ENOSPC; goto fail; } - err = lock_to_ceph_filelock(lock, &cephlock); + err = lock_to_ceph_filelock(lock, &flocks[l]); if (err) goto fail; - err = ceph_pagelist_append(pagelist, &cephlock, - sizeof(struct ceph_filelock)); + ++l; } - if (err) - goto fail; } fail: return err; } +/** + * Copy the encoded flock and fcntl locks into the pagelist. + * Format is: #fcntl locks, sequential fcntl locks, #flock locks, + * sequential flock locks. + * Returns zero on success. + */ +int ceph_locks_to_pagelist(struct ceph_filelock *flocks, + struct ceph_pagelist *pagelist, + int num_fcntl_locks, int num_flock_locks) +{ + int err = 0; + __le32 nlocks; + + nlocks = cpu_to_le32(num_fcntl_locks); + err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); + if (err) + goto out_fail; + + err = ceph_pagelist_append(pagelist, flocks, + num_fcntl_locks * sizeof(*flocks)); + if (err) + goto out_fail; + + nlocks = cpu_to_le32(num_flock_locks); + err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks)); + if (err) + goto out_fail; + + err = ceph_pagelist_append(pagelist, + &flocks[num_fcntl_locks], + num_flock_locks * sizeof(*flocks)); +out_fail: + return err; +} + /* * Given a pointer to a lock, convert it to a ceph filelock */ diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 3fd08ad5c478..cf1b9e043b08 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -335,9 +335,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s) atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); if (atomic_dec_and_test(&s->s_ref)) { if (s->s_auth.authorizer) - s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer( - s->s_mdsc->fsc->client->monc.auth, - s->s_auth.authorizer); + ceph_auth_destroy_authorizer( + s->s_mdsc->fsc->client->monc.auth, + s->s_auth.authorizer); kfree(s); } } @@ -2455,39 +2455,44 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, if (recon_state->flock) { int num_fcntl_locks, num_flock_locks; - struct ceph_pagelist_cursor trunc_point; - - ceph_pagelist_set_cursor(pagelist, &trunc_point); - do { - lock_flocks(); - ceph_count_locks(inode, &num_fcntl_locks, - &num_flock_locks); - rec.v2.flock_len = (2*sizeof(u32) + - (num_fcntl_locks+num_flock_locks) * - sizeof(struct ceph_filelock)); - unlock_flocks(); - - /* pre-alloc pagelist */ - ceph_pagelist_truncate(pagelist, &trunc_point); - err = ceph_pagelist_append(pagelist, &rec, reclen); - if (!err) - err = ceph_pagelist_reserve(pagelist, - rec.v2.flock_len); - - /* encode locks */ - if (!err) { - lock_flocks(); - err = ceph_encode_locks(inode, - pagelist, - num_fcntl_locks, - num_flock_locks); - unlock_flocks(); - } - } while (err == -ENOSPC); + struct ceph_filelock *flocks; + +encode_again: + lock_flocks(); + ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); + unlock_flocks(); + flocks = kmalloc((num_fcntl_locks+num_flock_locks) * + sizeof(struct ceph_filelock), GFP_NOFS); + if (!flocks) { + err = -ENOMEM; + goto out_free; + } + lock_flocks(); + err = ceph_encode_locks_to_buffer(inode, flocks, + num_fcntl_locks, + num_flock_locks); + unlock_flocks(); + if (err) { + kfree(flocks); + if (err == -ENOSPC) + goto encode_again; + goto out_free; + } + /* + * number of encoded locks is stable, so copy to pagelist + */ + rec.v2.flock_len = cpu_to_le32(2*sizeof(u32) + + (num_fcntl_locks+num_flock_locks) * + sizeof(struct ceph_filelock)); + err = ceph_pagelist_append(pagelist, &rec, reclen); + if (!err) + err = ceph_locks_to_pagelist(flocks, pagelist, + num_fcntl_locks, + num_flock_locks); + kfree(flocks); } else { err = ceph_pagelist_append(pagelist, &rec, reclen); } - out_free: kfree(path); out_dput: @@ -3414,13 +3419,17 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, struct ceph_auth_handshake *auth = &s->s_auth; if (force_new && auth->authorizer) { - if (ac->ops && ac->ops->destroy_authorizer) - ac->ops->destroy_authorizer(ac, auth->authorizer); + ceph_auth_destroy_authorizer(ac, auth->authorizer); auth->authorizer = NULL; } - if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) { - int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_MDS, - auth); + if (!auth->authorizer) { + int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_MDS, + auth); + if (ret) + return ERR_PTR(ret); + } else { + int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_MDS, + auth); if (ret) return ERR_PTR(ret); } @@ -3436,7 +3445,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len) struct ceph_mds_client *mdsc = s->s_mdsc; struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; - return ac->ops->verify_authorizer_reply(ac, s->s_auth.authorizer, len); + return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer, len); } static int invalidate_authorizer(struct ceph_connection *con) @@ -3445,8 +3454,7 @@ static int invalidate_authorizer(struct ceph_connection *con) struct ceph_mds_client *mdsc = s->s_mdsc; struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; - if (ac->ops->invalidate_authorizer) - ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); + ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); return ceph_monc_validate_auth(&mdsc->fsc->client->monc); } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f36391815461..f4fa5cf0cdf1 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -70,8 +70,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) /* * express utilization in terms of large blocks to avoid * overflow on 32-bit machines. + * + * NOTE: for the time being, we make bsize == frsize to humor + * not-yet-ancient versions of glibc that are broken. + * Someday, we will probably want to report a real block + * size... whatever that may mean for a network file system! */ buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; + buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); @@ -79,7 +85,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = le64_to_cpu(st.num_objects); buf->f_ffree = -1; buf->f_namelen = NAME_MAX; - buf->f_frsize = PAGE_CACHE_SIZE; /* leave fsid little-endian, regardless of host endianness */ fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index fc35036d258d..d2e01a61f9d2 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -21,7 +21,7 @@ /* large granularity for statfs utilization stats to facilitate * large volume sizes on 32-bit machines. */ -#define CEPH_BLOCK_SHIFT 20 /* 1 MB */ +#define CEPH_BLOCK_SHIFT 22 /* 4 MB */ #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ @@ -847,8 +847,13 @@ extern const struct export_operations ceph_export_ops; extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num); -extern int ceph_encode_locks(struct inode *i, struct ceph_pagelist *p, - int p_locks, int f_locks); +extern int ceph_encode_locks_to_buffer(struct inode *inode, + struct ceph_filelock *flocks, + int num_fcntl_locks, + int num_flock_locks); +extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks, + struct ceph_pagelist *pagelist, + int num_fcntl_locks, int num_flock_locks); extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c); /* debugfs.c */ diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index cfd1ce34e0bc..1d36db114772 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -614,53 +614,10 @@ decode_negTokenInit(unsigned char *security_blob, int length, } } - /* mechlistMIC */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - /* Check if we have reached the end of the blob, but with - no mechListMic (e.g. NTLMSSP instead of KRB5) */ - if (ctx.error == ASN1_ERR_DEC_EMPTY) - goto decode_negtoken_exit; - cFYI(1, "Error decoding last part negTokenInit exit3"); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { - /* tag = 3 indicating mechListMIC */ - cFYI(1, "Exit 4 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - return 0; - } - - /* sequence */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding last part negTokenInit exit5"); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_CON) - || (tag != ASN1_SEQ)) { - cFYI(1, "cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - } - - /* sequence of */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding last part negTokenInit exit 7"); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { - cFYI(1, "Exit 8 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - return 0; - } - - /* general string */ - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, "Error decoding last part negTokenInit exit9"); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) - || (tag != ASN1_GENSTR)) { - cFYI(1, "Exit10 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end); - return 0; - } - cFYI(1, "Need to call asn1_octets_decode() function for %s", - ctx.pointer); /* is this UTF-8 or ASCII? */ -decode_negtoken_exit: + /* + * We currently ignore anything at the end of the SPNEGO blob after + * the mechTypes have been parsed, since none of that info is + * used at the moment. + */ return 1; } diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 22631445a4ac..d0e5fc57bfa8 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include <linux/vfs.h> #include <linux/fs.h> +#include <linux/inet.h> #include "cifsglob.h" #include "cifsproto.h" #include "cifsfs.h" @@ -150,7 +151,8 @@ char *cifs_compose_mount_options(const char *sb_mountdata, * assuming that we have 'unc=' and 'ip=' in * the original sb_mountdata */ - md_len = strlen(sb_mountdata) + rc + strlen(ref->node_name) + 12; + md_len = strlen(sb_mountdata) + rc + strlen(ref->node_name) + 12 + + INET6_ADDRSTRLEN; mountdata = kzalloc(md_len+1, GFP_KERNEL); if (mountdata == NULL) { rc = -ENOMEM; diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index a513a546700b..f2b2ffd452c8 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -323,14 +323,14 @@ UniToupper(register wchar_t uc) /* * UniStrupr: Upper case a unicode string */ -static inline wchar_t * -UniStrupr(register wchar_t *upin) +static inline __le16 * +UniStrupr(register __le16 *upin) { - register wchar_t *up; + register __le16 *up; up = upin; while (*up) { /* For all characters */ - *up = UniToupper(*up); + *up = cpu_to_le16(UniToupper(le16_to_cpu(*up))); up++; } return upin; /* Return input pointer */ diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 63c460e503b6..6d0c62a529a0 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -394,7 +394,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, int rc = 0; int len; char nt_hash[CIFS_NTHASH_SIZE]; - wchar_t *user; + __le16 *user; wchar_t *domain; wchar_t *server; @@ -419,7 +419,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, return rc; } - /* convert ses->user_name to unicode and uppercase */ + /* convert ses->user_name to unicode */ len = ses->user_name ? strlen(ses->user_name) : 0; user = kmalloc(2 + (len * 2), GFP_KERNEL); if (user == NULL) { @@ -429,7 +429,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, } if (len) { - len = cifs_strtoUTF16((__le16 *)user, ses->user_name, len, nls_cp); + len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp); UniStrupr(user); } else { memset(user, '\0', 2); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 541ef81f6ae8..d7561e03870c 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -557,6 +557,11 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) dentry = ERR_PTR(-ENOENT); break; } + if (!S_ISDIR(dir->i_mode)) { + dput(dentry); + dentry = ERR_PTR(-ENOTDIR); + break; + } /* skip separators */ while (*s == sep) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f771e9f98f9f..9cc574cb00d1 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1576,14 +1576,24 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, } break; case Opt_blank_pass: - vol->password = NULL; - break; - case Opt_pass: /* passwords have to be handled differently * to allow the character used for deliminator * to be passed within them */ + /* + * Check if this is a case where the password + * starts with a delimiter + */ + tmp_end = strchr(data, '='); + tmp_end++; + if (!(tmp_end < end && tmp_end[1] == delim)) { + /* No it is not. Set the password to NULL */ + vol->password = NULL; + break; + } + /* Yes it is. Drop down to Opt_pass below.*/ + case Opt_pass: /* Obtain the value string */ value = strchr(data, '='); value++; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 745da3d0653e..43944c6d7b41 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -173,7 +173,8 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr) if (fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL) inode->i_flags |= S_AUTOMOUNT; - cifs_set_ops(inode); + if (inode->i_state & I_NEW) + cifs_set_ops(inode); } void @@ -548,6 +549,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, fattr->cf_mode &= ~(S_IWUGO); fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); + if (fattr->cf_nlink < 1) { + cFYI(1, "replacing bogus file nlink value %u\n", + fattr->cf_nlink); + fattr->cf_nlink = 1; + } } fattr->cf_uid = cifs_sb->mnt_uid; diff --git a/fs/compat.c b/fs/compat.c index 2b371b38911d..56acdcb84fb0 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -558,6 +558,10 @@ ssize_t compat_rw_copy_check_uvector(int type, } *ret_pointer = iov; + ret = -EFAULT; + if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) + goto out; + /* * Single unix specification: * We should -EINVAL if an element length is not >= 0 and fitting an @@ -1089,17 +1093,12 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, if (!file->f_op) goto out; - ret = -EFAULT; - if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) - goto out; - - tot_len = compat_rw_copy_check_uvector(type, uvector, nr_segs, + ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, UIO_FASTIOV, iovstack, &iov, 1); - if (tot_len == 0) { - ret = 0; + if (ret <= 0) goto out; - } + tot_len = ret; ret = rw_verify_area(type, file, pos, tot_len); if (ret < 0) goto out; diff --git a/fs/dcache.c b/fs/dcache.c index f104945dcc7d..e498de23bdcc 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1238,8 +1238,10 @@ void shrink_dcache_parent(struct dentry * parent) LIST_HEAD(dispose); int found; - while ((found = select_parent(parent, &dispose)) != 0) + while ((found = select_parent(parent, &dispose)) != 0) { shrink_dentry_list(&dispose); + cond_resched(); + } } EXPORT_SYMBOL(shrink_dcache_parent); diff --git a/fs/exec.c b/fs/exec.c index c136a5e750fc..0ea0b4c476d8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -627,7 +627,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * when the old and new regions overlap clear from new_end. */ free_pgd_range(&tlb, new_end, old_end, new_end, - vma->vm_next ? vma->vm_next->vm_start : TASK_SIZE); + vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); } else { /* * otherwise, clean from old_start; this is done to not touch @@ -636,7 +636,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) * for the others its just a little faster. */ free_pgd_range(&tlb, old_start, old_end, new_end, - vma->vm_next ? vma->vm_next->vm_start : TASK_SIZE); + vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); } tlb_finish_mmu(&tlb, new_end, old_end); @@ -1163,13 +1163,6 @@ void setup_new_exec(struct linux_binprm * bprm) set_dumpable(current->mm, suid_dumpable); } - /* - * Flush performance counters when crossing a - * security domain: - */ - if (!get_dumpable(current->mm)) - perf_event_exit_task(current); - /* An exec changes our domain. We are no longer part of the thread group */ @@ -1233,6 +1226,15 @@ void install_exec_creds(struct linux_binprm *bprm) commit_creds(bprm->cred); bprm->cred = NULL; + + /* + * Disable monitoring for regular users + * when executing setuid binaries. Must + * wait until new credentials are committed + * by commit_creds() above + */ + if (get_dumpable(current->mm) != SUID_DUMP_USER) + perf_event_exit_task(current); /* * cred_guard_mutex must be held at least to this point to prevent * ptrace_attach() from altering our determination of the task's @@ -1389,6 +1391,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) struct linux_binfmt *fmt; pid_t old_pid, old_vpid; + /* This allows 4 levels of binfmt rewrites before failing hard. */ + if (depth > 5) + return -ELOOP; + retval = security_bprm_check(bprm); if (retval) return retval; @@ -1413,12 +1419,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) if (!try_module_get(fmt->module)) continue; read_unlock(&binfmt_lock); + bprm->recursion_depth = depth + 1; retval = fn(bprm, regs); - /* - * Restore the depth counter to its starting value - * in this call, so we don't have to rely on every - * load_binary function to restore it on return. - */ bprm->recursion_depth = depth; if (retval >= 0) { if (depth == 0) { diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 075281734fcb..aad0f3989e52 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -218,7 +218,8 @@ void ext3_evict_inode (struct inode *inode) */ if (inode->i_nlink && ext3_should_journal_data(inode) && EXT3_SB(inode->i_sb)->s_journal && - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && + inode->i_ino != EXT3_JOURNAL_INO) { tid_t commit_tid = atomic_read(&ei->i_datasync_tid); journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index d7940b24cf68..fbb9b82b59cc 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -573,11 +573,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, (block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb)) +((char *)de - bh->b_data))) { - /* On error, skip the f_pos to the next block. */ - dir_file->f_pos = (dir_file->f_pos | - (dir->i_sb->s_blocksize - 1)) + 1; - brelse (bh); - return count; + /* silently ignore the rest of the block */ + break; } ext3fs_dirhash(de->name, de->name_len, hinfo); if ((hinfo->hash < start_hash) || diff --git a/fs/ext3/super.c b/fs/ext3/super.c index cf0b5921cf0f..ef4c812c7a63 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -364,7 +364,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb) return bdev; fail: - ext3_msg(sb, "error: failed to open journal device %s: %ld", + ext3_msg(sb, KERN_ERR, "error: failed to open journal device %s: %ld", __bdevname(dev, b), PTR_ERR(bdev)); return NULL; @@ -891,7 +891,7 @@ static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb) /*todo: use simple_strtoll with >32bit ext3 */ sb_block = simple_strtoul(options, &options, 0); if (*options && *options != ',') { - ext3_msg(sb, "error: invalid sb specification: %s", + ext3_msg(sb, KERN_ERR, "error: invalid sb specification: %s", (char *) *data); return 1; } diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 9ed1bb1f319f..5459168d9db0 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -82,4 +82,5 @@ config EXT4_DEBUG Enables run-time debugging support for the ext4 filesystem. If you select Y here, then you will be able to turn on debugging - with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug" + with a command such as: + echo 1 > /sys/module/ext4/parameters/mballoc_debug diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 3e1018ace702..d33733eaa1b0 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -600,7 +600,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) brelse(bitmap_bh); printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu" ", computed = %llu, %llu\n", - EXT4_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)), + EXT4_NUM_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)), desc_count, bitmap_count); return bitmap_count; #else diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 47d1c8cda0cb..d918b55f009c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -316,9 +316,9 @@ struct ext4_group_desc */ struct flex_groups { - atomic_t free_inodes; - atomic_t free_clusters; - atomic_t used_dirs; + atomic64_t free_clusters; + atomic_t free_inodes; + atomic_t used_dirs; }; #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 83b20fcf9400..6b23a96e8095 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -164,16 +164,20 @@ static inline void ext4_journal_callback_add(handle_t *handle, * ext4_journal_callback_del: delete a registered callback * @handle: active journal transaction handle on which callback was registered * @jce: registered journal callback entry to unregister + * Return true if object was sucessfully removed */ -static inline void ext4_journal_callback_del(handle_t *handle, +static inline bool ext4_journal_callback_try_del(handle_t *handle, struct ext4_journal_cb_entry *jce) { + bool deleted; struct ext4_sb_info *sbi = EXT4_SB(handle->h_transaction->t_journal->j_private); spin_lock(&sbi->s_md_lock); + deleted = !list_empty(&jce->jce_list); list_del_init(&jce->jce_list); spin_unlock(&sbi->s_md_lock); + return deleted; } int diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 852d4c257ace..b9a3726ea048 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2959,6 +2959,7 @@ static int ext4_split_extent(handle_t *handle, int err = 0; int uninitialized; int split_flag1, flags1; + int allocated = map->m_len; depth = ext_depth(inode); ex = path[depth].p_ext; @@ -2978,6 +2979,8 @@ static int ext4_split_extent(handle_t *handle, map->m_lblk + map->m_len, split_flag1, flags1); if (err) goto out; + } else { + allocated = ee_len - (map->m_lblk - ee_block); } ext4_ext_drop_refs(path); @@ -3000,7 +3003,7 @@ static int ext4_split_extent(handle_t *handle, ext4_ext_show_leaf(inode, path); out: - return err ? err : map->m_len; + return err ? err : allocated; } #define EXT4_EXT_ZERO_LEN 7 @@ -3668,6 +3671,7 @@ out: allocated - map->m_len); allocated = map->m_len; } + map->m_len = allocated; /* * If we have done fallocate with the offset that is already @@ -4709,7 +4713,7 @@ static int ext4_xattr_fiemap(struct inode *inode, error = ext4_get_inode_loc(inode, &iloc); if (error) return error; - physical = iloc.bh->b_blocknr << blockbits; + physical = (__u64)iloc.bh->b_blocknr << blockbits; offset = EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize; physical += offset; @@ -4717,7 +4721,7 @@ static int ext4_xattr_fiemap(struct inode *inode, flags |= FIEMAP_EXTENT_DATA_INLINE; brelse(iloc.bh); } else { /* external block */ - physical = EXT4_I(inode)->i_file_acl << blockbits; + physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits; length = inode->i_sb->s_blocksize; } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 902544e7ee5c..e42b4687dbf0 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -305,8 +305,8 @@ error_return: } struct orlov_stats { + __u64 free_clusters; __u32 free_inodes; - __u32 free_clusters; __u32 used_dirs; }; @@ -323,7 +323,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, if (flex_size > 1) { stats->free_inodes = atomic_read(&flex_group[g].free_inodes); - stats->free_clusters = atomic_read(&flex_group[g].free_clusters); + stats->free_clusters = atomic64_read(&flex_group[g].free_clusters); stats->used_dirs = atomic_read(&flex_group[g].used_dirs); return; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a0d7e2617fda..98bff01eed6e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -144,7 +144,8 @@ void ext4_evict_inode(struct inode *inode) * don't use page cache. */ if (ext4_should_journal_data(inode) && - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && + inode->i_ino != EXT4_JOURNAL_INO) { journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; @@ -4220,7 +4221,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode; - unsigned long delalloc_blocks; + unsigned long long delalloc_blocks; inode = dentry->d_inode; generic_fillattr(inode, stat); @@ -4237,7 +4238,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, */ delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks; - stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; + stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9); return 0; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 3122ece15147..878484256a6f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1980,7 +1980,11 @@ repeat: group = ac->ac_g_ex.fe_group; for (i = 0; i < ngroups; group++, i++) { - if (group == ngroups) + /* + * Artificially restricted ngroups for non-extent + * files makes group > ngroups possible on first loop. + */ + if (group >= ngroups) group = 0; /* This now checks without needing the buddy page */ @@ -2813,8 +2817,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, ac->ac_b_ex.fe_group); - atomic_sub(ac->ac_b_ex.fe_len, - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_sub(ac->ac_b_ex.fe_len, + &sbi->s_flex_groups[flex_group].free_clusters); } err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -3433,7 +3437,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) win = offs; ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - - EXT4_B2C(sbi, win); + EXT4_NUM_B2C(sbi, win); BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); } @@ -4436,11 +4440,11 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, node = rb_prev(new_node); if (node) { entry = rb_entry(node, struct ext4_free_data, efd_node); - if (can_merge(entry, new_entry)) { + if (can_merge(entry, new_entry) && + ext4_journal_callback_try_del(handle, &entry->efd_jce)) { new_entry->efd_start_cluster = entry->efd_start_cluster; new_entry->efd_count += entry->efd_count; rb_erase(node, &(db->bb_free_root)); - ext4_journal_callback_del(handle, &entry->efd_jce); kmem_cache_free(ext4_free_data_cachep, entry); } } @@ -4448,10 +4452,10 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, node = rb_next(new_node); if (node) { entry = rb_entry(node, struct ext4_free_data, efd_node); - if (can_merge(new_entry, entry)) { + if (can_merge(new_entry, entry) && + ext4_journal_callback_try_del(handle, &entry->efd_jce)) { new_entry->efd_count += entry->efd_count; rb_erase(node, &(db->bb_free_root)); - ext4_journal_callback_del(handle, &entry->efd_jce); kmem_cache_free(ext4_free_data_cachep, entry); } } @@ -4577,7 +4581,7 @@ do_more: EXT4_BLOCKS_PER_GROUP(sb); count -= overflow; } - count_clusters = EXT4_B2C(sbi, count); + count_clusters = EXT4_NUM_B2C(sbi, count); bitmap_bh = ext4_read_block_bitmap(sb, block_group); if (!bitmap_bh) { err = -EIO; @@ -4635,11 +4639,16 @@ do_more: * blocks being freed are metadata. these blocks shouldn't * be used until this transaction is committed */ + retry: new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); if (!new_entry) { - ext4_mb_unload_buddy(&e4b); - err = -ENOMEM; - goto error_return; + /* + * We use a retry loop because + * ext4_free_blocks() is not allowed to fail. + */ + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; } new_entry->efd_start_cluster = bit; new_entry->efd_group = block_group; @@ -4667,8 +4676,8 @@ do_more: if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic_add(count_clusters, - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_add(count_clusters, + &sbi->s_flex_groups[flex_group].free_clusters); } ext4_mb_unload_buddy(&e4b); @@ -4808,12 +4817,12 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); ext4_unlock_group(sb, block_group); percpu_counter_add(&sbi->s_freeclusters_counter, - EXT4_B2C(sbi, blocks_freed)); + EXT4_NUM_B2C(sbi, blocks_freed)); if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic_add(EXT4_B2C(sbi, blocks_freed), - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed), + &sbi->s_flex_groups[flex_group].free_clusters); } ext4_mb_unload_buddy(&e4b); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index ac769399b141..9fb3fae4898a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -585,11 +585,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, if (ext4_check_dir_entry(dir, NULL, de, bh, (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) + ((char *)de - bh->b_data))) { - /* On error, skip the f_pos to the next block. */ - dir_file->f_pos = (dir_file->f_pos | - (dir->i_sb->s_blocksize - 1)) + 1; - brelse(bh); - return count; + /* silently ignore the rest of the block */ + break; } ext4fs_dirhash(de->name, de->name_len, hinfo); if ((hinfo->hash < start_hash) || diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 0dbeb39f79ef..c299dde3f6ec 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1112,7 +1112,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap); ext4_inode_table_set(sb, gdp, group_data->inode_table); ext4_free_group_clusters_set(sb, gdp, - EXT4_B2C(sbi, group_data->free_blocks_count)); + EXT4_NUM_B2C(sbi, group_data->free_blocks_count)); ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); gdp->bg_flags = cpu_to_le16(*bg_flags); gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); @@ -1202,6 +1202,8 @@ static void ext4_update_super(struct super_block *sb, /* Update the global fs size fields */ sbi->s_groups_count += flex_gd->count; + sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, + (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); /* Update the reserved block counts only once the new group is * active. */ @@ -1210,7 +1212,7 @@ static void ext4_update_super(struct super_block *sb, /* Update the free space counts */ percpu_counter_add(&sbi->s_freeclusters_counter, - EXT4_B2C(sbi, free_blocks)); + EXT4_NUM_B2C(sbi, free_blocks)); percpu_counter_add(&sbi->s_freeinodes_counter, EXT4_INODES_PER_GROUP(sb) * flex_gd->count); @@ -1219,8 +1221,8 @@ static void ext4_update_super(struct super_block *sb, sbi->s_log_groups_per_flex) { ext4_group_t flex_group; flex_group = ext4_flex_group(sbi, group_data[0].group); - atomic_add(EXT4_B2C(sbi, free_blocks), - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), + &sbi->s_flex_groups[flex_group].free_clusters); atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, &sbi->s_flex_groups[flex_group].free_inodes); } @@ -1628,6 +1630,10 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) return 0; ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); + if (n_group > (0xFFFFFFFFUL / EXT4_INODES_PER_GROUP(sb))) { + ext4_warning(sb, "resize would cause inodes_count overflow"); + return -EINVAL; + } ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 94dc725a1bb8..0edd29412912 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -437,10 +437,13 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) struct super_block *sb = journal->j_private; struct ext4_sb_info *sbi = EXT4_SB(sb); int error = is_journal_aborted(journal); - struct ext4_journal_cb_entry *jce, *tmp; + struct ext4_journal_cb_entry *jce; + BUG_ON(txn->t_state == T_FINISHED); spin_lock(&sbi->s_md_lock); - list_for_each_entry_safe(jce, tmp, &txn->t_private_list, jce_list) { + while (!list_empty(&txn->t_private_list)) { + jce = list_entry(txn->t_private_list.next, + struct ext4_journal_cb_entry, jce_list); list_del_init(&jce->jce_list); spin_unlock(&sbi->s_md_lock); jce->jce_func(sb, jce, error); @@ -1907,8 +1910,8 @@ static int ext4_fill_flex_info(struct super_block *sb) flex_group = ext4_flex_group(sbi, i); atomic_add(ext4_free_inodes_count(sb, gdp), &sbi->s_flex_groups[flex_group].free_inodes); - atomic_add(ext4_free_group_clusters(sb, gdp), - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_add(ext4_free_group_clusters(sb, gdp), + &sbi->s_flex_groups[flex_group].free_clusters); atomic_add(ext4_used_dirs_count(sb, gdp), &sbi->s_flex_groups[flex_group].used_dirs); } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index d403f76c5a68..fe6d34000454 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1237,6 +1237,19 @@ static int fat_read_root(struct inode *inode) return 0; } +static unsigned long calc_fat_clusters(struct super_block *sb) +{ + struct msdos_sb_info *sbi = MSDOS_SB(sb); + + /* Divide first to avoid overflow */ + if (sbi->fat_bits != 12) { + unsigned long ent_per_sec = sb->s_blocksize * 8 / sbi->fat_bits; + return ent_per_sec * sbi->fat_length; + } + + return sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits; +} + /* * Read the super block of an MS-DOS FS. */ @@ -1442,7 +1455,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12; /* check that FAT table does not overflow */ - fat_clusters = sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits; + fat_clusters = calc_fat_clusters(sb); total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); if (total_clusters > MAX_FAT(sb)) { if (!silent) diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c index 4765190d537f..73c0bd7f7424 100644 --- a/fs/fscache/stats.c +++ b/fs/fscache/stats.c @@ -276,5 +276,5 @@ const struct file_operations fscache_stats_fops = { .open = fscache_stats_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = single_release, }; diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 5849e3ef35cc..32b12e5e33c9 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -517,7 +517,7 @@ void hfsplus_file_truncate(struct inode *inode) struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; - u32 size = inode->i_size; + loff_t size = inode->i_size; res = pagecache_write_begin(NULL, mapping, size, 0, AOP_FLAG_UNINTERRUPTIBLE, diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index a790821366a7..ea3d1ca43e19 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -17,7 +17,8 @@ unsigned int *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, struct quad_buffer_head *qbh, char *id) { secno sec; - if (hpfs_sb(s)->sb_chk) if (bmp_block * 16384 > hpfs_sb(s)->sb_fs_size) { + unsigned n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; + if (hpfs_sb(s)->sb_chk) if (bmp_block >= n_bands) { hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id); return NULL; } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 54f6eccb79d9..0bf578dbfce7 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -552,7 +552,13 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) sbi->sb_cp_table = NULL; sbi->sb_c_bitmap = -1; sbi->sb_max_fwd_alloc = 0xffffff; - + + if (sbi->sb_fs_size >= 0x80000000) { + hpfs_error(s, "invalid size in superblock: %08x", + (unsigned)sbi->sb_fs_size); + goto bail4; + } + /* Load bitmap directory */ if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps)))) goto bail4; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 001ef01d2fe2..36ad5b4aae62 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -927,9 +927,13 @@ static int can_do_hugetlb_shm(void) return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); } -struct file *hugetlb_file_setup(const char *name, unsigned long addr, - size_t size, vm_flags_t acctflag, - struct user_struct **user, int creat_flags) +/* + * Note that size should be aligned to proper hugepage size in caller side, + * otherwise hugetlb_reserve_pages reserves one less hugepages than intended. + */ +struct file *hugetlb_file_setup(const char *name, size_t size, + vm_flags_t acctflag, struct user_struct **user, + int creat_flags) { int error = -ENOMEM; struct file *file; @@ -937,8 +941,6 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr, struct path path; struct dentry *root; struct qstr quick_string; - struct hstate *hstate; - unsigned long num_pages; *user = NULL; if (!hugetlbfs_vfsmount) @@ -972,12 +974,10 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr, if (!inode) goto out_dentry; - hstate = hstate_inode(inode); - size += addr & ~huge_page_mask(hstate); - num_pages = ALIGN(size, huge_page_size(hstate)) >> - huge_page_shift(hstate); error = -ENOMEM; - if (hugetlb_reserve_pages(inode, 0, num_pages, NULL, acctflag)) + if (hugetlb_reserve_pages(inode, 0, + size >> huge_page_shift(hstate_inode(inode)), NULL, + acctflag)) goto out_inode; d_instantiate(path.dentry, inode); diff --git a/fs/inode.c b/fs/inode.c index 9f4f5fecc096..8de457e8ed09 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -705,7 +705,7 @@ void prune_icache_sb(struct super_block *sb, int nr_to_scan) * inode to the back of the list so we don't spin on it. */ if (!spin_trylock(&inode->i_lock)) { - list_move_tail(&inode->i_lru, &sb->s_inode_lru); + list_move(&inode->i_lru, &sb->s_inode_lru); continue; } diff --git a/fs/isofs/export.c b/fs/isofs/export.c index 516eb21895c6..fd88add6ca46 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -135,6 +135,7 @@ isofs_export_encode_fh(struct dentry *dentry, len = 3; fh32[0] = ei->i_iget5_block; fh16[2] = (__u16)ei->i_iget5_offset; /* fh16 [sic] */ + fh16[3] = 0; /* avoid leaking uninitialized data */ fh32[2] = inode->i_generation; if (connectable && !S_ISDIR(inode->i_mode)) { struct inode *parent; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 840f70f50792..a0dcbd62b180 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -325,7 +325,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) int space_left = 0; int first_tag = 0; int tag_flag; - int i, to_free = 0; + int i; int tag_bytes = journal_tag_bytes(journal); struct buffer_head *cbh = NULL; /* For transactional checksums */ __u32 crc32_sum = ~0; @@ -1044,7 +1044,7 @@ restart_loop: journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged; spin_unlock(&journal->j_history_lock); - commit_transaction->t_state = T_FINISHED; + commit_transaction->t_state = T_COMMIT_CALLBACK; J_ASSERT(commit_transaction == journal->j_committing_transaction); journal->j_commit_sequence = commit_transaction->t_tid; journal->j_committing_transaction = NULL; @@ -1059,38 +1059,44 @@ restart_loop: journal->j_average_commit_time*3) / 4; else journal->j_average_commit_time = commit_time; + write_unlock(&journal->j_state_lock); - if (commit_transaction->t_checkpoint_list == NULL && - commit_transaction->t_checkpoint_io_list == NULL) { - __jbd2_journal_drop_transaction(journal, commit_transaction); - to_free = 1; + if (journal->j_checkpoint_transactions == NULL) { + journal->j_checkpoint_transactions = commit_transaction; + commit_transaction->t_cpnext = commit_transaction; + commit_transaction->t_cpprev = commit_transaction; } else { - if (journal->j_checkpoint_transactions == NULL) { - journal->j_checkpoint_transactions = commit_transaction; - commit_transaction->t_cpnext = commit_transaction; - commit_transaction->t_cpprev = commit_transaction; - } else { - commit_transaction->t_cpnext = - journal->j_checkpoint_transactions; - commit_transaction->t_cpprev = - commit_transaction->t_cpnext->t_cpprev; - commit_transaction->t_cpnext->t_cpprev = - commit_transaction; - commit_transaction->t_cpprev->t_cpnext = + commit_transaction->t_cpnext = + journal->j_checkpoint_transactions; + commit_transaction->t_cpprev = + commit_transaction->t_cpnext->t_cpprev; + commit_transaction->t_cpnext->t_cpprev = + commit_transaction; + commit_transaction->t_cpprev->t_cpnext = commit_transaction; - } } spin_unlock(&journal->j_list_lock); - + /* Drop all spin_locks because commit_callback may be block. + * __journal_remove_checkpoint() can not destroy transaction + * under us because it is not marked as T_FINISHED yet */ if (journal->j_commit_callback) journal->j_commit_callback(journal, commit_transaction); trace_jbd2_end_commit(journal, commit_transaction); jbd_debug(1, "JBD2: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); - if (to_free) - jbd2_journal_free_transaction(commit_transaction); + write_lock(&journal->j_state_lock); + spin_lock(&journal->j_list_lock); + commit_transaction->t_state = T_FINISHED; + /* Recheck checkpoint lists after j_list_lock was dropped */ + if (commit_transaction->t_checkpoint_list == NULL && + commit_transaction->t_checkpoint_io_list == NULL) { + __jbd2_journal_drop_transaction(journal, commit_transaction); + jbd2_journal_free_transaction(commit_transaction); + } + spin_unlock(&journal->j_list_lock); + write_unlock(&journal->j_state_lock); wake_up(&journal->j_wait_done_commit); } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index de8b4cb7c31e..f5671271b5ba 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -500,10 +500,10 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) &transaction->t_outstanding_credits); if (atomic_dec_and_test(&transaction->t_updates)) wake_up(&journal->j_wait_updates); + tid = transaction->t_tid; spin_unlock(&transaction->t_handle_lock); jbd_debug(2, "restarting handle %p\n", handle); - tid = transaction->t_tid; need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); if (need_to_start) @@ -1047,9 +1047,12 @@ out: void jbd2_journal_set_triggers(struct buffer_head *bh, struct jbd2_buffer_trigger_type *type) { - struct journal_head *jh = bh2jh(bh); + struct journal_head *jh = jbd2_journal_grab_journal_head(bh); + if (WARN_ON(!jh)) + return; jh->b_triggers = type; + jbd2_journal_put_journal_head(jh); } void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, @@ -1101,17 +1104,18 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; - struct journal_head *jh = bh2jh(bh); + struct journal_head *jh; int ret = 0; - jbd_debug(5, "journal_head %p\n", jh); - JBUFFER_TRACE(jh, "entry"); if (is_handle_aborted(handle)) goto out; - if (!buffer_jbd(bh)) { + jh = jbd2_journal_grab_journal_head(bh); + if (!jh) { ret = -EUCLEAN; goto out; } + jbd_debug(5, "journal_head %p\n", jh); + JBUFFER_TRACE(jh, "entry"); jbd_lock_bh_state(bh); @@ -1202,6 +1206,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) spin_unlock(&journal->j_list_lock); out_unlock_bh: jbd_unlock_bh_state(bh); + jbd2_journal_put_journal_head(jh); out: JBUFFER_TRACE(jh, "exit"); WARN_ON(ret); /* All errors are bugs, so dump the stack */ diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 77b69b27f825..13fc88561da3 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -125,7 +125,7 @@ int jfs_write_inode(struct inode *inode, struct writeback_control *wbc) { int wait = wbc->sync_mode == WB_SYNC_ALL; - if (test_cflag(COMMIT_Nolink, inode)) + if (inode->i_nlink == 0) return 0; /* * If COMMIT_DIRTY is not set, the inode isn't really dirty. diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index 2eb952c41a69..cbe48ea9318e 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1058,7 +1058,8 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) */ void jfs_syncpt(struct jfs_log *log, int hard_sync) { LOG_LOCK(log); - lmLogSync(log, hard_sync); + if (!test_bit(log_QUIESCE, &log->flag)) + lmLogSync(log, hard_sync); LOG_UNLOCK(log); } diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index ca0a08001449..193f04cc97ba 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -144,6 +144,9 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) timeout); if (ret < 0) return -ERESTARTSYS; + /* Reset the lock status after a server reboot so we resend */ + if (block->b_status == nlm_lck_denied_grace_period) + block->b_status = nlm_lck_blocked; req->a_res.status = block->b_status; return 0; } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index a3a09877ea62..8392cb85bd54 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -551,9 +551,6 @@ again: status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); if (status < 0) break; - /* Resend the blocking lock request after a server reboot */ - if (resp->status == nlm_lck_denied_grace_period) - continue; if (resp->status != nlm_lck_blocked) break; } diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index e46353f41a42..aff1c616aedf 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -941,6 +941,7 @@ nlmsvc_retry_blocked(void) unsigned long timeout = MAX_SCHEDULE_TIMEOUT; struct nlm_block *block; + spin_lock(&nlm_blocked_lock); while (!list_empty(&nlm_blocked) && !kthread_should_stop()) { block = list_entry(nlm_blocked.next, struct nlm_block, b_list); @@ -950,6 +951,7 @@ nlmsvc_retry_blocked(void) timeout = block->b_when - jiffies; break; } + spin_unlock(&nlm_blocked_lock); dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", block, block->b_when); @@ -959,7 +961,9 @@ nlmsvc_retry_blocked(void) retry_deferred_block(block); } else nlmsvc_grant_blocked(block); + spin_lock(&nlm_blocked_lock); } + spin_unlock(&nlm_blocked_lock); return timeout; } diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c index 737d839bc17b..6fc7b5cae92b 100644 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ b/fs/nfs/blocklayout/blocklayoutdm.c @@ -55,7 +55,8 @@ static void dev_remove(struct net *net, dev_t dev) bl_pipe_msg.bl_wq = &nn->bl_wq; memset(msg, 0, sizeof(*msg)); - msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); + msg->len = sizeof(bl_msg) + bl_msg.totallen; + msg->data = kzalloc(msg->len, GFP_NOFS); if (!msg->data) goto out; @@ -66,7 +67,6 @@ static void dev_remove(struct net *net, dev_t dev) memcpy(msg->data, &bl_msg, sizeof(bl_msg)); dataptr = (uint8_t *) msg->data; memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); - msg->len = sizeof(bl_msg) + bl_msg.totallen; add_wait_queue(&nn->bl_wq, &wq); if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 30351877aee2..d121c67f87d0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1053,7 +1053,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) struct nfs4_state *state = opendata->state; struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_delegation *delegation; - int open_mode = opendata->o_arg.open_flags & (O_EXCL|O_TRUNC); + int open_mode = opendata->o_arg.open_flags; fmode_t fmode = opendata->o_arg.fmode; nfs4_stateid stateid; int ret = -EAGAIN; @@ -1362,6 +1362,12 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state case -ENOMEM: err = 0; goto out; + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + set_bit(NFS_DELEGATED_STATE, &state->flags); + ssleep(1); + err = -EAGAIN; + goto out; } err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 3210a03342f9..2781563ed542 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -336,20 +336,14 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) struct inode *old_dir = data->old_dir; struct inode *new_dir = data->new_dir; struct dentry *old_dentry = data->old_dentry; - struct dentry *new_dentry = data->new_dentry; if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { rpc_restart_call_prepare(task); return; } - if (task->tk_status != 0) { + if (task->tk_status != 0) nfs_cancel_async_unlink(old_dentry); - return; - } - - d_drop(old_dentry); - d_drop(new_dentry); } /** @@ -550,6 +544,18 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) error = rpc_wait_for_completion_task(task); if (error == 0) error = task->tk_status; + switch (error) { + case 0: + /* The rename succeeded */ + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + d_move(dentry, sdentry); + break; + case -ERESTARTSYS: + /* The result of the rename is unknown. Play it safe by + * forcing a new lookup */ + d_drop(dentry); + d_drop(sdentry); + } rpc_put_task(task); out_dput: dput(sdentry); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index dd0308d65f66..64198ed1d64a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -270,6 +270,7 @@ static __be32 do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { __be32 status; + int accmode = 0; /* We don't know the target directory, and therefore can not * set the change info @@ -283,9 +284,19 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && (open->op_iattr.ia_size == 0); + /* + * In the delegation case, the client is telling us about an + * open that it *already* performed locally, some time ago. We + * should let it succeed now if possible. + * + * In the case of a CLAIM_FH open, on the other hand, the client + * may be counting on us to enforce permissions (the Linux 4.1 + * client uses this for normal opens, for example). + */ + if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH) + accmode = NFSD_MAY_OWNER_OVERRIDE; - status = do_open_permission(rqstp, current_fh, open, - NFSD_MAY_OWNER_OVERRIDE); + status = do_open_permission(rqstp, current_fh, open, accmode); return status; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index abd785e5fd2d..f90b197ceffa 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -213,13 +213,7 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) { if (atomic_dec_and_test(&fp->fi_access[oflag])) { nfs4_file_put_fd(fp, oflag); - /* - * It's also safe to get rid of the RDWR open *if* - * we no longer have need of the other kind of access - * or if we already have the other kind of open: - */ - if (fp->fi_fds[1-oflag] - || atomic_read(&fp->fi_access[1 - oflag]) == 0) + if (atomic_read(&fp->fi_access[1 - oflag]) == 0) nfs4_file_put_fd(fp, O_RDWR); } } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 967d68eef282..cb997b1bb398 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -161,8 +161,8 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) */ memcpy(p, argp->p, avail); /* step to next page */ - argp->p = page_address(argp->pagelist[0]); argp->pagelist++; + argp->p = page_address(argp->pagelist[0]); if (argp->pagelen < PAGE_SIZE) { argp->end = argp->p + (argp->pagelen>>2); argp->pagelen = 0; @@ -263,7 +263,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, iattr->ia_valid |= ATTR_SIZE; } if (bmval[0] & FATTR4_WORD0_ACL) { - int nace; + u32 nace; struct nfs4_ace *ace; READ_BUF(4); len += 4; @@ -343,10 +343,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ32(dummy32); - if (dummy32) - return nfserr_inval; - READ32(iattr->ia_atime.tv_sec); + READ64(iattr->ia_atime.tv_sec); READ32(iattr->ia_atime.tv_nsec); if (iattr->ia_atime.tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -369,10 +366,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, all 32 bits of 'nseconds'. */ READ_BUF(12); len += 12; - READ32(dummy32); - if (dummy32) - return nfserr_inval; - READ32(iattr->ia_mtime.tv_sec); + READ64(iattr->ia_mtime.tv_sec); READ32(iattr->ia_mtime.tv_nsec); if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) return nfserr_inval; @@ -2371,8 +2365,7 @@ out_acl: if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { if ((buflen -= 12) < 0) goto out_resource; - WRITE32(0); - WRITE32(stat.atime.tv_sec); + WRITE64((s64)stat.atime.tv_sec); WRITE32(stat.atime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_DELTA) { @@ -2385,15 +2378,13 @@ out_acl: if (bmval1 & FATTR4_WORD1_TIME_METADATA) { if ((buflen -= 12) < 0) goto out_resource; - WRITE32(0); - WRITE32(stat.ctime.tv_sec); + WRITE64((s64)stat.ctime.tv_sec); WRITE32(stat.ctime.tv_nsec); } if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { if ((buflen -= 12) < 0) goto out_resource; - WRITE32(0); - WRITE32(stat.mtime.tv_sec); + WRITE64((s64)stat.mtime.tv_sec); WRITE32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 8f7b95ac1f7e..aa526bebee3f 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -195,13 +195,32 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) static int nilfs_set_page_dirty(struct page *page) { - int ret = __set_page_dirty_buffers(page); + int ret = __set_page_dirty_nobuffers(page); - if (ret) { + if (page_has_buffers(page)) { struct inode *inode = page->mapping->host; - unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits); + unsigned nr_dirty = 0; + struct buffer_head *bh, *head; - nilfs_set_file_dirty(inode, nr_dirty); + /* + * This page is locked by callers, and no other thread + * concurrently marks its buffers dirty since they are + * only dirtied through routines in fs/buffer.c in + * which call sites of mark_buffer_dirty are protected + * by page lock. + */ + bh = head = page_buffers(page); + do { + /* Do not mark hole blocks dirty */ + if (buffer_dirty(bh) || !buffer_mapped(bh)) + continue; + + set_buffer_dirty(bh); + nr_dirty++; + } while (bh = bh->b_this_page, bh != head); + + if (nr_dirty) + nilfs_set_file_dirty(inode, nr_dirty); } return ret; } diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 3568c8a8b138..48bc91d60fce 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -120,6 +120,7 @@ static int fill_event_metadata(struct fsnotify_group *group, metadata->event_len = FAN_EVENT_METADATA_LEN; metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; + metadata->reserved = 0; metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; metadata->pid = pid_vnr(event->tgid); if (unlikely(event->mask & FAN_Q_OVERFLOW)) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 6f292dd53c6d..f255d372a5df 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -577,7 +577,6 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, int add = (arg & IN_MASK_ADD); int ret; - /* don't allow invalid bits: we don't want flags set */ mask = inotify_arg_to_mask(arg); fsn_mark = fsnotify_find_inode_mark(group, inode); @@ -628,7 +627,6 @@ static int inotify_new_watch(struct fsnotify_group *group, struct idr *idr = &group->inotify_data.idr; spinlock_t *idr_lock = &group->inotify_data.idr_lock; - /* don't allow invalid bits: we don't want flags set */ mask = inotify_arg_to_mask(arg); tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); @@ -757,6 +755,10 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, int ret, fput_needed; unsigned flags = 0; + /* don't allow invalid bits: we don't want flags set */ + if (unlikely(!(mask & ALL_INOTIFY_BITS))) + return -EINVAL; + filp = fget_light(fd, &fput_needed); if (unlikely(!filp)) return -EBADF; diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 2f5b92ef0e53..7eb1c0c7c166 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -791,7 +791,7 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, &hole_size, &rec, &is_last); if (ret) { mlog_errno(ret); - goto out; + goto out_unlock; } if (rec.e_blkno == 0ULL) { diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 2e3ea308c144..5b8d94436105 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -6499,6 +6499,16 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) } new_oi = OCFS2_I(args->new_inode); + /* + * Adjust extent record count to reserve space for extended attribute. + * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). + */ + if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && + !(ocfs2_inode_is_fast_symlink(args->new_inode))) { + struct ocfs2_extent_list *el = &new_di->id2.i_list; + le16_add_cpu(&el->l_count, -(inline_size / + sizeof(struct ocfs2_extent_rec))); + } spin_lock(&new_oi->ip_lock); new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); diff --git a/fs/pipe.c b/fs/pipe.c index fec5e4ad071a..1667e6fe0416 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -860,6 +860,9 @@ pipe_rdwr_open(struct inode *inode, struct file *filp) { int ret = -ENOENT; + if (!(filp->f_mode & (FMODE_READ|FMODE_WRITE))) + return -EINVAL; + mutex_lock(&inode->i_mutex); if (inode->i_pipe) { diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 46fc1c20a6b1..048d9900910c 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -187,8 +187,8 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset, if (dbuf->count == ARRAY_SIZE(dbuf->dentries)) return -ENOSPC; - if (name[0] == '.' && (name[1] == '\0' || - (name[1] == '.' && name[2] == '\0'))) + if (name[0] == '.' && (namelen < 2 || + (namelen == 2 && name[1] == '.'))) return 0; dentry = lookup_one_len(name, dbuf->xadir, namelen); diff --git a/fs/super.c b/fs/super.c index cf001775617f..3c520a5ed715 100644 --- a/fs/super.c +++ b/fs/super.c @@ -298,19 +298,19 @@ EXPORT_SYMBOL(deactivate_super); * and want to turn it into a full-blown active reference. grab_super() * is called with sb_lock held and drops it. Returns 1 in case of * success, 0 if we had failed (superblock contents was already dead or - * dying when grab_super() had been called). + * dying when grab_super() had been called). Note that this is only + * called for superblocks not in rundown mode (== ones still on ->fs_supers + * of their type), so increment of ->s_count is OK here. */ static int grab_super(struct super_block *s) __releases(sb_lock) { - if (atomic_inc_not_zero(&s->s_active)) { - spin_unlock(&sb_lock); - return 1; - } - /* it's going away */ s->s_count++; spin_unlock(&sb_lock); - /* wait for it to die */ down_write(&s->s_umount); + if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) { + put_super(s); + return 1; + } up_write(&s->s_umount); put_super(s); return 0; @@ -440,11 +440,6 @@ retry: destroy_super(s); s = NULL; } - down_write(&old->s_umount); - if (unlikely(!(old->s_flags & MS_BORN))) { - deactivate_locked_super(old); - goto retry; - } return old; } } @@ -677,10 +672,10 @@ restart: if (hlist_unhashed(&sb->s_instances)) continue; if (sb->s_bdev == bdev) { - if (grab_super(sb)) /* drops sb_lock */ - return sb; - else + if (!grab_super(sb)) goto restart; + up_write(&sb->s_umount); + return sb; } } spin_unlock(&sb_lock); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index a545d819b495..9a5f07d962ab 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -994,6 +994,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) enum kobj_ns_type type; const void *ns; ino_t ino; + loff_t off; type = sysfs_ns_type(parent_sd); ns = sysfs_info(dentry->d_sb)->ns[type]; @@ -1002,6 +1003,8 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) ino = parent_sd->s_ino; if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) filp->f_pos++; + else + return 0; } if (filp->f_pos == 1) { if (parent_sd->s_parent) @@ -1010,8 +1013,11 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) ino = parent_sd->s_ino; if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0) filp->f_pos++; + else + return 0; } mutex_lock(&sysfs_mutex); + off = filp->f_pos; for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos); pos; pos = sysfs_dir_next_pos(ns, parent_sd, filp->f_pos, pos)) { @@ -1023,27 +1029,43 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) len = strlen(name); ino = pos->s_ino; type = dt_type(pos); - filp->f_pos = pos->s_hash; + off = filp->f_pos = pos->s_hash; filp->private_data = sysfs_get(pos); mutex_unlock(&sysfs_mutex); - ret = filldir(dirent, name, len, filp->f_pos, ino, type); + ret = filldir(dirent, name, len, off, ino, type); mutex_lock(&sysfs_mutex); if (ret < 0) break; } mutex_unlock(&sysfs_mutex); - if ((filp->f_pos > 1) && !pos) { /* EOF */ - filp->f_pos = INT_MAX; + + /* don't reference last entry if its refcount is dropped */ + if (!pos) { filp->private_data = NULL; + + /* EOF and not changed as 0 or 1 in read/write path */ + if (off == filp->f_pos && off > 1) + filp->f_pos = INT_MAX; } return 0; } +static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct inode *inode = file->f_path.dentry->d_inode; + loff_t ret; + + mutex_lock(&inode->i_mutex); + ret = generic_file_llseek(file, offset, whence); + mutex_unlock(&inode->i_mutex); + + return ret; +} const struct file_operations sysfs_dir_operations = { .read = generic_read_dir, .readdir = sysfs_readdir, .release = sysfs_dir_release, - .llseek = generic_file_llseek, + .llseek = sysfs_dir_llseek, }; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 8640a12f0650..25c472bb8ab5 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -357,31 +357,50 @@ static unsigned int vfs_dent_type(uint8_t type) static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) { int err, over = 0; + loff_t pos = file->f_pos; struct qstr nm; union ubifs_key key; struct ubifs_dent_node *dent; struct inode *dir = file->f_path.dentry->d_inode; struct ubifs_info *c = dir->i_sb->s_fs_info; - dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos); + dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, pos); - if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2) + if (pos > UBIFS_S_KEY_HASH_MASK || pos == 2) /* * The directory was seek'ed to a senseless position or there * are no more entries. */ return 0; + if (file->f_version == 0) { + /* + * The file was seek'ed, which means that @file->private_data + * is now invalid. This may also be just the first + * 'ubifs_readdir()' invocation, in which case + * @file->private_data is NULL, and the below code is + * basically a no-op. + */ + kfree(file->private_data); + file->private_data = NULL; + } + + /* + * 'generic_file_llseek()' unconditionally sets @file->f_version to + * zero, and we use this for detecting whether the file was seek'ed. + */ + file->f_version = 1; + /* File positions 0 and 1 correspond to "." and ".." */ - if (file->f_pos == 0) { + if (pos == 0) { ubifs_assert(!file->private_data); over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR); if (over) return 0; - file->f_pos = 1; + file->f_pos = pos = 1; } - if (file->f_pos == 1) { + if (pos == 1) { ubifs_assert(!file->private_data); over = filldir(dirent, "..", 2, 1, parent_ino(file->f_path.dentry), DT_DIR); @@ -397,7 +416,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) goto out; } - file->f_pos = key_hash_flash(c, &dent->key); + file->f_pos = pos = key_hash_flash(c, &dent->key); file->private_data = dent; } @@ -405,17 +424,16 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) if (!dent) { /* * The directory was seek'ed to and is now readdir'ed. - * Find the entry corresponding to @file->f_pos or the - * closest one. + * Find the entry corresponding to @pos or the closest one. */ - dent_key_init_hash(c, &key, dir->i_ino, file->f_pos); + dent_key_init_hash(c, &key, dir->i_ino, pos); nm.name = NULL; dent = ubifs_tnc_next_ent(c, &key, &nm); if (IS_ERR(dent)) { err = PTR_ERR(dent); goto out; } - file->f_pos = key_hash_flash(c, &dent->key); + file->f_pos = pos = key_hash_flash(c, &dent->key); file->private_data = dent; } @@ -427,7 +445,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) ubifs_inode(dir)->creat_sqnum); nm.len = le16_to_cpu(dent->nlen); - over = filldir(dirent, dent->name, nm.len, file->f_pos, + over = filldir(dirent, dent->name, nm.len, pos, le64_to_cpu(dent->inum), vfs_dent_type(dent->type)); if (over) @@ -443,9 +461,17 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) } kfree(file->private_data); - file->f_pos = key_hash_flash(c, &dent->key); + file->f_pos = pos = key_hash_flash(c, &dent->key); file->private_data = dent; cond_resched(); + + if (file->f_version == 0) + /* + * The file was seek'ed meanwhile, lets return and start + * reading direntries from the new position on the next + * invocation. + */ + return 0; } out: @@ -456,15 +482,13 @@ out: kfree(file->private_data); file->private_data = NULL; + /* 2 is a special value indicating that there are no more direntries */ file->f_pos = 2; return 0; } -/* If a directory is seeked, we have to free saved readdir() state */ static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin) { - kfree(file->private_data); - file->private_data = NULL; return generic_file_llseek(file, offset, origin); } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 76e4e0566ad6..d867bd97bc60 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1582,6 +1582,12 @@ static int ubifs_remount_rw(struct ubifs_info *c) c->remounting_rw = 1; c->ro_mount = 0; + if (c->space_fixup) { + err = ubifs_fixup_free_space(c); + if (err) + return err; + } + err = check_free_space(c); if (err) goto out; @@ -1698,12 +1704,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) err = dbg_check_space_info(c); } - if (c->space_fixup) { - err = ubifs_fixup_free_space(c); - if (err) - goto out; - } - mutex_unlock(&c->umount_mutex); return err; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 38de8f234b94..78bff11169ea 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -1280,6 +1280,7 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp, *lenp = 3; fid->udf.block = location.logicalBlockNum; fid->udf.partref = location.partitionReferenceNum; + fid->udf.parent_partref = 0; fid->udf.generation = inode->i_generation; if (connectable && !S_ISDIR(inode->i_mode)) { diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 42ad69ac9576..9e8e08ef246b 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -82,7 +82,7 @@ struct udf_virtual_data { struct udf_bitmap { __u32 s_extLength; __u32 s_extPosition; - __u16 s_nr_groups; + int s_nr_groups; struct buffer_head **s_block_bitmap; }; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 3011b879f850..23c79cab08de 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -457,6 +457,28 @@ xfs_vn_getattr( return 0; } +static void +xfs_setattr_mode( + struct xfs_trans *tp, + struct xfs_inode *ip, + struct iattr *iattr) +{ + struct inode *inode = VFS_I(ip); + umode_t mode = iattr->ia_mode; + + ASSERT(tp); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + + ip->i_d.di_mode &= S_IFMT; + ip->i_d.di_mode |= mode & ~S_IFMT; + + inode->i_mode &= S_IFMT; + inode->i_mode |= mode & ~S_IFMT; +} + int xfs_setattr_nonsize( struct xfs_inode *ip, @@ -608,18 +630,8 @@ xfs_setattr_nonsize( /* * Change file access modes. */ - if (mask & ATTR_MODE) { - umode_t mode = iattr->ia_mode; - - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) - mode &= ~S_ISGID; - - ip->i_d.di_mode &= S_IFMT; - ip->i_d.di_mode |= mode & ~S_IFMT; - - inode->i_mode &= S_IFMT; - inode->i_mode |= mode & ~S_IFMT; - } + if (mask & ATTR_MODE) + xfs_setattr_mode(tp, ip, iattr); /* * Change file access or modified times. @@ -716,9 +728,8 @@ xfs_setattr_size( return XFS_ERROR(error); ASSERT(S_ISREG(ip->i_d.di_mode)); - ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| - ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| - ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); + ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| + ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); lock_flags = XFS_ILOCK_EXCL; if (!(flags & XFS_ATTR_NOLOCK)) @@ -861,6 +872,12 @@ xfs_setattr_size( xfs_iflags_set(ip, XFS_ITRUNCATED); } + /* + * Change file access modes. + */ + if (mask & ATTR_MODE) + xfs_setattr_mode(tp, ip, iattr); + if (mask & ATTR_CTIME) { inode->i_ctime = iattr->ia_ctime; ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; |